In [1]:
# Importing the necessary packages
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS, Chroma
from langchain.chains import RetrievalQA
import textwrap

In [2]:
# Load multiple PDFs into a list of documents
def load_pdfs(file_paths):
    """Load every PDF in ``file_paths`` and return their documents as one flat list.

    Each path is opened with ``PyMuPDFLoader`` and whatever ``load()`` yields
    for it is appended, so the result follows the order of ``file_paths``.
    """
    return [
        document
        for pdf_path in file_paths
        for document in PyMuPDFLoader(file_path=pdf_path).load()
    ]

In [3]:
# Responsible for splitting the documents into several chunks
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split ``documents`` into smaller chunks.

    A ``RecursiveCharacterTextSplitter`` is configured with ``chunk_size`` and
    ``chunk_overlap`` and applied to ``documents``; the result of its
    ``split_documents`` call is returned as-is.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(
        documents=documents
    )

In [4]:
# function for loading the embedding model
def load_embedding_model(model_path, normalize_embedding=True):
    """Create a ``HuggingFaceEmbeddings`` instance for ``model_path``.

    ``normalize_embedding`` is forwarded as the ``normalize_embeddings``
    encode option.
    """
    # The model is configured to run on the CPU only.
    device_settings = {'device': 'cpu'}
    # Keep normalisation on when cosine similarity is going to be computed.
    encoding_settings = {'normalize_embeddings': normalize_embedding}

    return HuggingFaceEmbeddings(
        model_name=model_path,
        model_kwargs=device_settings,
        encode_kwargs=encoding_settings,
    )

In [1]:
# Function for building a FAISS vector store from the document chunks
def create_embeddings_faiss(chunks, embedding_model, storing_path="vectorstore"):
    """Build a FAISS vector store from ``chunks`` and save it to ``storing_path``.

    Args:
        chunks: Document chunks to index (e.g. the output of ``split_docs``).
        embedding_model: Embedding model used to embed each chunk.
        storing_path: Folder the index is written to. Pass ``None`` or an
            empty string to keep the index in memory only.

    Returns:
        The FAISS vector store that was created.
    """
    # from_documents takes only the documents and the embedding model
    # positionally; the storage path is not one of its arguments, so passing
    # it there fails. Saving is a separate step.
    vectorstore = FAISS.from_documents(chunks, embedding_model)

    if storing_path:
        vectorstore.save_local(storing_path)

    return vectorstore

In [3]:
def create_embeddings_chroma(chunks, embedding_model, storing_path="chroma_store"):
    """Build a Chroma vector store from ``chunks``, backed by ``storing_path``.

    Args:
        chunks: Document chunks to index (e.g. the output of ``split_docs``).
        embedding_model: Embedding model used to embed each chunk.
        storing_path: Directory Chroma uses as its persistence location.

    Returns:
        The Chroma vector store that was created.
    """
    # The persistence directory has to be supplied when the store is created;
    # Chroma's persist() does not accept a path argument, so the previous
    # persist(storing_path) call could not work.
    # NOTE(review): recent Chroma releases write to persist_directory
    # automatically; on chromadb < 0.4 an explicit vectorstore.persist() may
    # still be required - confirm against the installed version.
    vectorstore = Chroma.from_documents(
        chunks,
        embedding_model,
        persist_directory=storing_path,
    )

    return vectorstore


In [7]:
# Creating the chain for Question Answering
def load_qa_chain(retriever, llm, prompt):
    """Assemble a ``RetrievalQA`` chain of type "stuff".

    The chain uses ``retriever`` to fetch documents, ``llm`` to answer, and
    ``prompt`` as its custom prompt; source documents are included in the
    chain output.
    """
    chain_options = dict(
        llm=llm,
        retriever=retriever,  # the vector store exposed as a retriever
        chain_type="stuff",
        return_source_documents=True,  # keep the retrieved sources in the output
        chain_type_kwargs={'prompt': prompt},  # plug in the custom prompt
    )
    return RetrievalQA.from_chain_type(**chain_options)

In [8]:
# Prettifying the response
def get_response(query, chain, width=100):
    """Run ``query`` through ``chain`` and print the answer wrapped to ``width``.

    Args:
        query: The question to ask; sent to the chain as ``{'query': query}``.
        chain: Object exposing ``invoke`` whose result has a ``'result'`` entry.
        width: Maximum line length of the printed answer (default 100).

    Returns:
        None. The wrapped answer is printed.
    """
    # Getting response from chain
    response = chain.invoke({'query': query})

    # Wrap each line separately: a single textwrap.fill over the whole answer
    # would collapse paragraph breaks and list items into one block of text.
    wrapped_text = "\n".join(
        textwrap.fill(line, width=width)
        for line in response['result'].splitlines()
    )

    # Print the wrapped text (previously the unwrapped answer was printed and
    # the wrapped version was discarded).
    print(wrapped_text)

## Implementation

In [9]:
from langchain_ollama.llms import OllamaLLM
from langchain import PromptTemplate

In [10]:
# llm = OllamaLLM(model="orca-mini", temperature=0)

In [11]:
llm = OllamaLLM(model="llama3.2", temperature=0)

In [12]:
# llm = OllamaLLM(model="mistral", temperature=0)

In [13]:
# embed = load_embedding_model(model_path="all-MiniLM-L6-v2")
embed = load_embedding_model(model_path="sentence-transformers/all-mpnet-base-v2")
# embed = load_embedding_model(model_path="sentence-transformers/multi-qa-mpnet-base-dot-v1")

In [17]:
file_paths = [
    "./docs/rodtep_guidelines-output.pdf",
    "./docs/Drawback-Rates.pdf",
    "./docs/furniture_tv.pdf",
]
docs = load_pdfs(file_paths=file_paths)

In [20]:
documents = split_docs(documents=docs)

In [2]:
# Build the vector store from the chunked documents and the embedding model.
# NOTE(review): the output recorded for this cell is
# "NameError: name 'create_embeddings_chroma' is not defined", i.e. the cell was
# executed before the cell that defines create_embeddings_chroma. Run the
# definition cell first (Restart Kernel -> Run All should do this in order).
# vectorstore = create_embeddings_faiss(documents, embed)
vectorstore = create_embeddings_chroma(documents, embed)

# Expose the vector store as a retriever, using as_retriever() defaults.
retriever = vectorstore.as_retriever()

NameError: name 'create_embeddings_chroma' is not defined

In [33]:
# Build the prompt from an inline template with {context} and {question}
# placeholders. The trailing backslashes inside the string join the system
# text into a single line.
# NOTE(review): the system text asks for answers "using only the context" but
# then allows answering "from your own knowledge base". These instructions
# conflict, so answers are not guaranteed to be grounded in the loaded PDFs -
# confirm which behaviour is intended.
prompt = PromptTemplate.from_template("""
### System:
You are an respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
please think rationally and answer from your own knowledge base

### Context:
{context}

### User:
{question}

### Response:
""")

# Assemble the question-answering chain from the retriever, the LLM and the prompt
chain = load_qa_chain(retriever, llm, prompt)

In [25]:
# NOTE(review): this question looks unrelated to the export PDFs listed in
# file_paths, so the answer below presumably comes from the model's own
# knowledge (the prompt permits that fallback) - verify it independently.
get_response("Explain the Kaplan–Meier Survival Curve", chain)

I'd be happy to explain the Kaplan-Meier Survival Curve.

The Kaplan-Meier Survival Curve is a graphical representation of the survival probability of subjects in a study over time, typically used in medical research and statistics. It's named after Edwin Kaplan and Maurice Meier, who first described it in 1958.

The curve shows the proportion of patients who have not experienced an event (such as death, relapse, or recurrence) at different time points. The x-axis represents time, and the y-axis represents the survival probability.

Here's how it works:

1. **Censoring**: In a study, some participants may still be alive at the end of the observation period, but their outcome status (e.g., dead or alive) is not known. These participants are said to be censored.
2. **Event occurrence**: When an event occurs (e.g., death), the participant's survival time is recorded as the time of event occurrence.
3. **Kaplan-Meier estimation**: The Kaplan-Meier estimator calculates the survival probabil

In [26]:
# NOTE(review): the query spells the scheme "RDOTEP" while the answer below
# uses "RoDTEP" - confirm whether the misspelling is intentional.
get_response("Explain the RDOTEP Guidelines", chain)

The RoDTEP (Remission of Duties and Taxes on Exported Products) Scheme is a government initiative aimed at refunding duties and taxes levied on exported products. Here's an overview of the guidelines:

**Objective:**
The primary objective of the RoDTEP Scheme is to refund duties and taxes borne by exporters, including prior stage cumulative indirect taxes on goods and services used in production.

**Eligibility:**
The scheme is available for exports made by categories mentioned in para 4.55 (x), (xi), and (xii) above.

**Rebate Nature:**
The rebate under the RoDTEP Scheme will be used only for payment of duty of Customs leviable under the First Schedule to the Customs Tariff Act, 1975, specifically Basic Customs Duty.

**Scheme Structure:**

1. **Output-Outcome Framework:** A broad-level monitoring framework will be maintained and monitored at regular intervals.
2. **Risk Management System (RMS):** An IT-based RMS will be implemented by the Central Board of Indirect Taxes and Customs (

In [27]:
get_response("What are some of the things I have to keep in mind while exporting electronics", chain)

As an assistant, I'll provide you with general guidance on what to consider when exporting electronics. Please note that specific requirements may vary depending on the destination country and regulations.

When exporting electronics, here are some key things to keep in mind:

1. **Compliance with Regulations**: Ensure your products comply with the regulations of the destination country, including those related to hazardous substances (e.g., lead, cadmium, phthalates), formaldehyde emissions, and other chemicals mentioned in the context provided.
2. **Labeling and Declaration**: Familiarize yourself with labeling requirements under Federal Hazardous Substances Act (FHSA) and other relevant regulations. Declare any hazardous materials or substances used in your products to avoid customs issues.
3. **Export Documentation**: Accurately complete export documentation, including commercial invoices, packing lists, and certificates of origin. Ensure all necessary information is provided to fa

In [28]:
get_response("What if i want to export a apple iphone mobile to USA", chain)

Based on the context provided, it appears that you are referring to exporting an Apple iPhone mobile from India to the USA.

According to the information available, Apple iPhones manufactured in India would fall under the category of products manufactured partly or wholly in India under Section 65 of the Customs Act, 1962.

As per the Foreign Trade Policy, export of such products is subject to various regulations and requirements. However, I couldn't find specific information on the export procedures for Apple iPhones from India to the USA.

To determine the correct export documentation, sampling procedures, record-keeping, and other requirements, you would need to consult the Central Board of Indirect Taxes and Customs (CBIC) or the Directorate General of Foreign Trade (DGFT), which are responsible for regulating exports in India.

It's also worth noting that Apple iPhones manufactured in India may be eligible for various export incentives, such as the RoDTEP (Reduced Rate of Duty on 

In [29]:
get_response("What if i want to export furniture", chain)

If you want to export furniture, there are several requirements and regulations you need to be aware of:

1. **Registration with EPA**: If your furniture contains any antimicrobial textile or other pesticide-related characteristics, it must be registered with the Environmental Protection Agency (EPA).
2. **TSCA Compliance**: As a manufacturer of wooden products, you are required to comply with the formaldehyde emission limits established by the Toxic Substances Control Act (TSCA) Title VI. You will need to conduct testing of your product through a CPSCA-affiliated lab and obtain a certificate from the Importer on Record.
3. **Customs Regulations**: Depending on the type of furniture you are exporting, there may be specific categories that apply to it. For example:
	* Wooden frames for paintings, photographs, mirrors, or similar objects (4414) fall under 1.3% duty rate.
	* Packing cases, boxes, crates, drums, and similar packings of wood; cable-drums of wood; pallets, box pallets, and o

In [30]:
get_response("Give me a checklist if i want to export furniture", chain)

Based on the provided context, here is a checklist for exporting furniture:

**Pre-Export Compliance Requirements:**

1. **Check product classification**: Ensure that your furniture falls under one of the following categories and complies with the relevant regulations:
	* 4414: Wooden frames for paintings, photographs, mirrors or similar objects
	* 4415: Packing cases, boxes, crates, drums and similar packings, of wood; cable-drums of wood; pallets, box pallets and other load boards, of wood; pallet collars of wood
	* 4416: Casks, barrels, vats, tubs and other coopers' products and parts thereof, of wood, including staves
	* 4417: Tools, tool bodies, tool handles, broom or brush bodies and handles, of wood; boot or shoe lasts and trees, of wood
	* 4418: Builders’ joinery and carpentry of wood, including cellular wood panels, assembled flooring panels, Shingles and shakes
	* 4419: Tableware and kitchenware, of wood
2. **Check for antimicrobial textile or pesticide-related characteristic

In [31]:
get_response("What are the rates?", chain)

The rates mentioned in the provided context are:

1. 1.3% for Tariff Items 3303, 3304, 3305, and 3307.
2. 2.9% for Tariff Item 030701 (in chilled or frozen form).
3. 24.2% for Tariff Item 030801 (in chilled or frozen form).

These rates are expressed as percentages of the free on board value or the rate per unit quantity of the export goods, as mentioned in column (4) of the Schedule.
