Import Library

In [11]:
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyMuPDFLoader

Extract Texts from PDFs

In [1]:
import os
import ollama
from langchain_community.document_loaders import PDFPlumberLoader

# Replace 'your_document.pdf' with the path to your PDF file
loader = PDFPlumberLoader('SALES STRATEGIES OF E.pdf')
documents = loader.load()

In [2]:
documents

[Document(metadata={'source': 'pdf/SALES STRATEGIES OF E.pdf', 'file_path': 'pdf/SALES STRATEGIES OF E.pdf', 'page': 0, 'total_pages': 4, 'Author': 'Admin', 'Creator': 'Microsoft® Word 2010', 'CreationDate': "D:20230927090127+05'30'", 'ModDate': "D:20230927090127+05'30'", 'Producer': 'Microsoft® Word 2010'}, page_content='SALES STRATEGIES OF E-COMMERCE OPERATORS PRE\nAND POST GST\nINTRODUCTION:\nGoods and Services Tax (GST) is an indirect tax (or consumption tax) used in India on the\nsupply of goods and services. It is a comprehensive, multistage, destination based tax: comprehensive\nbecause it has subsumed almost all the indirect taxes except a few state taxes. Multi-staged as it is, the\nGST is imposed at every step in the production process, but is meant to be refunded to all parties in\nthe various stages of production other than the final consumer and as a destination based tax, it is\ncollected from point of consumption and not point of origin like previous taxes. E-commerce\nc

 Split Text

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

In [4]:
docs

[Document(metadata={'source': 'pdf/SALES STRATEGIES OF E.pdf', 'file_path': 'pdf/SALES STRATEGIES OF E.pdf', 'page': 0, 'total_pages': 4, 'Author': 'Admin', 'Creator': 'Microsoft® Word 2010', 'CreationDate': "D:20230927090127+05'30'", 'ModDate': "D:20230927090127+05'30'", 'Producer': 'Microsoft® Word 2010'}, page_content='SALES STRATEGIES OF E-COMMERCE OPERATORS PRE\nAND POST GST\nINTRODUCTION:\nGoods and Services Tax (GST) is an indirect tax (or consumption tax) used in India on the\nsupply of goods and services. It is a comprehensive, multistage, destination based tax: comprehensive\nbecause it has subsumed almost all the indirect taxes except a few state taxes. Multi-staged as it is, the\nGST is imposed at every step in the production process, but is meant to be refunded to all parties in\nthe various stages of production other than the final consumer and as a destination based tax, it is\ncollected from point of consumption and not point of origin like previous taxes. E-commerce\nc

Create Embedding from text chunks

In [5]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model='nomic-embed-text',
)

Store and Use Embeddings in Chroma DB

In [6]:
from langchain.vectorstores import Chroma

# Initialize Chroma vector store
vector_store = Chroma(collection_name="ragllm", embedding_function=embeddings)

# Add documents to the vector store
vector_store.add_documents(docs)


  vector_store = Chroma(collection_name="ragllm", embedding_function=embeddings)


['e57d8e65-e4dc-444e-9629-69700ea1ee84',
 '4b4b6828-4100-454a-81d2-87c950bf45d2',
 '6576265a-1d96-4a3e-868c-61f0b6ef7090',
 'ebe4c3f8-18aa-443f-a675-594bec4561b3',
 '864e8231-6f65-4372-b8bd-572858d9714e',
 '7ebf9da9-a0b6-478a-99c1-542c9c61cc0d',
 '27e51632-92e1-4982-a4dd-7f857388f970',
 '2266f061-5de3-4c40-96b9-15a8ba534f91',
 '9938992e-a8e1-4ad0-9c6a-70d56be37187',
 '282f67ea-b2f4-4f65-92d0-78a312618552',
 'cb582535-8a94-4aff-87ce-673de0411d43']

Initialize the model

In [7]:
from langchain_ollama import OllamaLLM

# Initialize the model
local_model = "llama3.2"
llm = OllamaLLM(model=local_model, temperature=1)


Query Processing

In [8]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)


In [9]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_ollama import ChatOllama

retriever = MultiQueryRetriever.from_llm(
                                          vector_store.as_retriever(),
                                          ChatOllama(model=local_model),
                                          prompt=QUERY_PROMPT
)

# RAG prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [10]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Qery and answers

In [11]:
from IPython.display import Markdown, display

questions = "what is property tax?"
display(Markdown(chain.invoke(questions)))

Property tax is a type of tax that is levied on the value of real estate or other forms of property, such as land or buildings. It is typically collected by local governments, such as municipalities or counties, and is used to fund various public goods and services, such as education, infrastructure, and law enforcement.

In general, property tax is based on the assessed value of the property, which is usually determined by a local government's appraisal process. The amount of property tax owed depends on several factors, including:

1. Value of the property
2. Local tax rates
3. Exemptions (such as homestead exemptions or senior citizen exemptions)
4. Any applicable abatements (reductions in tax rate)

The primary purpose of property tax is to raise revenue for local governments to fund various public services and infrastructure. The amount of property tax collected can vary significantly depending on the location, type of property, and other factors.

Some common types of property taxes include:

1. Real estate tax
2. Personal property tax (levied on personal belongings, such as cars or jewelry)
3. Commercial property tax (levied on businesses)

Overall, property tax is an important source of revenue for local governments, and its administration and collection can have a significant impact on residents and businesses in the affected areas.

In [14]:
from IPython.display import Markdown, display

questions = "what is goods and Services tax?"
display(Markdown(chain.invoke(questions)))

Goods and Services Tax (GST) is a type of consumption tax that is levied on the supply of goods and services in India. It is an indirect tax that is used to fund government expenditures.

In simple terms, GST is a tax on:

* Goods: products such as food, clothing, electronics, etc.
* Services: intangible goods like transportation, education, healthcare, etc.

GST is a comprehensive tax system that aims to simplify and consolidate various taxes, such as excise duty, customs duty, value-added tax (VAT), and service tax. It is a destination-based tax, meaning that it is collected at the point of consumption, rather than the point of origin.

In [15]:
from IPython.display import Markdown, display

questions = "give me some SALES STRATEGIES OF E-COMMERCE?"
display(Markdown(chain.invoke(questions)))

Based on the provided text, here are some sales strategies adopted by e-commerce operators since the introduction of GST:

1. **Offering lower prices**: E-commerce operators have made online shopping more affordable for consumers by reducing prices.
2. **Wider range of products**: E-commerce operators have expanded their product offerings to cater to a broader customer base.
3. **Better customer experience**: E-commerce operators are focusing on providing a better customer experience through various means, such as personalization and social media engagement.
4. **Personalization**: E-commerce operators are using data and analytics to offer personalized products and recommendations to customers.
5. **Omnichannel retailing**: E-commerce operators are adopting omnichannel strategies that allow customers to start shopping online and finish in-store, or vice versa.
6. **Social commerce**: E-commerce operators are using social media platforms to connect with customers and drive sales through contests, promotions, and direct product sales.

These strategies have helped e-commerce operators adapt to the post-GST era and capitalize on new opportunities for growth.