In [18]:
import os

import pandas as pd
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS, Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_ollama import ChatOllama

In [19]:
# Model identifiers, kept together so they are easy to swap.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
CHAT_MODEL_NAME = "llama3.1"

# Embedding model: passed to the vector store when the chunks are indexed.
embed_model = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)

# Chat model served through Ollama; temperature is set to 0 and verbose
# logging is switched on.
chat_ollama = ChatOllama(
    model=CHAT_MODEL_NAME,
    temperature=0,
    verbose=True,
)

In [20]:
# Build one loader per source file, load each, then merge the results
# into a single list of documents (text documents first, PDF pages after).
text_loader = TextLoader("data/moglix.txt", encoding="utf-8")
pdf_loader = PyPDFLoader("data/Iphoneinvoicev.pdf")

text_doc = text_loader.load()
pdf_doc = pdf_loader.load()
document = [*text_doc, *pdf_doc]

In [21]:
# Split the loaded documents into overlapping chunks, embed them and index
# them in a Chroma vector store, then expose the store as a retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# NOTE: despite its name, `splitter` holds the list of split chunks,
# not the splitter object itself.
splitter = text_splitter.split_documents(document)

vectorstore = Chroma.from_documents(documents=splitter, embedding=embed_model)
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000023BAC8CA750>, search_kwargs={})

In [22]:
# Retriever example: fetch the chunks most similar to a sample query.
# `invoke` is the standard Runnable entry point; the older
# `get_relevant_documents` method is deprecated in current LangChain releases.
retriever.invoke("Iphone details")

[Document(metadata={'page': 0, 'total_pages': 1, 'producer': 'iText 2.0.8 (by lowagie.com)', 'source': 'data/Iphoneinvoicev.pdf', 'page_label': '1', 'moddate': '2022-02-04T17:07:27+00:00', 'creator': 'PyPDF', 'creationdate': '2022-02-04T17:07:27+00:00'}, page_content='Debigarh 2nd Lane\nMadhyamgram, WEST BENGAL, 700129\nIN\nState/UT Code: 19\nShipping Address :\nSOUVIK HALDAR \nSOUVIK HALDAR\nDebigarh 2nd Lane\nMadhyamgram, WEST BENGAL, 700129\nIN\nState/UT Code: 19 \nPlace of supply: WEST BENGAL \nPlace of delivery: WEST BENGAL'),
 Document(metadata={'page': 0, 'creator': 'PyPDF', 'page_label': '1', 'source': 'data/Iphoneinvoicev.pdf', 'total_pages': 1, 'creationdate': '2022-02-04T17:07:27+00:00', 'producer': 'iText 2.0.8 (by lowagie.com)', 'moddate': '2022-02-04T17:07:27+00:00'}, page_content='Tax Invoice/Bill of Supply/Cash Memo\n(Original for Recipient)\n*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillmen

In [23]:
# General prompt to control LLM behaviour.
# The template has two placeholders: {context} (the retrieved documents, as
# seen under the 'context' key of the chain output) and {input} (the user's
# question). The context section is wrapped in a properly closed
# <context>...</context> pair so the model can tell where it ends.
prompt = ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}

"""
)

In [24]:
# Create the retrieval chain: the document chain sends the prompt to the LLM,
# and the retrieval chain combines it with the retriever.
document_chain = create_stuff_documents_chain(llm=chat_ollama, prompt=prompt)
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [25]:
# Simple example: run the chain once and display the raw result dict
# (the output includes the 'input' and the retrieved 'context' documents).
retrieval_chain.invoke(dict(input="who is rahul garg"))

{'input': 'who is rahul garg',
 'context': [Document(metadata={'source': 'data/moglix.txt'}, page_content="Building on Moglix's unique full-stack operating system for B2B commerce, we at Credlix, offer a bespoke digital supply chain finance solution that brings together anchor enterprises, suppliers, exporters, and lenders to keep domestic supply chain and export finance moving all time.\n\nOur people are led by our executive leadership team at Credlix. We are innovators in action who deliver tech-led finance to the complete B2B commerce and manufacturing supply chain.\n\nMeet the Team\nAt Credlix, our executive leadership team brings along the strong experience of having worked in top technology, finance, and manufacturing companies to transform the way buyers, sellers, and exporters access working capital through automation in finance.\n\nRahul Garg\nCEO & Founder\n\nRishabh Agarwal\nSenior Vice President\n\nSanjeev Arora\nChief Financial Officer\n\nPramit Joshi\nSenior Director\n\nY

In [26]:
# Simple example: run the chain and print only the question and the answer.
result = retrieval_chain.invoke({"input": "who is rahul garg"})
for key in ("input", "answer"):
    print(result[key])

who is rahul garg
Rahul Garg is the CEO & Founder of Credlix.


In [27]:
# Final answer example: ask about the company described in data/moglix.txt.
# The company name is spelled correctly ("moglix", not "mogix") so the query
# embedding matches the indexed text as closely as possible.
result = retrieval_chain.invoke({"input": "what is moglix business model"})
print(result['answer'])

Based on the provided context, it appears that Moglix (also referred to as Credlix) operates a B2B e-commerce platform for industrial supplies. Their business model can be summarized as follows:

1. **Digital Supply Chain Solutions**: Moglix provides a digital platform for manufacturers and suppliers to connect, source, and manage their procurement needs.
2. **Online Marketplace**: They have an extensive online marketplace with over 8,00,000 products from various sellers, offering nationwide delivery to more than 25,000+ PIN codes.
3. **Supply Chain Finance**: Credlix (a part of Moglix) offers tech-led global supply chain finance solutions, integrating intelligent financial technology to reimagine cash-flow lending for manufacturers across the globe.

Their business model seems to be focused on:

* Providing a digital platform for B2B commerce
* Offering a wide range of industrial supplies from various sellers
* Enabling nationwide delivery and secure payment options
* Integrating supp

In [28]:
# Location of the bookings CSV. Set the CUSTOMER_BOOKING_CSV environment
# variable to point at a local copy; the original machine-specific path is
# kept as the fallback so existing setups keep working.
csv_path = os.environ.get(
    "CUSTOMER_BOOKING_CSV",
    r'C:\Users\Pradeep\Desktop\Data\AI_Test\Agentic_RAG\Test\customer_booking.csv',
)

# Try UTF-8 first and fall back to Windows-1252 if the file fails to decode.
try:
    df = pd.read_csv(csv_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(csv_path, encoding='cp1252')

# SECURITY: allow_dangerous_code=True lets the agent execute LLM-generated
# Python against `df` (via its python_repl_ast tool). Only run this against
# trusted data and prompts, ideally in an isolated environment.
csv_agent = create_pandas_dataframe_agent(
    llm=chat_ollama,
    df=df,
    verbose=True,
    allow_dangerous_code=True,
)

In [29]:
# Ask the dataframe agent a question. `invoke` replaces the deprecated
# `run` method; the agent's final answer text is under the "output" key.
csv_agent.invoke({"input": "How many rows and columns are in this csv"})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the number of rows and columns in the dataframe.
Action: python_repl_ast
Action Input: print(df.shape)[0m[36;1m[1;3m(50000, 14)
[0m[32;1m[1;3mThought: Now that I know the shape of the dataframe, I can answer the question about the number of rows and columns.

Final Answer: The dataframe has 50,000 rows and 14 columns.[0m

[1m> Finished chain.[0m


'The dataframe has 50,000 rows and 14 columns.'