In [1]:
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader, DirectoryLoader, TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama

In [2]:
drive_folder = "./data/Deal Details"

In [3]:
loader = DirectoryLoader(drive_folder, glob='**/*.json', show_progress=True, loader_cls=TextLoader)
json_docs = loader.load()
print(json_docs)

100%|██████████| 5/5 [00:00<00:00, 5005.14it/s]

[Document(page_content='{\n"DealId": "DS-001",\n"Product": "High-Performance Servers",\n"Pricing": "$50,000",\n"Discounts": "10%",\n"Total Sale": "$45,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-001.json'}), Document(page_content='{\n"DealId": "DS-002",\n"Product": "Customized Storage Solutions",\n"Pricing": "$75,000",\n"Discounts": "15%",\n"Total Sale": "$63,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-002.json'}), Document(page_content='{\n"DealId": "DS-003",\n"Product": "High-Performance Computing Infrastructure",\n"Pricing": "$100,000",\n"Discounts": "12%",\n"Total Sale": "$88,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-003.json'}), Document(page_content='{\n"DealId": "DS-004",\n"Product": "Customized Solution for Data Analytics",\n"Pricing": "$125,000",\n"Discounts": "18%",\n"Total Sale": "$103,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-004.json'}), Document(page_content='{\n"DealId": "DS-005",\n"Product": "High-Performance Computing Hardwar




In [5]:
loader = PyPDFDirectoryLoader("./data/Case Studies")
cs_docs = loader.load()
print(cs_docs[0:5])

[Document(page_content='', metadata={'source': 'data\\Case Studies\\DS-001 Case Study_ NovaTech Corporation.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Case Studies\\DS-002 Case Study_EcoCycle Recycling.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Case Studies\\DS-003 Case Study_Proxima Solutions.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Case Studies\\DS-004 Case Study_BioSpark Biotechnology.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Case Studies\\DS-005 Case Study_SmartHome Realty.pdf', 'page': 0})]


In [5]:
loader = PyPDFDirectoryLoader("./data/Sales Pitch")
pitch_docs = loader.load()
print(pitch_docs)

[Document(page_content='', metadata={'source': 'data\\Sales Pitch\\DS-001 Sales Pitch for NovaTech Corporation.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Sales Pitch\\DS-002 Sales Pitch for EcoCycle Recycling.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Sales Pitch\\DS-003 Sales Pitch for Proxima Solutions.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Sales Pitch\\DS-004 Sales Pitch for BioSpark Biotechnology.pdf', 'page': 0}), Document(page_content='', metadata={'source': 'data\\Sales Pitch\\DS-005 Sales Pitch for SmartHome Realty.pdf', 'page': 0})]


In [6]:
all_docs = json_docs + cs_docs + pitch_docs
print(all_docs)

[Document(page_content='{\n"DealId": "DS-001",\n"Product": "High-Performance Servers",\n"Pricing": "$50,000",\n"Discounts": "10%",\n"Total Sale": "$45,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-001.json'}), Document(page_content='{\n"DealId": "DS-002",\n"Product": "Customized Storage Solutions",\n"Pricing": "$75,000",\n"Discounts": "15%",\n"Total Sale": "$63,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-002.json'}), Document(page_content='{\n"DealId": "DS-003",\n"Product": "High-Performance Computing Infrastructure",\n"Pricing": "$100,000",\n"Discounts": "12%",\n"Total Sale": "$88,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-003.json'}), Document(page_content='{\n"DealId": "DS-004",\n"Product": "Customized Solution for Data Analytics",\n"Pricing": "$125,000",\n"Discounts": "18%",\n"Total Sale": "$103,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-004.json'}), Document(page_content='{\n"DealId": "DS-005",\n"Product": "High-Performance Computing Hardwar

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(all_docs)
final_documents[0:5]

[Document(page_content='{\n"DealId": "DS-001",\n"Product": "High-Performance Servers",\n"Pricing": "$50,000",\n"Discounts": "10%",\n"Total Sale": "$45,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-001.json'}),
 Document(page_content='{\n"DealId": "DS-002",\n"Product": "Customized Storage Solutions",\n"Pricing": "$75,000",\n"Discounts": "15%",\n"Total Sale": "$63,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-002.json'}),
 Document(page_content='{\n"DealId": "DS-003",\n"Product": "High-Performance Computing Infrastructure",\n"Pricing": "$100,000",\n"Discounts": "12%",\n"Total Sale": "$88,000"\n}', metadata={'source': 'data\\Deal Details\\Ds-003.json'}),
 Document(page_content='{\n"DealId": "DS-004",\n"Product": "Customized Solution for Data Analytics",\n"Pricing": "$125,000",\n"Discounts": "18%",\n"Total Sale": "$103,750"\n}', metadata={'source': 'data\\Deal Details\\Ds-004.json'}),
 Document(page_content='{\n"DealId": "DS-005",\n"Product": "High-Performance Computing Har

In [19]:
vectordb=FAISS.from_documents(final_documents, OllamaEmbeddings())

In [21]:
llm=Ollama(model="llama2")
llm

Ollama()

In [22]:
prompt=ChatPromptTemplate.from_template("""
You are a very Successful Sales executive. Your job is to provide an exceptional sales pitches based on user query. If you are unable to get the context, do not make up an answer, reply with the following statement "Unable to generate pitch"
<context>
{context}
</context>
Question: {input}""")

In [23]:
document_chain = create_stuff_documents_chain(llm,prompt)

In [24]:
retriever=vectordb.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000191758754E0>)

In [28]:
retrieval_chain = create_retrieval_chain(retriever,document_chain)
response = retrieval_chain.invoke({"input":"I want to expand our sales for servers and SmartHome Realty wants to upgrade its existing systems. Help me with a pitch"})
response['answer']

'Absolutely! As a successful sales executive, I would be happy to help you craft an exceptional sales pitch for your servers and customized solutions. Based on the context provided, here\'s a pitch that could potentially resonate with SmartHome Realty:\n\n"Thank you for considering our high-performance servers and customized solutions. We understand that SmartHome Realty is looking to upgrade its existing systems, and we believe that our products can help streamline your operations and increase efficiency.\n\nOur servers are designed to handle the most demanding workloads with ease, providing the fastest processing times and largest memory capacities in the market. With a price of $50,000, you\'ll be getting an exceptional value for your money. Plus, with a 10% discount, you can save even more on your purchase.\n\nBut that\'s not all. Our customized solutions are tailored to meet the unique needs of your business. Whether it\'s data analytics, cloud computing, or something else entirel