# Install and import packages

In [None]:
!pip install langchain openai langchain-openai langchain-community unstructured

In [None]:
import os
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator

# Enter your OpenAI API Key below

In [None]:

# Paste your OpenAI API key between the quotes, or leave it empty to keep a
# key that is already exported in the environment.
OPENAI_API_KEY = ""
# Only write the variable when a key was actually provided, so an existing
# OPENAI_API_KEY is never overwritten with an empty string.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Prompt Template for efficient outputs

In [None]:
# Prompt for the question-answering chain.
# It carries two placeholders: {context} (filled with the retrieved documents)
# and {question} (the user's query). The few-shot examples show both a
# grounded answer and the expected refusal for out-of-scope questions.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer or the question is not related to the provided context, just say that you don't know, don't try to make up an answer.
Use 10 sentences maximum, add bullet points where applicable and keep the answer as reasonable as possible.
Get idea from below Examples:
Examples:
"question": "What does the study 'Disrupting Industries With Blockchain: The Industry, Venture Capital Funding, and Regional Distribution of Blockchain Ventures' investigate and what are its key findings?"
"answer": "The study investigates the emerging landscape of blockchain business applications by analyzing their presence across industries, venture capital funding, and regional distribution. It uses data from four venture databases to explore the diffusion of blockchain technology. Key findings include:
1- Blockchain startups are present across all industry segments, with the most significant representation in the Finance & Insurance and Information & Communication industries.
2- These industries are also the primary recipients of venture capital funding, though blockchain startups exist in various sectors.
3- The regional distribution analysis identifies the US and UK as leading geographical clusters for blockchain ventures."
"question": "How to play snooker?"
"answer": "I don't know the answer. The question is not relevant to the provided context."
"question": "Who is Nicolas Poran?"
"answer": "I don't know the answer. The question is not relevant to the provided context."

You will now answer the questions from the provided context. If the question is not relevant, just say you don't know the answer.
{context}
Question: {question}
Helpful Answer:"""

# CSV folder path and loader

In [None]:
# Placeholder: replace with the folder that holds your CSV files.
# The directory loader is given this path as its root.
csv_file_path = "Enter Your csv folder path here"

In [None]:
# Collect every *.csv file under csv_file_path (the "**/" glob also matches
# sub-folders) and parse each one with CSVLoader, reading the files as UTF-8.
loader = DirectoryLoader(
    csv_file_path,
    glob="**/*.csv",
    loader_cls=CSVLoader,
    loader_kwargs={"encoding": "utf-8"},
)

# Index Creation and Retriever

In [None]:
# Embed the loaded CSV documents with OpenAI embeddings and index them.
embedding_model = OpenAIEmbeddings()
index_creator = VectorstoreIndexCreator(embedding=embedding_model)
docsearch = index_creator.from_loaders([loader])

# Expose the index as a retriever using maximal-marginal-relevance search;
# k=1 means a single document is handed to the chain per query.
retriever = docsearch.vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1},
)

# Model Integration and Chain Type Settings

In [None]:
# Chat model that writes the answers; a low temperature keeps the output
# close to the retrieved context.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)

# Wrap the raw template string so the chain can fill in {context} and
# {question}, then package it as the keyword arguments for the chain.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
chain_type_kwargs = dict(prompt=QA_CHAIN_PROMPT)

In [None]:
# Assemble the retrieval QA chain: the retriever supplies the context, the
# custom prompt frames it, and the LLM produces the answer. The source
# documents are returned alongside the answer so they can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

# Query Execution

In [None]:
# Out-of-scope question: the prompt's examples ask the model to reply that
# it does not know the answer for questions like this one.
question = "How to play snooker?"
# Calling the chain object directly (`qa_chain({...})`) is deprecated in
# LangChain; `invoke` is the supported entry point and takes the same input.
result = qa_chain.invoke({"query": question})
# Bare expression so the notebook displays the answer text as the cell output.
result["result"]

In [None]:
# In-scope question: print the answer together with the metadata of the
# source documents the chain returned for it.
user_question = "Is Ernst & Young Global Limited a service provider to clients?"
# Calling the chain object directly (`qa_chain({...})`) is deprecated in
# LangChain; `invoke` is the supported entry point and takes the same input.
result = qa_chain.invoke({"query": user_question})
source_metadata = [document.metadata for document in result["source_documents"]]
print("Question:", user_question, "\nAnswer: ", result["result"], "\nSource:\n", source_metadata)