In [1]:
# import langchain dir loader from document loaders
from langchain.document_loaders import DirectoryLoader

# Folder scanned for source documents (relative path, resolved against the working directory).
directory = './contents'

def load_docs(directory):
    """Load the documents found under ``directory``.

    Args:
        directory: Path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory).load()`` returns (the loaded documents).
    """
    return DirectoryLoader(directory).load()

# Load the documents once; the trailing expression displays how many were loaded.
documents = load_docs(directory)
len(documents)

2

In [9]:
# use text splitter to split text in chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# split the docs into chunks using recursive character splitter
def split_docs(documents, chunk_size=100, chunk_overlap=2):
    """Split documents into smaller chunks with a recursive character splitter.

    Args:
        documents: Documents to split, as returned by ``load_docs``.
        chunk_size: Chunk size forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Chunk overlap forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Store the split documents (chunks) in `docs`; the default chunk settings of split_docs are used.
docs = split_docs(documents)

In [10]:
# embeddings using langchain
from langchain.embeddings import SentenceTransformerEmbeddings
# Embedding model used to index the chunks; queries must be embedded with the
# same model so that their vectors are comparable with the indexed ones.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

from langchain.vectorstores import FAISS

# Build the FAISS vector store: `from_documents` embeds each chunk in `docs`
# with `embeddings` and indexes the resulting vectors.
db = FAISS.from_documents(docs, embeddings)

In [6]:
# # Doing similarity search  using query
# # query = "How much Interim dividend was given by ITC?"

# embedding_vector = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2").embed_query("How much Interim dividend was given by ITC?")
# matching_docs = db.similarity_search_by_vector(embedding_vector)

# matching_docs

In [11]:
# Provide the OpenAI API key without hardcoding it in the notebook:
# reuse OPENAI_API_KEY when it is already set, otherwise prompt for it
# (getpass does not echo the input, so the key never lands in cell output).
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# load the LLM model
from langchain.chat_models import ChatOpenAI
model_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=model_name)

# Using q&a chain to get the answer for our query
# (chain_type="stuff": the retrieved documents are passed to the model together).
from langchain.chains.question_answering import load_qa_chain
chain = load_qa_chain(llm, chain_type="stuff", verbose=False)


In [12]:
# write your query and perform similarity search to generate an answer
query = "How much Interim dividend was given by ITC?"

# Embed the query with the same `embeddings` object that was used to build
# `db`, rather than loading a second copy of the model in this cell.
embedding_vector = embeddings.embed_query(query)
matching_docs = db.similarity_search_by_vector(embedding_vector)

# Hand the retrieved chunks to the QA chain as context; the trailing
# expression displays the generated answer.
answer = chain.run(input_documents=matching_docs, question=query)
answer

'The interim dividend given by ITC was Rs. 103,520.'