# Import Libraries

In [None]:

import openai
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

In [None]:
from dotenv import load_dotenv
load_dotenv()

In [None]:
import os

## Let's Read the Documents

In [None]:
def read_doc(directory):
    """Load the PDF documents found in ``directory``.

    Args:
        directory: Path handed to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever ``PyPDFDirectoryLoader(directory).load()`` returns
        (the loaded documents).
    """
    return PyPDFDirectoryLoader(directory).load()

In [None]:
# Load the source PDFs; the bare name below shows them when run as a cell.
doc = read_doc(directory='Path Documents')
doc

# Divide the docs into chunks

In [None]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        docs: Documents to split (e.g. the output of ``read_doc``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # Return the split result. The previous version stored it in a local
    # and returned the untouched input, so no chunking ever took effect.
    return text_splitter.split_documents(docs)

In [None]:
# Chunk the loaded documents with the default size/overlap, then show the count.
documents = chunk_data(doc)
len(documents)

# Embedding Technique of OpenAI

In [None]:
# Build the OpenAI embeddings client. The key is read from the OPENAI_API_KEY
# environment variable; os.environ[...] raises KeyError if it is not set.
# NOTE(review): presumably the variable comes from the .env file loaded by
# load_dotenv() earlier in the notebook - confirm.
embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
# Bare expression: shows the object when run as a notebook cell.
embeddings

In [None]:
# Sanity check: embed a sample sentence and inspect the length of the result.
vectors = embeddings.embed_query("How are you")
len(vectors)

# Vector Search DB In Pinecone

In [None]:
# Initialise the Pinecone client. Credentials and the index name are read
# from the environment instead of being typed into the notebook, matching how
# the OpenAI key is obtained. Each falls back to "" (the previous hard-coded
# placeholder) when the variable is not set.
# NOTE(review): the variable names below are a convention chosen here - make
# sure your .env defines them.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", ""),
    environment=os.environ.get("PINECONE_ENVIRONMENT", ""),
)
index_name = os.environ.get("PINECONE_INDEX_NAME", "")

In [None]:
# Embed and upsert the *chunked* documents (`documents`), not the raw pages in
# `doc`, so that retrieval works on the chunk-sized pieces prepared above.
index = Pinecone.from_documents(documents, embeddings, index_name=index_name)

# Cosine Similarity Retrieve Results

In [None]:
def retrieve_query(query, k=2):
    """Look up the entries in ``index`` most similar to ``query``.

    Args:
        query: Text forwarded to ``index.similarity_search``.
        k: Number of results requested from the search (default 2).

    Returns:
        The result of ``index.similarity_search(query, k=k)``.
    """
    return index.similarity_search(query, k=k)

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI

In [None]:
# Completion model used to answer questions. The model id was misspelled as
# "text-devinci-003"; the correct id is "text-davinci-003".
# NOTE(review): OpenAI has been retiring older completion models - confirm
# this model is still served for your account, or switch to a current one.
llm = OpenAI(model="text-davinci-003", temperature=0.5)
# "stuff" chain type, as selected in the original cell.
chain = load_qa_chain(llm, chain_type="stuff")

## Search Answer From VectorDB

In [None]:
def retrieve_answers(query):
    """Answer ``query`` using documents retrieved from the vector index.

    Fetches matches via ``retrieve_query``, prints them, and runs the QA
    ``chain`` with those matches as input documents.

    Args:
        query: The question to answer.

    Returns:
        The value returned by ``chain.run``.
    """
    relevant_docs = retrieve_query(query)
    # Echo the retrieved documents so the supporting context is visible.
    print(relevant_docs)
    return chain.run(input_documents=relevant_docs, question=query)

In [None]:
# Example usage: replace the placeholder text with a real question.
our_query = " You can write any questions/query"
answer = retrieve_answers(query=our_query)
print(answer)