In [41]:
import openai
import langchain
import pinecone

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

In [42]:
from dotenv import load_dotenv
load_dotenv()
import os

### Read documents from a file

In [43]:
def load_documents(directory):
    loader = PyPDFDirectoryLoader(directory)
    documents = loader.load()
    return documents

In [44]:
doc = load_documents("documents/")
len(doc)

221

### Divide the documents into Chunks

In [45]:
def chunl_data(docs,size,overlap):
    splitter = RecursiveCharacterTextSplitter(chunk_size=size, chunk_overlap=overlap)
    chunks = splitter.split_documents(docs)
    return chunks

In [46]:
chunks = chunl_data(doc,1000,100)
len(chunks)

436

### Embedding technique of OpenAI

In [47]:
embeddings = OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"])
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x11544b710>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x11525db10>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

In [48]:
vectors = embeddings.embed_query("how are you")
len(vectors)


1536

In [49]:
### vector search using pinecone
pinecone.init(api_key="aa92a9dd-0bc7-4ee8-8143-44d08420552b",environment="gcp-starter")
index_name = "langchainvector"

In [50]:
index = Pinecone.from_documents(doc,embeddings,index_name=index_name)

In [51]:
### cosine similarity Retrieval using pinecone from db

def retrieve_query(query, k=2):
    matchings = index.similarity_search(query, k=k)
    return matchings


In [52]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI


In [57]:
llm = OpenAI(model_name="text-davinci-003",temperature=0.7)
chain = load_qa_chain(llm,chain_type="stuff")

In [60]:
### Search using pinecone

def retreive_answer(query):
    matchings = retrieve_query(query)
    print(matchings)
    answer = chain.run(input_documents=matchings,question=query)
    return answer

In [66]:
query = "give me a simple use cases of useReducer react hook"
answer = retreive_answer(query)
answer

[Document(page_content="ReactJS        \n   163 \n \n(state) => { prop1: this.state.anyvalue  } \n\uf0b7 mapDispatchToProps : Accepts a function with below signature.  \nObject | (dispatch , ownProps ?) => Object \nHere, dispatch  refers the dispatch object used to dispatch action in the redux store and \nObject  refers one or more dispatch functions as props of the component.  \n(dispatch) => { \n    addDispatcher : (dispatch) => dispatch ({ type: 'ADD_ITEM' , payload: { } }), \n    removeispatcher : (dispatch) => dispatch ({ type: 'REMOVE_ITEM' , payload: { } \n}), \n} \nProvider component  \nReact Redux provides a Provider component and its sole purpose to make the Redux store \navailable to its all nested components connected to store using connect API. The sample \ncode is  given below:  \nimport React from 'react' \nimport ReactDOM from 'react-dom' \nimport { Provider } from 'react-redux' \n \nimport { App } from './App' \nimport createStore from './createReduxStore'  \n \nconst 

" I don't know."