In [None]:
#Import libraries:
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
import os

In [None]:
# Load the OpenAI and Pinecone settings from the local .env file
from dotenv import load_dotenv, find_dotenv

# Assign to `_` so the call's return value is not echoed as cell output.
_ = load_dotenv(dotenv_path=find_dotenv())

In [None]:
# OpenAI key taken from the environment; None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [None]:
# Import the OpenAI embeddings wrapper and create an instance with default settings
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [None]:
# Import Pinecone and initialise the client from environment settings
import pinecone
from langchain.vectorstores import Pinecone

# Both values are read from the environment; a missing variable is passed through as None.
pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment=os.environ.get('PINECONE_ENV'))

In [None]:
# Path of the PDF to ingest (relative path, resolved against the working directory).
FILE_NAME = "ML.pdf"

In [None]:
from langchain.document_loaders import PyPDFLoader

# Build a loader for the configured PDF, then read it into `data`.
loader = PyPDFLoader(file_path=FILE_NAME)
data = loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks (size 500, overlap 100).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents=data)

In [None]:
# Name of the Pinecone index that receives the embedded chunks.
index_name = "starter-index"

# Embed the chunks and store them in the index; `search` is the resulting vector store.
# NOTE(review): re-running this cell presumably uploads the same chunks again — confirm
# whether duplicate vectors matter for this index.
search = Pinecone.from_documents(
    documents=texts,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
# Question used both for the similarity search and for the QA chain.
# Spelling fixed ("Explian" -> "Explain") so the embedded query and the LLM prompt are clean.
PROMPT = "Explain novelty detection in simple terms?"

In [None]:
# Retrieve the 4 stored chunks most similar to the question
docs = search.similarity_search(query=PROMPT, k=4)
print(docs)

In [None]:
# Create the chat model (temperature 0.0) and build a "stuff" question-answering chain on it
llm = ChatOpenAI(model='gpt-3.5-turbo-16k', temperature=0.0)
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [None]:
# Run the chain on the retrieved documents with the question and show the answer
answer = chain.run(question=PROMPT, input_documents=docs)
print(answer)