In [1]:
# TODO
import os 
import openai 
from dotenv import load_dotenv 

# Read variables from a local .env file into the process environment.
load_dotenv()

# Hand the key to the openai module; the value is None if the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [6]:
# Notebook-wide settings: the chat model identifier and the vector index name.
llm_name, index_name = "gpt-3.5-turbo", "deeplearningai-langchain"

In [7]:
from langchain_pinecone import PineconeVectorStore 
from langchain_openai import OpenAIEmbeddings

# Embedding client created with its default settings.
embedding = OpenAIEmbeddings()

# Vector store bound to the Pinecone index named by `index_name`.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embedding,
)

In [9]:
# Quick retrieval check: ask the vector store for the 3 closest documents.
question = "What are major topics for this class?"
vectorstore.similarity_search(query=question, k=3)

[Document(metadata={'page': 5.0, 'source': 'data/MachineLearning-Lecture01.pdf'}, page_content="So all right, online resources. The class has a home page, so it's in on the handouts. I \nwon't write on the chalkboard — http:// cs229.stanford.edu. And so when there are \nhomework assignments or things like that, we  usually won't sort of — in the mission of \nsaving trees, we will usually not give out many handouts in class. So homework \nassignments, homework solutions will be posted online at the course home page.  \nAs far as this class, I've also written, a nd I guess I've also revised every year a set of \nfairly detailed lecture notes that cover the te chnical content of this  class. And so if you \nvisit the course homepage, you'll also find the detailed lecture notes that go over in detail \nall the math and equations and so on  that I'll be doing in class.  \nThere's also a newsgroup, su.class.cs229, also written on the handout. This is a \nnewsgroup that's sort of a forum for 

In [10]:
from langchain_openai import ChatOpenAI 
# Chat model shared by every chain below; temperature is pinned to 0.
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
)

In [11]:
from langchain.chains import RetrievalQA 
# Question-answering chain that gets its context from the vector store through
# a retriever. No chain_type is passed, so the library default is used.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever()
)

# Run the chain with .invoke(): calling the chain object directly
# (qa_chain({...})) is deprecated in LangChain. The answer is stored under
# the "result" key of the returned dict.
result = qa_chain.invoke({"query": question})
result["result"]

'The major topics for this class are not explicitly mentioned in the provided context.'

In [12]:
from langchain.prompts import PromptTemplate 

# Build prompt
# Custom QA prompt with two placeholders, {context} and {question}. The
# instruction text tells the model to admit when it does not know, to stay
# within three sentences, and to close every answer with "thanks for asking!".
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# from_template builds the PromptTemplate object from the raw string above.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [14]:
# Rebuild the chain so that it uses the custom prompt and also returns the
# retrieved documents next to the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
)

In [15]:
# Ask a new question through the current qa_chain. .invoke() replaces the
# deprecated direct call on the chain object (qa_chain({...})).
question = "Is probability a class topic?"
result = qa_chain.invoke({"query": question})
result["result"]

'Yes, probability is a class topic assumed to be familiar to students in this class. Thanks for asking!'

In [16]:
# Show the documents that were returned together with the last answer.
result["source_documents"]

[Document(metadata={'page': 4.0, 'source': 'data/MachineLearning-Lecture01.pdf'}, page_content="of this class will not be very program ming intensive, although we will do some \nprogramming, mostly in either MATLAB or Octa ve. I'll say a bit more about that later.  \nI also assume familiarity with basic proba bility and statistics. So most undergraduate \nstatistics class, like Stat 116 taught here at Stanford, will be more than enough. I'm gonna \nassume all of you know what ra ndom variables are, that all of you know what expectation \nis, what a variance or a random variable is. And in case of some of you, it's been a while \nsince you've seen some of this material. At some of the discussion sections, we'll actually \ngo over some of the prerequisites, sort of as  a refresher course under prerequisite class. \nI'll say a bit more about that later as well.  \nLastly, I also assume familiarity with basi c linear algebra. And again, most undergraduate \nlinear algebra courses are more 

In [17]:
# RetrievalQA chain types
# Same retriever and model as before, but chain_type="map_reduce" selects the
# map-reduce strategy instead of the library default.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type="map_reduce"
)

# .invoke() replaces the deprecated direct call on the chain object.
result = qa_chain_mr.invoke({"query": question})
result["result"]

'Yes, probability is a class topic in the document. The instructor mentions assuming familiarity with basic probability and statistics, stating that most undergraduate statistics classes, like Stat 116 taught at Stanford, will be more than enough.'

In [19]:
# Same setup again, this time with chain_type="refine".
# NOTE(review): this rebinds `qa_chain_mr`, which previously held the
# map_reduce chain, to a refine chain. The name is kept so that any later
# reference still resolves; consider a dedicated name such as
# `qa_chain_refine` once all usages are known.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type="refine"
)

# .invoke() replaces the deprecated direct call on the chain object.
result = qa_chain_mr.invoke({"query": question})
result["result"]

'The additional context provided does not directly impact the original answer, as it remains relevant and accurate. Probability is indeed a topic that will be covered in the class, assuming familiarity with basic probability and statistics. The instructor also mentions that some review of prerequisites will be provided during discussion sections for those who may need a refresher.'

In [20]:
# Back to a plain chain: no custom prompt, no chain_type, no source documents.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())

In [21]:
# First question of a two-step exchange. .invoke() replaces the deprecated
# direct call on the chain object (qa_chain({...})).
question = "Is probability a class topic?"
result = qa_chain.invoke({"query": question})
result["result"]

'Yes, probability is a class topic in the course being discussed. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the class.'

In [22]:
# Follow-up question whose "those" refers back to the previous exchange.
# Only this query string is passed to the chain; no earlier question or
# answer is included in the input.
# .invoke() replaces the deprecated direct call on the chain object.
question = "why are those prerequesites needed?"
result = qa_chain.invoke({"query": question})
result["result"]

"I don't have enough information to answer your question about prerequisites."