In [2]:
from dotenv import load_dotenv

In [3]:
import openai

In [4]:
# Load environment variables from a dotenv file (presumably the OpenAI API key
# used by the cells below - confirm) so that no secret is typed into the notebook.
# NOTE(review): the path is absolute and machine-specific.
load_dotenv(dotenv_path='/home/tom/two/envapi/my-env')

True

In [5]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings


In [6]:
# Directory handed to Chroma as its persist_directory.
# NOTE(review): absolute, machine-specific path.
persist_directory: str = '/home/tom/two/vstore/chroma/'

In [7]:
# Embedding function passed to the Chroma store below.
# NOTE(review): constructed with defaults; presumably picks up the API key from
# the environment loaded earlier - confirm.
embedding = OpenAIEmbeddings()

In [8]:
# Open the Chroma store at persist_directory, using `embedding` as its embedding function.
vectordb = Chroma(embedding_function=embedding, persist_directory=persist_directory)

In [9]:
# Sanity check: print how many entries the underlying collection holds.
# NOTE(review): `_collection` is a private attribute of the wrapper and may
# change between library releases.
entry_count = vectordb._collection.count()
print(entry_count)

209


In [10]:
# First query, reused by the similarity search and by the QA chains below.
question: str = 'what are the major topics for this class?'

In [11]:
# Fetch the three stored chunks most similar to the question.
docs = vectordb.similarity_search(query=question, k=3)

In [12]:
# Display the retrieved documents.
# NOTE(review): in the recorded output the first two hits start with the same
# text, which suggests duplicate chunks in the store - worth confirming.
docs

[Document(page_content="statistics for a while or maybe algebra, we'll go over those in the discussion sections as a \nrefresher for those of you that want one.  \nLater in this quarter, we'll also use the disc ussion sections to go over extensions for the \nmaterial that I'm teaching in the main lectur es. So machine learning is a huge field, and \nthere are a few extensions that we really want  to teach but didn't have time in the main \nlectures for.", metadata={'page': 8, 'source': 'https://see.stanford.edu/materials/aimlcs229/transcripts/machinelearning-lecture01.pdf'}),
 Document(page_content="statistics for a while or maybe algebra, we'll go over those in the discussion sections as a \nrefresher for those of you that want one.  \nLater in this quarter, we'll also use the disc ussion sections to go over extensions for the \nmaterial that I'm teaching in the main lectur es. So machine learning is a huge field, and \nthere are a few extensions that we really want  to teach but didn

In [13]:
from langchain.chat_models import ChatOpenAI

In [14]:
# Chat model that writes the answers; temperature=0 to minimise sampling randomness.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

In [15]:
from langchain.chains import RetrievalQA

In [20]:
# Retrieval QA chain: documents come from the vector store's retriever and the
# LLM answers from them. verbose=True prints the "Entering/Finished chain" lines.
# NOTE(review): this cell's execution count (20) is higher than that of the cell
# consuming qa_chain (18), so the saved outputs come from an out-of-order run.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    verbose=True,
    retriever=vectordb.as_retriever(),
)

In [18]:
# Run the chain on the current question; the chain expects its input under the 'query' key.
result = qa_chain(inputs={'query': question})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [19]:
# Display the full chain output (a dict with the 'query' and the generated 'result').
result

{'query': 'what are the major topics for this class?',
 'result': 'The major topics for this class are machine learning and its various subfields.'}

In [21]:
# Display only the generated answer text.
result['result']

'The major topics for this class are machine learning and its various subfields.'

In [22]:
from langchain.prompts import PromptTemplate

In [24]:
# Build the custom QA prompt.
# The template has two placeholders: {context} and {question}.
# Its instructions tell the model to admit when it does not know the answer,
# to answer in at most three sentences, and to end with "thanks for asking!".
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 

{context}
Question: {question}
Helpful Answer:"""
# NOTE(review): from_template presumably infers the input variables from the
# braces in the template - confirm against the LangChain version in use.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [25]:
# Rebuild the QA chain with the custom prompt, and ask it to return the
# retrieved documents next to the answer (the 'source_documents' key in the output).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={'prompt': QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=vectordb.as_retriever(),
)

In [26]:
# Re-run the same question through the chain that uses the custom prompt.
result = qa_chain(inputs={'query': question})

In [27]:
# Display the full output: the query, the answer, and the source documents.
result

{'query': 'what are the major topics for this class?',
 'result': 'The major topics for this class are machine learning, statistics, and algebra. Thanks for asking!',
 'source_documents': [Document(page_content="statistics for a while or maybe algebra, we'll go over those in the discussion sections as a \nrefresher for those of you that want one.  \nLater in this quarter, we'll also use the disc ussion sections to go over extensions for the \nmaterial that I'm teaching in the main lectur es. So machine learning is a huge field, and \nthere are a few extensions that we really want  to teach but didn't have time in the main \nlectures for.", metadata={'page': 8, 'source': 'https://see.stanford.edu/materials/aimlcs229/transcripts/machinelearning-lecture01.pdf'}),
  Document(page_content="statistics for a while or maybe algebra, we'll go over those in the discussion sections as a \nrefresher for those of you that want one.  \nLater in this quarter, we'll also use the disc ussion sections t

In [28]:
# Second query: asks about a course prerequisite.
question = 'is probability a pre-requisite for this course?'

In [29]:
# Ask the new question; `result` is overwritten with this query's output.
result = qa_chain(inputs={'query': question})

In [31]:
# Display only the generated answer for the prerequisite question.
result['result']

'Yes, probability is a pre-requisite for this course. Thanks for asking!'

In [34]:
# Print the text of each source document returned with the last answer,
# each one under a numbered separator line.
for doc_index, source_doc in enumerate(result['source_documents']):
    print(f'Document {doc_index}-----------------------------------------------------------------------------------')
    print(source_doc.page_content)

Document 0-----------------------------------------------------------------------------------
of this class will not be very program ming intensive, although we will do some 
programming, mostly in either MATLAB or Octa ve. I'll say a bit more about that later.  
I also assume familiarity with basic proba bility and statistics. So most undergraduate 
statistics class, like Stat 116 taught here at Stanford, will be more than enough. I'm gonna 
assume all of you know what ra ndom variables are, that all of you know what expectation 
is, what a variance or a random variable is. And in case of some of you, it's been a while 
since you've seen some of this material. At some of the discussion sections, we'll actually 
go over some of the prerequisites, sort of as  a refresher course under prerequisite class. 
I'll say a bit more about that later as well.  
Lastly, I also assume familiarity with basi c linear algebra. And again, most undergraduate 
linear algebra courses are more than enough.

In [35]:
# Shell escape: print the notebook kernel's current working directory.
!pwd

/media/tom/StorageONE/my-github
