## Question and Answering

## Use Gemini LLM with a VectorDB

In [75]:
import os
import getpass

if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass("Provide your Google API key here")

API_KEY = os.environ["GOOGLE_API_KEY"]

In [76]:
# pip install google-generativeai
import google.generativeai as genai
genai.configure(api_key=API_KEY)
genai_models = [(idx, model) for idx,  model in enumerate(genai.list_models())]
genai_models

[(0,
  Model(name='models/chat-bison-001',
        base_model_id='',
        version='001',
        display_name='PaLM 2 Chat (Legacy)',
        description='A legacy text-only model optimized for chat conversations',
        input_token_limit=4096,
        output_token_limit=1024,
        supported_generation_methods=['generateMessage', 'countMessageTokens'],
        temperature=0.25,
        top_p=0.95,
        top_k=40)),
 (1,
  Model(name='models/text-bison-001',
        base_model_id='',
        version='001',
        display_name='PaLM 2 (Legacy)',
        description='A legacy model that understands text and generates text as an output',
        input_token_limit=8196,
        output_token_limit=1024,
        supported_generation_methods=['generateText', 'countTextTokens', 'createTunedTextModel'],
        temperature=0.7,
        top_p=0.95,
        top_k=40)),
 (2,
  Model(name='models/embedding-gecko-001',
        base_model_id='',
        version='001',
        display_name='

In [77]:
# pip install langchain-google-genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma

gemini_embedding = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
persist_directory  = 'docs/chroma'
vectordb = Chroma(persist_directory=persist_directory,
                  embedding_function = gemini_embedding)
vectordb._collection.count()

209

In [78]:
question = "?"
docs = vectordb.similarity_search_with_relevance_scores(question, k=7)
print(docs)
# for doc in docs:    
#     print(doc.page_content)
#     print("")

[(Document(page_content='[End of Audio]  \nDuration: 69 minutes', metadata={'page': 21, 'source': 'pdf/MachineLearning-Lecture01.pdf'}), 0.7836388792148101), (Document(page_content='[End of Audio]  \nDuration: 69 minutes', metadata={'page': 21, 'source': 'pdf/MachineLearning-Lecture01.pdf'}), 0.7836388792148101), (Document(page_content="slowly yourself, go to the course homepage and download detailed lecture notes that \npretty much describe all the mathematical, te chnical contents I'm going to go over today.  \nToday, I'm also going to delve into a fair amount  – some amount of linear algebra, and so \nif you would like to see a refres her on linear algebra, this w eek's discussion section will \nbe taught by the TAs and will be a refresher on linear algebra. So if some of the linear \nalgebra I talk about today sort of seems to be going by pretty quickl y, or if you just want \nto see some of the things I'm claiming today with our proof, if you wa nt to just see some \nof those thin

In [160]:
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-pro", 
                             convert_system_message_to_human=True,
                             temperature=0.2)

## RetrievalQA chain

In [114]:
from langchain.chains import RetrievalQA

In [81]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever= vectordb.as_retriever(),
    chain_type="refine"    
)

In [82]:
question = "What are goal of this lecture?"
result = qa_chain({'query':question})

In [83]:
result['result']

"The new context does not provide any additional information about the goal of the lecture, so I cannot refine the original answer.\n\nThe original answer is:\n\n> The goal of the lecture is to introduce students to some mathematical and technical content, as well as some linear algebra. The lecturer also mentions that there will be a discussion section this week taught by the TAs that will serve as a refresher on linear algebra. Students who find the lecturer's discussion of linear algebra to be too quick or who want to see more detailed proofs of the claims made in the lecture are encouraged to attend the discussion section.\n\nThis answer is still accurate and comprehensive, even with the new context. The new context simply provides some additional information about the logistics of the lecture, such as the availability of online resources and the volume of the lecturer's voice. However, this information does not change the overall goal of the lecture."

## Prompt

In [161]:
from langchain.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.  Use three sentences maximum. Keep the answer as concise as possiable. Alwasys say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [165]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 50, 'fetch_k': 50}
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [166]:
question = "what is the class name?"
result = qa_chain.invoke({'query':question})

In [167]:
result['result']

'The class name is CS229. Thanks for asking!'

In [168]:
result['source_documents'][19].page_content

"of this class will not be very program ming intensive, although we will do some \nprogramming, mostly in either MATLAB or Octa ve. I'll say a bit more about that later.  \nI also assume familiarity with basic proba bility and statistics. So most undergraduate \nstatistics class, like Stat 116 taught here at Stanford, will be more than enough. I'm gonna \nassume all of you know what ra ndom variables are, that all of you know what expectation \nis, what a variance or a random variable is. And in case of some of you, it's been a while \nsince you've seen some of this material. At some of the discussion sections, we'll actually \ngo over some of the prerequisites, sort of as  a refresher course under prerequisite class. \nI'll say a bit more about that later as well.  \nLastly, I also assume familiarity with basi c linear algebra. And again, most undergraduate \nlinear algebra courses are more than enough. So if you've taken courses like Math 51, \n103, Math 113 or CS205 at Stanford, tha