In [3]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# Fail fast when the key is missing. Only its presence is tested: the value is
# never bound to a variable and never left as the cell's last expression, so it
# cannot be rendered into the saved notebook output.
if not os.environ.get("GOOGLE_API_KEY"):
    raise KeyError(
        "GOOGLE_API_KEY is not set; add it to your .env file or export it "
        "before starting the kernel."
    )

'<REDACTED>'

In [4]:
# One-time setup (run in a shell, or uncomment to run here): !uv add langchain-google-genai

In [5]:
import datetime
from langchain_google_genai import ChatGoogleGenerativeAI

In [6]:
# Chat model configuration: model name is kept in a variable so it can be
# swapped in one place; temperature is fixed at 0.
llm_name = "gemini-2.0-flash"  # Or your preferred model
current_date = datetime.date.today()

llm = ChatGoogleGenerativeAI(temperature=0, model=llm_name)

In [7]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [8]:
# Splitter settings: chunk size 1000 with an overlap of 150 between
# consecutive chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)

# Load the PDF, then cut the loaded documents into chunks.
loader = PyPDFLoader("ML_notes.pdf")
documents = loader.load()
docs = splitter.split_documents(documents)

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

# Embedding model used to vectorize the document chunks.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Index all chunks in an in-memory vector store and expose it as a
# similarity retriever that is asked for 4 results per query.
db = DocArrayInMemorySearch.from_documents(documents=docs, embedding=embedding)
retriever = db.as_retriever(
    search_kwargs={"k": 4},
    search_type="similarity",
)



In [41]:
from langchain.prompts import PromptTemplate

# Prompt for the QA chain. It has two placeholders: {context} (the retrieved
# text) and {question} (the user query). The first line intentionally keeps
# its trailing space before the newline so the string matches the original.
template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "If you don't know the answer, just say you don't know. Be concise. "
    'Always say "thanks for asking!" at the end.\n'
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [42]:
from langchain.chains import RetrievalQA

# Retrieval QA chain that answers with the custom prompt and also returns the
# retrieved source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=retriever,
    llm=llm,
)

In [43]:
# RetrievalQA variant built with the "map_reduce" chain type.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="map_reduce",
    return_source_documents=True,
)

question = "what is machine learning?"
# Calling a chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same output dict.
result = qa_chain_mr.invoke({"query": question})
result["result"]

'Machine learning (ML) focuses on making decisions or predictions based on data. The goal of a ML system is typically to estimate or generalize, based on data provided. Almost any application that involves understanding data or signals that come from the real world can be nicely addressed using machine learning.'

In [46]:
# RetrievalQA variant built with the "refine" chain type.
qa_chain_rf = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="refine",
    return_source_documents=True,
)

question = "what is machine learning?"
# Calling a chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same output dict.
result = qa_chain_rf.invoke({"query": question})
result["result"]

'Okay, here\'s a refined answer to "What is machine learning?" incorporating the provided context from the MIT 6.390 course material:\n\nMachine learning (ML) is a field focused on building systems that make decisions or predictions based on data, with the goal of estimating or generalizing from that data. A core principle in many ML systems is to select actions that minimize the expected loss (or risk, though that term can be ambiguous). This approach, rooted in a theory of rational agency, aims to optimize performance, such as maximizing long-term gains in a gambling setting. While model-making is central to ML, the focus is often on achieving accurate predictions and effective decision-making, rather than solely on discovering underlying causal processes. ML systems can employ various types of models, including non-parametric models that generate predictions directly from the data in response to queries. The strength of ML lies in its ability to provide fast, robust, and efficient s

In [47]:
# RetrievalQA variant built with the "map_rerank" chain type.
qa_chain_r = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    chain_type="map_rerank",
    return_source_documents=True,
)

question = "what is machine learning?"
# Calling a chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same output dict.
result = qa_chain_r.invoke({"query": question})
result["result"]



'The main focus of machine learning (ML) is making decisions or predictions based on data.'

In [48]:
# Ask the same question through the custom-prompt chain. invoke() replaces the
# deprecated direct call on the chain object and returns the same output dict.
response = qa_chain.invoke({"query": question})
print(response["result"])

Machine learning is making decisions or predictions based on data. Thanks for asking!


In [49]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Conversation memory: history is stored under the "chat_history" key and
# returned as message objects rather than a single string.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

# Conversational retrieval chain wired to the shared LLM, retriever and memory.
qa = ConversationalRetrievalChain.from_llm(
    memory=memory,
    retriever=retriever,
    llm=llm,
)

In [50]:
# First conversational turn. invoke() replaces the deprecated direct call on
# the chain object and returns the same output dict.
qa.invoke({"question": "what is machine learning?"})

{'question': 'what is machine learning?',
 'chat_history': [HumanMessage(content='what is machine learning?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Machine learning (ML) is focused on making decisions or predictions based on data. It has become a way to approach many applications, like face detection, speech recognition, and language-processing tasks, in terms of speed, human engineering time, and robustness.', additional_kwargs={}, response_metadata={})],
 'answer': 'Machine learning (ML) is focused on making decisions or predictions based on data. It has become a way to approach many applications, like face detection, speech recognition, and language-processing tasks, in terms of speed, human engineering time, and robustness.'}

In [51]:
# Follow-up turn; "it" in the question relies on the chat history held in the
# chain's memory. invoke() replaces the deprecated direct call on the chain.
qa.invoke({"question": "what is the lecture name where it is defined?"})

{'question': 'what is the lecture name where it is defined?',
 'chat_history': [HumanMessage(content='what is machine learning?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Machine learning (ML) is focused on making decisions or predictions based on data. It has become a way to approach many applications, like face detection, speech recognition, and language-processing tasks, in terms of speed, human engineering time, and robustness.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='what is the lecture name where it is defined?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Based on the provided text, machine learning is defined in Chapter 1, titled "Introduction".', additional_kwargs={}, response_metadata={})],
 'answer': 'Based on the provided text, machine learning is defined in Chapter 1, titled "Introduction".'}

In [52]:

# Third conversational turn. invoke() replaces the deprecated direct call on
# the chain object and returns the same output dict.
qa.invoke({"question": "What are the topics covered in this class?"})

{'question': 'What are the topics covered in this class?',
 'chat_history': [HumanMessage(content='what is machine learning?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Machine learning (ML) is focused on making decisions or predictions based on data. It has become a way to approach many applications, like face detection, speech recognition, and language-processing tasks, in terms of speed, human engineering time, and robustness.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='what is the lecture name where it is defined?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Based on the provided text, machine learning is defined in Chapter 1, titled "Introduction".', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What are the topics covered in this class?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The class covers the following topics: classification, regression, reinforcement l