In [1]:
import getpass
import os

from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.history_aware_retriever import create_history_aware_retriever
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter




In [None]:
# Model configuration.
# The Groq key is never written into the notebook: it is taken from the
# GROQ_API_KEY environment variable, with an interactive (non-echoing) prompt as
# a fallback so the cell still works on a fresh kernel without that variable set.
llm = ChatGroq(
    api_key=os.environ.get("GROQ_API_KEY") or getpass.getpass("Groq API key: "),
    model="openai/gpt-oss-20b",
    temperature=0,  # keep answers as repeatable as the model allows
)

# Sentence-embedding model used to vectorise the document chunks and the queries.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [3]:
# String output parser instance. NOTE(review): not referenced by any of the
# cells shown in this part of the notebook — confirm it is still needed.
parser=StrOutputParser()

In [4]:
# Load the source PDF from the notebook's working directory.
# NOTE: despite the names, `doc` is the loader object and `page` is whatever
# `load()` returns (presumably a list of documents — confirm); `page` is what
# the splitter cell consumes.
doc=PyPDFLoader("ML Guide.pdf")
page=doc.load()

In [5]:
# Break the loaded documents into overlapping chunks before embedding.
# The 200-unit overlap lets neighbouring chunks share some text, so a passage
# that straddles a chunk boundary is still retrievable in one piece.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(page)

In [6]:

# Embed every chunk in `texts` with `embedding` and index the vectors in FAISS.
# The index is rebuilt each time this cell runs; nothing is persisted here.
vectorstore=FAISS.from_documents(texts, embedding)

In [7]:
# Retriever that returns the 3 most similar chunks for a query.
# `as_retriever` takes the search settings through its `search_kwargs` keyword;
# nesting them under any other keyword name leaves k at the library default.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [8]:
# System instructions for the answering step. `{context}` is the placeholder
# that receives the retrieved document text.
system_prompt = (
    "You are a helpful and precise AI assistant. "
    "Answer the user's question strictly based on the provided context. "
    "If the context does not contain enough information to answer, respond with: 'I don't know.' "
    "Do not use any external or prior knowledge. "
    "If the user makes grammar mistakes, fix them in your answer. "
    "Keep your answer clear, concise, and directly relevant to the context below.\n\n"
    "Context:\n{context}"
)

# System instructions for the question-rewriting step that runs before retrieval.
contextualize_q_system_prompt = (
    "You are an AI assistant that helps provide context for user questions based on previous interactions. "
    "Given the conversation history and a new user question, rephrase or expand the question to include relevant context from the history. "
    "If the history does not provide additional context, return the original question.\n\n"
)


def _chat_prompt(system_text):
    """Build a chat prompt: system message, prior `chat_history`, then the human `{input}`."""
    return ChatPromptTemplate.from_messages(
        [
            ("system", system_text),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )


# Both prompts share the same three-part layout and differ only in the system text.
qa_prompt = _chat_prompt(system_prompt)
question_contextualizer_prompt = _chat_prompt(contextualize_q_system_prompt)

In [9]:
# Retriever wrapper that uses `llm` with `question_contextualizer_prompt` so the
# conversation history is taken into account before searching the vector store.
history_aware_retriever=create_history_aware_retriever(llm,retriever,question_contextualizer_prompt)

In [10]:
# Answering step: combines the retrieved documents with `qa_prompt` (whose system
# message carries the `{context}` placeholder) and sends the result to `llm`.
qa_chain=create_stuff_documents_chain(llm,qa_prompt)

In [11]:
# End-to-end pipeline: retrieve with `history_aware_retriever`, then answer with
# `qa_chain`. `ask_question` reads the reply from the result's 'answer' key.
chain= create_retrieval_chain(history_aware_retriever, qa_chain)

In [17]:
# Conversation memory shared by all `ask_question` calls (a module-level list).
# Re-running this cell starts a new conversation.
chat_history=[]


In [18]:
def trim_message(chat_history, max_len=20):
    """Return at most the ``max_len`` most recent messages of ``chat_history``.

    Args:
        chat_history: List of messages, oldest first.
        max_len: Maximum number of messages to keep. ``0`` keeps nothing.

    Returns:
        ``chat_history`` itself (same object) when it already fits, otherwise a
        new list holding only the last ``max_len`` messages.

    Raises:
        ValueError: If ``max_len`` is negative.
    """
    if max_len < 0:
        raise ValueError(f"max_len must be non-negative, got {max_len!r}")

    excess = len(chat_history) - max_len
    if excess > 0:
        # Slice from an explicit start index rather than ``[-max_len:]``:
        # ``-0`` is ``0``, so the negative form would return the whole list
        # when max_len is 0 instead of an empty one.
        return chat_history[excess:]
    return chat_history

In [19]:
def ask_question(question):
    """Send one question through the retrieval chain and record the exchange.

    The global ``chat_history`` is trimmed with ``trim_message`` first, then
    ``chain`` is invoked with the question and that history. The question and
    the answer are printed, and both are appended to ``chat_history`` as a
    ``HumanMessage`` / ``AIMessage`` pair. Nothing is returned.

    Because trimming happens before the new pair is appended, the stored
    history can exceed the trim limit by two messages between calls.
    """
    global chat_history
    chat_history = trim_message(chat_history)

    result = chain.invoke({"input": question, "chat_history": chat_history})

    print(f"Question={question}")
    answer = result["answer"]
    print(f"Answer={answer}\n")

    # Record the turn so follow-up questions can refer back to it.
    chat_history.append(HumanMessage(content=question))
    chat_history.append(AIMessage(content=answer))


In [20]:
# Opening question of the conversation.
ask_question("what is supervised learning?")

Question=what is supervised learning?
Answer=Supervised learning is a type of machine learning where the model learns from labelled data. Each training example consists of input features paired with an expected output (label), and the algorithm learns to map inputs to outputs—examples include classification (e.g., spam detection) and regression (e.g., house‑price prediction).



In [21]:
# Follow-up that only makes sense together with the previous turn held in
# chat_history (an example of *what* is not stated in the question itself).
ask_question("can you give me an code example?")

Question=can you give me an code example?
Answer=Sure! Here’s a simple Python example that trains a decision‑tree classifier on the Iris dataset:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load data
iris = load_iris()
X, y = iris.data, iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Evaluate
print("Test accuracy:", clf.score(X_test, y_test))
```

This code demonstrates a typical supervised‑learning workflow: loading labelled data, splitting it, training a model, and evaluating its performance.



In [22]:
# Off-topic check: the system prompt tells the model to reply 'I don't know.'
# when the provided context does not contain the answer.
ask_question("Who is Virat Kohli?")

Question=Who is Virat Kohli?
Answer=I don't know.



In [23]:
# Inspect the raw message objects accumulated by the ask_question calls.
chat_history

[HumanMessage(content='what is supervised learning?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Supervised learning is a type of machine learning where the model learns from labelled data. Each training example consists of input features paired with an expected output (label), and the algorithm learns to map inputs to outputs—examples include classification (e.g., spam detection) and regression (e.g., house‑price prediction).', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='can you give me an code example?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Sure! Here’s a simple Python example that trains a decision‑tree classifier on the Iris dataset:\n\n```python\nfrom sklearn.datasets import load_iris\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeClassifier\n\n# Load data\niris = load_iris()\nX, y = iris.data, iris.target\n\n# Split into training and test sets\nX_train, X_test, y

In [24]:
# Print the conversation as a readable transcript: upper-cased message type
# followed by the message text, with a blank line after each entry.
for msg in chat_history:
    print(f"{msg.type.upper()}: {msg.content}\n")

HUMAN: what is supervised learning?

AI: Supervised learning is a type of machine learning where the model learns from labelled data. Each training example consists of input features paired with an expected output (label), and the algorithm learns to map inputs to outputs—examples include classification (e.g., spam detection) and regression (e.g., house‑price prediction).

HUMAN: can you give me an code example?

AI: Sure! Here’s a simple Python example that trains a decision‑tree classifier on the Iris dataset:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load data
iris = load_iris()
X, y = iris.data, iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Evaluate
print("Test accuracy:", clf.score(X_test, y_test))
```