In [1]:
# Step 1: Installing Required Libraries
# Downgrading to a stable version (0.1.20). It fixes the issue where classes like RetrievalQA were moved or deprecated in newer versions (0.2.x).

!pip uninstall langchain langchain-groq langchain-community -y

!pip install langchain==0.1.20 \
             langchain-groq==0.1.4 \
             chromadb==1.3.5 \
             pypdf==4.2.0 \
             sentence-transformers==2.7.0 \
             langchain-community==0.0.38 \
             langchain-core==0.1.52

Found existing installation: langchain 0.1.20
Uninstalling langchain-0.1.20:
  Successfully uninstalled langchain-0.1.20
Found existing installation: langchain-groq 0.1.4
Uninstalling langchain-groq-0.1.4:
  Successfully uninstalled langchain-groq-0.1.4
Found existing installation: langchain-community 0.0.38
Uninstalling langchain-community-0.0.38:
  Successfully uninstalled langchain-community-0.0.38
Collecting langchain==0.1.20
  Using cached langchain-0.1.20-py3-none-any.whl.metadata (13 kB)
Collecting langchain-groq==0.1.4
  Using cached langchain_groq-0.1.4-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-community==0.0.38
  Using cached langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Using cached langchain-0.1.20-py3-none-any.whl (1.0 MB)
Using cached langchain_groq-0.1.4-py3-none-any.whl (11 kB)
Using cached langchain_community-0.0.38-py3-none-any.whl (2.0 MB)
Installing collected packages: langchain-groq, langchain-community, langchain
Successfully installe

In [21]:
from google.colab import userdata
userdata.get('APIKeytest')

'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'

In [27]:
import os

from google.colab import userdata
userdata.get('APIKeytest')
from google.colab import userdata
userdata.get('APIKeytest')

os.environ["HUGGINGFACEHUB_API_TONKEN"] = userdata.get('APIKeytest')
os.environ["GROQ_API_KEY"] = userdata.get('APIKeytest')
print("Hugging Face Token Set!")
print("Groq API Key Set!")

Hugging Face Token Set!
Groq API Key Set!


In [28]:
# Step 3: Coding Basic Functionalities

# ===============================
# AI Study Assistant with Groq
# ===============================

from langchain_groq import ChatGroq
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
import os

# 1. Load File (PDF)
def load_pdf(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    return docs

# 2. Split Text
def split_docs(docs):
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    return splitter.split_documents(docs)

# 3. Create Vector Database
def create_db(chunks):
    emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = Chroma.from_documents(chunks, emb)
    return db

# 4. LLM (Groq)
llm = ChatGroq(
    model="llama-3.1-8b-instant",  # NEW/UPDATED MODEL
    temperature=0.2
)


def create_qa(db):
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever()
    )

# 6. Summarizer
def summarize(text):
    prompt = f"Summarize the following text in simple bullet points:\n\n{text}"
    response = llm.invoke(prompt)
    return response.content

# 7. MCQ Generator
def generate_mcqs(text):
    prompt = f"Generate 10 MCQs with answers from this text:\n\n{text}"
    response = llm.invoke(prompt)
    return response.content

print("AI Study Assistant Ready!")

AI Study Assistant Ready!


In [29]:
# Step 4: Uploading PDF & Test

from google.colab import files

# This initiates the Colab file upload dialog
uploaded = files.upload()

# Get the filename of the uploaded PDF
pdf_path = list(uploaded.keys())[0]

# Process the PDF
docs = load_pdf(pdf_path)
chunks = split_docs(docs)
db = create_db(chunks)
qa = create_qa(db)

print("PDF Loaded & Indexed!")


Saving Expository writing 1.pdf to Expository writing 1.pdf


  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

PDF Loaded & Indexed!


In [30]:
# Step 5 : Front-End Prview (Q/A + Summarizer + MCQ's)

!pip install gradio -qq  # Install Gradio silently
import gradio as gr
import os # Ensure os is available

# The 'qa' object, 'chunks', 'summarize', and 'generate_mcqs' must be defined
# by running Steps 5 and 6 before this cell.

# 1. Define the QA Interface Function (for the Chat Tab)
def qa_interface(question, history):
    """Processes the user message using the RetrievalQA chain."""
    # Ensure a question is asked before running the chain
    if question:
        return qa.run(question)
    return "Please ask a question based on your notes."

# 2. Define the Summarizer Interface Function (for the Summarize Tab)
def summarize_interface():
    # We will summarize the first 5 chunks for a decent overview
    text_to_summarize = " ".join([c.page_content for c in chunks[:5]])
    if text_to_summarize:
        return summarize(text_to_summarize)
    return "PDF chunks not available for summarization."

# 3. Define the MCQ Interface Function (for the MCQ Tab)
def mcq_interface():
    # We will generate MCQs from the first chunk
    if chunks:
        return generate_mcqs(chunks[0].page_content)
    return "PDF chunks not available for MCQ generation."

# 4. Create the Tabbed Interface
with gr.Blocks(title="AI Study Assistant") as demo:
    gr.Markdown("# ðŸ“š AI Study Assistant Powered By GROQ")

    with gr.Tab("Ask Questions (RAG)"):
        gr.ChatInterface(
            fn=qa_interface,
            title="Question Answering from Notes",
            description="Ask specific questions about the content of your PDF.",
            submit_btn="Ask"
        )

    with gr.Tab("Summarizer"):
        gr.Markdown("### Summarize Key Content")
        summary_btn = gr.Button("Generate Summary of Notes")
        summary_output = gr.Textbox(label="Summary Output", lines=10)
        summary_btn.click(fn=summarize_interface, inputs=None, outputs=summary_output)

    with gr.Tab("MCQ Generator"):
        gr.Markdown("### Generate Practice MCQs")
        mcq_btn = gr.Button("Generate 10 MCQs")
        mcq_output = gr.Textbox(label="MCQ Output", lines=10)
        mcq_btn.click(fn=mcq_interface, inputs=None, outputs=mcq_output)

# 5. Launch the Application
demo.launch(share=True) # share=True is required to get a public URL in Colab

  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://56c601272d3a4a791d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


