In [None]:
# Cell 1: Install Required Packages (Updated for Groq)
# Run this first - it will take a few minutes

print("🔧 Installing required packages...")

# Core packages for Groq integration
!pip install -q groq langchain-groq

print("✅ Installed Groq and LangChain-Groq")

# LangChain ecosystem
!pip install -q langchain langchain-community langchain-huggingface

print("✅ Installed LangChain packages")

# UI and utilities
!pip install -q gradio

print("✅ Installed Gradio")

# Vector search and embeddings
!pip install -q faiss-cpu sentence-transformers

print("✅ Installed FAISS and sentence-transformers")

# Document processing
!pip install -q pypdf python-docx

print("✅ Installed document processors")

print("\n🎉 All packages installed successfully!")
print("Now run the next cell to import libraries...")

🔧 Installing required packages...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Installed Groq and LangChain-Groq
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m95.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m43.9 MB

In [None]:
print("📚 Importing libraries...")

import os
import tempfile
import shutil
from typing import List, Optional
import re

print("✅ Basic imports done")

# Gradio for UI
import gradio as gr
print("✅ Gradio imported")

# Groq integration
from groq import Groq
from langchain_groq import ChatGroq
print("✅ Groq imports done")

# LangChain components
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.schema import HumanMessage, SystemMessage
print("✅ LangChain components imported")

print("\n🎉 All imports successful!")
print("Check for any import errors above. If none, proceed to Cell 3...")

📚 Importing libraries...
✅ Basic imports done
✅ Gradio imported
✅ Groq imports done
✅ LangChain components imported

🎉 All imports successful!
Check for any import errors above. If none, proceed to Cell 3...


In [None]:
print("🚀 Setting up Groq API...")

# SECURITY: the API key must never be written into the notebook itself. A key
# saved in a cell (even in a comment) is disclosed to everyone who can read
# the file or its history. Any key that was ever stored in this notebook
# should be revoked in the Groq console and replaced.
#
# The key is read from the GROQ_API_KEY environment variable; when that is not
# set, it is requested interactively without echoing it to the cell output.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()

if not GROQ_API_KEY:
    from getpass import getpass  # local import: only needed for the fallback

    GROQ_API_KEY = getpass("Enter your Groq API key: ").strip()

if not GROQ_API_KEY:
    print("⚠️ Please set your Groq API key!")
    raise ValueError("Groq API key not set")

# Export the key so code that looks it up in the environment can find it.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

try:
    # Initialize Groq client
    groq_client = Groq(api_key=GROQ_API_KEY)
    print("✅ Groq client initialized")

    # Groq model choices known when this notebook was written:
    # - "llama3-8b-8192": Llama 3 8B (fast, good quality)
    # - "llama3-70b-8192": Llama 3 70B (slower, higher quality)
    # - "mixtral-8x7b-32768": Mixtral 8x7B (good for reasoning)
    # - "gemma-7b-it": Google Gemma 7B
    # NOTE(review): hosted model IDs get retired over time - confirm the
    # chosen ID is still offered by Groq. Set the GROQ_MODEL environment
    # variable to switch models without editing this cell.
    model_name = os.getenv("GROQ_MODEL", "llama3-8b-8192")  # Fast and efficient

    print(f"🦙 Using Groq model: {model_name}")

    # Create LangChain ChatGroq instance
    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name=model_name,
        temperature=0.7,
        max_tokens=1024,
        timeout=60,
        max_retries=3,
    )
    print("✅ Groq LLM created successfully")

    # Test the connection with one real request so a bad key or model name
    # fails here rather than on the first user question.
    test_response = llm.invoke("Hello! Can you help with studying?")
    print(f"🧪 Test response: {test_response.content[:100]}...")
    print("✅ Groq API connection verified")

except Exception as e:
    print(f"❌ Error setting up Groq: {e}")
    print("Please check:")
    print("1. Your API key is correct")
    print("2. You have Groq API credits")
    print("3. Internet connection is working")
    raise

# Initialize embeddings (for local vector search)
print("🔍 Loading embeddings model...")
try:
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    print("✅ Embeddings model loaded")
except Exception as e:
    print(f"⚠️ Primary embeddings failed: {e}")
    # Try alternative; if this one fails too the error propagates, because
    # nothing downstream can work without an embeddings model.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-MiniLM-L6-v2"
    )
    print("✅ Alternative embeddings model loaded")

print("\n🎉 Setup complete!")
print(f"🚀 Using Groq API with {model_name}")
print("✨ This will be much faster than local models!")
print("Proceed to Cell 4...")

🚀 Setting up Groq API...
✅ Groq client initialized
🦙 Using Groq model: llama3-8b-8192
✅ Groq LLM created successfully
🧪 Test response: I'd be happy to help with studying. What subject or topic are you studying, and what specifically do...
✅ Groq API connection verified
🔍 Loading embeddings model...


  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Embeddings model loaded

🎉 Setup complete!
🚀 Using Groq API with llama3-8b-8192
✨ This will be much faster than local models!
Proceed to Cell 4...


In [None]:
print("📝 Creating enhanced StudyBuddy class for Groq...")

class StudyBuddy:
    """Study assistant that answers questions about one uploaded document.

    An uploaded file is split into chunks, embedded into a FAISS vector store
    and wrapped in a RetrievalQA chain. The module-level ``llm`` and
    ``embeddings`` objects are captured when the instance is created.

    Every public method returns user-facing text (prefixed with ❌ on failure)
    instead of raising, because the results are shown directly in the UI.
    """

    def __init__(self):
        # FAISS index over the chunks of the current document (None until upload).
        self.vectorstore = None
        # RetrievalQA chain bound to ``vectorstore`` (None until upload).
        self.qa_chain = None
        # One dict per answered question: question, answer, difficulty, timestamp.
        self.conversation_history = []
        # Chunks of the most recently processed document.
        self.current_documents = []
        # Use the Groq LLM and embeddings
        self.llm = llm
        self.embeddings = embeddings
        print("✅ StudyBuddy initialized with Groq")

    def process_uploaded_file(self, file_path: str) -> str:
        """Index an uploaded study file so it can be queried.

        Args:
            file_path: Path of the uploaded file. ``.pdf``, ``.txt`` and
                ``.docx`` (matched case-insensitively) get dedicated loaders;
                anything else is read as plain text.

        Returns:
            A status message. On success the vector store, QA chain and chunk
            list are replaced together; on failure the previous state is kept.
        """
        print(f"📄 Processing file: {file_path}")

        if not file_path:
            return "❌ No file uploaded. Please upload a study material file."

        try:
            documents = self._load_documents(file_path)
            print(f"✅ Loaded {len(documents)} document(s)")

            # Split documents into chunks (optimized for Groq)
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1500,  # Larger chunks for Groq
                chunk_overlap=300,
                length_function=len
            )
            chunks = text_splitter.split_documents(documents)

            if not chunks:
                # Typical for scanned/image-only PDFs. Stop here with a clear
                # message rather than handing an empty list to the indexer.
                error_msg = (
                    "❌ No readable text found in this file. "
                    "Scanned or image-only documents are not supported."
                )
                print(error_msg)
                return error_msg

            print(f"✅ Split into {len(chunks)} chunks")

            # Create vector store
            print("🔍 Creating vector store...")
            vectorstore = FAISS.from_documents(chunks, self.embeddings)
            print("✅ Vector store created")

            # Create QA chain optimized for Groq
            print("🔗 Creating QA chain...")
            qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever(
                    search_type="similarity",
                    search_kwargs={"k": 4}  # Get more context for Groq
                ),
                return_source_documents=True,
                chain_type_kwargs={
                    "prompt": self._create_qa_prompt()
                }
            )
            print("✅ QA chain created")

            # Commit only after every step succeeded, so a failed upload can
            # never leave a new vector store paired with an old QA chain.
            self.current_documents = chunks
            self.vectorstore = vectorstore
            self.qa_chain = qa_chain

            return f"✅ Successfully processed {len(chunks)} sections from your study material!"

        except Exception as e:
            # UI boundary: report the problem as text instead of raising.
            error_msg = f"❌ Error processing file: {str(e)}"
            print(error_msg)
            return error_msg

    def _load_documents(self, file_path: str) -> list:
        """Load ``file_path`` into a list of documents based on its extension."""
        lowered = file_path.lower()  # uploads named e.g. "NOTES.PDF" must match too

        if lowered.endswith('.pdf'):
            print("📚 Loading PDF...")
            return PyPDFLoader(file_path).load()

        if lowered.endswith('.txt'):
            print("📄 Loading text file...")
            return TextLoader(file_path, encoding='utf-8').load()

        if lowered.endswith('.docx'):
            # A .docx file is a zip archive; reading it as text yields garbage.
            print("📄 Loading Word document...")
            content = self._read_docx_text(file_path)
        else:
            print("📄 Loading as plain text...")
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except UnicodeDecodeError:
                # latin-1 can decode any byte sequence, so this cannot fail.
                with open(file_path, 'r', encoding='latin-1') as f:
                    content = f.read()

        return [Document(page_content=content, metadata={"source": file_path})]

    @staticmethod
    def _read_docx_text(file_path: str) -> str:
        """Return the paragraph text of a ``.docx`` file, one paragraph per line.

        Best-effort extraction using only the standard library: reads
        ``word/document.xml`` from the archive and joins the text runs of each
        paragraph. Formatting, tabs and line breaks inside runs are ignored.

        Raises:
            zipfile.BadZipFile: If the file is not a zip archive.
            KeyError: If the archive has no ``word/document.xml``.
        """
        import zipfile
        import xml.etree.ElementTree as ET

        word_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"

        with zipfile.ZipFile(file_path) as archive:
            root = ET.fromstring(archive.read("word/document.xml"))

        paragraphs = (
            "".join(run.text or "" for run in paragraph.iter(f"{word_ns}t"))
            for paragraph in root.iter(f"{word_ns}p")
        )
        return "\n".join(text for text in paragraphs if text)

    def _create_qa_prompt(self):
        """Build the prompt template (``context`` + ``question``) for the QA chain."""
        from langchain.prompts import PromptTemplate

        template = """You are an AI study assistant. Use the following context to answer the student's question clearly and comprehensively.

Context: {context}

Question: {question}

Instructions:
- Provide accurate, detailed answers based on the context
- If the context doesn't contain enough information, say so clearly
- Use examples and explanations appropriate for students
- Structure your response with clear headings if helpful
- Be encouraging and supportive in your tone

Answer:"""

        return PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

    def ask_question(self, question: str, difficulty_level: str = "Medium") -> tuple:
        """Answer a question from the indexed material.

        Args:
            question: The student's question; ``None`` or blank is rejected.
            difficulty_level: "Beginner", "Medium" or "Advanced". Any other
                value sends the question to the chain unmodified.

        Returns:
            ``(answer, sources)``. ``sources`` previews up to three retrieved
            chunks and is ``""`` when there are none or on error.
        """
        # Normalise first so a missing value cannot crash the slicing below.
        question = (question or "").strip()
        print(f"💭 Answering question with Groq: {question[:50]}...")

        if not self.qa_chain:
            return "❌ Please upload study materials first!", ""

        if not question:
            return "❌ Please ask a question!", ""

        try:
            # Adjust question based on difficulty level
            difficulty_prompts = {
                "Beginner": f"Explain in simple, easy-to-understand terms: {question}",
                "Medium": f"Provide a clear explanation with examples: {question}",
                "Advanced": f"Give a detailed, comprehensive analysis: {question}"
            }

            enhanced_question = difficulty_prompts.get(difficulty_level, question)

            # Get response from QA chain. `invoke` replaces calling the chain
            # object directly, which LangChain has deprecated.
            print("🤔 Generating answer with Groq...")
            result = self.qa_chain.invoke({"query": enhanced_question})

            answer = result["result"]
            source_docs = result["source_documents"]

            # Format sources
            sources = ""
            if source_docs:
                sources = "**📚 Sources:**\n"
                for i, doc in enumerate(source_docs[:3], 1):
                    preview = doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
                    sources += f"{i}. {preview}\n\n"

            # Add to conversation history. "timestamp" is a 1-based sequence
            # number, not a wall-clock time.
            self.conversation_history.append({
                "question": question,
                "answer": answer,
                "difficulty": difficulty_level,
                "timestamp": str(len(self.conversation_history) + 1)
            })

            print("✅ Answer generated with Groq")
            return answer, sources

        except Exception as e:
            # UI boundary: report the problem as text instead of raising.
            error_msg = f"❌ Error with Groq API: {str(e)}"
            print(error_msg)
            return error_msg, ""

    def generate_quiz(self, topic: str, num_questions: int = 3) -> str:
        """Generate a multiple-choice quiz about ``topic`` from the material.

        Args:
            topic: Subject of the quiz; ``None`` or blank is rejected.
            num_questions: Number of questions. Converted to an integer of at
                least 1, so a float such as ``3.0`` does not end up in the prompt.

        Returns:
            The quiz text produced by the LLM, or a ❌ message.
        """
        if not self.vectorstore:
            return "❌ Please upload study materials first!"

        topic = (topic or "").strip()
        if not topic:
            return "❌ Please enter a topic!"

        try:
            num_questions = max(1, int(num_questions))

            # Get relevant documents for the topic
            relevant_docs = self.vectorstore.similarity_search(topic, k=3)
            context = "\n".join([doc.page_content for doc in relevant_docs])

            quiz_prompt = f"""Based on the following study material, create a quiz with {num_questions} questions about {topic}.

Study Material:
{context}

Create a quiz with:
- {num_questions} multiple choice questions
- 4 options (A, B, C, D) for each question
- Indicate the correct answer
- Include brief explanations for each correct answer

Format as:
**Question 1:** [Question text]
A) Option 1
B) Option 2
C) Option 3
D) Option 4
**Correct Answer:** [Letter] - [Brief explanation]

Quiz:"""

            response = self.llm.invoke(quiz_prompt)
            return response.content

        except Exception as e:
            return f"❌ Error generating quiz: {str(e)}"

    def get_topic_summary(self, topic: str) -> str:
        """Summarise ``topic`` from the five most similar chunks of the material.

        Args:
            topic: Subject to summarise; ``None`` or blank is rejected.

        Returns:
            The summary text produced by the LLM, or a ❌ message.
        """
        if not self.vectorstore:
            return "❌ Please upload study materials first!"

        topic = (topic or "").strip()
        if not topic:
            return "❌ Please enter a topic!"

        try:
            # Get relevant documents
            relevant_docs = self.vectorstore.similarity_search(topic, k=5)
            context = "\n".join([doc.page_content for doc in relevant_docs])

            summary_prompt = f"""Create a comprehensive summary of {topic} based on the following study material.

Study Material:
{context}

Create a structured summary with:
- Key concepts and definitions
- Main points and important details
- Examples if available
- How this topic relates to other concepts

Summary of {topic}:"""

            response = self.llm.invoke(summary_prompt)
            return response.content

        except Exception as e:
            return f"❌ Error generating summary: {str(e)}"

    def get_conversation_history(self) -> str:
        """Return the last five question/answer pairs formatted as Markdown.

        Answers longer than 300 characters are cut and marked with "...";
        shorter answers are shown in full without an ellipsis.
        """
        if not self.conversation_history:
            return "No questions asked yet. Start by asking a question about your study materials!"

        sections = ["## 📚 Recent Study Session\n\n"]
        for i, item in enumerate(self.conversation_history[-5:], 1):  # Last 5 questions
            answer = item['answer']
            preview = answer[:300] + "..." if len(answer) > 300 else answer
            sections.append(
                f"**Question {i}** ({item['difficulty']} level):\n"
                f"❓ {item['question']}\n\n"
                f"💡 {preview}\n\n"
                "---\n\n"
            )

        return "".join(sections)

# Instantiate the shared StudyBuddy used by the UI cells. A failure is
# reported as text rather than raised, so the cell itself never errors out.
try:
    study_buddy = StudyBuddy()
    print(
        "\n🎉 Enhanced StudyBuddy with Groq created successfully!",
        "🚀 Ready for lightning-fast responses!",
        "Proceed to Cell 6 (interface creation)...",
        sep="\n",
    )
except Exception as init_error:
    print(
        f"\n❌ Error creating StudyBuddy: {init_error}",
        "Check your Groq API setup above.",
        sep="\n",
    )

📝 Creating enhanced StudyBuddy class for Groq...
✅ StudyBuddy initialized with Groq

🎉 Enhanced StudyBuddy with Groq created successfully!
🚀 Ready for lightning-fast responses!
Proceed to Cell 6 (interface creation)...


In [None]:
# Cell 6: Create Gradio Interface
# This creates the web UI for the Study Buddy

print("🎨 Creating Gradio interface...")

def create_interface():
    """Build the Gradio Blocks UI and return it without launching it.

    The app has five tabs (upload, ask, quiz, summary, history) followed by a
    static tips section. Every button is wired to a method of the
    module-level ``study_buddy`` object.

    NOTE(review): because all handlers are bound to that single
    ``study_buddy`` object, its state (indexed document, question history)
    appears to be shared by everyone using the app - confirm this is intended
    before sharing the public link.

    Returns:
        The ``gr.Blocks`` app object.
    """
    with gr.Blocks(title="🎓 PrepGenie:Ask,Learn and Excel", theme=gr.themes.Soft()) as app:
        # Page header and usage instructions
        gr.Markdown("""
        # 🎓 PrepGenie:Ask,Learn and Excel
        Upload your study materials and get personalized help with your exam preparation!

        **Instructions:**
        1. Start with the "Upload Materials" tab
        2. Upload your PDF or text files
        3. Ask questions in the "Ask Questions" tab
        4. Generate quizzes and summaries as needed
        """)

        # --- Tab 1: upload a file and index it ---
        with gr.Tab("📤 Upload Materials"):
            gr.Markdown("### Upload your study materials (PDF, TXT files)")

            with gr.Row():
                # type="filepath" hands the handler a path string, which is
                # what process_uploaded_file takes as its only argument.
                # NOTE(review): ".docx" is offered here although the heading
                # above mentions only PDF/TXT - confirm the handler supports it.
                file_upload = gr.File(
                    label="Upload Study Materials",
                    file_types=[".pdf", ".txt", ".docx"],
                    type="filepath"
                )
                upload_btn = gr.Button("Process Material", variant="primary")

            upload_status = gr.Textbox(
                label="Status",
                placeholder="Upload a file and click 'Process Material'",
                interactive=False,
                lines=3
            )

            upload_btn.click(
                study_buddy.process_uploaded_file,
                inputs=[file_upload],
                outputs=[upload_status]
            )

        # --- Tab 2: question answering ---
        with gr.Tab("❓ Ask Questions"):
            gr.Markdown("### Ask questions about your uploaded materials")

            with gr.Row():
                with gr.Column(scale=3):
                    question_input = gr.Textbox(
                        label="Ask a Question",
                        placeholder="What would you like to know about your study material?",
                        lines=2
                    )
                with gr.Column(scale=1):
                    # Choices match the difficulty keys used by ask_question.
                    difficulty = gr.Dropdown(
                        choices=["Beginner", "Medium", "Advanced"],
                        value="Medium",
                        label="Difficulty Level"
                    )

            ask_btn = gr.Button("Get Answer", variant="primary")

            # NOTE(review): the handlers return Markdown-formatted text (for
            # example "**📚 Sources:**"); a Textbox presumably shows it
            # unrendered - confirm whether that is acceptable.
            with gr.Row():
                with gr.Column():
                    answer_output = gr.Textbox(
                        label="Answer",
                        lines=8,
                        interactive=False
                    )
                with gr.Column():
                    sources_output = gr.Textbox(
                        label="Sources",
                        lines=8,
                        interactive=False
                    )

            # ask_question returns an (answer, sources) pair, mapped onto the
            # two output boxes in that order.
            ask_btn.click(
                study_buddy.ask_question,
                inputs=[question_input, difficulty],
                outputs=[answer_output, sources_output]
            )

        # --- Tab 3: quiz generation ---
        with gr.Tab("🧠 Generate Quiz"):
            gr.Markdown("### Generate practice quizzes on specific topics")

            with gr.Row():
                quiz_topic = gr.Textbox(
                    label="Quiz Topic",
                    placeholder="Enter topic for quiz generation (e.g., 'photosynthesis', 'calculus derivatives')"
                )
                quiz_questions = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=3,
                    step=1,
                    label="Number of Questions"
                )

            quiz_btn = gr.Button("Generate Quiz", variant="primary")
            quiz_output = gr.Textbox(
                label="Generated Quiz",
                lines=15,
                interactive=False
            )

            quiz_btn.click(
                study_buddy.generate_quiz,
                inputs=[quiz_topic, quiz_questions],
                outputs=[quiz_output]
            )

        # --- Tab 4: topic summaries ---
        with gr.Tab("📋 Topic Summary"):
            gr.Markdown("### Get summaries of specific topics from your materials")

            summary_topic = gr.Textbox(
                label="Topic for Summary",
                placeholder="Enter topic you want summarized (e.g., 'machine learning', 'world war 2')"
            )
            summary_btn = gr.Button("Generate Summary", variant="primary")
            summary_output = gr.Textbox(
                label="Topic Summary",
                lines=10,
                interactive=False
            )

            summary_btn.click(
                study_buddy.get_topic_summary,
                inputs=[summary_topic],
                outputs=[summary_output]
            )

        # --- Tab 5: recent question history ---
        with gr.Tab("📚 Study History"):
            gr.Markdown("### Review your recent questions and answers")

            history_btn = gr.Button("View Recent Questions", variant="secondary")
            history_output = gr.Textbox(
                label="Recent Study Session",
                lines=15,
                interactive=False
            )

            # No inputs: get_conversation_history takes no arguments besides self.
            history_btn.click(
                study_buddy.get_conversation_history,
                outputs=[history_output]
            )

        # Static footer with usage tips
        gr.Markdown("""
        ---
        ### 💡 Tips for Better Results:
        - Upload clear, well-formatted study materials (PDF or TXT files work best)
        - Ask specific questions about your study content
        - Use different difficulty levels to adjust explanation complexity
        - Generate quizzes to test your understanding
        - Review topic summaries for quick revision

        ### 🔧 Troubleshooting:
        - If you get errors, try uploading smaller files
        - Make sure your files are readable text (not images or scanned PDFs)
        - For best results, use files under 10MB
        """)

    return app

# Build the Gradio app object. A failure is reported as text rather than
# raised, so the cell itself never errors out.
try:
    app = create_interface()
    print(
        "✅ Gradio interface created successfully!",
        "Ready to launch! Proceed to Cell 7...",
        sep="\n",
    )
except Exception as build_error:
    print(
        f"❌ Error creating interface: {build_error}",
        "Check the error above and make sure all previous cells ran successfully.",
        sep="\n",
    )

🎨 Creating Gradio interface...
✅ Gradio interface created successfully!
Ready to launch! Proceed to Cell 7...


In [None]:
# Cell 7: Launch the Application
# This launches the Study Buddy web interface

print("🚀 Launching AI Study Buddy...")

# First attempt: public share link with full diagnostics.
primary_launch_options = {
    "share": True,       # Creates a public URL you can access from anywhere
    "debug": True,       # Shows debug information
    "inbrowser": True,   # Opens in a new browser tab
    "height": 800,       # Set interface height
    "show_error": True,  # Show detailed error messages
}

# Second attempt, used only when the first one raises: local-only launch.
fallback_launch_options = {"share": False, "debug": True}

try:
    app.launch(**primary_launch_options)
    print(
        "\n🎉 AI Study Buddy is now running!",
        "Click on the public URL above to access your Study Buddy",
        "You can share this URL with others too!",
        sep="\n",
    )
except Exception as launch_error:
    print(
        f"❌ Error launching application: {launch_error}",
        "🔄 Trying alternative launch...",
        sep="\n",
    )
    try:
        app.launch(**fallback_launch_options)
        print("✅ Local version launched successfully!")
    except Exception as fallback_error:
        print(
            f"❌ Alternative launch also failed: {fallback_error}",
            "\n🔧 Troubleshooting steps:",
            "1. Make sure all previous cells ran without errors",
            "2. Try restarting the runtime and running all cells again",
            "3. Check if you have sufficient GPU/RAM resources",
            sep="\n",
        )

🚀 Launching AI Study Buddy...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ceee934a3a141c9ac2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


📄 Processing file: /tmp/gradio/5590e10c1f2e0efe2ad522918e941684fff50818c594a8e8f1a3a999a024ee69/ROBOTICS_MOD2 -part2.pdf
📚 Loading PDF...
✅ Loaded 90 document(s)
✅ Split into 90 chunks
🔍 Creating vector store...
✅ Vector store created
🔗 Creating QA chain...
✅ QA chain created
💭 Answering question with Groq: servo motor usage...
🤔 Generating answer with Groq...


  result = self.qa_chain({"query": enhanced_question})


✅ Answer generated with Groq
📄 Processing file: /tmp/gradio/5590e10c1f2e0efe2ad522918e941684fff50818c594a8e8f1a3a999a024ee69/ROBOTICS_MOD2 -part2.pdf
📚 Loading PDF...
✅ Loaded 90 document(s)
✅ Split into 90 chunks
🔍 Creating vector store...
✅ Vector store created
🔗 Creating QA chain...
✅ QA chain created
💭 Answering question with Groq: working of servo motor...
🤔 Generating answer with Groq...
✅ Answer generated with Groq
📄 Processing file: /tmp/gradio/5590e10c1f2e0efe2ad522918e941684fff50818c594a8e8f1a3a999a024ee69/ROBOTICS_MOD2 -part2.pdf
📚 Loading PDF...
✅ Loaded 90 document(s)
✅ Split into 90 chunks
🔍 Creating vector store...
✅ Vector store created
🔗 Creating QA chain...
✅ QA chain created
💭 Answering question with Groq: working of servo motor...
🤔 Generating answer with Groq...
✅ Answer generated with Groq
