<a href="https://colab.research.google.com/github/bhavani-priya880/machine-learning-mini-projects/blob/main/studybuddyAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Hugging Face transformers
!pip install transformers sentencepiece accelerate datasets

from transformers import pipeline

# 1. Named Entity Recognition (NER)
ner = pipeline("ner", model="dslim/bert-base-NER", grouped_entities=True)
text = "Albert Einstein developed the theory of relativity in Germany."
print("NER:", ner(text))

# 2. Question Answering (like chatbot)
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
context = "Albert Einstein developed the theory of relativity. He was born in Germany in 1879."
question = "Who developed the theory of relativity?"
print("QA:", qa(question=question, context=context))

# 3. Question Generation (quiz)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
qg_tokenizer = AutoTokenizer.from_pretrained("iarfmoose/t5-base-question-generator")
qg_model = AutoModelForSeq2SeqLM.from_pretrained("iarfmoose/t5-base-question-generator")

from transformers import pipeline
qg = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)

study_text = "Mahatma Gandhi led India’s independence movement using non-violence and civil disobedience."
generated = qg("generate questions: " + study_text, max_length=64, num_return_sequences=2)
print("Generated Questions:", generated)


In [None]:
# STEP 1: Install dependencies
!pip install transformers sentencepiece accelerate datasets gradio gTTS SpeechRecognition pydub
!apt-get -qq install -y ffmpeg


In [None]:
# STEP 2: Imports
import os, random, tempfile, json
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from pydub import AudioSegment
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face pipelines
# aggregation_strategy="simple" replaces the deprecated grouped_entities=True
# flag; each result still carries "entity_group" and "word".
ner_pipe = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
qa_pipe = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Question generation model (tokenizer and weights loaded explicitly, then
# wrapped in a text2text pipeline)
qg_tokenizer = AutoTokenizer.from_pretrained("iarfmoose/t5-base-question-generator")
qg_model = AutoModelForSeq2SeqLM.from_pretrained("iarfmoose/t5-base-question-generator")
qg_pipe = pipeline("text2text-generation", model=qg_model, tokenizer=qg_tokenizer)


In [6]:
# STEP 3: Helpers

# ASR: Convert speech -> text
def transcribe_audio(path):
    """Transcribe an audio file to text using ``Recognizer.recognize_google``.

    Args:
        path: Path to an audio file that ``sr.AudioFile`` can open.

    Returns:
        The recognized text, or the placeholder string
        ``"[ASR Error: Could not understand audio]"`` when recognition fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(path) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # UnknownValueError: the speech was unintelligible.
        # RequestError: the recognition service could not be reached or
        # rejected the request.
        # Anything else (including KeyboardInterrupt) is a real error and is
        # allowed to propagate instead of being hidden by a bare except.
        return "[ASR Error: Could not understand audio]"

# Convert audio to wav (pydub)
def convert_to_wav(input_path):
    """Re-encode an audio file as mono 16 kHz WAV in a new temporary file.

    Args:
        input_path: Path to the source audio file.

    Returns:
        Path of the newly written ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Decode first: if the input cannot be read, no temp file is left behind.
    audio = AudioSegment.from_file(input_path)
    audio = audio.set_channels(1).set_frame_rate(16000)

    # Reserve a unique path and close the handle immediately so the export
    # below can reopen the path without a dangling open file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name

    audio.export(out_path, format="wav")
    return out_path

# TTS: Convert text -> mp3
def tts_to_audio(text):
    """Synthesize English speech for ``text`` and save it as an MP3 file.

    Args:
        text: The text to speak.

    Returns:
        Path of the newly written ``.mp3`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Build the gTTS request first so that a rejected input does not leave an
    # empty temp file behind.
    tts = gTTS(text=text, lang="en")

    # Reserve a unique path and close the handle before gTTS writes to it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        out_path = tmp.name

    tts.save(out_path)
    return out_path

# NER Highlight
def highlight_entities(text):
    """Wrap each recognized entity in ``text`` as ``[entity (LABEL)]``.

    Entities are marked by character position rather than by global string
    replacement, so a name that occurs several times (or that is a substring
    of another entity) is tagged exactly where it was detected and never
    re-tagged inside a marker that was already inserted.

    Args:
        text: The text to annotate.

    Returns:
        A tuple ``(highlighted, ents)``: the annotated text and the raw list
        of entity dicts returned by ``ner_pipe``.
    """
    ents = ner_pipe(text)

    pieces = []
    cursor = 0  # index in ``text`` up to which output has been emitted
    for ent in ents:
        start, end = ent.get("start"), ent.get("end")
        if start is None or end is None:
            # No offsets available: locate the entity text after the cursor.
            start = text.find(ent["word"], cursor)
            if start == -1:
                continue
            end = start + len(ent["word"])
        if start < cursor:
            # Overlaps a span that is already marked; skip to avoid nesting.
            continue
        pieces.append(text[cursor:start])
        pieces.append(f"[{text[start:end]} ({ent['entity_group']})]")
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces), ents

# Quiz generation
# spaCy pipeline used by generate_mcqs to part-of-speech tag the study text
# and collect its nouns / proper nouns as answer options.
# Requires the "en_core_web_sm" model to be installed in the runtime.
import spacy
nlp = spacy.load("en_core_web_sm")

def generate_mcqs(text, n=5):
    """Generate up to ``n`` multiple-choice questions from study text.

    Questions come from ``qg_pipe``. For each question the correct option is
    the answer span ``qa_pipe`` extracts from ``text`` (falling back to a
    random noun when no answer is found). The other options are nouns taken
    from the text, padded with placeholder options when the text has too few.

    Args:
        text: The study material.
        n: Number of questions to request from the generator.

    Returns:
        A list of formatted question strings, each with options A-D. On
        empty input or on any generation failure, a one-element list holding
        a human-readable message is returned instead.
    """
    if not text or not text.strip():
        return ["Please provide some study material."]

    try:
        # NOTE(review): the model card for this checkpoint appears to expect
        # an "<answer> ... <context> ..." input rather than this prefix;
        # confirm before changing the prompt.
        questions_raw = qg_pipe(
            "generate questions: " + text,
            max_length=128,
            num_return_sequences=n,
            do_sample=True,
            # Beam count must be at least the number of returned sequences.
            num_beams=max(n, 5),
        )

        # Nouns and proper nouns serve as distractors; sorted so the pool is
        # stable between runs (set iteration order is not).
        doc = nlp(text)
        nouns = sorted({token.text for token in doc if token.pos_ in ("PROPN", "NOUN")})
        if not nouns:
            nouns = ["Answer"]

        placeholders = ["Option X", "Option Y", "Option Z"]
        mcqs = []
        for i, q in enumerate(questions_raw, 1):
            question_text = q.get("generated_text", f"Question {i}?")

            # Ask the QA model for the real answer so the option list contains
            # a choice that actually answers the question.
            answer = ""
            if question_text.strip():
                answer = qa_pipe(question=question_text, context=text).get("answer", "").strip()
            correct_answer = answer or random.choice(nouns)

            # Keep every real distractor available and only pad the remainder
            # with placeholders.
            distractors = [w for w in nouns if w != correct_answer]
            wrong_choices = random.sample(distractors, min(3, len(distractors)))
            wrong_choices += placeholders[: 3 - len(wrong_choices)]

            options = [correct_answer] + wrong_choices
            random.shuffle(options)

            mcqs.append(
                f"{i}. {question_text}\n"
                f"A) {options[0]}\n"
                f"B) {options[1]}\n"
                f"C) {options[2]}\n"
                f"D) {options[3]}\n"
            )

        return mcqs

    except Exception as e:
        # UI boundary: report the failure in the quiz box instead of raising.
        return [f"Error generating MCQs: {str(e)}"]

# Q&A
def answer_question(question, context):
    """Answer ``question`` by extracting a span from ``context`` with ``qa_pipe``.

    Args:
        question: The user's question.
        context: The study notes to search for the answer.

    Returns:
        The extracted answer text, or a short guidance message when the
        question or context is empty or no answer text was produced.
    """
    # The QA pipeline raises on empty inputs, so answer those cases directly.
    if not question or not question.strip():
        return "Please ask a question."
    if not context or not context.strip():
        return "Please add some study notes first so I can look for an answer."

    result = qa_pipe(question=question, context=context)
    answer = result["answer"]
    if not answer.strip():
        # Never hand an empty string to downstream consumers such as TTS.
        return "I could not find an answer in the notes."
    return answer


In [7]:
# STEP 4: Gradio UI

import os

import gradio as gr

# Two-tab app: tab 1 annotates notes and builds a quiz, tab 2 answers typed or
# spoken questions about the notes from tab 1 and reads the answer aloud.
with gr.Blocks() as demo:
    gr.Markdown("# 🎓 StudyBuddy: AI-powered Study Assistant (Hackathon Project)")

    with gr.Tab("📘 Study Material → Entities + Quiz"):
        notes = gr.Textbox(label="Paste your study notes here", lines=10)
        btn_ner = gr.Button("Highlight Key Entities")
        ner_output = gr.Textbox(label="Highlighted Entities")

        btn_quiz = gr.Button("Generate Quiz Questions")
        quiz_output = gr.Textbox(label="Generated Quiz", lines=8)

        # highlight_entities returns (text, entities); only the text is shown.
        btn_ner.click(lambda t: highlight_entities(t)[0], notes, ner_output)
        btn_quiz.click(lambda t: "\n\n".join(generate_mcqs(t, n=5)), notes, quiz_output)

    with gr.Tab("💬 Chat with StudyBuddy (Voice + Text)"):
        # NOTE(review): the recorded cell output shows a warning pointing at
        # this line, presumably about the tuple-style chat history; confirm
        # against the installed Gradio version before changing the format.
        chatbot = gr.Chatbot(label="Chatbot")
        txt_in = gr.Textbox(label="Type your question here")
        mic_in = gr.Audio(type="filepath", label="Or use your voice")
        tts_audio = gr.Audio(label="StudyBuddy speaks (TTS)", interactive=False)

        def _respond(prefix, msg, history, notes):
            """Append one question/answer turn; return (history, tts_path)."""
            if not notes or not notes.strip():
                # Without notes the QA model has no context to search.
                history.append(
                    (prefix + msg, "StudyBuddy: Please paste your study notes in the first tab first.")
                )
                return history, None
            ans = answer_question(msg, notes)
            audio = tts_to_audio(ans)
            history.append((prefix + msg, "StudyBuddy: " + ans))
            return history, audio

        def chat_text(msg, history, notes):
            """Handle a typed question submitted from the textbox."""
            history = history or []
            if not msg or not msg.strip():
                # Nothing was asked; leave the conversation untouched.
                return history, None
            return _respond("You: ", msg, history, notes)

        def chat_voice(path, history, notes):
            """Handle a recorded question: convert, transcribe, then answer."""
            history = history or []
            if not path:
                # The change event also fires when the recording is cleared.
                return history, None

            wav = convert_to_wav(path)
            try:
                msg = transcribe_audio(wav)
            finally:
                # The converted file is only needed for transcription.
                os.remove(wav)

            if msg.startswith("[ASR Error"):
                # Do not feed the error placeholder to the QA model as if it
                # were the user's question.
                history.append(
                    (
                        "You (voice): " + msg,
                        "StudyBuddy: Sorry, I could not understand the audio. Please try again.",
                    )
                )
                return history, None
            return _respond("You (voice): ", msg, history, notes)

        txt_in.submit(chat_text, [txt_in, chatbot, notes], [chatbot, tts_audio])
        mic_in.change(chat_voice, [mic_in, chatbot, notes], [chatbot, tts_audio])

demo.launch(share=True)

  chatbot = gr.Chatbot(label="Chatbot")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://af4fa097493cceaadf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


