**LLM Hackathon Code Base - AM044_AM045_AM046_AM903**

**📦 Dependency Installation: Set Up Environment for Lecture Content Extraction**

In [3]:
# ✅ Install all necessary dependencies
!pip install -q openai-whisper
!pip install -q moviepy
!pip install -q python-pptx
!pip install -q PyMuPDF
!pip install -q pdfminer.six
!pip install -q ffmpeg-python

**🎥📄 Lecture Content Extractor: Transcribe Videos and Extract Text from Slides (PDF/PPT/PPTX) using Whisper and Python**

In [13]:
import os
import shutil
import json
from pathlib import Path
from moviepy.editor import VideoFileClip
import whisper
from pptx import Presentation
import fitz  # PyMuPDF
from pdfminer.high_level import extract_text as extract_pdfminer_text
from IPython.display import display
from google.colab import files
import io

# Setup: make sure the working folders exist before anything is written to them
for folder in ("Uploads", "Output"):
    os.makedirs(folder, exist_ok=True)

# Load Whisper once; the transcription helper below reuses this module-level model
print("Loading Whisper model...")
model = whisper.load_model("base")  # use 'medium' or 'large' for better accuracy

# File upload helpers
def upload_files(label):
    """Ask the user to upload files and copy each one into ``Uploads/``.

    Args:
        label: Human-readable description shown in the upload prompt.

    Returns:
        List of paths (under ``Uploads/``) of the files that were written.
    """
    print(f"\nUpload your {label} files:")
    stored = []
    for filename, payload in files.upload().items():
        destination = os.path.join("Uploads", filename)
        with open(destination, 'wb') as handle:
            handle.write(payload)
        stored.append(destination)
    return stored

# Extractors
def transcribe_video(video_path, whisper_model=None):
    """Extract the audio track of a video and transcribe it with Whisper.

    Args:
        video_path: Path of the video file to transcribe.
        whisper_model: Optional object exposing ``transcribe(path)``. When
            omitted, the module-level ``model`` is used (the original behavior).

    Returns:
        The transcript text, or an empty string when the video has no audio track.

    Raises:
        RuntimeError: If the selected model has no ``transcribe`` method. This
            happens when the global ``model`` has been rebound by a later cell.
    """
    print(f"Transcribing: {video_path}")
    asr_model = model if whisper_model is None else whisper_model
    if not hasattr(asr_model, "transcribe"):
        raise RuntimeError(
            "The active `model` is not a Whisper model (it was probably reassigned "
            "by a later cell). Re-run the Whisper loading cell or pass whisper_model=..."
        )

    temp_audio = "temp_audio.wav"
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            print(f"No audio track found in {video_path}; returning an empty transcript.")
            return ""
        video.audio.write_audiofile(temp_audio, logger=None)
        result = asr_model.transcribe(temp_audio)
    finally:
        # Always release the clip's reader resources and the scratch WAV,
        # even when audio extraction or transcription fails.
        video.close()
        if os.path.exists(temp_audio):
            os.remove(temp_audio)
    return result['text']

def extract_text_from_pptx(pptx_path):
    """Collect the text of every text-bearing shape in a PPTX deck.

    Args:
        pptx_path: Path of the ``.pptx`` file.

    Returns:
        The shape texts, in slide order, joined with newlines.
    """
    print(f"Extracting text from PPTX: {pptx_path}")
    deck = Presentation(pptx_path)
    return "\n".join(
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if shape.has_text_frame
    )

def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF, preferring pdfminer and falling back to PyMuPDF.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        The extracted text. If pdfminer raises, the text of every page as
        returned by PyMuPDF, joined with newlines.
    """
    print(f"Extracting text from PDF: {pdf_path}")
    try:
        return extract_pdfminer_text(pdf_path)
    except Exception as e:
        # Deliberate best-effort: any pdfminer failure triggers the second extractor.
        print("Fallback to PyMuPDF due to error:", e)
        # The context manager closes the document once the pages have been read.
        with fitz.open(pdf_path) as doc:
            return "\n".join(page.get_text() for page in doc)

# Upload Inputs
video_files = upload_files("Lecture Video (only MP4)")
pdf_files = upload_files("PDF slides")
pptx_files = upload_files("PPTX slides")
ppt_files = upload_files("PPT slides")

# Process video (only one lecture video is supported per run)
lecture_text = ""
if video_files:
    if len(video_files) > 1:
        print(f"⚠️ {len(video_files)} videos uploaded; only the first one is transcribed: {video_files[0]}")
    lecture_text = transcribe_video(video_files[0])

# Process slides: one text chunk per file, PDFs first, then PPTX decks
slide_chunks = [extract_text_from_pdf(pdf) for pdf in pdf_files]
slide_chunks += [extract_text_from_pptx(pptx) for pptx in pptx_files]

# This notebook has no extractor for legacy .ppt decks (only PDF and PPTX are
# parsed), so tell the user instead of silently dropping the upload.
for ppt in ppt_files:
    print(f"⚠️ Skipping {ppt}: legacy .ppt files are not parsed. Re-save the deck as .pptx and upload it again.")

# Each chunk is followed by a blank line, as before
slides_text = "".join(chunk + "\n\n" for chunk in slide_chunks)

# Output
with open("Output/lecture_transcript.txt", "w", encoding="utf-8") as f:
    f.write(lecture_text.strip())

with open("Output/slides_text.txt", "w", encoding="utf-8") as f:
    f.write(slides_text.strip())

print("\n✅ Files processed and saved in /Output:")
print("- lecture_transcript.txt")
print("- slides_text.txt")

Loading Whisper model...

Upload your Lecture Video (only MP4) files:



Upload your PDF slides files:



Upload your PPTX slides files:


Saving class9_Unit3_Trees_naryTraversal.pptx to class9_Unit3_Trees_naryTraversal (1).pptx

Upload your PPT slides files:


Extracting text from PPTX: Uploads/class9_Unit3_Trees_naryTraversal (1).pptx

✅ Files processed and saved in /Output:
- lecture_transcript.txt
- slides_text.txt


**📝 Combine Lecture Transcript and Slide Content into a Single File**

In [14]:
# Path to combined input file
combined_path = "Output/input.txt"

# Assemble transcript and slide text under clear headers, then write once
combined_text = (
    "### Lecture Transcript\n\n"
    + lecture_text.strip() + "\n\n"
    + "### Slide Content\n\n"
    + slides_text.strip()
)
with open(combined_path, "w", encoding="utf-8") as f:
    f.write(combined_text)

print(f"\n📄 Combined input saved as: {combined_path}")


📄 Combined input saved as: Output/input.txt


**🤖 Install Google Generative AI SDK**

In [7]:
!pip install -q google-generativeai

**🧠 Generate Structured Lecture Notes using Gemini (Google Generative AI)**

In [None]:
import google.generativeai as genai

# 🔐 Authenticate using your Google AI Studio API key.
# The key is taken from the GEMINI_API_KEY environment variable, or asked for
# once interactively, so it is never stored in the notebook source or outputs.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

def generate_notes_with_gemini(input_text):
    """Ask Gemini for structured lecture notes built from ``input_text``.

    Args:
        input_text: Raw lecture content inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    print("🧠 Generating lecture notes using Gemini...")
    prompt = f"""You are an AI assistant helping generate structured lecture notes.
Format content using:
- Headings, subheadings
- Bullet points
- Definitions and examples

Content to summarize:
{input_text}
"""
    # A function-local Gemini handle; the module-level `model` is not touched here.
    gemini = genai.GenerativeModel("models/gemini-1.5-pro")
    return gemini.generate_content(prompt).text

# Load the combined transcript + slide text produced earlier
with open("Output/input.txt", "r", encoding="utf-8") as source:
    input_text = source.read()

# Ask Gemini for notes, then persist them as plain text
lecture_notes = generate_notes_with_gemini(input_text)
with open("Output/lecture_notes.txt", "w", encoding="utf-8") as sink:
    sink.write(lecture_notes)

print("✅ Gemini-generated notes saved to Output/lecture_notes.txt")

🧠 Generating lecture notes using Gemini...
✅ Gemini-generated notes saved to Output/lecture_notes.txt


**🎨 Generate and Beautify Lecture Notes in Markdown using Gemini AI**

In [None]:
import google.generativeai as genai
import re

# Configure your API key.
# The key is taken from the GEMINI_API_KEY environment variable, or asked for
# once interactively, so it is never stored in the notebook source or outputs.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# NOTE: this rebinds the global `model`, which the extraction cell set to the
# Whisper model. Re-run the Whisper loading cell before transcribing again.
model = genai.GenerativeModel(model_name="models/gemini-1.5-pro")

def format_notes_md(text):
    """Post-process Gemini's markdown so headings, labels and bullets render cleanly.

    Steps, in order:
      1. Add a blank line after every ``###`` heading and a ``---`` rule after
         every ``##`` heading.
      2. Bold well-known labels such as ``Definition:`` unless they are already
         wrapped in ``**``.
      3. Put a blank line in front of every ``* `` bullet that starts a line.
      4. Prefix selected bold labels with an emoji unless one is already there.

    Args:
        text: Markdown text returned by the model.

    Returns:
        The reformatted markdown string.
    """
    # Headings: spacing after "###", horizontal rule after "##"
    text = re.sub(r'(?m)^### (.*?)$', r'### \1\n', text)
    text = re.sub(r'(?m)^## (.*?)$', r'## \1\n---', text)

    # Bold important labels. The look-around assertions skip labels the model
    # already emitted as "**Label:**", which would otherwise become "****Label:****".
    keywords = ['Key Takeaway', 'Definition', 'Example', 'Benefits', 'Challenges', 'Significance', 'Applications', 'Importance']
    for word in keywords:
        text = re.sub(
            rf'(?<!\*\*)\b({re.escape(word)}):(?!\*\*)',
            r'**\1:**',
            text,
            flags=re.IGNORECASE,
        )

    # Blank line before each bullet. Only "* " at the start of a line is a
    # bullet; a plain str.replace("* ", ...) also hits the closing "**" of bold
    # text followed by a space and corrupts it.
    text = re.sub(r'(?m)^([ \t]*\* )', r'\n\1', text)

    # Optional: emojis for engagement (skipped when the emoji is already present)
    emoji_map = {
        "Key Takeaway": "📌",
        "Benefits": "✅",
        "Challenges": "⚠️",
        "Example": "💡",
        "Definition": "📖",
        "Applications": "🛠️",
        "Importance": "🌟",
        "Conclusion": "🧠"
    }
    for word, emoji in emoji_map.items():
        marker = f"**{word}:**"
        text = re.sub(
            rf'(?<!{re.escape(emoji)} ){re.escape(marker)}',
            f"{emoji} {marker}",
            text,
        )

    return text

def generate_notes_with_gemini(input_text):
    """Generate markdown lecture notes with Gemini and beautify the result.

    Args:
        input_text: Combined lecture content inserted into the prompt.

    Returns:
        Gemini's response text after it has passed through ``format_notes_md``.
    """
    print("🧠 Generating lecture notes using Gemini with beautified formatting...")

    raw_markdown = model.generate_content(f"""
You are an AI assistant creating beautifully formatted lecture notes from transcripts and slides.

✅ Use clear formatting with:
- Markdown headings (##, ###)
- Bullet points and line breaks
- Bold important concepts like 'Definition', 'Example', etc.
- Use emojis sparingly to make sections engaging
- Insert horizontal dividers (---) between major sections

Here is the lecture content to be transformed:
{input_text}
""").text
    return format_notes_md(raw_markdown)

# Load the combined transcript + slide text produced earlier
with open("Output/input.txt", "r", encoding="utf-8") as source:
    input_text = source.read()

# Produce the beautified notes and store them as markdown
lecture_notes = generate_notes_with_gemini(input_text)
with open("Output/lecture_notes.md", "w", encoding="utf-8") as sink:
    sink.write(lecture_notes)

print("✅ Beautiful lecture notes saved as Output/lecture_notes.md")

🧠 Generating lecture notes using Gemini with beautified formatting...
✅ Beautiful lecture notes saved as Output/lecture_notes.md


**📊 Auto-Generate Quiz Questions & Answers and Concept Diagrams from Lecture Notes using Gemini AI**

In [17]:
# -------- Q&A Generation -------- #
def generate_questions_with_gemini(note_text):
    """Ask the module-level Gemini ``model`` for quiz questions about the notes.

    Args:
        note_text: Lecture notes inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    print("❓ Generating quiz questions...")
    quiz_request = f"""
Based on the following lecture notes, generate a set of 5-10 quiz questions to test understanding.

Use the format:
**Q1:** Question text
**A1:** Correct answer

Lecture Notes:
{note_text}
"""
    return model.generate_content(quiz_request).text


# -------- Diagram/Concept Mapping -------- #
def generate_diagrams_with_gemini(note_text):
    """Ask the module-level Gemini ``model`` for ASCII diagrams / concept maps.

    Args:
        note_text: Lecture notes inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    print("📊 Generating text-based diagrams/concept maps...")
    diagram_request = f"""
From the lecture notes below, extract key concepts and represent them using ASCII-style diagrams or concept maps.

Use markdown format, and try to visually link concepts.

Lecture Notes:
{note_text}
"""
    return model.generate_content(diagram_request).text


# Load the markdown notes written by the previous step
with open("Output/lecture_notes.md", "r", encoding="utf-8") as source:
    notes_text = source.read()

# Quiz questions -> Output/lecture_questions.md
questions_md = generate_questions_with_gemini(notes_text)
with open("Output/lecture_questions.md", "w", encoding="utf-8") as sink:
    sink.write(questions_md)

# Diagrams / concept maps -> Output/diagrams.md
diagrams_md = generate_diagrams_with_gemini(notes_text)
with open("Output/diagrams.md", "w", encoding="utf-8") as sink:
    sink.write(diagrams_md)

print("✅ Q&A and Diagrams saved to Output folder.")

❓ Generating quiz questions...
📊 Generating text-based diagrams/concept maps...
✅ Q&A and Diagrams saved to Output folder.


**🌐 Install Gradio for Building Interactive Web Interfaces**

In [11]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

**🧠 Build an Interactive Gradio App for Lecture Notes, Q&A, and Diagrams**

In [None]:
import gradio as gr
import os
import google.generativeai as genai

# Configure Gemini API.
# The key is taken from the GEMINI_API_KEY environment variable, or asked for
# interactively, so it is never stored in the notebook source or outputs.
from getpass import getpass

GENAI_API_KEY = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")
genai.configure(api_key=GENAI_API_KEY)
model = genai.GenerativeModel(model_name="models/gemini-1.5-pro")

def generate_notes(input_text):
    """Return Gemini's markdown lecture notes for the pasted lecture content.

    Args:
        input_text: Raw transcript/slide text inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    notes_request = f"""
You are an AI assistant creating beautifully formatted lecture notes from transcripts and slides.

✅ Use clear formatting with:
- Markdown headings (##, ###)
- Bullet points and line breaks
- Bold important concepts like 'Definition', 'Example', etc.
- Use emojis sparingly to make sections engaging
- Insert horizontal dividers (---) between major sections

Here is the lecture content to be transformed:
{input_text}
"""
    return model.generate_content(notes_request).text

def generate_questions(notes_text):
    """Return Gemini's quiz questions and answers for the given notes.

    Args:
        notes_text: Lecture notes inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    quiz_request = f"""
Based on the following lecture notes, generate a set of 5-10 quiz questions to test understanding.

Use the format:
**Q1:** Question text
**A1:** Correct answer

Lecture Notes:
{notes_text}
"""
    return model.generate_content(quiz_request).text

def generate_diagrams(notes_text):
    """Return Gemini's ASCII diagrams / concept maps for the given notes.

    Args:
        notes_text: Lecture notes inserted into the prompt.

    Returns:
        The text of Gemini's response.
    """
    diagram_request = f"""
From the lecture notes below, extract key concepts and represent them using ASCII-style diagrams or concept maps.

Use markdown format, and try to visually link concepts.

Lecture Notes:
{notes_text}
"""
    return model.generate_content(diagram_request).text

def process(input_text):
    """Run the full pipeline for the Gradio button: notes, then Q&A and diagrams.

    Args:
        input_text: Text pasted into the input box.

    Returns:
        A ``(notes, questions, diagrams)`` tuple of markdown strings. When the
        input is empty or whitespace-only, no model call is made and the first
        element carries a hint for the user instead.
    """
    # Guard: an empty box would otherwise trigger three pointless model calls.
    if not input_text or not input_text.strip():
        return "Please paste some lecture content before generating.", "", ""

    notes = generate_notes(input_text)
    # Q&A and diagrams are both derived from the generated notes, not the raw input.
    return notes, generate_questions(notes), generate_diagrams(notes)

# Build the UI: an intro banner, one input box, one button and three markdown
# output panes, each placed in its own row in the order they are created below.
with gr.Blocks(title="📘 AI Lecture Note Generator") as demo:
    gr.Markdown("""
        # 📘 AI Lecture Note Generator
        Upload raw lecture content (transcripts + slides) and get:
        - ✅ Structured lecture notes
        - ❓ Auto-generated Q&A
        - 📊 Conceptual diagrams
    """)

    # Input: the user pastes the combined transcript + slide text here
    with gr.Row():
        input_textbox = gr.Textbox(label="📥 Paste Combined Lecture + Slides Text Here", lines=20, placeholder="Paste your raw content from lecture transcript + slides...")

    with gr.Row():
        submit_btn = gr.Button("🧠 Generate Notes, Q&A & Diagrams")

    # Outputs: one markdown pane per element of the tuple returned by process()
    with gr.Row():
        notes_output = gr.Markdown(label="📘 Lecture Notes")
    with gr.Row():
        qna_output = gr.Markdown(label="❓ Q&A")
    with gr.Row():
        diagram_output = gr.Markdown(label="📊 Diagrams")

    # Clicking the button sends the textbox value to process() and routes its
    # three return values to the panes in the listed order.
    submit_btn.click(fn=process, inputs=input_textbox, outputs=[notes_output, qna_output, diagram_output])

# Launch the interface.
# debug=True keeps this cell running so errors and logs stay visible.
# share=True requests a public share URL; NOTE(review): anyone holding that
# link can trigger Gemini calls made with the key configured above.
demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a35f277ccb79588186.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a35f277ccb79588186.gradio.live




**📊 Automated Evaluation of AI-Generated Lecture Notes using Gemini**

In [None]:
import re
import google.generativeai as genai

# 🔐 Configure Gemini.
# The key is taken from the GEMINI_API_KEY environment variable, or asked for
# once interactively, so it is never stored in the notebook source or outputs.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("models/gemini-1.5-pro")

# 🧠 Gemini-based evaluation with full prompt and lecture note injection
def evaluate_notes(input_text, notes_text):
    """Ask Gemini to grade the generated notes against the raw input.

    Args:
        input_text: Raw transcript + slide text the notes were generated from.
        notes_text: The generated lecture notes to be evaluated.

    Returns:
        The text of Gemini's response (requested as markdown with one
        ``Metric (n/5)`` entry per criterion followed by a short summary).
    """
    rubric_request = f"""
You are an expert education evaluator.

Evaluate the quality of the generated lecture notes using the following 5 criteria.
For each, assign a score from 0 to 5 and explain your reasoning:

1. *Accuracy*
2. *Completeness*
3. *Organization*
4. *Readability*
5. *Value-Added*

---

### Raw Input (Transcript + Slides):
{input_text}

---

### Generated Lecture Notes:
{notes_text}

---

Return your evaluation in markdown format with the format:
*Accuracy (4/5):* explanation...
*Completeness (3/5):* explanation...
...
Then at the bottom, include a short summary of your overall impression.
"""
    return model.generate_content(rubric_request).text

# 📊 Extract score values and compute percentage breakdown
def extract_scores_and_convert_to_percentages(report_text):
    """Parse ``Metric (n/5)`` scores out of an evaluation report.

    Recognised metrics are Accuracy, Completeness, Organization, Readability
    and Value-Added (case-insensitive). Markdown emphasis, colons or spaces
    between the metric name and the parenthesis are tolerated, e.g.
    ``*Accuracy (4/5):*`` or ``**Accuracy:** (4/5)``.

    Each metric is counted once: when a metric appears several times (for
    instance again in the closing summary), only its first occurrence is used,
    so the totals can never exceed 5 points per metric.

    Args:
        report_text: Markdown text of the evaluation report.

    Returns:
        A tuple ``(score_data, total_score, overall_percentage)`` where
        ``score_data`` maps the title-cased metric name to
        ``{"score": int, "percentage": float}``, ``total_score`` is the sum of
        the parsed scores, and ``overall_percentage`` is that sum relative to
        5 points per parsed metric (``0`` when nothing was parsed).
    """
    pattern = re.compile(
        r"(Accuracy|Completeness|Organization|Readability|Value-Added)[\s*_:]*\((\d)\s*/\s*5\)",
        re.IGNORECASE,
    )

    score_data = {}
    for metric, score_str in pattern.findall(report_text):
        name = metric.strip().title()
        if name in score_data:
            # Repeated mention of an already-scored metric: ignore it.
            continue
        score = int(score_str)
        score_data[name] = {
            "score": score,
            "percentage": score * 100 / 5
        }

    # Totals are derived from the de-duplicated table so they always agree with it.
    total_score = sum(entry["score"] for entry in score_data.values())
    max_score = 5 * len(score_data)

    overall_percentage = total_score * 100 / max_score if max_score > 0 else 0
    return score_data, total_score, overall_percentage

# 📁 Load generated notes and input
with open("Output/input.txt", "r", encoding="utf-8") as f:
    raw_input = f.read()

with open("Output/lecture_notes.md", "r", encoding="utf-8") as f:
    generated_notes = f.read()

# 📝 Evaluate the notes
eval_markdown = evaluate_notes(raw_input, generated_notes)

# 📈 Extract and format scores
score_data, total, overall_pct = extract_scores_and_convert_to_percentages(eval_markdown)
if score_data:
    score_table = "\n".join([f"- *{k}*: {v['score']}/5 ({v['percentage']}%)" for k, v in score_data.items()])
else:
    # Nothing matched the expected "Metric (n/5)" pattern in the model's answer.
    score_table = "- No scores could be parsed from the evaluation."

# The denominator follows the number of metrics actually parsed instead of
# assuming all five were found, and each summary line carries its own emphasis.
max_total = 5 * len(score_data)
summary = (
    "### 🧾 Evaluation Summary\n\n"
    f"{score_table}\n\n"
    f"- *Total Score*: {total}/{max_total}\n"
    f"- *Overall Percentage*: {overall_pct:.2f}%"
)

# 📄 Combine into final report
final_report = f"# 📊 Evaluation Report\n\n{eval_markdown}\n\n{summary}"

# 💾 Save to file
with open("Output/evaluation_report.md", "w", encoding="utf-8") as f:
    f.write(final_report)

print("✅ Evaluation complete. Results saved to Output/evaluation_report.md")

✅ Evaluation complete. Results saved to Output/evaluation_report.md
