**Student Performance Feedback System**

In [2]:
!pip install --quiet openai fpdf matplotlib pandas gradio

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fpdf (setup.py) ... [?25l[?25hdone


In [18]:
import gradio as gr
import json
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
from fpdf import FPDF
import re
from datetime import datetime
import textwrap
from collections import deque
import tempfile
import os

In [24]:
def load_and_validate_data(file):
    """Load a test-submission JSON file and verify the required fields exist.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes the path as ``.name``. Both shapes are
            accepted because ``gr.File(type="filepath")`` hands the callback a
            plain path string, whereas file-like upload objects carry the
            path in ``.name``.

    Returns:
        dict: The submission record. When the JSON document is a list, its
        first element is used.

    Raises:
        ValueError: If no file was given, the file cannot be read or parsed,
            the document is empty / not a JSON object, or a required field
            is missing.
    """
    if file is None:
        raise ValueError("No file uploaded!")

    # A plain path has no ``.name``; an upload wrapper does.
    path = getattr(file, "name", file)

    try:
        # utf-8-sig reads ordinary UTF-8 and also tolerates a leading BOM.
        with open(path, "r", encoding="utf-8-sig") as f:
            submission = json.load(f)
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON file format") from exc
    except Exception as exc:
        raise ValueError(f"Error processing file: {exc}") from exc

    if isinstance(submission, list):
        if not submission:
            raise ValueError("Error processing file: the JSON array is empty")
        submission = submission[0]

    if not isinstance(submission, dict):
        raise ValueError("Error processing file: expected a JSON object with test results")

    required = ('totalTimeTaken', 'totalMarkScored', 'totalAttempted',
                'totalCorrect', 'accuracy', 'subjects', 'sections')

    for field in required:
        if field not in submission:
            # Raised outside the try-block so the message is not re-wrapped.
            raise ValueError(f"Missing required field: {field}")

    return submission

def process_subject_data(subjects):
    """Build the per-subject performance table.

    Args:
        subjects: Iterable of subject records. Each record must provide
            ``subjectId['$oid']`` and ``accuracy``; ``attempted`` and
            ``correct`` default to 0 when absent.

    Returns:
        pandas.DataFrame with columns ``Subject``, ``Accuracy (%)``,
        ``Attempts`` and ``Correct``. The columns are present even when
        ``subjects`` is empty, so callers can sort/select on them safely.
    """
    columns = ["Subject", "Accuracy (%)", "Attempts", "Correct"]
    rows = [
        {
            "Subject": s['subjectId']['$oid'],
            "Accuracy (%)": round(s['accuracy'], 2),
            "Attempts": s.get('attempted', 0),
            "Correct": s.get('correct', 0),
        }
        for s in subjects
    ]
    # Passing columns explicitly keeps the schema stable for an empty input.
    return pd.DataFrame(rows, columns=columns)

def process_chapter_data(sections):
    """Aggregate per-question results into a per-chapter performance table.

    A question is attributed to the first entry of
    ``questionId['chapters']``; questions without chapters are skipped.
    Only questions whose ``status`` is ``'answered'`` or
    ``'answeredAndMarkedReview'`` count as attempted. Marks are credited
    (``marks``, default 1) only for attempted questions flagged
    ``inputValue['isCorrect']``.

    Args:
        sections: Iterable of section records, each optionally holding a
            ``questions`` list.

    Returns:
        pandas.DataFrame with columns ``Chapter``, ``Accuracy (%)``,
        ``Avg Time (s)``, ``Marks Obtained``, ``Marks/Question``,
        ``Attempts`` and ``Correct``. The columns are present even when no
        chapter was found, so callers can sort/select on them safely.
    """
    columns = ["Chapter", "Accuracy (%)", "Avg Time (s)", "Marks Obtained",
               "Marks/Question", "Attempts", "Correct"]
    attempted_states = ('answered', 'answeredAndMarkedReview')

    chapters = {}
    for sec in sections:
        # A section without questions contributes nothing instead of raising.
        for q in sec.get('questions') or []:
            chs = q['questionId'].get('chapters', [])
            if not chs:
                continue

            entry = chapters.setdefault(
                chs[0]['title'],
                {"attempted": 0, "correct": 0, "time": 0, "marks": 0},
            )

            if q.get('status') not in attempted_states:
                continue

            entry['attempted'] += 1
            entry['time'] += q.get('timeTaken', 0)

            # ``inputValue`` may be present but null; treat that as "not correct".
            if (q.get('inputValue') or {}).get('isCorrect'):
                entry['correct'] += 1
                entry['marks'] += q.get('marks', 1)

    rows = []
    for name, stats in chapters.items():
        attempted, correct = stats['attempted'], stats['correct']
        rows.append({
            "Chapter": name,
            "Accuracy (%)": round(correct / attempted * 100, 2) if attempted else 0,
            "Avg Time (s)": round(stats['time'] / attempted, 2) if attempted else 0,
            "Marks Obtained": stats['marks'],
            "Marks/Question": round(stats['marks'] / correct, 2) if correct else 0,
            "Attempts": attempted,
            "Correct": correct,
        })

    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)

def create_visualizations(subjects_df, chapter_df):
    """Draw the 2x2 performance dashboard and return the matplotlib figure.

    Panels (row-major): subject accuracy bars with value labels, the ten
    chapters with the highest accuracy, a time-vs-accuracy scatter over all
    chapters, and the ten chapters with the most marks.

    Args:
        subjects_df: Frame with ``Subject`` and ``Accuracy (%)`` columns.
        chapter_df: Frame with ``Chapter``, ``Accuracy (%)``,
            ``Avg Time (s)`` and ``Marks Obtained`` columns.

    Returns:
        The created figure. The caller is responsible for saving/closing it.
    """
    title_style = dict(fontsize=14, fontweight='bold')

    def shorten(name):
        # Long chapter titles would overlap on the x axis.
        if len(name) > 15:
            return name[:15] + '...'
        return name

    def draw_top_chapters(ax, column, colour, title):
        # Bar chart of the ten chapters ranking highest on `column`.
        top = chapter_df.sort_values(column, ascending=False).head(10)
        positions = range(len(top))
        ax.bar(positions, top[column], color=colour, alpha=0.8)
        ax.set_xticks(positions)
        ax.set_xticklabels([shorten(name) for name in top['Chapter']],
                           rotation=45, ha='right')
        ax.set_title(title, **title_style)
        ax.set_ylabel(column)

    plt.style.use('ggplot')
    fig, axs = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Student Performance Analysis Dashboard', fontsize=18, y=0.98, fontweight='bold')
    (ax_subjects, ax_accuracy), (ax_time, ax_marks) = axs

    # Panel 1: subject accuracy, best first, each bar annotated with its value.
    ranked_subjects = subjects_df.sort_values('Accuracy (%)', ascending=False)
    subject_bars = ax_subjects.bar(ranked_subjects['Subject'], ranked_subjects['Accuracy (%)'],
                                   color='#4C72B0', alpha=0.8)
    ax_subjects.set_ylim(0, 100)
    ax_subjects.set_title("Subject-wise Accuracy", **title_style)
    ax_subjects.set_ylabel("Accuracy (%)")
    ax_subjects.tick_params(axis='x', rotation=45)
    for rect in subject_bars:
        value = rect.get_height()
        ax_subjects.text(rect.get_x() + rect.get_width()/2., value + 1,
                         f'{value:.1f}%', ha='center', va='bottom', fontsize=10)

    # Panel 2: most accurate chapters.
    draw_top_chapters(ax_accuracy, 'Accuracy (%)', '#55A868', "Top 10 Chapters by Accuracy")

    # Panel 3: does spending longer per question pay off?
    ax_time.scatter(chapter_df['Avg Time (s)'], chapter_df['Accuracy (%)'],
                    color='#C44E52', s=60, alpha=0.7)
    ax_time.set_xlabel('Average Time per Question (seconds)')
    ax_time.set_ylabel('Accuracy (%)')
    ax_time.set_title("Time vs Accuracy Analysis", **title_style)
    ax_time.grid(True, alpha=0.3)

    # Panel 4: chapters that earned the most marks.
    draw_top_chapters(ax_marks, 'Marks Obtained', '#8172B2', "Top 10 Chapters by Marks Obtained")

    plt.tight_layout()
    return fig

def format_time_duration(seconds):
    """Render a duration in seconds as ``"1h 2m 5s"``, ``"2m 5s"`` or ``"5s"``.

    Args:
        seconds: Duration in seconds (int or float). Fractional seconds are
            truncated so the units never render as ``"1.0h"``; negative
            values are treated as zero.

    Returns:
        str: The largest non-zero unit and every smaller unit.
    """
    total = max(0, int(seconds))
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)

    if hours > 0:
        return f"{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{minutes}m {secs}s"
    return f"{secs}s"

# (sequence, ASCII replacement) pairs applied in order. Mojibake sequences
# come first and longest-first so a short prefix never pre-empts a longer
# match. Anything still non-ASCII after this table is dropped.
_ASCII_REPLACEMENTS = (
    # UTF-8 punctuation that was mis-decoded (mojibake)
    ("\u00c3\u00a2\u00e2\u201a\u00ac\u00e2\u201e\u00a2", "'"),
    ("\u00c3\u00a2\u00e2\u201a\u00ac\u00c5", '"'),
    ("\u00e2\u20ac\u2122", "'"),
    ("\u00e2\u20ac\u0153", '"'),
    ("\u00e2\u20ac", '"'),
    # typographic quotes
    ("\u201c", '"'), ("\u201d", '"'),
    ("\u2018", "'"), ("\u2019", "'"),
    # dashes and ellipsis
    ("\u2013", "-"), ("\u2014", "-"), ("\u2026", "..."),
    # bullets and dot leaders
    ("\u2022", "- "), ("\u2023", "- "), ("\u2024", "- "), ("\u2025", "- "),
    ("\u25e6", "  - "), ("\u25aa", "- "),
    # stars, check marks
    ("\u2605", "*"), ("\u2713", "v"), ("\u2717", "x"),
    # triangles and arrows
    ("\u25b8", "- "), ("\u25b9", "- "), ("\u25b6", "- "), ("\u25ba", "- "),
    ("\u27f6", "->"), ("\u2192", "->"),
)


def clean_and_format_text(text):
    """Convert Markdown-ish LLM output to plain, wrapped, ASCII-only text.

    Processing steps:
      1. Strip Markdown header markers (``#`` .. ``######``) at line starts
         only, so a ``#`` inside a sentence (e.g. ``C#``) is untouched.
      2. Normalise ``*``/``+``/``-`` list markers to ``- `` *before*
         removing emphasis, so ``*`` bullets are not mistaken for italics.
      3. Remove ``***``/``**``/``*`` emphasis markers (within one line).
      4. Replace typographic punctuation, bullets and common mojibake with
         ASCII equivalents, then drop any remaining non-ASCII characters.
      5. Collapse runs of spaces/tabs; paragraphs are separated by exactly
         one blank line.
      6. Wrap every line to 85 columns. Lines are wrapped individually so
         that list items and headings stay on their own lines.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.

    Returns:
        str: The cleaned text containing only ASCII characters.
    """
    if not text:
        return ""

    text = str(text).replace('\r\n', '\n').replace('\r', '\n')

    # 1. Headers: anchored to the start of a line.
    text = re.sub(r'^[ \t]*#{1,6}[ \t]*', '', text, flags=re.MULTILINE)

    # 2. List markers first; a marker must be followed by whitespace, so
    #    "**bold** start of line" is not treated as a bullet.
    text = re.sub(r'^[ \t]*[-*+][ \t]+', '- ', text, flags=re.MULTILINE)

    # 3. Emphasis, strongest marker first; never across a line break.
    for marker in (r'\*\*\*', r'\*\*', r'\*'):
        text = re.sub(marker + r'([^*\n]+)' + marker, r'\1', text)

    # 4. Map to ASCII, then drop whatever has no mapping.
    for sequence, replacement in _ASCII_REPLACEMENTS:
        text = text.replace(sequence, replacement)
    text = text.encode('ascii', 'ignore').decode('ascii')

    # 5. Horizontal whitespace.
    text = re.sub(r'[ \t]+', ' ', text)

    # 6. Re-assemble paragraph by paragraph, line by line.
    paragraphs = []
    for block in re.split(r'\n\s*\n', text.strip()):
        lines = []
        for raw_line in block.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            # Bullets produced by the replacement table (and '*' from a
            # star symbol) are normalised here, as are numbered items.
            line = re.sub(r'^[-*+]\s+', '- ', line)
            line = re.sub(r'^(\d+\.)\s+', r'\1 ', line)
            lines.append(textwrap.fill(line, width=85,
                                       break_long_words=False,
                                       break_on_hyphens=False))
        if lines:
            paragraphs.append('\n'.join(lines))

    return '\n\n'.join(paragraphs)

def compose_feedback_prompt(student_data, subjects_df, chapter_df):
    """Compose the LLM prompt that asks for a personalised feedback report.

    Args:
        student_data: Submission dict; reads ``totalTimeTaken``,
            ``totalMarkScored``, ``totalAttempted``, ``totalCorrect`` and
            ``accuracy``.
        subjects_df: Per-subject table with ``Subject`` and
            ``Accuracy (%)`` columns.
        chapter_df: Per-chapter table with ``Chapter`` and
            ``Accuracy (%)`` columns.

    Returns:
        str: The complete prompt text.
    """
    def split_extremes(ranked, column, limit):
        # Take up to `limit` names from each end of a best-first ranking
        # without letting the two lists overlap: with few rows the best
        # half feeds "top" and the remaining rows feed "weak". A plain
        # head(n)/tail(n) would list the same item as both strong and weak.
        count = len(ranked)
        top = ranked.head(min(limit, (count + 1) // 2))[column].tolist()
        weak = ranked.tail(min(limit, count // 2))[column].tolist()
        return top, weak

    def listing(names):
        return ', '.join(str(name) for name in names) or 'None identified'

    time_formatted = format_time_duration(student_data['totalTimeTaken'])

    subjects_sorted = subjects_df.sort_values('Accuracy (%)', ascending=False)
    top_subjects, weak_subjects = split_extremes(subjects_sorted, 'Subject', 3)

    chapters_sorted = chapter_df.sort_values('Accuracy (%)', ascending=False)
    top_chapters, weak_chapters = split_extremes(chapters_sorted, 'Chapter', 5)

    # The table is labelled "Top 15", so it must come from the ranked frame
    # rather than the first 15 rows in insertion order.
    return f"""
You are an experienced educational mentor providing personalized feedback to a student.

STUDENT'S TEST PERFORMANCE SUMMARY:
- Total Time Taken: {time_formatted}
- Total Marks Scored: {student_data['totalMarkScored']}
- Questions Attempted: {student_data['totalAttempted']}
- Correct Answers: {student_data['totalCorrect']}
- Overall Accuracy: {student_data['accuracy']:.1f}%

STRONGEST SUBJECTS: {listing(top_subjects)}
SUBJECTS NEEDING IMPROVEMENT: {listing(weak_subjects)}

TOP PERFORMING CHAPTERS: {listing(top_chapters)}
CHAPTERS NEEDING ATTENTION: {listing(weak_chapters)}

DETAILED PERFORMANCE DATA:
Subject-wise Performance:
{subjects_df.to_string(index=False)}

Chapter-wise Performance (Top 15):
{chapters_sorted.head(15).to_string(index=False)}

Please provide a comprehensive, personalized feedback report with the following structure:

1. MOTIVATIONAL OPENING
   - Acknowledge their effort and participation
   - Highlight overall performance positively

2. DETAILED PERFORMANCE ANALYSIS
   - Break down subject-wise strengths and areas for improvement
   - Discuss chapter-wise performance patterns
   - Analyze time management efficiency

3. KEY INSIGHTS & OBSERVATIONS
   - Identify learning patterns from the data
   - Point out any notable trends or correlations

4. ACTIONABLE IMPROVEMENT STRATEGIES
   - Provide 3-4 specific, practical strategies
   - Focus on both content mastery and test-taking skills

5. PERSONALIZED STUDY PLAN
   - Suggest a structured approach for weak areas
   - Include time allocation recommendations

6. ENCOURAGING CONCLUSION
   - Reinforce confidence and growth mindset
   - Set positive expectations for future performance

Write in a warm, encouraging tone as if you're speaking directly to the student. Make it personal and actionable.
IMPORTANT: Use only basic ASCII characters in your response. Avoid special bullets, fancy quotes, or Unicode symbols.
"""

def generate_llm_feedback(prompt):
    """Request feedback text from the LLM, falling back to generic advice.

    The OpenRouter API key is read from the ``OPENROUTER_API_KEY``
    environment variable; credentials must not be stored in the notebook.
    NOTE(review): a key was previously committed in this function and
    should be treated as compromised and revoked.

    Args:
        prompt: The complete prompt to send as a single user message.

    Returns:
        str: The model's reply with surrounding whitespace removed, or a
        generic fallback letter (which quotes the error) when the key is
        missing, the request fails, or the reply is empty. This function
        does not raise.
    """
    try:
        api_key = os.environ.get("OPENROUTER_API_KEY")
        if not api_key:
            raise RuntimeError("the OPENROUTER_API_KEY environment variable is not set")

        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key
        )

        response = client.chat.completions.create(
            model="deepseek/deepseek-chat-v3-0324:free",
            messages=[{"role": "user", "content": prompt}],
            extra_headers={
                "HTTP-Referer": "https://test.com",
                "X-Title": "StudentFeedback"
            },
            max_tokens=2000,
            temperature=0.7
        )

        content = response.choices[0].message.content
        if not content:
            # Report a readable cause instead of an AttributeError on None.
            raise RuntimeError("the model returned an empty response")

        return content.strip()
    except Exception as e:
        fallback_feedback = f"""Dear Student,

I apologize, but I'm unable to generate personalized AI feedback at this moment due to a technical issue: {str(e)}

However, based on your performance data, here are some general observations:

PERFORMANCE SUMMARY:
- Your overall accuracy is showing room for improvement
- Focus on consistent practice and understanding concepts thoroughly

GENERAL RECOMMENDATIONS:
1. Review your incorrect answers to understand mistake patterns
2. Practice time management during tests
3. Focus extra attention on weaker subject areas
4. Create a regular study schedule
5. Seek help from teachers for challenging topics

Keep up the good work and stay motivated!

Best regards,
Your Study Assistant"""

        return fallback_feedback

class generatePDF(FPDF):
    """FPDF document with the report's banner, footer and layout helpers.

    All text is drawn with the built-in ``Arial`` font. Text passed to the
    drawing helpers is reduced to Latin-1 up front (see ``_latin1_safe``).
    """

    @staticmethod
    def _latin1_safe(text):
        """Return ``text`` as a str containing only Latin-1 characters.

        Characters outside Latin-1 become ``?``.
        NOTE(review): with the classic ``fpdf`` package the encoding error
        for such characters appears to surface when the document is
        written, not when a cell is drawn, so catching
        ``UnicodeEncodeError`` around ``cell``/``multi_cell`` does not
        protect the output - confirm against the installed version.
        """
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    def header(self):
        """Draw the blue banner with the report title and generation time."""
        self.set_fill_color(41, 128, 185)
        # Span the actual page width rather than assuming A4 (210 mm).
        self.rect(0, 0, self.w, 25, 'F')

        self.set_text_color(255, 255, 255)
        self.set_font('Arial', 'B', 20)
        self.cell(0, 15, 'Student Performance Analysis Report', ln=1, align='C', border=0)

        self.set_font('Arial', '', 12)
        self.cell(0, 8, f"Generated on: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}",
                 ln=1, align='C', border=0)

        self.set_text_color(0, 0, 0)
        self.ln(10)

    def footer(self):
        """Draw the grey, centred page-number line 15 units above the bottom."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 10)
        self.set_text_color(128, 128, 128)
        self.cell(0, 10, f'Page {self.page_no()} | Student Performance Report', align='C')

    def add_section_header(self, title, color=(52, 73, 94)):
        """Draw a full-width coloured bar containing ``title`` in white bold.

        Args:
            title: Section title.
            color: RGB fill colour of the bar.
        """
        self.ln(5)
        self.set_fill_color(*color)
        self.set_text_color(255, 255, 255)
        self.set_font('Arial', 'B', 14)
        self.cell(0, 12, f"  {self._latin1_safe(title)}", ln=1, fill=True)
        self.set_text_color(0, 0, 0)
        self.ln(3)

    def add_formatted_text(self, text, font_size=11, line_height=6):
        """Write free text paragraph by paragraph with light formatting.

        The text is first normalised by ``clean_and_format_text``. A
        paragraph shorter than 60 characters that is upper-case, starts
        with ``<number>. <Capital>`` or ends with ``:`` is drawn as a blue
        bold heading. Lines starting with ``- `` are drawn as indented
        ``*`` bullets; every other line is written as-is.

        Args:
            text: Raw text to render.
            font_size: Body font size; headings use ``font_size + 1``.
            line_height: Height of each written line.
        """
        text = clean_and_format_text(text)

        self.set_font('Arial', '', font_size)

        paragraphs = text.strip().split('\n\n')

        for i, para in enumerate(paragraphs):
            stripped = para.strip()
            if not stripped:
                continue

            is_header = len(para) < 60 and bool(
                para.isupper()
                or re.match(r'^\d+\.?\s+[A-Z]', stripped)
                or stripped.endswith(':')
            )

            if is_header:
                self.ln(3)
                self.set_font('Arial', 'B', font_size + 1)
                self.set_text_color(41, 128, 185)
            else:
                self.set_font('Arial', '', font_size)
                self.set_text_color(0, 0, 0)

            for line in para.split('\n'):
                line = line.strip()
                if not line:
                    continue

                if line.startswith('- '):
                    # Empty 8-unit cell = left indent for the bullet text.
                    self.cell(8, line_height, '', 0, 0)
                    line = f"* {line[2:].strip()}"

                self.multi_cell(0, line_height, self._latin1_safe(line))

            if i < len(paragraphs) - 1:
                self.ln(2)

    def add_data_table(self, title, df, max_rows=10):
        """Draw a bordered table of the first five columns of ``df``.

        Args:
            title: Section title drawn above the table.
            df: Source frame; only its first five columns are shown.
            max_rows: Maximum number of data rows to draw.
        """
        self.add_section_header(title, (46, 125, 50))

        self.set_font('Arial', 'B', 10)
        self.set_fill_color(240, 240, 240)

        # One width per displayed column (first column is the wide label).
        col_widths = [60, 30, 30, 30, 30]
        headers = df.columns.tolist()[:len(col_widths)]
        columns = list(zip(headers, col_widths))

        for header, width in columns:
            self.cell(width, 8, self._latin1_safe(header), 1, 0, 'C', True)
        self.ln()

        self.set_font('Arial', '', 9)
        for _, row in df.head(max_rows).iterrows():
            for header, width in columns:
                value = str(row[header])
                # Keep long values (e.g. chapter titles) inside the cell.
                if len(value) > 20:
                    value = value[:17] + "..."
                self.cell(width, 6, self._latin1_safe(value), 1, 0, 'C')
            self.ln()

def create_pdf_report(viz_path, student_data, subjects_df, chapter_df, feedback_text,
                      output_path="student_performance_report.pdf"):
    """Assemble the PDF report and write it to disk.

    Page content, in order: performance summary, the dashboard image (when
    ``viz_path`` exists), the subject table, the first ten chapter rows,
    and - on a new page - the feedback text.

    Args:
        viz_path: Path of the dashboard image, or a falsy value to skip it.
        student_data: Submission dict; reads ``accuracy``,
            ``totalMarkScored``, ``totalAttempted``, ``totalCorrect`` and
            ``totalTimeTaken``.
        subjects_df: Per-subject table.
        chapter_df: Per-chapter table.
        feedback_text: Raw feedback; it is cleaned by
            ``generatePDF.add_formatted_text``.
        output_path: Destination file. Defaults to the historical fixed
            name in the working directory. NOTE(review): concurrent
            requests sharing the default overwrite each other's file;
            pass a unique path where that matters.

    Returns:
        str: ``output_path``.
    """
    pdf = generatePDF()
    pdf.add_page()

    pdf.add_section_header("Performance Summary", (52, 152, 219))

    attempted = student_data['totalAttempted']
    avg_seconds = student_data['totalTimeTaken'] // attempted if attempted > 0 else 0

    summary_text = f"""
Overall Accuracy: {student_data['accuracy']:.1f}%
Total Marks Scored: {student_data['totalMarkScored']}
Questions Attempted: {attempted} out of total questions
Correct Answers: {student_data['totalCorrect']}
Time Taken: {format_time_duration(student_data['totalTimeTaken'])}
Average Time per Question: {avg_seconds} seconds
"""

    pdf.add_formatted_text(summary_text)

    if viz_path and os.path.exists(viz_path):
        pdf.add_section_header("Performance Visualizations", (231, 76, 60))
        try:
            pdf.image(viz_path, x=10, y=None, w=190)
            pdf.ln(10)
        except Exception:
            # Best effort: a missing chart must not abort the report, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pdf.add_formatted_text("Visualization could not be embedded in the PDF.")

    pdf.add_data_table("Subject-wise Performance", subjects_df)
    pdf.ln(5)
    pdf.add_data_table("Chapter-wise Performance (Top 10)", chapter_df.head(10))

    pdf.add_page()
    pdf.add_section_header("Personalized AI Feedback", (155, 89, 182))

    # add_formatted_text cleans its input itself; cleaning here as well
    # would run the normalisation twice.
    pdf.add_formatted_text(feedback_text, font_size=11, line_height=6)

    pdf.output(output_path)
    return output_path

def _best_by_accuracy(df, label_column):
    """Return ``"<label> (<accuracy>%)"`` for the most accurate row, or ``"N/A"``."""
    if df.empty or 'Accuracy (%)' not in df.columns:
        return "N/A"
    best = df.loc[df['Accuracy (%)'].idxmax()]
    return f"{best[label_column]} ({best['Accuracy (%)']:.1f}%)"


def process_student_data(file, progress=gr.Progress()):
    """Gradio callback: run the whole analysis for one uploaded file.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        tuple: ``(summary_markdown, chart_image_path, pdf_path,
        feedback_text)``. On any failure the first element is an
        ``"Error: ..."`` message and the other three are ``None``.
    """
    if file is None:
        return "Please upload a JSON file first.", None, None, None

    try:
        progress(0.1, desc="Loading and validating data...")
        student_data = load_and_validate_data(file)

        progress(0.3, desc="Processing subject and chapter data...")
        subjects_df = process_subject_data(student_data['subjects'])
        chapter_df = process_chapter_data(student_data['sections'])

        progress(0.5, desc="Creating visualizations...")
        fig = create_visualizations(subjects_df, chapter_df)

        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim before we write to it.
        fd, viz_path = tempfile.mkstemp(suffix='.png')
        os.close(fd)
        try:
            fig.savefig(viz_path, bbox_inches='tight', dpi=300, facecolor='white')
        finally:
            # Close this figure specifically, even if saving failed.
            plt.close(fig)

        progress(0.7, desc="Generating AI feedback...")
        prompt = compose_feedback_prompt(student_data, subjects_df, chapter_df)
        feedback = generate_llm_feedback(prompt)

        progress(0.9, desc="Creating PDF report...")
        report_path = create_pdf_report(viz_path, student_data, subjects_df, chapter_df, feedback)

        progress(1.0, desc="Complete!")

        summary = f"""
## Analysis Complete!

**Performance Summary:**
- Overall Accuracy: {student_data['accuracy']:.1f}%
- Total Marks: {student_data['totalMarkScored']}
- Questions Attempted: {student_data['totalAttempted']}
- Time Taken: {format_time_duration(student_data['totalTimeTaken'])}

**Top Performing Subject:** {_best_by_accuracy(subjects_df, 'Subject')}

**Best Chapter:** {_best_by_accuracy(chapter_df, 'Chapter')}

Download your detailed PDF report below!
"""

        return summary, viz_path, report_path, feedback

    except Exception as e:
        return f"Error: {str(e)}", None, None, None

def create_gradio_interface():
    """Build the Gradio Blocks UI and wire the analyse button.

    Layout, top to bottom: intro text; a row with the upload/analyse
    controls next to the summary; a row with the chart next to the PDF
    download; a row with the feedback text box; usage instructions.
    Clicking the button calls ``process_student_data`` with the uploaded
    file and fills the summary, chart, PDF and feedback outputs.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` instance.
    """
    # Static page copy, kept apart from the layout code below.
    intro_md = """
        #  Student Performance Analysis Dashboard

        Upload your test results JSON file to get a comprehensive performance analysis with AI-powered feedback!

        ### Features:
        -  **Visual Analytics**: Subject and chapter-wise performance charts
        -  **AI Feedback**: Personalized insights and study recommendations
        -  **PDF Report**: Professional downloadable report
        -  **Time Analysis**: Time vs accuracy correlations
        """

    instructions_md = """
        ### Instructions:
        1. Upload a JSON file containing your test results
        2. Click "Analyze Performance" to process the data
        3. View your performance visualizations and AI feedback
        4. Download the comprehensive PDF report

        ### Supported Format:
        Your JSON file should contain fields like: `totalTimeTaken`, `totalMarkScored`, `subjects`, `sections`, etc.
        """

    with gr.Blocks(title="Student Performance Analyzer", theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_md)

        # Row 1: inputs (narrow) | summary (wide)
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Upload Test Results")
                upload = gr.File(label="Upload JSON File", file_types=[".json"], type="filepath")
                run_button = gr.Button("Analyze Performance", variant="primary", size="lg")

            with gr.Column(scale=2):
                gr.Markdown("### 📋 Analysis Results")
                summary_box = gr.Markdown(label="Summary")

        # Row 2: chart | downloadable report
        with gr.Row():
            with gr.Column():
                gr.Markdown("### Performance Visualizations")
                chart_box = gr.Image(label="Performance Charts")

            with gr.Column():
                gr.Markdown("### Download Report")
                report_box = gr.File(label="PDF Report")

        # Row 3: raw feedback text
        with gr.Row():
            gr.Markdown("### AI-Generated Feedback")
            feedback_box = gr.Textbox(label="Personalized Feedback", lines=15,
                                      max_lines=20, show_copy_button=True)

        # Output order must match the tuple returned by process_student_data.
        run_button.click(
            fn=process_student_data,
            inputs=[upload],
            outputs=[summary_box, chart_box, report_box, feedback_box],
            show_progress=True,
        )

        gr.Markdown(instructions_md)

    return demo

# Entry point: build the UI and serve it. `share=True` requests a public
# share URL, `show_error=True` surfaces callback errors in the UI and
# `inbrowser=True` opens a browser tab on launch.
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True, show_error=True, inbrowser=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3f1c8c7abf0f2d2fc9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
