# CSYE 7374: Introduction to Agentic AI - Final Project
# Education Content System

1. Install Dependencies

In [1]:
# Install dependencies from the requirements.txt file hosted on GitHub
!pip install -r https://raw.githubusercontent.com/ishreyasp/education_content_system/refs/heads/main/requirements.txt

Collecting streamlit<1.30.0,>=1.28.0 (from -r https://raw.githubusercontent.com/ishreyasp/education_content_system/refs/heads/main/requirements.txt (line 4))
  Downloading streamlit-1.29.0-py2.py3-none-any.whl.metadata (8.2 kB)
Collecting openai<1.15.0,>=1.3.0 (from -r https://raw.githubusercontent.com/ishreyasp/education_content_system/refs/heads/main/requirements.txt (line 7))
  Downloading openai-1.14.3-py3-none-any.whl.metadata (20 kB)
Collecting langchain<0.2.0,>=0.1.0 (from -r https://raw.githubusercontent.com/ishreyasp/education_content_system/refs/heads/main/requirements.txt (line 8))
  Downloading langchain-0.1.20-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community<0.1.0,>=0.0.20 (from -r https://raw.githubusercontent.com/ishreyasp/education_content_system/refs/heads/main/requirements.txt (line 9))
  Downloading langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Collecting faiss-cpu<1.8.0,>=1.7.4 (from -r https://raw.githubusercontent.com/ishreyasp/educ

2. Education Content Agents Pipeline

In [None]:
# Imports
import streamlit as st
import os
import json
import time
import tempfile
from datetime import datetime
from typing import List, Dict, Optional, Literal
from enum import Enum
import openai
from pydantic import BaseModel, Field
import faiss
import numpy as np
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
import PyPDF2
import docx
import re
import random

# ======================== CONFIGURATION ========================
class Config:
    """Central place for the system's tunable constants."""

    # Text chunking: size and overlap handed to the text splitter.
    DEFAULT_CHUNK_SIZE = 1000
    DEFAULT_CHUNK_OVERLAP = 200

    # Lower and upper bounds on quiz question count.
    MIN_QUIZ_QUESTIONS = 3
    MAX_QUIZ_QUESTIONS = 10

    # Flat per-call cost estimates (presumably USD -- confirm with the author).
    COST_PER_CALL_GPT4 = 0.01
    COST_PER_CALL_GPT35 = 0.002

# ======================== LLM MANAGEMENT ========================
class LLMInterface:
    """Single gateway for chat-completion requests.

    Every request goes through ``make_call`` so the number of calls and an
    estimated running cost can be tracked in one place.
    """

    def __init__(self, api_key: str, model: str = "gpt-3.5-turbo"):
        """Create the OpenAI client and reset the usage counters.

        Args:
            api_key: OpenAI API key; surrounding whitespace is stripped.
            model: Chat model name sent with every request.
        """
        self.model = model
        self.call_count = 0
        self.total_cost = 0.0
        self.client = openai.OpenAI(api_key=api_key.strip())

    def make_call(self, messages: List[Dict], response_format=None, temperature=0.3):
        """Send one chat-completion request and return the reply message.

        Args:
            messages: Chat messages forwarded as-is to the API.
            response_format: Forwarded only when it is a non-empty dict.
            temperature: Sampling temperature for the request.

        Returns:
            The first choice's message on success. On any exception, a
            stand-in object whose ``content`` attribute is ``"Error: <message>"``.
        """
        # Counters are updated before the request, so failed calls are counted too.
        self.call_count += 1
        if "gpt-3.5" in self.model:
            self.total_cost += Config.COST_PER_CALL_GPT35
        else:
            self.total_cost += Config.COST_PER_CALL_GPT4

        try:
            kwargs = dict(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=2000,
            )
            if isinstance(response_format, dict) and response_format:
                kwargs["response_format"] = response_format

            completion = self.client.chat.completions.create(**kwargs)
            return completion.choices[0].message
        except Exception as exc:
            # Callers only read `.content`, so a throwaway type carrying it suffices.
            return type('obj', (object,), {'content': f"Error: {str(exc)}"})

    def get_metrics(self) -> Dict:
        """Return total calls, total cost and average cost per call (4 decimals)."""
        calls = self.call_count
        spent = self.total_cost
        # max(1, calls) avoids dividing by zero before any call has been made.
        average = spent / max(1, calls)
        return {
            "total_calls": calls,
            "total_cost": round(spent, 4),
            "average_cost_per_call": round(average, 4),
        }

# ======================== DATA MODELS ========================
# Difficulty tiers for a quiz question. Mixing in `str` makes each member
# compare equal to (and serialize as) its lowercase string value.
class QuestionDifficulty(str, Enum):
    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"

# One multiple-choice question.
# - `options` is constrained to exactly four entries (min_length=4, max_length=4).
# - `correct_answer` is an index into `options`, constrained to 0..3 via ge/le.
# - `difficulty` defaults to MEDIUM when not supplied.
# NOTE(review): min_length/max_length on a list field is the pydantic v2
# spelling (v1 used min_items/max_items) -- confirm the installed version.
class QuizQuestion(BaseModel):
    question: str = Field(description="The quiz question")
    options: List[str] = Field(description="Four multiple choice options", min_length=4, max_length=4)
    correct_answer: int = Field(description="Index of correct answer (0-3)", ge=0, le=3)
    explanation: str = Field(description="Brief explanation of the correct answer")
    difficulty: QuestionDifficulty = Field(default=QuestionDifficulty.MEDIUM)

# A complete quiz: the questions plus the topic they cover.
# All three fields are required (no defaults are declared).
# NOTE(review): nothing here checks that `total_questions` equals
# len(questions); callers presumably keep them in sync -- confirm.
class QuizContent(BaseModel):
    questions: List[QuizQuestion] = Field(description="List of quiz questions")
    topic: str = Field(description="Main topic of the quiz")
    total_questions: int = Field(description="Number of questions")

# ======================== AGENTS ========================

class DocumentProcessorAgent:
    """Agent 1: Document Processor.

    Accepts documents with .pdf, .txt and .docx extensions, extracts their
    text, and splits that text into chunks with langchain's
    RecursiveCharacterTextSplitter. Extraction is best-effort: failures are
    printed and an empty string is returned instead of raising.
    """

    def __init__(self):
        self.name = "DocumentProcessor"
        self.uses_llm = False

    def extract_text_from_file(self, file_path: str, file_type: str) -> str:
        """Extract the text of a document.

        Args:
            file_path: Path of the file to read.
            file_type: "pdf", "docx" or "txt". Case, surrounding whitespace
                and a leading dot are ignored (".PDF" is treated as "pdf").

        Returns:
            The extracted text, or "" when the type is unsupported or
            extraction fails.
        """
        try:
            # Normalize inside the try so a non-string file_type is reported
            # like any other unsupported type instead of crashing.
            normalized = file_type.strip().lower().lstrip(".")
            if normalized == "pdf":
                return self._extract_from_pdf(file_path)
            elif normalized == "docx":
                return self._extract_from_docx(file_path)
            elif normalized == "txt":
                return self._extract_from_txt(file_path)
            else:
                raise ValueError(f"Unsupported file type: {file_type}")
        except Exception as e:
            print(f"Document processing failed: {e}")
            return ""

    def _extract_from_pdf(self, file_path: str) -> str:
        """Return the text of every page, each followed by a newline.

        If reading fails part-way, the text collected so far is returned.
        """
        page_texts = []
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    # Guard against a page yielding no text (None), which
                    # would otherwise abort extraction of the remaining pages.
                    page_texts.append((page.extract_text() or "") + "\n")
        except Exception as e:
            print(f"PDF extraction failed: {e}")
        return "".join(page_texts)

    def _extract_from_docx(self, file_path: str) -> str:
        """Return the document's paragraphs joined by newlines, or "" on failure."""
        try:
            doc = docx.Document(file_path)
            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as e:
            print(f"DOCX extraction failed: {e}")
            return ""

    def _extract_from_txt(self, file_path: str) -> str:
        """Return the file's contents decoded as UTF-8, or "" on failure."""
        try:
            # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, so
            # the marker does not end up inside the first chunk.
            with open(file_path, 'r', encoding='utf-8-sig') as file:
                return file.read()
        except Exception as e:
            print(f"TXT extraction failed: {e}")
            return ""

    def chunk_text(self, text: str) -> List[str]:
        """Split text into overlapping chunks.

        Chunk size and overlap come from Config and are measured with len().

        Returns:
            The list of chunks, or [] when text is None, empty or whitespace.
        """
        if not text or not text.strip():
            return []

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=Config.DEFAULT_CHUNK_SIZE,
            chunk_overlap=Config.DEFAULT_CHUNK_OVERLAP,
            length_function=len
        )
        return splitter.split_text(text)

class VectorStore:
    """Utility that creates embeddings for text chunks and stores them in FAISS.

    All methods are best-effort: failures are printed and reported through
    the return value (False / empty list) rather than raised.
    """

    def __init__(self, api_key: str):
        """Set up the OpenAI embedding client.

        Args:
            api_key: OpenAI API key used for embedding requests.
        """
        self.name = "VectorStore"
        self.uses_llm = False
        # Assigned before the try block so the attribute always exists, even
        # when building the embedding client fails; otherwise
        # retrieve_relevant_content would raise AttributeError.
        self.vector_store = None

        try:
            self.embeddings = OpenAIEmbeddings(
                openai_api_key=api_key,
                model="text-embedding-ada-002"
            )
        except Exception as e:
            print(f"Embedding initialization failed: {e}")
            self.embeddings = None

    def create_vector_store(self, text_chunks: List[str]) -> bool:
        """Embed the chunks and build the FAISS index.

        Args:
            text_chunks: Text chunks to index.

        Returns:
            True when the index was built; False when there are no chunks,
            no embedding client, or building the index failed.
        """
        if not text_chunks or not self.embeddings:
            return False

        try:
            self.vector_store = FAISS.from_texts(
                texts=text_chunks,
                embedding=self.embeddings
            )
            return True
        except Exception as e:
            print(f"Vector store creation failed: {e}")
            return False

    def retrieve_relevant_content(self, query: str, k: int = 5) -> List[str]:
        """Return the text of the k chunks most similar to the query.

        Args:
            query: Text to search for.
            k: Number of chunks requested from the similarity search.

        Returns:
            The matching chunks' text, or [] when no index has been built
            or the search fails.
        """
        if not self.vector_store:
            return []

        try:
            docs = self.vector_store.similarity_search(query, k=k)
            return [doc.page_content for doc in docs]
        except Exception as e:
            print(f"Content retrieval failed: {e}")
            return []

            