In [1]:
# Skip this if already installed in your Jupyter environment
!pip install -q langgraph langchain langchain_openai pypdf2 pillow pymupdf faiss-cpu langchain_community reportlab


In [None]:
import os
# Credentials are read from the environment when present so that real secrets
# never need to be written into the notebook. The placeholder strings are only
# fallbacks: export OPENAI_API_KEY / SENDER_EMAIL / SENDER_PASSWORD before
# starting Jupyter, or replace the placeholders for a throw-away local run.
# `setdefault` keeps an API key that is already configured instead of
# overwriting it with the placeholder.
os.environ.setdefault("OPENAI_API_KEY", "sk-..................")  # Replace with your actual OpenAI key
SENDER_EMAIL = os.environ.get("SENDER_EMAIL", "........@........")
SENDER_PASSWORD = os.environ.get("SENDER_PASSWORD", ".....................")

import json
import re
import uuid
from pathlib import Path
from typing import Optional, List, Annotated, TypedDict
from email.message import EmailMessage
import smtplib
from pydantic import BaseModel, Field
from PIL import Image
import base64
import fitz
import PyPDF2

# LangChain / LangGraph
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END, START
from langgraph.checkpoint.memory import MemorySaver
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [5]:
from pathlib import Path
import os
import json

# === Constants ===
# Primary JSON store: read by load_data() and written by save_data().
DATA_FILE = "admission_data_v2.json"
# Second copy of the same JSON, written by save_data() after the primary file.
BACKUP_FILE = "admission_data_backup.json"
# Directory for per-application files; loan_processing_node() writes fee-slip PDFs here.
UPLOAD_DIR = "uploaded_files"

# Create the upload directory up front; exist_ok avoids an error on re-runs.
Path(UPLOAD_DIR).mkdir(exist_ok=True)

# === Default Structures ===
# Top-level record that load_data() writes out when no usable data file exists.
DEFAULT_DATA_STRUCTURE = {
    # One dict per application, each shaped like DEFAULT_APPLICATION_STRUCTURE.
    "applications": [],
    # Admission thresholds and the document types an application must include.
    "eligibility_criteria": {
        "min_class10_pcm_perc": 60,
        "min_class12_pcm_perc": 60,
        "max_wbjee_rank": 10000,
        # Income cap for loans, in LPA (presumably lakhs per annum -- confirm).
        "max_income_for_loan_lpa": 5.0,
        "required_docs": ["Marksheet", "Aadhaar"]
    },
    "university_capacity": 3,
    # Remaining loan funds; loan_processing_node() deducts from this on approval.
    "loan_budget": 12000,
    "fee_amount": 5000,
    # Free-text entries; run_single_application_graph() appends processing errors here.
    "director_log": [],
    "criteria_file_path": None
}

# Template for a single entry of "applications". Callers copy it and then fill
# in the applicant-specific fields (ID, document paths, income, loan request).
DEFAULT_APPLICATION_STRUCTURE = {
    "app_id": "",
    "applicant_name_marksheet": None,
    "applicant_email": None,
    # Nested dict: a shallow .copy() of this template shares it between copies.
    "marks": {"class10_pcm_perc": None, "class12_pcm_perc": None},
    "wbjee_rank": None,
    "aadhaar_name": None,
    "aadhaar_number": None,
    "marksheet_pdf_path": None,
    "aadhaar_pdf_path": None,
    "family_income_lpa": None,
    "loan_requested": False,
    # Initial workflow states. The graph nodes overwrite the validation,
    # communication, loan and fee-slip fields as they run.
    "extraction_status": "Pending",
    "validation_status": "Pending",
    "validation_reason": None,
    "shortlist_status": "Pending",
    "communication_status": "Not Sent",
    "loan_status": "Not Applicable",
    "loan_rejection_reason": None,
    "fee_slip_status": "Not Sent"
}

# === Load Function with Auto Recovery ===
def load_data():
    """
    Load admission data from DATA_FILE.

    Behaviour:
      * If DATA_FILE does not exist, a fresh file is created from the defaults.
      * If DATA_FILE cannot be parsed (or is not a JSON object), BACKUP_FILE is
        tried next; only when that is unusable too is a fresh file created.
      * Any top-level key missing from the loaded data is filled in from
        DEFAULT_DATA_STRUCTURE.

    Returns:
        dict: the admission data. Every default value placed in it is a deep
        copy, so mutating the result never changes DEFAULT_DATA_STRUCTURE.
    """
    if not os.path.exists(DATA_FILE):
        print(f"📁 '{DATA_FILE}' not found. Creating new data file.")
        return _reset_data_file()

    try:
        return _fill_missing_defaults(_read_json_object(DATA_FILE))
    except Exception as e:
        print(f"⚠️ Error reading '{DATA_FILE}': {e}.")

    # The primary file is unusable: prefer the last backup over losing everything.
    try:
        recovered = _fill_missing_defaults(_read_json_object(BACKUP_FILE))
    except Exception as backup_error:
        print(f"⚠️ Backup '{BACKUP_FILE}' is not usable either ({backup_error}). Creating fresh file.")
        return _reset_data_file()

    print(f"🛡️ Recovered data from '{BACKUP_FILE}'.")
    return recovered


def _fill_missing_defaults(data):
    """Add every top-level key missing from ``data``, using a private copy of its default."""
    import copy

    for key, default in DEFAULT_DATA_STRUCTURE.items():
        if key not in data:
            data[key] = copy.deepcopy(default)
    return data


def _read_json_object(path):
    """Parse ``path`` as JSON and return it; raise ValueError unless it is a JSON object."""
    with open(path, 'r') as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError("top-level JSON value is not an object")
    return data


def _reset_data_file():
    """Write a fresh copy of the default structure to DATA_FILE and return that copy."""
    data = _fill_missing_defaults({})
    with open(DATA_FILE, 'w') as f:
        json.dump(data, f, indent=4)
    return data

# === Save Function with Auto Backup ===
def save_data(data):
    """
    Save admission data to DATA_FILE and mirror it to BACKUP_FILE.

    The data is serialised to a string *before* any file is opened, and each
    file is replaced atomically via a temporary file. A payload that cannot be
    serialised, or a crash in the middle of writing, therefore leaves the
    previously saved files intact instead of truncating them.

    Args:
        data: JSON-serialisable admission data (normally the dict from load_data()).

    Errors are reported on stdout and not raised.
    """
    try:
        payload = json.dumps(data, indent=4)

        # Save primary file
        _write_text_atomically(DATA_FILE, payload)
        print("💾 Data saved successfully.")

        # Save backup
        _write_text_atomically(BACKUP_FILE, payload)
        print("🛡️ Backup created.")

    except Exception as e:
        print(f"❌ Failed to save data to '{DATA_FILE}': {e}")


def _write_text_atomically(path, text):
    """Write ``text`` to a sibling temp file, then move it over ``path`` in one step."""
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            f.write(text)
        os.replace(tmp_path, path)
    finally:
        # Only still present when the write or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


In [7]:
def run_single_application_graph(student_data: dict):
    """
    Store one student application and run it through the processing graph.

    Used directly by the Streamlit chatbot after collecting user data.

    Args:
        student_data: must contain "app_id". Optional keys read here are
            "name" (log output only), "marksheet_pdf_path", "aadhaar_pdf_path",
            "loan_requested", "family_income_lpa" and the applicant's e-mail
            under "applicant_email" (falling back to "email").

    The updated data is saved after a successful run. If the graph raises, the
    error is appended to "director_log" and the data, including the new
    unprocessed application, is still saved.
    """
    import copy

    app_id = student_data["app_id"]
    print(f"📥 Saving application for {student_data.get('name')} | ID: {app_id}")
    admission_data = load_data()

    # Deep copy: the template contains a nested "marks" dict that a shallow
    # copy would share between every application.
    new_app = copy.deepcopy(DEFAULT_APPLICATION_STRUCTURE)
    new_app["app_id"] = app_id
    new_app["marksheet_pdf_path"] = student_data.get("marksheet_pdf_path")
    new_app["aadhaar_pdf_path"] = student_data.get("aadhaar_pdf_path")
    new_app["loan_requested"] = student_data.get("loan_requested", False)
    new_app["family_income_lpa"] = student_data.get("family_income_lpa", None)
    # The status e-mail is addressed to this field. NOTE(review): the "email"
    # fallback key is an assumption about the caller's payload -- confirm.
    new_app["applicant_email"] = student_data.get("applicant_email") or student_data.get("email")

    applications = admission_data.setdefault("applications", [])
    applications.append(new_app)
    app_index = len(applications) - 1

    state = {
        "admission_data": admission_data,
        "current_app_index": app_index,
        "current_run_log": [],
        "extracted_marksheet_data": None,
        "extracted_aadhaar_data": None
    }

    config = {"configurable": {"thread_id": f"app_process_{app_id}"}}

    try:
        final_state = compiled_process_app_graph.invoke(state, config=config)
        save_data(final_state["admission_data"])
        print("✅ Application processed and saved successfully.")
    except Exception as e:
        print(f"❌ Error while running LangGraph: {e}")
        # setdefault: data files written by older versions may lack this key.
        admission_data.setdefault("director_log", []).append(
            f"ERROR processing application ID: {app_id}: {e}"
        )
        save_data(admission_data)


In [9]:
def handle_director_query(query: str):
    """
    Answer an admin's free-form question about the current admission state.

    The full admission data is serialised to JSON, embedded in a prompt together
    with the question, and sent to the chat model. The model's text reply is
    returned. Failures while loading the data or calling the model are reported
    on stdout and returned as a warning string instead of being raised.
    """
    print(f"\n🔍 Handling Director Query: '{query}'")

    # Step 1: snapshot the stored data as JSON text.
    try:
        data_summary = json.dumps(load_data(), indent=2)
    except Exception as load_error:
        print(f"❌ Error loading data for query: {load_error}")
        return "⚠️ Unable to load current admission data."

    # Step 2: build the prompt (data first, then the question).
    prompt = f"""
    You are an intelligent assistant helping a university admission director.
    The following is the current admission data:
    {data_summary}

    Now answer this query clearly and concisely:
    {query}
    """

    # Step 3: ask the model.
    try:
        answer = ChatOpenAI(model="gpt-3.5-turbo", temperature=0).invoke(
            [HumanMessage(content=prompt)]
        )
        print("✅ Query answered successfully.")
        return answer.content
    except Exception as llm_error:
        print(f"❌ Error generating response: {llm_error}")
        return f"⚠️ An error occurred while answering the query: {llm_error}"


In [17]:
from datetime import datetime
from reportlab.pdfgen import canvas

# === Utility: Extract Text from PDF ===
def extract_text_from_pdf(pdf_path):
    """
    Return the text of every page of a PDF, joined with single spaces.

    Pages that yield no text contribute an empty string. Any failure (missing
    file, unreadable PDF, ...) is printed and results in "" rather than an
    exception.
    """
    try:
        with open(pdf_path, "rb") as pdf_file:
            page_texts = []
            for page in PyPDF2.PdfReader(pdf_file).pages:
                page_texts.append(page.extract_text() or "")
    except Exception as err:
        print(f"⚠️ Failed to extract text from {pdf_path}: {err}")
        return ""
    return " ".join(page_texts)

# === Node: Extract Data ===
def data_extraction_node(state: "ProcessAppState") -> "ProcessAppState":
    """
    Graph node: read both uploaded PDFs and fill in the extracted fields.

    The annotations are forward references (strings) because ProcessAppState is
    defined in a later cell; a bare name would raise NameError when this cell is
    executed on a fresh kernel.

    Writes "applicant_name_marksheet", "aadhaar_name" and "aadhaar_number" on
    the current application, stores the raw text of each document on the state,
    and appends one entry to the run log.
    """
    index = state.current_app_index
    app = state.admission_data["applications"][index]

    # .get(): a missing path key is treated like an unset path, which
    # extract_text_from_pdf turns into empty text.
    marksheet_text = extract_text_from_pdf(app.get("marksheet_pdf_path"))
    aadhaar_text = extract_text_from_pdf(app.get("aadhaar_pdf_path"))

    # Naive mock parsing (replace with regex/NER)
    app["applicant_name_marksheet"] = "Student Name" if "Student" in marksheet_text else "Unknown"
    app["aadhaar_name"] = "Student Name" if "Student" in aadhaar_text else "Unknown"
    app["aadhaar_number"] = "XXXX-XXXX-XXXX"

    state.extracted_marksheet_data = {"text": marksheet_text}
    state.extracted_aadhaar_data = {"text": aadhaar_text}
    state.admission_data["applications"][index] = app
    state.current_run_log.append("🧾 PDF data extracted.")
    return state

# === Node: Validate Application ===
def validation_node(state: "ProcessAppState") -> "ProcessAppState":
    """
    Graph node: decide whether the current application is valid.

    An application is valid when both document paths are set, both extracted
    names are known (neither missing nor "Unknown"), and the two names agree
    after ignoring case and surplus whitespace. Otherwise it is marked invalid
    with the reason "Name mismatch or missing document".

    The annotations are forward references because ProcessAppState is defined
    in a later cell.
    """
    def _normalise(name):
        # Collapse whitespace and ignore case so purely cosmetic differences
        # between the two documents do not count as a mismatch.
        return " ".join(str(name).split()).casefold()

    index = state.current_app_index
    app = state.admission_data["applications"][index]

    # A name that was never extracted (None/empty) counts as unknown.
    marksheet_name = app.get("applicant_name_marksheet") or "Unknown"
    aadhaar_name = app.get("aadhaar_name") or "Unknown"

    has_documents = bool(app.get("marksheet_pdf_path") and app.get("aadhaar_pdf_path"))
    names_known = "Unknown" not in (marksheet_name, aadhaar_name)
    names_match = _normalise(marksheet_name) == _normalise(aadhaar_name)

    if has_documents and names_known and names_match:
        app["validation_status"] = "Valid"
        app["validation_reason"] = None  # clear any reason left by an earlier run
        state.current_run_log.append("✅ Application validated.")
    else:
        app["validation_status"] = "Invalid"
        app["validation_reason"] = "Name mismatch or missing document"
        state.current_run_log.append("❌ Validation failed.")

    state.admission_data["applications"][index] = app
    return state

# === Node: Communicate Application Status ===
def communication_node(state: "ProcessAppState") -> "ProcessAppState":
    """
    Graph node: e-mail the applicant the current status of their application.

    Sets "communication_status" to "Email Sent" on success and to
    "Failed to send" otherwise, and appends the outcome to the run log. A
    missing recipient address is treated as a failure without contacting the
    mail server. Errors never propagate out of this node.

    The annotations are forward references because ProcessAppState is defined
    in a later cell.
    """
    index = state.current_app_index
    app = state.admission_data["applications"][index]

    recipient = (app.get("applicant_email") or "").strip()
    if not recipient:
        # Nothing to address the message to; skip the SMTP round-trip.
        app["communication_status"] = "Failed to send"
        state.current_run_log.append("❌ Email error: no applicant email on file.")
        state.admission_data["applications"][index] = app
        return state

    try:
        # Built inside the try so a malformed header value is reported as a
        # failed send rather than aborting the whole graph run.
        msg = EmailMessage()
        msg["Subject"] = f"Application Status - ID {app['app_id']}"
        msg["From"] = SENDER_EMAIL
        msg["To"] = recipient
        content = f"""
    Hello {app['applicant_name_marksheet']},

    Your application (ID: {app['app_id']}) has been {app['validation_status']}.

    Loan status: {app['loan_status']}

    Thank you,
    Admissions Team
    """
        msg.set_content(content)

        # The context manager closes the connection even when login or send
        # fails; the timeout keeps an unreachable server from hanging the run.
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, timeout=30) as server:
            server.login(SENDER_EMAIL, SENDER_PASSWORD)
            server.send_message(msg)
        app["communication_status"] = "Email Sent"
        state.current_run_log.append("📧 Email sent successfully.")
    except Exception as e:
        print(f"❌ Email failed: {e}")
        app["communication_status"] = "Failed to send"
        state.current_run_log.append(f"❌ Email error: {e}")

    state.admission_data["applications"][index] = app
    return state

# === Node: Loan and Fee Slip Handling ===
def loan_processing_node(state: "ProcessAppState") -> "ProcessAppState":
    """
    Graph node: decide on the loan request and generate the fee slip PDF.

    Loan decision (only when "loan_requested" is truthy):
      * rejected when the family income is missing or not numeric;
      * approved when income is at or below the cap and the remaining
        "loan_budget" covers one loan, in which case the budget is reduced;
      * rejected otherwise.
    The income cap comes from eligibility_criteria["max_income_for_loan_lpa"],
    falling back to 5.0 when that setting is absent or not numeric.

    A fee slip PDF is then written to UPLOAD_DIR. A failure there is recorded
    in "fee_slip_status" and the run log, and does not raise.

    The annotations are forward references because ProcessAppState is defined
    in a later cell.
    """
    loan_amount = 5000        # amount taken from the budget per approved loan
    default_income_cap = 5.0  # used when the criteria do not define a cap

    index = state.current_app_index
    admission_data = state.admission_data
    app = admission_data["applications"][index]
    budget = admission_data.get("loan_budget", 0)

    criteria = admission_data.get("eligibility_criteria") or {}
    income_cap = criteria.get("max_income_for_loan_lpa", default_income_cap)
    if isinstance(income_cap, bool) or not isinstance(income_cap, (int, float)):
        income_cap = default_income_cap

    if app.get("loan_requested"):
        # The key normally exists with value None until an income is supplied,
        # so a .get() default would never apply; convert defensively instead.
        try:
            income = float(app.get("family_income_lpa"))
        except (TypeError, ValueError):
            income = None

        if income is None:
            app["loan_status"] = "Rejected"
            app["loan_rejection_reason"] = "Family income not provided"
            state.current_run_log.append("❌ Loan rejected.")
        elif income <= income_cap and budget >= loan_amount:
            app["loan_status"] = "Approved"
            app["loan_rejection_reason"] = None
            admission_data["loan_budget"] = budget - loan_amount
            state.current_run_log.append("🏦 Loan approved.")
        else:
            app["loan_status"] = "Rejected"
            app["loan_rejection_reason"] = "Income too high or insufficient budget"
            state.current_run_log.append("❌ Loan rejected.")
    else:
        app["loan_status"] = "Not Requested"
        state.current_run_log.append("💼 Loan not requested.")

    # Generate and save fee slip
    slip_path = Path(UPLOAD_DIR) / f"{app['app_id']}_fee_slip.pdf"
    try:
        c = canvas.Canvas(str(slip_path))
        c.drawString(100, 750, f"Fee Slip for {app['applicant_name_marksheet']}")
        c.drawString(100, 730, f"App ID: {app['app_id']}")
        c.drawString(100, 710, f"Loan Status: {app['loan_status']}")
        c.drawString(100, 690, f"Date: {datetime.now().strftime('%Y-%m-%d')}")
        c.save()
        app["fee_slip_status"] = "Generated"
        state.current_run_log.append("🧾 Fee slip generated.")
    except Exception as e:
        print(f"❌ Fee slip error: {e}")
        app["fee_slip_status"] = "Failed"
        state.current_run_log.append(f"❌ Fee slip error: {e}")

    state.admission_data["applications"][index] = app
    return state


In [19]:
from email.message import EmailMessage
from reportlab.pdfgen import canvas
from datetime import datetime


In [21]:
from pydantic import BaseModel, Field
from typing import Optional, List
from langgraph.graph import StateGraph, END, START

# ✅ Updated schema for LangGraph
class ProcessAppState(BaseModel):
    """State schema shared by the nodes of the single-application processing graph."""
    # Full admission dataset; nodes read and update entries under "applications".
    admission_data: dict
    # Position in admission_data["applications"] of the application being processed.
    current_app_index: int
    # Human-readable messages appended by the nodes during a run.
    current_run_log: List[str] = Field(default_factory=list)
    # Set by the extraction node to {"text": <marksheet PDF text>}; None before that.
    extracted_marksheet_data: Optional[dict] = None
    # Set by the extraction node to {"text": <Aadhaar PDF text>}; None before that.
    extracted_aadhaar_data: Optional[dict] = None

# ✅ Define and compile the LangGraph
process_app_workflow = StateGraph(ProcessAppState)

# Register the node functions. They are defined in an earlier cell of this
# notebook and each takes the state and returns it after updating it.
process_app_workflow.add_node("extract_data", data_extraction_node)
process_app_workflow.add_node("validate_application", validation_node)
process_app_workflow.add_node("communicate_status", communication_node)
process_app_workflow.add_node("check_loan_request", loan_processing_node)

# Flow: extract -> validate -> loan decision -> communicate.
# The loan decision must run before the status e-mail is composed because the
# e-mail body reports the application's "loan_status"; with the e-mail sent
# first, it would always show the initial value.
process_app_workflow.set_entry_point("extract_data")
process_app_workflow.add_edge("extract_data", "validate_application")
process_app_workflow.add_edge("validate_application", "check_loan_request")
process_app_workflow.add_edge("check_loan_request", "communicate_status")
process_app_workflow.set_finish_point("communicate_status")

# Compile it
compiled_process_app_graph = process_app_workflow.compile()
print("✅ LangGraph for single application compiled successfully.")


✅ LangGraph for single application compiled successfully.


In [25]:
import uuid

def run_admission_process_with_uploads():
    """
    Jupyter-based flow for uploading and processing multiple student applications manually.

    Prompts on stdin for each application (document paths, family income, loan
    request) until the user answers anything other than "yes", then runs the
    processing graph once per collected application and saves the result.

    NOTE: every run starts from an empty application list, so applications
    stored by earlier runs are replaced when the data is saved.

    A failure while processing one application is printed, appended to
    "director_log", and does not stop the remaining applications.
    """
    import copy

    print("📋 Starting batch admission process via notebook...")
    admission_data = load_data()
    admission_data["applications"] = []

    while True:
        cont = input("📤 Upload a new application? (yes/no): ").strip().lower()
        if cont != "yes":
            break

        app_id = str(uuid.uuid4())
        print(f"\n📎 Application ID: {app_id}")

        marksheet_path = input("Path to Marksheet PDF: ").strip()
        aadhaar_path = input("Path to Aadhaar PDF: ").strip()
        income_str = input("Family income in LPA (e.g., 4.5): ").strip()
        loan_req = input("Apply for loan? (yes/no): ").strip().lower()

        try:
            income = float(income_str)
        except ValueError:
            income = None  # unparseable income is stored as "not provided"

        # Deep copy: the template holds a nested "marks" dict that a shallow
        # copy would share between every application in the batch.
        new_app = copy.deepcopy(DEFAULT_APPLICATION_STRUCTURE)
        new_app["app_id"] = app_id
        new_app["marksheet_pdf_path"] = marksheet_path
        new_app["aadhaar_pdf_path"] = aadhaar_path
        new_app["family_income_lpa"] = income
        new_app["loan_requested"] = loan_req == "yes"

        admission_data["applications"].append(new_app)

    total = len(admission_data["applications"])
    if total == 0:
        print("🚫 No applications to process.")
        return

    # Run processing graph for each application. Iterate by index and re-read
    # from admission_data each time, because it is rebound to the graph's
    # output after every successful run.
    for i in range(total):
        app_id = admission_data["applications"][i]["app_id"]
        state = {
            "admission_data": admission_data,
            "current_app_index": i,
            "current_run_log": [],
            "extracted_marksheet_data": None,
            "extracted_aadhaar_data": None
        }
        config = {"configurable": {"thread_id": f"app_process_{app_id}"}}
        try:
            final_state = compiled_process_app_graph.invoke(state, config=config)
            admission_data = final_state["admission_data"]
            print(f"✅ Processed application {i+1}/{total}")
        except Exception as e:
            print(f"❌ Error processing application {i+1}: {e}")
            # Same log format as the single-application entry point.
            admission_data.setdefault("director_log", []).append(
                f"ERROR processing application ID: {app_id}: {e}"
            )

    save_data(admission_data)
    print("📁 All applications processed and saved.")
