In [None]:
# Cell 1: Install system and Python dependencies (Colab shell commands).
# poppler-utils is a system package; the original note describes it as
# "still potentially useful for PyPDF2 or other tools".
# NOTE(review): nothing visible in this notebook calls poppler directly - confirm it is still needed.
!apt-get update && apt-get install -y poppler-utils

# Install/upgrade the Python packages imported in Cell 2. pymupdf provides the
# `fitz` module used for rendering PDF pages; pdf2image is intentionally not installed.
# Only stdout is redirected to /dev/null, so pip's normal progress output is hidden.
# NOTE(review): versions are unpinned and -U pulls the latest releases, so re-runs may
# not be reproducible; consider pinning versions.
!pip install -U langgraph langchain langchain_openai pypdf2 pillow pymupdf faiss-cpu langchain_community reportlab > /dev/null

# Printed unconditionally: this line does not check whether the installs above succeeded.
print("Required libraries and dependencies installed.")

Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Get:11 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,824 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,243 kB]
Get:13 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,688 kB]


In [None]:
# Cell 2: Import necessary libraries (CORRECTED)

import os
import json
import uuid
import re
import smtplib
from email.message import EmailMessage
from typing import TypedDict, Annotated, List, Literal, Optional
import operator
from pathlib import Path
import base64
from PIL import Image
import io

# Langchain & LangGraph imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, AIMessage
# --- Use Pydantic V2 directly ---
# from langchain_core.pydantic_v1 import BaseModel, Field # Deprecated
from pydantic import BaseModel, Field # Use Pydantic v2
# --- End Pydantic V2 ---
from langgraph.graph import StateGraph, END, START
from langgraph.checkpoint.memory import MemorySaver
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# PDF and Image processing
import PyPDF2
import fitz  # PyMuPDF library
# --- REMOVED pdf2image import ---
# from pdf2image import convert_from_path
# --- END REMOVAL ---

# Google Colab specific import for file upload
from google.colab import files

# --- Credentials ---
# Values already present in the process environment take precedence, so a key
# configured outside the notebook is never overwritten by the placeholder text.
# The literals below are fallbacks only; do not commit real secrets in their place.
openai_api_key = os.environ.get("OPENAI_API_KEY") or "sk-..................."
# Re-export so code that looks the key up via the environment sees the same value.
os.environ["OPENAI_API_KEY"] = openai_api_key
# Account used by send_email() to log in to the SMTP server.
SENDER_EMAIL = os.environ.get("SENDER_EMAIL") or ".........@..........."
SENDER_PASSWORD = os.environ.get("SENDER_PASSWORD") or "......................"

print("Libraries imported and credentials set.")

Libraries imported (PyMuPDF included, pdf2image removed) and credentials set.


In [None]:
# Cell 3: Define Data Structure and JSON Handling (with income fields)

# Template for a single student application record (includes family_income_lpa).
# Fields start out empty / "Pending"; the processing nodes in Cell 6 overwrite
# them as an application moves through extraction, validation, communication
# and the loan check.
DEFAULT_APPLICATION_STRUCTURE = {
    "app_id": "",
    "applicant_name_marksheet": None,  # written by validation_node from the marksheet data
    "applicant_email": None,  # written by validation_node; read by communication_node
    "marks": {"class10_pcm_perc": None, "class12_pcm_perc": None},
    "wbjee_rank": None,
    "aadhaar_name": None,
    "aadhaar_number": None,
    "marksheet_pdf_path": None,  # read by data_extraction_node
    "aadhaar_pdf_path": None,  # read by data_extraction_node
    "family_income_lpa": None, # Family income in Lakhs Per Annum
    "loan_requested": False,  # read by loan_processing_node
    "extraction_status": "Pending",  # data_extraction_node sets "Attempted" or "Failed (...)"
    "validation_status": "Pending",  # validation_node sets "Verified" or "Rejected"
    "validation_reason": None,  # comma-joined rejection reasons, or None
    "shortlist_status": "Pending",
    "communication_status": "Not Sent",
    "loan_status": "Not Applicable",
    "loan_rejection_reason": None,
    "fee_slip_status": "Not Sent"
}

# Top-level layout of the persisted admission data (includes max_income_for_loan_lpa).
# load_data() writes this structure to DATA_FILE when no usable file exists, and
# its "eligibility_criteria" entry is the fallback used by parse_criteria_file().
DEFAULT_DATA_STRUCTURE = {
    "applications": [],  # list of per-application dicts
    "eligibility_criteria": {
        "min_class10_pcm_perc": 60,  # validation rejects a Class 10 PCM % below this
        "min_class12_pcm_perc": 60,  # validation rejects a Class 12 PCM % below this
        "max_wbjee_rank": 10000,  # validation rejects a WBJEE rank above this
        "max_income_for_loan_lpa": 5.0, # Max income (in Lakhs) for loan eligibility
        "required_docs": ["Marksheet", "Aadhaar"]
    },
    "university_capacity": 3,
    "loan_budget": 12000,  # NOTE(review): currency/unit is not stated anywhere visible - confirm
    "fee_amount": 5000,  # NOTE(review): currency/unit is not stated anywhere visible - confirm
    "director_log": [],  # list of log strings; the report agent appends to it
    "criteria_file_path": None
}

# JSON file that load_data() reads and save_data() writes (relative to the working directory).
DATA_FILE = "admission_data_v2.json"
# Directory created below at cell execution time.
# NOTE(review): presumably where uploaded PDFs are stored - no visible code writes to it; confirm.
UPLOAD_DIR = "uploaded_files"

# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Function to initialize or load data
def load_data():
    """Load the admission data from DATA_FILE, creating or repairing it as needed.

    Behaviour:
      * If DATA_FILE does not exist, contains invalid JSON, or its top-level
        value is not a JSON object, the file is (re)written with the default
        structure and that structure is returned.
      * Otherwise the loaded dict is patched so that "applications" is a list,
        "eligibility_criteria" exists and contains "max_income_for_loan_lpa",
        and every application dict has a "family_income_lpa" key.

    Returns:
        dict: the admission data. Defaults are always handed out as deep
        copies, so mutating the returned value never alters
        DEFAULT_DATA_STRUCTURE.
    """
    import copy  # local import: only needed to hand out fresh copies of the defaults

    def _reset_to_defaults():
        # Write a private copy of the defaults to disk and return that same copy.
        fresh = copy.deepcopy(DEFAULT_DATA_STRUCTURE)
        with open(DATA_FILE, 'w') as f:
            json.dump(fresh, f, indent=4)
        return fresh

    if not os.path.exists(DATA_FILE):
        print(f"Data file '{DATA_FILE}' not found. Creating with default structure.")
        return _reset_to_defaults()

    try:
        with open(DATA_FILE, 'r') as f:
            print(f"Loading data from '{DATA_FILE}'.")
            data = json.load(f)
    except json.JSONDecodeError:
        print(f"Error decoding JSON from '{DATA_FILE}'. Creating default structure.")
        return _reset_to_defaults()

    # Valid JSON that is not an object (e.g. a list) cannot be patched key by key.
    if not isinstance(data, dict):
        print(f"Unexpected top-level JSON value in '{DATA_FILE}'. Creating default structure.")
        return _reset_to_defaults()

    default_criteria = DEFAULT_DATA_STRUCTURE["eligibility_criteria"]
    if not isinstance(data.get("applications"), list):
        data["applications"] = []
    if not isinstance(data.get("eligibility_criteria"), dict):
        # Deep copy so the loaded data never aliases the module-level defaults.
        data["eligibility_criteria"] = copy.deepcopy(default_criteria)
    if "max_income_for_loan_lpa" not in data["eligibility_criteria"]:
        data["eligibility_criteria"]["max_income_for_loan_lpa"] = default_criteria["max_income_for_loan_lpa"]
    # Back-fill the income field on records saved before it existed.
    for app in data["applications"]:
        if isinstance(app, dict) and "family_income_lpa" not in app:
            app["family_income_lpa"] = None
    return data

# Function to save data
def save_data(data):
    """Serialize ``data`` as indented JSON and write it to DATA_FILE.

    The data is serialized to a string *before* the file is opened, so a
    serialization failure can no longer leave DATA_FILE truncated or half
    written. Errors are reported with a printed message rather than raised.

    Args:
        data: the admission data dict. If its "applications" entry is missing
            or not a list, it is replaced in place with an empty list and a
            warning is printed.

    Returns:
        None
    """
    try:
        if "applications" not in data or not isinstance(data["applications"], list):
            data["applications"] = []
            print("Warning: 'applications' key missing/invalid during save.")
        serialized = json.dumps(data, indent=4)
    except (TypeError, ValueError) as e:
        # Nothing has been written yet, so the previous file contents are intact.
        print(f"Error serializing data for saving: {e}")
        return
    try:
        with open(DATA_FILE, 'w') as f:
            f.write(serialized)
    except IOError as e:
        print(f"Error saving data to '{DATA_FILE}': {e}")

# Initialize/Load data
# Module-level admission data for the notebook: read from DATA_FILE, or created
# with the default structure when the file is missing or unreadable.
admission_data = load_data()
print("Initial Data Loaded/Created (with income fields).")

Data file 'admission_data_v2.json' not found. Creating with default structure.
Initial Data Loaded/Created (with income fields).


In [None]:
# Cell 4: Helper Functions (Using PyMuPDF for OCR Image Gen)

# --- PDF Text Extraction ---
def extract_text_from_pdf(pdf_path):
    """Extract the text layer of every page of a PDF using PyPDF2.

    Args:
        pdf_path: path to the PDF file.

    Returns:
        str | None: the page texts joined by newlines and stripped, or None
        when the path is empty/missing, no text could be extracted, or any
        error occurs (a message is printed in each case).
    """
    if not pdf_path or not os.path.exists(pdf_path):
        print(f"Error: PDF path is invalid or file does not exist: {pdf_path}")
        return None
    try:
        page_texts = []
        # `with` guarantees the handle is closed even if PyPDF2 raises mid-way.
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            if reader.is_encrypted:
                print(f"Warning: PDF {pdf_path} is encrypted.")
            for page_number, page in enumerate(reader.pages, start=1):
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text + "\n")
                else:
                    print(f"Warning: No text found on page {page_number} of {pdf_path}.")
        text = "".join(page_texts)
        if not text.strip():
            print(f"Warning: No text could be extracted from {pdf_path} overall.")
            return None
        return text.strip()
    except FileNotFoundError:
        print(f"Error: PDF file not found at {pdf_path}")
        return None
    except Exception as e:
        # Best-effort helper: any parsing failure is reported and mapped to None.
        print(f"Error extracting text from PDF {pdf_path}: {e}")
        return None

# --- Pydantic model for OCR ---
# Structured-output schema for the Aadhaar OCR call: it is passed to
# llm_vision.with_structured_output(...) and the parsed response exposes
# `.name` and `.aadhaar_number`. Both fields default to None.
# NOTE(review): documented with comments rather than a class docstring because a
# docstring would presumably become part of the schema sent to the model - confirm.
class AadhaarDetails(BaseModel): # Use Pydantic v2 BaseModel
    name: Optional[str] = Field(None, description="The full name...")
    aadhaar_number: Optional[str] = Field(None, description="The 12-digit Aadhaar number...")

# --- Aadhaar OCR (Using PyMuPDF/fitz) ---
# Chat model used for image-based extraction; responses are capped at 1024 tokens.
llm_vision = ChatOpenAI(model="gpt-4o", max_tokens=1024)
# Wrapper whose invoke() result is parsed into an AadhaarDetails instance.
structured_llm_vision = llm_vision.with_structured_output(AadhaarDetails)

def perform_real_ocr_on_pdf(pdf_path):
    """Performs OCR on the first page of a PDF using PyMuPDF and OpenAI Vision.

    The first page is rendered to a PNG at 200 dpi, sent to the structured
    vision model as a base64 data URL, and the parsed name / Aadhaar number
    are returned.

    Args:
        pdf_path: path to the Aadhaar PDF.

    Returns:
        dict | None: {"name": ..., "number": ...} where the number has spaces
        removed (either value may be None if the model did not return it), or
        None when the path is invalid, the page cannot be rendered, or any
        error occurs. Format problems are reported as printed warnings only;
        the values are still returned for the caller to validate.
    """
    if not pdf_path or not os.path.exists(pdf_path):
        print(f"Error: Aadhaar PDF path is invalid or file does not exist: {pdf_path}")
        return None
    print(f"Attempting OCR on {pdf_path} using PyMuPDF and GPT-4o Vision...")
    doc = None  # stays None unless a document is open and still needs closing
    try:
        doc = fitz.open(pdf_path)
        if doc is None or doc.page_count == 0:
            print(f"Error: Could not open or find pages in PDF: {pdf_path}")
            return None
        page = doc.load_page(0)
        pix = page.get_pixmap(dpi=200)
        doc.close()  # Close doc as soon as pixmap is obtained
        doc = None   # prevents a second close() in the finally block

        if not pix:
            print(f"Error: Could not render page 0 to pixmap for {pdf_path}.")
            return None
        img_bytes = pix.tobytes("png")
        if not img_bytes:
            print(f"Error: Could not convert pixmap to bytes for {pdf_path}.")
            return None

        base64_image = base64.b64encode(img_bytes).decode('utf-8')
        image_url = f"data:image/png;base64,{base64_image}"
        message = HumanMessage(content=[{"type": "text", "text": "Extract the full name and the 12-digit Aadhaar number..."}, {"type": "image_url", "image_url": {"url": image_url}}])
        response = structured_llm_vision.invoke([message])
        print(f"Vision API Response (parsed): Name='{response.name}', Number='{response.aadhaar_number}'")

        extracted_name = response.name
        extracted_number = response.aadhaar_number.replace(" ", "") if response.aadhaar_number else None
        if not extracted_name or not extracted_number:
            print("Warning: Vision API did not return both name and number.")
        elif len(extracted_number) != 12 or not extracted_number.isdigit():
            print(f"Warning: Extracted Aadhaar number '{extracted_number}' is not 12 digits or contains non-digits.")
        else:
            # Only claim success when the result is complete and well-formed.
            print("OCR successful.")
        return {"name": extracted_name, "number": extracted_number}
    except Exception as e:
        print(f"Error during PyMuPDF/OCR process for {pdf_path}: {e}")
        return None
    finally:
        # Compare against None explicitly instead of relying on the document's
        # truthiness, so an open document is always released.
        if doc is not None:
            doc.close()
            print(f"Ensured fitz document for {pdf_path} is closed.")


# --- Marksheet Data Extraction ---
# (Keep corrected version from previous step)
def extract_marksheet_data(pdf_path):
    """Pull name, email, PCM percentages and WBJEE rank out of a marksheet PDF.

    The PDF text is obtained via extract_text_from_pdf() and searched with
    regular expressions; a warning is printed for every field not found.

    Args:
        pdf_path: path to the marksheet PDF.

    Returns:
        dict | None: {"name", "email", "marks", "wbjee_rank"} with None for
        fields that were not found, or None when the path is invalid or no
        text could be extracted. If an exception occurs during matching, the
        partially filled dict is returned.
    """
    if not pdf_path or not os.path.exists(pdf_path):
        print(f"Error: Marksheet PDF path is invalid or file does not exist: {pdf_path}")
        return None

    print(f"Extracting data from Marksheet: {pdf_path}...")
    text = extract_text_from_pdf(pdf_path)
    if not text:
        print(f"Failed to extract any text from marksheet: {pdf_path}")
        return None

    data = {"name": None, "email": None, "marks": {}, "wbjee_rank": None}
    try:
        # Applicant name: text after "Name" up to a newline / "Email" / "Roll".
        found_name = re.search(r"Name\s*[:\-]?\s*([A-Z][a-zA-Z\s]+?)\s*(?:\n|Email|Roll)", text, re.IGNORECASE)
        if found_name:
            data["name"] = found_name.group(1).strip()
        else:
            data["name"] = None
        if not data["name"]:
            print("Warning: Could not extract Name from marksheet.")

        # First email-looking token anywhere in the text.
        found_email = re.search(r"[\w\.\-+%]+@[\w\.\-]+\.[a-zA-Z]{2,}", text)
        if found_email:
            data["email"] = found_email.group(0)
        else:
            data["email"] = None
        if not data["email"]:
            print("Warning: Could not extract Email from marksheet.")

        # PCM percentages for Class 10 and Class 12.
        found_c10 = re.search(r"Class\s+10\s+PCM\s+(?:Percentage|%)\s*[:\-]?\s*(\d{1,3}(?:\.\d+)?)", text, re.IGNORECASE)
        found_c12 = re.search(r"Class\s+12\s+PCM\s+(?:Percentage|%)\s*[:\-]?\s*(\d{1,3}(?:\.\d+)?)", text, re.IGNORECASE)
        perc_10 = float(found_c10.group(1)) if found_c10 else None
        perc_12 = float(found_c12.group(1)) if found_c12 else None
        data["marks"] = {"class10_pcm_perc": perc_10, "class12_pcm_perc": perc_12}
        if data["marks"]["class10_pcm_perc"] is None:
            print("Warning: Could not extract Class 10 PCM %.")
        if data["marks"]["class12_pcm_perc"] is None:
            print("Warning: Could not extract Class 12 PCM %.")

        # WBJEE rank as an integer.
        found_rank = re.search(r"WBJEE\s+Rank\s*[:\-]?\s*(\d+)", text, re.IGNORECASE)
        if found_rank:
            data["wbjee_rank"] = int(found_rank.group(1))
        else:
            data["wbjee_rank"] = None
        if data["wbjee_rank"] is None:
            print("Warning: Could not extract WBJEE Rank.")

        print(f"Attempted Marksheet Extraction Results: {data}")
        return data
    except Exception as e:
        print(f"Error during regex extraction from marksheet {pdf_path}: {e}")
        return data

# --- Criteria File Parsing ---
# (Keep corrected version from previous step)
def parse_criteria_file(file_path):
    """Parse eligibility criteria from a PDF of "key: value" lines.

    Lines that are blank, start with '#', or contain no ':' are ignored. Keys
    are matched case-insensitively against a table of known spellings;
    unrecognized keys are skipped with a warning. "required docs" is parsed as
    a comma-separated list; every other value must be numeric (float when it
    contains '.', otherwise int). A non-numeric value is ignored with a
    warning so that the default applies, instead of being stored as a string
    that would later break numeric comparisons.

    Args:
        file_path: path to the criteria PDF.

    Returns:
        dict: the parsed criteria with any missing key filled from the default
        eligibility criteria. When the file is missing, unreadable, or parsing
        fails, a copy of the defaults is returned. Defaults are always copied,
        so the result never aliases DEFAULT_DATA_STRUCTURE.
    """
    import copy  # local import: used only to copy the default criteria

    defaults = DEFAULT_DATA_STRUCTURE["eligibility_criteria"]
    if not file_path or not os.path.exists(file_path):
        print(f"Error: Criteria file path is invalid or file does not exist: {file_path}")
        return copy.deepcopy(defaults)

    print(f"Parsing criteria file: {file_path}...")
    text_content = extract_text_from_pdf(file_path)
    if text_content is None:
        print(f"Failed to extract text from criteria PDF: {file_path}. Using defaults.")
        return copy.deepcopy(defaults)

    # Accepted spellings (lower-cased) -> internal criteria key.
    key_mapping = {
        "min class 10 pcm perc": "min_class10_pcm_perc",
        "min class 12 pcm perc": "min_class12_pcm_perc",
        "max wbjee rank": "max_wbjee_rank",
        "required docs": "required_docs",
        "max income for loan lpa": "max_income_for_loan_lpa",
        "minimum class 10 pcm percentage": "min_class10_pcm_perc",
        "minimum class 12 pcm percentage": "min_class12_pcm_perc",
        "maximum wbjee rank": "max_wbjee_rank",
        "maximum income for loan lpa": "max_income_for_loan_lpa",
    }
    criteria = {}
    try:
        for line in text_content.splitlines():
            line = line.strip()
            if not line or line.startswith('#') or ':' not in line:
                continue
            raw_key, value = line.split(':', 1)
            code_key = key_mapping.get(raw_key.strip().lower())
            if not code_key:
                print(f"Warning: Unrecognized criteria key '{raw_key.strip()}' in file. Skipping.")
                continue
            value = value.strip()
            if code_key == "required_docs":
                # Drop empty entries produced by stray or trailing commas.
                criteria[code_key] = [doc.strip() for doc in value.split(',') if doc.strip()]
                continue
            try:
                criteria[code_key] = float(value) if '.' in value else int(value)
            except ValueError:
                print(f"Warning: Non-numeric value '{value}' for criteria '{code_key}'. Ignoring it.")

        print(f"Parsed Criteria: {criteria}")
        for k, default_val in defaults.items():
            if k not in criteria:
                print(f"Warning: Criteria file missing key '{k}'. Using default: {default_val}")
                criteria[k] = copy.deepcopy(default_val)
        return criteria
    except Exception as e:
        print(f"Error parsing extracted criteria text from {file_path}: {e}")
        return copy.deepcopy(defaults)

# --- Email Sending Function ---
# (Keep as is)
def send_email(recipient_email, subject, body):
    """Send a plain-text email through Gmail's SMTP-over-SSL endpoint.

    Logs in with SENDER_EMAIL / SENDER_PASSWORD and sends a single message.

    Args:
        recipient_email: destination address; must be non-empty and look like
            ``local@domain.tld``.
        subject: subject line.
        body: plain-text message body.

    Returns:
        tuple[bool, str]: (success flag, human-readable status message).
        Failures are reported through the return value, never raised.
    """
    if not recipient_email:
        print("Skipping email: No recipient email provided.")
        return False, "No recipient email"
    if not re.match(r"[^@]+@[^@]+\.[^@]+", recipient_email):
        print(f"Skipping email: Invalid recipient email format '{recipient_email}'.")
        return False, "Invalid recipient email format"

    print(f"Attempting to send email to {recipient_email} with subject: {subject}...")
    try:
        msg = EmailMessage()
        msg.set_content(body)
        msg['Subject'] = subject
        msg['From'] = SENDER_EMAIL
        msg['To'] = recipient_email
        # The context manager closes the connection even when login or send
        # raises, so a failed attempt no longer leaks the socket.
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(SENDER_EMAIL, SENDER_PASSWORD)
            server.send_message(msg)
        print(f"Email successfully sent to {recipient_email}.")
        return True, "Email Sent Successfully"
    except smtplib.SMTPAuthenticationError:
        print("SMTP Authentication Error...")
        return False, "SMTP Authentication Error"
    except Exception as e:
        print(f"Error sending email to {recipient_email}: {e}")
        return False, f"Failed to send email: {e}"

# --- Vector Store Creation Function ---
# (Keep as is)
def create_criteria_vectorstore(criteria_pdf_path, save_path="criteria_vectorstore"):
    """Build a FAISS vector store from the criteria PDF and save it locally.

    Steps: extract the PDF text, split it into 500-character chunks with a
    50-character overlap, embed the chunks with OpenAIEmbeddings, index them
    in FAISS, and save the index under ``save_path``.

    Args:
        criteria_pdf_path: path to the criteria PDF.
        save_path: location passed to ``save_local``.

    Returns:
        bool: True when the store was created and saved; False when the PDF is
        missing, no text was extracted, OPENAI_API_KEY is unset, or any error
        occurs (a message is printed in each case).
    """
    pdf_is_available = bool(criteria_pdf_path) and os.path.exists(criteria_pdf_path)
    if not pdf_is_available:
        print(f"Error: Cannot create vector store. Criteria PDF not found at {criteria_pdf_path}")
        return False

    print(f"\n--- Creating FAISS Vector Store from: {criteria_pdf_path} ---")
    try:
        print("Extracting text...")
        text_content = extract_text_from_pdf(criteria_pdf_path)
        if not text_content:
            print("Failed to extract text.")
            return False

        print("Splitting text...")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        documents = text_splitter.create_documents([text_content])
        print(f"Created {len(documents)} chunks.")

        print("Initializing embeddings...")
        if not os.getenv("OPENAI_API_KEY"):
            print("Error: OPENAI_API_KEY not set.")
            return False
        embeddings = OpenAIEmbeddings()

        print("Creating FAISS index...")
        vectorstore = FAISS.from_documents(documents, embeddings)
        print("FAISS index created.")

        vectorstore.save_local(save_path)
        print(f"FAISS vector store saved locally to: '{save_path}'")
        return True
    except Exception as e:
        print(f"An error occurred during vector store creation: {e}")
        return False

print("Helper functions defined (using PyMuPDF for OCR).")

Helper functions defined (using PyMuPDF for OCR).


In [None]:
# Cell 5: Define LangGraph State for Single App Processing (Keep as is)

class ProcessAppState(TypedDict):
    """State passed between the per-application nodes defined in Cell 6."""
    # Full admission data dict; nodes return an updated copy via update_app_data().
    admission_data: dict
    # Index into admission_data["applications"] of the application being processed.
    current_app_index: int
    # Log lines for this run. Each node returns only its new entries; the
    # operator.add annotation is presumably the LangGraph reducer that
    # concatenates them - confirm against the graph definition.
    current_run_log: Annotated[List[str], operator.add]
    # Result of extract_marksheet_data(), or None if unavailable.
    extracted_marksheet_data: Optional[dict]
    # Result of perform_real_ocr_on_pdf(), or None if unavailable.
    extracted_aadhaar_data: Optional[dict]

class AdmissionState(TypedDict):
    """Overall admission state; its keys mirror DEFAULT_DATA_STRUCTURE."""
    # One dict per application.
    applications: List[dict]
    # Thresholds read by validation_node (minimum percentages, maximum rank, ...).
    eligibility_criteria: dict
    university_capacity: int
    loan_budget: int
    fee_amount: int
    # Log of report/decision entries; annotated with operator.add,
    # presumably as a LangGraph reducer - confirm against the graph definition.
    director_log: Annotated[List[str], operator.add]
    criteria_file_path: Optional[str]

print("LangGraph state definitions complete.")

LangGraph state definitions complete.


In [None]:
# Cell 6: Define Agent Nodes (CORRECTED - Validation & Communication Logic)

# Helper to update the application list immutably
def update_app_data(admission_data, index, updates):
    """Return a copy of ``admission_data`` with one application record updated.

    Every application dict is shallow-copied, ``updates`` is merged into the
    copy at ``index``, and a new top-level dict holding the copied list is
    returned; the input dict and its application dicts are not modified.

    Args:
        admission_data: dict expected to hold a list under "applications".
        index: position of the application to update (0-based, non-negative).
        updates: mapping of fields to merge into that application.

    Returns:
        dict: the new admission data. If "applications" is missing or not a
        list, the original object is returned unchanged (with a warning). If
        ``index`` is out of range, a copy without changes is returned (with a
        warning).
    """
    existing = admission_data["applications"] if "applications" in admission_data else None
    if not isinstance(existing, list):
        print("Warning: 'applications' key missing/invalid during update.")
        return admission_data

    copied_apps = []
    for record in existing:
        copied_apps.append(record.copy())

    if index < 0 or index >= len(copied_apps):
        print(f"Warning: Invalid index {index} provided for update. List length: {len(copied_apps)}")
    else:
        copied_apps[index].update(updates)

    result = dict(admission_data)
    result["applications"] = copied_apps
    return result

# --- Node 1: Data Extraction ---
# (Keep as is from previous correction)
def data_extraction_node(state: ProcessAppState) -> ProcessAppState:
    """Extract marksheet and Aadhaar data for the current application.

    Reads the two PDF paths from the application at ``current_app_index``,
    runs extract_marksheet_data() and perform_real_ocr_on_pdf() on whichever
    paths are present, and records an ``extraction_status`` of "Attempted",
    "Failed (Missing Files)" or "Failed (Extraction Error)".

    Returns:
        A partial state update with the new log entries, both extraction
        results, and the updated admission data. For an out-of-range index
        only the log entry is returned.
    """
    app_index = state["current_app_index"]
    total_apps = len(state["admission_data"].get("applications", []))
    if not (0 <= app_index < total_apps):
        print(f"Error: Invalid current_app_index {app_index}...")
        return {"current_run_log": [f"Error: Invalid application index {app_index}."]}

    app_data = state["admission_data"]["applications"][app_index]
    applicant_name_initial = f'App_{app_index+1}'
    print(f"\n--- Running Data Extraction for: {applicant_name_initial} (ID: {app_data.get('app_id', 'N/A')}) ---")
    run_log = [f"Starting data extraction for {applicant_name_initial}."]

    # Marksheet: text-based extraction.
    marksheet_path = app_data.get("marksheet_pdf_path")
    extracted_m_data = None
    if marksheet_path:
        extracted_m_data = extract_marksheet_data(marksheet_path)
        run_log.append("Marksheet data extraction attempted.")
    else:
        run_log.append("Marksheet PDF path missing.")

    # Aadhaar: vision-model OCR.
    aadhaar_path = app_data.get("aadhaar_pdf_path")
    extracted_a_data = None
    if aadhaar_path:
        extracted_a_data = perform_real_ocr_on_pdf(aadhaar_path)
        run_log.append("Aadhaar data extraction attempted.")
    else:
        run_log.append("Aadhaar PDF path missing.")

    both_paths_present = bool(marksheet_path) and bool(aadhaar_path)
    if not both_paths_present:
        status = "Failed (Missing Files)"
        run_log.append("Extraction marked Failed due to missing file paths.")
    elif extracted_m_data is None or extracted_a_data is None:
        status = "Failed (Extraction Error)"
        run_log.append("Extraction marked Failed due to errors during processing.")
    else:
        status = "Attempted"
        run_log.append("Data extraction attempted. Validation needed.")

    updated_admission_data = update_app_data(state["admission_data"], app_index, {"extraction_status": status})
    return {
        "current_run_log": run_log,
        "extracted_marksheet_data": extracted_m_data,
        "extracted_aadhaar_data": extracted_a_data,
        "admission_data": updated_admission_data,
    }


# --- Node 2: Validation (CORRECTED - Persist Extracted Details) ---
def validation_node(state: ProcessAppState) -> ProcessAppState:
    """Validate the extracted data of the current application against the criteria.

    Checks, in order: name match between marksheet and Aadhaar
    (case-insensitive), a 12-digit Aadhaar number, the minimum Class 10 and
    Class 12 PCM percentages, and the maximum WBJEE rank. Every failed check
    adds a reason; the application is "Verified" only when there are none.
    The extracted fields are persisted on the application record whether or
    not validation passes.

    Extraction helpers report a field that was not found as an explicit None,
    so every extracted value is treated as possibly None here.

    Returns:
        A partial state update with the new log entries and the updated
        admission data. For an out-of-range index only a log entry is returned.
    """
    app_index = state["current_app_index"]
    admission_data = state["admission_data"]
    applications = admission_data.get("applications", [])
    # Same guard as data_extraction_node, so a bad index cannot raise here.
    if not (0 <= app_index < len(applications)):
        print(f"Error: Invalid current_app_index {app_index}...")
        return {"current_run_log": [f"Error: Invalid application index {app_index}."]}
    app_data = applications[app_index]
    applicant_name = app_data.get('applicant_name_marksheet') or app_data.get('aadhaar_name') or f'App_{app_index+1}'
    print(f"\n--- Running Validation for: {applicant_name} ---")
    run_log = [f"Starting validation for {applicant_name}."]

    extracted_m = state.get("extracted_marksheet_data")
    extracted_a = state.get("extracted_aadhaar_data")
    criteria = admission_data.get("eligibility_criteria", DEFAULT_DATA_STRUCTURE["eligibility_criteria"])
    validation_status = "Pending"
    validation_reason = []
    verified_name = None
    applicant_email = None
    aadhaar_number = None
    marks = {}
    rank = None

    # --- Collect extracted values so they are persisted even on rejection ---
    if extracted_m:
        applicant_email = extracted_m.get("email")
        marks = extracted_m.get("marks") or {}
        rank = extracted_m.get("wbjee_rank")
    if extracted_a:
        raw_number = extracted_a.get("number")
        # A missing number stays None instead of becoming the string "None".
        aadhaar_number = str(raw_number).replace(" ", "") if raw_number is not None else None

    if app_data.get("extraction_status", "").startswith("Failed"):
        validation_status = "Rejected"
        validation_reason.append(f"Data Extraction Failed/Incomplete ({app_data.get('extraction_status')})")
        run_log.append("Validation Rejected: Prerequisite data extraction failed.")
    elif not extracted_m or not extracted_a:
        validation_status = "Rejected"
        validation_reason.append("Missing extracted Marksheet or Aadhaar data.")
        run_log.append("Validation Rejected: Missing extracted data.")
    else:
        # --- Name check: `or ""` covers keys that are present with value None ---
        raw_name_m = extracted_m.get("name")
        raw_name_a = extracted_a.get("name")
        name_m = (raw_name_m or "").strip().lower()
        name_a = (raw_name_a or "").strip().lower()
        if name_m and name_a and name_m == name_a:
            verified_name = raw_name_m.strip()
            run_log.append(f"Name match successful ('{verified_name}').")
        elif name_m and name_a:
            validation_status = "Rejected"
            validation_reason.append(f"Name Mismatch (M: '{raw_name_m}', A: '{raw_name_a}')")
            run_log.append("Validation Rejected: Name Mismatch.")
        else:
            validation_status = "Rejected"
            validation_reason.append(f"Name Missing (M: '{raw_name_m}', A: '{raw_name_a}')")
            run_log.append("Validation Rejected: Name Missing.")

        # --- Aadhaar number check ---
        if aadhaar_number and len(aadhaar_number) == 12 and aadhaar_number.isdigit():
            run_log.append(f"Aadhaar number valid ('{aadhaar_number}').")
        else:
            validation_status = "Rejected"
            validation_reason.append(f"Invalid Aadhaar Number (Found: '{extracted_a.get('number')}')")
            run_log.append("Validation Rejected: Invalid Aadhaar Number.")

        # --- Marks checks ---
        c10_marks = marks.get("class10_pcm_perc")
        c12_marks = marks.get("class12_pcm_perc")
        min_c10 = criteria.get("min_class10_pcm_perc", 0)
        min_c12 = criteria.get("min_class12_pcm_perc", 0)
        if c10_marks is None or c10_marks < min_c10:
            validation_status = "Rejected"
            validation_reason.append(f"Class 10 PCM % ({c10_marks}) < Min ({min_c10})")
            run_log.append("Validation Rejected: Class 10 Marks.")
        else:
            run_log.append(f"Class 10 Marks OK ({c10_marks} >= {min_c10}).")
        if c12_marks is None or c12_marks < min_c12:
            validation_status = "Rejected"
            validation_reason.append(f"Class 12 PCM % ({c12_marks}) < Min ({min_c12})")
            run_log.append("Validation Rejected: Class 12 Marks.")
        else:
            run_log.append(f"Class 12 Marks OK ({c12_marks} >= {min_c12}).")

        # --- Rank check ---
        max_rank = criteria.get("max_wbjee_rank", 999999)
        if rank is None or rank > max_rank:
            validation_status = "Rejected"
            validation_reason.append(f"WBJEE Rank ({rank}) > Max ({max_rank})")
            run_log.append("Validation Rejected: WBJEE Rank.")
        else:
            run_log.append(f"WBJEE Rank OK ({rank} <= {max_rank}).")

        if not validation_reason:
            validation_status = "Verified"
            run_log.append("Overall Validation Successful.")
        else:
            validation_status = "Rejected"

    # Fields written back to the application record.
    updates = {
        "validation_status": validation_status,
        "validation_reason": ", ".join(validation_reason) if validation_reason else None,
        # Best available marksheet name; parenthesised to make the precedence explicit.
        "applicant_name_marksheet": (verified_name or extracted_m.get('name')) if extracted_m else None,
        "applicant_email": applicant_email,
        "marks": marks,
        "wbjee_rank": rank,
        "aadhaar_name": extracted_a.get("name") if extracted_a else None,
        "aadhaar_number": aadhaar_number,
    }
    return {"current_run_log": run_log, "admission_data": update_app_data(admission_data, app_index, updates)}


# --- Node 3: Communication (CORRECTED - Fetch Latest Name/Email) ---
def communication_node(state: ProcessAppState) -> ProcessAppState:
    """Email the applicant the outcome of validation and record the result.

    A success email is sent for "Verified" applications and a rejection email
    (including the stored reason) for "Rejected" ones. Any other validation
    status sends nothing and sets ``communication_status`` to
    "Pending Validation".

    Returns:
        A partial state update with the new log entries and the admission
        data carrying the new ``communication_status``.
    """
    app_index = state["current_app_index"]
    admission_data = state["admission_data"]
    # Read the record again here so the name/email written by validation are used.
    app_data = admission_data["applications"][app_index]
    applicant_name = app_data.get('applicant_name_marksheet') or app_data.get('aadhaar_name') or f'Applicant {app_index+1}'
    recipient = app_data.get("applicant_email")

    print(f"\n--- Running Communication for: {applicant_name} ---")
    run_log = [f"Preparing communication for {applicant_name}."]

    current_validation_status = app_data.get("validation_status")

    # Guard clause: nothing to communicate until validation has a final outcome.
    if current_validation_status not in ("Verified", "Rejected"):
        run_log.append(f"No communication sent (Validation Status: {current_validation_status}).")
        pending_update = update_app_data(admission_data, app_index, {"communication_status": "Pending Validation"})
        return {"current_run_log": run_log, "admission_data": pending_update}

    if current_validation_status == "Verified":
        subject = "Application Status: Verification Successful"
        body = f"Dear {applicant_name},\n\nCongratulations! Your application documents and eligibility have been successfully verified...\n\nRegards,\nAdmission Team"
        comm_status = "Verification Success Sent"
    else:
        subject = "Application Status: Update Required / Rejection"
        reason = app_data.get('validation_reason', 'details not available')
        body = f"Dear {applicant_name},\n\nWe regret to inform you that your application could not be verified due to:\n- {reason}\n\nRegards,\nAdmission Team"
        comm_status = "Rejection Sent"

    if not recipient:
        run_log.append(f"Cannot send email: Recipient email address missing for {applicant_name}.")
        comm_status = "Email Failed (Missing Address)"
    else:
        success, message = send_email(recipient, subject, body)
        if success:
            run_log.append(f"Email sent to {recipient} regarding validation status.")
        else:
            run_log.append(f"Failed to send validation status email to {recipient}: {message}")
            comm_status = "Email Failed"

    final_update = update_app_data(admission_data, app_index, {"communication_status": comm_status})
    return {"current_run_log": run_log, "admission_data": final_update}

# --- Node 4: Loan Processing Check ---
# (Keep as is - logic depends on external shortlisting)
def loan_processing_node(state: ProcessAppState) -> ProcessAppState:
    """Set the preliminary loan and fee-slip status of the current application.

    Verified applications get ``loan_status`` "Pending Shortlisting" when a
    loan was requested and "Not Requested" otherwise; all other applications
    get "Not Applicable". ``fee_slip_status`` is set to "Pending Shortlisting"
    in every case.

    Returns:
        A partial state update with the new log entries and the updated
        admission data.
    """
    app_index = state["current_app_index"]
    admission_data = state["admission_data"]
    app_data = admission_data["applications"][app_index]
    applicant_name = app_data.get('applicant_name_marksheet') or app_data.get('aadhaar_name') or f'App_{app_index+1}'

    print(f"\n--- Running Loan Processing Check for: {applicant_name} ---")
    run_log = [f"Checking loan status for {applicant_name}."]

    is_verified = app_data.get("validation_status") == "Verified"
    if not is_verified:
        loan_status = "Not Applicable"
        run_log.append(f"Loan not applicable (Validation: {app_data.get('validation_status')}).")
        print(f"Loan not applicable for {applicant_name}.")
    elif app_data.get("loan_requested", False):
        loan_status = "Pending Shortlisting"
        run_log.append("Loan requested. Status set to Pending Shortlisting.")
        print(f"Loan request noted for {applicant_name}.")
    else:
        loan_status = "Not Requested"
        run_log.append("Loan not requested.")
        print(f"Loan not requested by {applicant_name}.")

    updates = {"loan_status": loan_status, "fee_slip_status": "Pending Shortlisting"}
    return {"current_run_log": run_log, "admission_data": update_app_data(admission_data, app_index, updates)}


# --- Node 5: Admission Officer Report (CORRECTED - Safer Access) ---
llm_report = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.1)
def admission_officer_agent(state: AdmissionState) -> AdmissionState:
    """Ask the LLM for a final admission report and append it to the director log.

    Builds one summary line per entry in ``state["applications"]`` (every field
    is read with ``.get`` so missing keys show up as 'N/A'), embeds those lines
    together with capacity, loan budget and fee amount in a prompt, and sends
    the prompt to ``llm_report``.

    Returns:
        A partial state ``{"director_log": [...]}`` holding the existing log
        followed by a progress line and a ``FINAL REPORT:`` entry. If the LLM
        call raises, the error text is stored as the report instead of
        propagating the exception.

    Note:
        Earlier director-log lines are not part of the prompt; the previous
        implementation computed a ``previous_logs`` string but never used it,
        so that dead computation has been removed.
    """
    print("\n--- Running Admission Officer Agent (Generating Report) ---")
    log_entry = "Generating final admission status report..."
    print(log_entry)
    admission_data = state

    status_summary = []
    for app in admission_data.get("applications", []):
        app_name = app.get('applicant_name_marksheet') or app.get('aadhaar_name') or app.get('app_id', 'N/A')
        # Reasons are only shown for the outcome they explain (a rejection).
        validation_reason = f" (Reason: {app.get('validation_reason', 'N/A')})" if app.get('validation_status') == 'Rejected' else ""
        loan_reason = f" (Loan Reason: {app.get('loan_rejection_reason', 'N/A')})" if app.get('loan_status') == 'Rejected' else ""
        status_summary.append(
            f"- {app_name}: "
            f"Docs={app.get('extraction_status', 'N/A')}, Validation={app.get('validation_status', 'N/A')}{validation_reason}, "
            f"Shortlist={app.get('shortlist_status', 'N/A')}, Comm={app.get('communication_status', 'N/A')}, "
            f"Loan={app.get('loan_status', 'N/A')}{loan_reason}, FeeSlip={app.get('fee_slip_status', 'N/A')}"
        )
    summary_text = "\n".join(status_summary)

    prompt = f"""You are the Admission Officer... Final Application Status Summary:\n{summary_text}\n\nUniversity Capacity: {admission_data.get('university_capacity', 'N/A')}\nRemaining Loan Budget: {admission_data.get('loan_budget', 'N/A')}\nFee Amount: {admission_data.get('fee_amount', 'N/A')}\n\nGenerate a brief report..."""

    # Best effort: a failed LLM call must not lose the rest of the log.
    try:
        response = llm_report.invoke([HumanMessage(content=prompt)])
        report = response.content
        print("Report Generated by LLM.")
    except Exception as e:
        report = f"Error generating report: {e}"
        print(f"LLM Error: {e}")

    # `or []` also covers a director_log key that is present but None.
    existing_log = list(admission_data.get("director_log") or [])
    updated_log = existing_log + [log_entry, f"FINAL REPORT:\n{report}"]
    return {"director_log": updated_log}

print("Agent node functions defined (Corrected).")

Agent node functions defined (Corrected).


In [None]:
# Cell 7: Define Graph Structure (Using Corrected State Types)

# --- Graph for Single Application Processing ---
# A strictly linear pipeline: every node runs once, in the order listed here.
process_app_workflow = StateGraph(ProcessAppState)

_process_app_steps = [
    ("extract_data", data_extraction_node),
    ("validate_application", validation_node),
    ("communicate_status", communication_node),
    ("check_loan_request", loan_processing_node),
]
for _step_name, _step_fn in _process_app_steps:
    process_app_workflow.add_node(_step_name, _step_fn)

# START -> extract_data -> validate_application -> communicate_status -> check_loan_request -> END
_process_app_path = [START] + [step[0] for step in _process_app_steps] + [END]
for _edge_from, _edge_to in zip(_process_app_path, _process_app_path[1:]):
    process_app_workflow.add_edge(_edge_from, _edge_to)

compiled_process_app_graph = process_app_workflow.compile()

# --- Graph for Final Report ---
# Single-node graph: START -> admission_officer_agent -> END
report_workflow = StateGraph(AdmissionState)

_report_node = "admission_officer_agent"
report_workflow.add_node(_report_node, admission_officer_agent)
for _edge_from, _edge_to in ((START, _report_node), (_report_node, END)):
    report_workflow.add_edge(_edge_from, _edge_to)
compiled_report_graph = report_workflow.compile()

print("Graphs defined and compiled with correct state types.")

Graphs defined and compiled with correct state types.


In [None]:
# Cell 8: Main Application Flow (Using Corrected Log Handling)

import time
from google.colab import files
import shutil

def _display_name(app, index):
    """Return the best available applicant label: marksheet name, Aadhaar name, else 'App_<n>'."""
    return app.get('applicant_name_marksheet') or app.get('aadhaar_name') or f'App_{index+1}'


def _save_uploaded_document(prompt_label, short_label, destination_path):
    """Prompt for one upload and move the first uploaded file to destination_path.

    Returns destination_path, or None when the upload dialog returned nothing.
    """
    print(f"Please upload the {prompt_label}:")
    uploaded = files.upload()
    if not uploaded:
        print(f"{short_label} upload skipped.")
        return None
    original_name = next(iter(uploaded))  # only the first uploaded file is used
    shutil.move(original_name, destination_path)
    print(f"{short_label} '{original_name}' saved as {destination_path}")
    return destination_path


def _upload_criteria(admission_data):
    """Step 1a: take an optional criteria file, set eligibility criteria, build the demo vector store."""
    print("\n--- Please upload the Criteria File you created (e.g., criteria.pdf) ---")
    uploaded_criteria = files.upload()
    if not uploaded_criteria:
        print("No criteria file uploaded. Using default criteria.")
        admission_data["criteria_file_path"] = None
        admission_data["eligibility_criteria"] = DEFAULT_DATA_STRUCTURE["eligibility_criteria"]
    else:
        criteria_filename = next(iter(uploaded_criteria))
        criteria_path = os.path.join(UPLOAD_DIR, criteria_filename)
        shutil.move(criteria_filename, criteria_path)
        print(f"Criteria file '{criteria_filename}' uploaded and saved to {criteria_path}")
        admission_data["criteria_file_path"] = criteria_path
        admission_data["eligibility_criteria"] = parse_criteria_file(criteria_path)

    # --- Create Vector Store (Demo Only) ---
    current_criteria_path = admission_data.get("criteria_file_path")
    if current_criteria_path:
        create_criteria_vectorstore(current_criteria_path, save_path="criteria_faiss_index")
    else:
        print("Skipping vector store creation.")


def _collect_applications(admission_data):
    """Step 1b: interactively collect documents and answers for any number of applications."""
    import copy  # local import: deepcopy is only needed when creating a new entry

    while True:
        add_another = input("\nDo you want to upload files for an application? (yes/no): ").strip().lower()
        if add_another != 'yes':
            break
        app_id = str(uuid.uuid4())
        print(f"\n--- Uploading files for New Application ID: {app_id} ---")

        # Both documents are mandatory; skipping either abandons this entry.
        marksheet_path = _save_uploaded_document(
            "Marksheet PDF", "Marksheet", os.path.join(UPLOAD_DIR, f"{app_id}_marksheet.pdf"))
        if marksheet_path is None:
            continue
        aadhaar_path = _save_uploaded_document(
            "Aadhaar Card PDF", "Aadhaar", os.path.join(UPLOAD_DIR, f"{app_id}_aadhaar.pdf"))
        if aadhaar_path is None:
            continue

        family_income_str = input("Enter applicant's family income (in Lakhs Per Annum, e.g., 4.5): ")
        try:
            family_income_lpa = float(family_income_str)
        except ValueError:
            print("Invalid income input. Storing as None.")
            family_income_lpa = None
        loan_requested = input("Does this applicant request a loan? (yes/no): ").strip().lower() == 'yes'

        # Deep copy so nested containers in the template (if any) are never
        # shared between application entries.
        new_app = copy.deepcopy(DEFAULT_APPLICATION_STRUCTURE)
        new_app["app_id"] = app_id
        new_app["marksheet_pdf_path"] = marksheet_path
        new_app["aadhaar_pdf_path"] = aadhaar_path
        new_app["loan_requested"] = loan_requested
        new_app["family_income_lpa"] = family_income_lpa
        admission_data["applications"].append(new_app)
        print(f"Application entry created for ID: {app_id}")
        save_data(admission_data)


def _process_applications(admission_data):
    """Step 2: run every application through the per-application graph; return the updated data."""
    num_applications = len(admission_data["applications"])
    for i in range(num_applications):
        app_id_for_log = admission_data['applications'][i].get('app_id', 'UNKNOWN_ID')
        applicant_name_for_log = f'App_{i+1}'
        print(f"\n>>> Processing Application {i+1}/{num_applications} (ID: {app_id_for_log}) <<<")
        state_for_this_run = {
            "admission_data": admission_data,
            "current_app_index": i,
            "current_run_log": [],
            "extracted_marksheet_data": None,
            "extracted_aadhaar_data": None,
        }
        config = {"configurable": {"thread_id": f"app_process_{app_id_for_log}"}}
        try:
            final_state_after_app = compiled_process_app_graph.invoke(state_for_this_run, config=config)
            # The graph hands back the full data set; carry it into the next iteration.
            admission_data = final_state_after_app["admission_data"]
            admission_data.setdefault("director_log", []).extend(final_state_after_app.get("current_run_log", []))
            save_data(admission_data)
            applicant_name_final = _display_name(admission_data['applications'][i], i)
            print(f">>> Finished Graph Processing for Application {i+1}/{num_applications} ({applicant_name_final}) <<<")
            time.sleep(1)
        except Exception as e:
            # One failing application must not stop the remaining ones.
            print(f"ERROR processing application {i+1} (ID: {app_id_for_log}) via graph: {e}")
            admission_data.setdefault("director_log", []).append(
                f"ERROR processing {applicant_name_for_log} (ID: {app_id_for_log}) via graph: {e}")
            try:
                admission_data["applications"][i]["validation_status"] = "Processing Error"
                admission_data["applications"][i]["validation_reason"] = str(e)
            except IndexError:
                print("Could not update application status due to index error.")
            save_data(admission_data)
    return admission_data


def _shortlist_by_capacity(admission_data):
    """Step 3: shortlist verified applications by ascending WBJEE rank until capacity is reached."""
    def _rank_sort_key(item):
        rank = item[1].get("wbjee_rank")
        # Applications without an integer rank sort after all ranked ones.
        return rank if isinstance(rank, int) else 999999

    verified_apps = [(i, app) for i, app in enumerate(admission_data["applications"])
                     if app.get("validation_status") == "Verified"]
    verified_apps.sort(key=_rank_sort_key)

    shortlisted_count = 0
    capacity = admission_data.get("university_capacity", 0)
    log_entries = ["Starting final shortlisting based on capacity."]
    for i, app in verified_apps:
        applicant_name = _display_name(app, i)
        rank = app.get('wbjee_rank', 'N/A')
        if shortlisted_count < capacity:
            admission_data["applications"][i]["shortlist_status"] = "Shortlisted"
            log_entries.append(f"Final Shortlist: {applicant_name} (Rank: {rank}).")
            print(f"Final Shortlist: {applicant_name} (Rank: {rank})")
            shortlisted_count += 1
        else:
            admission_data["applications"][i]["shortlist_status"] = "Waitlisted"
            log_entries.append(f"Waitlisted (Capacity Full): {applicant_name} (Rank: {rank}).")
            print(f"Waitlisted: {applicant_name} (Rank: {rank})")
    admission_data.setdefault("director_log", []).extend(log_entries)


def _process_loans_and_fee_slips(admission_data):
    """Step 4: decide loans for shortlisted applicants and email the fee information."""
    log_entries = ["Starting final loan processing and fee slip dispatch for shortlisted."]
    current_budget = admission_data.get("loan_budget", 0)
    fee_amount = admission_data.get("fee_amount", 0)
    max_income_for_loan = admission_data.get("eligibility_criteria", {}).get("max_income_for_loan_lpa", 5.0)
    print(f"Loan Eligibility: Max Family Income = {max_income_for_loan} LPA")

    for i, app in enumerate(admission_data["applications"]):
        if app.get("shortlist_status") != "Shortlisted":
            continue
        applicant_name = _display_name(app, i)
        loan_reason = None
        recipient_email = app.get("applicant_email")

        if app.get("loan_requested"):
            family_income = app.get("family_income_lpa")
            # NOTE(review): an unknown income (None) is not treated as over the
            # limit, so such a request falls through to the budget check.
            if family_income is not None and family_income > max_income_for_loan:
                loan_status = "Rejected"
                loan_reason = f"Income ({family_income} LPA) > Limit ({max_income_for_loan} LPA)"
                log_entries.append(f"Loan Rejected for {applicant_name} ({loan_reason}).")
                print(f"Loan Rejected for {applicant_name} (Income).")
                email_subject = "Admission Update: Fee Info (Loan Status)"
                email_body = f"Dear {applicant_name},\n\nCongrats...\n\nLoan rejected due to income...\n\nRegards,\nFinance Team"
            elif current_budget >= fee_amount:
                loan_status = "Approved"
                current_budget -= fee_amount
                log_entries.append(f"Loan Approved for {applicant_name}. Budget left: {current_budget}.")
                print(f"Loan Approved for {applicant_name}.")
                email_subject = "Admission Update: Loan Approved & Fee Info"
                email_body = f"Dear {applicant_name},\n\nCongrats...\n\nLoan approved...\n\nRegards,\nFinance Team"
            else:
                loan_status = "Rejected"
                loan_reason = f"Insufficient Budget ({current_budget})"
                log_entries.append(f"Loan Rejected for {applicant_name} (Budget).")
                print(f"Loan Rejected for {applicant_name} (Budget).")
                email_subject = "Admission Update: Fee Info (Loan Status)"
                email_body = f"Dear {applicant_name},\n\nCongrats...\n\nLoan rejected due to budget...\n\nRegards,\nFinance Team"
        else:
            loan_status = "Not Requested"
            log_entries.append(f"Loan not requested by {applicant_name}.")
            print(f"Loan not requested by {applicant_name}.")
            email_subject = "Admission Update: Fee Info"
            email_body = f"Dear {applicant_name},\n\nCongrats...\n\nFee slip attached..."

        # The fee slip only counts as sent when an email actually went out.
        if recipient_email:
            delivered, msg = send_email(recipient_email, email_subject, email_body)
            outcome = 'Success' if delivered else f'Failed: {msg}'
            log_entries.append(f"Fee/Loan email to {applicant_name}: {outcome}")
            comm_status = "Final Decision Sent" if delivered else "Email Failed"
        else:
            delivered = False
            log_entries.append(f"Skipped Fee/Loan email to {applicant_name}: No email address found.")
            comm_status = "Email Failed (Missing Address)"

        if delivered:
            fee_status = "Sent"
            log_entries.append(f"Fee slip sent to {applicant_name}.")
            print(f"Fee slip sent to {applicant_name}.")
        else:
            fee_status = "Not Sent"
            log_entries.append(f"Fee slip NOT sent to {applicant_name}.")
            print(f"Fee slip NOT sent to {applicant_name}.")

        record = admission_data["applications"][i]
        record["loan_status"] = loan_status
        record["loan_rejection_reason"] = loan_reason
        record["fee_slip_status"] = fee_status
        record["communication_status"] = comm_status

    admission_data["loan_budget"] = current_budget
    admission_data.setdefault("director_log", []).extend(log_entries)


def _generate_final_report(admission_data):
    """Step 5: run the report graph and store the director log it returns."""
    config_report = {"configurable": {"thread_id": "admission_process_final_report"}}
    try:
        final_report_state_update = compiled_report_graph.invoke(admission_data, config=config_report)
        admission_data["director_log"] = final_report_state_update["director_log"]
    except Exception as e:
        print(f"ERROR generating final report: {e}")
        admission_data.setdefault("director_log", []).append(f"ERROR generating final report: {e}")


def run_admission_process_with_uploads():
    """Run the whole interactive admission workflow in Colab, end to end.

    Steps:
      1. Load the saved data, reset applications and director log, then collect
         an optional criteria file and any number of applications.
      2. Run each application through ``compiled_process_app_graph``.
      3. Shortlist verified applications by WBJEE rank up to the capacity.
      4. Decide loans for shortlisted applicants and email fee information.
         ``fee_slip_status`` becomes "Sent" and ``communication_status``
         "Final Decision Sent" only if ``send_email`` reported success;
         otherwise they are "Not Sent" and "Email Failed" or
         "Email Failed (Missing Address)".
      5. Generate the final report through ``compiled_report_graph``.

    The data is persisted with ``save_data`` after every step. Returns None;
    returns early after step 1 if no application was entered.
    """
    print("\n=== Step 1: Initial Setup & File Uploads ===")
    admission_data = load_data()
    admission_data["applications"] = []
    admission_data["director_log"] = []
    os.makedirs(UPLOAD_DIR, exist_ok=True)  # shutil.move needs the target directory to exist

    _upload_criteria(admission_data)
    save_data(admission_data)
    _collect_applications(admission_data)

    if not admission_data["applications"]:
        print("\nNo applications were added/found. Exiting process.")
        return

    print(f"\n=== Step 2: Process {len(admission_data['applications'])} Application(s) via LangGraph ===")
    admission_data = _process_applications(admission_data)

    print("\n=== Step 3: Final Shortlisting Based on Capacity ===")
    _shortlist_by_capacity(admission_data)
    save_data(admission_data)
    print("Final shortlisting complete.")

    print("\n=== Step 4: Process Loans & Fee Slips for Shortlisted ===")
    _process_loans_and_fee_slips(admission_data)
    save_data(admission_data)
    print("Final loan processing and fee slip dispatch complete.")

    print("\n=== Step 5: Generate Final Director Report ===")
    _generate_final_report(admission_data)
    save_data(admission_data)

    print("\n=== Admission Process Complete ===")
    print("\nFinal State (saved to admission_data_v2.json):")
    # print(json.dumps(admission_data, indent=2)) # Optionally print

    print("\n--- Final Director's Log ---")
    for entry in admission_data.get("director_log", []):
        print(f"- {entry}")

# --- Function to Handle Director Queries ---
def handle_director_query(query: str):
    """Answer a free-form director question from the saved admission data.

    Loads the current data with ``load_data``, serialises all of it to JSON,
    and sends it together with ``query`` to ``llm_report``.

    Args:
        query: The director's question, inserted verbatim into the prompt.

    Returns:
        The LLM's answer text, or a "Sorry, ..." message when the data cannot
        be loaded, has no ``applications`` list, or the LLM call fails. No
        exception is propagated to the caller.
    """
    print(f"\n--- Handling Director Query: '{query}' ---")

    # Stage 1: load and serialise the data; any failure here ends the request.
    try:
        current_data = load_data()
        applications = current_data.get("applications")
        if not isinstance(applications, list):
            print("Error: Invalid data format in JSON file.")
            return "Sorry, the admission data seems corrupted."
        data_summary = json.dumps(current_data, indent=2)
    except Exception as load_error:
        print(f"Error loading data for director query: {load_error}")
        return "Sorry, I couldn't load the latest admission data."

    # Stage 2: assemble the prompt (leading and trailing blank lines are part of it).
    prompt_lines = [
        "",
        "You are assisting the University Director...",
        "Admission Data:",
        data_summary,
        f"Director's Query: {query}",
        "Provide a clear and concise answer...",
        "",
    ]
    prompt = "\n".join(prompt_lines)

    # Stage 3: ask the model and echo its answer.
    try:
        llm_response = llm_report.invoke([HumanMessage(content=prompt)])
        answer = llm_response.content
        print("\nDirector's Answer:")
        print(answer)
        return answer
    except Exception as llm_error:
        print(f"Error handling director query: {llm_error}")
        return f"Sorry, I encountered an error: {llm_error}"

# --- Run the entire process ---
run_admission_process_with_uploads()

# --- Example Director Queries ---
# print("\n--- Answering Director Queries ---")
# handle_director_query("How many students applied in total?")
# handle_director_query("How many students were shortlisted?")
# handle_director_query("Give me the names of the shortlisted students.")
# handle_director_query("How many loans were approved and what is the remaining budget?")
# handle_director_query("Why was Bob rejected?")
# handle_director_query("What is the status of Anjali Sharma's application?")
# handle_director_query("Which students had their loan rejected due to income?")


=== Step 1: Initial Setup & File Uploads ===
Loading data from 'admission_data_v2.json'.

--- Please upload the Criteria File you created (e.g., criteria.pdf) ---


Saving criteria.pdf to criteria.pdf
Criteria file 'criteria.pdf' uploaded and saved to uploaded_files/criteria.pdf
Parsing criteria file: uploaded_files/criteria.pdf...
Parsed Criteria: {'min_class10_pcm_perc': 75, 'min_class12_pcm_perc': 70, 'max_wbjee_rank': 5000, 'required_docs': ['ID Proof', 'Marksheet', 'Photo']}

--- Creating FAISS Vector Store from: uploaded_files/criteria.pdf ---
Extracting text...
Splitting text...
Created 1 chunks.
Initializing embeddings...
Creating FAISS index...
FAISS index created.
FAISS vector store saved locally to: 'criteria_faiss_index'

Do you want to upload files for an application? (yes/no): yes

--- Uploading files for New Application ID: 978e77af-59aa-42a6-9dec-ad747447ac4d ---
Please upload the Marksheet PDF:


Saving marksheet.pdf to marksheet.pdf
Marksheet 'marksheet.pdf' saved as uploaded_files/978e77af-59aa-42a6-9dec-ad747447ac4d_marksheet.pdf
Please upload the Aadhaar Card PDF:


Saving adhaar.pdf to adhaar.pdf
Aadhaar 'adhaar.pdf' saved as uploaded_files/978e77af-59aa-42a6-9dec-ad747447ac4d_aadhaar.pdf
Enter applicant's family income (in Lakhs Per Annum, e.g., 4.5): 3
Does this applicant request a loan? (yes/no): yes
Application entry created for ID: 978e77af-59aa-42a6-9dec-ad747447ac4d

Do you want to upload files for an application? (yes/no): no

=== Step 2: Process 1 Application(s) via LangGraph ===

>>> Processing Application 1/1 (ID: 978e77af-59aa-42a6-9dec-ad747447ac4d) <<<

--- Running Data Extraction for: App_1 (ID: 978e77af-59aa-42a6-9dec-ad747447ac4d) ---
Extracting data from Marksheet: uploaded_files/978e77af-59aa-42a6-9dec-ad747447ac4d_marksheet.pdf...
Attempted Marksheet Extraction Results: {'name': 'Anjali', 'email': 'arkodeep3404@gmail.com', 'marks': {'class10_pcm_perc': 88.33, 'class12_pcm_perc': 82.0}, 'wbjee_rank': 2150}
Attempting OCR on uploaded_files/978e77af-59aa-42a6-9dec-ad747447ac4d_aadhaar.pdf using PyMuPDF and GPT-4o Vision...
Vision