In [0]:
%run ./EnvPrep

In [0]:
import argparse
import json
import logging
import sys
from pathlib import Path
from typing import List, Optional

from config import Config
from models.base_models import VETQualification, UniQualification, UnitOfCompetency, UniCourse
from interfaces.genai_interface import GenAIInterface
from interfaces.embedding_interface import EmbeddingInterface
from analysis.analyzer import CreditTransferAnalyzer
from reporting.report_generator import ReportGenerator

In [0]:

# Root logging setup: records go both to the configured log file and to the
# stream handler. The options are collected in a dict first (evaluated in the
# order level -> format -> handlers) and then handed to basicConfig.
_logging_options = {
    "level": getattr(logging, Config.LOG_LEVEL),
    "format": Config.LOG_FORMAT,
    "handlers": [
        logging.FileHandler(Config.LOG_FILE),
        logging.StreamHandler(),
    ],
}
logging.basicConfig(**_logging_options)
logger = logging.getLogger(__name__)

# Raise the threshold of the Py4J loggers to WARNING so they do not drown out
# the application's own messages.
for _py4j_logger_name in ("py4j", "py4j.clientserver", "py4j.java_gateway"):
    logging.getLogger(_py4j_logger_name).setLevel(logging.WARNING)


def load_vet_data(filepath: str) -> VETQualification:
    """
    Load VET qualification data from JSON file

    Expected format:
    {
        "code": "ICT50220",
        "name": "Diploma of Information Technology",
        "level": "Diploma",
        "units": [
            {
                "code": "ICTICT517",
                "name": "Match IT needs with the strategic direction",
                "description": "...",
                "learning_outcomes": [...],
                "assessment_requirements": "...",
                "nominal_hours": 60,
                "prerequisites": []
            }
        ]
    }

    Args:
        filepath: Path of the JSON file to read. The file is decoded as UTF-8.

    Returns:
        A VETQualification built from the top-level "code", "name" and
        "level" keys, with one UnitOfCompetency appended to its ``units``
        list for every entry of the "units" array. Optional unit fields
        fall back to "" (description, assessment_requirements), [] 
        (learning_outcomes, prerequisites) or 0 (nominal_hours).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If "code", "name" or "level" is missing at the top level,
            or "code"/"name" is missing from a unit.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII text in descriptions loads the same everywhere.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    vet_qual = VETQualification(
        code=data["code"],
        name=data["name"],
        level=data["level"]
    )

    # `or []` also covers an explicit `"units": null`, which .get()'s default
    # alone would pass through as None and crash the loop.
    for unit_data in data.get("units") or []:
        unit = UnitOfCompetency(
            code=unit_data["code"],
            name=unit_data["name"],
            description=unit_data.get("description", ""),
            learning_outcomes=unit_data.get("learning_outcomes", []),
            assessment_requirements=unit_data.get("assessment_requirements", ""),
            nominal_hours=unit_data.get("nominal_hours", 0),
            prerequisites=unit_data.get("prerequisites", [])
        )
        vet_qual.units.append(unit)

    logger.info(f"Loaded VET qualification: {vet_qual.code} with {len(vet_qual.units)} units")
    return vet_qual


def load_uni_data(filepath: str) -> UniQualification:
    """
    Load university qualification data from JSON file

    Expected format:
    {
        "code": "BIT",
        "name": "Bachelor of Information Technology",
        "courses": [
            {
                "code": "COMP1234",
                "name": "Introduction to Programming",
                "description": "...",
                "study_level": "introductory",
                "learning_outcomes": [...],
                "prerequisites": [],
                "credit_points": 6,
                "topics": [...],
                "assessment": "..."
            }
        ]
    }

    Args:
        filepath: Path of the JSON file to read. The file is decoded as UTF-8.

    Returns:
        A UniQualification built from the top-level "code" and "name" keys,
        with one UniCourse appended to its ``courses`` list for every entry
        of the "courses" array. Optional course fields fall back to ""
        (description, assessment), [] (learning_outcomes, prerequisites,
        topics), 0 (credit_points) or "intermediate" (study_level).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If "code" or "name" is missing at the top level or from
            a course.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII text in descriptions loads the same everywhere.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    uni_qual = UniQualification(
        code=data["code"],
        name=data["name"]
    )

    # `or []` also covers an explicit `"courses": null`, which .get()'s
    # default alone would pass through as None and crash the loop.
    for course_data in data.get("courses") or []:
        course = UniCourse(
            code=course_data["code"],
            name=course_data["name"],
            description=course_data.get("description", ""),
            study_level=course_data.get("study_level", "intermediate"),  # Default to intermediate
            learning_outcomes=course_data.get("learning_outcomes", []),
            prerequisites=course_data.get("prerequisites", []),
            credit_points=course_data.get("credit_points", 0),
            topics=course_data.get("topics", []),
            assessment=course_data.get("assessment", "")
        )
        uni_qual.courses.append(course)

    logger.info(f"Loaded university qualification: {uni_qual.code} with {len(uni_qual.courses)} courses")
    return uni_qual

In [0]:
def _init_web_genai(success_message: str, failure_prefix: str):
    """Try to create the web-API GenAI client from Config; return None on failure.

    Args:
        success_message: Text logged at INFO level once the client exists.
        failure_prefix: Text placed before the exception in the WARNING that
            is logged when construction fails.

    Returns:
        The GenAIInterface instance, or None if importing or constructing it
        raised an exception.
    """
    try:
        # Imported lazily so an import failure is handled like any other
        # initialization failure.
        from interfaces.genai_interface import GenAIInterface
        client = GenAIInterface(
            model_endpoint=Config.GENAI_ENDPOINT,
            api_key=Config.GENAI_API_KEY,
            timeout=Config.GENAI_TIMEOUT
        )
        logger.info(success_message)
        return client
    except Exception as exc:
        logger.warning(f"{failure_prefix}: {exc}")
        return None


def initialize_interfaces(
    embedding_cache_dir: str = "/root/.cache/huggingface/hub",
    embedding_external_dir: str = "/Volumes/jsa_external_prod/external_vols/scratch/Scratch/Ehsan/Models",
    embedding_device: str = "cuda:1",
    embedding_batch_size: int = 32,
):
    """Initialize GenAI and Embedding interfaces

    GenAI selection: when Config.USE_VLLM is set, the vLLM backend is tried
    first and, if it fails and Config.USE_GENAI is set, the web API is used
    as a fallback. When only Config.USE_GENAI is set, the web API is used
    directly. Every failure is logged as a warning rather than raised.

    Args:
        embedding_cache_dir: Passed to EmbeddingInterface as ``model_cache_dir``.
        embedding_external_dir: Passed to EmbeddingInterface as
            ``external_model_dir``.
        embedding_device: Passed to EmbeddingInterface as ``device``.
        embedding_batch_size: Passed to EmbeddingInterface as ``batch_size``.

        The defaults are the values that were previously hard-coded in this
        function, so calling it without arguments behaves as before.
        NOTE(review): Config already provides MODEL_CACHE_DIR and
        EXTERNAL_MODEL_DIR for the vLLM backend; consider passing those here
        if the embedding model is meant to share them.

    Returns:
        A ``(genai, embeddings)`` tuple. Either element is None when the
        corresponding interface is disabled or could not be initialized.
    """
    genai = None
    embeddings = None

    # Initialize GenAI - prefer vLLM over web API
    if Config.USE_VLLM:
        try:
            from interfaces.vllm_genai_interface import VLLMGenAIInterface
            genai = VLLMGenAIInterface(
                model_name=Config.VLLM_MODEL_NAME,
                number_gpus=Config.VLLM_NUM_GPUS,
                max_model_len=Config.VLLM_MAX_MODEL_LEN,
                model_cache_dir=Config.MODEL_CACHE_DIR,
                external_model_dir=Config.EXTERNAL_MODEL_DIR
            )
            logger.info(f"vLLM GenAI interface initialized with model: {Config.VLLM_MODEL_NAME}")
        except Exception as e:
            logger.warning(f"Failed to initialize vLLM GenAI interface: {e}")
            # Fall back to web API if available
            if Config.USE_GENAI:
                genai = _init_web_genai(
                    "Fell back to web API GenAI interface",
                    "Failed to initialize web API GenAI interface",
                )
    elif Config.USE_GENAI:
        genai = _init_web_genai(
            "Web API GenAI interface initialized",
            "Failed to initialize GenAI interface",
        )

    # Initialize Embeddings
    try:
        embeddings = EmbeddingInterface(
            model_name=Config.EMBEDDING_MODEL_NAME,
            model_cache_dir=embedding_cache_dir,
            external_model_dir=embedding_external_dir,
            device=embedding_device,
            batch_size=embedding_batch_size
        )
        logger.info("Embedding interface initialized")
    except Exception as e:
        logger.warning(f"Failed to initialize Embedding interface: {e}")

    return genai, embeddings

In [0]:
# Input files for this run
vet_file = "./data/sample_vet.json"
uni_file = "./data/sample_uni.json"

logger.info("Starting credit transfer analysis")

# Parse both qualification files into their model objects
vet_qual = load_vet_data(vet_file)
uni_qual = load_uni_data(uni_file)

In [0]:
# Build the GenAI and embedding clients. initialize_interfaces() logs a warning
# instead of raising when a backend cannot be created, so either of these
# names may be None after this cell.
genai, embeddings = initialize_interfaces()

In [0]:
# Hand the interfaces (either may be None) and the configuration dictionary
# to the analyzer.
analyzer = CreditTransferAnalyzer(
    genai=genai,
    embeddings=embeddings,
    config=Config.get_config_dict(),
)

# Run the VET -> university comparison; no explicit target course list is
# supplied (target_courses=None).
logger.info("Performing credit transfer analysis...")
recommendations = analyzer.analyze_transfer(
    vet_qual=vet_qual, uni_qual=uni_qual, target_courses=None
)