In [0]:
%run ./EnvPrep

In [0]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell runs, so edits to the project's .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [0]:
import argparse
import logging
import sys
import time
from pathlib import Path
from importlib import reload
# import analysis.simplified_analyzer as simple_analyzer
# reload(simple_analyzer)
from config import Config
from config_profiles import ConfigProfiles
from interfaces.model_factory import ModelFactory
from analysis.simplified_analyzer import SimplifiedAnalyzer
from utils.quality_monitor import QualityMonitor
from reporting.report_generator import ReportGenerator
from models.base_models import VETQualification, UniQualification, UnitOfCompetency, UniCourse
import json
# Root logging setup: level, format and log-file path are all read from Config.
_root_log_level = getattr(logging, Config.LOG_LEVEL)
_root_log_format = Config.LOG_FORMAT
_root_log_handlers = [
    logging.FileHandler(Config.LOG_FILE),  # persistent copy of the run log
    logging.StreamHandler(),               # echo to the notebook output
]
logging.basicConfig(
    level=_root_log_level,
    format=_root_log_format,
    handlers=_root_log_handlers,
)
logger = logging.getLogger(__name__)

# Cap the Py4J loggers at WARNING; application loggers keep the level set above.
for _py4j_logger_name in ("py4j", "py4j.clientserver", "py4j.java_gateway"):
    logging.getLogger(_py4j_logger_name).setLevel(logging.WARNING)

def load_vet_data(filepath: str) -> VETQualification:
    """Load a VET qualification and its units of competency from a JSON file.

    Args:
        filepath: Location of the JSON document. The value is passed straight
            to ``open``, so a ``pathlib.Path`` works as well as a ``str``.

    Returns:
        A ``VETQualification`` built from the top-level ``code``, ``name`` and
        ``level`` keys, with one ``UnitOfCompetency`` appended to its ``units``
        for every entry under the optional ``units`` key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a required key is missing (``code``/``name``/``level`` on
            the qualification, ``code``/``name`` on a unit).
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which can mis-decode or reject non-ASCII text in the data.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    vet_qual = VETQualification(
        code=data["code"],
        name=data["name"],
        level=data["level"]
    )

    for unit_data in data.get("units", []):
        # An explicit null in the file is treated like a missing value: 0 hours.
        nominal_hours = unit_data.get("nominal_hours")
        unit = UnitOfCompetency(
            code=unit_data["code"],
            name=unit_data["name"],
            description=unit_data.get("description", ""),
            learning_outcomes=unit_data.get("learning_outcomes", []),
            assessment_requirements=unit_data.get("assessment_requirements", ""),
            nominal_hours=nominal_hours if nominal_hours is not None else 0,
            prerequisites=unit_data.get("prerequisites", [])
        )
        vet_qual.units.append(unit)

    return vet_qual


def load_uni_data(filepath: str) -> UniQualification:
    """Load a university qualification and its courses from a JSON file.

    Args:
        filepath: Location of the JSON document. The value is passed straight
            to ``open``, so a ``pathlib.Path`` works as well as a ``str``.

    Returns:
        A ``UniQualification`` built from the top-level ``code`` and ``name``
        keys, with one ``UniCourse`` appended to its ``courses`` for every
        entry under the optional ``courses`` key. Optional course fields fall
        back to empty values; ``study_level`` falls back to ``"intermediate"``
        and ``credit_points`` to ``0``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``code`` or ``name`` is missing on the qualification or
            on any course.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which can mis-decode or reject non-ASCII text in the data.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    uni_qual = UniQualification(
        code=data["code"],
        name=data["name"]
    )

    for course_data in data.get("courses", []):
        course = UniCourse(
            code=course_data["code"],
            name=course_data["name"],
            description=course_data.get("description", ""),
            study_level=course_data.get("study_level", "intermediate"),
            learning_outcomes=course_data.get("learning_outcomes", []),
            prerequisites=course_data.get("prerequisites", []),
            credit_points=course_data.get("credit_points", 0),
            topics=course_data.get("topics", []),
            assessment=course_data.get("assessment", "")
        )
        uni_qual.courses.append(course)

    return uni_qual

In [0]:

# --- Run configuration: everything a reader might tune lives in this cell ---
depth = 'quick'
profile = 'robust'

extract_skills = True
# skip_analysis simply mirrors extract_skills; the two flags cannot be set
# independently unless this line is changed.
skip_analysis = extract_skills
use_cached_skills = True
verbose = True  # Set to True for detailed config output

config = ConfigProfiles.create_config(
    profile_name=profile,
    backend='vllm',
    embedding='jina'  # Add embedding selection
)

if verbose:
    config.print_config()

logger.info(f"Using profile: {profile}")
logger.info(f"Using backend: {config.get_model_info()}")
logger.info(f"Analysis depth: {depth}")

# --- Input data ---
# NOTE(review): absolute, environment-specific volume path; adjust when running elsewhere.
parent_dir = Path("/Volumes/jsa_external_prod/external_vols/scratch/Scratch/Ehsan/NST/MiniProjOct25")
vet_file = parent_dir / "data/HLT54121_Diploma_of_Nursing.json"
uni_file = parent_dir / "data/364JA_Bachelor_of_Nursing.json"

# Alternative dataset (Diploma of Business); swap with the pair above to use it.
# vet_file = parent_dir / "data/BSB50120_Diploma_of_Business.json"
# uni_file = parent_dir / "data/933AA_Diploma_of_Business.json"

logger.info("Loading qualifications...")
vet_qual = load_vet_data(vet_file)
uni_qual = load_uni_data(uni_file)

logger.info(f"Loaded VET: {vet_qual.name} ({len(vet_qual.units)} units)")
logger.info(f"Loaded Uni: {uni_qual.name} ({len(uni_qual.courses)} courses)")

In [0]:
# Create embedding interface
# The factory builds it from the active config.
embeddings = ModelFactory.create_embedding_interface(config)

# A None result is only reported here; execution continues with embeddings=None.
# NOTE(review): the "simple matching" fallback named in the message is presumably
# implemented by downstream consumers - confirm.
if embeddings is None:
    logger.warning("No embedding interface available - using simple matching")

In [0]:
# Create interfaces using factory
# The GenAI interface is built from the same config as the embeddings.
logger.info("Initializing AI interfaces...")
genai = ModelFactory.create_genai_interface(config)
# A None result is only reported here; execution continues with genai=None.
# NOTE(review): the "fallback extraction" named in the message is presumably
# implemented by downstream consumers - confirm.
if genai is None:
    logger.warning("No GenAI interface available - using fallback extraction")



In [0]:
from utils.skill_recalibration import SkillRecalibrationTool

# Both qualifications are recalibrated with the same switches: levels,
# categories and contexts are all enabled.
_recalibration_flags = {
    "recalibrate_levels": True,
    "recalibrate_categories": True,
    "recalibrate_contexts": True,
}

tool = SkillRecalibrationTool(genai=genai, config=config)
tool.recalibrate_vet_skills(vet_qual, **_recalibration_flags)
tool.recalibrate_uni_skills(uni_qual, **_recalibration_flags)

In [0]:
# Optional skill-extraction step, gated by the extract_skills flag.
if extract_skills:
    logger.info("Extracting and saving skills...")
    # Imported here, so the module is only loaded when extraction is enabled.
    from extract_skills import extract_and_save_skills
    # The helper is given the two input file paths (not the loaded qualification
    # objects) and returns the two paths reported in the log line below.
    vet_filepath, uni_filepath = extract_and_save_skills(
        genai, embeddings, config,
        vet_file, 
        uni_file
    )
    logger.info(f"Skills saved to {vet_filepath} and {uni_filepath}")
    
    # NOTE(review): skip_analysis is only logged here; nothing visible in this
    # notebook prevents the later analysis cell from running - confirm intent.
    if skip_analysis:
        logger.info("Skipping analysis as requested")

In [0]:
# The analyzer is given the configuration as a plain dict, not the config object.
_analyzer_config = config.to_dict()
analyzer = SimplifiedAnalyzer(genai=genai, embeddings=embeddings, config=_analyzer_config)

# Compare the VET qualification against the university qualification.
recommendations = analyzer.analyze(
    vet_qual, uni_qual, depth=depth, use_cached_skills=use_cached_skills
)
logger.info(f"Generated {len(recommendations)} recommendations")

In [0]:

# --- Save results ---
# The recommendation count is already logged by the analysis cell and printed
# in the summary below, so it is not logged again here.
output_path = Path("./output/recommendations.json")
output_path.parent.mkdir(exist_ok=True)

# Add backend info to output
analyzer.export_results(recommendations, str(output_path))

# --- HTML report (written next to the JSON file, same stem) ---
report_gen = ReportGenerator()
html_path = output_path.with_suffix('.html')

# Generate enhanced HTML report
html_content = report_gen.generate_html_report(
    recommendations, vet_qual, uni_qual
)

# Write explicitly as UTF-8 so non-ASCII characters in the report do not
# depend on the platform's locale default encoding.
with open(html_path, 'w', encoding='utf-8') as f:
    f.write(html_content)

logger.info(f"HTML report saved to {html_path}")

logger.info("Exporting extracted skills...")

# Generate complete report package including skills
files = report_gen.generate_complete_report_package(
    recommendations, vet_qual, uni_qual
)

logger.info("Report package generated:")
for file_type, filepath in files.items():
    logger.info(f"  {file_type}: {filepath}")

# --- Console summary ---
print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print(f"Backend: {config.get_model_info()}")
print(f"Profile: {profile}")
print(f"Depth: {depth}")
print(f"Recommendations: {len(recommendations)}")


# Show top recommendations: the five highest alignment scores, best first.
if recommendations:
    print("\nTop Recommendations:")
    top_recommendations = sorted(
        recommendations,
        key=lambda x: x.alignment_score,
        reverse=True,
    )[:5]
    for i, rec in enumerate(top_recommendations, 1):
        # The arrow was previously mojibake ("â†’"): UTF-8 bytes decoded as cp1252.
        print(f"{i}. {' + '.join(rec.get_vet_unit_codes())} → {rec.uni_course.code}")
        print(f"   Score: {rec.alignment_score:.1%} | Type: {rec.recommendation.value}")

print(f"\nResults saved to: {output_path}")

In [0]:
# NOTE(review): looks like a leftover scratch cell; it only prints a fixed
# string and nothing visible depends on it - consider removing.
print('test')