In [1]:
import os
import pickle
import re
import sys
import time

import numpy as np
import pandas as pd

In [3]:
# --- Configuration ---
# Location of the pickled random-forest model. Set the JOB_REC_MODEL_PATH
# environment variable to use a different file without editing this cell;
# the original hard-coded location is kept as the fallback.
MODEL_PATH = os.environ.get('JOB_REC_MODEL_PATH') or (
    'C:/Users/ashua/Desktop/Inelligent Job Recomendation Engine/data/Supervised Training/final_random_forest_model.pkl'
)

# Job/skill category codes. Their order defines the order of the R_* and J_*
# feature columns built below.
SKILL_CODES = [
    'ACCT', 'ADM', 'ADVR', 'ANLS', 'ART', 'BD', 'CNST', 'DSGN', 'EDCN', 'ENG',
    'FASH', 'FIN', 'GENB', 'HCPR', 'HR', 'IT', 'LGL', 'MGMT', 'MNFC', 'MRKT',
    'OTHR', 'PR', 'PRJM', 'PROD', 'PRSR', 'QA', 'REAL', 'RSCH', 'SALE', 'SCI',
    'SPRT', 'SUPL', 'TECH', 'TRNS', 'WRT'
]

# Mapping your comprehensive list to the codes
# (category code -> lowercase keywords whose presence signals that category).
SKILL_MAPPER = {
    'IT': ['python', 'java', 'javascript', 'c++', 'c#', 'php', 'ruby', 'sql', 'nosql', 'react', 'node.js', 'html', 'css'],
    'PRJM': ['agile', 'scrum', 'kanban', 'jira', 'project management', 'pmp'],
    'ANLS': ['data analysis', 'statistics', 'tableau', 'power bi', 'excel', 'pandas', 'numpy'],
    'TECH': ['aws', 'azure', 'docker', 'kubernetes', 'linux', 'git', 'cybersecurity', 'cloud'],
    'QA': ['testing', 'selenium', 'quality assurance', 'automation testing', 'junit'],
    'MGMT': ['leadership', 'management', 'teamwork', 'strategy', 'collaboration']
    # NOTE: You will add all remaining mappings here based on your 117-word list
}

# Fail fast on configuration drift: a mapper key that is not a known code would
# never produce a feature column, and duplicate codes would collapse columns.
_unknown_codes = set(SKILL_MAPPER) - set(SKILL_CODES)
assert not _unknown_codes, f"SKILL_MAPPER uses codes missing from SKILL_CODES: {sorted(_unknown_codes)}"
assert len(set(SKILL_CODES)) == len(SKILL_CODES), "SKILL_CODES contains duplicates"

# Full feature layout: all resume (R_) columns followed by all job (J_) columns.
ALL_FEATURE_COLS = [f'R_{c}' for c in SKILL_CODES] + [f'J_{c}' for c in SKILL_CODES]

In [4]:
def clean_text(text):
    """Normalize free text to lowercase letters, digits and whitespace.

    Falsy or missing values (``None``, ``""``, ``NaN``, ``0``) yield an empty
    string. Anything else is converted with ``str()``, lowercased, and every
    character that is not ``a-z``, ``0-9`` or whitespace is deleted (not
    replaced), so "node.js" becomes "nodejs".
    """
    is_missing = (not text) or pd.isna(text)
    if is_missing:
        return ""
    lowered = str(text).lower()
    stripped = re.sub(r'[^a-z0-9\s]', '', lowered)
    return stripped

def _keyword_pattern(keyword):
    """Compile a boundary-aware regex for one skill keyword (None if blank)."""
    parts = str(keyword).lower().split()
    if not parts:
        return None
    # Allow any run of whitespace between the words of a multi-word keyword.
    body = r'\s+'.join(re.escape(part) for part in parts)
    # Guard an edge only when that edge of the keyword is alphanumeric, so
    # 'git' does not fire inside "digital" while 'c++' still fires in "c++17".
    lead = r'(?<![a-z0-9])' if parts[0][0].isalnum() else ''
    trail = r'(?![a-z0-9])' if parts[-1][-1].isalnum() else ''
    return re.compile(lead + body + trail)


def extract_features(text, prefix='R', skill_mapper=None, skill_codes=None):
    """Turn free text into a one-row binary skill-category feature frame.

    A category is flagged when any of its keywords occurs in the text as a
    whole token (case-insensitive). Matching runs on the lowercased text rather
    than on ``clean_text`` output, because stripping punctuation makes keywords
    such as 'c++', 'c#' and 'node.js' impossible to match, and plain substring
    tests produce false hits ('excel' in "excellent", 'aws' in "laws").

    Args:
        text: Resume or job-description text. ``None``/``NaN``/falsy values are
            treated as empty text; other non-strings are converted with ``str()``.
        prefix: Column-name prefix ('R' for resume, 'J' for job).
        skill_mapper: Optional ``{code: [keywords]}`` mapping; defaults to the
            module-level ``SKILL_MAPPER``.
        skill_codes: Optional ordered list of codes; defaults to the
            module-level ``SKILL_CODES``.

    Returns:
        ``(frame, codes)`` where ``frame`` is a one-row DataFrame with a 0/1
        column ``f'{prefix}_{code}'`` per entry of ``skill_codes``, and ``codes``
        is the list of matched codes in ``skill_codes`` order (matched codes not
        present in ``skill_codes`` follow, sorted) so output is deterministic.
    """
    if skill_mapper is None:
        skill_mapper = SKILL_MAPPER
    if skill_codes is None:
        skill_codes = SKILL_CODES

    # Same missing-value policy as clean_text, but punctuation is preserved.
    if isinstance(text, str):
        haystack = text.lower()
    elif not text or pd.isna(text):
        haystack = ""
    else:
        haystack = str(text).lower()

    # Check text against the mapper
    found_codes = set()
    for code, keywords in skill_mapper.items():
        patterns = (_keyword_pattern(kw) for kw in keywords)
        if any(p is not None and p.search(haystack) for p in patterns):
            found_codes.add(code)

    # Create the binary row
    data = {f'{prefix}_{c}': [1 if c in found_codes else 0] for c in skill_codes}

    # A set has no stable iteration order across interpreter runs; report the
    # matches in configuration order instead.
    ordered = [c for c in skill_codes if c in found_codes]
    ordered += sorted(found_codes.difference(skill_codes))
    return pd.DataFrame(data), ordered

def load_engine(model_path=None):
    """Load the pickled model used for scoring.

    Args:
        model_path: Optional path to the pickle file. When omitted, the
            module-level ``MODEL_PATH`` is used.

    Returns:
        The unpickled object, or ``None`` (after printing a message) when the
        file does not exist. Other I/O or unpickling errors propagate.
    """
    path = MODEL_PATH if model_path is None else model_path
    try:
        with open(path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code embedded in the
            # file; only load model files you created or otherwise trust.
            return pickle.load(f)
    except FileNotFoundError:
        # "\u274c" is the cross-mark emoji, written as an escape so the message
        # survives being saved or exported with the wrong text encoding.
        print(f"\u274c Model not found! Please check the file path: {path}")
        return None

# Initialize the model
# Loaded once when this cell runs. rf_model is None if the model file was not
# found (load_engine prints a message in that case), so later cells must check.
rf_model = load_engine()

In [6]:
def run_recommendation_test(resume_txt, job_txt, latency_budget_s=3.0):
    """Score one resume/job pair with ``rf_model`` and print a short report.

    The report shows the predicted suitability (probability taken from column 1
    of ``predict_proba``), the elapsed time for feature extraction plus
    prediction, the skill categories detected in each text, and whether the
    elapsed time is under the latency budget.

    Args:
        resume_txt: Resume text.
        job_txt: Job-description text.
        latency_budget_s: Maximum acceptable latency in seconds (default 3.0).

    Returns:
        None. All results are printed.
    """
    if rf_model is None:
        # Say why nothing happens instead of returning silently.
        print("Model is not loaded; skipping recommendation test.")
        return

    # perf_counter is monotonic and high-resolution; time.time() can jump when
    # the system clock is adjusted and is coarse on some platforms.
    start_clock = time.perf_counter()

    # 1. Extract and Track Skills
    res_df, res_skills = extract_features(resume_txt, 'R')
    job_df, job_skills = extract_features(job_txt, 'J')

    # 2. Prepare 70-column input
    # reindex fixes the column order to ALL_FEATURE_COLS and zero-fills any
    # column that extraction did not produce.
    input_x = pd.concat([res_df, job_df], axis=1).reindex(columns=ALL_FEATURE_COLS, fill_value=0)

    # 3. Predict Probability
    # NOTE(review): column 1 is assumed to be the "suitable" class - confirm
    # against rf_model.classes_.
    score = rf_model.predict_proba(input_x)[:, 1][0]

    latency = time.perf_counter() - start_clock

    # Sort for display so the report does not depend on set iteration order.
    res_label = ', '.join(sorted(res_skills)) if res_skills else 'None'
    job_label = ', '.join(sorted(job_skills)) if job_skills else 'None'

    # --- Results Dashboard ---
    # Emoji are written as escapes (bar chart, stopwatch, check mark, target,
    # cross mark) so they survive a wrongly-encoded save or export.
    print("=" * 30)
    print(f"\U0001F4CA SUITABILITY SCORE: {score*100:.2f}%")
    print(f"\u23F1\uFE0F LATENCY: {latency:.4f} seconds")
    print("-" * 30)
    print(f"\u2705 Skills Detected in Resume: {res_label}")
    print(f"\U0001F3AF Skills Required by Job: {job_label}")

    # Verification check
    if latency < latency_budget_s:
        print("\n\u2705 Phase 4 Latency Goal Met!")
    else:
        print("\n\u274C Phase 4 Latency Goal Failed!")
    print("=" * 30)

# --- RUN THE TEST ---
# Smoke test: one hand-written resume scored against one hand-written job
# description that share the Python, SQL and Agile keywords.
sample_resume = "I am a Python developer with experience in SQL and Agile Project Management."
sample_jd = "Looking for an Agile Software Engineer skilled in Python and SQL."

# Prints the score report to stdout (requires rf_model to have loaded).
run_recommendation_test(sample_resume, sample_jd)

📊 SUITABILITY SCORE: 94.61%
⏱️ LATENCY: 0.0427 seconds
------------------------------
✅ Skills Detected in Resume: PRJM, IT, MGMT
🎯 Skills Required by Job: PRJM, IT

✅ Phase 4 Latency Goal Met!
