# In [4]:
import pandas as pd           # Used for data handling
import os                     # Used for checking file existence
from typing import Dict, Any  # Used for function typing hints
from IPython.display import display # Used for displaying dataframes

# Type alias for a single applicant record (used for typing hints): a mapping
# from column name (e.g. 'Credit amount', 'Duration', 'Purpose') to its value.
Applicant = Dict[str, Any]

def _is_missing(value: object) -> bool:
    """Return True when *value* is None or a float NaN (pandas' empty-cell marker)."""
    # NaN is the only float that compares unequal to itself.
    return value is None or (isinstance(value, float) and value != value)


def analyze_risk(applicant: "Applicant") -> str:
    """
    Categorize an applicant's credit risk with a fixed, ordered set of rules.

    Rules are evaluated top to bottom and the first match wins:
    High-risk rules (H1-H3) first, then low-risk rules (L1-L2), and finally
    the 'Medium Risk' catch-all.

    *** YOUR TASK: TUNE THESE THRESHOLDS! ***
    These initial values are educated guesses. To maximize your project's accuracy,
    you must change the numbers and purpose sets based on your 'Training Data' analysis.

    Args:
        applicant: Mapping-like record (a dict or a DataFrame row) that is read
            through ``.get`` for the keys 'Credit amount', 'Duration' and
            'Purpose'.

    Returns:
        One of "High Risk (H1)", "High Risk (H2)", "High Risk (H3)",
        "Low Risk (L1)", "Low Risk (L2)" or "Medium Risk".

    Missing data:
        A key that is absent, None, or NaN is treated as missing. A missing
        'Credit amount' or 'Duration' falls back to a pessimistic default that
        trips the matching High-risk rule; a missing 'Purpose' matches no
        purpose rule.
    """

    # --- Extract Key Metrics from the DataFrame Row ---
    # NOTE: These names match the column headers in your CSV file.
    # A plain `.get(key, default)` only covers an absent key; rows read from a
    # CSV carry NaN for empty cells, which would make every comparison below
    # False and silently skip the rule, so NaN/None are mapped to the default.
    credit_amount = applicant.get('Credit amount')
    if _is_missing(credit_amount):
        credit_amount = 999999

    duration = applicant.get('Duration')
    if _is_missing(duration):
        duration = 60

    raw_purpose = applicant.get('Purpose')
    # Normalize case and surrounding whitespace so ' Business ' matches 'business'.
    loan_purpose = '' if _is_missing(raw_purpose) else str(raw_purpose).strip().lower()

    # =======================================================
    # RULE SET 1: HIGH RISK (Designed to predict 'bad' outcomes)
    # =======================================================

    # Rule H1: High Credit Amount
    # TUNE THIS: If the requested credit is very high, the financial strain and risk are greater.
    HIGH_AMOUNT_THRESHOLD = 5000
    if credit_amount > HIGH_AMOUNT_THRESHOLD:
        return "High Risk (H1)"

    # Rule H2: Long Duration
    # TUNE THIS: Longer repayment times introduce more uncertainty and chance of life changes.
    LONG_DURATION_THRESHOLD = 48
    if duration > LONG_DURATION_THRESHOLD:
        return "High Risk (H2)"

    # Rule H3: High-Risk Purpose
    # Loans for speculative or non-essential purposes are often deemed high-risk.
    HIGH_RISK_PURPOSES = {'business', 'other', 'repairs', 'vacation'}
    if loan_purpose in HIGH_RISK_PURPOSES:
        return "High Risk (H3)"

    # =======================================================
    # RULE SET 2: LOW RISK (Designed to predict 'good' outcomes)
    # =======================================================

    # Rule L1: Low Credit Amount AND Short Duration
    # Small loans paid back quickly are generally the safest.
    LOW_AMOUNT_THRESHOLD = 1500
    SHORT_DURATION_THRESHOLD = 18
    if credit_amount < LOW_AMOUNT_THRESHOLD and duration <= SHORT_DURATION_THRESHOLD:
        return "Low Risk (L1)"

    # Rule L2: Safe Purpose
    # TUNE THIS: Some purposes (like 'radio/TV') might be very safe.
    SAFE_PURPOSES = {'radio/tv', 'domestic appliances', 'education', 'car'}
    if loan_purpose in SAFE_PURPOSES:
        return "Low Risk (L2)"

    # =======================================================
    # RULE SET 3: MEDIUM RISK (Catch-all)
    # =======================================================

    # If the applicant didn't trigger any explicit High or Low risk rules, they fall here.
    return "Medium Risk"

# In [6]:
# Where the applicant CSV lives, and the internal name for the risk column
FILE_PATH = r"C:\Users\Joel_\Downloads\AI-assessment-1\test_data.csv"
RISK_COLUMN_NAME = 'Risk'

# Load the CSV. Any failure is reported and leaves an empty DataFrame behind,
# so the summary below is simply skipped instead of raising.
try:
    if not os.path.exists(FILE_PATH):
        raise FileNotFoundError(f"File not found: {FILE_PATH}")
    df_applicants = pd.read_csv(FILE_PATH)
    print(f"Successfully loaded data from: {FILE_PATH}")
except FileNotFoundError:
    print(f"ERROR: Could not find or load '{FILE_PATH}'. Please ensure the path is correct.")
    df_applicants = pd.DataFrame()
except Exception as load_error:
    print(f"Error loading data: {load_error}. Check file contents.")
    df_applicants = pd.DataFrame()

# Summarize the load: row count, then the exact column headers
# (these headers are the keys the rule function looks up).
if not df_applicants.empty:
    applicant_count = len(df_applicants)
    column_names = df_applicants.columns.tolist()
    separator = "\n" + "="*50 + "\n"

    print(f"Total applicants loaded: {applicant_count}")
    print("\nAll Columns in the Data:", column_names, separator, sep="\n")

# Successfully loaded data from: C:\Users\Joel_\Downloads\AI-assessment-1\test_data.csv
# Total applicants loaded: 280
#
# All Columns in the Data:
# ['ID', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account', 'Credit amount', 'Duration', 'Purpose']


