In [1]:
# ===============================
# 1. IMPORT LIBRARIES
# ===============================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import joblib

# ===============================
# 2. LOAD DATASET
# ===============================
# The path has no directory component, so it is resolved against the
# current working directory.
df = pd.read_csv("layoff_synthetic_200.csv")

# ===============================
# 3. DEFINE FEATURES & TARGET
# ===============================
# Model input columns. The order matters: the scaler and the classifier are
# fitted on columns in exactly this order, and this same list is persisted
# at the end of the script.
feature_names = [
    "experience", "primary_skill", "certification", "upskilling_last_year",
    "industry", "skill_demand", "industry_layoff_risk", "role_demand",
    "company_size", "salary_band",
]

# X: feature matrix restricted to the columns above; y: target labels.
# A missing column raises KeyError here.
X = df.loc[:, feature_names]
y = df.loc[:, "Layoff_Risk"]

# ===============================
# 4. TRAIN-TEST SPLIT
# ===============================
# Hold out 25% of the rows for evaluation. `stratify=y` keeps the class
# proportions of the target the same in both partitions, and the fixed
# seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.25,
    random_state=42,
)

# ===============================
# 5. FEATURE SCALING
# ===============================
# Learn the per-column mean/variance from the training partition only, so
# no statistics from the held-out rows leak into preprocessing.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both partitions.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ===============================
# 6. MODEL TRAINING
# ===============================
# 200 trees capped at depth 8; `class_weight='balanced'` re-weights classes
# inversely to their frequency in the training labels. `fit` returns the
# estimator itself, so `model` is the trained classifier.
model = RandomForestClassifier(
    class_weight='balanced',
    max_depth=8,
    n_estimators=200,
    random_state=42,
).fit(X_train_scaled, y_train)

# ===============================
# 7. MODEL EVALUATION
# ===============================
# Predict labels for the held-out (already scaled) rows.
y_pred = model.predict(X_test_scaled)

# Each report is printed as a header line followed by the value on the
# next line (sep="\n" puts the newline between the two arguments).
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# ===============================
# 8. SAVE MODEL & SCALER
# ===============================
# Persist everything needed to reproduce predictions later: the trained
# classifier, the fitted scaler, and the feature column order. Files are
# written to the current working directory.
# NOTE: these are pickle-based files; only load them from trusted sources.
joblib.dump(value=model, filename="layoff_model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")
joblib.dump(value=feature_names, filename="feature_names.pkl")

print("\n✅ Model, scaler, and feature names saved successfully!")


Confusion Matrix:
[[13  3  0]
 [ 1 26  0]
 [ 0  5  2]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.81      0.87        16
           1       0.76      0.96      0.85        27
           2       1.00      0.29      0.44         7

    accuracy                           0.82        50
   macro avg       0.90      0.69      0.72        50
weighted avg       0.85      0.82      0.80        50


✅ Model, scaler, and feature names saved successfully!
