In [None]:
# ============================================
# 1. IMPORTS & NOTEBOOK SETUP
# ============================================
# Statement order matters in this cell: the warnings filter is installed
# before TensorFlow is imported, and 'scripts' is appended to sys.path
# before `preprocessing` is imported.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import warnings

# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation/runtime warnings raised by later
# cells - confirm a blanket filter is intended rather than a targeted one.
warnings.filterwarnings('ignore')

# Allow module import from scripts/
# The entry is a relative path, so it resolves against the kernel's current
# working directory; presumably the notebook is launched from the project
# root - TODO confirm.
sys.path.append('scripts')
from preprocessing import preprocess

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# ============================================
# 2. DATA PREPROCESSING
# ============================================
# Run the project preprocessing pipeline on the raw attrition CSV and
# unpack the six objects it returns:
#   X_train, X_val, X_test, y_train, y_val, y_test
# ============================================

attrition_csv = 'data/raw/HR_Attrition_Dataset.csv'
X_train, X_val, X_test, y_train, y_val, y_test = preprocess(attrition_csv)

# Row count of each split, plus the column count of the training frame.
print(f"Training set: {X_train.shape[0]} samples, {X_train.shape[1]} features")
print(f"Validation set: {X_val.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples\n")

# Column names of the training feature frame.
train_feature_names = list(X_train.columns)
print("Features:", train_feature_names, "\n")

# Relative frequency of each label in the training split, to 3 decimals.
print("Class distribution - Training:")
train_label_share = y_train.value_counts(normalize=True).round(3)
print(train_label_share, "\n")


In [None]:
# ============================================
# 3. CLASS IMBALANCE OVERVIEW
# ============================================

# Count training rows per label (0 = stay, 1 = leave).
n_negative = y_train.eq(0).sum()
n_positive = y_train.eq(1).sum()

# Negatives per positive; NaN when the split holds no positive rows, so the
# division is never attempted with a zero denominator.
if n_positive > 0:
    imbalance_ratio = n_negative / n_positive
else:
    imbalance_ratio = np.nan

# Emit the summary one line at a time.
for report_line in (
    f"Class 0 (Stay): {n_negative}",
    f"Class 1 (Leave): {n_positive}",
    f"Imbalance ratio: {imbalance_ratio:.2f}",
    f"Recommended class_weight or pos_weight: {imbalance_ratio:.2f}",
):
    print(report_line)


In [None]:
# ============================================
# 4. BUILD FEEDFORWARD NEURAL NETWORK (KERAS)
# ============================================

# Seed the Python, NumPy and TensorFlow RNGs before any weights are created,
# so initialisation, dropout masks and batch shuffling repeat across
# "Restart Kernel -> Run All" (as far as the compute backend allows).
SEED = 42
keras.utils.set_random_seed(SEED)

# Convert the pandas objects to numpy arrays for Keras.
# Features are cast to float32 explicitly: a frame that mixes dtypes (e.g.
# bool dummy columns next to float columns) would otherwise come out as an
# object array, which Keras cannot turn into a tensor.
X_train_np = X_train.to_numpy(dtype=np.float32)
X_val_np = X_val.to_numpy(dtype=np.float32)
X_test_np = X_test.to_numpy(dtype=np.float32)
# Labels keep their original dtype so downstream label handling is unchanged.
y_train_np = y_train.to_numpy()
y_val_np = y_val.to_numpy()
y_test_np = y_test.to_numpy()

# Model architecture: classic MLP with two hidden dense layers (64 -> 32),
# dropout after each, and a single sigmoid unit for the output.
model = keras.Sequential([
    layers.Input(shape=(X_train_np.shape[1],)),  # one input per feature column
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')  # Binary output for attrition
])

# The AUC metric is named 'auc', so its validation counterpart is logged as
# 'val_auc'.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc')]
)

model.summary()


In [None]:
# ============================================
# 5. HANDLE CLASS IMBALANCE WITH CLASS_WEIGHTS
# ============================================

# Calculate class weights inversely proportional to class frequencies
from sklearn.utils.class_weight import compute_class_weight

# Distinct labels present in the training split (np.unique returns them sorted).
train_classes = np.unique(y_train_np)

# One weight per entry of `train_classes`, in the same order.
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train_np
)

# Key each weight by its actual label rather than by its position in the
# array, so the mapping stays correct even if the labels are not exactly
# 0..k-1 (e.g. a class is absent from this split). Plain int/float values
# keep the dict and its printout free of numpy scalar types.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(train_classes, balanced_weights)
}
print("Class weights (to be used in .fit):", class_weights)


In [None]:
# ============================================
# 6. TRAIN KERAS MODEL
# ============================================

# Stop when 'val_auc' has gone 10 epochs without a new maximum, and ask
# Keras to restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    mode='max',
    patience=10,
    monitor='val_auc',
)

# Keyword options for the training run: at most 100 epochs, mini-batches of
# 32, per-class loss weighting, and the early-stopping callback above.
training_options = dict(
    validation_data=(X_val_np, y_val_np),
    epochs=100,
    batch_size=32,
    class_weight=class_weights,
    callbacks=[early_stopping],
    verbose=2,
)

# `history` keeps the object returned by fit for later inspection.
history = model.fit(X_train_np, y_train_np, **training_options)

