### *NOTE: this is the updated optimizer for training our DNN*
# Training of Final DNN
This jupyter notebook file is where the final version of the DNN is trained and saved. It is written to be fully reproducible.

I created a new env to run this file and its sister file (dnn_load_test.ipynb). It's probably easiest to create a new conda env using this command:

and this YAML file content:

``` yaml
name: consensus-tf
channels:
  - defaults
  - conda-forge
dependencies:
  - python=3.11.5
  - matplotlib=3.10
  - scikit-learn=1.6.1
  - tensorflow=2.12.0
  - notebook=7.3.2
  - pandas=2.2.3
```

### Extra note: If you do edit this file and yield a new model, you'll have to copy/paste the json and keras files that save the preprocessor and weights over to the api/backend/ml_models/deep_neural_network folder.

In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Set seeds for reproducibility
RANDOM_SEED = 11
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

print("="*60)
print("SIMPLIFIED HYPERPARAMETER OPTIMIZATION")
print("="*60)

# Load and prepare data
df = pd.read_csv('/Users/seanblundin/Documents/courses/cs3200/GINIndicator/datasets/MEGAFRAME_CLEANEDV2.csv')
X = df.drop(columns=['UNEMP', 'Reference area', 'REF_AREA', 'TIME_PERIOD'])
y = df['UNEMP']

categorical_features = ['Region']
numerical_features = X.columns.difference(categorical_features)

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
])

X_processed = preprocessor.fit_transform(X)

# Train/validation split (keeping test simple)
X_train, X_val, y_train, y_val = train_test_split(
    X_processed, y, test_size=0.2, random_state=RANDOM_SEED
)

print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")

def create_and_evaluate_model(architecture, regularization_level, batch_size, X_train, y_train, X_val, y_val):
    """Create, train, and evaluate a model configuration"""
    
    # Set regularization based on level
    if regularization_level == 'very_light':
        dropout_rate = 0.02
        l2_reg = 0.0001
    elif regularization_level == 'light':
        dropout_rate = 0.05
        l2_reg = 0.0001
    else:  # moderate
        dropout_rate = 0.1
        l2_reg = 0.001
    
    # Create model
    model = Sequential()
    model.add(Dense(architecture[0], activation='relu', kernel_regularizer=l2(l2_reg), 
                   input_shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    
    for units in architecture[1:]:
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2_reg)))
        model.add(BatchNormalization())
        if units > 16:  # Only dropout on larger layers (matching your original logic)
            model.add(Dropout(dropout_rate))
    
    model.add(Dense(1))  # Output layer
    
    # Compile
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
    
    # Train with callbacks
    early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=0)
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=300,  # Match your original epochs
        batch_size=batch_size,
        callbacks=[early_stop],
        verbose=0
    )
    
    # Evaluate on validation set only (fair comparison)
    val_pred = model.predict(X_val, verbose=0)
    train_pred = model.predict(X_train, verbose=0)
    
    # Calculate metrics
    val_mse = mean_squared_error(y_val, val_pred)
    val_mae = mean_absolute_error(y_val, val_pred)
    val_r2 = r2_score(y_val, val_pred)
    
    train_mse = mean_squared_error(y_train, train_pred)
    overfitting_gap = val_mse - train_mse
    
    return {
        'architecture': architecture,
        'regularization': regularization_level,
        'batch_size': batch_size,
        'val_mse': val_mse,
        'val_mae': val_mae,
        'val_r2': val_r2,
        'overfitting_gap': overfitting_gap,
        'dropout': dropout_rate,
        'l2_reg': l2_reg
    }

# Define architectures to test (including your original)
architectures = [
    [128, 64, 32, 16],   # YOUR ORIGINAL MODEL
    [256, 128, 64],      # Previous optimization winner
    [224, 112, 56],      # Slightly smaller
    [256, 128, 64, 32],  # Best from recent results
]

regularization_levels = ['very_light', 'light', 'moderate']

# Test configurations
test_configs = []

# Test your original model exactly as it was
test_configs.append({
    'architecture': [128, 64, 32, 16],
    'regularization': 'very_light',
    'batch_size': 8  # Your original batch size
})

# Test other architectures with different settings
for arch in architectures:
    if arch != [128, 64, 32, 16]:  # Skip original since we already added it
        for reg_level in ['light', 'moderate']:
            test_configs.append({
                'architecture': arch,
                'regularization': reg_level,
                'batch_size': 32
            })

# Also test your original architecture with modern settings
test_configs.append({
    'architecture': [128, 64, 32, 16],
    'regularization': 'moderate',
    'batch_size': 32
})

print(f"\nTesting {len(test_configs)} configurations including your ORIGINAL model")
print("="*80)

# Run experiments
results = []
for i, config in enumerate(test_configs):
    arch = config['architecture']
    reg_level = config['regularization']
    batch_size = config['batch_size']
    
    # Highlight original model
    if arch == [128, 64, 32, 16] and reg_level == 'very_light' and batch_size == 8:
        print(f"🔥 Testing YOUR ORIGINAL MODEL: {arch} with {reg_level} regularization, batch_size={batch_size}")
    else:
        print(f"Testing: {arch} with {reg_level} regularization, batch_size={batch_size}")
        
    result = create_and_evaluate_model(arch, reg_level, batch_size, X_train, y_train, X_val, y_val)
    results.append(result)
    print(f"  MSE: {result['val_mse']:.3f} | MAE: {result['val_mae']:.2f} | R²: {result['val_r2']:.3f} | Overfitting: {result['overfitting_gap']:.2f}")

# Sort results by validation MSE
results.sort(key=lambda x: x['val_mse'])

print(f"\n{'='*90}")
print("RESULTS SUMMARY (Best to Worst) - ALL EVALUATED ON UNSEEN VALIDATION DATA")
print(f"{'='*90}")
print(f"{'Rank':<4} {'Architecture':<20} {'Reg':<10} {'Batch':<5} {'MSE':<6} {'MAE':<6} {'R²':<6} {'Overfit':<7}")
print("-" * 90)

for i, result in enumerate(results):
    arch_str = str(result['architecture']).replace(' ', '')
    
    # Highlight original model in results
    if (result['architecture'] == [128, 64, 32, 16] and 
        result['regularization'] == 'very_light' and 
        result['batch_size'] == 8):
        rank_str = f"🔥{i+1}"
    else:
        rank_str = str(i+1)
    
    print(f"{rank_str:<4} {arch_str:<20} {result['regularization']:<10} {result['batch_size']:<5} "
          f"{result['val_mse']:<6.3f} {result['val_mae']:<6.2f} {result['val_r2']:<6.3f} {result['overfitting_gap']:<7.2f}")

# Find your original model in results
original_result = None
for result in results:
    if (result['architecture'] == [128, 64, 32, 16] and 
        result['regularization'] == 'very_light' and 
        result['batch_size'] == 8):
        original_result = result
        break

original_rank = results.index(original_result) + 1 if original_result else None

# Identify winner
winner = results[0]
print(f"\n🏆 OVERALL WINNER:")
print(f"   Architecture: {winner['architecture']}")
print(f"   Regularization: {winner['regularization']} (dropout={winner['dropout']}, L2={winner['l2_reg']})")
print(f"   Batch Size: {winner['batch_size']}")
print(f"   Performance: MSE={winner['val_mse']:.3f}, MAE={winner['val_mae']:.2f}, R²={winner['val_r2']:.3f}")
print(f"   Overfitting Gap: {winner['overfitting_gap']:.2f}")

# Special analysis of your original model
if original_result:
    print(f"\n📊 YOUR ORIGINAL MODEL ANALYSIS:")
    print(f"   Ranked: #{original_rank} out of {len(results)}")
    print(f"   FAIR R² Score: {original_result['val_r2']:.3f} (vs your reported 0.83)")
    print(f"   MSE: {original_result['val_mse']:.3f}")
    print(f"   MAE: {original_result['val_mae']:.2f}")
    
    if original_rank == 1:
        print(f"   🎉 Your original model IS the winner!")
    elif original_rank <= 3:
        print(f"   👍 Your original model performs very well (top 3)")
        winner_gap = winner['val_mse'] - original_result['val_mse']
        print(f"   📈 Winner is only {winner_gap:.3f} MSE better")
    else:
        print(f"   📊 Your original model is solid but others perform better")
        winner_gap = original_result['val_mse'] - winner['val_mse']
        print(f"   📈 Winner is {winner_gap:.3f} MSE better ({winner_gap/original_result['val_mse']*100:.1f}% improvement)")
    
    print(f"\n💡 KEY INSIGHT:")
    print(f"   Original reported R² = 0.83 (evaluated on training data - inflated)")
    print(f"   True R² on unseen data = {original_result['val_r2']:.3f} (fair evaluation)")
    print(f"   Difference = {0.83 - original_result['val_r2']:.3f} (due to data leakage)")

# Quick overfitting assessment
if winner['overfitting_gap'] < 1.0:
    print(f"\n   ✅ Excellent generalization")
elif winner['overfitting_gap'] < 3.0:
    print(f"\n   ✅ Good generalization") 
elif winner['overfitting_gap'] < 5.0:
    print(f"\n   ⚠️  Moderate overfitting")
else:
    print(f"\n   ❌ High overfitting - consider more regularization")

print(f"\n{'='*60}")
print("RECOMMENDATION")
print(f"{'='*60}")

if winner['val_r2'] > 0.8:
    print("🎯 EXCELLENT MODEL - Ready for deployment!")
elif winner['val_r2'] > 0.7:
    print("👍 GOOD MODEL - Should work well for predictions")
elif winner['val_r2'] > 0.6:
    print("🤔 DECENT MODEL - Consider more data or features")
else:
    print("😟 WEAK MODEL - May need fundamental changes")

print(f"\nMAE of {winner['val_mae']:.1f} means predictions are typically off by ±{winner['val_mae']:.1f} percentage points")
print(f"R² of {winner['val_r2']:.3f} means the model explains {winner['val_r2']*100:.1f}% of unemployment variance")

# Train final model on full dataset
print(f"\n{'='*60}")
print("TRAINING FINAL MODEL")  
print(f"{'='*60}")

print("Training winning configuration on full dataset...")

# Use winning parameters for final model
final_model = Sequential()
final_model.add(Dense(winner['architecture'][0], activation='relu', 
                     kernel_regularizer=l2(winner['l2_reg']), input_shape=(X_processed.shape[1],)))
final_model.add(BatchNormalization())
final_model.add(Dropout(winner['dropout']))

for units in winner['architecture'][1:]:
    final_model.add(Dense(units, activation='relu', kernel_regularizer=l2(winner['l2_reg'])))
    final_model.add(BatchNormalization())
    if units > 16:  # Match the logic from evaluation
        final_model.add(Dropout(winner['dropout']))

final_model.add(Dense(1))
final_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Train on full dataset
early_stop = EarlyStopping(monitor='loss', patience=20, restore_best_weights=True, verbose=1)
final_model.fit(X_processed, y, epochs=300, batch_size=winner['batch_size'], callbacks=[early_stop], verbose=1)

# Save model and preprocessing parameters
final_model.save('Unemployment_AI_Optimized.keras')

# Save preprocessing parameters  
import json
preprocessing_params = {
    'numerical_features': list(numerical_features),
    'categorical_features': categorical_features,
    'scaler_mean': preprocessor.named_transformers_['num'].mean_.tolist(),
    'scaler_scale': preprocessor.named_transformers_['num'].scale_.tolist(),
    'encoder_categories': [cat.tolist() for cat in preprocessor.named_transformers_['cat'].categories_]
}

with open('preprocessing_params_optimized.json', 'w') as f:
    json.dump(preprocessing_params, f, indent=2)

print(f"\n✅ COMPLETE!")
print(f"   Optimized model saved: 'Unemployment_AI_Optimized.keras'")
print(f"   Preprocessing saved: 'preprocessing_params_optimized.json'")
print(f"   Winning config: {winner['architecture']} with {winner['regularization']} reg, batch_size={winner['batch_size']}")
print(f"   Expected performance: MAE ≈ {winner['val_mae']:.1f}, R² ≈ {winner['val_r2']:.3f}")

if original_result and original_rank:
    print(f"\n🔍 ORIGINAL MODEL VERDICT:")
    if original_rank <= 2:
        print(f"   Your original model was excellent! (Ranked #{original_rank})")
    else:
        print(f"   Your original model was good, but optimization found better configs")
    print(f"   The 0.83 R² you saw was inflated due to evaluating on training data")
    print(f"   True performance: R² = {original_result['val_r2']:.3f} on unseen data")



SIMPLIFIED HYPERPARAMETER OPTIMIZATION
Training samples: 320
Validation samples: 80

Testing 8 configurations including your ORIGINAL model
🔥 Testing YOUR ORIGINAL MODEL: [128, 64, 32, 16] with very_light regularization, batch_size=8


2025-06-13 02:43:37.498839: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


  MSE: 3.955 | MAE: 1.31 | R²: 0.784 | Overfitting: 2.30
Testing: [256, 128, 64] with light regularization, batch_size=32




  MSE: 4.151 | MAE: 1.27 | R²: 0.773 | Overfitting: 3.46
Testing: [256, 128, 64] with moderate regularization, batch_size=32




  MSE: 3.500 | MAE: 1.22 | R²: 0.809 | Overfitting: 2.90
Testing: [224, 112, 56] with light regularization, batch_size=32




  MSE: 3.507 | MAE: 1.16 | R²: 0.808 | Overfitting: 2.84
Testing: [224, 112, 56] with moderate regularization, batch_size=32




  MSE: 3.793 | MAE: 1.20 | R²: 0.793 | Overfitting: 3.23
Testing: [256, 128, 64, 32] with light regularization, batch_size=32




  MSE: 3.367 | MAE: 1.03 | R²: 0.816 | Overfitting: 2.93
Testing: [256, 128, 64, 32] with moderate regularization, batch_size=32




  MSE: 4.312 | MAE: 1.24 | R²: 0.764 | Overfitting: 3.60
Testing: [128, 64, 32, 16] with moderate regularization, batch_size=32
  MSE: 3.754 | MAE: 1.24 | R²: 0.795 | Overfitting: 3.22

RESULTS SUMMARY (Best to Worst) - ALL EVALUATED ON UNSEEN VALIDATION DATA
Rank Architecture         Reg        Batch MSE    MAE    R²     Overfit
------------------------------------------------------------------------------------------
1    [256,128,64,32]      light      32    3.367  1.03   0.816  2.93   
2    [256,128,64]         moderate   32    3.500  1.22   0.809  2.90   
3    [224,112,56]         light      32    3.507  1.16   0.808  2.84   
4    [128,64,32,16]       moderate   32    3.754  1.24   0.795  3.22   
5    [224,112,56]         moderate   32    3.793  1.20   0.793  3.23   
🔥6   [128,64,32,16]       very_light 8     3.955  1.31   0.784  2.30   
7    [256,128,64]         light      32    4.151  1.27   0.773  3.46   
8    [256,128,64,32]      moderate   32    4.312  1.24   0.764  3.60   





Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78