# Optimization Methods For Data Science
## Final Project - Part 2: SVM

Géraldine V. Maurer, Viktoriia Vlasenko

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import random
from scipy.optimize import minimize
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import warnings
# One shared seed for both random number generators used by this notebook,
# so that a fresh "Restart & Run All" reproduces the same shuffles and draws.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

from functions_1j_maurer_vlasenko import *

### Import Data

In [2]:
# Location of the age-prediction CSV in the project's GitHub repository
DATA_URL = (
    "https://raw.githubusercontent.com/gmaurer08/Optimization-Final-Project/"
    "refs/heads/main/AGE_PREDICTION.csv"
)

# Download the CSV into a DataFrame and preview its first rows as the cell output
data = pd.read_csv(DATA_URL)
data.head()

Unnamed: 0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,...,feat_24,feat_25,feat_26,feat_27,feat_28,feat_29,feat_30,feat_31,feat_32,gt
0,2.686191,-0.989465,-0.920503,1.607427,-0.896248,1.118974,-0.969456,1.811707,2.560955,3.803463,...,-0.862891,-0.909545,-0.915361,-0.952061,-0.989461,1.911855,1.409705,2.303997,-0.98184,54
1,-0.887917,4.915272,-0.939446,-0.343677,-0.964685,-0.478649,4.342395,-0.33287,-0.768041,-0.815375,...,-0.939201,-0.965917,-0.969461,-0.934799,5.304822,0.93479,-0.410701,0.28469,4.919212,18
2,-0.923215,2.746968,-0.918085,0.047804,-0.908587,-0.451752,2.984481,0.535007,-0.591029,-0.324043,...,-0.809726,-0.929934,-0.891814,-0.881796,3.415373,1.044108,-0.442615,0.033648,2.628199,26
3,-0.268866,-0.408416,-0.935145,0.7318,-0.922438,0.221781,-0.046606,1.149634,0.592136,1.357959,...,-0.834968,-0.937475,-0.917737,-0.929519,-0.226282,1.608048,0.276169,1.246468,-0.363367,33
4,0.529231,-0.829957,-0.897425,0.92128,-0.865304,0.331018,-0.64494,1.296097,1.166863,2.036034,...,-0.775411,-0.881967,-0.864018,-0.908001,-0.784495,1.329586,0.547925,1.195395,-0.810089,35


### Data Preparation

In [3]:
# --- Features / target -------------------------------------------------------
# Every column whose name begins with 'feat' is an input; 'gt' is the target.
feature_cols = [name for name in data.columns if name.startswith('feat')]
X = data[feature_cols].to_numpy()
y = data['gt'].to_numpy()

print(f"Features: {len(feature_cols)} columns")
print(f"Data shape: {X.shape}")
print(f"Target range: {y.min():.2f}-{y.max():.2f}")

# --- Train / test split ------------------------------------------------------
# The first 80% of a random permutation is used for training (cross-validation
# happens inside this portion); the remaining 20% is held out for testing.
n_total = X.shape[0]
n_train = int(0.8 * n_total)

indices = np.random.permutation(n_total)
train_idx, test_idx = indices[:n_train], indices[n_train:]

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# --- Standardisation ---------------------------------------------------------
# The scaler's statistics are estimated on the training rows only and then
# applied to both splits, so no information from the test set is used to fit it.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(f"Train set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

Features: 32 columns
Data shape: (20475, 32)
Target range: 10.00-89.00
Train set: (16380, 32)
Test set: (4095, 32)


### Find the best hyperparameters with Cross-Validation

In [4]:
# Run the cross-validated hyperparameter search on the training split only.
# The call yields the winning configuration and the per-configuration records.
search_outcome = hyperparameter_search(X_train, y_train)
best_parameters, search_results = search_outcome

Starting hyperparameter search

[1/24] Testing configuration:
Layers: [64, 32] (hidden)
Activation: tanh
Lambda: 0.001

Starting 3-fold cross-validation...

Fold 1/3
Training network with architecture: [32, 64, 32, 1]
Activation: tanh, Lambda: 0.001
Initial loss: 1694.755682
Final loss: 86.266267
Optimization successful: False
Number of iterations: 1000
Train Loss: 83.4771, Val Loss: 104.7391
Train MAPE: 21.2973%, Val MAPE: 24.1226%

Fold 2/3
Training network with architecture: [32, 64, 32, 1]
Activation: tanh, Lambda: 0.001
Initial loss: 1658.215185
Final loss: 84.745777
Optimization successful: False
Number of iterations: 1000
Train Loss: 82.0352, Val Loss: 107.7196
Train MAPE: 21.2439%, Val MAPE: 24.3258%

Fold 3/3
Training network with architecture: [32, 64, 32, 1]
Activation: tanh, Lambda: 0.001
Initial loss: 1653.696334
Final loss: 86.308981
Optimization successful: False
Number of iterations: 1000
Train Loss: 83.6884, Val Loss: 103.3884
Train MAPE: 21.5647%, Val MAPE: 24.0292%
R

### Training the final model with the best hyperparameters

In [5]:
# Refit on the whole training split with the configuration selected by the search
final_fit = train_network(
    X_train, y_train,
    layer_sizes=best_parameters['layers'],
    activation=best_parameters['activation'],
    lambda_reg=best_parameters['lambda_reg'],
    method='L-BFGS-B',
    maxiter=1000
)
final_weights, final_biases, optimization_result, initial_loss, final_loss = final_fit

# Report how the optimizer terminated
print("\nOptimization details:")
print(f"Success: {optimization_result.success}")
print(f"Message: {optimization_result.message}")
print(f"Iterations: {optimization_result.nit}")
print(f"Loss reduction: {initial_loss:.6f} -> {final_loss:.6f}")

# Predict on the training and test splits with the fitted parameters
best_activation = best_parameters['activation']
y_train_pred = predict(X_train, final_weights, final_biases, best_activation)
y_test_pred = predict(X_test, final_weights, final_biases, best_activation)

# Mean Squared Error on each split
train_mse, test_mse = (
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred),
)

# Mean Absolute Percentage Error on each split
train_mape, test_mape = (
    MAPE(y_train, y_train_pred),
    MAPE(y_test, y_test_pred),
)

Training network with architecture: [32, 64, 32, 16, 1]
Activation: sigmoid, Lambda: 0.01
Initial loss: 1756.619372
Final loss: 98.697318
Optimization successful: False
Number of iterations: 1000

Optimization details:
Success: False
Message: STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT
Iterations: 1000
Loss reduction: 1756.619372 -> 98.697318


### Print Final Results

In [None]:
# Values reused across the report sections below
best_layers = best_parameters['layers']
best_activation = best_parameters['activation']
best_lambda = best_parameters['lambda_reg']
hidden_layers = best_layers[1:-1]
n_parameters = sum(w.size + b.size for w, b in zip(final_weights, final_biases))
convergence_label = 'Successful' if optimization_result.success else 'Failed'
loss_reduction_pct = (initial_loss - final_loss) / initial_loss * 100

# Each section is a list of lines printed in one go; the leading "\n" on the
# section title produces the blank line that separates sections.
configuration_lines = [
    "\nBest Network Configuration:",
    f"Architecture: {best_layers}",
    f"Hidden layers: {hidden_layers}",
    f"Activation function: {best_activation}",
    f"Regularization (lambda): {best_lambda}",
    f"Total parameters: {n_parameters}",
]
print("\n".join(configuration_lines))

optimization_lines = [
    "\nOptimization Results:",
    "Method: L-BFGS-B",
    "Max iterations: 1000",
    f"Convergence: {convergence_label}",
    f"Iterations used: {optimization_result.nit}",
    f"Initial loss: {initial_loss:.6f}",
    f"Final loss: {final_loss:.6f}",
    f"Loss reduction: {loss_reduction_pct:.2f}%",
]
print("\n".join(optimization_lines))

performance_lines = [
    "\nPerformance Metrics:",
    f"Training MSE: {train_mse:.6f}",
    f"Test MSE: {test_mse:.6f}",
    f"Training MAPE: {train_mape:.4f}%",
    f"Test MAPE: {test_mape:.4f}%",
]
print("\n".join(performance_lines))

# Depth is the number of entries in the architecture list minus one;
# width is the largest of the hidden-layer sizes.
insight_lines = [
    "\nModel Insights:",
    f"Best performing activation: {best_activation}",
    f"Optimal regularization strength: {best_lambda}",
    f"Network depth: {len(best_layers) - 1} layers",
    f"Network width: {max(hidden_layers)} neurons (max)",
]
print("\n".join(insight_lines))


# Collect everything worth keeping into a single dictionary
optimization_summary = {
    'success': optimization_result.success,
    'message': optimization_result.message,
    'iterations': optimization_result.nit,
    'initial_loss': initial_loss,
    'final_loss': final_loss,
}
metrics_summary = {
    'train_mse': train_mse,
    'test_mse': test_mse,
    'train_mape': train_mape,
    'test_mape': test_mape,
}
results_summary = {
    'best_config': best_parameters,
    'optimization_result': optimization_summary,
    'performance_metrics': metrics_summary,
    'hyperparameter_search_results': search_results,
}


Best Network Configuration:
Architecture: [32, 64, 32, 16, 1]
Hidden layers: [64, 32, 16]
Activation function: sigmoid
Regularization (lambda): 0.01
Total parameters: 4737

Optimization Results:
Method: L-BFGS-B
Max iterations: 1000
Convergence: Failed
Iterations used: 1000
Initial loss: 1756.619372
Final loss: 98.697318
Loss reduction: 94.38%

Performance Metrics:
Training MSE: 94.948496
Test MSE: 91.639851
Training MAPE: 23.3081%
Test MAPE: 22.7537%

Model Insights:
Best performing activation: sigmoid
Optimal regularization strength: 0.01
Network depth: 4 layers
Network width: 64 neurons (max)


In [7]:
import pickle

# Persist the results dictionary to disk in binary pickle format.
# The `with` statement closes the file as soon as the block exits (also when
# `pickle.dump` raises), so no explicit close() call is needed afterwards.
# Note: the dictionary holds NumPy scalar values, so NumPy must be importable
# wherever this file is later unpickled.
with open("results_summary.pkl", "wb") as summary_file:
    pickle.dump(results_summary, summary_file)

In [8]:
# Echo the complete summary dictionary so it is rendered as this cell's output
results_summary

{'best_config': {'layers': [32, 64, 32, 16, 1],
  'activation': 'sigmoid',
  'lambda_reg': 0.01},
 'optimization_result': {'success': False,
  'message': 'STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT',
  'iterations': 1000,
  'initial_loss': np.float64(1756.6193722644898),
  'final_loss': np.float64(98.69731751643677)},
 'performance_metrics': {'train_mse': 94.94849595886431,
  'test_mse': 91.63985098915778,
  'train_mape': np.float64(23.308067409477044),
  'test_mape': np.float64(22.753721138685908)},
 'hyperparameter_search_results': [{'layers': [32, 64, 32, 1],
   'activation': 'tanh',
   'lambda_reg': 0.001,
   'avg_val_loss': np.float64(105.28236443534229),
   'avg_val_mape': np.float64(24.15918092646488),
   'avg_train_loss': np.float64(83.06689650781153),
   'avg_train_mape': np.float64(21.368623353417735)},
  {'layers': [32, 64, 32, 1],
   'activation': 'tanh',
   'lambda_reg': 0.01,
   'avg_val_loss': np.float64(96.57989694851581),
   'avg_val_mape': np.float64(23.3320161601662