# Optimization Methods For Data Science
## Final Project - Part 1: Multi-Layer-Perceptron

Géraldine V. Maurer, Viktoriia Vlasenko

### Import Libraries

In [21]:
import numpy as np
import pandas as pd
import random
import os
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from cvxopt import matrix, solvers
from itertools import product
import time
from functions_2j_maurer_vlasenko import *
from sklearn.metrics import confusion_matrix

import warnings
warnings.filterwarnings('ignore')

SEED=123
random.seed(SEED)
np.random.seed(SEED)
os.environ["PYTHONHASHSEED"]=str(SEED)
os.environ["OMP_NUM_THREADS"]="1"
os.environ["MKL_NUM_THREADS"]="1"
os.environ["OPENBLAS_NUM_THREADS"]="1"
os.environ["NUMEXPR_NUM_THREADS"]="1"

### Import Data

In [22]:
df = pd.read_csv("https://raw.githubusercontent.com/gmaurer08/Optimization-Final-Project/refs/heads/main/GENDER_CLASSIFICATION.csv")
df.head()

Unnamed: 0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,...,feat_24,feat_25,feat_26,feat_27,feat_28,feat_29,feat_30,feat_31,feat_32,gt
0,-0.900846,0.102587,-0.397814,0.112796,2.588096,-0.192754,-0.968311,-0.490886,-0.872099,-0.288411,...,2.541431,1.739102,0.166066,4.584869,-0.107031,-0.91399,-0.686416,-0.368085,-0.870545,0
1,-0.838868,0.039976,-0.387101,0.055413,2.066874,-0.226948,-0.947416,-0.472817,-0.855387,-0.207101,...,1.991721,1.259745,0.065058,3.01979,-0.110633,-0.890023,-0.611625,-0.298235,-0.855208,0
2,-0.814961,-0.010184,-0.397147,0.092713,1.897454,-0.269387,-0.945285,-0.449579,-0.849705,-0.151179,...,1.822978,1.105511,0.065353,2.500681,-0.05273,-0.885691,-0.583346,-0.21814,-0.856456,0
3,-0.11047,0.027849,-0.04431,-0.005343,0.177831,-0.232092,-0.5627,-0.400713,-0.552356,0.037349,...,-0.098367,-0.370318,-0.123008,-0.861314,0.10684,-0.483669,-0.224164,0.147321,-0.615051,0
4,-0.626313,-0.091985,-0.373756,-0.005083,1.172486,-0.314868,-0.885046,-0.412587,-0.818729,-0.012022,...,1.030348,0.421886,-0.068029,0.258984,-0.057158,-0.834079,-0.441066,-0.099874,-0.829539,0


### Question 2: SVM Dual with CVXOPT + k-fold CV

### Data Preparation

In [23]:
# Split into x and y
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values

# Standardize the data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_scaled = sc.fit_transform(X)

# Split into test and train set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.2)

# Suppress progress outputs
solvers.options['show_progress'] = False  # Optimization routine: CVXOPT 'qp' (default settings)

# The dual SVM expects labels in {-1,+1}, while our dataset uses {0,1}
# We map once here for the final training stage, CV does its own mapping inside
y_train_svm = np.where(y_train==0,-1.0,1.0)
y_test_svm  = np.where(y_test==0,-1.0,1.0)

#### Choose Kernel and set parameter grids for Hyperparameter Optimization

In [24]:
# kernel & hyperparameter grid
KERNEL = 'rbf'  # rbf or poly

if KERNEL=='rbf':
    # parameter grid for hyperparameter optimization
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'gamma': [0.01, 0.1, 1.0]
    }
elif KERNEL=='poly':
    # polynomial degree p controls model capacity; often p=2..4 is plenty
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'p': [2, 3, 4]
    }
else:
    # friendly failure if someone mistypes the kernel name
    raise ValueError("KERNEL must be 'rbf' or 'poly'")

#### Run Cross-Validation with k=5

In [25]:
# Run cross-validation to pick (C, kernel hyperparam)
best_params, val_best_mean, cv_history = cross_validate_svm(
    x_train, (y_train>0).astype(int), KERNEL, param_grid, k=5, seed=SEED
)

print(f"Kernel = {KERNEL}")
print(f"Best parameters = {best_params}")
print(f"Mean 5-fold val acc = {val_best_mean:.4f}")

Kernel = rbf
Best parameters = {'C': 1, 'gamma': 0.1}
Mean 5-fold val acc = 0.9225


In [None]:
# Train FINAL model on the whole training split using the best hyperparameters from CV

# Make the kernel matrices
K_train, K_test = make_kernel_matrices(x_train, x_test, KERNEL, **best_params)

# Time the optimization
start_time = time.time()

# Solve SVM dual problem
alphas, sol, Q = solve_svm_dual(K_train, y_train_svm, C=best_params['C'])

# Measure time difference
end_time = time.time()
optimization_time = end_time - start_time

# Compute Bias
b, sv_mask = compute_b(alphas, y_train_svm, K_train, C=best_params['C'])

# Scores
scores_train = decision_function(alphas, y_train_svm, K_train, b)
scores_test = decision_function(alphas, y_train_svm, K_test, b)

# Accuracies (classification rates)
ytrue_train01 = (y_train>0).astype(int)
ytrue_test01 = (y_test>0).astype(int)
train_acc = accuracy_from_scores(scores_train, ytrue_train01)
test_acc = accuracy_from_scores(scores_test, ytrue_test01)

# Confusion matrices
ypred_train01 = (scores_train>=0).astype(int)
ypred_test01 = (scores_test>=0).astype(int)
cm_train = confusion_matrix(ytrue_train01, ypred_train01, labels=[0,1])
cm_test = confusion_matrix(ytrue_test01, ypred_test01, labels=[0,1])

# Optimization diagnostics
# cvxopt solves min g(a)=1/2 a^T Q a - 1^T a ; the dual objective is f(a)=-g(a)
a = alphas
primal_obj_g = float(sol['primal objective']) # value minimized by cvxopt
final_dual_obj = -primal_obj_g # flip sign to get maximized dual
initial_dual_obj = 0.0 # at a=0 the dual is 0
iterations = int(sol['iterations'])
num_sv = int(np.sum(sv_mask))

print(f"Kernel: {KERNEL}")
print(f"Hyperparameters: {best_params}")  # setting values of the hyperparameters

print(f"\nClassification accuracy (train): {train_acc:.4f}")
print(f"Classification accuracy (test):  {test_acc:.4f}")

print("\nConfusion matrix (train) [rows=true 0/1, cols=pred 0/1]:")
print(cm_train)
print("Confusion matrix (test) [rows=true 0/1, cols=pred 0/1]:")
print(cm_test)

print(f"\nOptimization time: {optimization_time:.4f} s")
print(f"Optimization iterations: {iterations}")
print(f"Final dual SVM objective: {final_dual_obj:.6f}")

Kernel: rbf
Hyperparameters: {'C': 1, 'gamma': 0.1}

Classification accuracy (train): 0.9225
Classification accuracy (test):  0.9000

Confusion matrix (train) [rows=true 0/1, cols=pred 0/1]:
[[366  33]
 [ 29 372]]
Confusion matrix (test)  [rows=true 0/1, cols=pred 0/1]:
[[90 11]
 [ 9 90]]

Optimization time: 1.6117 s
Optimization iterations: 14
Final dual SVM objective: 132.722617


### Question 3: MVP Decomposition (q=2)

In [None]:
# Train & Evaluate MVP using the best hyperparams from Q2
# default if not defined or code is being re-run out of order
try:
    kernel_for_q3 = KERNEL
    best_for_q3 = dict(best_params)
except NameError:
    kernel_for_q3 = 'rbf'
    best_for_q3 = {'C': 1.0, 'gamma': 0.1}

# labels in {-1,+1} for the dual
ytrain_svm = np.where(y_train==0, -1.0, 1.0)
ytest_svm = np.where(y_test==0, -1.0, 1.0)

# Build kernel matrices
K_train_q3, K_test_q3 = make_kernel_matrices(x_train, x_test, kernel_for_q3, **best_for_q3)

# Train with MVP (q=2) and time it
t0=time.time()  # start timer
alphas_mvp, stats_mvp = mvp_train(K_train_q3, ytrain_svm, C=best_for_q3['C'], tol=1e-3, max_iter=200000)
opt_time=time.time()-t0  # Optimization time

# get bias; averaging over free SVs gives a stable estimate
b_mvp, sv_mask_mvp = compute_b(alphas_mvp, ytrain_svm, K_train_q3, C=best_for_q3['C'])

# Decision scores
scores_train_mvp = (alphas_mvp*ytrain_svm) @ K_train_q3 + b_mvp
scores_test_mvp = (alphas_mvp*ytrain_svm) @ K_test_q3 + b_mvp

# Accuracies (classification rates)
ytrue_train01 = (y_train>0).astype(int) # true labels in {0,1}
ytrue_test01 = (y_test>0).astype(int)
ypred_train01 = (scores_train_mvp>=0).astype(int) # predicted labels in {0,1}
ypred_test01 = (scores_test_mvp>=0).astype(int)
train_acc_mvp = float(np.mean(ypred_train01==ytrue_train01))
test_acc_mvp = float(np.mean(ypred_test01==ytrue_test01))

# Confusion matrices
cm_train = confusion_matrix(ytrue_train01, ypred_train01, labels=[0,1])
cm_test = confusion_matrix(ytrue_test01, ypred_test01, labels=[0,1])

print(f"Kernel: {kernel_for_q3}") # kernel
print(f"Hyperparameters: {best_for_q3}") # hyperparameters
print(f"\nClassification accuracy (train): {train_acc_mvp:.4f}") # training accuracy
print(f"Classification accuracy (test): {test_acc_mvp:.4f}") # test accuracy
print("\nConfusion matrix (train) [rows=true 0/1, cols=pred 0/1]:") # confusion matrix (train)
print(cm_train)
print("Confusion matrix (test) [rows=true 0/1, cols=pred 0/1]:") # confusion matrix (test)
print(cm_test)
print(f"\nOptimization time: {opt_time:.4f} s")  # Optimization time
print(f"Number of optimization iterations: {stats_mvp['iterations']}")  # Number of optimization iterations
print(f"Final value of the dual SVM objective: {stats_mvp['dual_obj']:.6f}")  # Final value of the dual SVM objective

Kernel: rbf
Hyperparameters: {'C': 1, 'gamma': 0.1}

Classification accuracy (train): 0.9225
Classification accuracy (test): 0.9000

Confusion matrix (train) [rows=true 0/1, cols=pred 0/1]:
[[366  33]
 [ 29 372]]
Confusion matrix (test) [rows=true 0/1, cols=pred 0/1]:
[[90 11]
 [ 9 90]]

Optimization time: 0.3539 s
Number of optimization iterations: 2920
Final value of the dual SVM objective: 132.722442
