In [None]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from proj1_helpers import *
from implementations import *
from losses import *
from plots import *
from EDA import *
from cross_validation import *
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [None]:
# Training CSV, addressed relative to the notebook's working directory.
DATA_TRAIN_PATH = "../data/train.csv"

# The loader's three return values are unpacked as labels, features and ids.
# NOTE(review): sub_sample=True is forwarded as-is; presumably it loads only a
# subset of the rows — confirm in proj1_helpers before trusting final scores.
data_y, data_set, ids = load_csv_data(
    DATA_TRAIN_PATH,
    sub_sample=True,
)

In [None]:
# Fixing the seed.
# Assigning `seed` alone does not seed anything, so it is also applied to
# NumPy's global random generator: every draw made through `np.random` after
# this cell is then repeatable across "Restart & Run All".
# NOTE(review): if the cross-validation helpers accept their own seed
# argument, pass `seed` to them as well — not visible from this notebook.
seed = 8
np.random.seed(seed)

### Split and classify data

In [None]:
# Split the outputs into four per-class label sets.
# The intermediate name keeps the raw return value available for inspection.
targets_by_class = y_classification(data_y, data_set)
y_0, y_1, y_2, y_3 = targets_by_class

In [None]:
# EDA for each class: EDA_class yields four data sets, one per class.
# They are later passed to cross_validation_poly_gas together with the matching y_k.
features_by_class = EDA_class(data_set)
class_0, class_1, class_2, class_3 = features_by_class

In [None]:
# Degree handed to Parameters.set_degree() by every cross-validation cell below.
# NOTE(review): presumably the highest polynomial degree the search may use —
# confirm in cross_validation.py.
max_degree = 6

# Class 0

#### Least squares

***

In [None]:
# Least squares on class 0: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares)
# No set_to_test() call for this method.
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_least_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_least_0.feature_list,
    param_least_0.polynomial_selection,
    param_least_0.best_error,
    param_least_0.kept_interactions,
    sep="\n",
)

#### Gradient descent

***

In [None]:
# Gradient descent on class 0: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares_GD)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_GD_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_GD_0.feature_list,
    param_GD_0.polynomial_selection,
    param_GD_0.best_error,
    param_GD_0.kept_interactions,
    sep="\n",
)

#### Stochastic Gradient Descent

***

In [None]:
# Stochastic gradient descent on class 0: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares_SGD)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_SGD_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_SGD_0.feature_list,
    param_SGD_0.polynomial_selection,
    param_SGD_0.best_error,
    param_SGD_0.kept_interactions,
    sep="\n",
)

#### Ridge Regression

***

In [None]:
# Ridge regression on class 0: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(ridge_regression)
# 'lambda' is the only name registered for testing with this method.
cv_setup.set_to_test(['lambda'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_ridge_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_ridge_0.feature_list,
    param_ridge_0.polynomial_selection,
    param_ridge_0.best_error,
    param_ridge_0.kept_interactions,
    sep="\n",
)

#### Logistic regression

***

In [None]:
# Logistic regression on class 0: configure, cross-validate, then report.
# A fresh Neg_log instance is created for this model and used as its loss.
neg_log = Neg_log()
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(logistic_regression)
cv_setup.set_loss_fct(neg_log)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_log_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_log_0.feature_list,
    param_log_0.polynomial_selection,
    param_log_0.best_error,
    param_log_0.kept_interactions,
    sep="\n",
)

#### Regularized Logistic regression

***

In [None]:
# Regularized logistic regression on class 0: configure, cross-validate, report.
# A fresh Neg_log instance is created for this model and used as its loss.
neg_log = Neg_log()
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(reg_logistic_regression)
cv_setup.set_loss_fct(neg_log)
# Two names are registered for testing here: 'gamma' and 'lambda'.
cv_setup.set_to_test(['gamma', 'lambda'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_reg_0 = cross_validation_poly_gas(y_0, class_0, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_reg_0.feature_list,
    param_reg_0.polynomial_selection,
    param_reg_0.best_error,
    param_reg_0.kept_interactions,
    sep="\n",
)

# Class 1

***

In [None]:
# Least squares on class 1: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares)
# No set_to_test() call for this method.
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_least_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_least_1.feature_list,
    param_least_1.polynomial_selection,
    param_least_1.best_error,
    param_least_1.kept_interactions,
    sep="\n",
)

In [None]:
# Gradient descent on class 1: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares_GD)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_GD_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_GD_1.feature_list,
    param_GD_1.polynomial_selection,
    param_GD_1.best_error,
    param_GD_1.kept_interactions,
    sep="\n",
)

In [None]:
# Stochastic gradient descent on class 1: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(least_squares_SGD)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_SGD_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_SGD_1.feature_list,
    param_SGD_1.polynomial_selection,
    param_SGD_1.best_error,
    param_SGD_1.kept_interactions,
    sep="\n",
)

In [None]:
# Ridge regression on class 1: configure, cross-validate, then report.
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(ridge_regression)
# 'lambda' is the only name registered for testing with this method.
cv_setup.set_to_test(['lambda'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_ridge_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_ridge_1.feature_list,
    param_ridge_1.polynomial_selection,
    param_ridge_1.best_error,
    param_ridge_1.kept_interactions,
    sep="\n",
)

In [None]:
# Logistic regression on class 1: configure, cross-validate, then report.
# A fresh Neg_log instance is created for this model and used as its loss.
neg_log = Neg_log()
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(logistic_regression)
cv_setup.set_loss_fct(neg_log)
# 'gamma' is the only name registered for testing with this method.
cv_setup.set_to_test(['gamma'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_log_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_log_1.feature_list,
    param_log_1.polynomial_selection,
    param_log_1.best_error,
    param_log_1.kept_interactions,
    sep="\n",
)

In [None]:
# Regularized logistic regression on class 1: configure, cross-validate, report.
# A fresh Neg_log instance is created for this model and used as its loss.
neg_log = Neg_log()
cv_setup = Parameters()
cv_setup.set_degree(max_degree)
cv_setup.set_method(reg_logistic_regression)
cv_setup.set_loss_fct(neg_log)
# Two names are registered for testing here: 'gamma' and 'lambda'.
cv_setup.set_to_test(['gamma', 'lambda'])
cv_setup.set_viz(False)
cv_setup.set_use_backward_selection(False)
cv_setup.set_use_interactions(True)

# The value returned by the cross-validation is kept as this model's result.
param_reg_1 = cross_validation_poly_gas(y_1, class_1, cv_setup)

# Report the same four fields, in the same order, one per line.
print(
    param_reg_1.feature_list,
    param_reg_1.polynomial_selection,
    param_reg_1.best_error,
    param_reg_1.kept_interactions,
    sep="\n",
)

# Class 2

***

In [None]:
def _cross_validate_class_2(method, to_test=None, loss_fct=None):
    """Configure, cross-validate and report one model on the class-2 data.

    Factors out the stanza that this cell previously repeated six times; the
    stanzas differed only in the training method, the optional loss function
    and the optional list of names to test.

    Parameters
    ----------
    method : callable
        Training routine handed to ``Parameters.set_method``.
    to_test : list of str, optional
        Forwarded to ``Parameters.set_to_test``. The setter is not called
        when this is omitted (the least-squares case).
    loss_fct : object, optional
        Forwarded to ``Parameters.set_loss_fct``. The setter is not called
        when this is omitted.

    Returns
    -------
    The object returned by ``cross_validation_poly_gas`` for ``y_2`` /
    ``class_2``, after its four summary fields have been printed.
    """
    setup = Parameters()
    setup.set_degree(max_degree)
    setup.set_method(method)
    # Setter order is kept as in the original stanzas: loss function first,
    # then the names to test, then the three switches.
    if loss_fct is not None:
        setup.set_loss_fct(loss_fct)
    if to_test is not None:
        setup.set_to_test(to_test)
    setup.set_viz(False)
    setup.set_use_backward_selection(False)
    setup.set_use_interactions(True)

    # Cross validation
    result = cross_validation_poly_gas(y_2, class_2, setup)

    print(result.feature_list)
    print(result.polynomial_selection)
    print(result.best_error)
    print(result.kept_interactions)
    return result


# Each banner is printed before its model runs, and every result keeps the
# name it had before so later cells can still read it.
print('LEAST SQUARES')
param_least_2 = _cross_validate_class_2(least_squares)

print('GRADIENT DESCENT')
param_GD_2 = _cross_validate_class_2(least_squares_GD, to_test=['gamma'])

print('STOCHASTIC GRADIENT DESCENT')
param_SGD_2 = _cross_validate_class_2(least_squares_SGD, to_test=['gamma'])

print('RIDGE REGRESSION')
param_ridge_2 = _cross_validate_class_2(ridge_regression, to_test=['lambda'])

print('LOGISTIC REGRESSION')
# A separate Neg_log instance per model, as before.
neg_log = Neg_log()
param_log_2 = _cross_validate_class_2(
    logistic_regression, to_test=['gamma'], loss_fct=neg_log
)

print('REGULARIZED LOGISTIC REGRESSION')
neg_log = Neg_log()
param_reg_2 = _cross_validate_class_2(
    reg_logistic_regression, to_test=['gamma', 'lambda'], loss_fct=neg_log
)

# Class 3

***

In [None]:
def _cross_validate_class_3(method, to_test=None, loss_fct=None):
    """Configure, cross-validate and report one model on the class-3 data.

    Factors out the stanza that this cell previously repeated six times; the
    stanzas differed only in the training method, the optional loss function
    and the optional list of names to test.

    Parameters
    ----------
    method : callable
        Training routine handed to ``Parameters.set_method``.
    to_test : list of str, optional
        Forwarded to ``Parameters.set_to_test``. The setter is not called
        when this is omitted (the least-squares case).
    loss_fct : object, optional
        Forwarded to ``Parameters.set_loss_fct``. The setter is not called
        when this is omitted.

    Returns
    -------
    The object returned by ``cross_validation_poly_gas`` for ``y_3`` /
    ``class_3``, after its four summary fields have been printed.
    """
    setup = Parameters()
    setup.set_degree(max_degree)
    setup.set_method(method)
    # Setter order is kept as in the original stanzas: loss function first,
    # then the names to test, then the three switches.
    if loss_fct is not None:
        setup.set_loss_fct(loss_fct)
    if to_test is not None:
        setup.set_to_test(to_test)
    setup.set_viz(False)
    setup.set_use_backward_selection(False)
    setup.set_use_interactions(True)

    # Cross validation
    result = cross_validation_poly_gas(y_3, class_3, setup)

    print(result.feature_list)
    print(result.polynomial_selection)
    print(result.best_error)
    print(result.kept_interactions)
    return result


# Each banner is printed before its model runs, and every result keeps the
# name it had before so later cells can still read it.
print('LEAST SQUARES')
param_least_3 = _cross_validate_class_3(least_squares)

print('GRADIENT DESCENT')
param_GD_3 = _cross_validate_class_3(least_squares_GD, to_test=['gamma'])

print('STOCHASTIC GRADIENT DESCENT')
param_SGD_3 = _cross_validate_class_3(least_squares_SGD, to_test=['gamma'])

print('RIDGE REGRESSION')
param_ridge_3 = _cross_validate_class_3(ridge_regression, to_test=['lambda'])

print('LOGISTIC REGRESSION')
# A separate Neg_log instance per model, as before.
neg_log = Neg_log()
param_log_3 = _cross_validate_class_3(
    logistic_regression, to_test=['gamma'], loss_fct=neg_log
)

print('REGULARIZED LOGISTIC REGRESSION')
neg_log = Neg_log()
param_reg_3 = _cross_validate_class_3(
    reg_logistic_regression, to_test=['gamma', 'lambda'], loss_fct=neg_log
)