In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

# imports
import numpy as np 
import matplotlib.pyplot as plt

from implementations_enhanced import *
from costs import *
from optimize_hyperparams import *
from cross_validation import *
from step_wise import *

# Build dataset

In [2]:
from proj1_helpers import load_csv_data

# Read the complete training file (sub-sampling disabled). The three values
# returned by the loader are bound to y_raw, input_data_raw and ids.
train_path = 'train.csv'
y_raw, input_data_raw, ids = load_csv_data(train_path, sub_sample=False)

In [3]:
from outliers import handle_outliers

# Clean the raw inputs: -999 is the value flagged to handle_outliers and
# 'mean' the requested strategy (the cell output reports that -999 entries
# are replaced by the mean value of the feature).
X_raw, y = handle_outliers(input_data_raw, y_raw, -999, 'mean')

# Recode the labels from {-1, 1} to {0, 1}: every -1 becomes 0, in place.
negative_mask = (y == -1)
y[negative_mask] = 0

-999 are replaced by the mean value of the feature


In [4]:
# Feature names are taken from the header row of the CSV file (only the first
# row is read, as strings). The [2:] slice skips the first two columns
# (presumably the id and label columns -- TODO confirm).
header_row = np.genfromtxt('train.csv', delimiter=",", dtype=str, max_rows=1)
all_features_raw = list(header_row[2:])

In [8]:
from extend_features import extend_features

# power applied to the raw features when extending the feature set
degree = 1

# Build the extended candidate set (logarithmic features requested through
# is_add_log) together with the matching list of feature names.
extended = extend_features(X_raw, all_features_raw, degree, is_add_log=True)
all_candidates, features = extended
print(all_candidates.shape)

# Standardize the candidates before handing them to the step-wise selection;
# only the first of the three returned values is kept.
all_candidates, _, _ = standardize(all_candidates)

---------------------------
Features have been set to the power(s): [1]
16 Features of the momentum have been added
4 logarithmic features have been added.
(250000, 50)


# Feature Selection through step-wise

In [9]:
# Settings dictionary handed to the step-wise selection and to the
# optimization cells further down.
model = {
    # 'lrr' if regularized with a penalization 'lambda_', otherwise 'lr'
    'method': 'lr',
    'loss': 'loglikelihood',
    # model hyperparameter
    'gamma': 1e-5,
    # 'gd' (gradient descent) or 'newton' (Newton descent leads to a
    # non-invertible matrix "S" which is too big)
    'method_minimization': 'gd',
    'threshold': 5000,
    'max_iters': 10000,
    'debug_mode': 0,
    # number of folds, stored with the other settings
    'k_fold': 10,
}

# R2 type
R2_method = 'McFadden'

# estimate the R2 error through cross validation (1 or 0)
cv = 0

In [10]:
# Run the step-wise feature selection on the standardized candidates. The call
# returns two values, kept as best_R2adj and idx_features (the R2adj values
# and the indices of the chosen features, as printed in the cell output).
stepwise_out = stepwise(model, R2_method, all_candidates, features, y, cv)
best_R2adj, idx_features = stepwise_out

--------------------------------------------------------------------------------------------
Feature chosen:  DER_mass_transverse_met_lep_power_1 (index : 1 ) |  R2adj =  0.11273281839
--------------------------------------------------------------------------------------------
Feature chosen:  PRI_tau_pt_power_1 (index : 13 ) |  R2adj =  0.138911714063


KeyboardInterrupt: 

In [None]:
# Display the selected features; the last entry of best_R2adj is left out.
r2_without_last = best_R2adj[:-1]
results_r2_stepwise(r2_without_last, idx_features)

In [None]:
# Print every R2adj value except the last one.
print(best_R2adj[:-1])

In [None]:
# Keep only the candidate columns picked by the step-wise selection.
Xsw = all_candidates[:, idx_features]

# Report the shape of the selected design matrix, then of the labels.
for arr in (Xsw, y):
    print(arr.shape)

# Optimization of degree when lambda = 0

In [None]:
# Restrict the candidates to the first three features chosen by the step-wise.
Xsw2 = all_candidates[:, idx_features[:3]]

# change optimization parameters (overrides the values set for the step-wise)
model['gamma'] = 1e-5
model['max_iters'] = 10000
model['threshold'] = 1e-3

# degree search range and number of cross-validation folds
degree_min = 1
degree_max = 10
degree_steps = 1
model['k_fold'] = 10

# optimize degree
w_opt, loss_tr, loss_te, degree_opt = optimize_degree(y, Xsw2, degree_min, degree_max, degree_steps, model, debug_mode=1)

# Plot results: linear model output at the selected degree against the labels.
# The polynomial expansion is built from Xsw2, the matrix that was given to
# optimize_degree; the name `x` used here previously is not defined in this
# notebook and raised a NameError on a fresh kernel.
plt.figure()
plt.plot(build_poly(Xsw2, degree_opt).dot(w_opt), label='model output')
plt.plot(y, label='y')
plt.xlabel('sample')
plt.ylabel('observation')
plt.legend()
plt.show()

# Optimization of lambda through simple cross validation

In [None]:
# Search range for lambda_ and number of cross-validation folds.
# NOTE(review): lambda_min / lambda_max look like exponents of a logarithmic
# grid -- confirm against optimize_lambda.
lambda_min = -10
lambda_max = 1
lambda_steps = 10
model['k_fold'] = 10

# optimize lambda_
# The settings dictionary `model` is passed, mirroring the optimize_degree
# call; the name `arg_rr` used here previously is not defined in any visible
# cell of this notebook.
# NOTE(review): model['method'] is still 'lr' at this point; per the comment
# in the model-parameters cell, 'lrr' is the regularized variant that uses
# 'lambda_' -- confirm which one optimize_lambda expects.
w_opt, loss_tr, loss_te, lambda_opt, success_rate = optimize_lambda(y, Xsw, lambda_min, lambda_max, lambda_steps, model, debug_mode=1)