In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

In [150]:
from proj1_helpers import *
from implementations import *

In [3]:
def compute_accuracy(y, predictions):
    """Print the fraction of predictions that match the true labels.

    Both inputs are flattened before they are compared, so a column vector of
    shape (N, 1) and a flat vector of shape (N,) are matched element by element
    instead of being broadcast into an (N, N) matrix (which would produce a
    meaningless, possibly negative, score).

    Parameters
    ----------
    y : array_like
        True labels.
    predictions : array_like
        Predicted labels, one per entry of ``y``.

    Raises
    ------
    ValueError
        If ``y`` is empty, or if the two inputs do not hold the same number
        of labels.
    """
    y = np.ravel(y)
    predictions = np.ravel(predictions)
    N = y.size
    if N == 0:
        raise ValueError("Cannot compute accuracy on an empty label array")
    if predictions.size != N:
        raise ValueError(
            "y and predictions must hold the same number of labels "
            "({} != {})".format(N, predictions.size))
    # A non-zero difference marks a misclassified sample.
    accuracy = 1 - (np.count_nonzero(predictions - y) / N)
    print("Accuracy: {}".format(accuracy))

In [4]:
def save_results(weights, clean_features, parameters, output_dir='all'):
    """Persist the trained model artefacts as ``.npy`` files.

    Writes ``weights.npy``, ``clean_features.npy`` and ``parameters.npy`` into
    ``output_dir``, creating the directory if it does not exist yet.

    The per-subset weight vectors (and the matching mean/std pairs) generally
    have different lengths. Recent NumPy releases refuse to build an array from
    such ragged sequences, so those inputs are stored as 1-D object arrays and
    must be read back with ``np.load(..., allow_pickle=True)``.

    Parameters
    ----------
    weights : sequence
        One weight vector per model.
    clean_features : sequence
        One feature mask per model.
    parameters : sequence
        One ``(mean, std)`` pair per model.
    output_dir : str, optional
        Destination directory; defaults to ``'all'``, the original location.
    """
    import os  # local import so the cell does not depend on the notebook header

    def _as_saveable(obj):
        """Return ``obj`` as an ndarray, using a 1-D object array if it is ragged."""
        try:
            return np.asarray(obj)
        except ValueError:
            ragged = np.empty(len(obj), dtype=object)
            for position, item in enumerate(obj):
                ragged[position] = item
            return ragged

    os.makedirs(output_dir, exist_ok=True)
    np.save(os.path.join(output_dir, 'weights.npy'), _as_saveable(weights))
    np.save(os.path.join(output_dir, 'clean_features.npy'), _as_saveable(clean_features))
    np.save(os.path.join(output_dir, 'parameters.npy'), _as_saveable(parameters))

# Part 1: Without feature engineering

### Import Dataset

In [5]:
train_data = 'all/train.csv'
labels, input_data, ids, features = load_csv_data(train_data)

In [6]:
features

array(['DER_mass_MMC', 'DER_mass_transverse_met_lep', 'DER_mass_vis',
       'DER_pt_h', 'DER_deltaeta_jet_jet', 'DER_mass_jet_jet',
       'DER_prodeta_jet_jet', 'DER_deltar_tau_lep', 'DER_pt_tot',
       'DER_sum_pt', 'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
       'DER_lep_eta_centrality', 'PRI_tau_pt', 'PRI_tau_eta',
       'PRI_tau_phi', 'PRI_lep_pt', 'PRI_lep_eta', 'PRI_lep_phi',
       'PRI_met', 'PRI_met_phi', 'PRI_met_sumet', 'PRI_jet_num',
       'PRI_jet_leading_pt', 'PRI_jet_leading_eta', 'PRI_jet_leading_phi',
       'PRI_jet_subleading_pt', 'PRI_jet_subleading_eta',
       'PRI_jet_subleading_phi', 'PRI_jet_all_pt'], dtype='<U27')

In [27]:
training_ratio = 0.8

In [28]:
# Split features and labels into a training part and a held-out part using training_ratio.
# NOTE(review): no seed is passed here — confirm split_data is deterministic,
# otherwise the accuracies recorded below cannot be reproduced.
x_tr, x_te, y_tr, y_te = split_data(input_data, labels, training_ratio)

In [29]:
# Build the standardized training matrix; mean_tr and std_tr are kept so they can be
# passed again when the test split is transformed.
tx_tr, mean_tr, std_tr = extend_and_standardize(x_tr)

## Gradient descent

In [166]:
# Settings for gradient descent on the raw training features.
initial_w = np.zeros(x_tr.shape[1])  # one zero weight per column of x_tr
max_iters = 100
# NOTE(review): presumably the step size; the recorded run still emitted runtime
# warnings from the loss computation — confirm the value is small enough for unscaled inputs.
gamma = 0.00001

In [167]:
# Fit least squares by gradient descent on the raw (unstandardized) features.
# The second return value is bound to `w_GD`: that is the name the scoring cell
# reads, and it matches the unpacking used for the per-jet models in Part 2.
losses_GD, w_GD = least_squares_GD(y_tr, x_tr, initial_w, max_iters, gamma)

  ret = umr_sum(arr, axis, dtype, out, keepdims)
  return 1/2*np.mean(e**2)


In [53]:
# Predict labels for the held-out rows from the gradient-descent weights and print the accuracy.
predictions_GD = predict_labels(w_GD,x_te)
compute_accuracy(y_te,predictions_GD)

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


## Stochastic gradient descent

## Least squares

### No standardization

In [12]:
# Least squares on the raw training split (x_tr is not standardized), followed by
# label predictions for the raw held-out split.
loss, w_LS = least_squares(y_tr,x_tr)
predictions = predict_labels(w_LS,x_te)

In [14]:
compute_accuracy(y_te,predictions)

Accuracy: 0.74402


### With standardization

In [30]:
# Transform the held-out split, passing the statistics computed on the training split
# (mean_tr, std_tr); the two extra return values are discarded.
# NOTE(review): presumably the helper reuses the supplied mean/std instead of recomputing them — confirm.
tx_te, _, _ = extend_and_standardize(x_te, mean_tr, std_tr)

In [31]:
# Same least-squares fit as above, this time on the standardized matrices tx_tr / tx_te.
# Rebinds loss, w_LS and predictions from the unstandardized run.
loss, w_LS = least_squares(y_tr,tx_tr)
predictions = predict_labels(w_LS,tx_te)

In [32]:
compute_accuracy(y_te,predictions)

Accuracy: 0.74468


## Ridge regression

### Use cross-validation to find good hyperparameter

In [92]:
# Cross-validation settings.
# NOTE(review): none of these names is passed to the find_optimal_lambda calls below —
# confirm whether the helper needs them or whether they are leftovers.
seed = 1
k_fold = 4
k_indices = build_k_indices(y_tr, k_fold, seed)
lambda_ = 0.001

In [93]:
# Search for the ridge penalty on the raw training features; this call unpacks three return values.
# NOTE(review): the following cell binds the same call to a single name and its recorded value
# is a scalar, so the two cells cannot both match the helper's current return shape — confirm which one is stale.
lambdas, tr_losses, te_losses = find_optimal_lambda(y_tr,x_tr)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29


In [70]:
# Repeats the search from the previous cell and keeps the result under a single name,
# which the ridge fit below passes on as its penalty.
# NOTE(review): the previous cell unpacks three values from the very same call — confirm
# what find_optimal_lambda currently returns before relying on optimal_lambda.
optimal_lambda = find_optimal_lambda(y_tr,x_tr)

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29


In [72]:
# Ridge fit on the raw training features with the penalty selected above.
w_rr = ridge_regression(y_tr,x_tr,optimal_lambda)

In [88]:
optimal_lambda

0.004520353656360241

In [73]:
predictions = predict_labels(w_rr,x_te)

In [75]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7453000000000001


## Logistic regression

In [160]:
# Logistic-regression settings: zero starting weights, one per raw feature column.
gamma = 0.01
max_iters = 1000
initial_w = np.zeros(x_tr.shape[1])
# Re-encode the targets for the logistic loss: -1 becomes 0, every other label becomes 1.
y_tr_log = np.where(y_tr == -1, 0.0, 1.0)

In [161]:
# Logistic regression on the raw (unstandardized) training features.
# NOTE(review): the recorded run logs loss=nan at every reported iteration, while the
# per-jet logistic fit on standardized features in Part 2 logs finite losses — presumably the
# unscaled inputs push the log terms to invalid values; confirm before trusting these weights.
loss, w_logistic = logistic_regression(y_tr_log, x_tr, initial_w, max_iters, gamma)

  loss = (-y * np.log(pred) - (1 - y) * np.log(1 - pred)).mean()
  loss = (-y * np.log(pred) - (1 - y) * np.log(1 - pred)).mean()


Current iteration=0, loss=nan
Current iteration=100, loss=nan
Current iteration=200, loss=nan
Current iteration=300, loss=nan
Current iteration=400, loss=nan
Current iteration=500, loss=nan
Current iteration=600, loss=nan
Current iteration=700, loss=nan
Current iteration=800, loss=nan
Current iteration=900, loss=nan


In [163]:
# Score the logistic weights on the raw held-out split against the original y_te labels.
# NOTE(review): the model was trained on 0/1 targets (y_tr_log) — confirm that predict_labels
# applies the matching decision threshold.
predictions = predict_labels(w_logistic, x_te)
compute_accuracy(y_te,predictions)

Accuracy: 0.6876599999999999


## Regularized logistic regression

In [156]:
# Regularized logistic regression on the raw features; reuses y_tr_log, initial_w, max_iters and
# gamma from the logistic-regression cells and optimal_lambda from the ridge section.
# NOTE(review): the recorded run logs loss=nan throughout — same concern as the unregularized fit on unscaled inputs.
loss, w_reg_logistic = reg_logistic_regression(y_tr_log, x_tr, optimal_lambda, initial_w, max_iters, gamma)

  loss = (-y * np.log(pred) - (1 - y) * np.log(1 - pred)).mean()
  loss = (-y * np.log(pred) - (1 - y) * np.log(1 - pred)).mean()


Current iteration=0, loss=nan
Current iteration=100, loss=nan
Current iteration=200, loss=nan
Current iteration=300, loss=nan
Current iteration=400, loss=nan
Current iteration=500, loss=nan
Current iteration=600, loss=nan
Current iteration=700, loss=nan
Current iteration=800, loss=nan
Current iteration=900, loss=nan


In [158]:
# Score the regularized logistic weights on the raw held-out split.
predictions = predict_labels(w_reg_logistic, x_te)
compute_accuracy(y_te,predictions)

Accuracy: 0.7079599999999999


# Part 2: EDA and feature engineering

### Import Dataset

In [83]:
train_data = 'all/train.csv'
labels, input_data, ids, features = load_csv_data(train_data)

In [84]:
training_ratio = 0.8

In [85]:
x_tr, x_te, y_tr, y_te = split_data(input_data, labels, training_ratio)

In [86]:
X, y = x_tr, y_tr  # input_data, labels

# Look up the column that holds PRI_jet_num by its feature name.
i, = np.where(features == 'PRI_jet_num')
pri_jet_num_idx = np.squeeze(i)

# Row masks for the three jet-count groups: exactly 0, exactly 1, and 2 or more.
jet_counts = X[:, pri_jet_num_idx]
cond_null, cond_one, cond_plural = jet_counts == 0, jet_counts == 1, jet_counts >= 2
conditions = [cond_null, cond_one, cond_plural]

# Feature rows and labels of each group, in the same order as `conditions`.
dsets = [X[mask] for mask in conditions]
ybs = [y[mask] for mask in conditions]

For now, simply remove every column that contains an undefined value (-999). Also remove zero-variance (constant) features before standardization. 
Second part: test how replacing -999 in DER_mass_MMC with the mean of the defined values affects the score. 

In [103]:
clean_dsets = []
clean_features = []

for dset in dsets:

    # Disabled experiment: impute undefined DER_mass_MMC
    #   DER_mass_MMC = dset[:,0]
    #   undefined_indices = (DER_mass_MMC == -999)
    #   filter_undefined = DER_mass_MMC[~undefined_indices]
    #   defined_mean = np.mean(filter_undefined)
    #   print(defined_mean)
    #   defined_median = np.median(filter_undefined)
    #   print(defined_median)
    #   DER_mass_MMC[undefined_indices] = defined_median

    # Keep a column only if it never holds -999 and is not constant
    # (i.e. at least one row differs from the first row).
    no_undefined = ~np.any(dset == -999, axis=0)
    no_constant = ~np.all(dset == dset[0], axis=0)
    cleaned = np.logical_and(no_undefined, no_constant)

    # Store the column mask together with the reduced dataset.
    clean_features.append(cleaned)
    clean_dset = dset[:, cleaned]
    clean_dsets.append(clean_dset)

Standardize and extend data, save mean and standard deviation of each dataset.

In [111]:
standardized_dsets, parameters = [], []

for clean_dset in clean_dsets:
    transformed = extend_and_standardize(clean_dset)
    standardized_dset, mean_x, std_x = transformed
    # Disabled experiment (outlier handling, added for testing purposes):
    #   standardized_dset[standardized_dset > 3] = 3
    #   standardized_dset[standardized_dset < -3]  = -3
    # Keep the statistics next to the data so they can be handed to model_predictions later.
    parameters.append((mean_x, std_x))
    standardized_dsets.append(standardized_dset)

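Only extend the datasets (no standardization) and clip outliers to within three standard deviations of each column's mean.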

In [89]:
# Identity (mean 0, std 1) parameters, one pair per dataset: the data built here is not standardized.
# NOTE(review): this rebinds the same global `parameters` that the standardization cell
# fills with real (mean, std) pairs, so whichever of the two cells ran last decides what
# later `model_predictions(..., parameters)` calls receive — confirm the intended run order.
parameters = [(0, 1) for _ in clean_dsets]
extended_dsets = []
for clean_dset in clean_dsets:
    tmp = build_model_data(clean_dset)
    tmp_mean = np.mean(tmp, axis = 0)
    tmp_std = np.std(tmp, axis = 0)
    # Clip every column to [mean - 3*std, mean + 3*std]. The bounds broadcast across the
    # rows, which replaces the per-element Python loop with a single vectorized pass while
    # leaving in-range values untouched.
    upper = tmp_mean + 3*tmp_std
    lower = tmp_mean - 3*tmp_std
    tmp = np.where(tmp > upper, upper, np.where(tmp < lower, lower, tmp))
    extended_dsets.append(tmp)

## Gradient descent

In [168]:
max_iters = 1000
gamma = 0.1

In [169]:
# Train one gradient-descent model per jet-count subset, each starting from zero weights.
ws_GD = []
for y_subset, standardized_dset in zip(ybs, standardized_dsets):
    initial_w = np.zeros(standardized_dset.shape[1])
    losses_GD, w_GD = least_squares_GD(y_subset, standardized_dset, initial_w, max_iters, gamma)
    ws_GD += [w_GD]

In [170]:
predictions = model_predictions(x_te, ws_GD, pri_jet_num_idx, clean_features, parameters)

In [171]:
compute_accuracy(y_te,predictions)

Accuracy: 0.75998


Second score, after replacing -999 in DER_mass_MMC by the defined mean.

In [44]:
compute_accuracy(y_te,predictions)

Accuracy: 0.72386


Conclusion: a bit worse than without imputation (0.72386 vs. 0.75998).

Third score, after replacing -999 in DER_mass_MMC by the defined median.

In [70]:
compute_accuracy(y_te,predictions)

Accuracy: 0.72848


## Stochastic Gradient descent

In [130]:
max_iters = 1000
gamma = 0.1
batch_size = 100

In [132]:
# Train one stochastic-gradient-descent model per jet-count subset, each starting from zero weights.
ws_SGD = []
for y_subset, standardized_dset in zip(ybs, standardized_dsets):
    initial_w = np.zeros(standardized_dset.shape[1])
    loss_SGD, w_SGD = least_squares_SGD(y_subset, standardized_dset, initial_w, batch_size, max_iters, gamma)
    ws_SGD += [w_SGD]

In [133]:
predictions = model_predictions(x_te, ws_SGD, pri_jet_num_idx, clean_features, parameters)

In [134]:
compute_accuracy(y_te,predictions)

Accuracy: 0.75512


## Least squares

In [120]:
# One least-squares fit per jet-count subset; only the weights (second return value) are kept.
ws_LS = [
    least_squares(y_subset, standardized_dset)[1]
    for y_subset, standardized_dset in zip(ybs, standardized_dsets)
]

In [121]:
predictions = model_predictions(x_te, ws_LS, pri_jet_num_idx, clean_features, parameters)

In [122]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7598


Second score, after replacing -999 in DER_mass_MMC by the defined mean.

In [69]:
compute_accuracy(y_te,predictions)

Accuracy: 0.75986


Third score, after replacing -999 in DER_mass_MMC by the defined median.

In [50]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7598


Score after handling outliers (values more than three standard deviations from the mean are clipped).

In [105]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76336


In [90]:
# Least-squares fit per jet-count subset on the extended, outlier-clipped (not standardized)
# datasets; only the weights (second return value) are kept.
ws_LS = [
    least_squares(y_subset, extended_dset)[1]
    for y_subset, extended_dset in zip(ybs, extended_dsets)
]

In [91]:
# These weights were fitted on data that was extended but not standardized, so pass identity
# (mean 0, std 1) parameters explicitly instead of relying on whatever the global
# `parameters` currently holds (the standardization cell assigns real statistics to it).
identity_parameters = [(0, 1)] * len(ws_LS)
predictions = model_predictions(x_te, ws_LS, pri_jet_num_idx, clean_features, identity_parameters)
compute_accuracy(y_te, predictions)

Accuracy: 0.76336


## Ridge regression

In [141]:
# Select one ridge penalty per jet-count subset.
lambdas = []
for y_subset, standardized_dset in zip(ybs, standardized_dsets):
    optimal_lambda = ridge_optimal_lambda(y_subset, standardized_dset)
    lambdas += [optimal_lambda]

Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19
Iteration 20
Iteration 21
Iteration 22
Iteration 23
Iteration 24
Iteration 25
Iteration 26
Iteration 27
Iteration 28
Iteration 29
Iteration 0
Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Ite

In [142]:
lambdas

[1.0, 1.0, 1.0]

In [143]:
# One ridge model per jet-count subset, each using the penalty selected for that subset.
ws_RR = [
    ridge_regression(y_subset, standardized_dset, subset_lambda)
    for y_subset, standardized_dset, subset_lambda in zip(ybs, standardized_dsets, lambdas)
]

In [24]:
save_results(ws_RR, clean_features, parameters)

In [146]:
predictions = model_predictions(x_te, ws_RR, pri_jet_num_idx, clean_features, parameters)

In [147]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7599400000000001


Second score, after replacing -999 in DER_mass_MMC by the defined mean.

In [140]:
compute_accuracy(y_te,predictions)

Accuracy: 0.75988


Third score, after replacing -999 in DER_mass_MMC by the defined median.

In [152]:
compute_accuracy(y_te,predictions)

Accuracy: 0.75992


## Logistic regression

In [172]:
# Optimiser settings shared by every jet-count subset.
max_iters = 2000
gamma = 0.7 # 0.01

# One logistic-regression model per subset, each starting from zero weights.
ws_LR = []
for y_subset, standardized_dset in zip(ybs, standardized_dsets):
    # Re-encode the targets for the logistic loss: -1 becomes 0, every other label becomes 1.
    y_logistic = np.where(y_subset == -1, 0.0, 1.0)
    initial_w = np.zeros(standardized_dset.shape[1])
    loss, w_LR = logistic_regression(y_logistic, standardized_dset, initial_w, max_iters, gamma)
    ws_LR += [w_LR]

Current iteration=0, loss=0.5981378276640935
Current iteration=100, loss=0.4050067610901047
Current iteration=200, loss=0.3970473245894675
Current iteration=300, loss=0.39380211522480185
Current iteration=400, loss=0.3922295351190929
Current iteration=500, loss=0.3914176415322084
Current iteration=600, loss=0.3909819194819656
Current iteration=700, loss=0.39074228818318946
Current iteration=800, loss=0.39060828411520365
Current iteration=900, loss=0.39053243787160014
Current iteration=1000, loss=0.390489123865655
Current iteration=1100, loss=0.3904642228923078
Current iteration=1200, loss=0.3904498357378845
Current iteration=1300, loss=0.3904414918432804
Current iteration=1400, loss=0.3904366389666473
Current iteration=1500, loss=0.39043381039306674
Current iteration=1600, loss=0.39043215900687506
Current iteration=0, loss=0.6370571430238028
Current iteration=100, loss=0.5437890177824172
Current iteration=200, loss=0.5417063984969864
Current iteration=300, loss=0.5411709012859485
Curre

In [173]:
predictions = model_predictions(x_te, ws_LR, pri_jet_num_idx, clean_features, parameters)

In [174]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76416


In [175]:
# Persist the logistic-regression weights that were just trained and scored in this section.
# (This cell previously re-saved the ridge weights `ws_RR`, which the ridge section already writes.)
# NOTE(review): confirm that the consumer of all/weights.npy expects the logistic model.
save_results(ws_LR, clean_features, parameters)

Second score, after replacing -999 in DER_mass_MMC by the defined mean.

In [235]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76258


Third score, after replacing -999 in DER_mass_MMC by the defined median.

In [225]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76196


Score after handling outliers (values more than three standard deviations from the mean are clipped).

In [114]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76416


## Regularized logistic regression

In [149]:
# Optimiser settings shared by every jet-count subset.
max_iters = 2000
gamma = 0.7 # 0.01

# Select one regularization strength per subset (rebinds `lambdas` from the ridge section).
lambdas = []
for y_subset, standardized_dset in zip(ybs, standardized_dsets):
    # Re-encode the targets for the logistic loss: -1 becomes 0, every other label becomes 1.
    y_logistic = np.where(y_subset == -1, 0.0, 1.0)
    initial_w = np.zeros(standardized_dset.shape[1])
    optimal_lambda = logistic_optimal_lambda(y_logistic, standardized_dset, initial_w, max_iters, gamma)
    lambdas += [optimal_lambda]

Iteration 0
Current iteration=0, loss=0.5985665553496784
Current iteration=100, loss=0.40557929202525944
Current iteration=200, loss=0.39793366841149214
Current iteration=300, loss=0.3949550545132445
Current iteration=400, loss=0.3935836792995515
Current iteration=500, loss=0.3929015946370937
Current iteration=600, loss=0.39254697900561397
Current iteration=700, loss=0.3923579038077661
Current iteration=800, loss=0.3922554613715885
Current iteration=900, loss=0.39219934993675315
Current iteration=1000, loss=0.39216837989391523
Current iteration=1100, loss=0.3921511924385661
Current iteration=1200, loss=0.39214161586665547
Current iteration=1300, loss=0.3921362643594388
Current iteration=1400, loss=0.3921332674227051
Current iteration=1500, loss=0.3921315864040248
Current iteration=0, loss=0.39318537128364117
Current iteration=100, loss=0.39312705200197295
Current iteration=0, loss=0.39127746572868966
Current iteration=100, loss=0.3911715816908651
Current iteration=200, loss=0.391163949

Current iteration=100, loss=0.41648703952167043
Iteration 11
Current iteration=0, loss=0.4227585135136648
Current iteration=100, loss=0.42224333864045516
Current iteration=200, loss=0.4221867905460583
Current iteration=300, loss=0.42217773392650737
Current iteration=0, loss=0.42323974414788235
Current iteration=0, loss=0.4208141953117644
Current iteration=100, loss=0.42072178664923826
Current iteration=0, loss=0.4222654914291675
Current iteration=100, loss=0.42217356164397674
Iteration 12
Current iteration=0, loss=0.429464120329188
Current iteration=100, loss=0.42885398884242115
Current iteration=200, loss=0.4288115805435419
Current iteration=300, loss=0.4288071590378056
Current iteration=0, loss=0.42986011840978683
Current iteration=0, loss=0.427368780413748
Current iteration=100, loss=0.4272796432202233
Current iteration=0, loss=0.4287684237724266
Current iteration=100, loss=0.428683773274004
Iteration 13
Current iteration=0, loss=0.43708688939412643
Current iteration=100, loss=0.436

Current iteration=0, loss=0.6372057644955702
Current iteration=100, loss=0.5438416813162126
Current iteration=200, loss=0.5417860172541333
Current iteration=300, loss=0.5412739964189548
Current iteration=400, loss=0.5411155008625274
Current iteration=500, loss=0.5410624806793088
Current iteration=600, loss=0.5410441097741868
Current iteration=700, loss=0.5410376273364207
Current iteration=800, loss=0.5410353169273785
Current iteration=0, loss=0.5402675026071933
Current iteration=100, loss=0.5401480001923136
Current iteration=200, loss=0.5401452281728484
Current iteration=0, loss=0.543584927798681
Current iteration=100, loss=0.5434819360982678
Current iteration=200, loss=0.5434777883114079
Current iteration=0, loss=0.5408228558647676
Current iteration=100, loss=0.5407017039047481
Current iteration=200, loss=0.5406970817244697
Iteration 1
Current iteration=0, loss=0.541282253886665
Current iteration=0, loss=0.5404414833247254
Current iteration=100, loss=0.5403201232866919
Current iterati

Current iteration=0, loss=0.6016242403604695
Current iteration=0, loss=0.5996644473741906
Current iteration=0, loss=0.6022146031987679
Current iteration=0, loss=0.6006384527142085
Iteration 21
Current iteration=0, loss=0.6104185603239362
Current iteration=0, loss=0.6085011713752814
Current iteration=0, loss=0.6108532019224431
Current iteration=0, loss=0.6094331119786355
Iteration 22
Current iteration=0, loss=0.6195364315050962
Current iteration=0, loss=0.6177225657193011
Current iteration=0, loss=0.6198532453458239
Current iteration=0, loss=0.6185942077400954
Iteration 23
Current iteration=0, loss=0.6287322780810444
Current iteration=0, loss=0.6270941773859069
Current iteration=0, loss=0.6289866211974747
Current iteration=0, loss=0.6278888565461147
Iteration 24
Current iteration=0, loss=0.6377404249206915
Current iteration=0, loss=0.6363483422177253
Current iteration=0, loss=0.6379937117279952
Current iteration=0, loss=0.6370533702378707
Iteration 25
Current iteration=0, loss=0.6463141

Current iteration=100, loss=0.5225334822455865
Current iteration=200, loss=0.5225273780677501
Current iteration=0, loss=0.5270437105186521
Current iteration=100, loss=0.5268267202610146
Current iteration=200, loss=0.5268217841161486
Current iteration=0, loss=0.526443195039386
Current iteration=100, loss=0.5263117037869326
Iteration 5
Current iteration=0, loss=0.5273858119125454
Current iteration=100, loss=0.5271542534089363
Current iteration=200, loss=0.5271484953936483
Current iteration=0, loss=0.5233479484207131
Current iteration=100, loss=0.5231275967932932
Current iteration=200, loss=0.5231217729946952
Current iteration=0, loss=0.5276197061520089
Current iteration=100, loss=0.5274051649294185
Current iteration=200, loss=0.5274005925185773
Current iteration=0, loss=0.5270165262444391
Current iteration=100, loss=0.5268870479854165
Iteration 6
Current iteration=0, loss=0.5281460888694698
Current iteration=100, loss=0.527910174829217
Current iteration=200, loss=0.5279035195387566
Curre

Current iteration=1900, loss=0.684307126356827
Current iteration=0, loss=0.6838076672512111
Current iteration=100, loss=0.682300639963849
Current iteration=200, loss=0.6823005961663221
Current iteration=300, loss=0.6823005961649768
Current iteration=400, loss=0.6823005961649768
Current iteration=500, loss=0.6823005961649768
Current iteration=600, loss=0.6823005961649768
Current iteration=700, loss=0.6823005961649768
Current iteration=800, loss=0.6823005961649768
Current iteration=900, loss=0.6823005961649768
Current iteration=1000, loss=0.6823005961649768
Current iteration=1100, loss=0.6823005961649768
Current iteration=1200, loss=0.6823005961649768
Current iteration=1300, loss=0.6823005961649768
Current iteration=1400, loss=0.6823005961649768
Current iteration=1500, loss=0.6823005961649768
Current iteration=1600, loss=0.6823005961649768
Current iteration=1700, loss=0.6823005961649768
Current iteration=1800, loss=0.6823005961649768
Current iteration=1900, loss=0.6823005961649768
Curren

In [151]:
lambdas

[0.0001, 0.0001, 0.00013738237958832623]

Lambdas obtained when replacing -999 in DER_mass_MMC by the defined mean.

In [263]:
lambdas

[0.0001, 0.0001, 0.0001]

In [152]:
# Optimiser settings shared by every jet-count subset.
max_iters = 2000
gamma = 0.7 # 0.01

# One regularized logistic model per subset, using the penalty selected for that subset.
ws_RLR = []
for y_subset, standardized_dset, subset_lambda in zip(ybs, standardized_dsets, lambdas):
    # Re-encode the targets for the logistic loss: -1 becomes 0, every other label becomes 1.
    y_logistic = np.where(y_subset == -1, 0.0, 1.0)
    initial_w = np.zeros(standardized_dset.shape[1])
    loss, w_RLR = reg_logistic_regression(y_logistic, standardized_dset, subset_lambda, initial_w, max_iters, gamma)
    ws_RLR += [w_RLR]

Current iteration=0, loss=0.5981457873646676
Current iteration=100, loss=0.40570366954781983
Current iteration=200, loss=0.39811158731651086
Current iteration=300, loss=0.3951233762054761
Current iteration=400, loss=0.3937249484158812
Current iteration=500, loss=0.39302899842308897
Current iteration=600, loss=0.3926703056127306
Current iteration=700, loss=0.39248120630136046
Current iteration=800, loss=0.3923800033816537
Current iteration=900, loss=0.39232526283559516
Current iteration=1000, loss=0.3922954253301242
Current iteration=1100, loss=0.3922790701887242
Current iteration=1200, loss=0.39227006822843535
Current iteration=1300, loss=0.3922650984133083
Current iteration=1400, loss=0.3922623484964606
Current iteration=1500, loss=0.3922608243656276
Current iteration=0, loss=0.6370622906477821
Current iteration=100, loss=0.5440962786181257
Current iteration=200, loss=0.5421053931684284
Current iteration=300, loss=0.5416118282423613
Current iteration=400, loss=0.54146090795682
Current

In [265]:
predictions = model_predictions(x_te, ws_RLR, pri_jet_num_idx, clean_features, parameters)

In [153]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7599400000000001


Second score, after replacing -999 in DER_mass_MMC by the defined mean.

In [255]:
compute_accuracy(y_te,predictions)

Accuracy: 0.76004


Third score, after replacing -999 in DER_mass_MMC by the defined median.

In [267]:
compute_accuracy(y_te,predictions)

Accuracy: 0.7593799999999999
