In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math
import random
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv' # train data path here 
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [3]:
print(tX.shape)

(250000, 30)


### Changing the labels to {0,1}

In [4]:
# y == 0 non detected Boson, y == 1 detected Boson
y_ = np.array([0 if l == -1 else 1 for l in y])

### Dividing the features by the number of jets

In [5]:
# dividing the rows of tX by the number of jets, dropping the column Pri_Jet_Num and adding an extra column of np.ones
zero_indices = []
one_indices = []
two_three_indices = []
zero_indices = np.where(tX[:,22]==0)[0]
one_indices = np.where(tX[:,22]==1)[0]
two_three_indices = np.where(np.logical_or(tX[:,22]==2, tX[:,22]==3))[0]
tX_0 = tX[zero_indices, :]
tX_0 = np.delete(tX_0, 22, axis=1)
tX_1 = tX[one_indices, :]
tX_1 = np.delete(tX_1, 22, axis=1)
tX_2_3 = tX[two_three_indices, :]

### Dividing also the output by the type of particle

In [6]:
y_0 = y_[zero_indices]
y_1 = y_[one_indices]
y_2_3 = y_[two_three_indices]

### Adding a column of zeros and ones to detect whether the mass has been measured or not

In [7]:
# take the indices where the mass is not calculated, add the column which has 0 in those indices
# and 1 everywhere else for all matrices 0,1,2_3
zero_indices_0 = np.where(tX_0[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_0 else 1 for i in range(tX_0.shape[0])])
tX_0 = np.insert(tX_0, 0, column_to_add, axis=1)
zero_indices_1 = np.where(tX_1[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_1 else 1 for i in range(tX_1.shape[0])])
tX_1 = np.insert(tX_1, 0, column_to_add, axis=1)
zero_indices_2_3 = np.where(tX_2_3[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_2_3 else 1 for i in range(tX_2_3.shape[0])])
tX_2_3 = np.insert(tX_2_3, 0, column_to_add, axis=1)

### Throwing away the outliers from the training data

In [8]:
for i in range(1, tX_2_3.shape[1]):
    index_column_valid =np.where(tX_2_3[:,i] != -999.)[0]
    column_25_quantile, column_75_quantile = np.quantile(tX_2_3[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
    interquantile = column_75_quantile-column_25_quantile
    column_15_quantile, column_85_quantile = np.quantile(tX_2_3[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
    indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_2_3[index_column_valid,i])
                                             | (tX_2_3[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
    #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
    median = np.median(tX_2_3[index_column_valid, i], axis = 0)
    #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
    #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
    #print(median)
    tX_2_3[index_column_valid[indices_outliers],i] =  median

In [9]:
col_to_delete_0 = []
for i in range(1, tX_0.shape[1]):
    index_column_valid =np.where(tX_0[:,i] != -999.)[0]
    if len(index_column_valid)==0:
        #we drop the column (we will have to do the same for the test set as well)
        col_to_delete_0.append(i)
    else :
        column_25_quantile, column_75_quantile = np.quantile(tX_0[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
        interquantile = column_75_quantile-column_25_quantile
        column_15_quantile, column_85_quantile = np.quantile(tX_0[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
        indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_0[index_column_valid,i])
                                             | (tX_0[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
        #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
        median = np.median(tX_0[index_column_valid, i], axis = 0)
        #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
        #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
        #print(median)
        tX_0[index_column_valid[indices_outliers],i] =  median
col_to_delete_0.append(tX_0.shape[1]-1)
tX_0 = np.delete(tX_0, col_to_delete_0, axis=1)
print(tX_0.shape)
print(col_to_delete_0)

(99913, 19)
[5, 6, 7, 13, 23, 24, 25, 26, 27, 28, 29]


In [10]:
col_to_delete_1 = []
for i in range(1, tX_1.shape[1]):
    index_column_valid =np.where(tX_1[:,i] != -999.)[0]
    if len(index_column_valid)==0:
        #we drop the column (we will have to do the same for the test set as well)
        col_to_delete_1.append(i)
    else :
        column_25_quantile, column_75_quantile = np.quantile(tX_1[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
        interquantile = column_75_quantile-column_25_quantile
        column_15_quantile, column_85_quantile = np.quantile(tX_1[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
        indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_1[index_column_valid,i])
                                             | (tX_1[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
        #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
        median = np.median(tX_1[index_column_valid, i], axis = 0)
        #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
        #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
        #print(median)
        tX_1[index_column_valid[indices_outliers],i] =  median
tX_1 = np.delete(tX_1, col_to_delete_1, axis=1)
print(col_to_delete_1)

[5, 6, 7, 13, 26, 27, 28]


### Now we substitute the -999 values with the median

In [11]:
for i in range(1, tX_2_3.shape[1]):
    index_column_non_valid =np.where(tX_2_3[:,i] == -999.)[0]
    index_column_valid =np.where(tX_2_3[:,i] != -999.)[0]
    median = np.median(tX_2_3[index_column_valid, i], axis = 0)
    tX_2_3[index_column_non_valid,i] =  median

In [12]:
for i in range(1, tX_1.shape[1]):
    index_column_non_valid =np.where(tX_1[:,i] == -999.)[0]
    index_column_valid =np.where(tX_1[:,i] != -999.)[0]
    median = np.median(tX_1[index_column_valid, i], axis = 0)
    tX_1[index_column_non_valid,i] =  median

In [13]:
for i in range(1, tX_0.shape[1]):
    index_column_non_valid =np.where(tX_0[:,i] == -999.)[0]
    index_column_valid =np.where(tX_0[:,i] != -999.)[0]
    median = np.median(tX_0[index_column_valid, i], axis = 0)
    tX_0[index_column_non_valid,i] =  median

### Now we standardize the data

In [14]:
tX_2_3[:,1:], mean_2_3,std_2_3 = standardize(tX_2_3[:,1:]) #we standardize everything a part from the column added manually

In [15]:
print(tX_2_3)
print(np.count_nonzero(tX_2_3 == -999.))

[[ 1.          0.97351249  0.46588281 ...  0.61614788 -1.36131161
  -0.70374641]
 [ 1.         -0.82851663 -0.77157038 ...  0.11608109  1.71034105
   0.2995537 ]
 [ 1.          1.3538447  -0.27431587 ...  0.07030726 -1.52202162
   0.12704911]
 ...
 [ 1.          0.04006392  0.02513449 ...  0.25930888  0.22982758
   0.42357227]
 [ 1.          0.66304099 -1.0843679  ...  0.29031696 -1.21821366
  -0.20699627]
 [ 1.          0.04006392  0.31977857 ... -0.02271698 -0.62490751
   0.05569682]]
0


In [16]:
print(tX_0)

[[ 1.00000e+00  1.43905e+02  8.14170e+01 ...  3.10820e+01  6.00000e-02
   8.60620e+01]
 [ 1.00000e+00  1.75864e+02  1.69150e+01 ...  2.72300e+00 -8.71000e-01
   5.31310e+01]
 [ 1.00000e+00  1.05594e+02  5.05590e+01 ...  3.77910e+01  2.40000e-02
   1.29804e+02]
 ...
 [ 1.00000e+00  1.11452e+02  5.81790e+01 ...  4.67370e+01 -8.67000e-01
   8.04080e+01]
 [ 1.00000e+00  9.49510e+01  1.93620e+01 ...  1.21500e+01  8.11000e-01
   1.12718e+02]
 [ 1.00000e+00  1.11452e+02  7.27560e+01 ...  4.07290e+01 -1.59600e+00
   9.94050e+01]]


In [17]:
tX_0[:,1:],mean_0,std_0 = standardize(tX_0[:,1:]) 

In [18]:
print(tX_0)

[[ 1.          1.05744907  0.7827665  ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          2.14505538 -1.37519852 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.         -0.24632406 -0.2496121  ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.         -0.0469687   0.00532098 ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.         -0.60851918 -1.29333222 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.         -0.0469687   0.49300595 ...  0.60787347 -0.86778508
  -0.49908812]]


In [19]:
tX_1[:,1:],mean_1,std_1 = standardize(tX_1[:,1:])

In [20]:
print(tX_1)

[[ 1.00000000e+00  1.55539992e+00  7.27047143e-01 ...  3.98445313e-01
   6.45414781e-01 -4.14297220e-01]
 [ 1.00000000e+00  1.45598722e-03  3.58462301e+00 ...  1.12748232e+00
  -1.10752634e+00 -4.89864560e-01]
 [ 1.00000000e+00  1.36261180e+00 -1.05809645e+00 ... -3.92076740e-01
  -9.40265168e-01 -1.01072441e+00]
 ...
 [ 1.00000000e+00  1.45598722e-03  1.01732036e+00 ... -4.67286130e-01
  -3.80160315e-01  8.39087556e-01]
 [ 1.00000000e+00  6.75509966e-01  9.95415259e-01 ... -6.76994064e-01
   1.39533906e+00  5.32418071e-01]
 [ 1.00000000e+00 -2.21030015e-01  4.74893699e-01 ...  9.88591985e-01
  -8.30516498e-02 -5.76298292e-01]]


### We insert the column for the bias term

In [21]:
tX_tilda_0 = np.insert(tX_0, 0, np.ones(tX_0.shape[0]), axis=1)
tX_tilda_1 = np.insert(tX_1, 0, np.ones(tX_1.shape[0]), axis=1)
tX_tilda_2_3 = np.insert(tX_2_3, 0, np.ones(tX_2_3.shape[0]), axis=1)

In [22]:
print(tX_tilda_0)

[[ 1.          1.          1.05744907 ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          1.          2.14505538 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.          1.         -0.24632406 ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.          1.         -0.0469687  ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.          1.         -0.60851918 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.          1.         -0.0469687  ...  0.60787347 -0.86778508
  -0.49908812]]


In [23]:
# colors = ['red', 'blue']
# x_pos=[]
# x_neg=[]

# for j in range(len(y)):
#  if(y[j]==1):
#       x_pos.insert(0,tX[j])
#    else:
#        x_neg.insert(0,tX[j])
# xpos = np.array(x_pos)
# xneg = np.array(x_neg)
# for i in range(tX.shape[1]):
#  plt.hist(xpos[:,i], alpha = 0.5, color = 'r', bins = 100)
#  plt.hist(xneg[:,i], alpha = 0.5, color = 'b', bins = 100)
#  plt.show()

## Do your thing crazy machine learning thing here :) ...

In [24]:
def compute_loss(y, tx, w):
    N = y.shape[0]
    e = y - tx @ w 
    loss = 1/(2*N) * np.dot(e,e)
    return loss

In [25]:
def compute_gradient(y, tx, w):
    N = y.shape[0]
    e = y - tx @ w
    gradient = -(1/N) * (tx.T) @ (e)
    return gradient

In [26]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y,tx,w)
        gradient = compute_gradient(y,tx,w)
        w = w - gamma * gradient
        ws.append(w)
        losses.append(loss)
        # print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
        # bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [None]:
def cross_validation_GD(y, x, k_indices, k, degree, gamma = 3.0e-02):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    #w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    _,  w_opt_training = least_squares_GD(y_training, x_training_augmented,
                                                        np.zeros(x_training_augmented.shape[1]), 1000, gamma)
    loss_tr = calculate_loss(y_training, x_training_augmented, w_opt_training[-1])
    loss_te = calculate_loss(y_testing, x_testing_augmented, w_opt_training[-1])
    return loss_tr, loss_te

In [27]:
def compute_stoch_gradient(y, tx, w):
    N = y.shape[0]
    random_number = random.randint(0,N)
    #random_number =1
    xn = tx[random_number,:]
    random_gradient = - np.dot(xn, y[random_number] - np.dot(xn,w))
    return random_gradient

In [28]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y,tx,w)
        stoch_gradient = compute_stoch_gradient(y,tx,w)
        w = w - gamma * stoch_gradient
        ws.append(w)
        losses.append(loss)
        # print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
        #    bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [29]:
from proj1_helpers import *

def least_squares(y, tx):
    """calculate the least squares solution."""
    forcing_term = np.transpose(tx) @ y
    coefficient_matrix = np.transpose(tx) @ tx
    w = np.linalg.solve(coefficient_matrix, forcing_term)
    return w

def test_your_least_squares(y, tx):
    """compare the solution of the normal equations with the weights returned by gradient descent algorithm."""
    w_least_squares = least_squares(y, tx)
    initial_w = np.zeros(tx.shape[1])
    max_iters = 50
    gamma = 0.7
    losses_gradient_descent, w_gradient_descent = gradient_descent(y, tx, initial_w, max_iters, gamma)
    w = w_gradient_descent[-1]
    err = np.linalg.norm(w_least_squares-w)
    return err

In [30]:
def ridge_regression(y, tx, lambda_):
    """implement ridge regression."""
    N = tx.shape
    lambda_prime = 2 * N[0] * lambda_
    coefficient_matrix = np.transpose(tx) @ tx + lambda_prime * np.eye(N[1])
    forcing_term = np.transpose(tx) @ y
    w = np.linalg.solve(coefficient_matrix, forcing_term)
    return w

def debug_ridge(y, tx):
    """debugging the ridge regression by setting lambda=0."""
    w_least_squares = least_squares(y, tx)
    w_0 = ridge_regression(y, tx, 0)
    err = np.linalg.norm(w_least_squares-w_0)
    return err

In [31]:
def sigmoid(t):
    """apply the sigmoid function on t."""
    positive_indices = np.where(t >= 0)[0]
    negative_indices = np.where(t < 0)[0]
    z = np.zeros(len(t))
    z[positive_indices] = 1 / (1+np.exp(-t[positive_indices]))
    z[negative_indices] = np.exp(t[negative_indices]) / (1 + np.exp(t[negative_indices]))
    return z

In [32]:
def calculate_loss(y, tx, w):
    """compute the loss: negative log likelihood."""
    epsilon = 1.0e-12
    # term1 = sigmoid(tx @ w)
    # term1[y == 0] = 1
    # term2 = 1 - sigmoid(tx @ w)
    # term2[y == 1] = 1
    # summands = np.multiply(y, np.log(term1)) + np.multiply(1 - y, np.log(term2))
    # e = - y * (tx @ w) + np.log(1 + np.exp(tx @ w))
    # return e.sum()
    pos_ind = np.where(tx @ w >=0)[0]
    neg_ind = np.where(tx @ w <0)[0]
    loss_pos = - y[pos_ind] * (tx @ w)[pos_ind] + (tx @ w)[pos_ind] + np.log(1+np.exp(-(tx @ w)[pos_ind]))
    loss_neg = - y[neg_ind] * (tx @ w)[neg_ind] - (tx @ w)[neg_ind] + np.log(1+np.exp((tx @ w)[neg_ind]))
    # loss = - np.sum(y*np.log(sigmoid(tx @ w)+epsilon) + ((1 - y) * np.log(1-sigmoid(tx@w) + epsilon)))
    # return e.sum()
    return loss_pos.sum() + loss_neg.sum()

In [33]:
def calculate_gradient(y, tx, w):
    """compute the gradient of loss."""
    return np.transpose(tx) @ (sigmoid(tx @ w) - y)

In [34]:
def learning_by_gradient_descent(y, tx, w_initial, gamma, max_iters):
    """
    Do one step of gradient descent using logistic regression.
    Return the loss and the updated w.
    """
    losses = []
    w = w_initial
    for iter in range(max_iters):
        grad = calculate_gradient(y, tx, w)
        w = w - gamma * grad
        if iter %100 == 0:
            gamma = gamma/2
        loss = calculate_loss(y, tx, w)
        losses.append(loss)
    return losses, w

In [35]:
losses1, w1 = learning_by_gradient_descent(y_0, tX_tilda_0, np.zeros(tX_tilda_0.shape[1]), 0.8, 10000)
print(losses1)

[5179248944.798144, 4548292501.191404, 3873264747.21586, 3204189882.4289737, 2558766497.952185, 1925840421.0113575, 1358134428.6513941, 806750192.3189367, 2000410682.432268, 1601256172.1407366, 2113524821.0324888, 1606047298.1298404, 1990415844.7863326, 1640858641.9754827, 1934131711.4022741, 1285499978.8239229, 2039587553.53073, 1427677293.0631957, 1887233645.3011723, 1447488537.8697784, 1922910135.0624075, 1269966212.2418594, 1955256377.1863978, 1400327379.5162985, 1845506548.8767402, 1264731822.7936993, 1931658652.60472, 1314650368.8542542, 1862692520.787514, 1330058160.1396096, 1877959970.3375432, 1229045186.878255, 1902392318.7811527, 1308156948.822424, 1844261214.6761906, 1241157611.7496824, 1894851670.6505013, 1255749330.385391, 1860264536.3923934, 1274839497.858362, 1859682719.8045244, 1218658685.3284106, 1880581668.544345, 1258713077.568829, 1850203797.9833395, 1235051327.652052, 1873359564.5703435, 1230356581.4284768, 1859083288.6203346, 1244041475.3119462, 1856766909.6040347

In [36]:
def calculate_hessian(y, tx, w):
    """return the Hessian of the loss function."""
    diag = sigmoid(tx @ w) * (1 - sigmoid(tx @ w))
    D = diag * np.eye(tx.shape[0])
    return np.transpose(tx) @ D @ tx

In [37]:
def logistic_regression(y, tx, w):
    """return the loss, gradient, and Hessian."""
    grad = calculate_gradient(y, tx, w)
    hess = calculate_hessian(y, tx, w)
    loss = calculate_loss(y, tx, w)
    return loss, grad, hess

In [38]:
def learning_by_newton_method(y, tx, w, gamma):
    """
    Do one step on Newton's method.
    return the loss and updated w.
    """
    loss, grad, hess = logistic_regression(y, tx, w)
    sol = np.linalg.solve(hess, grad)
    w = w - gamma * sol
    return loss, w

In [39]:
def penalized_logistic_regression(y, tx, w, lambda_):
    """return the loss, gradient"""
    loss = calculate_loss(y, tx, w) + lambda_*np.linalg.norm(w) ** 2
    grad = calculate_gradient(y, tx, w) + 2*lambda_*w
    hess = calculate_hessian(y, tx, w) + 2*lambda_*np.eye(w.shape[0])
    return loss, grad, hess

In [40]:
def learning_by_penalized_gradient(y, tx, w_initial, gamma, max_iters, lambda_):
    """
    Do one step of gradient descent, using the penalized logistic regression.
    Return the loss and updated w.
    """
    threshold = 1e-8
    losses = []
    w = w_initial
    for iter in range(max_iters):
        grad = calculate_gradient(y, tx, w) + 2*lambda_*w
        w = w - gamma * grad
        # regularizer = lambda_ / 2 * np.linalg.norm(w) ** 2
        # summing = np.sum(np.log(1+np.exp(tx.dot(w))))
        # y_component = y.T.dot(tx.dot(w)).flatten().flatten()
        # loss = summing - y_component*regularizer
        loss = calculate_loss(y, tx, w) + lambda_*np.linalg.norm(w) ** 2
        losses.append(loss)
        if iter % 25 == 0:
            gamma = gamma / 2
            print("Current iteration={i}, loss={l}".format(i=iter, l=loss))
        # converge criterion
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    return losses, w

In [41]:
def build_poly(x, degree):
    """polynomial basis functions for input data x, for j=1 up to j=degree."""
    powers = np.arange(1, degree + 1)
    phi = np.column_stack([np.power(x[:,0], exponent) for exponent in powers])
    for i in range(1, x.shape[1]):
        phi_i = np.column_stack([np.power(x[:,i], exponent) for exponent in powers])
        phi = np.column_stack([phi, phi_i])
    return phi

In [42]:
def build_k_indices(y, k_fold, seed):
    N = y.shape[0]
    np.random.seed(seed)
    interval = int(np.floor(N / k_fold))
    indices = np.random.permutation(N)
    k_indices = [indices[k * interval: (k + 1) * interval]
                 for k in range(k_fold)]
    return np.array(k_indices)

In [43]:
def cross_validation_ridge(y, x, k_indices, k, lambda_, degree):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    loss_tr = compute_loss(y_training, x_training_augmented, w_opt_training)
    loss_te = compute_loss(y_testing, x_testing_augmented, w_opt_training)
    return loss_tr, loss_te

In [44]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_0, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_0, tX_tilda_0, k_indices, k,
                                                lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
print(testing_loss)
lambda_opt, degree_opt = lambdas[best_result[0]],degrees[best_result[1]]
print(lambda_opt, degree_opt)

KeyboardInterrupt: 

In [None]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_1, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_1, tX_tilda_1, k_indices, k, 
                                                lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
print(testing_loss)
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(lambda_opt, degree_opt)

In [None]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_2_3, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_2_3, tX_tilda_2_3, k_indices, k,
                                            lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(testing_loss)
print(lambda_opt, degree_opt) 

We train the model for ridge regression with the best hyperparameters

In [None]:
tX_tilda_0_augmented = build_poly(tX_tilda_0, degree = 6)
w_ridge_0 = ridge_regression(y_0, tX_tilda_0_augmented, lambda_= 0.00061054)
#print(w_ridge_0)

In [None]:
tX_tilda_1_augmented = build_poly(tX_tilda_1, degree=6)
w_ridge_1 = ridge_regression(y_1, tX_tilda_1_augmented, lambda_= 5.17947468e-05)
#print(w_ridge_1)

In [None]:
tX_tilda_2_3_augmented = build_poly(tX_tilda_2_3, degree=6)
w_ridge_2_3 = ridge_regression(y_2_3, tX_tilda_2_3_augmented, lambda_= 0.00026827)
#print(w_ridge_2_3)

We will now try with logistic regression

In [45]:
def cross_validation_logistic(y, x, k_indices, k, lambda_, degree, gamma = 3.0e-02):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    #w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    _,  w_opt_training = learning_by_penalized_gradient(y_training, x_training_augmented,
                                                        np.ones(x_training_augmented.shape[1]), gamma, 1000, lambda_)
    loss_tr = calculate_loss(y_training, x_training_augmented, w_opt_training)
    loss_te = calculate_loss(y_testing, x_testing_augmented, w_opt_training)
    return loss_tr, loss_te

We perform cross validation in order to find the best parameters degree, lamdba and gamma caracterizing logistic regression

In [46]:
print(tX_tilda_0)

[[ 1.          1.          1.05744907 ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          1.          2.14505538 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.          1.         -0.24632406 ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.          1.         -0.0469687  ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.          1.         -0.60851918 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.          1.         -0.0469687  ...  0.60787347 -0.86778508
  -0.49908812]]


In [47]:
degrees = np.arange(2, 6)
lambdas = np.logspace(-2, 0, 5)
gamma = 3.0e-02
k_fold = 3
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_0, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_logistic(y_0, tX_tilda_0, k_indices, k,
                                                lambdas[index1], degrees[index2], gamma)
        train_loss += loss_tr
        test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(lambda_opt, degree_opt)

Current iteration=0, loss=2726856128.7656813
Current iteration=25, loss=241762524.9605943
Current iteration=50, loss=77359417.75728951
Current iteration=75, loss=56395353.924070515
Current iteration=100, loss=19985876.49675651
Current iteration=125, loss=28200703.349070482
Current iteration=150, loss=15667710.38642564
Current iteration=175, loss=8041295.945319406
Current iteration=200, loss=6952061.924182257
Current iteration=225, loss=6436646.177311907
Current iteration=250, loss=6185324.943271471
Current iteration=275, loss=6061145.97387116
Current iteration=300, loss=5999391.593439781
Current iteration=325, loss=5968606.931328493
Current iteration=350, loss=5953239.791080056
Current iteration=375, loss=5945562.889935108
Current iteration=400, loss=5941726.200781571
Current iteration=425, loss=5939808.271084353
Current iteration=450, loss=5938849.416220061
Current iteration=475, loss=5938370.016079022
Current iteration=500, loss=5938130.322405921
Current iteration=525, loss=5938010.4

In [48]:
print(np.logspace(-2,0,5))

[0.01       0.03162278 0.1        0.31622777 1.        ]


In [49]:
degrees = np.arange(2, 6)
lambdas = np.logspace(-5, 0, 5)
gamma = 3.0e-02
k_fold = 3
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_1, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
            train_loss = 0
            test_loss = 0
            for k in range(k_fold):
                loss_tr, loss_te = cross_validation_logistic(y_1, tX_tilda_1, k_indices, k,
                                                lambdas[index1], degrees[index2], gamma)
                train_loss += loss_tr
                test_loss += loss_te
            training_loss[index1, index2] = train_loss / k_fold
            testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(lambda_opt, degree_opt)

Current iteration=0, loss=1811062395.5550945
Current iteration=25, loss=289913244.2874549
Current iteration=50, loss=81585531.68744601
Current iteration=75, loss=27927584.284584858
Current iteration=100, loss=27498018.084268767
Current iteration=125, loss=23420058.92227279
Current iteration=150, loss=21686032.807129
Current iteration=175, loss=20856119.480833698
Current iteration=200, loss=20459835.312905077
Current iteration=225, loss=20266025.270243924
Current iteration=250, loss=20170416.647644427
Current iteration=275, loss=20122743.221071232
Current iteration=300, loss=20098936.704539184
Current iteration=325, loss=20087041.925297577
Current iteration=350, loss=20081096.77597437
Current iteration=375, loss=20078124.80070362
Current iteration=400, loss=20076638.95460261
Current iteration=425, loss=20075896.066961773
Current iteration=450, loss=20075524.63199472
Current iteration=475, loss=20075338.9167245
Current iteration=500, loss=20075246.059642702
Current iteration=525, loss=20

In [51]:
degrees = np.arange(2, 6)
lambdas = np.logspace(-5, 0, 5)
gamma = 3.0e-02
k_fold = 3
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_2_3, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
            train_loss = 0
            test_loss = 0
            for k in range(k_fold):
                loss_tr, loss_te = cross_validation_logistic(y_2_3, tX_tilda_2_3, k_indices, k,
                                                lambdas[index1], degrees[index2], gamma)
                train_loss += loss_tr
                test_loss += loss_te
            training_loss[index1, index2] = train_loss / k_fold
            testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt, degree_opt = lambdas[best_result[0]],degrees[best_result[1]]
print(lambda_opt, degree_opt)

Current iteration=0, loss=1888205713.1982095
Current iteration=25, loss=74751661.1136548
Current iteration=50, loss=62767906.97449791
Current iteration=75, loss=43629497.787527174
Current iteration=100, loss=36342435.199700125
Current iteration=125, loss=32989972.738524288
Current iteration=150, loss=31404724.99205972
Current iteration=175, loss=30641799.1098223
Current iteration=200, loss=30257590.360522322
Current iteration=225, loss=30065570.781350464
Current iteration=250, loss=29970315.521394335
Current iteration=275, loss=29923071.26837485
Current iteration=300, loss=29899391.499219246
Current iteration=325, loss=29887536.197534326
Current iteration=350, loss=29881605.374192305
Current iteration=375, loss=29878639.23976672
Current iteration=400, loss=29877156.010374907
Current iteration=425, loss=29876414.354866702
Current iteration=450, loss=29876043.517116044
Current iteration=475, loss=29875858.095767647
Current iteration=500, loss=29875765.38447846
Current iteration=525, loss

In [95]:
tX_tilda_0_augmented = build_poly(tX_tilda_0, degree = 2)
_, w_logistic_0 = learning_by_penalized_gradient(y_0, tX_tilda_0_augmented, np.zeros(tX_tilda_0_augmented.shape[1]), 3.0e-02,
                                              1000, lambda_= 1)

Current iteration=0, loss=2031272077.17814
Current iteration=25, loss=1330146427.649928
Current iteration=50, loss=97531868.44670574
Current iteration=75, loss=259796953.59472895
Current iteration=100, loss=62910213.66680528
Current iteration=125, loss=24560017.557555895
Current iteration=150, loss=6349578.649698712
Current iteration=175, loss=4024290.2943855436
Current iteration=200, loss=6287733.35624327
Current iteration=225, loss=2992300.603425447
Current iteration=250, loss=2717842.060216559
Current iteration=275, loss=2591871.47539018
Current iteration=300, loss=2530315.131203707
Current iteration=325, loss=2499812.074830965
Current iteration=350, loss=2484622.980960858
Current iteration=375, loss=2477043.6144315805
Current iteration=400, loss=2473257.6018508812
Current iteration=425, loss=2471365.482594677
Current iteration=450, loss=2470419.6491374276
Current iteration=475, loss=2469946.7897484945
Current iteration=500, loss=2469710.374021007
Current iteration=525, loss=2469592

In [92]:
tX_tilda_1_augmented = build_poly(tX_tilda_1, degree = 2)
_, w_logistic_1 = learning_by_penalized_gradient(y_1, tX_tilda_1_augmented, np.zeros(tX_tilda_1_augmented.shape[1]), 3.0e-02,
                                              1000, lambda_= 1)

Current iteration=0, loss=929169492.1900488
Current iteration=25, loss=182612579.186406
Current iteration=50, loss=345598718.2863831
Current iteration=75, loss=251205003.41826034
Current iteration=100, loss=39011783.15840742
Current iteration=125, loss=23631907.21527367
Current iteration=150, loss=19701052.134486884
Current iteration=175, loss=17930962.027047377
Current iteration=200, loss=17082738.61700043
Current iteration=225, loss=16667326.590739848
Current iteration=250, loss=16461426.860439911
Current iteration=275, loss=16359153.98698016
Current iteration=300, loss=16308162.059249176
Current iteration=325, loss=16282693.006489042
Current iteration=350, loss=16269964.096324068
Current iteration=375, loss=16263600.953134459
Current iteration=400, loss=16260419.694366151
Current iteration=425, loss=16258829.144796882
Current iteration=450, loss=16258033.889528621
Current iteration=475, loss=16257636.266605796
Current iteration=500, loss=16257437.4563201
Current iteration=525, loss=

In [74]:
tX_tilda_2_3_augmented = build_poly(tX_tilda_2_3, degree = 2)
_, w_logistic_2_3 = learning_by_penalized_gradient(y_2_3, tX_tilda_2_3_augmented, np.zeros(tX_tilda_2_3_augmented.shape[1]),
                                                3.0e-02, 1000, lambda_= 1)

Current iteration=0, loss=416199879.9349383
Current iteration=25, loss=164593499.16912028
Current iteration=50, loss=85943278.32508574
Current iteration=75, loss=45442774.499672435
Current iteration=100, loss=65652371.24763463
Current iteration=125, loss=26671670.150418367
Current iteration=150, loss=23276006.73615902
Current iteration=175, loss=21715031.05872106
Current iteration=200, loss=20972848.612764854
Current iteration=225, loss=20602982.836566735
Current iteration=250, loss=20418370.701531958
Current iteration=275, loss=20326454.253680114
Current iteration=300, loss=20280595.14112848
Current iteration=325, loss=20257687.396337852
Current iteration=350, loss=20246238.17325544
Current iteration=375, loss=20240514.637927227
Current iteration=400, loss=20237653.132756617
Current iteration=425, loss=20236222.44519263
Current iteration=450, loss=20235507.1173689
Current iteration=475, loss=20235149.45743371
Current iteration=500, loss=20234970.628516737
Current iteration=525, loss=2

In [75]:
### Generate predictions and save ouput in csv format for submission:

In [76]:
DATA_TEST_PATH = '../data/test.csv' # TODO: download train data and supply path here 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [77]:
print(tX_test.shape)

(568238, 30)


We will now format the tX_test as we did for tX_train

### we split the test into the three subgroups

In [78]:
zero_indices = []
one_indices = []
two_three_indices = []
zero_indices = np.where(tX_test[:,22]==0)[0]
one_indices = np.where(tX_test[:,22]==1)[0]
two_three_indices = np.where(np.logical_or(tX_test[:,22]==2, tX_test[:,22]==3))[0]
tX_test_0 = tX_test[zero_indices, :]
tX_test_0 = np.delete(tX_test_0, 22, axis=1)
tX_test_1 = tX_test[one_indices, :]
tX_test_1 = np.delete(tX_test_1, 22, axis=1)
tX_test_2_3 = tX_test[two_three_indices, :]

### Adding a column of zeros and ones to detect whether the mass has been measured or not

In [79]:
# take the indices where the mass is not calculated, add the column which has 0 in those indices
# and 1 everywhere else for all matrices 0,1,2_3
zero_indices_0 = np.where(tX_test_0[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_0 else 1 for i in range(tX_test_0.shape[0])])
tX_test_0 = np.insert(tX_test_0, 0, column_to_add, axis=1)
zero_indices_1 = np.where(tX_test_1[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_1 else 1 for i in range(tX_test_1.shape[0])])
tX_test_1 = np.insert(tX_test_1, 0, column_to_add, axis=1)
zero_indices_2_3 = np.where(tX_test_2_3[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_2_3 else 1 for i in range(tX_test_2_3.shape[0])])
tX_test_2_3 = np.insert(tX_test_2_3, 0, column_to_add, axis=1)

### We drop the same columns we have dropped for the X training

In [80]:
tX_test_0 = np.delete(tX_test_0, col_to_delete_0, axis=1)
tX_test_1 = np.delete(tX_test_1, col_to_delete_1, axis=1)

### Now we substitute the -999 values with the median

In [81]:
for i in range(1, tX_test_2_3.shape[1]):
    index_column_non_valid =np.where(tX_test_2_3[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_2_3[:,i] != -999.)[0]
    median = np.median(tX_test_2_3[index_column_valid, i], axis = 0)
    tX_test_2_3[index_column_non_valid,i] =  median

In [82]:
for i in range(1, tX_test_1.shape[1]):
    index_column_non_valid =np.where(tX_test_1[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_1[:,i] != -999.)[0]
    median = np.median(tX_test_1[index_column_valid, i], axis = 0)
    tX_test_1[index_column_non_valid,i] =  median

In [83]:
for i in range(1, tX_test_0.shape[1]):
    index_column_non_valid =np.where(tX_test_0[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_0[:,i] != -999.)[0]
    median = np.median(tX_test_0[index_column_valid, i], axis = 0)
    tX_test_0[index_column_non_valid,i] =  median

### We standardize the test set using the mean and the standard deviation of the training

In [84]:
print(tX_test_0.shape)

(227458, 19)


In [85]:
print(tX_0.shape)

(99913, 19)


In [86]:
def standardize_test(x, mean, std):
    """Standardize the test set."""
    x = x - mean
    x = x / std
    return x

In [87]:
tX_test_0[:,1:] = standardize_test(tX_test_0[:,1:], mean_0, std_0)
tX_test_1[:,1:] = standardize_test(tX_test_1[:,1:], mean_1, std_1)
tX_test_2_3[:,1:]= standardize_test(tX_test_2_3[:,1:], mean_2_3, std_2_3) #we standardize everything a part from the column added manually

### We insert the column for the bias term

In [88]:
tX_tilda_test_0 = np.insert(tX_test_0, 0, np.ones(tX_test_0.shape[0]), axis=1)
tX_tilda_test_1 = np.insert(tX_test_1, 0, np.ones(tX_test_1.shape[0]), axis=1)
tX_tilda_test_2_3 = np.insert(tX_test_2_3, 0, np.ones(tX_test_2_3.shape[0]), axis=1)

### We make the predictions

In [89]:
tX_tilda_test_2_3_augmented = build_poly(tX_tilda_test_2_3, degree=6)
predictions_ridge_2_3 = tX_tilda_test_2_3_augmented @ w_ridge_2_3
# print(predictions_2_3.shape)

NameError: name 'w_ridge_2_3' is not defined

In [None]:
tX_tilda_test_0_augmented = build_poly(tX_tilda_test_0, degree = 6)
predictions_ridge_0 = tX_tilda_test_0_augmented @ w_ridge_0
# print(predictions_0.shape)

In [None]:
tX_tilda_test_1_augmented = build_poly(tX_tilda_test_1, degree = 6)
predictions_ridge_1 = tX_tilda_test_1_augmented @ w_ridge_1
# print(predictions_1.shape)

In [None]:
print(len(zero_indices))
print(len(one_indices))
print(len(two_three_indices))
print()

In [None]:
### Predictions with logistic regression

In [90]:
tX_tilda_test_2_3_augmented = build_poly(tX_tilda_test_2_3, degree = 2)
predictions_logistic_2_3 = sigmoid(tX_tilda_test_2_3_augmented @ w_logistic_2_3)
# print(predictions_2_3.shape)

In [93]:
tX_tilda_test_1_augmented = build_poly(tX_tilda_test_1, degree = 2)
predictions_logistic_1 = sigmoid(tX_tilda_test_1_augmented @ w_logistic_1)
# print(predictions_1.shape)

In [96]:
tX_tilda_test_0_augmented = build_poly(tX_tilda_test_0, degree = 2)
predictions_logistic_0 = sigmoid(tX_tilda_test_0_augmented @ w_logistic_0)
# print(predictions_0.shape)

Now we have to reconstruct a single vector of predictions

In [97]:
#jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10

In [98]:
stacked_predictions = []
count_0 = 0
count_1 = 0
count_2_3 = 0
for index_row in range(tX_test.shape[0]):
    if index_row in zero_indices:
        stacked_predictions.append(predictions_logistic_0[count_0])
        count_0 = count_0 + 1
    elif index_row in one_indices:
        stacked_predictions.append(predictions_logistic_1[count_1])
        count_1 = count_1 +1
    else:
        stacked_predictions.append(predictions_logistic_2_3[count_2_3])
        count_2_3 = count_2_3 + 1

In [99]:
final_predictions = np.array([-1 if el < 0.5 else 1 for el in stacked_predictions])

In [100]:
print(final_predictions)

[-1 -1 -1 ...  1 -1 -1]


In [101]:
def predict_labels(weights, tX_test):
    y = np.array(tX_test) @ np.array(weights)
    labels = [1 if l > 0 else -1 for l in y]
    return labels

In [102]:
OUTPUT_PATH = 'submission.csv' # TODO: fill in desired name of output file for submission
#y_pred = predict_labels(weights, tX_test)
y_pred = final_predictions
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)