In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math
import random
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv' # train data path here 
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [3]:
print(tX.shape)

(250000, 30)


### Changing the labels to {0,1}

In [4]:
# y == 0 non detected Boson, y == 1 detected Boson
y_ = np.array([0 if l == -1 else 1 for l in y])

### Dividing the features by the number of jets

In [5]:
# dividing the rows of tX by the number of jets, dropping the column Pri_Jet_Num and adding an extra column of np.ones
zero_indices = []
one_indices = []
two_three_indices = []
zero_indices = np.where(tX[:,22]==0)[0]
one_indices = np.where(tX[:,22]==1)[0]
two_three_indices = np.where(np.logical_or(tX[:,22]==2, tX[:,22]==3))[0]
tX_0 = tX[zero_indices, :]
tX_0 = np.delete(tX_0, 22, axis=1)
tX_1 = tX[one_indices, :]
tX_1 = np.delete(tX_1, 22, axis=1)
tX_2_3 = tX[two_three_indices, :]

### Dividing also the output by the type of particle

In [6]:
y_0 = y_[zero_indices]
y_1 = y_[one_indices]
y_2_3 = y_[two_three_indices]

### Adding a column of zeros and ones to detect whether the mass has been measured or not

In [7]:
# take the indices where the mass is not calculated, add the column which has 0 in those indices
# and 1 everywhere else for all matrices 0,1,2_3
zero_indices_0 = np.where(tX_0[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_0 else 1 for i in range(tX_0.shape[0])])
tX_0 = np.insert(tX_0, 0, column_to_add, axis=1)
zero_indices_1 = np.where(tX_1[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_1 else 1 for i in range(tX_1.shape[0])])
tX_1 = np.insert(tX_1, 0, column_to_add, axis=1)
zero_indices_2_3 = np.where(tX_2_3[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_2_3 else 1 for i in range(tX_2_3.shape[0])])
tX_2_3 = np.insert(tX_2_3, 0, column_to_add, axis=1)

### Throwing away the outliers from the training data

In [8]:
for i in range(1, tX_2_3.shape[1]):
    index_column_valid =np.where(tX_2_3[:,i] != -999.)[0]
    column_25_quantile, column_75_quantile = np.quantile(tX_2_3[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
    interquantile = column_75_quantile-column_25_quantile
    column_15_quantile, column_85_quantile = np.quantile(tX_2_3[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
    indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_2_3[index_column_valid,i])
                                             | (tX_2_3[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
    #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
    median = np.median(tX_2_3[index_column_valid, i], axis = 0)
    #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
    #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
    #print(median)
    tX_2_3[index_column_valid[indices_outliers],i] =  median

In [9]:
col_to_delete_0 = []
for i in range(1, tX_0.shape[1]):
    index_column_valid =np.where(tX_0[:,i] != -999.)[0]
    if len(index_column_valid)==0:
        #we drop the column (we will have to do the same for the test set as well)
        col_to_delete_0.append(i)
    else :
        column_25_quantile, column_75_quantile = np.quantile(tX_0[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
        interquantile = column_75_quantile-column_25_quantile
        column_15_quantile, column_85_quantile = np.quantile(tX_0[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
        indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_0[index_column_valid,i])
                                             | (tX_0[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
        #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
        median = np.median(tX_0[index_column_valid, i], axis = 0)
        #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
        #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
        #print(median)
        tX_0[index_column_valid[indices_outliers],i] =  median
col_to_delete_0.append(tX_0.shape[1]-1)
tX_0 = np.delete(tX_0, col_to_delete_0, axis=1)
print(tX_0.shape)
print(col_to_delete_0)

(99913, 19)
[5, 6, 7, 13, 23, 24, 25, 26, 27, 28, 29]


In [10]:
col_to_delete_1 = []
for i in range(1, tX_1.shape[1]):
    index_column_valid =np.where(tX_1[:,i] != -999.)[0]
    if len(index_column_valid)==0:
        #we drop the column (we will have to do the same for the test set as well)
        col_to_delete_1.append(i)
    else :
        column_25_quantile, column_75_quantile = np.quantile(tX_1[index_column_valid,i], 
                                                         np.array([0.25, 0.75]))
        interquantile = column_75_quantile-column_25_quantile
        column_15_quantile, column_85_quantile = np.quantile(tX_1[index_column_valid,i], 
                                                         np.array([0.15, 0.85]))
        indices_outliers = np.where((column_15_quantile - 1.5 * interquantile >= tX_1[index_column_valid,i])
                                             | (tX_1[index_column_valid,i] >= 
                                                column_85_quantile + 1.5 * interquantile))[0]
        #indices_outliers = np.argwhere((tX_tilda_2_3[index_column_valid,i] >= column_85_quantile + 1.5 * interquantile) | 
                                  #(column_15_quantile - 1.5 * interquantile >= tX_tilda_2_3[index_column_valid,i]))
        median = np.median(tX_1[index_column_valid, i], axis = 0)
        #print(np.sort(tX_tilda_2_3[index_column_valid[indices_outliers],i]).T)
        #print(np.where(tX_tilda_2_3[indices_outliers,i])==-999.)
        #print(median)
        tX_1[index_column_valid[indices_outliers],i] =  median
tX_1 = np.delete(tX_1, col_to_delete_1, axis=1)
print(col_to_delete_1)

[5, 6, 7, 13, 26, 27, 28]


### Now we substitute the -999 values with the median

In [11]:
for i in range(1, tX_2_3.shape[1]):
    index_column_non_valid =np.where(tX_2_3[:,i] == -999.)[0]
    index_column_valid =np.where(tX_2_3[:,i] != -999.)[0]
    median = np.median(tX_2_3[index_column_valid, i], axis = 0)
    tX_2_3[index_column_non_valid,i] =  median

In [12]:
for i in range(1, tX_1.shape[1]):
    index_column_non_valid =np.where(tX_1[:,i] == -999.)[0]
    index_column_valid =np.where(tX_1[:,i] != -999.)[0]
    median = np.median(tX_1[index_column_valid, i], axis = 0)
    tX_1[index_column_non_valid,i] =  median

In [13]:
for i in range(1, tX_0.shape[1]):
    index_column_non_valid =np.where(tX_0[:,i] == -999.)[0]
    index_column_valid =np.where(tX_0[:,i] != -999.)[0]
    median = np.median(tX_0[index_column_valid, i], axis = 0)
    tX_0[index_column_non_valid,i] =  median

### Now we standardize the data

In [14]:
tX_2_3[:,1:], mean_2_3,std_2_3 = standardize(tX_2_3[:,1:]) #we standardize everything a part from the column added manually

In [15]:
print(tX_2_3)
print(np.count_nonzero(tX_2_3 == -999.))

[[ 1.          0.97351249  0.46588281 ...  0.61614788 -1.36131161
  -0.70374641]
 [ 1.         -0.82851663 -0.77157038 ...  0.11608109  1.71034105
   0.2995537 ]
 [ 1.          1.3538447  -0.27431587 ...  0.07030726 -1.52202162
   0.12704911]
 ...
 [ 1.          0.04006392  0.02513449 ...  0.25930888  0.22982758
   0.42357227]
 [ 1.          0.66304099 -1.0843679  ...  0.29031696 -1.21821366
  -0.20699627]
 [ 1.          0.04006392  0.31977857 ... -0.02271698 -0.62490751
   0.05569682]]
0


In [16]:
print(tX_0)

[[ 1.00000e+00  1.43905e+02  8.14170e+01 ...  3.10820e+01  6.00000e-02
   8.60620e+01]
 [ 1.00000e+00  1.75864e+02  1.69150e+01 ...  2.72300e+00 -8.71000e-01
   5.31310e+01]
 [ 1.00000e+00  1.05594e+02  5.05590e+01 ...  3.77910e+01  2.40000e-02
   1.29804e+02]
 ...
 [ 1.00000e+00  1.11452e+02  5.81790e+01 ...  4.67370e+01 -8.67000e-01
   8.04080e+01]
 [ 1.00000e+00  9.49510e+01  1.93620e+01 ...  1.21500e+01  8.11000e-01
   1.12718e+02]
 [ 1.00000e+00  1.11452e+02  7.27560e+01 ...  4.07290e+01 -1.59600e+00
   9.94050e+01]]


In [17]:
tX_0[:,1:],mean_0,std_0 = standardize(tX_0[:,1:]) 

In [18]:
print(tX_0)

[[ 1.          1.05744907  0.7827665  ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          2.14505538 -1.37519852 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.         -0.24632406 -0.2496121  ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.         -0.0469687   0.00532098 ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.         -0.60851918 -1.29333222 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.         -0.0469687   0.49300595 ...  0.60787347 -0.86778508
  -0.49908812]]


In [19]:
tX_1[:,1:],mean_1,std_1 = standardize(tX_1[:,1:])

In [20]:
print(tX_1)

[[ 1.00000000e+00  1.55539992e+00  7.27047143e-01 ...  3.98445313e-01
   6.45414781e-01 -4.14297220e-01]
 [ 1.00000000e+00  1.45598722e-03  3.58462301e+00 ...  1.12748232e+00
  -1.10752634e+00 -4.89864560e-01]
 [ 1.00000000e+00  1.36261180e+00 -1.05809645e+00 ... -3.92076740e-01
  -9.40265168e-01 -1.01072441e+00]
 ...
 [ 1.00000000e+00  1.45598722e-03  1.01732036e+00 ... -4.67286130e-01
  -3.80160315e-01  8.39087556e-01]
 [ 1.00000000e+00  6.75509966e-01  9.95415259e-01 ... -6.76994064e-01
   1.39533906e+00  5.32418071e-01]
 [ 1.00000000e+00 -2.21030015e-01  4.74893699e-01 ...  9.88591985e-01
  -8.30516498e-02 -5.76298292e-01]]


### We insert the column for the bias term

In [21]:
tX_tilda_0 = np.insert(tX_0, 0, np.ones(tX_0.shape[0]), axis=1)
tX_tilda_1 = np.insert(tX_1, 0, np.ones(tX_1.shape[0]), axis=1)
tX_tilda_2_3 = np.insert(tX_2_3, 0, np.ones(tX_2_3.shape[0]), axis=1)

In [22]:
print(tX_tilda_0)

[[ 1.          1.          1.05744907 ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          1.          2.14505538 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.          1.         -0.24632406 ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.          1.         -0.0469687  ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.          1.         -0.60851918 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.          1.         -0.0469687  ...  0.60787347 -0.86778508
  -0.49908812]]


In [23]:
# colors = ['red', 'blue']
# x_pos=[]
# x_neg=[]

# for j in range(len(y)):
#  if(y[j]==1):
#       x_pos.insert(0,tX[j])
#    else:
#        x_neg.insert(0,tX[j])
# xpos = np.array(x_pos)
# xneg = np.array(x_neg)
# for i in range(tX.shape[1]):
#  plt.hist(xpos[:,i], alpha = 0.5, color = 'r', bins = 100)
#  plt.hist(xneg[:,i], alpha = 0.5, color = 'b', bins = 100)
#  plt.show()

## Do your thing crazy machine learning thing here :) ...

In [24]:
def compute_loss(y, tx, w):
    N = y.shape[0]
    e = y - tx @ w 
    loss = 1/(2*N) * np.dot(e,e)
    return loss

In [25]:
def compute_gradient(y, tx, w):
    N = y.shape[0]
    e = y - tx @ w
    gradient = -(1/N) * (tx.T) @ (e)
    return gradient

In [26]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y,tx,w)
        gradient = compute_gradient(y,tx,w)
        w = w - gamma * gradient
        ws.append(w)
        losses.append(loss)
        # print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
        # bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [27]:
def cross_validation_GD(y, x, k_indices, k, degree, gamma = 3.0e-02):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    #w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    _,  w_opt_training = least_squares_GD(y_training, x_training_augmented,
                                                        np.zeros(x_training_augmented.shape[1]), 1000, gamma)
    loss_tr = calculate_loss(y_training, x_training_augmented, w_opt_training[-1])
    loss_te = calculate_loss(y_testing, x_testing_augmented, w_opt_training[-1])
    return loss_tr, loss_te

In [28]:
def compute_stoch_gradient(y, tx, w):
    N = y.shape[0]
    random_number = random.randint(0,N)
    #random_number =1
    xn = tx[random_number,:]
    random_gradient = - np.dot(xn, y[random_number] - np.dot(xn,w))
    return random_gradient

In [29]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y,tx,w)
        stoch_gradient = compute_stoch_gradient(y,tx,w)
        w = w - gamma * stoch_gradient
        ws.append(w)
        losses.append(loss)
        # print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
        #    bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [30]:
from proj1_helpers import *

def least_squares(y, tx):
    """calculate the least squares solution."""
    forcing_term = np.transpose(tx) @ y
    coefficient_matrix = np.transpose(tx) @ tx
    w = np.linalg.solve(coefficient_matrix, forcing_term)
    return w

def test_your_least_squares(y, tx):
    """compare the solution of the normal equations with the weights returned by gradient descent algorithm."""
    w_least_squares = least_squares(y, tx)
    initial_w = np.zeros(tx.shape[1])
    max_iters = 50
    gamma = 0.7
    losses_gradient_descent, w_gradient_descent = gradient_descent(y, tx, initial_w, max_iters, gamma)
    w = w_gradient_descent[-1]
    err = np.linalg.norm(w_least_squares-w)
    return err

In [31]:
def ridge_regression(y, tx, lambda_):
    """implement ridge regression."""
    N = tx.shape
    lambda_prime = 2 * N[0] * lambda_
    coefficient_matrix = np.transpose(tx) @ tx + lambda_prime * np.eye(N[1])
    forcing_term = np.transpose(tx) @ y
    w = np.linalg.solve(coefficient_matrix, forcing_term)
    return w

def debug_ridge(y, tx):
    """debugging the ridge regression by setting lambda=0."""
    w_least_squares = least_squares(y, tx)
    w_0 = ridge_regression(y, tx, 0)
    err = np.linalg.norm(w_least_squares-w_0)
    return err

In [32]:
def sigmoid(t):
    """apply the sigmoid function on t."""
    positive_indices = np.where(t >= 0)[0]
    negative_indices = np.where(t < 0)[0]
    z = np.zeros(len(t))
    z[positive_indices] = 1 / (1+np.exp(-t[positive_indices]))
    z[negative_indices] = np.exp(t[negative_indices]) / (1 + np.exp(t[negative_indices]))
    return z

In [33]:
def calculate_loss(y, tx, w):
    """compute the loss: negative log likelihood."""
    epsilon = 1.0e-12
    # term1 = sigmoid(tx @ w)
    # term1[y == 0] = 1
    # term2 = 1 - sigmoid(tx @ w)
    # term2[y == 1] = 1
    # summands = np.multiply(y, np.log(term1)) + np.multiply(1 - y, np.log(term2))
    # e = - y * (tx @ w) + np.log(1 + np.exp(tx @ w))
    # return e.sum()
    pos_ind = np.where(tx @ w >=0)[0]
    neg_ind = np.where(tx @ w <0)[0]
    loss_pos = - y[pos_ind] * (tx @ w)[pos_ind] + (tx @ w)[pos_ind] + np.log(1+np.exp(-(tx @ w)[pos_ind]))
    loss_neg = - y[neg_ind] * (tx @ w)[neg_ind] - (tx @ w)[neg_ind] + np.log(1+np.exp((tx @ w)[neg_ind]))
    # loss = - np.sum(y*np.log(sigmoid(tx @ w)+epsilon) + ((1 - y) * np.log(1-sigmoid(tx@w) + epsilon)))
    # return e.sum()
    return loss_pos.sum() + loss_neg.sum()

In [34]:
def calculate_gradient(y, tx, w):
    """compute the gradient of loss."""
    return np.transpose(tx) @ (sigmoid(tx @ w) - y)

In [35]:
def learning_by_gradient_descent(y, tx, w_initial, gamma, max_iters):
    """
    Do one step of gradient descent using logistic regression.
    Return the loss and the updated w.
    """
    losses = []
    w = w_initial
    for iter in range(max_iters):
        grad = calculate_gradient(y, tx, w)
        w = w - gamma * grad
        if iter %100 == 0:
            gamma = gamma/2
        loss = calculate_loss(y, tx, w)
        losses.append(loss)
    return losses, w

In [35]:
losses1, w1 = learning_by_gradient_descent(y_0, tX_tilda_0, np.zeros(tX_tilda_0.shape[1]), 0.8, 10000)
print(losses1)

[5179248944.798144, 4548292501.191404, 3873264747.21586, 3204189882.4289737, 2558766497.952185, 1925840421.0113575, 1358134428.6513941, 806750192.3189367, 2000410682.432268, 1601256172.1407366, 2113524821.0324888, 1606047298.1298404, 1990415844.7863326, 1640858641.9754827, 1934131711.4022741, 1285499978.8239229, 2039587553.53073, 1427677293.0631957, 1887233645.3011723, 1447488537.8697784, 1922910135.0624075, 1269966212.2418594, 1955256377.1863978, 1400327379.5162985, 1845506548.8767402, 1264731822.7936993, 1931658652.60472, 1314650368.8542542, 1862692520.787514, 1330058160.1396096, 1877959970.3375432, 1229045186.878255, 1902392318.7811527, 1308156948.822424, 1844261214.6761906, 1241157611.7496824, 1894851670.6505013, 1255749330.385391, 1860264536.3923934, 1274839497.858362, 1859682719.8045244, 1218658685.3284106, 1880581668.544345, 1258713077.568829, 1850203797.9833395, 1235051327.652052, 1873359564.5703435, 1230356581.4284768, 1859083288.6203346, 1244041475.3119462, 1856766909.6040347

In [36]:
def calculate_hessian(y, tx, w):
    """return the Hessian of the loss function."""
    diag = sigmoid(tx @ w) * (1 - sigmoid(tx @ w))
    D = diag * np.eye(tx.shape[0])
    return np.transpose(tx) @ D @ tx

In [37]:
def logistic_regression(y, tx, w):
    """return the loss, gradient, and Hessian."""
    grad = calculate_gradient(y, tx, w)
    hess = calculate_hessian(y, tx, w)
    loss = calculate_loss(y, tx, w)
    return loss, grad, hess

In [38]:
def learning_by_newton_method(y, tx, w, gamma):
    """
    Do one step on Newton's method.
    return the loss and updated w.
    """
    loss, grad, hess = logistic_regression(y, tx, w)
    sol = np.linalg.solve(hess, grad)
    w = w - gamma * sol
    return loss, w

In [39]:
def penalized_logistic_regression(y, tx, w, lambda_):
    """return the loss, gradient"""
    loss = calculate_loss(y, tx, w) + lambda_*np.linalg.norm(w) ** 2
    grad = calculate_gradient(y, tx, w) + 2*lambda_*w
    hess = calculate_hessian(y, tx, w) + 2*lambda_*np.eye(w.shape[0])
    return loss, grad, hess

In [40]:
def learning_by_penalized_gradient(y, tx, w_initial, gamma, max_iters, lambda_):
    """
    Do one step of gradient descent, using the penalized logistic regression.
    Return the loss and updated w.
    """
    threshold = 1e-8
    losses = []
    w = w_initial
    for iter in range(max_iters):
        grad = calculate_gradient(y, tx, w) + 2*lambda_*w
        w = w - gamma * grad
        # regularizer = lambda_ / 2 * np.linalg.norm(w) ** 2
        # summing = np.sum(np.log(1+np.exp(tx.dot(w))))
        # y_component = y.T.dot(tx.dot(w)).flatten().flatten()
        # loss = summing - y_component*regularizer
        loss = calculate_loss(y, tx, w) + lambda_*np.linalg.norm(w) ** 2
        losses.append(loss)
        if iter % 25 == 0:
            gamma = gamma / 2
            print("Current iteration={i}, loss={l}".format(i=iter, l=loss))
        # converge criterion
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
    return losses, w

In [41]:
def build_poly(x, degree):
    """polynomial basis functions for input data x, for j=1 up to j=degree."""
    powers = np.arange(1, degree + 1)
    phi = np.column_stack([np.power(x[:,0], exponent) for exponent in powers])
    for i in range(1, x.shape[1]):
        phi_i = np.column_stack([np.power(x[:,i], exponent) for exponent in powers])
        phi = np.column_stack([phi, phi_i])
    return phi

In [42]:
def build_k_indices(y, k_fold, seed):
    N = y.shape[0]
    np.random.seed(seed)
    interval = int(np.floor(N / k_fold))
    indices = np.random.permutation(N)
    k_indices = [indices[k * interval: (k + 1) * interval]
                 for k in range(k_fold)]
    return np.array(k_indices)

In [43]:
def cross_validation_ridge(y, x, k_indices, k, lambda_, degree):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    loss_tr = compute_loss(y_training, x_training_augmented, w_opt_training)
    loss_te = compute_loss(y_testing, x_testing_augmented, w_opt_training)
    return loss_tr, loss_te

In [44]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_0, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_0, tX_tilda_0, k_indices, k,
                                                lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
print(testing_loss)
lambda_opt, degree_opt = lambdas[best_result[0]],degrees[best_result[1]]
print(lambda_opt, degree_opt)

KeyboardInterrupt: 

In [None]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_1, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_1, tX_tilda_1, k_indices, k, 
                                                lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
print(testing_loss)
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(lambda_opt, degree_opt)

In [None]:
degrees = np.arange(2, 7)
lambdas = np.logspace(-5,0,15)
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees)))
testing_loss = np.zeros((len(lambdas), len(degrees)))
k_indices = build_k_indices(y_2_3, k_fold, seed)
for index1 in range(len(lambdas)):
    for index2 in range(len(degrees)):
        train_loss = 0
        test_loss = 0
        for k in range(k_fold):
            loss_tr, loss_te = cross_validation_ridge(y_2_3, tX_tilda_2_3, k_indices, k,
                                            lambdas[index1], degrees[index2])
            train_loss += loss_tr
            test_loss += loss_te
        training_loss[index1, index2] = train_loss / k_fold
        testing_loss[index1, index2] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt, degree_opt = lambdas[best_result[0]], degrees[best_result[1]]
print(testing_loss)
print(lambda_opt, degree_opt) 

We train the model for ridge regression with the best hyperparameters

In [None]:
tX_tilda_0_augmented = build_poly(tX_tilda_0, degree = 6)
w_ridge_0 = ridge_regression(y_0, tX_tilda_0_augmented, lambda_= 0.00061054)
#print(w_ridge_0)

In [None]:
tX_tilda_1_augmented = build_poly(tX_tilda_1, degree=6)
w_ridge_1 = ridge_regression(y_1, tX_tilda_1_augmented, lambda_= 5.17947468e-05)
#print(w_ridge_1)

In [None]:
tX_tilda_2_3_augmented = build_poly(tX_tilda_2_3, degree=6)
w_ridge_2_3 = ridge_regression(y_2_3, tX_tilda_2_3_augmented, lambda_= 0.00026827)
#print(w_ridge_2_3)

We will now try with logistic regression

In [43]:
def cross_validation_logistic(y, x, k_indices, k, lambda_, degree, gamma):
    """return the loss of ridge regression."""
    N = y.shape[0]
    k_fold = k_indices.shape[0]
    list_ = []
    interval = int(N/k_fold)
    for i in range(k_fold):
        if i != k:
            list_.append(i)
    x_training = np.zeros((int((k_fold-1)/k_fold*N), x.shape[1]))
    y_training = np.zeros(int((k_fold-1)/k_fold*N))
    for j in range(len(list_)):
        x_training[interval*(j):interval*(j+1), :] = x[np.array([k_indices[list_[j]]]), :]
    x_testing = x[k_indices[k], :]
    for j in range(len(list_)):
        y_training[interval*(j):interval*(j+1)] = y[np.array([k_indices[list_[j]]])]
    y_testing = y[k_indices[k]]
    x_training_augmented = build_poly(x_training, degree)
    x_testing_augmented = build_poly(x_testing, degree)
    #w_opt_training = ridge_regression(y_training, x_training_augmented, lambda_)
    _,  w_opt_training = learning_by_penalized_gradient(y_training, x_training_augmented,
                                                        np.ones(x_training_augmented.shape[1]), gamma, 1000, lambda_)
    loss_tr = calculate_loss(y_training, x_training_augmented, w_opt_training)
    loss_te = calculate_loss(y_testing, x_testing_augmented, w_opt_training)
    return loss_tr, loss_te

We perform cross validation in order to find the best parameters degree, lamdba and gamma caracterizing logistic regression

In [44]:
print(tX_tilda_0)

[[ 1.          1.          1.05744907 ...  0.01825038  0.04662815
  -0.7724943 ]
 [ 1.          1.          2.14505538 ... -1.71504715 -0.4674532
  -1.44727052]
 [ 1.          1.         -0.24632406 ...  0.42830338  0.0267496
   0.12380588]
 ...
 [ 1.          1.         -0.0469687  ...  0.97508147 -0.46524447
  -0.8883482 ]
 [ 1.          1.         -0.60851918 ... -1.13887042  0.46131675
  -0.22629665]
 [ 1.          1.         -0.0469687  ...  0.60787347 -0.86778508
  -0.49908812]]


In [137]:
# degrees = np.arange(2, 5)
lambdas = np.linspace(0, 3, 5)
degree = 2
# gammas = np.logspace(-5, 0, 5)
gamma = 0.00017783
k_fold = 5
seed = 1
training_loss = np.zeros(len(lambdas))
testing_loss = np.zeros(len(lambdas))
k_indices = build_k_indices(y_0, k_fold, seed)
for index1 in range(len(lambdas)):
    # for index2 in range(len(degrees)):
    #  for index3 in range(len(gammas)):
    train_loss = 0
    test_loss = 0
    for k in range(k_fold):
            loss_tr, loss_te = cross_validation_logistic(y_0, tX_tilda_0, k_indices, k,
                                                    lambdas[index1], degree, gamma)
            train_loss += loss_tr
            test_loss += loss_te
    training_loss[index1] = train_loss / k_fold
    testing_loss[index1] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt = lambdas[best_result[0]]
print(lambda_opt)

Current iteration=0, loss=21039050.154922392
Current iteration=25, loss=984841.4777189632
Current iteration=50, loss=778278.5443313502
Current iteration=75, loss=1188836.5812747062
Current iteration=100, loss=273939.75015508995
Current iteration=125, loss=242911.33780006418
Current iteration=150, loss=231537.50693589723
Current iteration=175, loss=226600.6981856473
Current iteration=200, loss=224305.98685609986
Current iteration=225, loss=223200.39693480553
Current iteration=250, loss=222657.84702895145
Current iteration=275, loss=222389.11240926015
Current iteration=300, loss=222255.37587417557
Current iteration=325, loss=222188.66586371264
Current iteration=350, loss=222155.3508860206
Current iteration=375, loss=222138.70309794298
Current iteration=400, loss=222130.38162838196
Current iteration=425, loss=222126.22149963162
Current iteration=450, loss=222124.14158675287
Current iteration=475, loss=222123.1016681862
Current iteration=500, loss=222122.58171837084
Current iteration=525, 

Current iteration=400, loss=223848.29661584998
Current iteration=425, loss=223843.90765161865
Current iteration=450, loss=223841.71332551123
Current iteration=475, loss=223840.61620145815
Current iteration=500, loss=223840.06764918164
Current iteration=525, loss=223839.7933754809
Current iteration=550, loss=223839.65623923988
Current iteration=575, loss=223839.58767127173
Current iteration=600, loss=223839.5533873257
Current iteration=625, loss=223839.5362453622
Current iteration=650, loss=223839.5276743829
Current iteration=675, loss=223839.52338889366
Current iteration=700, loss=223839.52124614955
Current iteration=725, loss=223839.52017477725
Current iteration=750, loss=223839.51963909133
Current iteration=775, loss=223839.51937124843
Current iteration=800, loss=223839.51923732692
Current iteration=825, loss=223839.51917036614
Current iteration=850, loss=223839.51913688568
Current iteration=875, loss=223839.51912014547
Current iteration=900, loss=223839.51911177544
Current iteration

Current iteration=800, loss=227562.30083552928
Current iteration=825, loss=227562.30076436195
Current iteration=850, loss=227562.30072877818
Current iteration=875, loss=227562.30071098625
Current iteration=900, loss=227562.30070209052
Current iteration=925, loss=227562.3006976426
Current iteration=950, loss=227562.30069541847
Current iteration=975, loss=227562.3006943065
Current iteration=0, loss=21012836.78397008
Current iteration=25, loss=893065.699244907
Current iteration=50, loss=524309.7918368854
Current iteration=75, loss=761853.3601023903
Current iteration=100, loss=274106.37605157064
Current iteration=125, loss=242511.94621621555
Current iteration=150, loss=230870.74769149823
Current iteration=175, loss=225860.36654837552
Current iteration=200, loss=223538.28406683623
Current iteration=225, loss=222421.04333825954
Current iteration=250, loss=221873.11589742117
Current iteration=275, loss=221601.79731049307
Current iteration=300, loss=221466.79323423113
Current iteration=325, lo

Current iteration=225, loss=235827.71449123535
Current iteration=250, loss=235137.35209832428
Current iteration=275, loss=234795.0627867434
Current iteration=300, loss=234624.63555377352
Current iteration=325, loss=234539.60126640755
Current iteration=350, loss=234497.12843250742
Current iteration=375, loss=234475.9030029098
Current iteration=400, loss=234465.29303425524
Current iteration=425, loss=234459.98873637736
Current iteration=450, loss=234457.33675903987
Current iteration=475, loss=234456.0108132703
Current iteration=500, loss=234455.34785110984
Current iteration=525, loss=234455.01637271087
Current iteration=550, loss=234454.85063418164
Current iteration=575, loss=234454.76776508454
Current iteration=600, loss=234454.72633057798
Current iteration=625, loss=234454.7056133351
Current iteration=650, loss=234454.69525471627
Current iteration=675, loss=234454.69007540765
Current iteration=700, loss=234454.6874857534
Current iteration=725, loss=234454.68619092638
Current iteration=

Current iteration=625, loss=226987.48713469467
Current iteration=650, loss=226987.478085086
Current iteration=675, loss=226987.4735602821
Current iteration=700, loss=226987.4712978804
Current iteration=725, loss=226987.47016667956
Current iteration=750, loss=226987.46960107918
Current iteration=775, loss=226987.469318279
Current iteration=800, loss=226987.46917687883
Current iteration=825, loss=226987.46910617873
Current iteration=850, loss=226987.4690708286
Current iteration=875, loss=226987.46905315373
Current iteration=900, loss=226987.4690443163
Current iteration=925, loss=226987.46903989764
Current iteration=950, loss=226987.4690376882
Current iteration=975, loss=226987.46903658356
Current iteration=0, loss=21021684.088279326
Current iteration=25, loss=1139945.9537870793
Current iteration=50, loss=994519.5888594592
Current iteration=75, loss=340141.46515375294
Current iteration=100, loss=296329.4178135802
Current iteration=125, loss=259964.77528960345
Current iteration=150, loss=2

Current iteration=50, loss=1088456.3334063275
Current iteration=75, loss=611605.6497439836
Current iteration=100, loss=280663.81250473706
Current iteration=125, loss=248625.2981293137
Current iteration=150, loss=236243.53914277075
Current iteration=175, loss=230852.30326830075
Current iteration=200, loss=228342.25502730647
Current iteration=225, loss=227131.8209656674
Current iteration=250, loss=226537.5663598473
Current iteration=275, loss=226243.1483446928
Current iteration=300, loss=226096.61251698475
Current iteration=325, loss=226023.51484166156
Current iteration=350, loss=225987.0078977391
Current iteration=375, loss=225968.7648275627
Current iteration=400, loss=225959.64595116553
Current iteration=425, loss=225955.08716840213
Current iteration=450, loss=225952.80793906347
Current iteration=475, loss=225951.66836490345
Current iteration=500, loss=225951.09858795066
Current iteration=525, loss=225950.81370200592
Current iteration=550, loss=225950.67125966653
Current iteration=575,

In [None]:
print(np.logspace(-2,0,5))

In [139]:
# degrees = np.arange(2, 5)
lambdas = np.linspace(0, 3, 5)
degree = 2
# gammas = np.logspace(-5, 0, 5)
k_fold = 5
gamma = 0.00316228
seed = 1
training_loss = np.zeros(len(lambdas))
testing_loss = np.zeros(len(lambdas))
k_indices = build_k_indices(y_1, k_fold, seed)
for index1 in range(len(lambdas)):
    # for index2 in range(len(degrees)):
    #  for index3 in range(len(gammas)):
    train_loss = 0
    test_loss = 0
    for k in range(k_fold):
        loss_tr, loss_te = cross_validation_logistic(y_1, tX_tilda_1, k_indices, k,
                                            lambdas[index1], degree, gamma)
        train_loss += loss_tr
        test_loss += loss_te
    training_loss[index1] = train_loss / k_fold
    testing_loss[index1] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt = lambdas[best_result[0]]
print(lambda_opt)

Current iteration=0, loss=273267123.61131126
Current iteration=25, loss=44125887.461864695
Current iteration=50, loss=17506863.1337279
Current iteration=75, loss=3981775.879560408
Current iteration=100, loss=3994052.851587348
Current iteration=125, loss=3410118.594701222
Current iteration=150, loss=3159312.6756089884
Current iteration=175, loss=3040971.936198583
Current iteration=200, loss=2983261.4529766627
Current iteration=225, loss=2954763.6332311025
Current iteration=250, loss=2940605.1364359437
Current iteration=275, loss=2933548.7571841036
Current iteration=300, loss=2930026.2143532108
Current iteration=325, loss=2928266.3711855
Current iteration=350, loss=2927386.791871916
Current iteration=375, loss=2926947.0865175594
Current iteration=400, loss=2926727.2550107967
Current iteration=425, loss=2926617.345234049
Current iteration=450, loss=2926562.3916626503
Current iteration=475, loss=2926534.9152061883
Current iteration=500, loss=2926521.177060268
Current iteration=525, loss=29

Current iteration=425, loss=2929933.3987009944
Current iteration=450, loss=2929877.628976144
Current iteration=475, loss=2929849.744493977
Current iteration=500, loss=2929835.8023479586
Current iteration=525, loss=2929828.8312987154
Current iteration=550, loss=2929825.345780035
Current iteration=575, loss=2929823.60302218
Current iteration=600, loss=2929822.731643624
Current iteration=625, loss=2929822.2959544393
Current iteration=650, loss=2929822.0781098707
Current iteration=675, loss=2929821.9691875894
Current iteration=700, loss=2929821.9147264496
Current iteration=725, loss=2929821.887495884
Current iteration=750, loss=2929821.8738806
Current iteration=775, loss=2929821.867072959
Current iteration=800, loss=2929821.8636691375
Current iteration=825, loss=2929821.861967227
Current iteration=850, loss=2929821.8611162705
Current iteration=875, loss=2929821.8606907907
Current iteration=900, loss=2929821.860478055
Current iteration=925, loss=2929821.8603716837
Current iteration=950, los

Current iteration=850, loss=2509852.319545171
Current iteration=875, loss=2509852.3191463347
Current iteration=900, loss=2509852.3189469217
Current iteration=925, loss=2509852.3188472097
Current iteration=950, loss=2509852.3187973583
Current iteration=975, loss=2509852.3187724324
Current iteration=0, loss=274390930.7426062
Current iteration=25, loss=45056710.22941253
Current iteration=50, loss=6870075.659537011
Current iteration=75, loss=11382501.942726493
Current iteration=100, loss=3566107.694283007
Current iteration=125, loss=3001233.7881028396
Current iteration=150, loss=2757134.651076068
Current iteration=175, loss=2642029.923880999
Current iteration=200, loss=2586113.4849624117
Current iteration=225, loss=2558537.0823407187
Current iteration=250, loss=2544842.168530725
Current iteration=275, loss=2538017.6946287905
Current iteration=300, loss=2534611.2524384684
Current iteration=325, loss=2532909.5049383333
Current iteration=350, loss=2532058.992900104
Current iteration=375, loss

Current iteration=275, loss=2598567.636877121
Current iteration=300, loss=2594873.48569258
Current iteration=325, loss=2593028.20003669
Current iteration=350, loss=2592106.005712797
Current iteration=375, loss=2591645.0159729505
Current iteration=400, loss=2591414.548275578
Current iteration=425, loss=2591299.3216541857
Current iteration=450, loss=2591241.7100184234
Current iteration=475, loss=2591212.904619214
Current iteration=500, loss=2591198.5020242683
Current iteration=525, loss=2591191.3007529597
Current iteration=550, loss=2591187.7001238465
Current iteration=575, loss=2591185.899810925
Current iteration=600, loss=2591184.9996548733
Current iteration=625, loss=2591184.54957695
Current iteration=650, loss=2591184.3245380134
Current iteration=675, loss=2591184.2120185504
Current iteration=700, loss=2591184.155758818
Current iteration=725, loss=2591184.127628957
Current iteration=750, loss=2591184.113564025
Current iteration=775, loss=2591184.106531561
Current iteration=800, loss=

Current iteration=700, loss=2048343.2361241742
Current iteration=725, loss=2048343.2104240162
Current iteration=750, loss=2048343.1975739384
Current iteration=775, loss=2048343.1911488988
Current iteration=800, loss=2048343.1879363786
Current iteration=825, loss=2048343.186330116
Current iteration=850, loss=2048343.1855269845
Current iteration=875, loss=2048343.185125419
Current iteration=900, loss=2048343.1849246372
Current iteration=925, loss=2048343.1848242453
Current iteration=950, loss=2048343.1847740526
Current iteration=975, loss=2048343.1847489562
Current iteration=0, loss=274429967.80084205
Current iteration=25, loss=46123711.67145325
Current iteration=50, loss=36370259.94537885
Current iteration=75, loss=6161142.450346583
Current iteration=100, loss=2852945.085957114
Current iteration=125, loss=2475774.918462193
Current iteration=150, loss=2241897.776458504
Current iteration=175, loss=2131387.758630893
Current iteration=200, loss=2077632.0035652246
Current iteration=225, loss

Current iteration=125, loss=2349365.681013518
Current iteration=150, loss=2105945.397802886
Current iteration=175, loss=1992032.3471539186
Current iteration=200, loss=1936754.8204343338
Current iteration=225, loss=1909488.209360954
Current iteration=250, loss=1895944.887377342
Current iteration=275, loss=1889195.6468469847
Current iteration=300, loss=1885826.5786916313
Current iteration=325, loss=1884143.4167550297
Current iteration=350, loss=1883302.1803441932
Current iteration=375, loss=1882881.6516941893
Current iteration=400, loss=1882671.408078155
Current iteration=425, loss=1882566.291446591
Current iteration=450, loss=1882513.7344248232
Current iteration=475, loss=1882487.456237431
Current iteration=500, loss=1882474.3172246052
Current iteration=525, loss=1882467.7477384103
Current iteration=550, loss=1882464.463000367
Current iteration=575, loss=1882462.8206326093
Current iteration=600, loss=1882461.999449046
Current iteration=625, loss=1882461.5888573437
Current iteration=650,

In [140]:
# for degrees in range (2, 6) and lambdas in (0, 1) the best results are respectively the lower and the upper bound
# so now we go on with looking for the best parameters in degree \in {1,2} and lambda in [1, 2] hoping to get for lambda an
# internal optimal point 
# degrees = np.arange(2, 5)
lambdas = np.linspace(0, 3, 5)
degree = 2
# gammas = np.logspace(-5, 0, 5)
gamma = 0.0017783
k_fold = 5
seed = 1
training_loss = np.zeros((len(lambdas), len(degrees), len(gammas)))
testing_loss = np.zeros((len(lambdas), len(degrees), len(gammas)))
k_indices = build_k_indices(y_2_3, k_fold, seed)
for index1 in range(len(lambdas)):
    # for index2 in range(len(degrees)):
    #   for index3 in range(len(gammas)):
    train_loss = 0
    test_loss = 0
    for k in range(k_fold):
            loss_tr, loss_te = cross_validation_logistic(y_2_3, tX_tilda_2_3, k_indices, k,
                                            lambdas[index1], degree, gamma)
            train_loss += loss_tr
            test_loss += loss_te
    training_loss[index1] = train_loss / k_fold
    testing_loss[index1] = test_loss / k_fold
best_result = np.where(testing_loss == np.amin(testing_loss))
lambda_opt = lambdas[best_result[0]]
print(lambda_opt)

Current iteration=0, loss=158505624.53907633
Current iteration=25, loss=6357233.2979824
Current iteration=50, loss=5144190.323673397
Current iteration=75, loss=3667294.7999277893
Current iteration=100, loss=3040592.940880635
Current iteration=125, loss=2755572.3345021666
Current iteration=150, loss=2621164.4213775583
Current iteration=175, loss=2555954.596440154
Current iteration=200, loss=2523987.2084299945
Current iteration=225, loss=2508190.993306246
Current iteration=250, loss=2500330.877798271
Current iteration=275, loss=2496409.1177166114
Current iteration=300, loss=2494450.240827481
Current iteration=325, loss=2493471.256790984
Current iteration=350, loss=2492981.880282477
Current iteration=375, loss=2492737.2186027346
Current iteration=400, loss=2492614.894385363
Current iteration=425, loss=2492553.7339297077
Current iteration=450, loss=2492523.154114815
Current iteration=475, loss=2492507.864310563
Current iteration=500, loss=2492500.219434231
Current iteration=525, loss=24924

Current iteration=450, loss=2415322.881064332
Current iteration=475, loss=2415307.6610416137
Current iteration=500, loss=2415300.0510614635
Current iteration=525, loss=2415296.2460791925
Current iteration=550, loss=2415294.343590008
Current iteration=575, loss=2415293.3923459034
Current iteration=600, loss=2415292.916723972
Current iteration=625, loss=2415292.678913035
Current iteration=650, loss=2415292.560007578
Current iteration=675, loss=2415292.500554846
Current iteration=700, loss=2415292.4708284833
Current iteration=725, loss=2415292.455965304
Current iteration=750, loss=2415292.4485337115
Current iteration=775, loss=2415292.4448179165
Current iteration=800, loss=2415292.442960021
Current iteration=825, loss=2415292.442031072
Current iteration=850, loss=2415292.441566597
Current iteration=875, loss=2415292.441334359
Current iteration=900, loss=2415292.4412182425
Current iteration=925, loss=2415292.44116018
Current iteration=950, loss=2415292.4411311513
Current iteration=975, los

Current iteration=875, loss=2251064.8116760445
Current iteration=900, loss=2251064.811559963
Current iteration=925, loss=2251064.8115019216
Current iteration=950, loss=2251064.811472901
Current iteration=975, loss=2251064.811458387
Current iteration=0, loss=158486069.240826
Current iteration=25, loss=6352840.23088122
Current iteration=50, loss=4927899.393113914
Current iteration=75, loss=3392823.7775837686
Current iteration=100, loss=2760759.315612245
Current iteration=125, loss=2478537.0745632383
Current iteration=150, loss=2346924.1855829586
Current iteration=175, loss=2283294.8186318967
Current iteration=200, loss=2252230.8565009893
Current iteration=225, loss=2236918.83511895
Current iteration=250, loss=2229317.486657351
Current iteration=275, loss=2225529.98936998
Current iteration=300, loss=2223639.5270636077
Current iteration=325, loss=2222695.089565139
Current iteration=350, loss=2222223.056887255
Current iteration=375, loss=2221987.088519297
Current iteration=400, loss=2221869

Current iteration=325, loss=2069261.4465784149
Current iteration=350, loss=2068789.7296619713
Current iteration=375, loss=2068553.920303579
Current iteration=400, loss=2068436.0278172768
Current iteration=425, loss=2068377.0845260965
Current iteration=450, loss=2068347.6136182852
Current iteration=475, loss=2068332.8783487966
Current iteration=500, loss=2068325.5107601539
Current iteration=525, loss=2068321.8269773568
Current iteration=550, loss=2068319.9850888401
Current iteration=575, loss=2068319.0641453015
Current iteration=600, loss=2068318.6036737124
Current iteration=625, loss=2068318.3734379602
Current iteration=650, loss=2068318.258320099
Current iteration=675, loss=2068318.2007611685
Current iteration=700, loss=2068318.1719817035
Current iteration=725, loss=2068318.1575919744
Current iteration=750, loss=2068318.150397106
Current iteration=775, loss=2068318.146799676
Current iteration=800, loss=2068318.1450009574
Current iteration=825, loss=2068318.144101598
Current iteration=

Current iteration=750, loss=1929964.3776120697
Current iteration=775, loss=1929964.374103513
Current iteration=800, loss=1929964.3723492352
Current iteration=825, loss=1929964.3714720975
Current iteration=850, loss=1929964.3710335288
Current iteration=875, loss=1929964.370814245
Current iteration=900, loss=1929964.3707045985
Current iteration=925, loss=1929964.3706497774
Current iteration=950, loss=1929964.3706223657
Current iteration=975, loss=1929964.3706086632
Current iteration=0, loss=158452623.50340465
Current iteration=25, loss=6287551.948955842
Current iteration=50, loss=4690693.767875544
Current iteration=75, loss=3070042.483174648
Current iteration=100, loss=2429638.74818919
Current iteration=125, loss=2148701.9361635013
Current iteration=150, loss=2019349.853975444
Current iteration=175, loss=1957800.244077424
Current iteration=200, loss=1927768.804660296
Current iteration=225, loss=1912899.7803262158
Current iteration=250, loss=1905500.795828193
Current iteration=275, loss=1

Current iteration=150, loss=1888292.5877016422
Current iteration=175, loss=1828357.0197070504
Current iteration=200, loss=1799129.570614346
Current iteration=225, loss=1784697.1083496162
Current iteration=250, loss=1777524.7592554474
Current iteration=275, loss=1773949.6204909098
Current iteration=300, loss=1772164.6854290257
Current iteration=325, loss=1771272.8620493042
Current iteration=350, loss=1770827.1086374486
Current iteration=375, loss=1770604.2711093961
Current iteration=400, loss=1770492.8621677274
Current iteration=425, loss=1770437.1601041562
Current iteration=450, loss=1770409.3096740062
Current iteration=475, loss=1770395.3846093183
Current iteration=500, loss=1770388.4221145683
Current iteration=525, loss=1770384.9408765913
Current iteration=550, loss=1770383.2002599523
Current iteration=575, loss=1770382.32995222
Current iteration=600, loss=1770381.894798501
Current iteration=625, loss=1770381.6772216773
Current iteration=650, loss=1770381.5684332778
Current iteration

In [109]:
tX_tilda_0_augmented = build_poly(tX_tilda_0, degree = 2)
_, w_logistic_0 = learning_by_penalized_gradient(y_0, tX_tilda_0_augmented, np.zeros(tX_tilda_0_augmented.shape[1]), 0.00017783,
                                              1000, lambda_= 1)

Current iteration=0, loss=11953934.476351794
Current iteration=25, loss=2158248.8743964396
Current iteration=50, loss=1722482.836902358
Current iteration=75, loss=363118.27443768654
Current iteration=100, loss=496359.83224441216
Current iteration=125, loss=259976.8148791658
Current iteration=150, loss=250660.28359415938
Current iteration=175, loss=247461.42959845066
Current iteration=200, loss=246103.08936854175
Current iteration=225, loss=245474.6445080386
Current iteration=250, loss=245172.08603055903
Current iteration=275, loss=245023.62401411208
Current iteration=300, loss=244950.0839955155
Current iteration=325, loss=244913.48553876905
Current iteration=350, loss=244895.22882598487
Current iteration=375, loss=244886.11110982095
Current iteration=400, loss=244881.55489208575
Current iteration=425, loss=244879.27744473773
Current iteration=450, loss=244878.13888509176
Current iteration=475, loss=244877.5696462698
Current iteration=500, loss=244877.2850371083
Current iteration=525, l

In [110]:
tX_tilda_1_augmented = build_poly(tX_tilda_1, degree = 2)
_, w_logistic_1 = learning_by_penalized_gradient(y_1, tX_tilda_1_augmented, np.zeros(tX_tilda_1_augmented.shape[1]), 0.00316228,
                                              1000, lambda_= 1)

Current iteration=0, loss=97528994.86257344
Current iteration=25, loss=21448681.301449835
Current iteration=50, loss=35952517.29871229
Current iteration=75, loss=7823474.3219228815
Current iteration=100, loss=5396602.07363074
Current iteration=125, loss=4527245.881317648
Current iteration=150, loss=4150793.1954980674
Current iteration=175, loss=3972898.49236086
Current iteration=200, loss=3886154.8912532264
Current iteration=225, loss=3843336.6624230887
Current iteration=250, loss=3822062.7905500294
Current iteration=275, loss=3811459.0964384326
Current iteration=300, loss=3806165.549028915
Current iteration=325, loss=3803520.877948297
Current iteration=350, loss=3802199.068186434
Current iteration=375, loss=3801538.3088100343
Current iteration=400, loss=3801207.9619047805
Current iteration=425, loss=3801042.797392721
Current iteration=450, loss=3800960.217079512
Current iteration=475, loss=3800918.9274086137
Current iteration=500, loss=3800898.2826945907
Current iteration=525, loss=38

In [111]:
tX_tilda_2_3_augmented = build_poly(tX_tilda_2_3, degree = 2)
_, w_logistic_2_3 = learning_by_penalized_gradient(y_2_3, tX_tilda_2_3_augmented, np.zeros(tX_tilda_2_3_augmented.shape[1]),
                                                0.00017783, 1000, lambda_= 1)

Current iteration=0, loss=2453244.116292915
Current iteration=25, loss=989535.5546252584
Current iteration=50, loss=806125.0565594207
Current iteration=75, loss=581981.4053180196
Current iteration=100, loss=488802.3440732876
Current iteration=125, loss=447518.271415908
Current iteration=150, loss=428320.1704786562
Current iteration=175, loss=419093.0935594835
Current iteration=200, loss=414573.47664509027
Current iteration=225, loss=412337.0818380339
Current iteration=250, loss=411224.6590439237
Current iteration=275, loss=410669.90052402986
Current iteration=300, loss=410392.89775151823
Current iteration=325, loss=410254.4884272579
Current iteration=350, loss=410185.3061831723
Current iteration=375, loss=410150.7207928524
Current iteration=400, loss=410133.4295635018
Current iteration=425, loss=410124.78436442604
Current iteration=450, loss=410120.4618575789
Current iteration=475, loss=410118.30062555155
Current iteration=500, loss=410117.22001488693
Current iteration=525, loss=410116

In [112]:
### Generate predictions and save ouput in csv format for submission:

In [113]:
DATA_TEST_PATH = '../data/test.csv' # TODO: download train data and supply path here 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [114]:
print(tX_test.shape)

(568238, 30)


We will now format the tX_test as we did for tX_train

### we split the test into the three subgroups

In [115]:
zero_indices = []
one_indices = []
two_three_indices = []
zero_indices = np.where(tX_test[:,22]==0)[0]
one_indices = np.where(tX_test[:,22]==1)[0]
two_three_indices = np.where(np.logical_or(tX_test[:,22]==2, tX_test[:,22]==3))[0]
tX_test_0 = tX_test[zero_indices, :]
tX_test_0 = np.delete(tX_test_0, 22, axis=1)
tX_test_1 = tX_test[one_indices, :]
tX_test_1 = np.delete(tX_test_1, 22, axis=1)
tX_test_2_3 = tX_test[two_three_indices, :]

### Adding a column of zeros and ones to detect whether the mass has been measured or not

In [116]:
# take the indices where the mass is not calculated, add the column which has 0 in those indices
# and 1 everywhere else for all matrices 0,1,2_3
zero_indices_0 = np.where(tX_test_0[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_0 else 1 for i in range(tX_test_0.shape[0])])
tX_test_0 = np.insert(tX_test_0, 0, column_to_add, axis=1)
zero_indices_1 = np.where(tX_test_1[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_1 else 1 for i in range(tX_test_1.shape[0])])
tX_test_1 = np.insert(tX_test_1, 0, column_to_add, axis=1)
zero_indices_2_3 = np.where(tX_test_2_3[:,1] == -999.)[0]
column_to_add = np.array([0 if i in zero_indices_2_3 else 1 for i in range(tX_test_2_3.shape[0])])
tX_test_2_3 = np.insert(tX_test_2_3, 0, column_to_add, axis=1)

### We drop the same columns we have dropped for the X training

In [117]:
tX_test_0 = np.delete(tX_test_0, col_to_delete_0, axis=1)
tX_test_1 = np.delete(tX_test_1, col_to_delete_1, axis=1)

### Now we substitute the -999 values with the median

In [118]:
for i in range(1, tX_test_2_3.shape[1]):
    index_column_non_valid =np.where(tX_test_2_3[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_2_3[:,i] != -999.)[0]
    median = np.median(tX_test_2_3[index_column_valid, i], axis = 0)
    tX_test_2_3[index_column_non_valid,i] =  median

In [119]:
for i in range(1, tX_test_1.shape[1]):
    index_column_non_valid =np.where(tX_test_1[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_1[:,i] != -999.)[0]
    median = np.median(tX_test_1[index_column_valid, i], axis = 0)
    tX_test_1[index_column_non_valid,i] =  median

In [120]:
for i in range(1, tX_test_0.shape[1]):
    index_column_non_valid =np.where(tX_test_0[:,i] == -999.)[0]
    index_column_valid =np.where(tX_test_0[:,i] != -999.)[0]
    median = np.median(tX_test_0[index_column_valid, i], axis = 0)
    tX_test_0[index_column_non_valid,i] =  median

### We standardize the test set using the mean and the standard deviation of the training

In [121]:
print(tX_test_0.shape)

(227458, 19)


In [122]:
print(tX_0.shape)

(99913, 19)


In [123]:
def standardize_test(x, mean, std):
    """Standardize the test set."""
    x = x - mean
    x = x / std
    return x

In [124]:
tX_test_0[:,1:] = standardize_test(tX_test_0[:,1:], mean_0, std_0)
tX_test_1[:,1:] = standardize_test(tX_test_1[:,1:], mean_1, std_1)
tX_test_2_3[:,1:]= standardize_test(tX_test_2_3[:,1:], mean_2_3, std_2_3) #we standardize everything a part from the column added manually

### We insert the column for the bias term

In [125]:
tX_tilda_test_0 = np.insert(tX_test_0, 0, np.ones(tX_test_0.shape[0]), axis=1)
tX_tilda_test_1 = np.insert(tX_test_1, 0, np.ones(tX_test_1.shape[0]), axis=1)
tX_tilda_test_2_3 = np.insert(tX_test_2_3, 0, np.ones(tX_test_2_3.shape[0]), axis=1)

### We make the predictions

In [89]:
tX_tilda_test_2_3_augmented = build_poly(tX_tilda_test_2_3, degree=6)
predictions_ridge_2_3 = tX_tilda_test_2_3_augmented @ w_ridge_2_3
# print(predictions_2_3.shape)

NameError: name 'w_ridge_2_3' is not defined

In [None]:
tX_tilda_test_0_augmented = build_poly(tX_tilda_test_0, degree = 6)
predictions_ridge_0 = tX_tilda_test_0_augmented @ w_ridge_0
# print(predictions_0.shape)

In [None]:
tX_tilda_test_1_augmented = build_poly(tX_tilda_test_1, degree = 6)
predictions_ridge_1 = tX_tilda_test_1_augmented @ w_ridge_1
# print(predictions_1.shape)

In [None]:
print(len(zero_indices))
print(len(one_indices))
print(len(two_three_indices))
print()

In [None]:
### Predictions with logistic regression

In [126]:
tX_tilda_test_2_3_augmented = build_poly(tX_tilda_test_2_3, degree = 2)
predictions_logistic_2_3 = sigmoid(tX_tilda_test_2_3_augmented @ w_logistic_2_3)
# print(predictions_2_3.shape)

In [127]:
tX_tilda_test_1_augmented = build_poly(tX_tilda_test_1, degree = 2)
predictions_logistic_1 = sigmoid(tX_tilda_test_1_augmented @ w_logistic_1)
# print(predictions_1.shape)

In [128]:
tX_tilda_test_0_augmented = build_poly(tX_tilda_test_0, degree = 2)
predictions_logistic_0 = sigmoid(tX_tilda_test_0_augmented @ w_logistic_0)
# print(predictions_0.shape)

Now we have to reconstruct a single vector of predictions

In [103]:
#jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10

In [129]:
stacked_predictions = np.zeros(len(y.shape[0]))
stacked_predictions[zero_indices] = predictions_logistic_0
stacked_predictions[one_indices] = predictions_logistic_1
stacked_predictions[two_three_indices] = predictions_logistic_2_3
# count_0 = 0
# count_1 = 0
# count_2_3 = 0
# for index_row in range(tX_test.shape[0]):
#  if index_row in zero_indices:
#       stacked_predictions.append(predictions_logistic_0[count_0])
#       count_0 = count_0 + 1
#   elif index_row in one_indices:
#       stacked_predictions.append(predictions_logistic_1[count_1])
#       count_1 = count_1 +1
#   else:
#       stacked_predictions.append(predictions_logistic_2_3[count_2_3])
#       count_2_3 = count_2_3 + 1

In [130]:
final_predictions = np.array([-1 if el < 0.5 else 1 for el in stacked_predictions])

In [131]:
print(final_predictions) 
print(1 / len(final_predictions) * np.count_nonzero(final_predictions == 1))

[-1 -1 -1 ...  1 -1 -1]
0.2952829624206758


In [132]:
def predict_labels(weights, tX_test):
    y = np.array(tX_test) @ np.array(weights)
    labels = [1 if l > 0 else -1 for l in y]
    return labels

In [133]:
OUTPUT_PATH = 'submission.csv' # TODO: fill in desired name of output file for submission
#y_pred = predict_labels(weights, tX_test)
y_pred = final_predictions
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)