In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
# Relative path of the training CSV handed to load_csv_data below.
DATA_TRAIN_PATH = '../data/train.csv' # train data path here
# load_csv_data is provided by the star import from proj1_helpers; its three
# return values are unpacked, per the section header, as the class labels (y),
# the feature matrix (tX) and the event ids (ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

### Splitting the samples by the number of jets

In [3]:
# Split the rows of tX into three groups according to the number of jets
# stored in column 22 (0 jets, 1 jet, 2 or 3 jets), then prepend a column of
# ones to each group.
JET_NUM_COLUMN = 22
jet_num = tX[:, JET_NUM_COLUMN]

# np.flatnonzero always returns a 1-D index array, so a group holding a single
# sample still produces a 2-D sub-matrix (the former row_stack(...).squeeze()
# collapsed that case to a 0-d index and broke the np.insert calls below).
zero_indices = np.flatnonzero(jet_num == 0)
one_indices = np.flatnonzero(jet_num == 1)
two_three_indices = np.flatnonzero((jet_num == 2) | (jet_num == 3))

tX_0 = tX[zero_indices, :]
tX_1 = tX[one_indices, :]
tX_2_3 = tX[two_three_indices, :]

# Column 0 of every tX_tilda_* matrix is the constant 1.
tX_tilda_0 = np.insert(tX_0, 0, np.ones(tX_0.shape[0]), axis=1)
tX_tilda_1 = np.insert(tX_1, 0, np.ones(tX_1.shape[0]), axis=1)
tX_tilda_2_3 = np.insert(tX_2_3, 0, np.ones(tX_2_3.shape[0]), axis=1)

### Adding a column of zeros and ones to detect whether the mass has been measured or not

In [4]:
# For each jet group, prepend a 0/1 column that records whether the mass
# (column 1 of the tX_tilda_* matrices, right after the column of ones) was
# measured: 0 where the entry equals -999., 1 everywhere else.
def _prepend_mass_indicator(features, mass_column=1, missing_value=-999.):
    """Prepend a measured/missing indicator for one feature column.

    Args:
        features: 2-D array of samples.
        mass_column: index of the column inspected for the missing marker.
        missing_value: value that marks a missing measurement.

    Returns:
        (augmented, missing_rows, indicator) where `augmented` is `features`
        with the indicator inserted as column 0, `missing_rows` is the 1-D
        array of row indices whose entry equals `missing_value`, and
        `indicator` is the inserted 0/1 integer column.
    """
    measured = features[:, mass_column] != missing_value
    # Vectorised replacement for the per-row `i in indices` membership test.
    indicator = measured.astype(int)
    missing_rows = np.flatnonzero(~measured)
    return np.insert(features, 0, indicator, axis=1), missing_rows, indicator


# NOTE: this cell rebinds tX_tilda_*; running it twice inserts the indicator
# twice, so re-run the jet-splitting cell first when re-executing.
tX_tilda_0, zero_indices_0, column_to_add = _prepend_mass_indicator(tX_tilda_0)
tX_tilda_1, zero_indices_1, column_to_add = _prepend_mass_indicator(tX_tilda_1)
tX_tilda_2_3, zero_indices_2_3, column_to_add = _prepend_mass_indicator(tX_tilda_2_3)

### Changing the labels to {0,1}

In [None]:
# Re-encode the labels on {0, 1}: a label of -1 (no boson detected) becomes 0,
# every other label (boson detected) becomes 1.
y_ = np.where(np.asarray(y) == -1, 0, 1)

### 

### Throwing away the outliers from the training data

In [36]:
# Replace the outliers of every feature column of tX_tilda_2_3 (columns 0 and 1
# are skipped by the range) with the column median.  Entries equal to -999. are
# ignored when computing the statistics and are never overwritten.
for i in range(2, tX_tilda_2_3.shape[1]):
    column = tX_tilda_2_3[:, i]
    index_column_valid = np.flatnonzero(column != -999.)
    if index_column_valid.size == 0:
        # Nothing measured in this column: no statistics to compute.
        continue
    valid_values = column[index_column_valid]

    (column_15_quantile, column_25_quantile,
     column_75_quantile, column_85_quantile) = np.quantile(
        valid_values, np.array([0.15, 0.25, 0.75, 0.85]))
    interquantile = column_75_quantile - column_25_quantile

    # Fences: 1.5 interquartile ranges outside the 15% / 85% quantiles.
    lower_fence = column_15_quantile - 1.5 * interquantile
    upper_fence = column_85_quantile + 1.5 * interquantile
    is_outlier = (valid_values <= lower_fence) | (valid_values >= upper_fence)

    # The mask is relative to the valid subset; map it back to row indices of
    # the full matrix before assigning.  Indexing the full matrix with
    # subset-relative positions overwrote the wrong rows.
    indices_outliers = index_column_valid[is_outlier]
    median = np.median(valid_values)
    tX_tilda_2_3[indices_outliers, i] = median

In [6]:
# colors = ['red', 'blue']
# x_pos=[]
# x_neg=[]

# for j in range(len(y)):
#  if(y[j]==1):
#       x_pos.insert(0,tX[j])
#    else:
#        x_neg.insert(0,tX[j])
# xpos = np.array(x_pos)
# xneg = np.array(x_neg)
# for i in range(tX.shape[1]):
#  plt.hist(xpos[:,i], alpha = 0.5, color = 'r', bins = 100)
#  plt.hist(xneg[:,i], alpha = 0.5, color = 'b', bins = 100)
#  plt.show()

## Do your crazy machine learning thing here :) ...

In [7]:
def compute_loss(y, tx, w):
    """Return the mean-squared-error loss 1/(2N) * ||y - tx @ w||^2.

    N is the length of `y`; the residual is expected to be 1-D so that
    np.dot yields a scalar.
    """
    residual = y - tx @ w
    half_inverse_n = 1 / (2 * y.shape[0])
    return half_inverse_n * np.dot(residual, residual)

In [8]:
def compute_gradient(y, tx, w):
    """Return the gradient of the MSE loss at w: -(1/N) * tx^T (y - tx @ w)."""
    n_samples = y.shape[0]
    residual = y - tx @ w
    # Scale the transposed design matrix first, then apply it to the residual
    # (same evaluation order as `-(1/N) * tx.T @ e`).
    scaled_design_t = -(1 / n_samples) * tx.T
    return scaled_design_t @ residual

In [9]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Run `max_iters` steps of gradient descent on the MSE loss.

    Starting from `initial_w`, each step moves against the full gradient with
    step size `gamma` and prints a progress line.

    Returns:
        (losses, ws): the loss evaluated before each update, and the list of
        iterates, beginning with `initial_w`.
    """
    w = initial_w
    ws, losses = [initial_w], []
    last_iter = max_iters - 1
    for n_iter in range(max_iters):
        loss = compute_loss(y, tx, w)
        w = w - gamma * compute_gradient(y, tx, w)
        losses.append(loss)
        ws.append(w)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=last_iter, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [10]:
def compute_stoch_gradient(y, tx, w):
    """Return the MSE gradient estimated from one uniformly drawn sample.

    Args:
        y: 1-D array of targets.
        tx: 2-D array with one row per sample.
        w: 1-D weight vector.

    Returns:
        -x_n * (y_n - x_n . w) for a row n drawn uniformly at random.
    """
    n_samples = y.shape[0]
    # np.random.randint excludes the upper bound, so the index is always a
    # valid row (random.randint(0, N) could return N and `random` was never
    # imported in this notebook).
    sample_index = np.random.randint(n_samples)
    xn = tx[sample_index, :]
    error = y[sample_index] - np.dot(xn, w)
    return -xn * error

In [11]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Run `max_iters` steps of stochastic gradient descent on the MSE loss.

    Each step evaluates the full loss at the current weights, then updates
    them with the gradient returned by compute_stoch_gradient and step size
    `gamma`, printing a progress line.

    Returns:
        (losses, ws): the loss evaluated before each update, and the list of
        iterates, beginning with `initial_w`.
    """
    w = initial_w
    ws, losses = [initial_w], []
    last_iter = max_iters - 1
    for n_iter in range(max_iters):
        loss = compute_loss(y, tx, w)
        w = w - gamma * compute_stoch_gradient(y, tx, w)
        losses.append(loss)
        ws.append(w)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=last_iter, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [12]:
from proj1_helpers import *

def least_squares(y, tx):
    """Solve the normal equations (tx^T tx) w = tx^T y and return w."""
    tx_t = tx.T
    rhs = tx_t @ y
    gram = tx_t @ tx
    return np.linalg.solve(gram, rhs)

def test_your_least_squares(y, tx):
    """Compare the normal-equations solution with gradient descent.

    Runs least_squares_GD for 50 iterations (step size 0.7) from the zero
    vector and returns the Euclidean distance between its last iterate and
    the weights returned by least_squares.
    """
    w_least_squares = least_squares(y, tx)
    initial_w = np.zeros(tx.shape[1])
    max_iters = 50
    gamma = 0.7
    # The gradient-descent routine defined in this notebook is least_squares_GD;
    # it returns (losses, ws) with ws[-1] being the final iterate.
    _, w_gradient_descent = least_squares_GD(y, tx, initial_w, max_iters, gamma)
    w = w_gradient_descent[-1]
    return np.linalg.norm(w_least_squares - w)

In [13]:
def ridge_regression(y, tx, lambda_):
    """Solve (tx^T tx + 2 N lambda_ I) w = tx^T y and return w.

    N is the number of rows of `tx`; the identity has one row per column of
    `tx`.
    """
    n_samples, n_features = tx.shape
    penalty = 2 * n_samples * lambda_
    tx_t = tx.T
    lhs = tx_t @ tx + penalty * np.eye(n_features)
    rhs = tx_t @ y
    return np.linalg.solve(lhs, rhs)

def debug_ridge(y, tx):
    """Return ||w_ls - w_ridge(lambda=0)||, a sanity check for ridge_regression."""
    unpenalized_ridge = ridge_regression(y, tx, 0)
    reference = least_squares(y, tx)
    return np.linalg.norm(reference - unpenalized_ridge)

In [14]:
def sigmoid(t):
    """Apply the logistic sigmoid 1 / (1 + exp(-t)) element-wise on t.

    Evaluated as exp(-log(1 + exp(-t))) through np.logaddexp so that large
    positive or negative inputs saturate to 1 and 0; the direct form
    exp(t) / (1 + exp(t)) overflows to inf / inf = nan for large t.
    """
    return np.exp(-np.logaddexp(0, -t))

In [15]:
def calculate_loss(y, tx, w):
    """Compute the logistic loss: the negative log-likelihood summed over samples.

    Uses the identity
        -[y log s(z) + (1 - y) log(1 - s(z))] = log(1 + exp(z)) - y z
    with z = tx @ w and s the sigmoid, so saturated scores no longer produce
    log(0) = -inf or nan.

    Args:
        y: labels in {0, 1}, same shape as tx @ w.
        tx: 2-D array with one row per sample.
        w: weight vector.

    Returns:
        The scalar loss.
    """
    scores = tx @ w
    per_sample = np.logaddexp(0, scores) - y * scores
    return per_sample.sum()

In [16]:
def calculate_gradient(y, tx, w):
    """Return the gradient of the logistic loss: tx^T (sigmoid(tx @ w) - y)."""
    prediction_error = sigmoid(tx @ w) - y
    return tx.T @ prediction_error

In [17]:
def learning_by_gradient_descent(y, tx, w, gamma):
    """
    Perform a single gradient-descent step for logistic regression.

    Returns the loss evaluated at the incoming w together with the weights
    obtained after moving against the gradient with step size gamma.
    """
    loss = calculate_loss(y, tx, w)
    updated_w = w - gamma * calculate_gradient(y, tx, w)
    return loss, updated_w

In [18]:
def calculate_hessian(y, tx, w):
    """Return the Hessian of the logistic loss, tx^T S tx.

    S is the diagonal matrix with entries s_n (1 - s_n), s = sigmoid(tx @ w).
    S is never materialised: scaling the rows of tx by the diagonal entries
    gives S @ tx without allocating an N x N array.  `y` is unused and kept
    for signature compatibility.
    """
    s = sigmoid(tx @ w)
    # One weight per sample, shaped as a column so it broadcasts across the
    # feature columns of tx (works for 1-D and (N, 1)-shaped scores alike).
    row_weights = (s * (1 - s)).reshape(-1, 1)
    return tx.T @ (row_weights * tx)

In [19]:
def logistic_regression(y, tx, w):
    """Evaluate the logistic model at w and return (loss, gradient, Hessian)."""
    return (
        calculate_loss(y, tx, w),
        calculate_gradient(y, tx, w),
        calculate_hessian(y, tx, w),
    )

In [20]:
def learning_by_newton_method(y, tx, w, gamma):
    """
    Perform a single damped Newton step for logistic regression.

    Solves hess @ d = grad for the direction d and returns the loss at the
    incoming w together with w - gamma * d.
    """
    loss, grad, hess = logistic_regression(y, tx, w)
    newton_direction = np.linalg.solve(hess, grad)
    return loss, w - gamma * newton_direction

In [21]:
def penalized_logistic_regression(y, tx, w, lambda_):
    """Return (loss, gradient, Hessian) of the L2-penalised logistic loss.

    The penalty lambda_ * ||w||^2 is added to the loss, 2 * lambda_ * w to the
    gradient and 2 * lambda_ * I to the Hessian.
    """
    twice_lambda = 2 * lambda_
    penalty = lambda_ * np.linalg.norm(w) ** 2
    loss = calculate_loss(y, tx, w) + penalty
    grad = calculate_gradient(y, tx, w) + twice_lambda * w
    hess = calculate_hessian(y, tx, w) + twice_lambda * np.eye(w.shape[0])
    return loss, grad, hess

In [22]:
def learning_by_penalized_gradient(y, tx, w, gamma, lambda_):
    """
    Perform a single update for the penalised logistic regression.

    The direction is obtained by solving hess @ d = grad (a Newton-type step
    on the penalised objective); returns the penalised loss at the incoming w
    and w - gamma * d.
    """
    loss, grad, hess = penalized_logistic_regression(y, tx, w, lambda_)
    direction = np.linalg.solve(hess, grad)
    return loss, w - gamma * direction

## Generate predictions and save output in csv format for submission:

In [23]:
# Relative path of the test CSV handed to load_csv_data below.
DATA_TEST_PATH = '../data/test.csv' # TODO: download test data and supply path here
# Same loader as for the training set; the first returned value is discarded
# and the other two are kept as the test feature matrix and the test ids.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [24]:
def predict_labels(weights, tX_test):
    """Classify each row of tX_test from the sign of its linear score.

    Returns a Python list holding 1 where tX_test @ weights is strictly
    positive and -1 otherwise.
    """
    scores = np.array(tX_test) @ np.array(weights)
    labels = []
    for score in scores:
        labels.append(1 if score > 0 else -1)
    return labels

In [25]:
 #indeces_lepton = [0, 1, 2, 3, 7, 8, 9, 10, 11, 12, 16, 17, 18, 21, 22]
# indeces_hadronic_tau = [0, 3, 7, 8, 9, 10, 11, 13, 14, 15, 21, 22]
# indeces_jet = [0, 4, 5, 6, 8, 9, 12, 21, 22, 23, 24, 25, 26, 27, 28, 29]
# indeces_MTE = [0, 1, 3, 8, 9, 11, 19, 20, 21, 22]

tX_lepton_test = tX_test[:, indeces_lepton]
tX_tilda_lepton_test = np.insert(tX_lepton_test, 0, np.ones(tX_test.shape[0]), axis=1)
labels_lepton = predict_labels(w_opt_lepton, tX_tilda_lepton_test)
print(1 / np.array(labels_lepton).shape[0] * np.count_nonzero(np.array(labels_lepton) == 1))

tX_hadronic_tau_test = tX_test[:, indeces_hadronic_tau]
tX_tilda_hadronic_tau_test = np.insert(tX_hadronic_tau_test, 0, np.ones(tX_test.shape[0]), axis=1)
labels_hadronic_tau = predict_labels(w_opt_hadronic_tau, tX_tilda_hadronic_tau_test)
print(1 / np.array(labels_hadronic_tau).shape[0] * np.count_nonzero(np.array(labels_hadronic_tau) == 1))

tX_jet_test = tX_test[:, indeces_jet]
tX_tilda_jet_test = np.insert(tX_jet_test, 0, np.ones(tX_test.shape[0]), axis=1)
labels_jet = predict_labels(w_opt_jet, tX_tilda_jet_test)
print(1 / np.array(labels_jet).shape[0] * np.count_nonzero(np.array(labels_jet) == 1))

tX_MTE_test = tX_test[:, indeces_MTE]
tX_tilda_MTE_test = np.insert(tX_MTE_test, 0, np.ones(tX_test.shape[0]), axis=1)
labels_MTE = predict_labels(w_opt_MTE, tX_tilda_MTE_test)
print(1 / np.array(labels_MTE).shape[0] * np.count_nonzero(np.array(labels_MTE) == 1))

NameError: name 'indeces_lepton' is not defined

In [None]:
# TRIAL, UNDERESTIMATION OF THE TRUE PROBABILITY TO GET A BOSON
# count = np.array(labels_MTE) + np.array(labels_lepton) + np.array(labels_hadronic_tau) + np.array(labels_jet)
# TrueSignal = np.array([int(bool((counted == 4))) for counted in count])
# print( 1 / np.array(count).shape[0] * np.count_nonzero(np.array(TrueSignal) == 1))

In [None]:
# Destination of the submission file; the empty string is a placeholder that
# still has to be filled in.
OUTPUT_PATH = '' # TODO: fill in desired name of output file for submission
# NOTE(review): `weights` is not assigned in any cell visible in this notebook;
# it presumably has to hold one coefficient per column of tX_test -- confirm
# which trained model is meant before running.
y_pred = predict_labels(weights, tX_test)
# create_csv_submission is not defined in this notebook; presumably it comes
# from the proj1_helpers star import.
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)