In [99]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from proj1_helpers import *
from implementations import *
from losses import *
from EDA import *
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [100]:
# Location of the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = "../data/train.csv"
# load_csv_data returns three values, kept here as the labels (data_y),
# the feature matrix (data_set) and the event ids (ids).
data_y, data_set, ids = load_csv_data(DATA_TRAIN_PATH)

In [101]:
# Fixing the seed: this cell only stores the value; it does not itself seed any
# random number generator. The value is handed to train_test_separator below.
seed = 8

### Split and classify data

In [102]:
# Classification of the output: derive four label arrays (y_0 .. y_3) from the
# labels `data_y` and the feature matrix `data_set`.
# NOTE(review): the grouping rule lives in y_classification (not visible here) —
# presumably the same four-way grouping EDA_class applies to the features; confirm.
y_0, y_1, y_2, y_3 = y_classification(data_y, data_set)

In [103]:
# EDA for each class: build one feature matrix per group (class_0 .. class_3)
# from `data_set`. The train splits derived from them further down have
# different column counts, so the groups do not share one feature layout.
class_0, class_1, class_2, class_3 = EDA_class(data_set)

In [104]:
# Fraction of each group's rows that goes to the training partition
# (the remainder becomes the held-out test partition).
spliter = 0.8

# Run the same separator over every (labels, features) pair, in group order
# 0..3, and unpack each 4-tuple result into its train/test variables.
(
    (train_0, y_tr_0, test_0, y_te_0),
    (train_1, y_tr_1, test_1, y_te_1),
    (train_2, y_tr_2, test_2, y_te_2),
    (train_3, y_tr_3, test_3, y_te_3),
) = [
    train_test_separator(labels, features, spliter, seed)
    for labels, features in zip(
        (y_0, y_1, y_2, y_3),
        (class_0, class_1, class_2, class_3),
    )
]

In [105]:
# Show the (rows, columns) shape of each group's training matrix, one per line.
print(
    *(subset.shape for subset in (train_0, train_1, train_2, train_3)),
    sep="\n",
)

(79930, 16)
(62035, 22)
(40303, 29)
(17731, 28)


# Data set 1 - Exploration

***

#### Least squares method

In [106]:
# Loss object handed to the least-squares, gradient-descent, SGD and ridge
# solvers in the cells below.
MSE_loss = MSE()

In [107]:
# Computation of the MSE and the optimal weights with least squares.
# The call returns two values: the first is kept as `mse_1`, the second as
# `w_1` (the weights later passed to predict_labels).
mse_1, w_1 = least_squares(y_tr_1, train_1, MSE_loss)

In [108]:
# Label both partitions with the current weights `w_1` (least-squares fit):
# the training rows first, then the held-out test rows.
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [109]:
# Compute the final results for the least-squares fit: compare predictions with
# the true labels, first on the train split, then on the held-out test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  18891  // Error accuracy [%] : % 30.452164100910778
Numbers of errors :  4792  // Error accuracy [%] : % 30.898188148816818


*** 

#### Gradient Descent method with MSE loss

In [110]:
# Fit by full gradient descent on the MSE loss, starting from an all-zero
# weight vector with one entry per column of train_1.
initial_w = np.zeros(train_1.shape[1])
mse_1, w_1 = least_squares_GD(
    y_tr_1,
    train_1,
    MSE_loss,
    initial_w=initial_w,
    gamma=1e-1,
    max_iters=1000,
)

In [111]:
# Apply the gradient-descent weights `w_1` to each partition in turn
# (train, then test) and keep the predicted labels.
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [112]:
# Compute the final results for the gradient-descent fit: mismatches between
# predictions and true labels on the train split, then on the test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  18889  // Error accuracy [%] : % 30.448940114451517
Numbers of errors :  4791  // Error accuracy [%] : % 30.891740279837514


***

#### Stochastic Gradient descent with MSE loss

In [114]:
# Train our model with stochastic gradient descent and get the resulting
# weights and loss.
# Seed NumPy's global RNG with the notebook-wide `seed` first: the starting
# weights are drawn at random, so without this every run begins from a
# different point and the reported loss / error counts cannot be reproduced.
# Any draws least_squares_SGD itself makes from NumPy's global RNG (if it uses
# it — not visible here) become repeatable as well.
np.random.seed(seed)
initial_w = np.random.rand(train_1.shape[1])
mse_1, w_1 = least_squares_SGD(y_tr_1, train_1, MSE_loss, initial_w=initial_w, gamma=1e-2, max_iters=1000)
print(mse_1)

0.13860464529432823


In [115]:
# Predict labels with the SGD weights `w_1`, for the training partition and
# then for the test partition.
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [116]:
# Compute the final results for the SGD fit: mismatches between predictions
# and true labels on the train split, then on the test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  21110  // Error accuracy [%] : % 34.029177077456275
Numbers of errors :  5365  // Error accuracy [%] : % 34.59281707395706


***

#### Ridge regression method

In [117]:
# Computation of the MSE and the optimal weights with ridge regression.
# lambda_=0.5 is hard-coded here rather than tuned; presumably it is the
# weight of the L2 penalty — confirm against ridge_regression's definition.
mse_1, w_1 = ridge_regression(y_tr_1, train_1, MSE_loss, lambda_=0.5)

In [118]:
# Use the ridge-regression weights `w_1` to label the training rows and the
# held-out test rows, in that order.
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [119]:
# Compute the final results for the ridge fit: mismatches between predictions
# and true labels on the train split, then on the test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  19374  // Error accuracy [%] : % 31.23075683082131
Numbers of errors :  4893  // Error accuracy [%] : % 31.549422915726353


***

#### Logistic regression using Gradient descent method

In [120]:
# Loss object passed to logistic_regression in the cells below.
# presumably the negative log-likelihood, going by the name — confirm in the losses module
neg_log = Neg_log()

In [121]:
# Fit logistic regression by gradient descent, starting from all-zero weights.
# The first returned value is the loss under `neg_log` (not an MSE); the second
# is the fitted weight vector `w_1`.
# NOTE(review): the saved run of this cell emitted a warning pointing at
# `np.exp(t) / (1 + np.exp(t))` — likely exp overflow for large |t| with the
# step size gamma=1; confirm, and consider a smaller gamma or a numerically
# stable sigmoid in the implementation.
initial_w = np.zeros(train_1.shape[1])
loss, w_1 = logistic_regression(y_tr_1, train_1, neg_log, initial_w=initial_w, gamma=1, max_iters=1000)

  return np.exp(t) / (1 + np.exp(t))
  


In [122]:
# Label the training and test partitions with the logistic-regression
# weights `w_1` (train first, test second).
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [123]:
# Compute the final results for the logistic-regression fit: mismatches between
# predictions and true labels on the train split, then on the test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  20674  // Error accuracy [%] : % 33.32634802933828
Numbers of errors :  5218  // Error accuracy [%] : % 33.64498033399961


***

#### Regularized logistic regression

In [124]:
# Fit for the "Regularized logistic regression" section, starting from
# all-zero weights with step size gamma=0.01.
# NOTE(review): this calls the plain `logistic_regression` and passes no
# penalty term, so nothing here is regularized; the error counts printed for
# this section are identical to the unregularized run above. A regularized
# variant (with its lambda argument) is presumably what was intended — confirm
# the function name and signature in `implementations` before swapping it in.
# NOTE(review): the first returned value is the loss under `neg_log`, not an
# MSE, despite being stored in `mse_1`.
initial_w = np.zeros(train_1.shape[1])
mse_1, w_1 = logistic_regression(y_tr_1, train_1, neg_log, initial_w=initial_w, gamma=0.01, max_iters=1000)

  return np.exp(t) / (1 + np.exp(t))
  


In [125]:
# Predict labels for both partitions (train, then test) using the weights
# `w_1` produced by the preceding fit.
train_pred_1, test_pred_1 = (
    predict_labels(w_1, features) for features in (train_1, test_1)
)

In [126]:
# Compute the final results for this section's fit: mismatches between
# predictions and true labels on the train split, then on the test split.
# (counting_errors prints an error count and a percentage for each call.)
counting_errors(train_pred_1, y_tr_1)
counting_errors(test_pred_1, y_te_1)

Numbers of errors :  20674  // Error accuracy [%] : % 33.32634802933828
Numbers of errors :  5218  // Error accuracy [%] : % 33.64498033399961


In [None]:
# Scratch check of NumPy broadcasting: a length-3 vector against a 2x3 matrix.
x = np.array([1, -2, 3])
A = np.array([
    [3, 2, 4],
    [2, 10, 7],
])
# Element-wise product; x is broadcast across each row of A.
print(x * A)
# Shape of x, then the shape of its element-wise sign, on separate lines.
print(x.shape, np.sign(x).shape, sep="\n")