In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import itertools
import matplotlib.pyplot as plt
import csv
import helpers as helpers
import implementations as impl

In [2]:
# Load the training set from 'train.csv'.
# helpers.load_data returns three values, unpacked here as the feature data,
# the labels and the sample ids.
train_data, train_labels, ids = helpers.load_data('train.csv')

In [4]:
# Clean the raw training features, then standardize the cleaned result.
# NOTE(review): this runs before the train/validation split further down, so
# whatever statistics standardize() computes would also cover the rows that
# later become the validation set -- confirm this is acceptable.
cleaned_data = helpers.clean_data(train_data)
new_data = helpers.standardize(cleaned_data)

# Re-encode the labels in place: every -1 becomes 0, other values are untouched.
negative_labels = train_labels == -1
train_labels[negative_labels] = 0

In [5]:
# Shuffle data
# Labels and features are passed in and returned together, so they are
# presumably permuted in unison -- TODO confirm in helpers.shuffle_data.
# NOTE(review): no random seed is set in the cells visible before this point,
# so the ordering (and the split derived from it) may differ between runs
# unless shuffle_data seeds internally -- confirm.
train_labels, new_data = helpers.shuffle_data(train_labels, new_data)

In [6]:
# Slice into training and validation sets
# Mind the return order: for both labels and features the validation part
# comes before the training part.
# 0.25 is the ratio handed to helpers.slice_data; presumably the fraction of
# rows held out for validation -- TODO confirm against the helper.
y_validation, y_train, tx_validation, tx_train = helpers.slice_data(train_labels, new_data, 0.25)

In [7]:
# Expand degree
# Both splits must go through the SAME feature expansion: the weights are
# fitted on the columns produced for tx_train, so the validation matrix has to
# have identical columns in identical order. Previously the validation split
# used a different helper (build_poly_2) than the training split
# (build_poly_deg2); the training helper is now applied to both.
# NOTE(review): each line overwrites its input, so re-running this cell
# without re-running the split cell expands the features a second time.
tx_train = helpers.build_poly_deg2(tx_train)
tx_validation = helpers.build_poly_deg2(tx_validation)

In [8]:
# Add bias to data
def _with_bias_column(features):
    """Return `features` with a column of ones prepended (the bias term)."""
    ones_column = np.ones((features.shape[0], 1))
    return np.c_[ones_column, features]


tx_train = _with_bias_column(tx_train)
tx_validation = _with_bias_column(tx_validation)

In [9]:
# Settings for this training run, named so they are easy to find and tune.
SEED = 42          # fixes the random starting weights
MAX_ITERS = 10000  # passed to logistic_regression as max_iters
GAMMA = 0.1        # passed to logistic_regression as gamma

# Initialize the weights randomly according to a Gaussian distribution
# (mean 0, standard deviation 0.1, one weight per column of tx_train).
# A dedicated seeded generator makes the starting weights repeatable across
# runs without touching NumPy's global random state.
rng = np.random.RandomState(SEED)
initial_w = rng.normal(0., 0.1, size=tx_train.shape[1])

# Train model: returns the fitted weights and a loss value for the training run.
trained_weights, train_loss = impl.logistic_regression(
    y_train, tx_train, initial_w, max_iters=MAX_ITERS, gamma=GAMMA
)

Current iteration=0, loss=1.0030165128857866
Current iteration=100, loss=0.4600769740640686
Current iteration=200, loss=0.4522624064619956
Current iteration=300, loss=0.44977975413477017
Current iteration=400, loss=0.44852027993561133
Current iteration=500, loss=0.4477107611090668
Current iteration=600, loss=0.44710836347250904
Current iteration=700, loss=0.44661679617008115
Current iteration=800, loss=0.4461926357994798
Current iteration=900, loss=0.44581435842961564
Current iteration=1000, loss=0.4454703500865114
Current iteration=1100, loss=0.4451538006153166
Current iteration=1200, loss=0.44486037636735215
Current iteration=1300, loss=0.44458709918996087
Current iteration=1400, loss=0.4443317785413557
Current iteration=1500, loss=0.44409270936816714
Current iteration=1600, loss=0.44386850342603834
Current iteration=1700, loss=0.4436579905807344
Current iteration=1800, loss=0.44346015841821357
Current iteration=1900, loss=0.4432741137089506
Current iteration=2000, loss=0.44309905682

In [10]:
# Inspect the weight vector produced by the training cell.
print(trained_weights)

[-6.44962036e-01  8.86689944e-01 -6.60452236e-01 -1.23385034e+00
  9.13402726e-01 -1.78549779e-01 -1.35625339e-01 -2.52731971e-01
  1.11419654e+00 -1.11348517e-01  1.84208738e-01 -8.08377234e-01
  3.42696844e-02  1.93559144e-01  5.51147342e-01 -3.84801812e-03
 -5.34862802e-03  9.13268596e-01  8.05326706e-03  3.61286635e-03
 -2.22014309e-01  1.69332150e-03 -6.31049270e-02 -3.49616757e-01
  2.64348985e-01 -3.81603742e-03 -5.17898427e-03  9.49934204e-02
  8.40822876e-04 -8.97564602e-04 -2.09074260e-01 -4.55906901e-01
  1.22715939e-01 -1.08339125e-01  1.91211326e-02  4.25077254e-02
  3.22068496e-02 -4.23689840e-03 -5.20407990e-01  3.42174471e-02
 -1.39363193e-01  4.21574477e-02  2.60099644e-01  4.76625028e-02
 -2.79897413e-02 -1.08997793e-01 -1.11177036e-02 -3.49459997e-02
 -1.83203047e-01 -9.79829448e-03  1.70503398e-02  8.10866320e-04
 -6.74023099e-02 -2.97501572e-02 -4.30074437e-02  2.18101665e-01
 -1.58767028e-03 -2.76942280e-03  1.21688592e-01 -4.41297373e-03
  9.46875678e-02]


In [11]:
# Hold-out evaluation: score the fitted weights on the validation split and,
# for comparison, on the training split itself.
predict_validation = helpers.predict_logistic(tx_validation, trained_weights)
predict_train = helpers.predict_logistic(tx_train, trained_weights)

# Rewrite -1 predictions to 0 in place, matching the relabelling that was
# applied to train_labels before the split.
for split_predictions in (predict_validation, predict_train):
    split_predictions[split_predictions == -1] = 0

train_accuracy = helpers.accuracy(predict_train, y_train)
validation_accuracy = helpers.accuracy(predict_validation, y_validation)

# One print call, one line per metric.
print(
    f"train_accuracy = {train_accuracy}",
    f"validation_accuracy = {validation_accuracy}",
    sep="\n",
)

train_accuracy = 0.8019093333333334
validation_accuracy = 0.802784


In [12]:
# Show the validation predictions and the true validation labels on
# consecutive lines for a quick visual comparison.
print(predict_validation, y_validation, sep="\n")

[0. 0. 0. ... 1. 0. 0.]
[1. 0. 0. ... 1. 0. 0.]


In [16]:
# Load the test set; train=False is forwarded to the loader.
# The second returned value is bound to y_test but not used in the visible cells.
tx_test, y_test, ids_test = helpers.load_data('test.csv', train=False)

# Run the test features through the same clean -> standardize calls that were
# applied to the training features.
# NOTE(review): standardize() only receives the test matrix here, so any
# statistics it computes would come from the test set rather than from the
# training data -- confirm this is intended.
cleaned_test = helpers.clean_data(tx_test)
tx_test = helpers.standardize(cleaned_test)

In [17]:
# Build the test design matrix with the same steps used for the training
# matrix: the degree-2 expansion helper the model was trained with
# (build_poly_deg2), followed by a leading column of ones for the bias.
# Previously this cell called a different helper (build_poly(..., 2)); using
# the training helper guarantees the columns line up with trained_weights.
tx_test = helpers.build_poly_deg2(tx_test)
tx_test = np.c_[np.ones((tx_test.shape[0], 1)), tx_test]

# Fail fast if the feature count does not match the fitted weight vector.
assert tx_test.shape[1] == trained_weights.shape[0], (
    "test features and trained weights have different sizes"
)

predict_test = helpers.predict_logistic(tx_test, trained_weights)
print(predict_test)

[-1. -1. -1. ...  1. -1. -1.]


In [18]:
# Hand the test ids and their predictions to helpers.create_csv_submission
# together with the output file name for the submission CSV.
helpers.create_csv_submission(ids_test, predict_test, 'Predictions_Logistics_degree2_2.csv')