In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import itertools
import matplotlib.pyplot as plt
import csv
import helpers as helpers
import implementations as impl

In [2]:
# Load data
# load_data returns three values, unpacked here as the feature data, the labels
# and the sample ids (meaning inferred from the names — confirm in helpers.load_data).
# NOTE(review): 'train.csv' is a relative path; presumably resolved against the
# kernel's working directory.
train_data, train_labels, ids = helpers.load_data('train.csv')

In [3]:
# Clean and standardize data
# The raw training features go through helpers.clean_data and then
# helpers.standardize; the result is kept under a new name so train_data
# itself is left untouched and this cell can be re-run safely.
new_data = helpers.standardize(helpers.clean_data(train_data))

# Remap label -1 to 0 in place. The evaluation cell applies the same remap to
# the predictions before computing accuracy, so both sides use {0, 1}.
train_labels[train_labels == -1] = 0

In [4]:
# Shuffle data
# Labels and features are passed in together and reassigned together
# (presumably so rows stay aligned — confirm in helpers.shuffle_data).
# NOTE(review): this cell overwrites its own inputs, and no seed is set in the
# visible cells before this call; if shuffle_data draws from an unseeded RNG,
# the train/validation split will differ on every run.
train_labels, new_data = helpers.shuffle_data(train_labels, new_data)

In [5]:
# Split the shuffled labels and features into validation and training parts.
# slice_data returns four values, in the order: validation labels, training
# labels, validation features, training features.
# NOTE(review): 0.25 is presumably the validation fraction — confirm in helpers.slice_data.
validation_and_train_parts = helpers.slice_data(train_labels, new_data, 0.25)
y_validation, y_train, tx_validation, tx_train = validation_and_train_parts

In [6]:
# Polynomial feature expansion: build_poly is called with degree argument 2 on
# the training split first, then on the validation split.
# NOTE(review): the cell reassigns its own inputs, so running it twice expands
# the already-expanded features again.
tx_train, tx_validation = (
    helpers.build_poly(split_features, 2)
    for split_features in (tx_train, tx_validation)
)

In [9]:
# Add bias to data
def _prepend_ones(matrix):
    """Return `matrix` with an extra leading column filled with ones."""
    ones_column = np.ones((matrix.shape[0], 1))
    return np.c_[ones_column, matrix]


# The column of ones acts as the intercept term of the model.
# NOTE(review): the cell reassigns its own inputs, so re-running it adds a
# second column of ones.
tx_train = _prepend_ones(tx_train)
tx_validation = _prepend_ones(tx_validation)

In [11]:
# Initialize the weights randomly according to a Gaussian distribution
# (mean 0, standard deviation 0.1), one weight per column of tx_train.
# A dedicated, seeded generator makes the starting point repeatable across
# kernel restarts without touching NumPy's global random state.
WEIGHT_INIT_SEED = 42
weight_rng = np.random.default_rng(WEIGHT_INIT_SEED)
initial_w = weight_rng.normal(0., 0.1, tx_train.shape[1])

# Train model
# logistic_regression returns two values, unpacked as the fitted weights and
# the training loss.
trained_weights, train_loss = impl.logistic_regression(
    y_train, tx_train, initial_w, max_iters=10000, gamma=0.1
)

Current iteration=0, loss=0.6879840591914723
Current iteration=100, loss=0.5024897008814241
Current iteration=200, loss=0.48814420068433134
Current iteration=300, loss=0.4811175878058417
Current iteration=400, loss=0.47629903329682693
Current iteration=500, loss=0.4727251748749316
Current iteration=600, loss=0.4700164620753463
Current iteration=700, loss=0.4678910069448358
Current iteration=800, loss=0.4661557033616474
Current iteration=900, loss=0.4646932828395391
Current iteration=1000, loss=0.4634319443280991
Current iteration=1100, loss=0.4623249420484042
Current iteration=1200, loss=0.4613399723540195
Current iteration=1300, loss=0.46045373636301606
Current iteration=1400, loss=0.45964890085244675
Current iteration=1500, loss=0.45891225373540734
Current iteration=1600, loss=0.45823351780097926
Current iteration=1700, loss=0.45760455689948837
Current iteration=1800, loss=0.4570188292297599
Current iteration=1900, loss=0.45647100218273456
Current iteration=2000, loss=0.4559566756472

In [12]:
# Inspect the weight vector returned by impl.logistic_regression
print(trained_weights)

[-1.19850720e+00  1.80757864e+00 -1.00102034e+00 -4.75226323e-02
  6.62427463e-01 -3.98828019e-01 -1.80350165e-01 -2.59925521e-01
  2.72974994e+00 -9.28327062e-02  3.56977473e-01 -8.22808028e-01
  1.27018261e-01 -2.26831109e-02  7.03260622e-01 -6.39258413e-03
 -5.65360235e-03  9.38070126e-01  3.08978070e-03  3.44175740e-03
 -1.48194900e-01  2.34599412e-03  1.52289469e-01 -3.56373411e-01
  2.83810488e-01 -1.88338647e-03 -3.54361280e-03  3.72764602e-02
  4.28734076e-03 -9.95468920e-04 -4.59267655e-02 -2.44849713e+00
  5.38608593e-01 -2.12155419e+00 -9.56948784e-02  2.55663519e-01
  2.42696932e-01 -1.07011977e-02 -1.74325552e+00  6.24677116e-01
 -2.83407352e-01  2.72797048e-01  1.20786201e-01  2.41532684e-01
 -3.15706205e-01 -1.16457424e-01 -9.18322287e-03 -2.59612478e-01
 -1.90295241e-01 -8.19590921e-03  4.59047901e-01  1.40579582e-03
 -3.96491162e-01 -9.91457971e-02 -2.72085269e-01  3.60970661e-01
 -2.30982603e-03  3.39208148e-02  3.02985182e-01 -9.71897606e-03
  4.05864160e-02]


In [13]:
# Hold-out evaluation: score the trained weights on the validation split and on
# the training split (a single fixed split, not k-fold cross validation).
predict_validation = helpers.predict_logistic(tx_validation, trained_weights)
predict_train = helpers.predict_logistic(tx_train, trained_weights)

# Map any -1 predictions to 0 in place, mirroring the -1 -> 0 remap applied to
# train_labels, so predictions and labels share the same encoding.
for predictions in (predict_validation, predict_train):
    predictions[predictions == -1] = 0

train_accuracy = helpers.accuracy(predict_train, y_train)
validation_accuracy = helpers.accuracy(predict_validation, y_validation)

print(f"train_accuracy = {train_accuracy}")
print(f"validation_accuracy = {validation_accuracy}")

train_accuracy = 0.797472
validation_accuracy = 0.78744


In [14]:
# Eyeball check: print the validation predictions followed by the validation labels
print(predict_validation)
print(y_validation)

[0. 0. 0. ... 1. 0. 0.]
[1. 0. 0. ... 1. 0. 0.]


In [None]:
# Load the test set (train=False) and run it through the same clean_data and
# standardize helpers that were applied to the training features.
# NOTE(review): standardize is called on the test data alone, so presumably it
# uses test-set statistics rather than the training ones — confirm.
tx_test_raw, y_test, ids_test = helpers.load_data('test.csv', train=False)
tx_test_cleaned = helpers.clean_data(tx_test_raw)
tx_test = helpers.standardize(tx_test_cleaned)

In [None]:
# Apply the same feature pipeline as the training data: degree-2 polynomial
# expansion followed by a leading column of ones. The weights were fitted on a
# matrix that includes that bias column, so the test matrix needs it too for
# the column count to match trained_weights.
# NOTE(review): the cell reassigns tx_test, so re-running it without reloading
# the test data applies the expansion and the bias column a second time.
tx_test = helpers.build_poly(tx_test, 2)
tx_test = np.c_[np.ones((tx_test.shape[0], 1)), tx_test]

predict_test = helpers.predict_logistic(tx_test, trained_weights)
print(predict_test)

In [None]:
# Write the submission file from the test ids and the test predictions.
# NOTE(review): the filename says "degree3", but the features in this notebook
# are expanded with build_poly(..., 2) — confirm which is intended before submitting.
helpers.create_csv_submission(ids_test, predict_test, 'Predictions_Logistics_degree3.csv')