In [206]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
import datetime
from helpers import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [207]:
# Load the class labels (y), the feature matrix (tX) and the event ids from the training CSV.
from proj1_helpers import *
DATA_TRAIN_PATH = '../../train.csv' # location of the training CSV; adjust to where the data was downloaded
# NOTE(review): sub_sample=True is forwarded to the helper; presumably it keeps only a
# subset of the rows to speed up experiments -- confirm in proj1_helpers.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH,sub_sample=True)

In [208]:
# Overwrite every entry equal to -999 with 0, in place (the original values are lost).
# NOTE(review): -999 looks like a missing-value sentinel -- confirm against the dataset description.
tX[tX==-999]=0

# Splitting data

In [209]:
def split_data(x, y, ratio, seed=1):
    """Randomly partition (x, y) into a training part and a test part.

    The global NumPy random generator is seeded with `seed`, a random
    permutation of the row indices is drawn, and the first
    int(ratio * number_of_rows) permuted rows form the training part.
    All remaining rows form the test part.

    Args:
        x: array indexed by row along its first axis.
        y: array indexed with the same row indices as `x`.
        ratio: fraction of the rows assigned to the training part.
        seed: value passed to np.random.seed before shuffling.

    Returns:
        x_train, x_test, y_train, y_test (in that order).
    """
    # Seeding the global generator makes the split reproducible.
    np.random.seed(seed)

    n_rows = x.shape[0]
    n_train = int(ratio * n_rows)
    shuffled = np.random.permutation(n_rows)
    train_rows = shuffled[:n_train]
    test_rows = shuffled[n_train:]

    return x[train_rows], x[test_rows], y[train_rows], y[test_rows]

In [210]:
# 50/50 split with seed 1. tX and y are rebound to the training half, so this cell is
# not idempotent: running it a second time splits the already-halved data again.
# NOTE(review): re-run the loading cell before re-running this one.
tX,tX_test,y,y_test=split_data(tX,y,0.5,1)

## Do your crazy machine learning thing here :) ...

In [211]:
def compute_loss(y, tX, w):
    """Return the mean squared error of the linear model tX.dot(w) against y.

    The squared residuals are summed and divided by the number of entries
    along the first axis of the residual (no extra 1/2 factor is applied).

    Args:
        y: target values.
        tX: feature matrix, one row per sample.
        w: weight vector.

    Returns:
        The mean of the squared residuals.
    """
    residual = y - tX.dot(w)
    n_samples = residual.shape[0]
    return np.sum(residual ** 2) / n_samples

In [212]:
def compute_gradient(y, tX, w):
    """Return -(1/N) * tX^T (y - tX w), where N is the number of rows of tX.

    Args:
        y: target values.
        tX: feature matrix, one row per sample.
        w: weight vector at which the gradient is evaluated.

    Returns:
        The gradient array, with one entry per column of tX.
    """
    residual = y - tX.dot(w)
    scale = -1.0 / tX.shape[0]
    return scale * (tX.T.dot(residual))
        
# Sanity check: the gradient at w = 0 should have one entry per column of tX.
compute_gradient(y,tX,np.zeros([tX.shape[1]])).shape

(30,)

In [213]:
def gradient_descent(y, tX, initial_w, max_iters, gamma): 
    """Run full-batch gradient descent on the mean squared error.

    Each iteration evaluates the gradient and the loss at the current
    weights, prints the loss, and takes one step of size `gamma` against the
    gradient.

    Args:
        y: target values.
        tX: feature matrix, one row per sample.
        initial_w: starting weight vector (not modified).
        max_iters: number of update steps; 0 returns `initial_w` unchanged.
        gamma: step size.

    Returns:
        (loss, w): the final weights and the loss evaluated at those weights.
    """
    w = initial_w
    for n_iter in range(max_iters):
        # Gradient and loss are both evaluated at the weights *before* the step,
        # so the printed loss belongs to the iterate entering this iteration.
        gradient = compute_gradient(y, tX, w)
        loss = compute_loss(y, tX, w)
        w = w - gamma * gradient
        print("Gradient Descent({bi}/{ti}): loss={l}".format(
              bi=n_iter, ti=max_iters - 1, l=loss))

    print(w.shape)
    # Re-evaluate the loss at the returned weights. Without this the returned
    # loss would lag one step behind `w`, and would be undefined when
    # max_iters == 0.
    loss = compute_loss(y, tX, w)
    return loss, w

In [214]:
def compute_stoch_gradient(y, tx, w):
    """Return the mean-squared-error gradient evaluated on one mini-batch.

    Computes -(1/B) * tx^T (y - tx w), where B is the number of rows of `tx`.

    Args:
        y: targets of the mini-batch.
        tx: feature rows of the mini-batch.
        w: weight vector at which the gradient is evaluated.

    Returns:
        The gradient array, with one entry per column of `tx`.
    """
    batch_rows = tx.shape[0]
    batch_residual = y - tx.dot(w)
    projected = tx.T.dot(batch_residual)
    return -1.0 / batch_rows * projected


def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_epochs, gamma):
    """Run mini-batch stochastic gradient descent on the mean squared error.

    Each iteration takes the next mini-batch from `batch_iter`, computes the
    gradient on that mini-batch, evaluates the loss on the full data at the
    current weights, prints progress, and takes one step of size `gamma`.

    Args:
        y: target values.
        tx: feature matrix, one row per sample.
        initial_w: starting weight vector (not modified); needs at least two
            entries because the progress line prints w[0] and w[1].
        batch_size: forwarded to `batch_iter`.
        max_epochs: number of update steps (one mini-batch per step); 0
            returns `initial_w` unchanged.
        gamma: step size.

    Returns:
        (loss, w): the final weights and the full-data loss at those weights.

    Raises:
        ValueError: if `batch_iter` yields no mini-batch at all.
    """
    w = initial_w
    # `batch_iter` comes from the helper module; it is consumed as an iterable
    # of (minibatch_y, minibatch_tx) pairs. It may yield fewer pairs than
    # `max_epochs`, so a fresh pass is started whenever the current one runs
    # dry instead of indexing past the end of a pre-collected list.
    batches = iter(())
    for n_iter in range(max_epochs):
        batch = next(batches, None)
        if batch is None:
            batches = iter(batch_iter(y, tx, batch_size))
            batch = next(batches, None)
            if batch is None:
                raise ValueError("batch_iter produced no mini-batches")
        minibatch_y, minibatch_tx = batch

        # Gradient on the mini-batch, loss on the full data, both at the
        # weights entering this iteration.
        gradient = compute_stoch_gradient(minibatch_y, minibatch_tx, w)
        loss = compute_loss(y, tx, w)
        w = w - gamma * gradient
        # The total is taken from this function's own `max_epochs` argument,
        # not from a notebook-level `max_iters` variable.
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_epochs - 1, l=loss, w0=w[0], w1=w[1]))

    # Report the loss that belongs to the returned weights (also defines the
    # loss when max_epochs == 0).
    loss = compute_loss(y, tx, w)
    return loss, w

In [230]:
# Hyper-parameters of the run: number of update steps and step size.
max_iters = 50
gamma = 0.00001

# Starting point for the optimisation: a hard-coded 30-entry weight vector.
# NOTE(review): these values are presumably pasted from an earlier run -- confirm their origin.
w_initial = np.array([  4.75758725e-05,  -7.63351052e-03,  -5.69522020e-03,  -1.49293390e-03,
   1.27062143e-02,   1.99314297e-04,  -3.63164395e-02,   2.96059220e-01,
   1.15862514e-03,  -3.54969018e+01,  -1.83435975e-01,   1.10762995e-01,
   4.41439627e-02,   3.55071732e+01,   9.46339832e-03,  -8.26812836e-04,
   3.55102482e+01,   1.92830657e-02,   8.61388242e-03,   4.14729806e-03,
  -1.87055446e-03,  -6.94875104e-04,  -3.51632971e-01,   5.29297059e-04,
  -1.15698925e-02,   1.14756782e-02,  -7.02426871e-04,  -4.68553776e-03,
  -1.47919941e-02,   3.54963346e+01])

# Run the optimiser and time it. The full-batch variant is kept below for reference;
# the active call uses mini-batches of 30 rows and passes max_iters as `max_epochs`.
start_time = datetime.datetime.now()
#gradient_losses, w = gradient_descent(y, tX, w_initial, max_iters, gamma)
gradient_losses, w = stochastic_gradient_descent(y, tX, w_initial,30, max_iters, gamma)
end_time = datetime.datetime.now()

# Report the wall-clock duration of the run.
exection_time = (end_time - start_time).total_seconds()
print("Gradient Descent: execution time={t:.3f} seconds".format(t=exection_time))

# Mean squared error of the fitted weights on the held-out split (tX_test, y_test).
print(sum((y_test-tX_test.dot(w))**2)/tX_test.shape[0])

Gradient Descent(0/49): loss=1.1621731318975859, w0=-0.0006080896299745168, w1=-0.008086000054730322
Gradient Descent(1/49): loss=1.182549387096353, w0=-0.0004019002028903217, w1=-0.007947804726932399
Gradient Descent(2/49): loss=1.0408976563021413, w0=-0.0009539435121557994, w1=-0.008326259154036498
Gradient Descent(3/49): loss=1.0702035266298557, w0=-0.0005295827638817042, w1=-0.008258249968588371
Gradient Descent(4/49): loss=1.3637854357794938, w0=-0.0011204292017032889, w1=-0.008526560791519727
Gradient Descent(5/49): loss=0.9149360872331422, w0=-0.0009678842405886535, w1=-0.008688848663584927
Gradient Descent(6/49): loss=0.8727613399051908, w0=-0.0008282857420919562, w1=-0.008767932961501966
Gradient Descent(7/49): loss=1.000394526490803, w0=-0.0010208852971381015, w1=-0.008956766444394047
Gradient Descent(8/49): loss=0.9912911388660175, w0=-0.001000993179751137, w1=-0.00913498151906656
Gradient Descent(9/49): loss=0.8990605110130014, w0=-0.0005063241227658529, w1=-0.0090735406374

In [224]:
def least_squares(y, tx):
    """Fit ordinary least squares and report the training mean squared error.

    The weights minimise ||y - tx.dot(w)||^2. They are obtained with
    np.linalg.lstsq on `tx` directly rather than by solving the normal
    equations (tx^T tx) w = tx^T y. This also works when the columns of `tx`
    are linearly dependent (where np.linalg.solve raises LinAlgError) and
    avoids squaring the condition number of the problem.

    Args:
        y: target values, one per row of `tx`.
        tx: feature matrix, one row per sample.

    Returns:
        (mse, w): the mean squared error of the fit on (y, tx) and the
        least-squares weights.
    """
    w, _, _, _ = np.linalg.lstsq(tx, y, rcond=None)
    residual = y - tx.dot(w)
    # Sum over the sample axis only, matching what the builtin sum() did.
    mse = np.sum(residual ** 2, axis=0) / tx.shape[0]

    return mse, w
    
# Fit least squares on the current training data (this rebinds `w`) and show the
# training MSE together with the fitted weights.
mse,w=least_squares(y,tX)
print(mse,w)

0.672576523066 [  4.30590865e-04  -8.27598691e-03  -3.42198322e-03  -1.43340069e-04
   5.35789809e-02  -2.04295329e-05  -1.19540052e-02   7.65451420e-02
   5.27455054e-05  -1.05573316e+01  -2.85087577e-01   1.08888192e-01
   4.43366853e-01   1.05607366e+01   2.40887161e-03  -3.07212668e-03
   1.05693183e+01   1.61033091e-02  -6.41410528e-03   1.36421722e-03
  -4.28180869e-03  -8.70391095e-04  -3.08246414e-02   1.79979901e-03
  -3.53920334e-02  -1.05755003e-02   1.65768811e-03   1.40691173e-02
  -1.04138883e-02   1.05554442e+01]


In [163]:
def ridge_regression(y, tx, lamb):
    """Fit ridge regression and report the training mean squared error.

    Solves (tx^T tx + lamb**2 * I) w = tx^T y. Note that `lamb` enters the
    penalty squared. The reported error is the plain mean squared residual on
    (y, tx), without the penalty term.

    Args:
        y: target values, one per row of `tx`.
        tx: feature matrix, one row per sample.
        lamb: regularisation parameter (squared inside the penalty).

    Returns:
        (mse, w): the mean squared error on (y, tx) and the fitted weights.
    """
    n_features = tx.shape[1]
    gram = tx.T.dot(tx)
    penalty = lamb**2 * np.identity(n_features)
    w = np.linalg.solve(gram + penalty, tx.T.dot(y))

    residual = y - tx.dot(w)
    mse = sum(residual**2) / tx.shape[0]
    return mse, w

# Fit ridge regression with lamb = 1 on the current training data (this rebinds `w`)
# and show the training MSE together with the fitted weights.
mse,w=ridge_regression(y,tX,1)
print(mse,w)

0.672612941033 [  4.27723923e-04  -8.28676856e-03  -3.41350363e-03  -1.45155167e-04
   5.47098722e-02  -2.30182953e-05  -1.21117742e-02   7.62748596e-02
   5.51716961e-05   1.28486936e-03  -2.84015768e-01   1.09024144e-01
   4.37193955e-01   2.13358017e-03   2.58018574e-03  -3.08595434e-03
   1.06729840e-02   1.59267293e-02  -6.55886628e-03   1.36342347e-03
  -4.25037376e-03  -8.69292690e-04  -3.04499089e-02   1.80274097e-03
  -3.53329563e-02  -1.06811438e-02   1.66964184e-03   1.40043476e-02
  -1.02886726e-02  -3.17934565e-03]


# Evaluate on the held-out part of the training dataset

In [222]:
# Mean squared error on the held-out split, followed by the raw predictions.
# NOTE(review): `w` is whatever the most recently executed fitting cell left in the
# kernel (SGD, least squares and ridge all assign it) -- check which one produced this number.
print(sum((y_test-tX_test.dot(w))**2)/tX_test.shape[0])
print(tX_test.dot(w))

0.980256005514
[ 0.55725296 -0.29949886  0.14561944 ..., -0.89775117 -0.30239231
 -0.12083606]


## Generate predictions and save output in CSV format for submission:

In [47]:
DATA_TEST_PATH = '../../test.csv' # location of the test CSV; adjust to where the data was downloaded
# Note: this rebinds tX_test, replacing the held-out split created earlier in the notebook.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
# Apply the same -999 -> 0 replacement that was applied to the training features,
# so the fitted weights are used on identically preprocessed inputs.
tX_test[tX_test==-999]=0

In [50]:
OUTPUT_PATH = '../../predictions.csv' # file the submission is written to
# Predict with `w`, the weight vector assigned by the fitting cells; the name `weights`
# is not assigned anywhere in the visible notebook and raises NameError on a fresh kernel.
y_pred = predict_labels(w, tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)
print(y_pred)

[-1. -1. -1. ..., -1. -1. -1.]
