# Problem 2

Use this notebook to write your code for problem 2. You may reuse your SGD code from last week.

In [5]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

The following function may be useful for loading the necessary data.

In [6]:
def load_data(filename):
    """
    Read a comma-delimited text file into a numpy ndarray, skipping its first row.

    Inputs:
        filename: path of the file to read, given as a string.

    Outputs:
        The parsed contents of the file, returned as a numpy ndarray.
    """
    # The first row is skipped before parsing (presumably a header line).
    contents = np.loadtxt(filename, delimiter=',', skiprows=1)
    return contents

In [10]:
# The SGD code I wrote in week 1's set, but with modifications to the loss/gradient function
# to account for regularization
def loss(X, Y, w):
    '''
    Calculate the logistic loss of weight vector w on (X, Y), summed over all points:

        sum_i log(1 + exp(-Y[i] * dot(w, X[i])))

    The regularization term is not included in calculating the normal error.

    Inputs:
        X: A (N, D) shaped numpy array containing the data points.
        Y: A (N, ) shaped numpy array containing the (float) labels of the data points.
           Labels are used as signed multipliers (assumes a +1 / -1 encoding).
        w: A (D, ) shaped numpy array containing the weight vector.

    Outputs:
        l: The non-negative summed loss evaluated with respect to X, Y, and w
           (0.0 when X holds no points).
    '''
    X = np.asarray(X, dtype=float)
    if X.size == 0:
        return 0.0
    margins = np.asarray(Y, dtype=float) * X.dot(np.asarray(w, dtype=float))
    # log(1 + exp(-m)) == logaddexp(0, -m). This is the negative of
    # log(1 / (1 + exp(-m))), so a worse fit gives a larger (positive) loss,
    # and logaddexp does not overflow for strongly misclassified points.
    return np.sum(np.logaddexp(0.0, -margins))

def gradient(x, y, w, lam, N):
    '''
    Calculate the gradient of the loss function with respect to
    a single point (x, y), and using weight vector w for the regularized
    logistic loss function.

    The per-point objective being differentiated is

        log(1 + exp(-y * dot(w, x))) + (lam / (2 * N)) * dot(w, w)

    i.e. the same signed-label logistic loss that loss() sums, plus this
    point's 1/N share of the L2 penalty.

    Inputs:
        x: A (D, ) shaped numpy array containing a single data point.
        y: The float label for the data point (assumes a +1 / -1 encoding,
           matching how loss() uses the labels).
        w: A (D, ) shaped numpy array containing the weight vector.
        lam: a float value for lambda
        N: the total number of points

    Output:
        g: The gradient of the regularized loss with respect to w, shape (D, ).
    '''
    x = np.asarray(x, dtype=float)
    w = np.asarray(w, dtype=float)
    margin = y * np.dot(w, x)
    # 1 / (1 + exp(margin)), written as exp(-log(1 + exp(margin))) so that a
    # large margin cannot overflow np.exp.
    scale = np.exp(-np.logaddexp(0.0, margin))
    data_grad = -y * scale * x
    # The penalty gradient is ADDED: the descent step w - eta * g must shrink
    # the weights. Subtracting it would push the weights away from zero.
    reg_grad = (lam / N) * w
    return data_grad + reg_grad

    

def SGD(X, Y, w_start, eta, lam, N_epochs):
    '''
    Perform SGD using dataset (X, Y), initial weight vector w_start,
    learning rate eta, regularization strength lam, and N_epochs epochs.

    Each epoch visits every point once, in a freshly shuffled order, and
    takes one step along the negative of gradient(...) for that point.

    Inputs:
        X: A (N, D) shaped numpy array containing the data points.
        Y: A (N, ) shaped numpy array containing the (float) labels of the data points.
        w_start:  A (D, ) shaped numpy array containing the weight vector initialization.
        eta: The step size.
        lam: the regularization lambda
        N_epochs: The number of epochs (iterations) to run SGD.

    Outputs:
        w: A (D, ) shaped array containing the weight vector after the final update
           (w_start itself when no update is performed, e.g. N_epochs == 0).
        Ein: The training error using the final w, as computed by loss(X, Y, w).
    '''
    n_points = len(X)
    # Start from w_start so a run with zero epochs (or no data) still has a
    # defined result instead of referencing an unassigned variable.
    w = w_start
    for _ in range(N_epochs):
        # Visit the points in a new random order every epoch.
        order = np.random.permutation(n_points)
        for idx in order:
            # Builds a new array each step; w_start is not modified in place.
            w = w - eta * gradient(X[idx], Y[idx], w, lam, n_points)
    return w, loss(X, Y, w)

def random_w(length):
    '''
    Draw a random weight vector.

    Inputs:
        length: number of entries to generate.

    Outputs:
        A (length, ) shaped numpy array whose entries are drawn one at a
        time from np.random.ranf().
    '''
    return np.array([np.random.ranf() for _ in range(length)])

# Standardize each column (feature) of X to zero mean and unit standard deviation.
def normalize(X):
    '''
    Standardize the columns of X: subtract each column's mean and divide by
    its (population) standard deviation.

    Inputs:
        X: A (N, D) shaped array-like containing the data points.

    Outputs:
        A new (N, D) shaped float numpy array with the standardized values.
        The input is left unmodified. A column whose values are all equal
        (standard deviation 0) is mapped to all zeros rather than NaN.
    '''
    X = np.asarray(X, dtype=float)
    if X.size == 0:
        return X.copy()
    # Per-column statistics are computed once, not once per element.
    col_mean = X.mean(axis=0)
    col_std = X.std(axis=0)
    # Avoid dividing by zero for constant columns; (x - mean) is already 0 there.
    col_std = np.where(col_std == 0, 1.0, col_std)
    return (X - col_mean) / col_std


In [11]:
# Regularization strengths to sweep: 15 values starting at 1e-5,
# each one 5 times the previous.
lambdas = [.00001]
for i in range(1, 15):
    lambdas.append(lambdas[-1] * 5)
# One step past the last stored value, as the running multiplier would be.
lam = lambdas[-1] * 5
    

In [12]:
# Load the two training sets and the test set.
# In every file, column 0 is taken as the label and the remaining columns as features.
data1 = load_data('data/wine_training1.txt')
X1, Y1 = data1[:, 1:], data1[:, 0]

data2 = load_data('data/wine_training2.txt')
X2, Y2 = data2[:, 1:], data2[:, 0]

data_test = load_data('data/wine_testing.txt')
X_test, Y_test = data_test[:, 1:], data_test[:, 0]

In [13]:
# Normalization of X arrays.
# Each set is passed to normalize() on its own, so each is scaled by its own statistics.
# NOTE(review): the test set is not scaled with the training-set statistics — confirm this is intended.
X1, X2, X_test = [normalize(features) for features in (X1, X2, X_test)]

In [14]:
# Train on both training sets for every regularization strength and record the
# training error, the test error, and the L2 norm of the learned weights.
ETA = 5e-4        # step size 5 * 10^-4 (5 * np.exp(-4) would be ~0.0916, since np.exp is e**x)
N_EPOCHS = 20000

Ein_1 = []
Ein_2 = []

Eout_1 = []
Eout_2 = []

l2_norm_1 = []
l2_norm_2 = []

for reg_lambda in lambdas:
    # Both runs start from the same random initial weights; the length is
    # taken from the data (assumes X1 and X2 have the same number of columns).
    start_w = random_w(X1.shape[1])
    W1, ein1 = SGD(X1, Y1, start_w, ETA, reg_lambda, N_EPOCHS)
    W2, ein2 = SGD(X2, Y2, start_w, ETA, reg_lambda, N_EPOCHS)
    eout1 = loss(X_test, Y_test, W1)
    eout2 = loss(X_test, Y_test, W2)

    Ein_1.append(ein1)
    Ein_2.append(ein2)

    # Each model's test error goes to its own list (eout2 belongs in Eout_2).
    Eout_1.append(eout1)
    Eout_2.append(eout2)

    l2_norm_1.append(np.linalg.norm(W1))
    l2_norm_2.append(np.linalg.norm(W2))
    
    

