# Section \#1: Centralized Algorithms

## Section \#1.1: LINEAR REGRESSION

Please follow our instructions in the given order to solve the linear regression problem.

Please print out all results and code when completed.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
import csv
from data_load import load
import scipy.io as io
# Load matplotlib images inline
%matplotlib inline
# These are important for reloading any code you write in external .py files.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
def get_data():
    """
    Read the train, validation and test splits of the linear regression dataset from disk.

    Each split is read from its own CSV file through `load`; any preprocessing
    is whatever `load` applies (it is implemented in data_load, not here).

    Returns:
        The 6-tuple (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    split_files = ('regression_train.csv', 'regression_val.csv', 'regression_test.csv')
    arrays = []
    for csv_name in split_files:
        # Every file yields a (features, targets) pair.
        features, targets = load(csv_name)
        arrays.extend((features, targets))
    return tuple(arrays)

X_train, y_train, X_val, y_val, X_test, y_test = get_data()

# Take note of all the dimensions of the data: one "<label> <shape>" line per
# array, in the order train, validation, test (data first, then target).
print('\n'.join(
    f'{label} {array.shape}'
    for label, array in (
        ('Train data shape: ', X_train),
        ('Train target shape: ', y_train),
        ('Validation data shape: ', X_val),
        ('Validation target shape: ', y_val),
        ('Test data shape: ', X_test),
        ('Test target shape: ', y_test),
    )
))

In [None]:
## Plot the training and test data ##

# Scatter both splits on the same axes. The labels feed the legend so the
# black (train) and blue (test) points can be told apart.
plt.plot(X_train, y_train, 'o', color='black', label='Train')
plt.plot(X_test, y_test, 'o', color='blue', label='Test')
# The x-axis carries the inputs of both splits, not only the training data.
plt.xlabel('Input feature')
plt.ylabel('Target')
plt.legend()
plt.show()

### Training Linear Regression

In the following cells, you will build a linear regression model. You will implement its loss function and then train it with gradient descent. You will choose the learning rate of gradient descent to optimize its regression performance. Finally, you will obtain the optimal solution using the closed-form expression.

In [None]:
from Regression import Regression

In [None]:
# Evaluate the loss and its gradient on the training set for a model freshly
# built with m=1 and reg_param=0. The notebook states that the initial weight
# vector is zero; that initialization happens inside Regression and is not
# visible from this cell.
regression = Regression(m=1, reg_param=0)
loss, grad = regression.loss_and_grad(X_train, y_train)
print(loss, grad, sep='\n')

In [None]:
## train the data with gradient descent, feel free to play around with learning rate, batch size, and iterations
# Uses the `regression` model created in the previous cell.
loss_history, w = regression.train_LR(X_train, y_train, eta=1e-3, batch_size=20, num_iters=10000)

# Loss recorded during training, plotted against the iteration index.
plt.plot(loss_history)
plt.xlabel('iterations')
plt.ylabel('Loss function')
plt.show()

# Learned weights.
print(w)
# Last recorded loss. Indexing with -1 (instead of a hard-coded 9999) keeps
# this line valid when num_iters above is changed: a smaller value no longer
# raises IndexError and a larger one no longer prints a non-final loss.
# Presumably loss_history holds one entry per iteration -- confirm in Regression.train_LR.
print(loss_history[-1])

In [None]:
# Unlike the iterative training above, this asks the model for its analytical
# solution (i.e. equate the gradient to zero and solve for w). The call
# returns a (loss, weights) pair, printed one after the other.
loss_2, w_2 = regression.closed_form(X_train, y_train)
print(loss_2, w_2, sep='\n')

In [None]:
# Sweep the polynomial degree m = 1..max_degree with no regularization and plot
# both the training and the test loss in the same figure.
# polynomial hypothesis complexity: aka x, x^2, x^3 etc for m = 1, 2, and 3
# does a more complex hypothesis mean that you will get better results?
max_degree = 10
train_loss = np.zeros((max_degree, 1))
test_loss = np.zeros((max_degree, 1))
# N = number of test samples. d is not needed for the loss; it is still
# unpacked so the names this cell leaves in the notebook namespace are unchanged.
N, d = X_test.shape
y_test_flat = np.ravel(y_test)
for i in range(max_degree):
    regression_train = Regression(m=i+1, reg_param=0)
    # Training loss and weights come straight from the closed-form solver.
    loss_2, w_2 = regression_train.closed_form(X_train, y_train)
    # Expand the test inputs with the same polynomial features as the model.
    X_bias = regression_train.gen_poly_features(X_test)
    print(w_2)
    train_loss[i] = loss_2
    # Flatten targets and predictions before subtracting. This gives the same
    # result as the (N, 1)-shaped form when the weights are a column vector,
    # and avoids a silent (N, N) broadcast if they come back 1-D.
    residual = y_test_flat - np.ravel(np.dot(X_bias, w_2))
    # Mean squared error on the test split.
    # NOTE(review): whether closed_form's training loss uses the same
    # normalisation cannot be seen from this cell -- confirm before comparing curves.
    test_loss[i] = (1/N)*np.sum(np.square(residual))
plt.xlabel('Polynomial Degree')
plt.ylabel('Loss')
x_axis = np.linspace(1, max_degree, num=max_degree)
plt.title('Loss Function with respect to polynomial degree')
plt.plot(x_axis, train_loss, color='black', label='Train loss')
plt.plot(x_axis, test_loss, color='blue', label='Test loss')
plt.legend()
plt.show()
print(train_loss)
print(test_loss)



In [None]:
# Fix the polynomial degree at m = 10, vary the regularization term and see how
# this affects the loss; training and test loss are drawn in the same plot.
num_settings = 10
# First setting is lambda = 0 (no regularization), then 1e-8, 1e-7, ..., 1.
reg_params = [0] + [10**(-9+i) for i in range(1, num_settings)]
train_loss = np.zeros((num_settings, 1))
test_loss = np.zeros((num_settings, 1))
# N = number of test samples. d is not needed for the loss; it is still
# unpacked so the names this cell leaves in the notebook namespace are unchanged.
N, d = X_test.shape
y_test_flat = np.ravel(y_test)
for i, lamda in enumerate(reg_params):
    regression_train = Regression(m=10, reg_param=lamda)
    # Training loss and weights come straight from the closed-form solver.
    loss_2, w_2 = regression_train.closed_form(X_train, y_train)
    train_loss[i] = loss_2
    print(w_2)
    # Expand the test inputs with the same polynomial features as the model.
    X_bias = regression_train.gen_poly_features(X_test)
    # Flatten targets and predictions before subtracting. This gives the same
    # result as the (N, 1)-shaped form when the weights are a column vector,
    # and avoids a silent (N, N) broadcast if they come back 1-D.
    residual = y_test_flat - np.ravel(np.dot(X_bias, w_2))
    # Test MSE plus the L2 penalty 0.5 * lambda * ||w||^2. The penalty is zero
    # for lambda = 0, so one expression covers every setting.
    # NOTE(review): the penalty is kept in the *test* loss exactly as before;
    # confirm this is intended rather than reporting the plain MSE.
    test_loss[i] = (1/N)*np.sum(np.square(residual)) + (0.5*lamda*np.sum(np.square(w_2)))
plt.xlabel('Log Scaled Regularization Factor ')
plt.ylabel('Loss')
x_axis = np.linspace(1, num_settings, num=num_settings)
plt.title('Loss as a function of regularization parameter')
plt.plot(x_axis, train_loss, color='black', label='Train loss')
plt.plot(x_axis, test_loss, color='blue', label='Test loss')
# Label each position with the lambda it stands for, so the axis shows the
# regularization factor instead of a bare index 1..10.
plt.xticks(x_axis, [f'{lam:g}' for lam in reg_params], rotation=45)
plt.legend()
plt.show()
print(train_loss)
print(test_loss)
