**Programming Exercise 1**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn import linear_model
from sklearn.metrics import mean_squared_error

np.random.seed(0)

In [15]:
class MiniBatchGradientDescentLinearRegressor:
    ''' Implementation of Mini-Batch Gradient Descent for Linear Regression '''
    
    def __init__(self, learning_rate=0.01, batch_size=None, epochs=5):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.intercept_ = 0
        self.coef_ = np.array([])
        self.X_train = None
        self.y_train = None
    
    def set_params(self, learning_rate=None, batch_size=None, epochs=None):
        if learning_rate:
            self.learning_rate = learning_rate
            
        if batch_size:
            self.batch_size = batch_size
        
        if epochs:
            self.epochs = epochs
    
    def fit(self, X_train, y_train, intercept=0, coef=np.array([])):
        if X_train is None:
            raise Exception('Training feature data has not bee provided')
        
        if y_train is None:
            raise Exception('Training prediction data has not been provided')
            
        self.X_train = np.concatenate((np.ones(((X_train.shape[0]), 1)), X_train), axis=1)
        self.y_train = y_train
        
        if intercept:
            self.intercept_ = intercept
        
        if coef.size == X_train.shape[1] - 1:
            self.coef_ = coef
        else:
            self.coef_ = np.zeros(self.X_train.shape[1] - 1)            
        
        train_set_size = X_train.shape[0]
        coeffs = np.append(self.intercept_, self.coef_)
        
        if train_set_size % self.batch_size:
            raise Exception('Training set size is not divisible by batch size')
        
        no_batch_iterations = train_set_size / self.batch_size
        J_omegas = np.array([])
        
        for epoch in range(self.epochs):
#             batches = np.split(np.random.permutation(train_set_size), no_batch_iterations)
            batches = np.split(range(train_set_size), no_batches_iterations)
    
            print(batches)
            
            for batch in batches:
                X_batch = np.array([self.X_train[train_point_index] for train_point_index in batch])
                y_batch = np.array([self.y_train[train_point_index] for train_point_index in batch])
                
                y_pred = np.matmul(X_batch, coeffs)
                
                print(batch)
                print(y_batch, y_pred)
                
                J_omegas = np.append(J_omegas, mean_squared_error(y_batch, y_pred))
                
                coeffs = coeffs + 2 * self.learning_rate * np.matmul(X_batch.T, (y_batch - y_pred)).flatten()
                
        self.intercept_, *self.coef_ = coeffs
        
        ns = np.linspace(1, J_omegas.shape[0], J_omegas.shape[0]) * no_batch_iterations
    
        return J_omegas, ns
        
    def predict(self, X_eval):
        coeffs = np.array([self.intercept_, self.coef_]).flatten()
        
        X_eval = np.concatenate((np.ones(((X_eval.shape[0]), 1)), X_eval), axis=1)
        y_pred = np.matmul(X_eval, coeffs)
        
        return y_pred

In [10]:
class SGDLinearRegressor(MiniBatchGradientDescentLinearRegressor):
    
    def __init__(self, learning_rate, epochs=5):
        super().__init__(learning_rate=learning_rate, batch_size=1, epochs=epochs)

In [11]:
class BGDLinearRegressor(MiniBatchGradientDescentLinearRegressor):
    
    def __init__(self, learning_rate, epochs=5):
        super().__init__(learning_rate=learning_rate, batch_size=None, epochs=epochs)

In [12]:
!curl -o data.xlsx https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
 62 76189   62 47616    0     0  25950      0  0:00:02  0:00:01  0:00:01 25962
100 76189  100 76189    0     0  37513      0  0:00:02  0:00:02 --:--:-- 37550


In [13]:
N = 200

df = pd.read_excel('./data.xlsx')

X_train = df.iloc[:N, :8].to_numpy()
y_train = df.iloc[:N, 8:9].to_numpy()

X1    0
X2    0
X3    0
X4    0
X5    0
X6    0
X7    0
X8    0
dtype: int64
Y1    0
dtype: int64


Plot magnitude of $J(\omega)$ during stochastic gradient descent for different learning rates

In [14]:
plt.figure()

model = SGDLinearRegressor(learning_rate=0.002)

errors, ns = model.fit(X_train, y_train)
plt.plot(ns, errors, label='$\eta=0.002$')

model.set_params(learning_rate=0.005)

errors, ns = model.fit(X_train, y_train)
plt.plot(ns, errors, label='$\eta=0.005$')

model.set_params(learning_rate=0.01)

errors, ns = stochastic_gradient(X_train, y_train)
plt.plot(ns,errors, label='$\eta=0.01$')

plt.xlabel('epochs')
plt.ylabel('$J(\omega)$')

plt.legend()
plt.show()

[113]
[[37.26]] [0.]
[50]
[[24.63]] [66300.44909717]
[68]
[[32.96]] [-1.26197315e+08]
[155]
[[26.46]] [2.67170631e+11]
[57]
[[26.91]] [-4.84762678e+14]
[82]
[[11.09]] [1.06796136e+18]
[40]
[[10.85]] [-3.09902881e+21]
[105]
[[27.02]] [7.36440459e+24]
[186]
[[15.12]] [-1.74967364e+28]
[130]
[[11.32]] [5.07759582e+31]
[54]
[[29.14]] [-1.08593068e+35]
[177]
[[11.16]] [2.32192932e+38]
[98]
[[24.13]] [-4.53948498e+41]
[172]
[[10.77]] [8.51249998e+44]
[62]
[[23.54]] [-1.88378589e+48]
[149]
[[29.01]] [3.49952722e+51]
[146]
[[24.11]] [-5.55596951e+54]
[74]
[[10.36]] [9.97902946e+57]
[135]
[[11.43]] [-2.58425901e+61]
[164]
[[32.52]] [6.84200959e+64]
[187]
[[15.36]] [-1.88359784e+68]
[153]
[[26.47]] [4.4762219e+71]
[59]
[[25.27]] [-8.12179574e+74]
[87]
[[11.69]] [1.85948554e+78]
[48]
[[24.58]] [-3.78273012e+81]
[37]
[[7.1]] [7.69236033e+84]
[38]
[[7.1]] [-2.23594895e+88]
[127]
[[10.68]] [6.02706866e+91]
[132]
[[11.45]] [-1.62450931e+95]
[65]
[[37.12]] [4.0630951e+98]
[123]
[[10.45]] [-8.9910561e+

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

<Figure size 432x288 with 0 Axes>

In [17]:
model = SGDLinearRegressor(learning_rate=0.1, epochs=1)

X_train = np.array([
    [4, 1, 4, 16, 1],
    [2, 8, 16, 4, 64],
    [1, 0, 0, 1, 0],
    [3, 2, 6, 9, 4]
])

y_train = np.array([2, -14, 1, -1])

model.fit(X_train, y_train)

[1]
[-14] [0.]
[2]
[1] [-19.6]
[0]
[2] [-498.68]
[3]
[-1] [17628.192]


(array([1.96000000e+02, 4.24360000e+02, 2.50680462e+05, 3.10788411e+08]),
 array([ 4.,  8., 12., 16.]))