**Programming Exercise 1**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

np.random.seed(0)

In [2]:
class MiniBatchGradientDescentLinearRegressor:
    ''' Implementation of Mini-Batch Gradient Descent for Linear Regression '''
    
    def __init__(self, learning_rate=0.01, batch_size=None, epochs=5):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.intercept_ = 0
        self.coef_ = np.array([])
        self.X_train = None
        self.y_train = None
    
    def set_params(self, learning_rate=None, batch_size=None, epochs=None):
        if learning_rate:
            self.learning_rate = learning_rate
            
        if batch_size:
            self.batch_size = batch_size
        
        if epochs:
            self.epochs = epochs
    
    def fit(self, X_train, y_train, intercept=0, coef=np.array([])):
        if X_train is None:
            raise Exception('Training feature data has not bee provided')
        
        if y_train is None:
            raise Exception('Training prediction data has not been provided')
            
        self.X_train = np.concatenate((np.ones(((X_train.shape[0]), 1)), X_train), axis=1)
        self.y_train = y_train
        
        if intercept:
            self.intercept_ = intercept
        
        if coef.size == X_train.shape[1] - 1:
            self.coef_ = coef
        else:
            self.coef_ = np.zeros(self.X_train.shape[1] - 1)            
        
        train_set_size = X_train.shape[0]
        coeffs = np.append(self.intercept_, self.coef_)
        
        if self.batch_size is None:
            self.batch_size = X_train.shape[0]
        
        if train_set_size % self.batch_size:
            raise Exception('Training set size is not divisible by batch size')
        
        no_batch_iterations = train_set_size / self.batch_size
        J_omegas = np.array([])
        
        for epoch in range(self.epochs):
            batches = np.split(np.random.permutation(train_set_size), no_batch_iterations)
            
            for batch in batches:
                X_batch = np.array([self.X_train[train_point_index] for train_point_index in batch])
                y_batch = np.array([self.y_train[train_point_index] for train_point_index in batch])
                
                print('X_batch shape', X_batch.shape)
                print('y_batch shape', y_batch.shape)
                
                y_pred = np.reshape(np.matmul(X_batch, coeffs), y_batch.shape)
                print('y_pred shape', y_pred.shape)
                
                J_omegas = np.append(J_omegas, mean_squared_error(y_batch, y_pred))
                
                print(y_batch - y_pred)
                print(np.matmul(X_batch.T, (y_batch - y_pred)).shape)
                
                coeffs = coeffs + 2 * self.learning_rate * np.matmul(X_batch.T, (y_batch - y_pred))
                
        self.intercept_, *self.coef_ = coeffs
        
        ns = np.linspace(1, J_omegas.shape[0], J_omegas.shape[0]) * self.batch_size
    
        return J_omegas, ns
        
    def predict(self, X_eval):
        coeffs = np.array([self.intercept_, self.coef_]).flatten()
        
        X_eval = np.concatenate((np.ones(((X_eval.shape[0]), 1)), X_eval), axis=1)
        y_pred = np.matmul(X_eval, coeffs)
        
        return y_pred

In [3]:
class SGDLinearRegressor(MiniBatchGradientDescentLinearRegressor):
    
    def __init__(self, learning_rate, epochs=5):
        super().__init__(learning_rate=learning_rate, batch_size=1, epochs=epochs)

In [4]:
class BGDLinearRegressor(MiniBatchGradientDescentLinearRegressor):
    
    def __init__(self, learning_rate, epochs=5):
        super().__init__(learning_rate=learning_rate, batch_size=None, epochs=epochs)

In [5]:
!curl -o data.xlsx https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0curl: (6) Could not resolve host: archive.ics.uci.edu


In [6]:
N = 200

df = pd.read_excel('./data.xlsx')

X_train = df.iloc[:N, :8].to_numpy()
y_train = df.iloc[:N, 8:9].to_numpy()

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [7]:
X_train

array([[ 1.90777955, -1.68368888, -0.56026533, ..., -1.34164079,
        -1.77951304, -1.33333333],
       [ 1.90777955, -1.68368888, -0.56026533, ..., -0.4472136 ,
        -1.77951304, -1.33333333],
       [ 1.90777955, -1.68368888, -0.56026533, ...,  0.4472136 ,
        -1.77951304, -1.33333333],
       ...,
       [ 1.17682953, -1.14056344,  0.01143399, ..., -0.4472136 ,
         0.56195149,  2.        ],
       [ 1.17682953, -1.14056344,  0.01143399, ...,  0.4472136 ,
         0.56195149,  2.        ],
       [ 1.17682953, -1.14056344,  0.01143399, ...,  1.34164079,
         0.56195149,  2.        ]])

In [8]:
y_train

array([[15.55 ],
       [15.55 ],
       [15.55 ],
       [15.55 ],
       [20.84 ],
       [21.46 ],
       [20.71 ],
       [19.68 ],
       [19.5  ],
       [19.95 ],
       [19.34 ],
       [18.31 ],
       [17.05 ],
       [17.41 ],
       [16.95 ],
       [15.98 ],
       [28.52 ],
       [29.9  ],
       [29.63 ],
       [28.75 ],
       [24.77 ],
       [23.93 ],
       [24.77 ],
       [23.93 ],
       [ 6.07 ],
       [ 6.05 ],
       [ 6.01 ],
       [ 6.04 ],
       [ 6.37 ],
       [ 6.4  ],
       [ 6.366],
       [ 6.4  ],
       [ 6.85 ],
       [ 6.79 ],
       [ 6.77 ],
       [ 6.81 ],
       [ 7.18 ],
       [ 7.1  ],
       [ 7.1  ],
       [ 7.1  ],
       [10.85 ],
       [10.54 ],
       [10.77 ],
       [10.56 ],
       [ 8.6  ],
       [ 8.49 ],
       [ 8.45 ],
       [ 8.5  ],
       [24.58 ],
       [24.63 ],
       [24.63 ],
       [24.59 ],
       [29.03 ],
       [29.87 ],
       [29.14 ],
       [28.09 ],
       [26.28 ],
       [26.91 ],
       [26.37 

Plot magnitude of $J(\omega)$ during stochastic gradient descent for different learning rates

In [9]:
plt.figure(figsize=(14, 4))

model = SGDLinearRegressor(learning_rate=0.002)

errors, ns = model.fit(X_train, y_train)
plt.plot(ns, errors, label='$\eta=0.002$')

model.set_params(learning_rate=0.005)

errors, ns = model.fit(X_train, y_train)
plt.plot(ns, errors, label='$\eta=0.005$')

model.set_params(learning_rate=0.01)

errors, ns = model.fit(X_train, y_train)
plt.plot(ns,errors, label='$\eta=0.01$')

plt.xlabel('epochs')
plt.ylabel('$J(\omega)$')
plt.xticks(ticks=[])

plt.legend()
plt.show()

X_batch shape (1, 9)
y_batch shape (1, 1)
y_pred shape (1, 1)
[[29.63]]
(9, 1)
X_batch shape (1, 9)
y_batch shape (1, 1)


ValueError: cannot reshape array of size 9 into shape (1,1)

<Figure size 1008x288 with 0 Axes>

In [10]:
plt.figure(figsize=(14, 4))

bgd_model = BGDLinearRegressor(learning_rate=0.01)

# errors, ns = bgd_model.fit(X_train, y_train)
# plt.plot(ns, errors, label="$\eta=0.01$, batch GD")

# sgd_model = SGDLinearRegressor(learning_rate=0.01)

# errors, ns = sgd_model.fit(X_train, y_train, 0.01)
# plt.plot(ns,errors, label="$\eta=0.01$, stochastic GD")

mbgd_model = MiniBatchGradientDescentLinearRegressor(batch_size=10, learning_rate=0.01)

errors, ns = mbgd_model.fit(X_train, y_train)
plt.plot(ns,errors, label="$\eta=0.01$, mini-batch GD")

plt.xlabel("epochs")
plt.ylabel("$J(\omega)$")
plt.xticks(ticks=[])

plt.legend()
plt.show()

X_batch shape (10, 9)
y_batch shape (10, 1)
y_pred shape (10, 1)
[[37.26]
 [24.63]
 [32.96]
 [26.46]
 [26.91]
 [11.09]
 [10.85]
 [27.02]
 [15.12]
 [11.32]]
(9, 1)
X_batch shape (10, 9)
y_batch shape (10, 1)


ValueError: cannot reshape array of size 90 into shape (10,1)

<Figure size 1008x288 with 0 Axes>

In [11]:
model = SGDLinearRegressor(learning_rate=0.1, epochs=1)

X_train = np.array([
    [4, 1, 4, 16, 1],
    [2, 8, 16, 4, 64],
    [1, 0, 0, 1, 0],
    [3, 2, 6, 9, 4]
])

y_train = np.array([2, -14, 1, -1])

errors, ns = model.fit(X_train, y_train)

X_batch shape (1, 6)
y_batch shape (1,)
y_pred shape (1,)
[2.]
(6,)
X_batch shape (1, 6)
y_batch shape (1,)
y_pred shape (1,)
[-7.4]
(6,)
X_batch shape (1, 6)
y_batch shape (1,)
y_pred shape (1,)
[-87.24]
(6,)
X_batch shape (1, 6)
y_batch shape (1,)
y_pred shape (1,)
[7114.568]
(6,)
