In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

boston = load_boston()
X = boston.data
y = boston.target

m = X.shape[0]   # number of samples
n = X.shape[1]   # number of features

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=5)

intercept = np.ones((X_train.shape[0], 1))
X_train = np.concatenate((intercept, X_train), axis=1) 
intercept = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((intercept, X_test), axis=1)

# Task 1 : early stopping

In [2]:
# initialize theta
theta = np.zeros((X_train.shape[1]))

# define learning_rate
alpha = 0.0001

# define max_iter
max_iter = 100000

# set initial previous loss
previous_loss = 999999

# set threshold
tol = 0.0001

iter_stop = 0

def h_theta(X, theta):
    return X @ theta

def mse(yhat, y):
    return ((yhat - y)**2).sum() / yhat.shape[0]

def gradient(X, error):
    return X.T @ error

for i in range(max_iter):
    
    #prediction
    yhat = h_theta(X_train, theta)
     
    #current loss
    current_loss = mse(yhat, y_train)
    
    #absolute difference betweeen current loss and previous loss
    diff = np.abs(current_loss - previous_loss)
    
    # if difference is less than threshold, stop
    if diff < tol:
        break
    
    # now current loss become prevois loss
    previous_loss = current_loss
    
   
    # error
    error = yhat - y_train
    
    #grad
    grad = gradient(X_train, error)
    
    #update theta
    theta = theta - alpha * grad
    
    iter_stop = i
    
# make prediction
yhat = h_theta(X_test, theta)

# calculate mean squared errors
mse = mse(yhat, y_test)

print("MSE: ", mse)
print("Stop at iteration :", iter_stop)

MSE:  30.733337233698883
Stop at iteration : 771


# Task 2 : Stochastic gradient descent

In [3]:
#initialize theta
theta = np.zeros((X_train.shape[1]))

# define learning_rate
alpha = 0.0001

#define max_iter
max_iter = 100000

#set initial previous loss
previous_loss = 999999

#def threshold
tol = 0.0001

iter_stop = 0

def h_theta(X, theta):
    return X @ theta

def mse(yhat, y):
    return ((yhat - y)**2).sum() / yhat.shape[0]

def gradient(X, error):
    return X.T @ error

random_index = []

for i in range(max_iter):
    
    index = np.random.randint(X_train.shape[0])
    while index in random_index:
        index = np.random.randint(X_train.shape[0])
    
    random_index.append(index)
    if len(random_index) == X_train.shape[0]:
        random_index = []
    Xi = X_train[index,:].reshape(1,-1)
    yi = y_train[index]
    yhat = h_theta(Xi, theta)
    current_loss = mse(yhat, yi)
    if np.abs(current_loss - previous_loss) < tol:
        break
    previous_loss = current_loss
    error = yhat - yi
    grad = gradient(Xi, error)
    theta = theta - alpha * grad
    
    iter_stop = i
   
    
yhat = h_theta(X_test, theta)

# calculate mean squared errors
mse = mse(yhat, y_test)

print("MSE: ", mse)
print("Stop at iter: ", iter_stop)

MSE:  32.268643086146575
Stop at iter:  55599


# Task 3 : mini-batch gradient descent

In [4]:
#initialize theta
theta = np.zeros((X_train.shape[1]))

# define learning_rate
alpha = 0.001

#define max_iter
max_iter = 100000

#set initial previous loss
previous_loss = 999999

#def threshold
tol = 0.0001

iter_stop = 0

def h_theta(X, theta):
    return X @ theta

def mse(yhat, y):
    return ((yhat - y)**2).sum() / yhat.shape[0]

def gradient(X, error):
    return X.T @ error

for i in range(max_iter):
    
    # define batch size
    size = 10
    shuffle_index = np.random.permutation(X_train.shape[0])
    X_train = X_train[shuffle_index]
    y_train = y_train[shuffle_index]
    
    for batch_size in range(0, X_train.shape[0], size):
        Xi_batch = X_train[batch_size : batch_size + size,:]
        yi_batch = y_train[batch_size : batch_size + size]
    
        #prediction
        yhat = h_theta(Xi_batch, theta)
        
        #current loss
        current_loss = mse(yhat, yi_batch)
    
        #absolute difference betweeen current loss and previous loss
        diff = np.abs(current_loss - previous_loss)
    
        # if difference is less than threshold, stop
        if diff < tol:
            break
    
        # now current loss become prevois loss
        previous_loss = current_loss
    
        # error
        error = yhat - yi_batch
    
        #grad
        grad = gradient(Xi_batch, error)
    
        #update theta
        theta = theta - alpha * grad
    
    if diff < tol:
        break
    
    iter_stop = i
   
# make prediction
yhat = h_theta(X_test, theta)

# calculate mean squared errors
mse = mse(yhat, y_test)

print("MSE: ", mse)
print("stop at iter: ", iter_stop)

MSE:  30.2054782848011
stop at iter:  4226


# Task 4 : put all into class

In [5]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

boston = load_boston()
X = boston.data
y = boston.target
m = X.shape[0]  #number of samples
n = X.shape[1]  #number of features

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=5)

# actually you can do like this too
# X = np.insert(X, 0, 1, axis=1)
intercept = np.ones((X_train.shape[0], 1))
X_train = np.concatenate((intercept, X_train), axis=1)
intercept = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((intercept, X_test), axis=1)

class LinearRegression:
   
    def __init__(self,method="batch", alpha=0.0001, max_iter=100000, previous_loss=99999, tol=0.00001):
        self.method = method
        self.alpha = alpha
        self.max_iter = max_iter
        self.previous_loss = previous_loss
        self.tol = tol
     
    def h_theta(self, X):
        return X @ self.theta

    def mse(self, yhat, y):
        return ((yhat - y)**2 / yhat.shape[0]).sum()

    def gradient(self, X, error):
        return X.T @ error

    def fit(self, X, y):
        self.theta = np.zeros(X.shape[1])
        random_index = [] 
        
        for i in range(self.max_iter):
            
            if self.method == 'batch':
                X_train = X
                y_train = y
                
            elif self.method == "SGD":
                index = np.random.randint(X.shape[0])
                while index in random_index:
                    index = np.random.randint(X.shape[0])
                X_train = X[index, :].reshape(1, -1)
                y_train = y[index]
                random_index.append(index)
                if len(random_index) == X.shape[0]:
                    random_index = []
            
            elif self.method == 'mini-batch':
                batch_size = 10
                index = np.random.randint(X.shape[0])
                X_train = X[index : index + batch_size, :]
                y_train = y[index : index + batch_size]

            yhat = self.h_theta(X_train)

            current_loss = self.mse(yhat, y_train)
            if np.abs(current_loss - previous_loss) < tol: 
                break
              
            self.previous_loss = current_loss
            
            error = yhat - y_train
            grad = self.gradient(X_train, error)
            self.theta = self.theta - self.alpha * grad

    
    def h_theta(self, X):
        return X @ self.theta

    def mse(self, yhat, y):
        return ((yhat - y)**2 / yhat.shape[0]).sum()

    def gradient(self, X, error):
        return X.T @ error



In [6]:
model = LinearRegression(method='batch')
model.fit(X_train, y_train)
yhat = model.h_theta(X_test)
mse = model.mse(yhat, y_test)


print("MSE using batch: ", mse)


MSE using batch:  30.697037704088505


In [9]:
model = LinearRegression(method='SGD')
model.fit(X_train, y_train)
yhat = model.h_theta(X_test)
mse = model.mse(yhat, y_test)


print("MSE using SGD: ", mse)

MSE using SGD:  31.403373644042464


In [8]:
model = LinearRegression(method='mini-batch')
model.fit(X_train, y_train)
yhat = model.h_theta(X_test)
mse = model.mse(yhat, y_test)


print("MSE using mini-batch: ", mse)


MSE using mini-batch:  30.27920248725819
