CS6140 Assignment 1
Q2.3 Root Mean Square
Wing Man, Kwok
May 22 2022


In [1]:
import numpy as np
import pandas as pd

In [2]:
def compute_cost(ip, op, params):
    """
    Cost function in linear regression where the cost is calculated
    ip: input variables
    op: output variables
    params: corresponding parameters
    Returns cost
    """
    num_samples = len(ip)
    cost_sum = 0.0
    for x,y in zip(ip, op):
        y_hat = np.dot(params, np.array([1.0, x]))
        cost_sum += (y_hat - y) ** 2
    
    cost = cost_sum / (num_samples)
    
    return cost

In [3]:
def linear_regression_using_batch_gradient_descent(ip, op, params, alpha, max_iter):
    """
    Compute the params for linear regression using batch gradient descent
    ip: input variables
    op: output variables
    params: corresponding parameters
    alpha: learning rate
    max_iter: maximum number of iterations
    Returns parameters, cost, params_store
    """ 
    # initialize iteration, number of samples, cost and parameter array
    iteration = 0
    num_samples = len(ip)
    cost = np.zeros(max_iter)
    params_store = np.zeros([2, max_iter])
    
    # Compute the cost and store the params for the corresponding cost
    while iteration < max_iter:
        cost[iteration] = compute_cost(ip, op, params)
        params_store[:, iteration] = params
        
        print('--------------------------')
        print(f'iteration: {iteration}')
        print(f'cost: {cost[iteration]}')
        
        for i in range(num_samples):
          y_hat = np.dot(params, np.array([1.0, ip[i]]))
          gradient = np.array([1.0, ip[i]]) * (op[i] - y_hat)   #np.array instead of purley ip[i], is because the whole ip[i] features have to be considered
          params += alpha * gradient/num_samples
          
        iteration += 1
    
    return params, cost, params_store

In [4]:
def lin_reg_stoch_gradient_descent(ip, op, params, alpha):
    """
    Compute the params for linear regression using stochastic gradient descent
    ip: input variables
    op: output variables
    params: corresponding parameters
    alpha: learning rate
    Returns parameters, cost, params_store
    """

    # initialize iteration, number of samples, cost and parameter array
    num_samples = len(input_var)
    cost = np.zeros(num_samples)
    params_store = np.zeros([2, num_samples])
    
    i = 0
    
    # Compute the cost and store the params for the corresponding cost
    for x,y in zip(input_var, output_var):
        cost[i] = compute_cost(input_var, output_var, params)
        params_store[:, i] = params
        
        print('--------------------------')
        print(f'iteration: {i}')
        print(f'cost: {cost[i]}')
        
        # Apply stochastic gradient descent
       
        y_hat = np.dot(params, np.array([1.0, x]))
        gradient = np.array([1.0, x]) * (y - y_hat)   #np.array instead of purley ip[i], is because the whole ip[i] features have to be considered
        params += alpha * gradient/num_samples
          
        i += 1
    
    return params, cost, params_store

In [5]:
# Do not change the code in this cell
true_slope = 15
true_intercept = 2.4
input_var = np.arange(0.0,100.0)
output_var = true_slope * input_var + true_intercept + 300.0 * np.random.rand(len(input_var))

In [6]:
# Do not change the code in this cell
# Training the model
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(input_var, output_var, test_size=0.20)

params_0 = np.array([20.0, 80.0])

alpha_batch = 1e-3
max_iter = 100
params_hat_batch, cost_batch, params_store_batch =\
    linear_regression_using_batch_gradient_descent(x_train, y_train, params_0, alpha_batch, max_iter)

--------------------------
iteration: 0
cost: 13090156.83930492
--------------------------
iteration: 1
cost: 29547.813225415903
--------------------------
iteration: 2
cost: 13017.345671644836
--------------------------
iteration: 3
cost: 12923.887640553728
--------------------------
iteration: 4
cost: 12918.459536897954
--------------------------
iteration: 5
cost: 12915.448427223244
--------------------------
iteration: 6
cost: 12912.518307826856
--------------------------
iteration: 7
cost: 12909.592320224856
--------------------------
iteration: 8
cost: 12906.667917429668
--------------------------
iteration: 9
cost: 12903.745014302223
--------------------------
iteration: 10
cost: 12900.823607281181
--------------------------
iteration: 11
cost: 12897.903695508776
--------------------------
iteration: 12
cost: 12894.985278217257
--------------------------
iteration: 13
cost: 12892.06835464215
--------------------------
iteration: 14
cost: 12889.15292401955
-----------------------

In [7]:
# Do not change the code in this cell
alpha = 1e-3
params_0 = np.array([20.0, 80.0])
params_hat, cost, params_store =\
lin_reg_stoch_gradient_descent(x_train, y_train, params_0, alpha)

--------------------------
iteration: 0
cost: 13116276.228511825
--------------------------
iteration: 1
cost: 13116291.997657
--------------------------
iteration: 2
cost: 13117051.595547473
--------------------------
iteration: 3
cost: 13116832.903385978
--------------------------
iteration: 4
cost: 13116191.101187577
--------------------------
iteration: 5
cost: 13111936.739983846
--------------------------
iteration: 6
cost: 13105418.6980512
--------------------------
iteration: 7
cost: 13096297.534913912
--------------------------
iteration: 8
cost: 13088165.85529024
--------------------------
iteration: 9
cost: 13072257.493464552
--------------------------
iteration: 10
cost: 13055154.681366343
--------------------------
iteration: 11
cost: 13038270.416302878
--------------------------
iteration: 12
cost: 13009813.91857285
--------------------------
iteration: 13
cost: 12979698.276700998
--------------------------
iteration: 14
cost: 12942741.462062228
--------------------------
