In [1]:
import numpy as np
import pandas as pd
import warnings
from sklearn import preprocessing
warnings.filterwarnings("ignore")
#https://numpy.org/doc/stable/user/basics.broadcasting.html

In [7]:
def step_gradient(learning_rate,X,Y,m):
    '''
    Perform one batch gradient-descent update of the linear-model coefficients.

    The model predicts ``sum_j m[j] * X[i, j]`` for sample ``i`` and the loss is
    the mean squared error over all rows of X. The gradient of that loss with
    respect to ``m[j]`` is ``(-2/M) * sum_i (Y[i] - y_pred[i]) * X[i, j]``,
    where M is the number of rows. The whole dataset is used for every update
    (batch gradient descent).

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient.
    X : array-like of shape (M, N)
        Feature matrix, one sample per row.
    Y : array-like with M elements
        Target values; flattened to one dimension before use.
    m : array-like broadcastable against a row of X
        Current coefficients. The input is not modified.

    Returns
    -------
    numpy.ndarray
        Updated coefficients ``m - learning_rate * gradient``.
    '''
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float).reshape(-1)
    M = X.shape[0]
    N = X.shape[1]
    if M == 0:
        # No samples means a zero gradient; this also avoids dividing by M == 0.
        return m - learning_rate*np.zeros(N)
    # Same prediction form as cost() and predict(): row-wise sum of m * X.
    residuals = Y - np.sum(m*X, axis = 1)
    # X.T @ residuals sums residual[i] * X[i, j] over all samples for each j.
    m_slope = (-2/M)*(X.T @ residuals)
    return m - learning_rate*m_slope

In [8]:
def cost(X, Y, m):
    '''
    Return the mean squared error of the linear model on (X, Y).

    The prediction for each row of X is the sum of its element-wise product
    with the coefficients m; the cost is the mean of the squared differences
    between Y and those predictions.
    '''
    predictions = np.sum(m * X, axis=1)
    residuals = Y - predictions
    squared_errors = residuals ** 2
    return squared_errors.mean()

In [9]:
def gradient_descent(learning_rate,no_of_iterations,x_train,y_train):
    '''
    Fit the coefficient vector m with repeated batch gradient-descent steps.

    Starts from all-zero coefficients (one per column of x_train), applies
    step_gradient no_of_iterations times, then prints the number of completed
    iterations together with the final training cost.

    Parameters
    ----------
    learning_rate : float
        Step size passed to step_gradient.
    no_of_iterations : int
        Number of update steps. Zero or a negative value performs no updates
        and returns the initial all-zero coefficients.
    x_train : numpy.ndarray of shape (M, N)
        Training features, one sample per row.
    y_train : numpy.ndarray with M elements
        Training targets.

    Returns
    -------
    numpy.ndarray
        Coefficients after the last update.
    '''
    m = np.zeros(x_train.shape[1])
    # Track the count separately from the loop variable so the report below
    # still works when the loop body never runs.
    completed = 0
    for completed in range(1, no_of_iterations + 1):
        m = step_gradient(learning_rate,x_train,y_train,m)
    print(completed, " Cost: ", cost(x_train,y_train, m))
    return m

In [10]:
def predict(X, m):
    '''
    Return the linear-model prediction for every row of X.

    Each prediction is the sum over columns of the element-wise product of the
    row with the coefficients m.
    '''
    weighted = m * X
    return weighted.sum(axis=1)

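### With feature scaling

Note: the scaling steps inside `run()` below are currently commented out, so the model is trained on the features exactly as they are stored in the CSV files.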


In [11]:
#Preprocessing
def run(train_path="../datasets/training_boston_x_y_train.csv",
        test_path="../datasets/test_boston_x_test.csv",
        output_path="output.csv",
        no_of_iterations=500,
        learning_rate=0.01):
    '''
    Train the linear model on the training CSV and write test-set predictions.

    Steps: load both CSV files, split the training data into features (all
    columns but the last) and target (last column), append a constant column
    of ones to both feature matrices so the last coefficient acts as the
    intercept, fit with gradient_descent, predict on the test features, save
    the predictions with np.savetxt and print their shape.

    Parameters
    ----------
    train_path : str
        Comma-separated training file; the last column is the target.
    test_path : str
        Comma-separated test file containing the feature columns only.
    output_path : str
        File the predictions are written to, one value per line.
    no_of_iterations : int
        Number of gradient-descent updates.
    learning_rate : float
        Gradient-descent step size.
    '''
    boston_data_train = np.genfromtxt(train_path,delimiter=",")
    boston_data_test  = np.genfromtxt(test_path,delimiter=",")
    x_train,y_train   = boston_data_train[:,:-1],boston_data_train[:,-1]

    # Optional feature scaling (disabled). If it is enabled, fit the scaler on
    # the training features only and reuse the fitted scaler for the test
    # features. Do this before the bias column is appended: a constant column
    # has zero variance/range, so scaling it would wipe out the intercept.
    #
    # Standardization:
    #   std_scaler = preprocessing.StandardScaler()
    #   x_train = std_scaler.fit_transform(x_train)
    #   x_test  = std_scaler.transform(x_test)
    #
    # Min-max normalization:
    #   min_max_scaler = preprocessing.MinMaxScaler(feature_range =(0, 1))
    #   x_train = min_max_scaler.fit_transform(x_train)
    #   x_test  = min_max_scaler.transform(x_test)

    # Append a column of ones so the last coefficient is the intercept term.
    x_train = np.insert(x_train, x_train.shape[1], 1, axis = 1)
    x_test  = np.insert(boston_data_test, boston_data_test.shape[1], 1, axis = 1)

    m = gradient_descent(learning_rate,no_of_iterations,x_train,y_train)

    y_pred = predict(x_test,m)
    # Column vector so savetxt writes one prediction per line.
    y_pred = y_pred.reshape(x_test.shape[0],1)
    np.savetxt(output_path,y_pred)
    print(y_pred.shape)
    
    
# Execute the whole pipeline when this cell runs. The CSV files read by run()
# must exist, otherwise np.genfromtxt raises OSError.
run()

OSError: ../datasets/training_boston_x_y_train.csv not found.