In [1]:
import numpy as np
from sklearn import preprocessing

##  Code Gradient Descent for N features

In [2]:
def step_gradient(points, learning_rate, m):
    m_slope = np.zeros(points.shape[1])
    
    M = len(points)
    for i in range(M):
        x = points[i,:(points.shape[1]-1)]
        x = np.append(x,1)
        y = points[i,points.shape[1]-1]
        
        for j in range(points.shape[1]):
            m_slope[j] += (-2/M)* (y-(m*x).sum())*x[j]
    
    new_m = m - learning_rate*m_slope
    
    return new_m

In [3]:
def gd(points, learning_rate, num_iterations):
    m = np.zeros(points.shape[1])
    for i in range(num_iterations):
        m = step_gradient(points, learning_rate, m)
        #print((i+1), " Cost: ", cost(points, m))
    return m

In [4]:
def cost(points, m):
    total_cost = 0
    M = len(points)
    for i in range(M):
        x = points[i][:(points.shape[1]-1)]
        x = np.append(x,1)
        y = points[i][points.shape[1]-1]
        total_cost += (1/M)*(((y-(m*x)).sum())**2)
    return total_cost

In [5]:
def pred(data, m):
    ms = m[:-1]
    c = m[-1]
    ans = []
    
    for d in data:
        ans.append((ms*d).sum() + c)
        
    return ans

In [6]:
def run(data, learning_rate, num_iterations):
    #learning_rate = 0.15
    #num_iterations = 100
    m = gd(data, learning_rate, num_iterations)
    #print(m)
    return m

In [7]:
train_data = np.loadtxt('train_boston.csv', delimiter=',')
test_data = np.loadtxt('test_boston.csv', delimiter=',')

m = run(train_data, 0.1, 1000)
ans = pred(test_data,m)

## Feature Scaling-1

In [8]:
scaler = preprocessing.StandardScaler()
scaler.fit(train_data[:,:-1])

fs = scaler.transform(train_data[:,:-1])

train = np.insert(fs, train_data.shape[1]-1, train_data[:,-1], axis=1)
test = scaler.transform(test_data)

m1 = run(train, 0.1, 1000)

In [9]:
ans1  = pred(test, m1)
np.savetxt("predictions_boston.csv", ans1, delimiter=",")

## Coefficient of determination based on Training Data

In [10]:
def score(y_truth, y_pred):
    u = ((y_truth - y_pred)**2).sum()
    v = ((y_truth - y_truth.mean())**2).sum()
    return 1 - u/v

In [11]:
s = pred(train_data[:,:-1], m)
print('Coefficient of determination based on Training Data (Without Feature Scaling) is:',score(train_data[:,-1], s))

s1 = pred(train[:,:-1], m1)
print('Coefficient of determination based on Training Data (With Feature Scaling) is:',score(train[:,-1], s1))

Coefficient of determination based on Training Data (Without Feature Scaling) is: 0.7331366627280851
Coefficient of determination based on Training Data (With Feature Scaling) is: 0.7331366627280635


#### It can clearly be observed that more or less the answer with and without Feature Scaling is same.

## Feature Scaling-2

In [12]:
scaler = preprocessing.MinMaxScaler()
scaler.fit(train_data[:,:-1])

fs = scaler.transform(train_data[:,:-1])

train2 = np.insert(fs, train_data.shape[1]-1, train_data[:,-1], axis=1)
test2 = scaler.transform(test_data)

m2 = run(train, 0.1, 1000)

In [13]:
s = pred(train_data[:,:-1], m)
print('Coefficient of determination based on Training Data (Without Feature Scaling) is:',score(train_data[:,-1], s))

s1 = pred(train[:,:-1], m1)
print('Coefficient of determination based on Training Data (With Feature Scaling) is:',score(train[:,-1], s1))

s2 = pred(train2[:,:-1], m2)
print('Coefficient of determination based on Training Data (With Feature Scaling Min-Max) is:',score(train[:,-1], s2))

Coefficient of determination based on Training Data (Without Feature Scaling) is: 0.7331366627280851
Coefficient of determination based on Training Data (With Feature Scaling) is: 0.7331366627280635
Coefficient of determination based on Training Data (With Feature Scaling Min-Max) is: 0.1969536461125475
