## Implementation of Multivariate Linear Regression with Batch Gradient Descent Using Vectorization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training data: every column except the last is a feature,
# the last column is the target.
train = pd.read_csv('train.csv').values
X = train[:, :-1]
Y = train[:, -1:]  # sliced with -1: so Y stays a 2-D column, not a flat vector
xt = pd.read_csv('test.csv').values

# Standardise using statistics computed on the training features only,
# then apply the same shift/scale to the test features.
u = X.mean(axis=0)
s = X.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would
# produce NaN/inf and corrupt every later computation, so scale it by 1.
s = np.where(s == 0, 1, s)
X = (X - u) / s
xt = (xt - u) / s

In [None]:
# Prepend a column of ones to both feature matrices so the first entry of
# theta acts as the intercept term.
X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
xt = np.concatenate((np.ones((xt.shape[0], 1)), xt), axis=1)

In [None]:
def hypothesis(X, theta):
    """Return the linear model's predictions, i.e. the product X . theta."""
    predictions = np.dot(X, theta)
    return predictions

def gradient(X, Y, theta):
    """Return X^T (X . theta - Y) divided by the number of rows of X.

    This is the gradient, with respect to theta, of half the mean squared
    error, evaluated over the whole batch in a single matrix product.
    """
    m = X.shape[0]
    residual = hypothesis(X, theta) - Y
    return X.T.dot(residual) / m

def loss(X, Y, theta):
    """Return the mean of the squared differences between predictions and Y."""
    residual = hypothesis(X, theta) - Y
    return np.mean(residual ** 2)

def gradientDescent(X, Y, max_itr=100, lr=0.1):
    """Fit theta by batch gradient descent, starting from all zeros.

    Parameters
    ----------
    X : feature matrix; theta gets one row per column of X.
    Y : targets, forwarded unchanged to gradient() and loss().
    max_itr : number of update steps to perform.
    lr : learning rate that scales each gradient step.

    Returns
    -------
    (theta, history) where history lists the loss measured after each
    update, in iteration order.
    """
    theta = np.zeros((X.shape[1], 1))
    history = []
    for _ in range(max_itr):
        # Step against the gradient, then record the loss at the new theta.
        theta = theta - lr * gradient(X, Y, theta)
        history.append(loss(X, Y, theta))
    return theta, history

In [None]:
# Train with gradientDescent's defaults (100 iterations, learning rate 0.1).
theta, error_list = gradientDescent(X, Y)

# Plot the loss recorded after each update; the labels make the curve
# readable without having to consult the code.
plt.plot(error_list)
plt.xlabel('Iteration')
plt.ylabel('Mean squared error')
plt.title('Training loss')
plt.show()

In [None]:
# Predict targets for the standardised test features and write them to
# 'pred.csv' with the row index exported under the header 'Id'.
yt = hypothesis(xt, theta)
df = pd.DataFrame(data=yt, columns=['target'])
df.to_csv(path_or_buf='pred.csv', index_label='Id')