## Implementation of Multivariate Linear Regression Using Batch Gradient Descent

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load both data sets as plain NumPy arrays.
train = pd.read_csv('Train.csv').values
xt = pd.read_csv('Test.csv').values

# Every column of Train.csv except the last is a feature; the last column is
# kept as an (m, 1) column vector of targets.
X = train[:, :-1]
Y = train[:, -1:]

# Standardize the features column-wise. The test set is scaled with the
# statistics of the training set so both live in the same feature space.
u = X.mean(axis=0)
s = X.std(axis=0)
# A constant feature has zero standard deviation; dividing by it would fill
# the column with NaN/inf. Use a scale of 1 there so the column becomes 0.
s = np.where(s == 0, 1.0, s)
X = (X - u) / s
xt = (xt - u) / s

In [None]:
# Prepend a column of ones to both design matrices so that theta[0] plays the
# role of the intercept term.
X = np.hstack([np.ones((len(X), 1)), X])
xt = np.hstack([np.ones((len(xt), 1)), xt])

In [None]:
def hypothesis(X, theta):
    """Compute the linear-regression prediction for every row of ``X``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix, one sample per row.
    theta : array-like with n entries
        Model parameters, given either as a flat vector of length n or as
        an (n, 1) column vector.

    Returns
    -------
    numpy.ndarray of shape (m,)
        The product ``X @ theta`` as a flat float vector.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``theta`` does not hold exactly n values.
    """
    X = np.asarray(X, dtype=float)
    (m, n) = X.shape  # also enforces that X is two-dimensional

    # Flatten theta so a column vector (n, 1) and a flat vector (n,) both
    # produce a 1-D result rather than an (m, 1) matrix.
    weights = np.asarray(theta, dtype=float).reshape(-1)
    if weights.size != n:
        raise ValueError(
            "theta has %d entries but X has %d columns" % (weights.size, n)
        )

    return X @ weights

def gradient(X, Y, theta):
    """Return the batch gradient used to update ``theta``.

    Computes ``(1/m) * X.T @ (hypothesis(X, theta) - Y)``, i.e. for every
    parameter j the average over all samples of ``(h_i - y_i) * X[i, j]``.

    Parameters
    ----------
    X : numpy.ndarray of shape (m, n)
        Design matrix, one sample per row.
    Y : array-like with m entries
        Targets, either flat or as an (m, 1) column vector.
    theta : array-like with n entries
        Current model parameters.

    Returns
    -------
    numpy.ndarray of shape (n,)
        One gradient component per parameter.
    """
    X = np.asarray(X, dtype=float)
    (m, n) = X.shape

    predictions = np.asarray(hypothesis(X, theta), dtype=float).reshape(-1)
    # Flatten Y before subtracting: (m,) - (m, 1) would otherwise broadcast
    # into an (m, m) matrix instead of a per-sample residual.
    residual = predictions - np.asarray(Y, dtype=float).reshape(-1)

    return (X.T @ residual) / m

def loss(X, Y, theta):
    """Return the mean squared error of the predictions on ``(X, Y)``.

    Parameters
    ----------
    X : numpy.ndarray of shape (m, n)
        Design matrix, one sample per row.
    Y : array-like with m entries
        Targets, either flat or as an (m, 1) column vector.
    theta : array-like with n entries
        Model parameters.

    Returns
    -------
    float
        ``sum((h_i - y_i) ** 2) / m`` as a plain scalar, regardless of
        whether ``Y`` is flat or a column vector.

    Raises
    ------
    ZeroDivisionError
        If ``X`` has no rows.
    """
    m = X.shape[0]
    predictions = np.asarray(hypothesis(X, theta), dtype=float).reshape(-1)
    # Flatten Y so the subtraction is element-wise and cannot broadcast an
    # (m,) vector against an (m, 1) column.
    residual = predictions - np.asarray(Y, dtype=float).reshape(-1)

    # float() yields a true scalar; dividing it by m == 0 raises
    # ZeroDivisionError instead of silently producing NaN.
    return float(residual @ residual) / m

def gradientDescent(X, Y, max_itr=100, lr=0.1):
    """Fit ``theta`` with batch gradient descent, starting from zeros.

    Parameters
    ----------
    X : numpy.ndarray of shape (m, n)
        Design matrix, one sample per row.
    Y : array-like
        Targets, passed through unchanged to ``gradient`` and ``loss``.
    max_itr : int
        Number of update steps to perform.
    lr : float
        Learning rate multiplying the gradient in every step.

    Returns
    -------
    tuple
        ``(theta, error_list)`` where ``theta`` is an (n, 1) array of fitted
        parameters and ``error_list`` holds the value of ``loss`` recorded
        after each update.
    """
    _, n_params = X.shape
    theta = np.zeros((n_params, 1))
    error_list = []

    for _ in range(max_itr):
        step = lr * gradient(X, Y, theta)
        # The gradient is flat; reshape it to a column to match theta.
        theta -= np.reshape(step, (n_params, 1))
        error_list.append(loss(X, Y, theta))

    return theta, error_list

In [None]:
# Train with the default number of iterations and learning rate.
theta, error_list = gradientDescent(X, Y)

# Learning curve: one loss value per gradient-descent update. Labels make the
# figure readable on its own when the notebook is shared or exported.
plt.plot(error_list)
plt.xlabel('Iteration')
plt.ylabel('Mean squared error')
plt.title('Training loss per iteration')
plt.show()

In [None]:
# Predict the targets of the test set and write them to pred.csv with a
# single 'target' column; the row index is saved under the header 'Id'.
yt = hypothesis(xt, theta)
df = pd.DataFrame({'target': yt})
df.to_csv('pred.csv', index_label='Id')