In [44]:
import numpy as np
import math
from matplotlib import pyplot as plt
import pandas as pd

In [45]:
# Read the comma-separated training file into a 2-D float array, skipping the header row.
arr = np.loadtxt("Lineardata_train.csv", delimiter=",", skiprows=1)


In [46]:
# Columns 1 onward are taken as the features; the first 40000 rows form the
# training split and every remaining row goes to the test split.
X_train, X_test = np.array(arr[:40000, 1:]), np.array(arr[40000:, 1:])

In [47]:
# Column 0 is taken as the target; it is split at the same row as the features.
Y_train, Y_test = np.array(arr[:40000, 0]), np.array(arr[40000:, 0])
Y_train[0]

-5928.102745620257

In [48]:
# @title normalisation
def normalise(x):
    """
    Standardise every feature column to zero mean and unit standard deviation.

    x = array(m,n) = training set, m = no. of examples, n = no. of features
        (a single-row array is accepted as well)

    Returns
        x_normalised (ndarray (m,n)) : (x - mu) / sigma
        mu (ndarray (n,))            : mean of each feature
        sigma (ndarray (n,))         : standard deviation of each feature plus 1e-10
    """
    mu = np.mean(x, axis=0)
    # The small offset keeps the division finite for constant (zero-variance) columns.
    sigma = np.std(x, axis=0) + 1e-10
    x_normalised = (x - mu) / sigma
    return x_normalised, mu, sigma


In [49]:
def cost(x,y,w,b):
  """
  Half mean squared error of the linear model x.w + b against the labels.

  x = array(m,n) = training set, m = no. of examples, n = no. of features
  y = array(m) = actual labels
  w = array(n) = feature weights
  b = bias
  returns: sum of squared residuals, divided by 2 and then by m
  """
  num_examples = x.shape[0]
  residuals = (np.dot(x, w) + b) - y  # predicted label minus actual label, per example
  half_sq_error = np.sum(np.square(residuals)) / 2
  return half_sq_error / num_examples


In [50]:
def total_cost(x,y,w,b,_lambda):
  """
  Regularised cost: cost(x,y,w,b) plus a penalty on the squared weights.

  x = array(m,n) = training set, m = no. of examples, n = no. of features
  y = array(m) = actual labels
  w = array(n) = feature weights
  b = bias
  _lambda = regularisation strength
  returns: cost(x,y,w,b) + _lambda * sum(w[i]**2 for the n features) / 2 / m
  """
  m = x.shape[0]
  n = x.shape[1]
  data_cost = cost(x, y, w, b)
  # Vectorised sum of squares instead of a Python loop; only the first n
  # weights (one per feature column of x) are penalised.
  sq_weight_sum = np.sum(np.square(np.asarray(w)[:n]))
  reg_cost = sq_weight_sum * _lambda / 2 / m
  return data_cost + reg_cost



In [51]:
def gradient(x, y, w, b):
  """
  Gradient of the half mean squared error with respect to the bias and weights.

  x = array(m,n) = training set, m = no. of examples, n = no. of features
  y = array(m) = actual labels
  w = array(n) = feature weights
  b = bias

  returns (df_db, df_dw), in that order:
  df_db = derivative of the cost wrt the bias b (mean residual)
  df_dw = array(n,1) = derivative of the cost wrt each weight, as a column
  """
  num_examples = x.shape[0]
  num_features = x.shape[1]

  # Residual of every example: predicted label minus actual label.
  residual = (np.dot(x, w) + b) - y

  df_db = np.sum(residual) / num_examples

  # (1,m) row of residuals times the (m,n) design matrix gives one sum per feature.
  residual_row = np.reshape(residual, (1, -1))
  per_feature_sum = np.dot(residual_row, x)
  df_dw = np.reshape(per_feature_sum, (num_features, 1)) / num_examples

  return df_db, df_dw



In [52]:
def gradient_descent(x,y,w,b,itr_nos,alpha):
  """
  Fit the weights and bias of the linear model by batch gradient descent.

  x = array(m,n) = training set, m = no. of examples, n = no. of features
  y = array(m) = actual labels
  w = array(n) = initial feature weights (not modified; a float copy is updated)
  b = initial bias
  itr_nos = no. of iterations
  alpha = learning rate

  returns (w, b, J_log):
  w = array(n) = weights after the last iteration
  b = bias after the last iteration
  J_log = array(itr_nos), J_log[i] = cost after the update of iteration i
  """
  # Work on a float copy so the caller's array is left untouched and integer
  # starting weights are not truncated on every update.
  w = np.array(w, dtype=float)
  J_log = np.zeros(itr_nos)
  # Progress is printed about ten times over the whole run.
  print_every = math.ceil(itr_nos/10)

  for i in range(itr_nos):
    df_db, df_dw = gradient(x,y,w,b)

    b = b - alpha*df_db
    # df_dw may come back as a column; reshape it so the update stays
    # element-wise instead of broadcasting to a matrix.
    w = w - alpha*np.reshape(df_dw, w.shape)

    # Evaluate the cost once and reuse it for both the log and the printout.
    current_cost = cost(x,y,w,b)
    if i % print_every == 0:
      print(f"itr.no={i}, cost={current_cost}")

    J_log[i] = current_cost

  return w,b,J_log




In [53]:
# Hyper-parameters of the training run.
b_initial = 0
iterations = 11000
alpha=1e-3

# Standardise the training features; mu and sigma are kept so the same
# scaling can be applied to other data later.
X,mu,sigma=normalise(X_train)

# One starting weight per feature column (instead of a hard-coded count),
# drawn uniformly from [0, 1) with a fixed seed so a fresh run starts from
# the same point every time.
w_initial = np.random.default_rng(0).random(X.shape[1])

w,b,J_log = gradient_descent(X,Y_train,w_initial,b_initial,iterations,alpha)

itr.no=0, cost=65613450.049907364
itr.no=1100, cost=7216817.893404837
itr.no=2200, cost=795240.4540003639
itr.no=3300, cost=87790.5386295164
itr.no=4400, cost=9709.329083854163
itr.no=5500, cost=1075.7687723613228
itr.no=6600, cost=119.41070375415924
itr.no=7700, cost=13.282219660925705
itr.no=8800, cost=1.4839883718073716
itr.no=9900, cost=0.17007377773639792


In [54]:
def predict(w,b,x1):
  """
  Predict labels with the linear model x1.w + b.

  w = array(n) = feature weights
  b = bias
  x1 = array(m,n) = examples to predict (a single example of shape (n,) also works)
  returns: array(m) of predicted labels (a scalar for a single example)
  """
  # The examples go on the left: (m,n).(n,) -> (m,). With the operands the
  # other way round the shapes only line up when m happens to equal n, and
  # then the result is not the per-example prediction.
  predn = np.dot(x1, w) + b

  return predn


In [55]:

def r2_score(actual,predicted):
    """
    Coefficient of determination: 1 - ss_r / ss_t.

    actual    = true labels (m values)
    predicted = predicted labels (m values)
    Both may be lists, 1-D arrays or (m,1) columns; each is flattened first so
    that the subtraction is element-wise rather than broadcast to (m,m).

    ss_t = sum of squared deviations of actual from its mean
    ss_r = sum of squared differences between actual and predicted
    """
    actual = np.asarray(actual).ravel()
    predicted = np.asarray(predicted).ravel()
    ss_t = np.sum((actual - np.mean(actual))**2)
    ss_r = np.sum((actual - predicted)**2)
    return 1 - (ss_r / ss_t)

In [57]:
# Scale the test features with the mean and standard deviation returned by
# normalise for the training features.
x_t_nor = (X_test - mu) / sigma

# Score of the fitted linear model on the test rows.
r2_score(actual=Y_test, predicted=np.dot(x_t_nor, w) + b)

0.9999999996371814