In [2]:
import numpy as np
import pandas as pd

In [36]:
class LinearRegression():
  def __init__(self, X_train, X_test, y_train, y_test):
    self.X_train = X_train
    self.y_train = y_train
    self.X_test = X_test
    self.y_test = y_test
    
  
  def mxmult(self, mx1, mx2):
    result = [[0 for i in range(0, mx2.shape[1])] for j in range(0, mx1.shape[0])]
   
    for i in range(len(mx1)): #rows of mx1
      for j in range(len(mx2[0])): #cols of mx2
          for k in range(len(mx2)): # rows of mx2
              result[i][j] += mx1[i][k] * mx2[k][j]
    return np.array(result)

  def h(self, beta):
    return self.mxmult(self.X_train, beta)


  def get_cost(self, beta):
    return ( self.mxmult(self.transpose(self.h(beta)-self.y_train),(self.h(beta)-self.y_train)) )/(2*self.y_train.shape[0])


  def gradient_descent(self, beta, lr=0.1, epochs=10):
    m = self.X_train.shape[0]
    J_all = []
    
    for _ in range(epochs):
      h_x = self.h(beta)
      cost_ = (1/m)*( self.mxmult(self.transpose(self.X_train), (h_x-self.y_train)) )
      beta = beta - (lr)*cost_
      J_all.append(self.get_cost(beta))

    return beta, J_all 

  def transpose(self, mx):
    result = [[0 for i in range(mx.shape[0])] for j in range(mx.shape[1])]
    for i in range(len(mx)):
      for j in range(len(mx[0])):
        result[j][i] = mx[i][j]
    return np.array(result)
 
  def fit(self, lr=0.1, epochs=10):
    self.pad_1s_to_mx()
    beta = self.init_beta()

    beta, J_all = self.gradient_descent(beta, lr, epochs)
    J = self.get_cost(beta)
    print("Cost: ", J)
    print("Parameters: ", beta)
    return J, beta

  def predict(self, beta):
    mu = np.mean(self.X_train[:,1:], axis=0)
    std = np.std(self.X_train[:,1:], axis=0)
        
    for i,x in enumerate(self.X_test):
      x_0 = (x[0] - mu[0])/std[0]
      x_1 = (x[1] - mu[1])/std[1]
      y = beta[0] + beta[1]*x_0 + beta[2]*x_1
      print("Predicted price of house: ", y)
      print("Actual price of house: ", self.y_test[i])


  def init_beta(self):
    return np.zeros((self.X_train.shape[1], 1))


  def pad_1s_to_mx(self):
    self.X_train = np.hstack((np.ones((self.X_train.shape[0], 1)), self.X_train))
    self.X_test = np.hstack((np.ones((self.X_test.shape[0], 1)), self.X_test))


In [37]:

# Linear Regression Code Reference: https://towardsdatascience.com/coding-linear-regression-from-scratch-c42ec079902 
# Data source: https://github.com/kumudlakara/Medium-codes/blob/main/linear_regression/house_price_data.txt
df = pd.read_csv("house_price_data.txt", index_col=False)
df.columns = ["housesize", "rooms", "price"]
# Normalize the data
df = (df-df.mean())/df.std()

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values.reshape(-1,1)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=44)

linregmodel = LinearRegression(X_train, X_test, y_train, y_test)

J, beta = linregmodel.fit(lr=0.01, epochs=400)
linregmodel.predict(beta)

Cost:  [[0.13394757]]
Parameters:  [[-0.06724889]
 [ 0.78799061]
 [ 0.05764216]]
Predicted price of house:  [0.57585169]
Actual price of house:  [-1.1111551]
Predicted price of house:  [0.58438741]
Actual price of house:  [-0.19206027]
Predicted price of house:  [0.56731596]
Actual price of house:  [-0.31101026]
Predicted price of house:  [0.59475793]
Actual price of house:  [0.23298768]
Predicted price of house:  [0.62475264]
Actual price of house:  [-0.19206027]
Predicted price of house:  [0.54178854]
Actual price of house:  [-1.34191015]
Predicted price of house:  [0.6635224]
Actual price of house:  [1.0299526]
Predicted price of house:  [0.61278666]
Actual price of house:  [-0.07232521]
Predicted price of house:  [0.67014357]
Actual price of house:  [1.27576671]
Predicted price of house:  [0.63751634]
Actual price of house:  [1.07753259]
