In [1]:
# Loading and scaling the dataset
import pandas as pd 
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score 
from sklearn.pipeline import Pipeline

In [2]:
# Loading the datasets 
train_df = pd.read_csv('train.data.csv')
test_df = pd.read_csv('test.data.csv')

In [3]:
pipe = Pipeline([('scaler', StandardScaler()), ('reg', LinearRegression())])

In [4]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,2,6414100192,20141209T000000,538000,3,2.25,2570,7242,2.0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
1,4,2487200875,20141209T000000,604000,4,3.0,1960,5000,1.0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
2,5,1954400510,20150218T000000,510000,3,2.0,1680,8080,1.0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503
3,6,7237550310,20140512T000000,1225000,4,4.5,5420,101930,1.0,0,...,11,3890,1530,2001,0,98053,47.6561,-122.005,4760,101930
4,7,1321400060,20140627T000000,257500,3,2.25,1715,6819,2.0,0,...,7,1715,0,1995,0,98003,47.3097,-122.327,2238,6819


In [5]:
# Selecting the predictors and the target
X_train = train_df[["bedrooms","bathrooms","sqft_living","sqft_lot"]]
y_train = train_df["price"]

X_test = test_df[["bedrooms","bathrooms","sqft_living","sqft_lot"]]
y_test = test_df["price"]

In [6]:
pipe.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()), ('reg', LinearRegression())])

In [7]:
pipe.score(X_train, y_train)

0.5101138530794578

In [8]:
pipe.score(X_test, y_test)

0.5049944614037101

In [9]:
regression = pipe.named_steps['reg']

In [10]:
regression.coef_

array([-55655.71729094,   2842.59687198, 290948.51916872, -16587.41536884])

In [11]:
regression.intercept_

538146.886509353

In [12]:
# Implementing the gradient descent algorithm
def gradient(X,y,betas):
    """
    Calculates the gradient of the loss function

    Args:
        X (np.matrix): Data Matrix (n x d)
        y (np.array): Response vector (n x 1)
        betas (np.array): Regression coefficients (d x 1)

    Returns:
        np.array: Vector containing gradient of the loss function
    """
    return (2/(X.shape[0]))*X.T @ (X @ betas - y)


def gradient_descent(X,y,betas,learning_rate,tau = 10e-6):
    """
    Performs Gradient descent on the scaled dataset

    Args:
        X (np.matrix): Data Matrix (n x d)
        y (np.array): Response vector (n x 1)
        betas (np.array): Initial coefficient vector (d x 1)
        learning_rate (np.float64): Learning rate for gradient descent at which the algorithm converges
        tau (np.float64, optional): Threshold value used to determine the stopping point for the algorithm. 
        Defaults to 10e-6.

    Returns:
        np.array: True estimate of the regression coefficients
    """
    
    # Counter variable to keep track of the number of iterations
    i = 0
    
    # Starting the gradient descent algorithm
    while True:
        # Getting the gradient at the 't'th iteration
        gradients = gradient(X,y,betas)
        
        # Calculating the norm of the gradient
        gradient_norm = np.linalg.norm(gradients)
        print("Norm of gradient at iteration {}: {}".format(i, gradient_norm))
        
        # Checking if the norm of the gradient is less than the threshold value
        if gradient_norm < tau:
            break
        
        # Updating the coefficients
        betas = betas - learning_rate*gradient(X,y,betas)
        
        # Incrementing the counter
        i+=1
        
    # Returning the true coefficients
    return betas


In [13]:
scalar = StandardScaler()
X_train_scaled = scalar.fit_transform(X_train)
X_test_scaled = scalar.transform(X_test)

In [14]:
X_train_scaled = np.hstack((np.ones((X_train_scaled.shape[0],1)),X_train_scaled))
X_test_scaled = np.hstack((np.ones((X_test_scaled.shape[0],1)),X_test_scaled))

In [15]:
X_train_scaled.shape

(15129, 5)

In [16]:
X_test_scaled.shape

(6484, 5)

In [17]:
y_train = y_train.to_numpy().reshape(-1,1)

In [18]:
gradient_betas = gradient_descent(X_train_scaled,y_train,np.zeros((X_train_scaled.shape[1],1)),0.01,tau = 10e-6)

Norm of gradient at iteration 0: 1276097.5840578405
Norm of gradient at iteration 1: 1242209.5304384353
Norm of gradient at iteration 2: 1209549.9526426902
Norm of gradient at iteration 3: 1178061.5246820697
Norm of gradient at iteration 4: 1147689.839856283
Norm of gradient at iteration 5: 1118383.2716054874
Norm of gradient at iteration 6: 1090092.8397720542
Norm of gradient at iteration 7: 1062772.0821232477
Norm of gradient at iteration 8: 1036376.9309928807
Norm of gradient at iteration 9: 1010865.5949053472
Norm of gradient at iteration 10: 986198.4450496013
Norm of gradient at iteration 11: 962337.9064737505
Norm of gradient at iteration 12: 939248.3538732276
Norm of gradient at iteration 13: 916896.0118470504
Norm of gradient at iteration 14: 895248.8594977164
Norm of gradient at iteration 15: 874276.5392508638
Norm of gradient at iteration 16: 853950.2697711639
Norm of gradient at iteration 17: 834242.7628510223
Norm of gradient at iteration 18: 815128.1441487273
Norm of gradi

In [19]:
gradient_betas

array([[538146.88650935],
       [-55655.7172881 ],
       [  2842.59688566],
       [290948.51915265],
       [-16587.41536677]])

In [20]:
predictions = X_test_scaled @ gradient_betas

In [21]:
predictions

array([[ 277901.8758834 ],
       [ 205501.37763017],
       [1035367.15026277],
       ...,
       [ 381686.66598857],
       [ 648327.11088438],
       [ 965025.3719985 ]])

In [22]:
y_test = y_test.to_numpy().reshape(-1,1)

In [23]:
r2_score(y_test.flatten(),predictions.flatten())

0.5049944614045041

In [67]:
from numba import jit

In [72]:
def stochastic_gradient(X,y,tau,betas,decay_rate,learning_rate,maxiter = 10e6, batch_size = 1):
    xy = np.c_[X.reshape(X.shape[0],-1),y.reshape(y.shape[0],1)]
    rng = np.random.default_rng(0)
    
    diff = 0
    i = 0
    for _ in range(maxiter):
        rng.shuffle(xy)
        x = 0
        grad = 0
        learning_rate = (1/(i+1))
        for start in range(0,X.shape[0],batch_size):
            stop = start + batch_size
            X_batch, y_batch = xy[start:stop,:-1], xy[start:stop,-1:]
            x+=1
            
            grad += (2/(X_batch.shape[0]))*X_batch.T @ (X_batch @ betas - y_batch)
            # grad += gradient(X_batch,y_batch,betas)
            
        grad = grad/x
        grad_norm = np.linalg.norm(grad)
        if grad_norm < tau:
            break
        print("Norm of gradient at iteration {}: {}".format(i, grad_norm))
        betas += -learning_rate*grad
        
        i+=1
        
    return betas

In [53]:
y_train

array([[538000],
       [604000],
       [510000],
       ...,
       [402101],
       [400000],
       [325000]], dtype=int64)

In [73]:
betas = stochastic_gradient(X_train_scaled,y_train,10e-6,np.zeros((X_train_scaled.shape[1],1)),0.01,0.01,maxiter = 10000, batch_size = 10)

Norm of gradient at iteration 0: 1276080.6084072064
Norm of gradient at iteration 1: 2572714.5807547425
Norm of gradient at iteration 2: 2935767.2170651085
Norm of gradient at iteration 3: 1481105.4704729647
Norm of gradient at iteration 4: 192634.013977105
Norm of gradient at iteration 5: 34548.51966571339
Norm of gradient at iteration 6: 27310.903788501546
Norm of gradient at iteration 7: 25193.149101071205
Norm of gradient at iteration 8: 23674.552159072682
Norm of gradient at iteration 9: 22445.096627400864
Norm of gradient at iteration 10: 21407.049368425327
Norm of gradient at iteration 11: 20490.928581392516
Norm of gradient at iteration 12: 19690.094669314174
Norm of gradient at iteration 13: 18978.338483827658
Norm of gradient at iteration 14: 18359.664244082735
Norm of gradient at iteration 15: 17796.87794388479
Norm of gradient at iteration 16: 17274.0642944129
Norm of gradient at iteration 17: 16800.865788207546
Norm of gradient at iteration 18: 16363.847457190888
Norm of g

KeyboardInterrupt: 

In [64]:
betas

array([[538146.9130253 ],
       [-55407.81004142],
       [  4037.40320119],
       [289545.14933787],
       [-16406.6190379 ]])

In [65]:
predictions = X_test_scaled @ betas

In [66]:
r2_score(y_test.flatten(),predictions.flatten())

0.5050576359316632