In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

Read in the data

In [3]:
# Load the CSV, then treat every column except the last as a feature and the
# last column as the regression target.
df = pd.read_csv('../data/linear_regression_data.csv', sep=',')
dataset_array = df.to_numpy()
X, y = dataset_array[:, :-1], dataset_array[:, -1]

Split into training and testing sets

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train_prelim, X_test_prelim, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

Perform MinMax scaling (fit on the training set only, then apply the same transform to the test set)

In [6]:
# Learn the per-feature min/max from the training split only, then apply that
# same transform to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train_prelim)
X_train_prelim_normalized = scaler.transform(X_train_prelim)
X_test_prelim_normalized = scaler.transform(X_test_prelim)

Add a column of ones for the bias term

In [9]:
# Append a constant column of ones as the last feature so that the final
# weight acts as the bias term.
bias_train = np.ones((X_train_prelim.shape[0], 1))
bias_test = np.ones((X_test_prelim.shape[0], 1))
X_train = np.hstack((X_train_prelim_normalized, bias_train))
X_test = np.hstack((X_test_prelim_normalized, bias_test))

In [None]:
class LinearRegression():
    """
    Linear regression with an L2 penalty, trained by mini-batch gradient descent.

    The objective being minimised is
        J(theta) = mean((X @ theta - y) ** 2) + lambd * (theta @ theta)
    No intercept is added internally, and the penalty covers every entry of
    theta -- including the weight of any constant column present in X.

    Parameters:
    lambd      - L2 regularisation strength
    eta        - learning rate (step size of each update)
    n_epochs   - maximum number of passes over the training data
    batch_size - number of samples per gradient step
    eps        - early-stopping threshold on the norm of the full-data gradient
    verbose    - if True, print the training loss after every epoch and a
                 message when training stops early

    Attributes set by fit:
    theta         - (d,) nparray of learned weights
    loss_history_ - list with the full-training-set loss after each epoch
    """

    def __init__(self, lambd=1.0, eta=1e-4, n_epochs=1000, batch_size=64, eps=1e-4, verbose=True):
        self.lambda_reg = lambd
        self.eta = eta
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.eps = eps
        self.verbose = verbose

    def fit(self, X, y):
        """
        Learn the weights by mini-batch gradient descent.

        Inputs:
        X - (n,d) nparray for data
        y - (n,) nparray of labels (an (n,1) column is flattened)
        Outputs:
        self - the fitted estimator, so calls can be chained
        Raises:
        ValueError - if X is not a non-empty 2-D array, if X and y disagree
                     on the number of samples, or if batch_size < 1
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n,1) target cannot broadcast the residual to (n,n).
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError('X must be a non-empty 2-D array, got shape {}'.format(X.shape))
        if y.shape[0] != X.shape[0]:
            raise ValueError('X and y disagree on the number of samples: {} vs {}'.format(X.shape[0], y.shape[0]))
        if self.batch_size < 1:
            raise ValueError('batch_size must be at least 1, got {}'.format(self.batch_size))

        n_samples = X.shape[0]

        # initialize weights
        self.theta = self._init_theta(X)
        self.loss_history_ = []

        for epoch in range(1, self.n_epochs + 1):
            # Draw a fresh permutation every epoch so the mini-batches differ
            # from one pass to the next.
            order = np.random.permutation(n_samples)
            X_shuffled, y_shuffled = X[order], y[order]

            for start in range(0, n_samples, self.batch_size):
                stop = start + self.batch_size
                grad = self._compute_gradient(X_shuffled[start:stop], y_shuffled[start:stop])
                self.theta -= self.eta * grad

            # Report the loss on the whole training set; the last mini-batch
            # alone may be small and unrepresentative.
            loss = self._compute_loss(X, y)
            self.loss_history_.append(loss)
            if self.verbose:
                print(loss)

            # Converged once the gradient of the full objective is (almost)
            # zero. The norm of theta itself says nothing about convergence.
            full_grad = self._compute_gradient(X, y)
            if np.sqrt(np.dot(full_grad, full_grad)) < self.eps:
                if self.verbose:
                    print('Stopping Early after {} epochs.'.format(epoch))
                break

        return self

    def predict(self, X):
        """
        Predict targets for X with the learned weights.

        Inputs:
        X - (n,d) nparray for data
        Outputs:
        y_hat - (n,) nparray of predictions
        """
        return np.dot(X, self.theta)

    def _compute_loss(self, X, y, logscale=True, reg=False):
        """
        Mean squared error of the current weights on (X, y).

        Inputs:
        X        - (n,d) nparray for data
        y        - (n,) nparray of labels
        logscale - if True, return log(1 + loss) instead of the raw loss
        reg      - if True, add the penalty lambda * (theta @ theta)
        Outputs:
        loss - scalar loss value
        """
        residual = np.dot(X, self.theta) - y
        loss = np.mean(np.square(residual))
        if reg:
            loss += self.lambda_reg * np.dot(self.theta, self.theta)
        if logscale:
            loss = np.log1p(loss)
        return loss

    def _compute_gradient(self, X, y):
        """
        Compute the analytic gradient of the L2-regularised mean squared error

        Inputs:
        X - (n,d) nparray for data
        y - (n,) nparray of labels
        Outputs:
        dJ/dw - (d,) nparray, the gradient of J wrt w evaluated on (X, y)
        """
        residual = np.dot(X, self.theta) - y
        return 2 * np.dot(X.T, residual) / X.shape[0] + 2 * self.lambda_reg * self.theta

    def _init_theta(self, X):
        """
        Return a row of normally distributed random variates

        Inputs:
        X - (n,d) nparray for data
        Outputs:
        w - (d,) nparray of randomly initialized weights
        """
        return np.random.randn(X.shape[1])



In [None]:
# LinearRegression.__init__ names the step size `eta` and the L2 penalty
# `lambd`; it accepts neither `lr` nor `C`, so those keywords raise TypeError.
regressor = LinearRegression(eta=0.05, batch_size=200, n_epochs=100, lambd=0.)

regressor.fit(X_train, y_train)