# Linear Regression with Implementation

## Introduction

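## Linear Regression Model

Linear regression predicts the response as $\hat{y} = Xw + b$ and fits the weights $w$ and bias $b$ by minimizing the mean squared error $\frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$; the implementations below do this with mini-batch gradient descent.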

## Numpy Implementation of Linear Regression

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import numpy as np

# PyTorch imports.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# TensorFlow import.
import tensorflow as tf

# MXNet imports.
# from mxnet import nd, autograd, init, gluon
# from mxnet.gluon import data as gdata
# from mxnet.gluon import nn
# from mxnet.gluon import loss as gloss

# Seed every RNG the notebook draws from so results are reproducible:
# NumPy, Python's `random` (used by random.shuffle in get_data), and
# PyTorch (used for nn.Linear weight initialization).
np.random.seed(71)
random.seed(71)
torch.manual_seed(71)

In [2]:
class LinearRegression(object):
    """Numpy implementation of Linear Regression.

    The model is ``y_hat = X w + b``, trained by mini-batch gradient descent
    on the mean squared error.

    Typical use: ``get_data(X, y)``, then ``fit()``, then ``predict(X_new)``.
    """
    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        """Store hyper-parameters.

        Args:
            batch_size: number of examples per mini-batch.
            lr: learning rate (gradient step size).
            n_epochs: number of passes over the training data.
        """
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Get dataset and information.

        Args:
            X_train: 2-D array of shape (n_examples, n_inputs).
            y_train: array with one response per row of ``X_train``.
            shuffle: if True, shuffle rows once using the ``random`` module.

        Raises:
            ValueError: if ``X_train`` and ``y_train`` differ in length.
        """
        self.X_train = X_train
        self.y_train = y_train

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = self.X_train.shape

        # Fail early: mismatched lengths would otherwise surface later as a
        # confusing indexing or broadcasting problem.
        if len(self.y_train) != self.n_examples:
            raise ValueError(
                'X_train has {0} examples but y_train has {1}'
                .format(self.n_examples, len(self.y_train)))

        if shuffle:
            idx = list(range(self.n_examples))
            random.shuffle(idx)
            # The same index order is applied to X and y to keep rows aligned.
            self.X_train = self.X_train[idx]
            self.y_train = self.y_train[idx]

    def _create_weights(self):
        """Create model weights (n_inputs, 1) and bias (1, 1), all zeros."""
        self.w = np.zeros((self.n_inputs, 1))
        self.b = np.zeros((1, 1))

    def _model(self, X):
        """Linear regression model: returns X w + b with shape (m, 1)."""
        return np.matmul(X, self.w) + self.b

    def _loss(self, y, y_):
        """Mean squared error loss.

        squared_error_loss(y, y_) = 1/n * sum_{i=1}^n (y_i - y_hat_i)^2

        Both arguments are viewed as column vectors first, so a 1-D ``y``
        and an (n, 1) prediction are compared element-wise instead of being
        broadcast into an (n, n) matrix.
        """
        y = np.reshape(y, (-1, 1))
        y_ = np.reshape(y_, (-1, 1))
        self.squared_error = np.square(y - y_)
        return np.mean(self.squared_error)

    def _optimize(self, X, y):
        """Take one gradient descent step on the batch (X, y).

        Updates ``self.w`` and ``self.b`` in place. The gradients are those
        of 1/(2m) * sum (y_hat - y)^2, i.e. the factor 2 is absorbed into
        the learning rate.
        """
        m = X.shape[0]

        # Column-vector view of y avoids (m, 1) - (m,) -> (m, m) broadcasting.
        residual = self._model(X) - np.reshape(y, (-1, 1))
        dw = 1 / m * np.matmul(X.T, residual)
        db = np.mean(residual)

        # Slice assignment keeps the existing arrays (and their shapes).
        self.w[:] = self.w - self.lr * dw
        self.b[:] = self.b - self.lr * db

    def _fetch_batch(self):
        """Yield consecutive (X_batch, y_batch) mini-batches; the last one
        may be smaller than ``batch_size``."""
        idx = list(range(self.n_examples))
        for i in range(0, self.n_examples, self.batch_size):
            idx_batch = idx[i:min(i + self.batch_size, self.n_examples)]
            yield (self.X_train.take(idx_batch, axis=0),
                   self.y_train.take(idx_batch, axis=0))

    def fit(self):
        """Fit model on the data given to ``get_data``.

        Weights are re-initialised to zero on every call. The example-weighted
        mean of the post-update batch losses is printed every 100 epochs.

        Returns:
            self, to allow call chaining.
        """
        self._create_weights()

        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0
            for X_train_b, y_train_b in self._fetch_batch():
                y_train_b = y_train_b.reshape((y_train_b.shape[0], -1))
                self._optimize(X_train_b, y_train_b)
                batch_loss = self._loss(y_train_b, self._model(X_train_b))
                # Weight by batch size so a short final batch is not
                # over-represented in the epoch average.
                total_loss += batch_loss * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('epoch {0}: training loss {1}'
                      .format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Return (bias of shape (1, 1), weights flattened to (n_inputs,))."""
        return self.b, self.w.reshape((-1,))

    def predict(self, X):
        """Return predictions for X as a 1-D array of length X.shape[0]."""
        return self._model(X).reshape((-1,))

## Data Preparation and Preprocessing

In [3]:
import sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression as LinearRegressionSklearn

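# metrics.py source: https://github.com/bowen0701/machine-learning/blob/master/metrics.py
# NOTE: the copy below assumes that repository is checked out at /notebooks/machine-learning.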
!cp "/notebooks/machine-learning/metrics.py" .
from metrics import mean_squared_error

In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
# Read California housing data.
housing = fetch_california_housing()
X = housing.data
y = housing.target

In [6]:
X.shape, y.shape

((20640, 8), (20640,))

In [7]:
X[:3]

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02]])

In [8]:
y[:3]

array([4.526, 3.585, 3.521])

In [9]:
# Split data into training and test datasets.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=71, shuffle=True)

In [10]:
print(X_train_raw.shape, y_train.shape)
print(X_test_raw.shape, y_test.shape)

(15480, 8) (15480,)
(5160, 8) (5160,)


In [11]:
# Feature engineering: rescale each feature to [0, 1] with a min-max scaler
# (fit on the training set only, then applied to the test set).
min_max_scaler = MinMaxScaler()

X_train = min_max_scaler.fit_transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

In [118]:
# Convert arrays to float32.
X_train, X_test, y_train, y_test = (
    arr.astype(np.float32) for arr in (X_train, X_test, y_train, y_test))

In [13]:
X_train.dtype, y_train.dtype

(dtype('float32'), dtype('float32'))

## Fitting Linear Regression in NumPy

In [14]:
# Instantiate our NumPy Linear Regression.
linreg = LinearRegression(batch_size=64, lr=0.1, n_epochs=1000)

In [15]:
# Load the (shuffled) training data into the model.
linreg.get_data(X_train, y_train, shuffle=True)

In [16]:
linreg.fit()

epoch 100: training loss 0.5293563370658436
epoch 200: training loss 0.5275734594735062
epoch 300: training loss 0.5265158852838248
epoch 400: training loss 0.5256572463949015
epoch 500: training loss 0.5249111460382239
epoch 600: training loss 0.5242498661370676
epoch 700: training loss 0.5236576673849905
epoch 800: training loss 0.5231234686938776
epoch 900: training loss 0.5226389617068218
epoch 1000: training loss 0.5221977221543459


<__main__.LinearRegression at 0x7fe51afba0f0>

In [17]:
# Get coefficient.
linreg.get_coeff()

(array([[3.8694367]]),
 array([ 5.73372961,  0.50739872, -3.33683057,  8.48590097, -0.04395446,
        -3.99943332, -4.18663508, -4.56104079]))

In [18]:
# Predicted response for training data.
y_train_ = linreg.predict(X_train)
y_train_[:10]

array([1.56133286, 1.39834383, 2.14952592, 3.73451918, 2.9295695 ,
       1.94008646, 2.21184873, 0.97634259, 0.9146045 , 1.83039715])

In [19]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5284810584720984

In [20]:
# Predicted response for test data.
y_test_ = linreg.predict(X_test)
y_test_[:10]

array([1.66620143, 2.83664128, 2.27498959, 2.75466997, 2.9009546 ,
       1.65278449, 2.11967699, 2.69806821, 2.03915454, 1.5909759 ])

In [21]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5492088858054294

## PyTorch Implementation of Linear Regression

In [127]:
class LinearRegressionTorch(nn.Module):
    """PyTorch implementation of Linear Regression.

    A single ``nn.Linear(n_inputs, 1)`` layer trained with mini-batch SGD on
    the mean squared error.

    Typical use: ``get_data(X, y)``, then ``build_graph()``, then ``fit()``,
    then ``predict(X_new)``.
    """

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        """Store hyper-parameters.

        Args:
            batch_size: number of examples per mini-batch.
            lr: learning rate passed to the SGD optimizer.
            n_epochs: number of passes over the training data.
        """
        super(LinearRegressionTorch, self).__init__()
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Get dataset and information.

        Args:
            X_train: 2-D NumPy array of shape (n_examples, n_inputs).
            y_train: NumPy array with one response per row of ``X_train``.
            shuffle: if True, shuffle rows once using the ``random`` module.
        """
        self.X_train = X_train
        self.y_train = y_train

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = self.X_train.shape

        if shuffle:
            idx = list(range(self.n_examples))
            random.shuffle(idx)
            # The same index order is applied to X and y to keep rows aligned.
            self.X_train = self.X_train[idx]
            self.y_train = self.y_train[idx]

    def _create_model(self):
        """Linear regression model: one linear layer with a single output."""
        self.net = nn.Linear(self.n_inputs, 1)

    def forward(self, x):
        """Return predictions of shape (batch, 1) for the input tensor x."""
        return self.net(x)

    def _create_loss(self):
        """Mean squared error loss.

        squared_error_loss(y, y_) = 1/n * sum_{i=1}^n (y_i - y_hat_i)^2
        """
        self.criterion = nn.MSELoss()

    def _create_optimizer(self):
        """Optimize by stochastic gradient descent."""
        self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr)

    def build_graph(self):
        """Create the model, loss and optimizer; call after ``get_data``
        (the layer width is taken from ``n_inputs``)."""
        self._create_model()
        self._create_loss()
        self._create_optimizer()

    def _fetch_batch(self):
        """Yield consecutive (X_batch, y_batch) NumPy mini-batches; the last
        one may be smaller than ``batch_size``."""
        idx = list(range(self.n_examples))
        for i in range(0, self.n_examples, self.batch_size):
            idx_batch = idx[i:min(i + self.batch_size, self.n_examples)]
            yield (self.X_train.take(idx_batch, axis=0),
                   self.y_train.take(idx_batch, axis=0))

    def fit(self):
        """Fit model, printing the example-weighted mean batch loss every
        100 epochs."""
        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0.0
            for X_train_b, y_train_b in self._fetch_batch():
                # Convert to float32 tensors (the dtype of the layer's
                # parameters) and reshape ys to a column.
                X_train_b = torch.from_numpy(X_train_b).float()
                y_train_b = torch.from_numpy(y_train_b).float().reshape(-1, 1)

                y_pred_b = self(X_train_b)
                batch_loss = self.criterion(y_pred_b, y_train_b)
                # .item() extracts a plain float; accumulating the tensor
                # itself would keep every batch's autograd graph alive for
                # the whole epoch.
                total_loss += batch_loss.item() * X_train_b.shape[0]

                # Zero grads, perform backward pass, and update weights.
                self.optimizer.zero_grad()
                batch_loss.backward()
                self.optimizer.step()

            if epoch % 100 == 0:
                print('Epoch {0}: training loss: {1}'
                      .format(epoch, total_loss / self.n_examples))

    def get_coeff(self):
        """Get model coefficients.

        Returns:
            (bias of shape (1,), weights of shape (1, n_inputs)) as NumPy
            arrays.
        """
        # Detach parameters, which require grad, before converting to NumPy.
        return self.net.bias.detach().numpy(), self.net.weight.detach().numpy()

    def predict(self, X):
        """Predict for new data.

        Args:
            X: NumPy array of shape (n, n_inputs); it is cast to float32.

        Returns:
            1-D float32 NumPy array of length n.
        """
        with torch.no_grad():
            X_ = torch.from_numpy(X).float()
            return self(X_).numpy().reshape((-1,))

## Fitting Linear Regression in PyTorch

In [128]:
# Instantiate our PyTorch Linear Regression.
linreg_torch = LinearRegressionTorch(batch_size=64, lr=0.1, n_epochs=1000)

In [129]:
linreg_torch.get_data(X_train, y_train, shuffle=True)

In [130]:
linreg_torch.build_graph()

In [140]:
linreg_torch.net

Linear(in_features=8, out_features=1, bias=True)

In [131]:
linreg_torch.fit()

Epoch 100: training loss: 0.5316377878189087
Epoch 200: training loss: 0.5297052264213562
Epoch 300: training loss: 0.5282834768295288
Epoch 400: training loss: 0.5271416306495667
Epoch 500: training loss: 0.5262001156806946
Epoch 600: training loss: 0.5254138112068176
Epoch 700: training loss: 0.5247516632080078
Epoch 800: training loss: 0.5241917967796326
Epoch 900: training loss: 0.5237173438072205
Epoch 1000: training loss: 0.5233145952224731


In [132]:
# Get coefficient.
linreg_torch.get_coeff()

(array([3.792725], dtype=float32),
 array([[ 5.948561  ,  0.5364436 , -7.015867  , 12.827489  , -0.02664179,
         -4.711181  , -4.0869713 , -4.482226  ]], dtype=float32))

In [133]:
# Predicted response for training data.
y_train_ = linreg_torch.predict(X_train)
y_train_[:10]

array([1.5655985 , 1.4451199 , 2.1933067 , 3.7847376 , 2.9887996 ,
       1.9764644 , 2.2598333 , 1.0112829 , 0.98555326, 1.8750244 ],
      dtype=float32)

In [135]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5223769

In [136]:
# Predicted response for test data.
y_test_ = linreg_torch.predict(X_test)
y_test_[:10]

array([1.7113876, 2.839499 , 2.3097496, 2.7859013, 2.908202 , 1.6964045,
       2.1401734, 2.7307498, 2.0823755, 1.633853 ], dtype=float32)

In [138]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.54311526

## TensorFlow Implementation of Linear Regression

In [36]:
def reset_tf_graph(seed=71):
    """Reset default TensorFlow graph and re-seed the random generators.

    Seeds TensorFlow's graph-level RNG, NumPy, and Python's ``random``
    module; the latter is what ``get_data`` uses to shuffle, so without it
    the training order differs between runs.

    Args:
        seed: integer seed applied to all three generators.
    """
    tf.reset_default_graph()
    # The graph-level seed belongs to the new default graph, so it has to be
    # set after the reset.
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


class LinearRegressionTF(object):
    """Linear Regression written against TensorFlow's graph/session API.

    Typical use: ``get_data(X, y)``, ``build_graph()``, ``fit()``, then
    ``get_coeff()`` / ``predict(X)``; the last two reload the checkpoint
    that ``fit`` writes.
    """
    def __init__(self, batch_size=64, learning_rate=0.01, n_epochs=1000):
        # Training hyper-parameters.
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.batch_size = batch_size

    def get_data(self, X_train, y_train, shuffle=True):
        """Store the training set, optionally in shuffled row order."""
        self.X_train, self.y_train = X_train, y_train

        # Record how many examples and input features there are.
        self.n_examples, self.n_inputs = self.X_train.shape

        order = list(range(self.n_examples))
        if shuffle:
            random.shuffle(order)

        # Both arrays are indexed with the same order so rows stay aligned.
        self.X_train = self.X_train[order]
        self.y_train = self.y_train[order]

    def _create_placeholders(self):
        """Create the feed placeholders for features and response."""
        self.X = tf.placeholder(
            dtype=tf.float32, shape=(None, self.n_inputs), name='X')
        self.y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='y')

    def _create_weights(self):
        """Create the weight (random normal) and bias (zeros) variables."""
        self.w = tf.get_variable(
            'weights',
            shape=[self.n_inputs, 1],
            initializer=tf.random_normal_initializer())
        self.b = tf.get_variable(
            'bias',
            shape=[1],
            initializer=tf.zeros_initializer())

    def _model(self, X):
        """Return the graph node computing X w + b."""
        projection = tf.matmul(X, self.w)
        return projection + self.b

    def _create_model(self):
        """Attach the linear model to the feature placeholder."""
        self.y_ = self._model(self.X)

    def _create_loss(self):
        """Create the mean squared error loss node."""
        residual = self.y_ - self.y
        self.loss = tf.reduce_mean(tf.square(residual), name='loss')

    def _create_optimizer(self):
        """Create the gradient descent training op for the loss."""
        sgd = tf.train.GradientDescentOptimizer(
            learning_rate=self.learning_rate)
        self.optimizer = sgd.minimize(self.loss)

    def build_graph(self):
        """Build the computational graph, one stage after another."""
        stages = (
            self._create_placeholders,
            self._create_weights,
            self._create_model,
            self._create_loss,
            self._create_optimizer,
        )
        for stage in stages:
            stage()

    def _fetch_batch(self):
        """Yield consecutive (X_batch, y_batch) pairs, y as a column."""
        for start in range(0, self.n_examples, self.batch_size):
            stop = min(start + self.batch_size, self.n_examples)
            rows = list(range(start, stop))
            X_batch = self.X_train[rows, :]
            y_batch = self.y_train[rows].reshape(-1, 1)
            yield (X_batch, y_batch)

    def _run_epoch(self, sess):
        """Run one training pass; return the sum of size-weighted losses."""
        weighted_loss = 0
        for X_batch, y_batch in self._fetch_batch():
            _, batch_loss = sess.run(
                [self.optimizer, self.loss],
                feed_dict={self.X: X_batch, self.y: y_batch})
            weighted_loss += batch_loss * X_batch.shape[0]
        return weighted_loss

    def _restore_checkpoint(self, sess):
        """Initialize variables in sess, then load the saved checkpoint."""
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, 'checkpoints/linreg')

    def fit(self):
        """Train the model and save it to 'checkpoints/linreg'."""
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(1, self.n_epochs + 1):
                total_loss = self._run_epoch(sess)

                # Report the per-example training loss every 100 epochs.
                if epoch % 100 == 0:
                    print('Epoch {0}: training loss: {1}'
                          .format(epoch, total_loss / self.n_examples))

            # Save model.
            saver.save(sess, 'checkpoints/linreg')

    def get_coeff(self):
        """Return (bias, flattened weights) read from the checkpoint."""
        with tf.Session() as sess:
            self._restore_checkpoint(sess)
            bias = self.b.eval()
            weights = self.w.eval().reshape((-1,))
            return bias, weights

    def predict(self, X):
        """Return flattened predictions for X using the checkpoint."""
        with tf.Session() as sess:
            self._restore_checkpoint(sess)
            return self._model(X).eval().reshape((-1,))

## Fitting Linear Regression in TensorFlow

In [38]:
reset_tf_graph()
linreg_tf = LinearRegressionTF(batch_size=64, learning_rate=0.1, n_epochs=1000)

In [39]:
linreg_tf.get_data(X_train, y_train, shuffle=True)

In [40]:
linreg_tf.build_graph()

In [41]:
linreg_tf.fit()

Epoch 100: training loss: 0.5313578103709898
Epoch 200: training loss: 0.5296086279612795
Epoch 300: training loss: 0.528270764529551
Epoch 400: training loss: 0.5271825757599616
Epoch 500: training loss: 0.5262796482528519
Epoch 600: training loss: 0.5255223750760081
Epoch 700: training loss: 0.5248835207263937
Epoch 800: training loss: 0.5243429550212791
Epoch 900: training loss: 0.5238846719881053
Epoch 1000: training loss: 0.5234957597206421


In [42]:
linreg_tf.get_coeff()

INFO:tensorflow:Restoring parameters from checkpoints/linreg


(array([3.806974], dtype=float32),
 array([ 5.955759  ,  0.5073116 , -7.0784802 , 12.865215  , -0.03720079,
        -4.6820827 , -4.0801024 , -4.484127  ], dtype=float32))

In [43]:
# Predicted response for training data.
y_train_ = linreg_tf.predict((tf.cast(X_train, dtype=tf.float32)))
print(y_train_[:10])

# Prediction mean squared error for training data.
mean_squared_error(y_train, y_train_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.5626693 1.4334488 2.1840513 3.7922604 2.9843884 1.9680834 2.2502956
 1.0205975 0.9930382 1.8691006]


0.5223551

In [44]:
# Predicted response for test data.
y_test_ = linreg_tf.predict((tf.cast(X_test, dtype=tf.float32)))
print(y_test_[:10])

# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.7186515 2.8415976 2.2978573 2.7967334 2.903102  1.6998186 2.1301503
 2.728647  2.0734162 1.6343772]


0.54285735

## Benchmark with Sklearn's Linear Regression

In [30]:
# Fit sklearn's Linear Regression.
linreg_sk = LinearRegressionSklearn()

linreg_sk.fit(X_train, y_train)

LinearRegression()

In [31]:
# Get coefficients.
linreg_sk.intercept_, linreg_sk.coef_

(3.6417923,
 array([  6.348496  ,   0.5144263 , -14.455919  ,  21.595474  ,
         -0.04895439,  -4.965696  ,  -3.9162228 ,  -4.3132935 ],
       dtype=float32))

In [32]:
# Predicted response for training data.
y_train_ = linreg_sk.predict(X_train)
y_train_[:10]

array([1.5379176, 1.472091 , 2.2133121, 3.8295603, 3.0244732, 1.9933348,
       2.263915 , 1.0535035, 1.0954115, 1.9086264], dtype=float32)

In [33]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.51953274

In [34]:
# Predicted response for test data.
y_test_ = linreg_sk.predict(X_test)
y_test_[:10]

array([1.75787538, 2.8031482 , 2.30476246, 2.80146927, 2.87024621,
       1.75832087, 2.11390826, 2.71989601, 2.10377988, 1.68258784])

In [35]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5393498488643094