# Linear Regression with Implementation

## Introduction

## Linear Regression Model

## Numpy Implementation of Linear Regression

In [22]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import random

import numpy as np
import tensorflow as tf
from mxnet import nd, autograd, init, gluon
from mxnet.gluon import data as gdata
from mxnet.gluon import nn
from mxnet.gluon import loss as gloss

np.random.seed(71)

In [None]:
class LinearRegression(object):
    """Numpy implementation of Linear Regression.

    The model is ``y_hat = X w + b``. It is trained with mini-batch gradient
    descent, starting from all-zero weights and bias.

    Args:
        batch_size: Number of examples per mini-batch.
        lr: Learning rate (step size) of each gradient update.
        n_epochs: Number of passes over the training data made by ``fit``.
    """
    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Store the training data and record its dimensions.

        Args:
            X_train: Array-like of shape (n_examples, n_inputs).
            y_train: Array-like holding one response per example.
            shuffle: If True, reorder the examples once with a random
                permutation (features and responses stay paired).

        Raises:
            ValueError: If X_train and y_train differ in number of examples.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                'X_train has {0} examples but y_train has {1}.'
                .format(X_train.shape[0], y_train.shape[0]))

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = X_train.shape

        if shuffle:
            # Use NumPy's global RNG so that np.random.seed(...) makes the
            # ordering, and therefore the whole training run, reproducible.
            idx = np.random.permutation(self.n_examples)
            X_train = X_train[idx]
            y_train = y_train[idx]

        self.X_train = X_train
        self.y_train = y_train

    def _create_weights(self):
        """Create model weights (n_inputs, 1) and bias (1, 1), all zeros."""
        self.w = np.zeros((self.n_inputs, 1))
        self.b = np.zeros((1, 1))

    def _model(self, X):
        """Linear regression model: returns X w + b with shape (n, 1)."""
        return np.matmul(X, self.w) + self.b

    def _loss(self, y, y_):
        """Mean squared error loss.

        mse(y, y_) = 1/n * sum_{i=1}^n (y_i - y_hat_i)^2

        The element-wise squared errors are kept in ``self.squared_error``.
        Both arguments should have the same shape; ``fit`` passes (n, 1)
        arrays.
        """
        self.squared_error = np.square(y - y_)
        return np.mean(self.squared_error)

    def _optimize(self, X, y):
        """Run one gradient descent step on a mini-batch.

        The gradients used are those of the half mean squared error,
        1/(2m) * sum (y_hat - y)^2; compared with the loss reported by
        ``_loss`` the constant factor 2 is absorbed into the learning rate.

        Args:
            X: Features of shape (m, n_inputs).
            y: Responses of shape (m, 1).
        """
        m = X.shape[0]

        residual = self._model(X) - y
        # Dividing the float array (rather than computing 1 / m first) keeps
        # this correct even without true division on integers.
        dw = np.matmul(X.T, residual) / m
        db = np.mean(residual)

        # Update in place so the parameter arrays keep their identity.
        self.w -= self.lr * dw
        self.b -= self.lr * db

    def _fetch_batch(self):
        """Yield consecutive (X, y) mini-batches covering the training data.

        The last batch is smaller when n_examples is not a multiple of
        batch_size.
        """
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            yield self.X_train[start:stop], self.y_train[start:stop]

    def fit(self):
        """Fit model.

        Requires ``get_data`` to have been called. Every 100 epochs the
        example-weighted average of the batch losses is printed; each batch
        loss is measured right after the update on that batch.

        Returns:
            self, so calls can be chained.
        """
        self._create_weights()

        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0
            for X_train_b, y_train_b in self._fetch_batch():
                # Column vector, to match the (m, 1) output of the model.
                y_train_b = y_train_b.reshape((y_train_b.shape[0], -1))
                self._optimize(X_train_b, y_train_b)
                train_loss = self._loss(y_train_b, self._model(X_train_b))
                total_loss += train_loss * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('epoch {0}: training loss {1}'
                      .format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Return (bias, weights): bias is (1, 1), weights is (n_inputs,)."""
        return self.b, self.w.reshape((-1,))

    def predict(self, X):
        """Return predictions for X as a 1-D array with one value per row."""
        return self._model(X).reshape((-1,))

## Data Preparation and Preprocessing

In [3]:
import sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression as LinearRegressionSklearn

# https://github.com/bowen0701/machine-learning/blob/master/metrics.py
from metrics import mean_squared_error

In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
# Read California housing data.
housing = fetch_california_housing()
X = housing.data
y = housing.target

In [6]:
X.shape, y.shape

((20640, 8), (20640,))

In [7]:
X[:3]

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02]])

In [8]:
y[:3]

array([4.526, 3.585, 3.521])

In [9]:
# Split data into training and test datasets.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=71, shuffle=True)

In [10]:
print(X_train_raw.shape, y_train.shape)
print(X_test_raw.shape, y_test.shape)

(15480, 8) (15480,)
(5160, 8) (5160,)


In [11]:
# Rescale the features with a min-max scaler. The scaler is fitted on the
# training data only and then applied unchanged to the test data.
min_max_scaler = MinMaxScaler()

X_train = min_max_scaler.fit_transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

## Fitting Linear Regression in NumPy

In [40]:
# Instantiate our NumPy Linear Regression (training happens in fit()).
linreg = LinearRegression(batch_size=64, lr=0.1, n_epochs=1000)

In [41]:
# Hand the training data to the model, shuffling the examples.
linreg.get_data(X_train, y_train, shuffle=True)

In [42]:
linreg.fit()

epoch 0: training loss 1.1109458304776678
epoch 100: training loss 0.5293411670004007
epoch 200: training loss 0.5275936241389896
epoch 300: training loss 0.5265447104874758
epoch 400: training loss 0.5256891251901218
epoch 500: training loss 0.5249445110457833
epoch 600: training loss 0.5242842619294596
epoch 700: training loss 0.5236929859314564
epoch 800: training loss 0.5231597123001919
epoch 900: training loss 0.5226761631097233


<__main__.LinearRegression at 0x7f784ad809e8>

In [43]:
# Get coefficient.
linreg.get_coeff()

(array([[3.9161795]]),
 array([ 5.73873066,  0.54251911, -3.34088605,  8.49303336, -0.034906  ,
        -4.00010046, -4.162779  , -4.5535294 ]))

In [44]:
# Predicted response for training data.
y_train_ = linreg.predict(X_train)
y_train_[:10]

array([1.63329384, 1.48534079, 2.23449502, 3.81135064, 3.01983524,
       2.01967553, 2.30766446, 1.04633851, 0.9882211 , 1.91173358])

In [45]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5248672675382742

In [46]:
# Predicted response for test data.
y_test_ = linreg.predict(X_test)
y_test_[:10]

array([1.74126774, 2.9086391 , 2.3629386 , 2.82907134, 2.97764202,
       1.72007043, 2.20467688, 2.78557864, 2.12347464, 1.66323666])

In [47]:
# Mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.545480649164595

## TensorFlow Implementation of Linear Regression

In [23]:
def reset_tf_graph(seed=71):
    """Reset the default TensorFlow graph and re-seed the random generators.

    Args:
        seed: Seed applied to TensorFlow's graph-level random seed, to
            NumPy's global RNG and to Python's ``random`` module.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # Seed Python's `random` as well, so that code shuffling with it is
    # reproducible too.
    random.seed(seed)


class LinearRegressionTF(object):
    """A TensorFlow implementation of Linear Regression.

    Uses the graph/session API (``tf.placeholder``, ``tf.Session``). The
    trained parameters are written to a checkpoint by ``fit`` and read back
    by ``get_coeff`` and ``predict``.

    Typical call order: ``get_data`` -> ``build_graph`` -> ``fit`` ->
    ``get_coeff`` / ``predict``.

    Args:
        batch_size: Number of examples per mini-batch.
        learning_rate: Step size of the gradient descent optimizer.
        n_epochs: Number of passes over the training data made by ``fit``.
        checkpoint_path: Path prefix under which the trained variables are
            saved and from which they are restored.
    """
    def __init__(self, batch_size=64, learning_rate=0.01, n_epochs=1000,
                 checkpoint_path='checkpoints/linreg'):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.learning_rate = learning_rate
        self.checkpoint_path = checkpoint_path

    def get_data(self, X_train, y_train, shuffle=True):
        """Store the training data and record its dimensions.

        Args:
            X_train: Array-like of shape (n_examples, n_inputs).
            y_train: Array-like holding one response per example.
            shuffle: If True, reorder the examples once with a random
                permutation (features and responses stay paired).

        Raises:
            ValueError: If X_train and y_train differ in number of examples.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                'X_train has {0} examples but y_train has {1}.'
                .format(X_train.shape[0], y_train.shape[0]))

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = X_train.shape

        if shuffle:
            # Use NumPy's global RNG so that np.random.seed(...) makes the
            # ordering reproducible.
            idx = np.random.permutation(self.n_examples)
        else:
            idx = np.arange(self.n_examples)
        # Indexing with an index array stores copies of the caller's data.
        self.X_train = X_train[idx]
        self.y_train = y_train[idx]

    def _create_placeholders(self):
        """Create placeholders for features (None, n_inputs) and response (None, 1)."""
        self.X = tf.placeholder(tf.float32, shape=(None, self.n_inputs), name='X')
        self.y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

    def _create_weights(self):
        """Create model weights (random normal) and bias (zeros)."""
        self.w = tf.get_variable(shape=[self.n_inputs, 1],
                                 initializer=tf.random_normal_initializer(),
                                 name='weights')
        self.b = tf.get_variable(shape=[1],
                                 initializer=tf.zeros_initializer(),
                                 name='bias')

    def _model(self, X):
        """Linear regression model: X w + b."""
        return tf.matmul(X, self.w) + self.b

    def _create_model(self):
        """Create the prediction op for the feature placeholder."""
        self.y_ = self._model(self.X)

    def _create_loss(self):
        """Create mean squared error loss."""
        self.loss = tf.reduce_mean(tf.square(self.y_ - self.y), name='loss')

    def _create_optimizer(self):
        """Create the gradient descent training op that minimizes the loss."""
        self.optimizer = (
            tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            .minimize(self.loss))

    def build_graph(self):
        """Build computational graph. Requires ``get_data`` for n_inputs."""
        self._create_placeholders()
        self._create_weights()
        self._create_model()
        self._create_loss()
        self._create_optimizer()

    def _fetch_batch(self):
        """Yield consecutive mini-batches (X of (m, n_inputs), y of (m, 1)).

        The last batch is smaller when n_examples is not a multiple of
        batch_size.
        """
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            yield (self.X_train[start:stop],
                   self.y_train[start:stop].reshape(-1, 1))

    def fit(self):
        """Fit model and save the trained variables to ``checkpoint_path``.

        Every 100 epochs the example-weighted average of the batch losses is
        printed.
        """
        saver = tf.train.Saver()

        # Create the checkpoint directory up front so that saving does not
        # depend on it already existing.
        checkpoint_dir = os.path.dirname(self.checkpoint_path)
        if checkpoint_dir and not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(1, self.n_epochs + 1):
                total_loss = 0
                for X_train_b, y_train_b in self._fetch_batch():
                    feed_dict = {self.X: X_train_b, self.y: y_train_b}
                    _, batch_loss = sess.run([self.optimizer, self.loss],
                                             feed_dict=feed_dict)
                    total_loss += batch_loss * X_train_b.shape[0]

                if epoch % 100 == 0:
                    print('Epoch {0}: training loss: {1}'
                          .format(epoch, total_loss / self.n_examples))

            # Save model.
            saver.save(sess, self.checkpoint_path)

    def get_coeff(self):
        """Return (bias, weights) restored from the checkpoint.

        bias has shape (1,), weights has shape (n_inputs,).
        """
        with tf.Session() as sess:
            # Load model. Restoring assigns every saved variable, so no
            # separate initializer run is needed.
            saver = tf.train.Saver()
            saver.restore(sess, self.checkpoint_path)
            return self.b.eval(), self.w.eval().reshape((-1,))

    def predict(self, X):
        """Return predictions for X as a 1-D array, using the checkpoint.

        Args:
            X: Features of shape (n, n_inputs). NumPy arrays (or nested
                lists/tuples) are fed to the existing placeholder, so no
                cast is needed. Any other input, e.g. a float32
                ``tf.Tensor``, is passed through the model directly as
                before.
        """
        with tf.Session() as sess:
            # Load model. Restoring assigns every saved variable, so no
            # separate initializer run is needed.
            saver = tf.train.Saver()
            saver.restore(sess, self.checkpoint_path)
            if isinstance(X, (np.ndarray, list, tuple)):
                # Reuse the prediction op built in build_graph instead of
                # adding new ops to the graph on every call.
                y_hat = sess.run(self.y_, feed_dict={self.X: X})
            else:
                y_hat = self._model(X).eval()
            return y_hat.reshape((-1,))

## Fitting Linear Regression in TensorFlow

In [24]:
reset_tf_graph()
linreg_tf = LinearRegressionTF(batch_size=64, learning_rate=0.5, n_epochs=1000)

In [25]:
linreg_tf.get_data(X_train, y_train, shuffle=True)

In [26]:
linreg_tf.build_graph()

In [27]:
linreg_tf.fit()

Epoch 100: training loss: 0.568918663201714
Epoch 200: training loss: 0.5668342337589856
Epoch 300: training loss: 0.5661185583171918
Epoch 400: training loss: 0.5659296990133995
Epoch 500: training loss: 0.5659298467312672
Epoch 600: training loss: 0.5659850975570753
Epoch 700: training loss: 0.566045966852235
Epoch 800: training loss: 0.5660970148190048
Epoch 900: training loss: 0.5661355321644505
Epoch 1000: training loss: 0.5661631324109488


In [28]:
linreg_tf.get_coeff()

INFO:tensorflow:Restoring parameters from checkpoints/linreg


(array([3.6429107], dtype=float32),
 array([  6.3936462 ,   0.48886588, -13.5008545 ,  20.64034   ,
         -0.06762783,  -5.12426   ,  -3.929075  ,  -4.422648  ],
       dtype=float32))

In [29]:
# Predicted responses for training data.
y_train_ = linreg_tf.predict((tf.cast(X_train, dtype=tf.float32)))
print(y_train_[:10])

# Mean squared error for training data.
mean_squared_error(y_train, y_train_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.4588099 1.3825915 2.1406937 3.8285503 2.9984202 1.908083  2.2254415
 1.0020704 1.0408762 1.8329332]


0.5227011085117187

In [30]:
# Predicted responses for test data.
y_test_ = linreg_tf.predict((tf.cast(X_test, dtype=tf.float32)))
print(y_test_[:10])

# Mean squared error for test data.
mean_squared_error(y_test, y_test_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.725594  2.766179  2.2336502 2.8008242 2.8124397 1.6806722 2.0460281
 2.701027  2.0294802 1.6067545]


0.5435336452162747

## Benchmark with Sklearn's Linear Regression

In [31]:
# Fit sklearn's Linear Regression.
linreg_sk = LinearRegressionSklearn()

linreg_sk.fit(X_train, y_train)

LinearRegression()

In [32]:
# Get coefficients.
linreg_sk.intercept_, linreg_sk.coef_

(3.6417826978434746,
 array([  6.34849599,   0.51442258, -14.45592149,  21.59547904,
         -0.04895467,  -4.96569765,  -3.91622294,  -4.31329812]))

In [33]:
# Predicted responses for training data.
y_train_ = linreg_sk.predict(X_train)
y_train_[:10]

array([1.53790266, 1.47207536, 2.21329706])

In [34]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5195327861846003

In [35]:
# Predicted responses for test data.
y_test_ = linreg_sk.predict(X_test)
y_test_[:10]

array([1.7578634 , 2.80313448, 2.30474671])

In [36]:
# Mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5393499137492421