# Linear Regression with Implementation

## Introduction

## Linear Regression Model

[To be continued.]

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import tensorflow as tf

import sys
sys.path.append('../numpy/')

from metrics import mean_squared_error

# Seed every random generator this notebook relies on (stdlib `random`,
# PyTorch, NumPy) so that Restart Kernel -> Run All reproduces the results.
# np.random.seed() comes last because it returns None and therefore leaves
# no stray cell output.
random.seed(71)
torch.manual_seed(71)
np.random.seed(71)

In [2]:
%load_ext autoreload
%autoreload 2

## California Housing Dataset and Preprocessing

In [3]:
import sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Load the California housing dataset and pull out the feature matrix X
# and the target vector y.
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [5]:
X.shape, y.shape

((20640, 8), (20640,))

In [6]:
print(housing.feature_names)
X[:3]

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02]])

In [7]:
print(housing.target_names)
y[:3]

['MedHouseVal']


array([4.526, 3.585, 3.521])

In [8]:
# Hold out 25% of the (shuffled) examples as a test set; the fixed
# random_state makes the split repeatable.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=71,
)

In [9]:
print(X_train_raw.shape, y_train.shape)
print(X_test_raw.shape, y_test.shape)

(15480, 8) (15480,)
(5160, 8) (5160,)


In [10]:
# Feature scaling: rescale each feature with a min-max scaler. The scaler is
# fitted on the training split only and then applied unchanged to the test
# split, so no test-set statistics leak into preprocessing.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X_train_raw)

X_train = min_max_scaler.transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

In [11]:
# Cast features and targets to float32 (the TensorFlow placeholders defined
# later in this notebook are declared as tf.float32).
X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)
y_train, y_test = y_train.astype(np.float32), y_test.astype(np.float32)

In [12]:
X_train.dtype, y_train.dtype

(dtype('float32'), dtype('float32'))

## Numpy Implementation of Linear Regression

In [2]:
class LinearRegression(object):
    """NumPy implementation of linear regression trained by mini-batch SGD.

    The model is ``y_hat = X w + b``. Weights and bias start at zero and are
    updated once per mini-batch.

    Typical use: ``get_data(X, y)`` -> ``fit()`` -> ``predict(X_new)``.

    Args:
        batch_size: Number of examples per mini-batch.
        lr: Learning rate (step size) of each gradient update.
        n_epochs: Number of full passes over the training data.
    """

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Store the training set and record its dimensions.

        Args:
            X_train: 2-D array of shape (n_examples, n_inputs).
            y_train: Array holding one target per row of ``X_train``.
            shuffle: If True, apply one random permutation to the examples.
                The permutation is drawn from NumPy's global generator, so
                ``np.random.seed`` makes it reproducible.

        Raises:
            ValueError: If ``X_train`` and ``y_train`` differ in length.
        """
        n_examples, n_inputs = X_train.shape
        if len(y_train) != n_examples:
            raise ValueError(
                'X_train has {0} rows but y_train has {1} targets.'
                .format(n_examples, len(y_train)))

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = n_examples, n_inputs
        self.X_train = X_train
        self.y_train = y_train

        if shuffle:
            # Fancy indexing returns copies, so the caller's arrays are
            # left untouched.
            perm = np.random.permutation(self.n_examples)
            self.X_train = self.X_train[perm]
            self.y_train = self.y_train[perm]

    def _create_weights(self):
        """Create zero-initialised weights (n_inputs, 1) and bias (1, 1)."""
        self.w = np.zeros((self.n_inputs, 1))
        self.b = np.zeros((1, 1))

    def _model(self, X):
        """Linear regression model: returns ``X w + b`` as a column."""
        return np.matmul(X, self.w) + self.b

    def _loss(self, y, y_):
        r"""Mean squared error loss.

        squared_error_loss(y, y_) = 1/n * \sum_{i=1}^n (y_i - y__i)^2

        Both arguments must have the same shape. The element-wise squared
        errors are also kept in ``self.squared_error``.
        """
        self.squared_error = np.square(y - y_)
        return np.mean(self.squared_error)

    def _optimize(self, X, y):
        """Perform one gradient-descent step on a mini-batch.

        Args:
            X: Batch features of shape (m, n_inputs).
            y: Batch targets as a column of shape (m, 1).

        The gradients used are those of *half* the mean squared error
        (there is no factor 2), i.e. ``dw = X^T (y_hat - y) / m``.
        """
        m = X.shape[0]

        residual = self._model(X) - y
        # Dividing the array by m (instead of multiplying by ``1 / m``) is a
        # true division regardless of the interpreter's integer-division rule.
        dw = np.matmul(X.T, residual) / m
        db = np.mean(residual)

        # Update in place so that self.w / self.b keep their shapes.
        self.w -= self.lr * dw
        self.b -= self.lr * db

    def _fetch_batch(self):
        """Yield consecutive ``(X_batch, y_batch)`` mini-batches.

        The last batch is shorter when ``batch_size`` does not divide the
        number of examples. Batches are slices of the stored arrays.
        """
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            yield self.X_train[start:stop], self.y_train[start:stop]

    def fit(self):
        """Fit the model on the data given to ``get_data``.

        Prints the example-weighted average training loss every 100 epochs
        (epochs are counted from 0). The loss of each batch is measured right
        after that batch's update.

        Returns:
            self, to allow call chaining.
        """
        self._create_weights()

        for epoch in range(self.n_epochs):
            total_loss = 0
            for X_train_b, y_train_b in self._fetch_batch():
                # Targets must be a column so that they line up with the
                # (m, 1) model output instead of broadcasting to (m, m).
                y_train_b = y_train_b.reshape((y_train_b.shape[0], -1))
                self._optimize(X_train_b, y_train_b)
                batch_loss = self._loss(y_train_b, self._model(X_train_b))
                total_loss += batch_loss * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('epoch {0}: training loss {1}'
                      .format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Return ``(bias, weights)``: bias is (1, 1), weights are 1-D."""
        return self.b, self.w.reshape((-1,))

    def predict(self, X):
        """Predict for new data; returns a 1-D array with one value per row."""
        return self._model(X).reshape((-1,))

## Fitting Linear Regression in NumPy

In [31]:
# Fit our Linear Regression.
linreg = LinearRegression(batch_size=64, lr=0.1, n_epochs=1000)

In [32]:
# Load the training data into the model (examples are shuffled once).
linreg.get_data(X_train, y_train, shuffle=True)

In [33]:
linreg.fit()

epoch 0: training loss 1.1112243550186303
epoch 100: training loss 0.5293946378389645
epoch 200: training loss 0.5276262445563455
epoch 300: training loss 0.5265699318710377
epoch 400: training loss 0.5257132266662312
epoch 500: training loss 0.5249696632239543
epoch 600: training loss 0.5243109685544148
epoch 700: training loss 0.523721149910845
epoch 800: training loss 0.5231890446317738
epoch 900: training loss 0.5227063340110083


<__main__.LinearRegression at 0x7f5c9271fb00>

In [34]:
# Get coefficient.
linreg.get_coeff()

(array([[3.91335512]]),
 array([ 5.74626582,  0.52201095, -3.34398026,  8.43749893, -0.02363628,
        -4.02147289, -4.14373717, -4.55415946]))

In [35]:
# Predicted response for training data.
y_train_ = linreg.predict(X_train)
y_train_[:10]

array([1.61946665, 1.46694212, 2.21882292, 3.81243392, 3.01226492,
       2.00264181, 2.29643821, 1.04701723, 0.9864546 , 1.89761866])

In [36]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5247249328649896

In [37]:
# Predicted response for test data.
y_test_ = linreg.predict(X_test)
y_test_[:10]

array([1.74171823, 2.90144268, 2.34501091, 2.83412597, 2.96352553,
       1.71128246, 2.18785575, 2.78037396, 2.10717648, 1.65409458])

In [38]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5452821804831438

## PyTorch Implementation of Linear Regression

In [23]:
class LinearRegressionTorch(nn.Module):
    """PyTorch implementation of linear regression trained by mini-batch SGD.

    Typical use: ``get_data(X, y)`` -> ``build()`` -> ``fit()`` ->
    ``predict(X_new)``.

    Args:
        batch_size: Number of examples per mini-batch.
        lr: Learning rate passed to the SGD optimizer.
        n_epochs: Number of full passes over the training data.
    """

    def __init__(self, batch_size=64, lr=0.01, n_epochs=1000):
        super(LinearRegressionTorch, self).__init__()
        self.batch_size = batch_size
        self.lr = lr
        self.n_epochs = n_epochs

    def get_data(self, X_train, y_train, shuffle=True):
        """Store the training set and record its dimensions.

        Args:
            X_train: 2-D NumPy array of shape (n_examples, n_inputs). Its
                dtype must match the layer's parameters (float32 unless the
                torch default dtype was changed), because batches are
                converted with ``torch.from_numpy`` without casting.
            y_train: NumPy array holding one target per row of ``X_train``,
                with the same dtype requirement.
            shuffle: If True, apply one random permutation to the examples.
                The permutation is drawn from NumPy's global generator, so
                ``np.random.seed`` makes it reproducible.

        Raises:
            ValueError: If ``X_train`` and ``y_train`` differ in length.
        """
        n_examples, n_inputs = X_train.shape
        if len(y_train) != n_examples:
            raise ValueError(
                'X_train has {0} rows but y_train has {1} targets.'
                .format(n_examples, len(y_train)))

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = n_examples, n_inputs
        self.X_train = X_train
        self.y_train = y_train

        if shuffle:
            perm = np.random.permutation(self.n_examples)
            self.X_train = self.X_train[perm]
            self.y_train = self.y_train[perm]

    def _create_model(self):
        """Create the linear layer mapping n_inputs features to one output."""
        self.fc = nn.Linear(self.n_inputs, 1)

    def forward(self, x):
        """Forward pass: apply the linear layer to a batch of inputs."""
        return self.fc(x)

    def _create_loss(self):
        """Create the mean squared error loss."""
        self.criterion = nn.MSELoss()

    def _create_optimizer(self):
        """Create the stochastic gradient descent optimizer."""
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def build(self):
        """Build model, loss and optimizer; call after ``get_data``."""
        self._create_model()
        self._create_loss()
        self._create_optimizer()

    def _fetch_batch(self):
        """Yield consecutive ``(X_batch, y_batch)`` NumPy mini-batches.

        The last batch is shorter when ``batch_size`` does not divide the
        number of examples. ``take`` returns fresh arrays, which
        ``torch.from_numpy`` can wrap directly.
        """
        for start in range(0, self.n_examples, self.batch_size):
            stop = min(start + self.batch_size, self.n_examples)
            idx_batch = np.arange(start, stop)
            yield (self.X_train.take(idx_batch, axis=0),
                   self.y_train.take(idx_batch, axis=0))

    def fit(self):
        """Fit the model; ``build`` must have been called first.

        Prints the example-weighted average training loss every 100 epochs
        (epochs are counted from 1). The loss of each batch is measured
        before that batch's update.

        Returns:
            self, to allow call chaining.
        """
        for epoch in range(1, self.n_epochs + 1):
            total_loss = 0.0
            for X_train_b, y_train_b in self._fetch_batch():
                # Convert to Tensor from NumPy array and reshape ys into a
                # column so that they match the (m, 1) model output.
                X_train_b, y_train_b = (
                    torch.from_numpy(X_train_b),
                    torch.from_numpy(y_train_b).view(-1, 1))

                y_pred_b = self(X_train_b)
                loss = self.criterion(y_pred_b, y_train_b)

                # Zero grads, perform backward pass, and update weights.
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Accumulate a plain Python float. Adding the loss tensor
                # itself would keep every batch's autograd graph alive for
                # the whole epoch and make the log print a tensor.
                total_loss += loss.item() * X_train_b.shape[0]

            if epoch % 100 == 0:
                print('Epoch {0}: training loss: {1}'
                      .format(epoch, total_loss / self.n_examples))

        return self

    def get_coeff(self):
        """Return ``(bias, weight)`` of the linear layer as NumPy arrays."""
        # Detach the parameters from autograd before converting them.
        return (self.fc.bias.detach().numpy(),
                self.fc.weight.detach().numpy())

    def predict(self, X):
        """Predict for new data.

        Args:
            X: 2-D NumPy array with the same dtype as the training features.

        Returns:
            1-D NumPy array with one prediction per row of ``X``.
        """
        with torch.no_grad():
            X_ = torch.from_numpy(X)
            return self(X_).numpy().reshape((-1,))

## Fitting Linear Regression in PyTorch

In [14]:
# Fit our Linear Regression.
linreg_torch = LinearRegressionTorch(batch_size=64, lr=0.1, n_epochs=1000)

In [15]:
linreg_torch.get_data(X_train, y_train, shuffle=True)

In [16]:
linreg_torch.build()

In [17]:
linreg_torch.fit()

Epoch 100: training loss: 0.532164972235066
Epoch 200: training loss: 0.5302024562537516
Epoch 300: training loss: 0.5287801793379378
Epoch 400: training loss: 0.5276440650609728
Epoch 500: training loss: 0.526711362738942
Epoch 600: training loss: 0.525933820425078
Epoch 700: training loss: 0.525280215481455
Epoch 800: training loss: 0.5247281541503985
Epoch 900: training loss: 0.5242606995949758
Epoch 1000: training loss: 0.5238643141371951


In [18]:
# Get coefficient.
linreg_torch.get_coeff()

(array([3.8662462], dtype=float32),
 array([[ 5.9429622e+00,  5.2768230e-01, -7.0084615e+00,  1.2826800e+01,
         -7.5929281e-03, -4.6167784e+00, -4.1026435e+00, -4.4358959e+00]],
       dtype=float32))

In [19]:
# Predicted response for training data.
y_train_ = linreg_torch.predict(X_train)
y_train_[:10]

array([1.6682272, 1.5375609, 2.2854958, 3.855796 , 3.0556676, 2.0770369,
       2.32626  , 1.0982981, 1.0671127, 1.968696 ], dtype=float32)

In [20]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.5256628

In [21]:
# Predicted response for test data.
y_test_ = linreg_torch.predict(X_test)
y_test_[:10]

array([1.7838631, 2.930702 , 2.400587 , 2.8542545, 3.0089793, 1.798037 ,
       2.2319884, 2.7995212, 2.1735897, 1.7323658], dtype=float32)

In [22]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5455416

## TensorFlow Implementation of Linear Regression

In [78]:
def reset_tf_graph(seed=71):
    """Reset the default TensorFlow graph and re-seed the random generators.

    Args:
        seed: Seed applied to TensorFlow's graph-level generator, to NumPy's
            global generator and to Python's ``random`` module.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # Seed the stdlib generator as well, so that any shuffling done through
    # the `random` module is reproducible too.
    random.seed(seed)


class LinearRegressionTF(object):
    """A TensorFlow (1.x graph mode) implementation of Linear Regression.

    Typical use: ``get_data(X, y)`` -> ``build_graph()`` -> ``fit()`` ->
    ``get_coeff()`` / ``predict(X_new)``. ``fit`` writes the trained
    variables to a checkpoint, which ``get_coeff`` and ``predict`` restore
    into a fresh session.

    Args:
        batch_size: Number of examples per mini-batch.
        learning_rate: Learning rate of the gradient descent optimizer.
        n_epochs: Number of full passes over the training data.
        checkpoint_path: Path prefix of the checkpoint written by ``fit``.
    """

    def __init__(self, batch_size=64, learning_rate=0.01, n_epochs=1000,
                 checkpoint_path='checkpoints/linreg'):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.learning_rate = learning_rate
        self.checkpoint_path = checkpoint_path

    def get_data(self, X_train, y_train, shuffle=True):
        """Store a copy of the training set and record its dimensions.

        Args:
            X_train: 2-D array of shape (n_examples, n_inputs).
            y_train: Array holding one target per row of ``X_train``.
            shuffle: If True, apply one random permutation to the examples.
                The permutation is drawn from NumPy's global generator, so
                ``np.random.seed`` makes it reproducible.

        Raises:
            ValueError: If ``X_train`` and ``y_train`` differ in length.
        """
        n_examples, n_inputs = X_train.shape
        if len(y_train) != n_examples:
            raise ValueError(
                'X_train has {0} rows but y_train has {1} targets.'
                .format(n_examples, len(y_train)))

        # Get the numbers of examples and inputs.
        self.n_examples, self.n_inputs = n_examples, n_inputs

        if shuffle:
            order = np.random.permutation(self.n_examples)
        else:
            order = np.arange(self.n_examples)
        # Indexing with an index array copies the data in both cases.
        self.X_train = X_train[order]
        self.y_train = y_train[order]

    def _create_placeholders(self):
        """Create placeholders for features and response."""
        self.X = tf.placeholder(tf.float32, shape=(None, self.n_inputs), name='X')
        self.y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

    def _create_weights(self):
        """Create model weights (random normal) and bias (zeros)."""
        self.w = tf.get_variable(shape=[self.n_inputs, 1],
                                 initializer=tf.random_normal_initializer(),
                                 name='weights')
        self.b = tf.get_variable(shape=[1],
                                 initializer=tf.zeros_initializer(),
                                 name='bias')

    def _model(self, X):
        """Linear regression model: ``X w + b``."""
        return tf.matmul(X, self.w) + self.b

    def _create_model(self):
        """Create the prediction op for the feature placeholder."""
        self.y_ = self._model(self.X)

    def _create_loss(self):
        """Create the mean squared error loss."""
        self.loss = tf.reduce_mean(tf.square(self.y_ - self.y), name='loss')

    def _create_optimizer(self):
        """Create the gradient descent training op."""
        self.optimizer = (
            tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            .minimize(self.loss))

    def build_graph(self):
        """Build the computational graph; call after ``get_data``."""
        self._create_placeholders()
        self._create_weights()
        self._create_model()
        self._create_loss()
        self._create_optimizer()
        # A single Saver shared by fit/get_coeff/predict; creating one per
        # call would add new save/restore ops to the graph every time.
        self._saver = tf.train.Saver()

    def _fetch_batch(self):
        """Yield consecutive ``(X_batch, y_batch)`` mini-batches.

        Targets are reshaped into a column to match the ``y`` placeholder.
        The last batch is shorter when ``batch_size`` does not divide the
        number of examples.
        """
        for start in range(0, self.n_examples, self.batch_size):
            stop = start + self.batch_size
            yield (self.X_train[start:stop],
                   self.y_train[start:stop].reshape(-1, 1))

    def fit(self):
        """Fit the model and save it to ``checkpoint_path``.

        Prints the example-weighted average training loss every 100 epochs
        (epochs are counted from 1).

        Returns:
            self, to allow call chaining.
        """
        import os

        # Saver.save() does not create the parent directory itself.
        checkpoint_dir = os.path.dirname(self.checkpoint_path)
        if checkpoint_dir and not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(1, self.n_epochs + 1):
                total_loss = 0
                for X_train_b, y_train_b in self._fetch_batch():
                    feed_dict = {self.X: X_train_b, self.y: y_train_b}
                    _, batch_loss = sess.run([self.optimizer, self.loss],
                                             feed_dict=feed_dict)
                    total_loss += batch_loss * X_train_b.shape[0]

                if epoch % 100 == 0:
                    print('Epoch {0}: training loss: {1}'
                          .format(epoch, total_loss / self.n_examples))

            # Save model.
            self._saver.save(sess, self.checkpoint_path)

        return self

    def get_coeff(self):
        """Return ``(bias, weights)`` restored from the checkpoint."""
        with tf.Session() as sess:
            # Restoring assigns every saved variable, so no separate
            # initialisation is needed.
            self._saver.restore(sess, self.checkpoint_path)
            bias, weights = sess.run([self.b, self.w])
            return bias, weights.reshape((-1,))

    def predict(self, X):
        """Predict for new data using the checkpointed model.

        Args:
            X: Array of shape (n, n_inputs). A ``tf.Tensor`` (e.g. the result
                of ``tf.cast``) is still accepted and is evaluated first.

        Returns:
            1-D array with one prediction per row of ``X``.
        """
        with tf.Session() as sess:
            self._saver.restore(sess, self.checkpoint_path)
            if isinstance(X, tf.Tensor):
                # Tensors cannot be fed through feed_dict; evaluate to an array.
                X = sess.run(X)
            # Reuse the prediction op built in build_graph instead of adding
            # new ops to the graph on every call.
            return sess.run(self.y_, feed_dict={self.X: X}).reshape((-1,))

## Fitting Linear Regression in TensorFlow

In [79]:
reset_tf_graph()

linreg_tf = LinearRegressionTF(batch_size=64, learning_rate=0.1, n_epochs=1000)

In [80]:
linreg_tf.get_data(X_train, y_train, shuffle=True)

In [81]:
linreg_tf.build_graph()

In [82]:
linreg_tf.fit()

Epoch 100: training loss: 0.5318002716669432


In [83]:
linreg_tf.get_coeff()

INFO:tensorflow:Restoring parameters from checkpoints/linreg


(array([4.0444417], dtype=float32),
 array([ 5.538036  ,  0.5091248 ,  0.60153955,  3.3050392 , -0.07601314,
        -2.0407913 , -4.2091565 , -4.606566  ], dtype=float32))

In [84]:
# Predicted response for training data.
# X_train is already float32, so it is passed as a plain array; wrapping it
# in tf.cast would add the whole dataset to the graph as a constant.
y_train_ = linreg_tf.predict(X_train)
print(y_train_[:10])

# Prediction mean squared error for training data.
mean_squared_error(y_train, y_train_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.691546  1.5031507 2.256196  3.845348  3.0387218 2.0422158 2.3373241
 1.0914235 0.9825156 1.9360917]


0.5322704

In [85]:
# Predicted response for test data.
# X_test is already float32, so it is passed as a plain array; wrapping it
# in tf.cast would add the whole dataset to the graph as a constant.
y_test_ = linreg_tf.predict(X_test)
print(y_test_[:10])

# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

INFO:tensorflow:Restoring parameters from checkpoints/linreg
[1.7712905 2.9779644 2.3958097 2.885332  3.0384543 1.7389159 2.2504058
 2.8342211 2.1450553 1.6847153]


0.55203545

## Benchmark with Sklearn's Linear Regression

In [30]:
# Fit sklearn's Linear Regression.
from sklearn.linear_model import LinearRegression as LinearRegressionSklearn

linreg_sk = LinearRegressionSklearn()

linreg_sk.fit(X_train, y_train.reshape(y_train.shape[0]))

LinearRegression()

In [31]:
# Get coefficients.
linreg_sk.intercept_, linreg_sk.coef_

(3.6417923,
 array([  6.348496  ,   0.5144263 , -14.455919  ,  21.595474  ,
         -0.04895439,  -4.965696  ,  -3.9162228 ,  -4.3132935 ],
       dtype=float32))

In [32]:
# Predicted response for training data.
y_train_ = linreg_sk.predict(X_train)
y_train_[:10]

array([1.5379176, 1.472091 , 2.2133121, 3.8295603, 3.0244732, 1.9933348,
       2.263915 , 1.0535035, 1.0954115, 1.9086264], dtype=float32)

In [33]:
# Prediction squared error for training data.
mean_squared_error(y_train, y_train_)

0.51953274

In [34]:
# Predicted response for test data.
y_test_ = linreg_sk.predict(X_test)
y_test_[:10]

array([1.75787538, 2.8031482 , 2.30476246, 2.80146927, 2.87024621,
       1.75832087, 2.11390826, 2.71989601, 2.10377988, 1.68258784])

In [35]:
# Prediction mean squared error for test data.
mean_squared_error(y_test, y_test_)

0.5393498488643094