# Linear Regression with Implementation

## Introduction

## Linear Regression Model

[To be continued.]

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random

import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
# import torch.nn.functional as F
# import torch.optim as optim

# import sys
# sys.path.append('../numpy/')

# from metrics import mean_squared_error

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local modules are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
# Seed every random number generator this notebook imports so that runs are
# reproducible: the data split uses `random_seed` directly, while PyTorch
# draws the initial layer weights from its own generator.
random_seed = 71
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

## California Housing Dataset and Preprocessing

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Fetch the California housing dataset and pull out features and targets.
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [None]:
# Check the shapes of the feature matrix and the target vector.
X.shape, y.shape

((20640, 8), (20640,))

In [None]:
# List the names of the feature columns.
print(housing.feature_names)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
# Peek at the first three feature rows.
X[:3]

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02]])

In [None]:
# Peek at the first three target values.
y[:3]

array([4.526, 3.585, 3.521])

In [None]:
# Hold out 20% of the data for testing, then take 10% of the remainder
# as the evaluation set; what is left is the training set.
split_options = dict(random_state=random_seed, shuffle=True)

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.20, **split_options)

X_train_raw, X_eval_raw, y_train, y_eval = train_test_split(
    X_train_raw, y_train, test_size=0.10, **split_options)

In [None]:

# Turn each target vector into a single-column 2-D array.
y_train, y_eval, y_test = (
    np.reshape(targets, (-1, 1)) for targets in (y_train, y_eval, y_test)
)

In [None]:
# Rescale features with a min-max scaler. The scaler is fitted on the
# training split only and then applied unchanged to all three splits.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X_train_raw)

X_train, X_eval, X_test = (
    min_max_scaler.transform(raw_split)
    for raw_split in (X_train_raw, X_eval_raw, X_test_raw)
)

In [None]:
# Report (features, targets) shapes for the train, eval and test splits.
for features, targets in ((X_train, y_train), (X_eval, y_eval), (X_test, y_test)):
    print(features.shape, targets.shape)

(14860, 8) (14860, 1)
(1652, 8) (1652, 1)
(4128, 8) (4128, 1)


## PyTorch Implementation of Linear Regression

In [None]:
# Convert every NumPy split into a float32 PyTorch tensor.
X_train, y_train, X_eval, y_eval, X_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_eval, y_eval, X_test, y_test)
)

In [None]:
# Pair each feature tensor with its target tensor in a TensorDataset.
train_datasets, eval_datasets, test_datasets = (
    torch.utils.data.TensorDataset(features, targets)
    for features, targets in (
        (X_train, y_train), (X_eval, y_eval), (X_test, y_test))
)

In [None]:
# Create data loaders.
# The loaders wrap the TensorDatasets built in the previous cell
# (`train_datasets`, `eval_datasets`, `test_datasets`).
# `batch_size` is assigned here because the loaders need it; the
# hyperparameter cell that also sets it comes later in the notebook.
batch_size = 64
train_dataloader = DataLoader(train_datasets, batch_size=batch_size)
eval_dataloader = DataLoader(eval_datasets, batch_size=batch_size)
test_dataloader = DataLoader(test_datasets, batch_size=batch_size)


In [None]:
# Model hyperparameters.
# Mini-batch size handed to the DataLoader constructors in this notebook.
batch_size = 64

In [None]:
# Create an iterable by DataLoader.
# DataLoader needs a dataset argument; wrap the training dataset, draw the
# first mini-batch and show its feature/target shapes as a sanity check.
train_batches = iter(DataLoader(train_datasets, batch_size=batch_size))
X_batch, y_batch = next(train_batches)
X_batch.shape, y_batch.shape

In [None]:
class LinearRegression(nn.Module):
    """PyTorch implementation of Linear Regression.

    The model is a single fully connected layer that maps ``input_size``
    features to one output value.

    Args:
        input_size: Number of input features per example.
        lr: Learning rate passed to the optimizer created by
            ``set_loss_optimizer``.
    """

    def __init__(self, input_size, lr):
        super(LinearRegression, self).__init__()

        self.input_size = input_size
        self.lr = lr

        self.fc = nn.Linear(self.input_size, 1)

    def forward(self, x):
        """Forward pass: apply the linear layer to ``x`` and return the result."""
        return self.fc(x)

    def set_loss_optimizer(self):
        """Create the MSE loss (``criterion``) and RMSprop ``optimizer``."""
        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.RMSprop(self.parameters(), lr=self.lr)

    def get_coeff(self):
        """Get model coefficients.

        Returns:
            A ``(bias, weight)`` tuple of NumPy arrays taken from the
            linear layer.
        """
        # ``.numpy()`` refuses tensors that require grad or that live on a
        # non-CPU device, so detach from the graph and move to the CPU first
        # (``.cpu()`` is a no-op for parameters already on the CPU).
        return (self.fc.bias.detach().cpu().numpy(),
                self.fc.weight.detach().cpu().numpy())

In [None]:
def train(self):
    """Train model.

    Runs ``self.n_epochs`` passes over the mini-batches yielded by
    ``self._fetch_batch()``. Each batch is converted from NumPy arrays to
    tensors, scored with ``self.criterion`` and used for one
    ``self.optimizer`` step. Every 100 epochs the example-weighted mean
    training loss (total divided by ``self.n_examples``) is printed.

    Args:
        self: Object providing ``train``, ``forward``, ``criterion``,
            ``optimizer``, ``n_epochs``, ``n_examples`` and
            ``_fetch_batch`` -- presumably an ``nn.Module``; confirm
            against the caller.
    """
    # NOTE(review): this is a module-level function. If it is attached to an
    # nn.Module subclass under the name `train`, it would shadow
    # nn.Module.train and the call below would recurse into itself.
    self.train()

    for epoch in range(1, self.n_epochs + 1):
        total_loss = 0.0
        for X_train_b, y_train_b in self._fetch_batch():
            # Convert to Tensor from NumPy array and reshape ys to a column.
            X_train_b = torch.from_numpy(X_train_b)
            y_train_b = torch.from_numpy(y_train_b).view(-1, 1)

            y_pred_b = self.forward(X_train_b)
            loss = self.criterion(y_pred_b, y_train_b)
            # Accumulate a plain Python float: adding the loss tensor itself
            # would keep every batch's autograd history alive for the whole
            # epoch and make the report print a tensor repr.
            total_loss += loss.item() * X_train_b.shape[0]

            # Zero grads, performs backward pass, and update weights.
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        if epoch % 100 == 0:
            print('Epoch {0}: training loss: {1}'
                  .format(epoch, total_loss / self.n_examples))


## Fitting Linear Regression in PyTorch

In [None]:
# Fit our Linear Regression.
# The LinearRegression class defined above takes the number of input
# features and the learning rate. The instance is bound to `linreg_torch`
# because that is the name the following cells use.
linreg_torch = LinearRegression(input_size=X_train.shape[1], lr=0.1)

NameError: ignored

In [None]:
# Hand the training features and targets to the model, with shuffling on.
# NOTE(review): `get_data` is not among the methods of the LinearRegression
# class defined in this notebook (`forward`, `set_loss_optimizer`,
# `get_coeff`) -- confirm the intended API.
linreg_torch.get_data(X_train, y_train, shuffle=True)

NameError: ignored

In [None]:
# NOTE(review): `build` is not defined on the LinearRegression class in this
# notebook; presumably it should set up the loss and optimizer (see
# `set_loss_optimizer`) -- confirm.
linreg_torch.build()

In [None]:
# NOTE(review): `fit` is not defined on the LinearRegression class in this
# notebook; presumably it should run the training loop -- confirm.
linreg_torch.fit()

In [None]:
# Get the model coefficients as a (bias, weight) pair of NumPy arrays.
linreg_torch.get_coeff()

In [None]:
# Predicted response for training data; show the first ten predictions.
# NOTE(review): `predict` is not defined on the LinearRegression class in
# this notebook -- confirm the intended API.
y_train_ = linreg_torch.predict(X_train)
y_train_[:10]

In [None]:
# Prediction squared error for training data.
# NOTE(review): the `from metrics import mean_squared_error` import at the
# top of the notebook is commented out -- confirm the function is in scope.
mean_squared_error(y_train_, y_train)

In [None]:
# Predicted response for test data; show the first ten predictions.
# NOTE(review): `predict` is not defined on the LinearRegression class in
# this notebook -- confirm the intended API.
y_test_ = linreg_torch.predict(X_test)
y_test_[:10]

In [None]:
# Prediction squared error for test data.
mean_squared_error(y_test_, y_test)

## Benchmark with Sklearn's Linear Regression

In [None]:
# Fit sklearn's Linear Regression as a benchmark.
# Imported under an alias so it does not shadow the PyTorch LinearRegression
# class defined in this notebook.
from sklearn.linear_model import LinearRegression as LinearRegressionSklearn

linreg_sk = LinearRegressionSklearn()

# Flatten the targets to a 1-D vector of length n before fitting.
linreg_sk.fit(X_train, y_train.reshape(y_train.shape[0]))

LinearRegression()

In [None]:
# Get the fitted intercept and coefficients.
linreg_sk.intercept_, linreg_sk.coef_

(3.6417923,
 array([  6.348496  ,   0.5144263 , -14.455919  ,  21.595474  ,
         -0.04895439,  -4.965696  ,  -3.9162228 ,  -4.3132935 ],
       dtype=float32))

In [None]:
# Predicted response for training data; show the first ten predictions.
y_train_ = linreg_sk.predict(X_train)
y_train_[:10]

array([1.5379176, 1.472091 , 2.2133121, 3.8295603, 3.0244732, 1.9933348,
       2.263915 , 1.0535035, 1.0954115, 1.9086264], dtype=float32)

In [None]:
# Prediction squared error of the sklearn model on the training data.
mean_squared_error(y_train_, y_train)

0.51953274

In [None]:
# Predicted response for test data; show the first ten predictions.
y_test_ = linreg_sk.predict(X_test)
y_test_[:10]

array([1.75787538, 2.8031482 , 2.30476246, 2.80146927, 2.87024621,
       1.75832087, 2.11390826, 2.71989601, 2.10377988, 1.68258784])

In [None]:
# Prediction squared error of the sklearn model on the test data.
mean_squared_error(y_test_, y_test)

0.5393498488643094