# Linear Regression 

This notebook will construct data for regression problems (both linear and non-linear data) using scikit-learn datasets. It will then train linear regression models and regression models with non-linear pointwise activation functions on that data.


## 0. Preliminaries

In [None]:
# Import Statements

import torch
from torch import nn

from sklearn.datasets import make_regression, make_friedman1
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from helper_functions import plot_predictions, plot_loss
from models import SingleLayerLinearModel, ThreeLayerModel, train_step_regression, test_step_regression

In [None]:
# device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device is {device}")

# random number seed (passed as random_state to the scikit-learn data
# generators and train/test splits in this notebook)
rns = 42

# Seed PyTorch too, so that random weight initialisation of the models is
# repeatable from run to run; torch.manual_seed seeds the generators on all devices.
torch.manual_seed(rns)

# Flag to make plots and print out things.
verbose = True


In [None]:
def eval_model(model,
               loss_count,
               test_loss_count,
               datatype,
               hid_dim,
               nl_type,
               num_layers):
    """
    Summarise one training run as a flat dictionary, for comparing models later.

    The reported train and test losses are the minima of the recorded
    ``loss_count`` and ``test_loss_count`` histories. The model's class name
    is stored as its name; the remaining arguments are stored unchanged.
    """
    best_train_loss = np.asarray(loss_count).min()
    best_test_loss = np.asarray(test_loss_count).min()

    # Key order is kept stable: it becomes the column order of the results table.
    summary = {}
    summary["model_name"] = model.__class__.__name__
    summary["model_nonlin"] = nl_type
    summary["model_layers"] = num_layers
    summary["model_hidden_units"] = hid_dim
    summary["data"] = datatype
    summary["train_loss"] = best_train_loss
    summary["test_loss"] = best_test_loss
    return summary

## 1. Single Layer Linear model on Linear data

### 1a. Generate Linear Data: train & test datasets

In [None]:
# Noise-free linear regression data with `dims` input features and a bias of 5.
# coef=True makes make_regression also return the ground-truth coefficients.
dims = 3
X, y, coef = make_regression(
    n_samples=100,
    n_features=dims,
    bias=5,
    noise=0,
    coef=True,
    random_state=rns,
)

In [None]:
# Sanity check: report shape and Python type of the generated arrays, plus the
# coefficients returned by make_regression.
print(f"X is shape: {X.shape} and type: {type(X)}")
print(f"y is shape: {y.shape} and type: {type(y)}")
print(f"Ground truth coefficients: {coef}")

In [None]:
# Hold out 20% of the samples as a test set; the split is seeded with rns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=rns,
)

In [None]:
# Display the shapes of the four train/test pieces as a quick sanity check.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Turn each NumPy array into a float32 torch tensor living on `device`.
X_train, y_train, X_test, y_test = (
    torch.from_numpy(array).type(torch.float32).to(device)
    for array in (X_train, y_train, X_test, y_test)
)

In [None]:
# Display the tensor shapes as a quick sanity check.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Insert a size-1 axis at position 1 in both label tensors so that they line up
# with what the model is expected to predict - ie model0(X).
y_train, y_test = (labels.unsqueeze(dim=1) for labels in (y_train, y_test))

In [None]:
# Display the shapes again, now that the label tensors have an extra axis.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

### 1b. Visualize data

In [None]:
# One panel per input feature: that feature's values against the labels,
# for both the train and the test split.

if verbose:
    fig = plt.figure(figsize=(10, 3))
    for dim in range(dims):
        plt.subplot(1, dims, dim + 1)
        plot_predictions(
            train_data=X_train[:, dim],
            train_labels=y_train,
            test_data=X_test[:, dim],
            test_labels=y_test,
        )
        plt.title(f'Dim {dim}')
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.85)
    fig.suptitle('Linear Data generated from make_regression')

### 1c. Build and instantiate linear regression model

In [None]:
# SingleLayerLinearModel lives in models.py.
#
# Per its author it is a single linear layer ('in_dim' weights plus 1 bias) that
# fits a line in 'in_dim' dimensions: expected to work for linear regression and
# to struggle as the data becomes more non-linear.

# No hidden layer and no nonlinearity; both values are recorded in the results.
hid_dim = nl_type = None

# Build the model with one weight per input feature and move it to `device`.
model0 = SingleLayerLinearModel(in_dim=dims).to(device)
model_name = type(model0).__name__

In [None]:
# Show model0's parameters (its state_dict) before any training has happened.
print(f"Randomly initialized, model0 looks like: \n {model0.state_dict()}")

In [None]:
# Baseline picture: what model0 predicts on the test set before training.
if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model0(X_test)

    fig = plt.figure(figsize=(12, 4))
    for dim in range(dims):
        plt.subplot(1, dims, dim + 1)
        plot_predictions(
            train_data=X_train[:, dim],
            train_labels=y_train,
            test_data=X_test[:, dim],
            test_labels=y_test,
            predictions=preds,
        )
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.85)
    fig.suptitle(f"Predictions made with untrained model0: {model_name}", fontsize=16)

### 1d. Train linear model on linear data

In [None]:
# Loss: mean absolute error (L1Loss). Mean squared error, nn.MSELoss(), is the
# other common choice for regression problems.
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over all of model0's parameters.
optimizer = torch.optim.SGD(model0.parameters(), lr=0.1)

In [None]:
# History collected at each logged epoch: epoch index, training loss, test loss.
epoch_count = []
loss_count = []
test_loss_count = []

epochs = 3000  # how many times to run the training step on the full training data
test_freq = 100  # how often to run the test step, print and record results

for epoch in range(epochs):

    # Training step (train_step_regression is imported from models.py).
    loss = train_step_regression(data=X_train,
                                 labels=y_train,
                                 model=model0,
                                 loss_fn=loss_fn,
                                 optimizer=optimizer)

    # Log every `test_freq` epochs and also on the very last epoch. Without the
    # second condition the final state of the model (the one that is plotted and
    # summarised afterwards) would never be evaluated or recorded.
    if epoch % test_freq == 0 or epoch == epochs - 1:

        # Test step on the held-out data.
        test_loss = test_step_regression(data=X_test,
                                         labels=y_test,
                                         model=model0,
                                         loss_fn=loss_fn)

        # Collect performance into lists (as plain Python numbers).
        epoch_count.append(epoch)
        loss_count.append(loss.item())
        test_loss_count.append(test_loss.item())

        # Print performance
        print(f"Epoch: {epoch:5d} | loss: {loss:.6f} | test loss: {test_loss:.6f}")

### 1e. Visualize results: Linear regression model predicting on linear data

In [None]:
# Training and test loss at every logged epoch.
if verbose:
    plt.figure(figsize=(10, 4))
    plot_loss(
        epoch=epoch_count,
        loss=loss_count,
        test_loss=test_loss_count,
        y_scale='linear',
    )
    plt.title(f"Loss during model0 {model_name} training", fontsize=16)

In [None]:
# Same per-feature picture as before training, now with the trained model0.

if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model0(X_test)

    fig = plt.figure(figsize=(12, 4))
    for dim in range(dims):
        plt.subplot(1, dims, dim + 1)
        plot_predictions(
            train_data=X_train[:, dim],
            train_labels=y_train,
            test_data=X_test[:, dim],
            test_labels=y_test,
            predictions=preds,
        )
        plt.title(f"Dim: {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.85)
    fig.suptitle(f"Predictions made by trained model0: {model_name}", fontsize=16)
    print(f"Model 0 = \n {model0}")

In [None]:
# Show the learned parameters, to compare against the ground-truth coefficients.
print(f"Model after training: {model0.state_dict()}")

In [None]:
# Ground-truth coefficients returned by make_regression when the data was generated.
print(f"Ground truth coefficients were: {coef}")

In [None]:
# Display the model's repr as the cell output.
model0

In [None]:
# Summary row for model0 (single linear layer) trained on the linear data.
results0 = eval_model(
    model0,
    loss_count,
    test_loss_count,
    datatype="linear",
    hid_dim=hid_dim,
    nl_type=nl_type,
    num_layers=1,
)

**Conclusion**: Our single-layer linear regression model, model0, does well predicting linear data. The model predictions (red x) above lie right on top of the ground truth (green o) for the test data - data that the model never saw during training. Also, the weights learned by the model very closely match the ground-truth coefficients used to generate the data. **But how will it do on non-linear data?**

## 2. Single Layer Linear Model on Non-linear data

### 2a. Generate Nonlinear data

In [None]:
# Nonlinear regression data from scikit-learn's Friedman #1 generator:
#
# y(X) = 10 * sin(pi * X[:, 0] * X[:, 1])
#        + 20 * (X[:, 2] - 0.5) ** 2
#        + 10 * X[:, 3] + 5 * X[:, 4]
#        + noise * N(0, 1)
#
# Only the first 5 features enter the formula; any further features would not
# contribute to y. Here exactly 5 features are generated, with no noise.

dims = 5
Xnl, ynl = make_friedman1(
    n_samples=500,
    n_features=dims,
    noise=0,
    random_state=rns,
)

In [None]:
# Sanity check: report shape and Python type of the generated nonlinear arrays.
print(f"X is shape: {Xnl.shape} and type: {type(Xnl)}")
print(f"y is shape: {ynl.shape} and type: {type(ynl)}")

In [None]:
# Hold out 20% of the nonlinear samples as a test set; the split is seeded with rns.
Xnl_train, Xnl_test, ynl_train, ynl_test = train_test_split(
    Xnl,
    ynl,
    test_size=0.2,
    random_state=rns,
)

In [None]:
# Turn each NumPy array into a float32 torch tensor living on `device`.
Xnl_train, ynl_train, Xnl_test, ynl_test = (
    torch.from_numpy(array).type(torch.float32).to(device)
    for array in (Xnl_train, ynl_train, Xnl_test, ynl_test)
)

In [None]:
# Display the tensor shapes as a quick sanity check.
Xnl_train.shape, Xnl_test.shape, ynl_train.shape, ynl_test.shape

In [None]:
# Insert a size-1 axis at position 1 in both label tensors so that they line up
# with what the model is expected to predict - ie model(X).
ynl_train, ynl_test = (labels.unsqueeze(dim=1) for labels in (ynl_train, ynl_test))

In [None]:
# Display the shapes again, now that the label tensors have an extra axis.
Xnl_train.shape, Xnl_test.shape, ynl_train.shape, ynl_test.shape

### 2b. Visualize nonlinear data

In [None]:
# One panel per input feature: that feature's values against the labels,
# for both the train and the test split of the nonlinear data.

if verbose:
    fig = plt.figure(figsize=(14, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(
            train_data=Xnl_train[:, dim],
            train_labels=ynl_train,
            test_data=Xnl_test[:, dim],
            test_labels=ynl_test,
        )
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle('Nonlinear data made by make_friedman1', fontsize=14)

### 2c. Build and instantiate linear model

In [None]:
# SingleLayerLinearModel lives in models.py.
#
# Per its author it is a single linear layer ('in_dim' weights plus 1 bias) that
# fits a line in 'in_dim' dimensions: expected to work for linear regression and
# to struggle as the data becomes more non-linear.

# No hidden layer and no nonlinearity; both values are recorded in the results.
hid_dim = nl_type = None

# Build the model with one weight per input feature and move it to `device`.
model1 = SingleLayerLinearModel(in_dim=dims).to(device)
model_name = type(model1).__name__

In [None]:
# Show model1's parameters (its state_dict) before any training has happened.
print(f"Randomly initialized, model1 looks like: \n {model1.state_dict()}")

In [None]:
# Baseline picture: what model1 predicts on the nonlinear test set before training.
# Guarded by `verbose`, like the other plotting cells, so that switching the flag
# off really does suppress this figure.
if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model1(Xnl_test)

    fig = plt.figure(figsize=(14, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(train_data=Xnl_train[:, dim],
                         train_labels=ynl_train,
                         test_data=Xnl_test[:, dim],
                         test_labels=ynl_test,
                         predictions=preds)
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle(f"Predictions made with untrained model1: {model_name}", fontsize=14)

### 2d. Train a single layer linear model on nonlinear data

In [None]:
# Loss: mean squared error (nn.L1Loss(), mean absolute error, is the alternative).
loss_fn = nn.MSELoss()

# Optimizer: stochastic gradient descent over all of model1's parameters.
optimizer = torch.optim.SGD(model1.parameters(), lr=0.01)

In [None]:
# Train the model

# History collected at each logged epoch: epoch index, training loss, test loss.
epoch_count = []
loss_count = []
test_loss_count = []

epochs = 3000  # how many times to run the training step on the full training data
test_freq = 300  # how often to run the test step, print and record results

for epoch in range(epochs):

    # Training step (train_step_regression is imported from models.py).
    loss = train_step_regression(data=Xnl_train,
                                 labels=ynl_train,
                                 model=model1,
                                 loss_fn=loss_fn,
                                 optimizer=optimizer)

    # Log every `test_freq` epochs and also on the very last epoch. Without the
    # second condition the final state of the model (the one that is plotted and
    # summarised afterwards) would never be evaluated or recorded.
    if epoch % test_freq == 0 or epoch == epochs - 1:

        # Test step on the held-out data.
        test_loss = test_step_regression(data=Xnl_test,
                                         labels=ynl_test,
                                         model=model1,
                                         loss_fn=loss_fn)

        # Collect performance into lists (as plain Python numbers).
        epoch_count.append(epoch)
        loss_count.append(loss.item())
        test_loss_count.append(test_loss.item())

        # Print performance
        print(f"Epoch: {epoch:5d} | loss: {loss:.6f} | test loss: {test_loss:.6f}")

### 2e. Visualize results: Linear regression model predicting on nonlinear data

In [None]:
# Training and test loss at every logged epoch (y_scale='log' is requested).
if verbose:
    plt.figure(figsize=(10, 4))
    plot_loss(
        epoch=epoch_count,
        loss=loss_count,
        test_loss=test_loss_count,
        y_scale='log',
    )
    plt.title(f"Loss during training model1 {model_name} on nonlinear data")

In [None]:
# Same per-feature picture as before training, now with the trained model1.
if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model1(Xnl_test)

    fig = plt.figure(figsize=(12, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(
            train_data=Xnl_train[:, dim],
            train_labels=ynl_train,
            test_data=Xnl_test[:, dim],
            test_labels=ynl_test,
            predictions=preds,
        )
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle(f"Predictions made on Nonlinear data by trained model1: {model_name}")
    print(f"Model 1 = \n {model1}")

In [None]:
# Summary row for model1 (single linear layer) trained on the nonlinear data.
results1 = eval_model(
    model1,
    loss_count,
    test_loss_count,
    datatype="nonlinear",
    hid_dim=hid_dim,
    nl_type=nl_type,
    num_layers=1,
)

**Conclusion:** The single-layer linear model does learn on non-linear data, but the loss bottoms out and we can see that the predictions (red x's) don't match the ground truth (green o's) that well.

## 3. Three Layer Linear Model on non-linear data

Using the same non-linear data from make_friedman1, we will train a 3-layer linear model on it to see if that does better.

In [None]:
# ThreeLayerModel is defined in models.py.

# 10 hidden units and no nonlinearity; both values are recorded in the results.
hid_dim = 10
nl_type = None

# Build the model and move it to `device`.
model2 = ThreeLayerModel(
    in_dim=dims,
    hid_dim=hid_dim,
    nl_type=nl_type,
).to(device)
model_name = type(model2).__name__

In [None]:
# Display the model's repr as the cell output.
model2

In [None]:
# print(f"Randomly initialized, model2 looks like: \n {model2.state_dict()}")

In [None]:
# Baseline picture: what model2 predicts on the nonlinear test set before training.
# Guarded by `verbose`, like the other plotting cells, so that switching the flag
# off really does suppress this figure.

if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model2(Xnl_test)

    fig = plt.figure(figsize=(14, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(train_data=Xnl_train[:, dim],
                         train_labels=ynl_train,
                         test_data=Xnl_test[:, dim],
                         test_labels=ynl_test,
                         predictions=preds)
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle(f"Predictions made with untrained model2: {model_name}", fontsize=14)

In [None]:
# Loss: mean squared error (nn.L1Loss(), mean absolute error, is the alternative).
loss_fn = nn.MSELoss()

# Optimizer: stochastic gradient descent over all of model2's parameters.
optimizer = torch.optim.SGD(model2.parameters(), lr=0.01)

In [None]:
# Train the model

# History collected at each logged epoch: epoch index, training loss, test loss.
epoch_count = []
loss_count = []
test_loss_count = []

epochs = 3000  # how many times to run the training step on the full training data
test_freq = 300  # how often to run the test step, print and record results

for epoch in range(epochs):

    # Training step (train_step_regression is imported from models.py).
    loss = train_step_regression(data=Xnl_train,
                                 labels=ynl_train,
                                 model=model2,
                                 loss_fn=loss_fn,
                                 optimizer=optimizer)

    # Log every `test_freq` epochs and also on the very last epoch. Without the
    # second condition the final state of the model (the one that is plotted and
    # summarised afterwards) would never be evaluated or recorded.
    if epoch % test_freq == 0 or epoch == epochs - 1:

        # Test step on the held-out data.
        test_loss = test_step_regression(data=Xnl_test,
                                         labels=ynl_test,
                                         model=model2,
                                         loss_fn=loss_fn)

        # Collect performance into lists (as plain Python numbers).
        epoch_count.append(epoch)
        loss_count.append(loss.item())
        test_loss_count.append(test_loss.item())

        # Print performance
        print(f"Epoch: {epoch:5d} | loss: {loss:.6f} | test loss: {test_loss:.6f}")

In [None]:
# Training and test loss at every logged epoch (y_scale='log' is requested).

if verbose:
    plt.figure(figsize=(10, 4))
    plot_loss(
        epoch=epoch_count,
        loss=loss_count,
        test_loss=test_loss_count,
        y_scale='log',
    )
    plt.title(f"Loss during training model2 {model_name} on nonlinear data")

In [None]:
# Same per-feature picture as before training, now with the trained model2.

if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model2(Xnl_test)

    fig = plt.figure(figsize=(12, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(
            train_data=Xnl_train[:, dim],
            train_labels=ynl_train,
            test_data=Xnl_test[:, dim],
            test_labels=ynl_test,
            predictions=preds,
        )
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle(f"Predictions made on Nonlinear data by trained linear model2 {model_name}")
    print(f"Model 2 = \n {model2}")

In [None]:
# Summary row for model2 (three layers, no nonlinearity) trained on the nonlinear data.
results2 = eval_model(
    model2,
    loss_count,
    test_loss_count,
    datatype="nonlinear",
    hid_dim=hid_dim,
    nl_type=nl_type,
    num_layers=3,
)

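**Conclusion:** The three-layer linear model does learn on non-linear data, but the loss bottoms out at basically the same level as the single-layer linear model, and the predictions it makes (red x's) don't match the ground truth (green o's) any better. This is expected: a stack of linear layers with no nonlinearity between them is still just one linear map.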



## 4. Three Layer Non-linear Model on non-linear data

In [None]:
# ThreeLayerModel is defined in models.py.

# 10 hidden units with nl_type 'relu'; both values are recorded in the results.
hid_dim = 10
nl_type = 'relu'

# Build the model and move it to `device`.
model3 = ThreeLayerModel(
    in_dim=dims,
    hid_dim=hid_dim,
    nl_type=nl_type,
).to(device)
model_name = type(model3).__name__

In [None]:
# Display the model's repr as the cell output.
model3

In [None]:
# Baseline picture: what model3 predicts on the nonlinear test set before training.
# Guarded by `verbose`, like the other plotting cells, so that switching the flag
# off really does suppress this figure.

if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model3(Xnl_test)

    fig = plt.figure(figsize=(14, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(train_data=Xnl_train[:, dim],
                         train_labels=ynl_train,
                         test_data=Xnl_test[:, dim],
                         test_labels=ynl_test,
                         predictions=preds)
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    fig.suptitle(f'Predictions made with untrained model3: {model_name}', fontsize=14)

In [None]:
# Loss: mean squared error (nn.L1Loss(), mean absolute error, is the alternative).
loss_fn = nn.MSELoss()

# Optimizer: stochastic gradient descent over all of model3's parameters.
optimizer = torch.optim.SGD(model3.parameters(), lr=0.01)

In [None]:
# Train the model

# History collected at each logged epoch: epoch index, training loss, test loss.
epoch_count = []
loss_count = []
test_loss_count = []

epochs = 10000  # how many times to run the training step on the full training data
test_freq = 300  # how often to run the test step, print and record results

for epoch in range(epochs):

    # Training step (train_step_regression is imported from models.py).
    loss = train_step_regression(data=Xnl_train,
                                 labels=ynl_train,
                                 model=model3,
                                 loss_fn=loss_fn,
                                 optimizer=optimizer)

    # Log every `test_freq` epochs and also on the very last epoch. Without the
    # second condition the final state of the model (the one that is plotted and
    # summarised afterwards) would never be evaluated or recorded.
    if epoch % test_freq == 0 or epoch == epochs - 1:

        # Test step on the held-out data.
        test_loss = test_step_regression(data=Xnl_test,
                                         labels=ynl_test,
                                         model=model3,
                                         loss_fn=loss_fn)

        # Collect performance into lists (as plain Python numbers).
        epoch_count.append(epoch)
        loss_count.append(loss.item())
        test_loss_count.append(test_loss.item())

        # Print performance
        print(f"Epoch: {epoch:5d} | loss: {loss:.6f} | test loss: {test_loss:.6f}")

In [None]:
# Training and test loss at every logged epoch (y_scale='log' is requested).

if verbose:
    plt.figure(figsize=(10, 4))
    plot_loss(
        epoch=epoch_count,
        loss=loss_count,
        test_loss=test_loss_count,
        y_scale='log',
    )
    plt.title(f"Loss during training of model3 {model_name} on nonlinear data")

In [None]:
# Same per-feature picture as before training, now with the trained model3.

if verbose:
    # inference_mode: no gradient tracking is needed for plain predictions.
    with torch.inference_mode():
        preds = model3(Xnl_test)

    fig = plt.figure(figsize=(12, 3))
    rows = 1
    for dim in range(dims):
        # Enough columns to fit all `dims` panels into `rows` rows.
        plt.subplot(rows, int(np.ceil(dims / rows)), dim + 1)
        plot_predictions(train_data=Xnl_train[:, dim],
                         train_labels=ynl_train,
                         test_data=Xnl_test[:, dim],
                         test_labels=ynl_test,
                         predictions=preds)
        plt.title(f"Dim {dim}")
    # Applied once all panels exist: leaves head-room for the figure title.
    fig.subplots_adjust(top=0.75)
    # model3 is built with nl_type='relu', so the title calls it nonlinear
    # (the previous title mislabelled it as a linear model).
    fig.suptitle(f"Predictions made on Nonlinear data by trained nonlinear model3: {model_name}")
    print(f"Model 3 = \n {model3}")

In [None]:
# Summary row for model3 (three layers, ReLU) trained on the nonlinear data.
results3 = eval_model(
    model3,
    loss_count,
    test_loss_count,
    datatype="nonlinear",
    hid_dim=hid_dim,
    nl_type=nl_type,
    num_layers=3,
)

**Conclusion:** Three layer non-linear model does learn on non-linear data better than linear models. And the predictions it makes (red x's) are closer to ground truth (green o's). Still not perfect, but we can train for longer. We can also add more hidden units.

In [None]:
# One row per trained model, in the order the models were trained.
res = pd.DataFrame(
    data=[results0, results1, results2, results3],
)
res.head()

In [None]:
# End-of-notebook marker (plain string: there is nothing to interpolate).
print("IM DONE!!!")