# Packages

In [None]:
# !pip install torchsummary

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import make_swiss_roll, fetch_openml, load_digits
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import DataLoader, Subset, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchsummary import summary
from util import _parse_batch_size, _parse_shuffle

# Constants

In [None]:
# Path prefix for data files. No visible cell references it yet —
# presumably the download/cache directory for datasets (TODO confirm).
DATA_DIR = "./data/"

# Example 1: Compressed Sensing

## Data

In [None]:
# Set dimensional parameters and define signal
#

# Create (noisy) data
#

# Create training and test set
#

# Create DataLoaders
#

## Model and training set-up

In [None]:
# Define Lasso class
#    

# Create Lasso instance
#

# Define loss(es), optimizer and tuning parameter
#

## Training

In [None]:
# Set number of epochs
#

# Instantiate variables for tracking training
#

# Loop 1
    # Loop 2
        # zero the previous gradients
        
        # obtain predictions and compute loss
        
        # track optimization info
        
        # compute gradients and update weights
        

## Solution quality

In [None]:
def get_plotting_domain(num_epochs, num_batches):
    """Return x-coordinates, in units of epochs, for per-batch statistics.

    One coordinate is produced for every (epoch, batch) pair, ordered epoch
    by epoch. Within epoch ``e`` the ``num_batches`` coordinates are evenly
    spaced from ``e`` to ``e + 1`` with both ends included, so the last batch
    of one epoch and the first batch of the next share the same coordinate.

    Parameters
    ----------
    num_epochs : int
        Number of epochs that were run.
    num_batches : int
        Number of batches per epoch.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``num_epochs * num_batches``.
    """
    # Position of each batch inside a single epoch, on [0, 1].
    within_epoch = np.linspace(0, 1, num_batches)
    # Integer offset contributed by each epoch.
    epoch_offsets = np.arange(num_epochs)
    # Broadcast to a (num_epochs, num_batches) grid, then flatten row-wise.
    grid = within_epoch[np.newaxis, :] + epoch_offsets[:, np.newaxis]
    return grid.ravel()

In [None]:
# format optimization info for plotting
# x-coordinates in units of epochs: one value per (epoch, batch) pair
domain = get_plotting_domain(num_epochs, len(train_loader))
# Convert the recorded histories to NumPy arrays.
# NOTE(review): these names are expected to be bound by the training cell
# above, which is still a template — confirm each holds one entry per batch
# so that it lines up with `domain`.
resid_hist = np.array(resid_hist)
error_hist = np.array(error_hist)
l1_value_hist = np.array(l1_value_hist)

In [None]:
# Global matplotlib styling: these rcParams stay in effect for every figure
# created afterwards, not just the one below.
plt.rcParams.update({
    'font.size': 16,
    'axes.labelsize': 18,
    'lines.linewidth': 3,
})
# One row with two side-by-side panels; `ax` is indexed as ax[0], ax[1].
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
# Axis 0:
# Plot residual MSE with label '$\|\hat y - Xw_0\|_2^2$'

# Plot error MSE with label '$\|\hat w - w_0\|_2^2$'

# Plot on a log scale

# Add a legend

# Axis 1:
# Plot value of ell-1 norm over optimization with label '$\|\hat w\|_1$'

# Add a legend


In [None]:
# Obtain the solution, w_hat

# Obtain the support size with tolerance 0.01 and print it


In [None]:
# Examine the first five entries of w_hat


# Example 2: Swiss Roll Classification

## Creating a training and test set

In [None]:
def make_swiss_roll_classification(
    n_samples=10000, noise=1, seed=2112, n_segments=6, val_size=.15, test_size=.15
):
    """Build a two-class problem from a Swiss-roll point cloud.

    Samples are drawn with ``make_swiss_roll``. Its second return value
    (``tt``, one scalar per sample) is cut into ``n_segments`` equal-width
    bins, and a sample's label is determined by the parity of its bin
    number, so the two classes alternate along ``tt``.

    Parameters
    ----------
    n_samples : int
        Number of points to generate.
    noise : float
        Forwarded to ``make_swiss_roll`` as ``noise``.
    seed : int
        Forwarded to ``make_swiss_roll`` as ``random_state``.
    n_segments : int
        Number of bins along ``tt``.
    val_size, test_size : float
        Not used by the code currently present; intended for the split step
        that is still to be written (presumably fractions of the data —
        TODO confirm).

    Returns
    -------
    X_train, X_val, X_test, y_train, y_val, y_test

    NOTE(review): the split step below is still a placeholder, so none of
    the returned names is assigned and the ``return`` raises ``NameError``
    until it is filled in.
    """
    X, tt = make_swiss_roll(n_samples, noise=noise, random_state=seed)
    # Left bin edges only (endpoint=False): tt.min() is the first edge, so
    # np.digitize assigns bin numbers 1..n_segments.
    bins = np.linspace(tt.min(), tt.max(), n_segments, endpoint=False)
    tt_dig = np.digitize(tt, bins)
    # Even bin number -> class 0, odd bin number -> class 1.
    y = np.where((tt_dig % 2) == 0, 0, 1)
    # Split data into X_train, X_val, X_test, y_train, y_val, y_test
    # ...
    return X_train, X_val, X_test, y_train, y_val, y_test

In [None]:
def make_dataloaders(
    *arrays, batch_size=None, shuffle=None, make_eval_train=False
):
    """Wrap train / (validation) / test arrays in ``DataLoader`` objects.

    Parameters
    ----------
    *arrays
        Either six arrays ``X_train, X_val, X_test, y_train, y_val, y_test``
        or four arrays ``X_train, X_test, y_train, y_test`` (no validation
        set). Features are converted to float tensors, labels to long
        tensors.
    batch_size, shuffle
        Passed through ``_parse_batch_size`` / ``_parse_shuffle``. The parsed
        results are indexed at ``0`` for the training loader, ``1`` for the
        eval-train and validation loaders, and ``-1`` for the test loader.
    make_eval_train : bool
        If true, also build a loader over a random subset of the training
        set (drawn without replacement via ``np.random.choice``) containing
        as many samples as the test set.

    Returns
    -------
    list of DataLoader
        In order: train, eval-train (only if ``make_eval_train``),
        validation (only if a validation set was given), test.
    """
    batch_sizes = _parse_batch_size(batch_size)
    shuffles = _parse_shuffle(shuffle)

    if len(arrays) == 6:
        X_train, X_val, X_test, y_train, y_val, y_test = arrays
    else:
        # Four-array form: there is no validation split.
        X_train, X_test, y_train, y_test = arrays
        X_val = None

    def as_dataset(features, labels):
        # Features as float tensors, labels as long tensors.
        return TensorDataset(
            torch.tensor(features).float(), torch.tensor(labels).long()
        )

    def as_loader(dataset, slot):
        # `slot` picks the matching batch-size / shuffle entry.
        return DataLoader(
            dataset, batch_size=batch_sizes[slot], shuffle=shuffles[slot]
        )

    train_set = as_dataset(X_train, y_train)
    loaders = [as_loader(train_set, 0)]

    if make_eval_train:
        # Random training subset with the same number of samples as the test set.
        picked = np.random.choice(
            X_train.shape[0], X_test.shape[0], replace=False
        )
        loaders.append(as_loader(Subset(train_set, picked), 1))

    if X_val is not None:
        loaders.append(as_loader(as_dataset(X_val, y_val), 1))

    loaders.append(as_loader(as_dataset(X_test, y_test), -1))
    return loaders

In [None]:
# Generate the six arrays, then wrap them in DataLoaders.
# NOTE: make_dataloaders returns four loaders (train, eval_train, val, test)
# only when it is given all six arrays and make_eval_train=True.
X_train, X_val, X_test, y_train, y_val, y_test = # ...
dl_train, dl_eval_train, dl_val, dl_test = # ...

## A simple network

In [None]:
class TwoLayerDenseNet(nn.Module):
    # Template for a small classifier module: the layer definitions and the
    # forward pass are left as `# ...` placeholders to be filled in.
    def __init__(self, in_features, hidden_size, num_classes):
        # The three size arguments are accepted but not yet used; the
        # attributes below still need their right-hand sides.
        super().__init__()
        self.fc1 = # ...
        self.fc2 = # ...
        # NOTE(review): `do` is presumably a dropout layer (a later note in
        # this notebook mentions Dropout) — confirm when filling in.
        self.do = # ...

    def forward(self, inputs):
        # Must bind `outputs` before returning it. The training helpers in
        # this notebook pass the result to the criterion and take the
        # maximum along dim 1 to obtain predictions.
        # ...
        return outputs

In [None]:
# Create a model with 100 hidden units
model = # ...

In [None]:
# Print a summary of model
# ...

## Training setup

In [None]:
# Define objects for optimization
# ...

In [None]:
def make_train_and_val_functions(model, criterion, optimizer):
    """Create per-batch training and evaluation step functions.

    Parameters
    ----------
    model : torch.nn.Module
        Maps a batch of inputs to logits; predictions are taken as the index
        of the largest logit along dim 1.
    criterion : callable
        Called as ``criterion(logits, targets)``; must return a scalar
        tensor.
    optimizer : torch.optim.Optimizer
        Optimizer that updates ``model``'s parameters in ``train_step``.

    Returns
    -------
    (train_step, eval_step) : tuple of callables
        Each takes a batch ``(inputs, targets)`` and returns a dict with keys
        ``'loss'`` (float), ``'num_correct'`` and ``'batch_accuracy'``
        (0-d NumPy arrays) and ``'batch_size'`` (int).
        ``train_step`` puts the model in training mode and performs one
        optimizer update. ``eval_step`` puts the model in evaluation mode
        and leaves the parameters untouched.
    """
    def _batch_stats(loss, y_logits, targets):
        # Shared bookkeeping for both kinds of step.
        y_pred = torch.max(y_logits, 1).indices
        correct = (y_pred == targets.detach())
        # .cpu() is a no-op for CPU tensors and makes .numpy() valid when
        # the batch lives on another device.
        return {'loss': loss.item(),
                'num_correct': correct.long().sum().cpu().numpy(),
                'batch_accuracy': correct.float().mean().cpu().numpy(),
                'batch_size': correct.numel()}

    def train_step(batch):
        model.train()
        inputs, targets = batch
        y_logits = model(inputs)
        loss = criterion(y_logits, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # The reported loss/accuracy are those computed before this update.
        return _batch_stats(loss, y_logits, targets)

    def eval_step(batch):
        model.eval()
        inputs, targets = batch
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            y_logits = model(inputs)
            loss = criterion(y_logits, targets)
        return _batch_stats(loss, y_logits, targets)

    return train_step, eval_step


## Training loop

In [None]:
# Number of epochs

# Variables to store training information

# Make train_step and eval_step

# Outer loop
    # Train phase: Inner loop
        # Step
        # ...
        # Record stats
        # ...

    # Step scheduler

    # Train eval phase: Inner loop
        # Step
        # ...
        # Record stats
        # ...
    
    # Val phase: Inner loop
        # Step
        # ...
        # Record stats
        # ...

    # Print information at end of epoch
    # ...

# Finished: printed once, after the outer (epoch) loop has ended
print('done.')

## Process Training Info

In [None]:
def transpose_lod(lod):
    """Transpose a list of dicts into a dict of lists.

    Parameters
    ----------
    lod : sequence of dict
        Records that all contain at least the keys of the first record.

    Returns
    -------
    dict
        Maps each key of ``lod[0]`` to the list of that key's values across
        all records, in record order. An empty ``lod`` yields ``{}``.

    Raises
    ------
    KeyError
        If a later record lacks one of the first record's keys.
    """
    # An empty history has no first record to take keys from.
    if len(lod) == 0:
        return {}
    return {key: [record[key] for record in lod] for key in lod[0]}

In [None]:
# Turn each list of per-step dicts into a dict of per-key lists.
# NOTE(review): the first line reads `train_history`, while the other two
# read and then rebind `eval_train_stats` / `val_stats`. After one run those
# two names hold dicts, so re-running this cell passes a dict to
# transpose_lod, which indexes its argument with [0]. Confirm the names used
# by the training cell and consider distinct input/output names.
train_stats = transpose_lod(train_history)
eval_train_stats = transpose_lod(eval_train_stats)
val_stats = transpose_lod(val_stats)

In [None]:
# Stack the per-phase statistics into one long table with a `phase` column.
phase_stats = {
    'train': train_stats,
    'eval_train': eval_train_stats,
    'val': val_stats,
}
phase_frames = [pd.DataFrame.from_dict(stats) for stats in phase_stats.values()]
# `keys` adds an outer index level holding the phase name of each row.
stacked = pd.concat(phase_frames, keys=list(phase_stats))
# Move that outer level into an ordinary column and give it a proper name.
df = stacked.reset_index(level=0)
df = df.rename(columns={'level_0': 'phase'})
df.head()

In [None]:
# Per-epoch mean and standard deviation of the training batch losses.
# Requires `epoch` and `loss` columns in `df`.
is_train_row = df['phase'] == 'train'
train_df = df.loc[is_train_row]
train_avg_epoch_loss = (
    train_df.groupby(['epoch'])['loss']
    .agg(mean_batch_loss='mean', sd_batch_loss='std')
    .reset_index()
)
train_avg_epoch_loss.head()

## *Post silico*

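Notice that the `train_stats` are skewed by `Dropout`, which is active while `train_step` runs the model in training mode. This is why we also track `eval_train`: statistics on a subset of the training data, computed with the model in evaluation mode.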

In [None]:
plt.figure(figsize=(8, 4))
# Mean training batch loss per epoch, with a shaded band of +/- 1.96 standard
# deviations of the batch losses around it.
epochs = train_avg_epoch_loss.epoch
mean_loss = train_avg_epoch_loss.mean_batch_loss
half_width = 1.96 * train_avg_epoch_loss.sd_batch_loss
plt.plot(epochs, mean_loss, c='blue', label='train')
plt.fill_between(
    epochs,
    mean_loss - half_width,
    mean_loss + half_width,
    alpha=.1,
    color='blue',
)
# Plot val loss as a black dashed line with label 'validation'
# ...
# Plot eval_train loss as an orange dotted line with label 'eval_train'
# ...
# Add a legend
# ...

In [None]:
# Start a new figure for the accuracy curves described below.
plt.figure(figsize=(8, 4))
# Plot val accuracy as a black dashed line with label 'validation'
# ...
# Plot eval_train accuracy as an orange dotted line with label 'eval_train'
# ...
# Add a legend
# ...

In [None]:
# Obtain test predictions as `test_pred`
# ...
# Compute test accuracy
# ...
# Report test accuracy
# ...

In [None]:
def plot_swiss_rolls(data, coloring):
    """Scatter-plot the three pairwise projections of 3-column data.

    Parameters
    ----------
    data : array
        Indexed as ``data[:, k]`` for ``k`` in 0, 1, 2.
    coloring
        Passed to ``scatter`` as ``c`` to color the points.

    Returns
    -------
    (fig, ax)
        The figure and its three axes, showing x0 vs. x1, x0 vs. x2 and
        x1 vs. x2 from left to right.
    """
    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
    # (horizontal, vertical) column indices for each panel, left to right.
    column_pairs = ((0, 1), (0, 2), (1, 2))
    for panel, (horiz, vert) in zip(ax, column_pairs):
        panel.scatter(data[:, horiz], data[:, vert], c=coloring)
        panel.set_title(f'x{horiz} vs. x{vert}')
    return fig, ax

In [None]:
# Test points colored by their true label.
plot_swiss_rolls(X_test, y_test);

In [None]:
# Test points colored by whether the prediction differs from the true label,
# i.e. misclassified points versus correctly classified ones.
plot_swiss_rolls(X_test, (y_test != test_pred.numpy()));