<a href="https://colab.research.google.com/github/tronghieu2810/DEEP-LEARNING/blob/main/UDEMY/%5BMike_X_Cohen%5D_Deep_understanding/DUDL_total.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LIBRARIES

In [None]:
# For DL modeling
import torch
from torch.utils.data import TensorDataset, DataLoader, Subset
import torch.nn.functional as F
import torch.nn as nn
from torchsummary import summary # Getting summary info on models

# For number-crunching
import numpy as np
import scipy.stats as stats

# For dataset management
import pandas as pd
from sklearn.model_selection import train_test_split

# For data visualization
import matplotlib.pyplot as plt
from IPython import display
display.set_matplotlib_formats('svg')
import seaborn as sns

# For timing computations
import time

import copy

import sklearn.metrics as skm

import sys

# For doing PCA on the model output
from sklearn.decomposition import PCA

import torchvision # For importing data
import torchvision.transforms as T

# IMPORT DATASET

## TORCHVISION

In [None]:
import torchvision

my_data = torchvision.datasets.CIFAR10(root='cifar10', download=True)


## WEB

In [None]:
data_url = 'https://www.cdc.gov/nchs/data/dvs/state-data-rates-90-95-99-19.xlsx'

# Import directly into pandas
data = pd.read_excel(data_url,header=5)

## HARD DRIVE

In [None]:
from google.colab import files
uploaded = files.upload()

## GOOGLE DRIVE

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

# DATASET

## REGRESSION

In [None]:
N = 30
x = torch.randn(N, 1) # 30 random numbers
y = x + torch.randn(N, 1) / 2

plt.plot(x, y, 's')
plt.show()

## QWERTY

## QWERTY 2

In [None]:
# Parameters
n_per_clust = 300
blur        = 1
A           = [1, 1]
B           = [5, 1]
C           = [4, 3]

# Generate data
a = [A[0] + np.random.randn(n_per_clust) * blur, A[1] + np.random.randn(n_per_clust) * blur]
b = [B[0] + np.random.randn(n_per_clust) * blur, B[1] + np.random.randn(n_per_clust) * blur]
c = [C[0] + np.random.randn(n_per_clust) * blur, C[1] + np.random.randn(n_per_clust) * blur]

# Concatanate into a matrix
data_np   = np.hstack(tup=(a, b, c)).T

# True labels
labels_np = np.hstack(tup=(np.zeros(shape=(n_per_clust)), 
                           np.ones(shape=(n_per_clust)),
                           1 + np.ones(shape=(n_per_clust))))

# Convert to a pytorch tensor
data   = torch.tensor(data_np).float()
labels = torch.tensor(labels_np).long() # note: "long" format for CCE

# Show the data
fig = plt.figure(figsize=(5, 5))
plt.plot(data[np.where(labels == 0)[0], 0], data[np.where(labels == 0)[0], 1], 'bs', alpha=0.5)
plt.plot(data[np.where(labels == 1)[0], 0], data[np.where(labels == 1)[0], 1], 'ko', alpha=0.5)
plt.plot(data[np.where(labels == 2)[0], 0], data[np.where(labels == 2)[0], 1], 'r^', alpha=0.5)

plt.title('The qwerties!')
plt.xlabel('qwerty dimension 1')
plt.ylabel('qwerty dimension 2')
plt.show()

### CREATING FUNCTION

In [None]:
def create_some_data(n_per_clust, batch_size=8, train_size=0.9):
    """
    Generate the three-cluster "qwerty" dataset and wrap it in DataLoaders.

    Three 2-D point clouds are drawn by adding standard-normal noise to the
    centroids (1, 1), (5, 1) and (4, 4). Points are labelled 0, 1 and 2
    according to the cluster they were drawn from.

    Parameters
    ----------
    n_per_clust : int
        Number of points generated for each of the three clusters.
    batch_size : int, optional
        Mini-batch size of the training DataLoader (default 8).
    train_size : float or int, optional
        Forwarded to ``train_test_split`` as ``train_size`` (default 0.9).

    Returns
    -------
    dict
        ``'data'``       - float tensor with all points, shape (3 * n_per_clust, 2)
        ``'labels'``     - long tensor with the cluster index of every point
        ``'train_data'`` - shuffled training DataLoader (incomplete last batch dropped)
        ``'test_data'``  - test DataLoader that yields the whole test set as one batch
    """
    centroids = ([1, 1], [5, 1], [4, 4])

    # Generate data: one [x-coordinates, y-coordinates] pair per cluster
    clouds = [[cx + np.random.randn(n_per_clust), cy + np.random.randn(n_per_clust)]
              for cx, cy in centroids]

    # Concatenate into a (3 * n_per_clust, 2) matrix
    data_np = np.hstack(clouds).T

    # True labels: n_per_clust zeros, then ones, then twos
    labels_np = np.repeat(np.arange(len(centroids)), n_per_clust)

    # Put all outputs into a dict
    output = {}
    output['data']   = torch.tensor(data_np).float()
    output['labels'] = torch.tensor(labels_np).long()  # "long" format for cross-entropy

    # Use scikit-learn to split the data
    train_data, test_data, train_labels, test_labels = train_test_split(
        output['data'], output['labels'], train_size=train_size)

    # Convert into PyTorch Datasets
    train_data_set = TensorDataset(train_data, train_labels)
    test_data_set  = TensorDataset(test_data, test_labels)

    # Translate into DataLoader objects
    output['train_data'] = DataLoader(dataset=train_data_set, batch_size=batch_size,
                                      shuffle=True, drop_last=True)
    output['test_data']  = DataLoader(dataset=test_data_set,
                                      batch_size=len(test_data_set))

    return output

## QWERTY 2 - 3D

In [None]:
# Parameters
n_per_clust = 300
blur        = 1
A           = [1, 1]
B           = [5, 1]
C           = [4, 3]

# Generate data
a = [A[0] + np.random.randn(n_per_clust) * blur, A[1] + np.random.randn(n_per_clust) * blur]
b = [B[0] + np.random.randn(n_per_clust) * blur, B[1] + np.random.randn(n_per_clust) * blur]
c = [C[0] + np.random.randn(n_per_clust) * blur, C[1] + np.random.randn(n_per_clust) * blur]

# Concatanate into a matrix
data_np   = np.hstack(tup=(a, b, c)).T

# True labels
labels_np = np.hstack(tup=(np.zeros(shape=(n_per_clust)), 
                           np.ones(shape=(n_per_clust)),
                           1 + np.ones(shape=(n_per_clust))))

# Convert to a pytorch tensor
data   = torch.tensor(data_np).float()
labels = torch.tensor(labels_np).long() # note: "long" format for CCE

# Show the data
fig = plt.figure(figsize=(8, 8))

# Draw distance to origin
color = 'bkr'
for i in range(len(data)):
    plt.plot([0, data[i, 0]], [0, data[i, 1]], color=color[labels[i]], alpha=0.2)

plt.plot(data[np.where(labels == 0)[0], 0], data[np.where(labels == 0)[0], 1], 'bs', alpha=0.5)
plt.plot(data[np.where(labels == 1)[0], 0], data[np.where(labels == 1)[0], 1], 'ko', alpha=0.5)
plt.plot(data[np.where(labels == 2)[0], 0], data[np.where(labels == 2)[0], 1], 'r^', alpha=0.5)

plt.grid(color=[0.9, 0.9, 0.9])
plt.title('The qwerties!')
plt.xlabel('qwerty dimension 1')
plt.ylabel('qwerty dimension 2')
plt.show()

In [None]:
# Compute Euclidean distance of every point to the origin
dist_2_orig = torch.sqrt((data[:, 0] ** 2) + (data[:, 1] ** 2))

# Jitter the integer labels horizontally so overlapping points stay visible.
# The jitter vector is sized from `labels` (not a hard-coded 900) so the cell
# keeps working when `n_per_clust` is changed.
plt.plot(labels + torch.randn(len(labels)) / 10, dist_2_orig, 'o')
plt.xticks([0, 1, 2], labels=['Blue', 'Black', 'Red'])
plt.ylabel('Euclidean Distance (a.u.)')
plt.title('Distance to Origin')
plt.show()

In [None]:
# Add that to the data matrix
data_aug = torch.cat((data, dist_2_orig.view(len(data), 1)), axis=1)

# Check data sizes
print(data.shape)
print(data_aug.shape)
print('')

# Look at some of the data
print(data_aug)

## QWERTY DOUGHNUTS

## IRIS

In [None]:
# Import dataset (comes with seaborn)
import seaborn as sns
iris = sns.load_dataset('iris')

# Convert from pandas dataframe to tensor | Final column is the outcome variable
data = torch.tensor(iris[iris.columns[0:4]].values).float()

# Transform species to number
labels = torch.zeros(len(data), dtype=torch.long)

# Labels[iris.species=='setosa'] = 0 # don't need!
labels[iris.species == 'versicolor'] = 1
labels[iris.species == 'virginica'] = 2

In [None]:
# Check out the first few lines of data
iris.head()

## WINE

In [None]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=';')

# Remove rows with outliers.
# `.copy()` makes the filtered frame an independent object, so the column
# assignments below do not write into a slice of the original frame
# (which triggers pandas' SettingWithCopyWarning).
data = data[data['total sulfur dioxide'] < 200].copy()

# Z-SCORE ALL VARIABLES EXCEPT FOR QUALITY
# Find the columns we want to normalize (all except quality)
cols_2_zscore = data.keys().drop('quality')
data[cols_2_zscore] = data[cols_2_zscore].apply(stats.zscore)

qual_threshold = 5
# Create a new column for binarized quality: 1 if quality > threshold, else 0
data['boolQuality'] = (data['quality'] > qual_threshold).astype(int)

In [None]:
# Convert from pandas dataframe to tensor
dataT  = torch.tensor(data=data[cols_2_zscore].values).float()
labels = torch.tensor(data=data['boolQuality'].values).float()
labels = labels[:, None] # Transform to matrix. We'll actually need the labels to be a "tensor"

#  Use scikitlearn to split the data
train_data, test_data, train_labels, test_labels = \
    train_test_split(dataT, labels, test_size=0.1)

# Convert into PyTorch Datasets
train_data_set = TensorDataset(train_data, train_labels)
test_data_set  = TensorDataset(test_data, test_labels)

# Translate into Dataloader objects
batch_size   = 32
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                          shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=test_data_set, 
                          batch_size=test_data_set.tensors[0].shape[0])

### PROCESS THE DATA BY QUALITY METRIC

In [None]:
def create_a_dataset(qual_threshold, batch_size=8):
    """
    Binarize wine quality at `qual_threshold` and build train/test DataLoaders.

    Reads the module-level DataFrame `data` and the column index
    `cols_2_zscore` created in the wine-loading cell, and overwrites
    `data['boolQuality']` in place.

    Parameters
    ----------
    qual_threshold : int or float
        Wines with ``quality > qual_threshold`` are labelled 1, all others 0.
    batch_size : int, optional
        Mini-batch size of the training DataLoader (default 8).

    Returns
    -------
    (train_loader, test_loader)
        Shuffled training DataLoader (incomplete last batch dropped) and a
        test DataLoader that yields the whole test set as one batch.
    """
    # (Re)compute the binarized quality column for this threshold
    data['boolQuality'] = (data['quality'] > qual_threshold).astype(int)

    # Convert from pandas dataframe to tensor
    dataT  = torch.tensor(data=data[cols_2_zscore].values).float()
    labels = torch.tensor(data=data['boolQuality'].values).float()
    labels = labels[:, None]  # Column vector: one label per row

    # Use scikit-learn to split the data (10% held out for testing)
    train_data, test_data, train_labels, test_labels = \
        train_test_split(dataT, labels, test_size=0.1)

    # Convert into PyTorch Datasets
    train_data_set = TensorDataset(train_data, train_labels)
    test_data_set  = TensorDataset(test_data, test_labels)

    # Translate into DataLoader objects
    train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                              shuffle=True, drop_last=True)
    test_loader  = DataLoader(dataset=test_data_set,
                              batch_size=len(test_data_set))

    return train_loader, test_loader

In [None]:
# Test the dataset and the number of samples

# Note that the data are roughly balanced with thresh=5, not 4 or 6
train_loader, test_loader = create_a_dataset(qual_threshold=6)

# Get number of high/low quality wines
quality_ratings = train_loader.dataset.tensors[1].detach()
print(f'{torch.sum(quality_ratings == 0).item()} low-rated wines')
print(f'{torch.sum(quality_ratings == 1).item()} high-rated wines')

## MNIST

In [None]:
# Import dataset (comes with colab!)
# Passing the path lets NumPy open and close the file itself; the previous
# `open(...)` handle was never closed.
data = np.loadtxt('sample_data/mnist_train_small.csv', delimiter=',')

# Extract labels (first column) and remove them from the data
labels = data[:, 0]
data   = data[:, 1:]

# Normalize the data to a range of [0 1]
data_norm = data / np.max(data)

# Reshape each row to a 1 x 28 x 28 image (CNN format)
data_norm = data_norm.reshape(data_norm.shape[0], 1, 28, 28)

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
ax[0].hist(data.flatten(), 50)
ax[0].set_xlabel('Pixel intensity values')
ax[0].set_ylabel('Count')
ax[0].set_title('Histogram of original data')
ax[0].set_yscale('log')

ax[1].hist(data_norm.flatten(), 50)
ax[1].set_xlabel('Pixel intensity values')
ax[1].set_ylabel('Count')
ax[1].set_title('Histogram of normalized data')

plt.show()

In [None]:
# Normalize the data to 0 or 1
data_norm = (data > 0).astype(float)

fig, ax = plt.subplots(1, 2, figsize=(10, 4))
ax[0].hist(data.flatten(), 50)
ax[0].set_xlabel('Pixel intensity values')
ax[0].set_ylabel('Count')
ax[0].set_title('Histogram of original data')
ax[0].set_yscale('log')

ax[1].hist(data_norm.flatten(), 50)
ax[1].set_xlabel('Pixel intensity values')
ax[1].set_ylabel('Count')
ax[1].set_title('Histogram of normalized data')

plt.show()

# Confirm that the data have limited values
print(np.unique(data))
print('')
print(np.unique(data_norm))

### RANDOMLY SCRAMBLE THE DATA
PRESERVING THE RE-ORDERING FOR EACH IMAGE

In [None]:
eggs      = np.random.permutation(data.shape[1]) # 784 pixels
scrambled = data_norm[:, eggs] # Resort the pixels order

# Show a few random digits
fig, axs = plt.subplots(3, 4, figsize=(10, 6))

for ax in axs.flatten():
  # Pick a random image
  rand_img_2_show = np.random.randint(0, high=data.shape[0])

  # Create the image
  img = np.reshape(scrambled[rand_img_2_show, :], (28, 28))
  ax.imshow(img, cmap='gray')

  # Title
  ax.set_title(f'The number {labels[rand_img_2_show]}')

plt.suptitle('The scrambled data', fontsize=20)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

### SHIFT THE TEST IMAGES

In [None]:
# First let's see how to shift a vectorized image
# print(test_loader.dataset.tensors[0].shape)
# Grab one image data
temp = test_loader.dataset.tensors[0][0, :]
# Reshape to 2D image
temp = temp.reshape(28, 28)

# Shift the image (pytorch calls it "rolling")
temp_shift = torch.roll(temp, shifts=8, dims=1)

# Now show them both
fig, ax = plt.subplots(1, 2, figsize=(10, 6))
ax[0].imshow(temp, cmap='gray')
ax[0].set_title('Original')

ax[1].imshow(temp_shift, cmap='gray')
ax[1].set_title('Shifted (Rolled)')

plt.show()

In [None]:
# Now repeat for all images in the test set

for i in range(test_loader.dataset.tensors[0].shape[0]):

  # Get the image
  img = test_loader.dataset.tensors[0][i, :]

  # Reshape and roll by max. 10 pixels
  rand_roll = np.random.randint(-10, 11)
  img       = torch.roll(img.reshape(28, 28), shifts=rand_roll, dims=1)

  # Re-vectorize and put back into the matrix 
  test_loader.dataset.tensors[0][i, :] = img.reshape(1, -1)

# Note: now run the previous cell again to confirm the shifting

In [None]:
# Randomly shift every image of the train set and then of the test set.
# CNN variant: the images are rolled in their stored layout, without the
# reshape / re-vectorize step of the vectorized version.
for loader in (train_loader, test_loader):

  all_images = loader.dataset.tensors[0]

  for idx in range(all_images.shape[0]):

    # Random shift of at most 10 pixels in either direction
    shift = np.random.randint(-10, 11)

    # Roll along dim 1 of the single image and write it back in place.
    # NOTE(review): assuming images are stored as (1, 28, 28), dim 1 is the
    # row axis here, whereas the vectorized cell rolls columns - confirm.
    all_images[idx, :, :] = torch.roll(all_images[idx, :, :], shifts=shift, dims=1)

# Note: now run the previous cell again to confirm the shifting

### NO7

In [None]:
# Step 1: Convert to tensor
data_tensor   = torch.tensor(data_norm).float()
labels_tensor = torch.tensor(labels).long()

# Boolean vector with the 7's
where7 = (labels == 7)

# Separate data into tensors with, and without 7's
data_no7   = data_tensor[~where7, :]
labels_no7 = labels_tensor[~where7]
data7      = data_tensor[where7, :]

# Step 3: Convert into PyTorch Datasets
train_data_set = TensorDataset(data_no7, labels_no7)
test_data_set  = TensorDataset(data7)

# Step 4: Translate into Dataloader objects
batch_size   = 32
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                          shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=test_data_set, 
                          batch_size=test_data_set.tensors[0].shape[0])

# Confirm that the separation is accurate
print(np.unique(labels_no7))

### AUTO_ENCODER_OCCLUSION

In [None]:
# Reconstruct a sample as an image

img = data_tensor[12345, :].view(28, 28)

occluded = copy.deepcopy(img)
occluded[10:13, :] = 1

fig, ax = plt.subplots(1, 2, figsize=(8, 5))

ax[0].imshow(img, cmap='gray')
ax[0].set_title('Original image')
ax[0].axis('off')

ax[1].imshow(occluded, cmap='gray')
ax[1].set_title('Occluded image')
ax[1].axis('off')

plt.show()

### (F)MNIST

In [None]:
# MNIST DATA
# Import dataset (comes with colab!)
# Passing the path lets NumPy open and close the file itself; the previous
# `open(...)` handle was never closed.
data = np.loadtxt('sample_data/mnist_train_small.csv', delimiter=',')

# Extract labels (first column), normalize pixels to [0, 1], reshape to N x 1 x 28 x 28
labels_T    = torch.tensor(data[:, 0]).long()
data        =              data[:, 1:]
data_norm   = data / np.max(data)
data_norm_T = torch.tensor(data_norm.reshape(data_norm.shape[0], 1, 28, 28)).float()

# Split the data (10% held out for testing)
train_data, test_data, train_labels, test_labels = train_test_split(data_norm_T, labels_T, test_size=0.1)

# Convert into PyTorch Datasets
train_data_set = TensorDataset(train_data, train_labels)
test_data_set  = TensorDataset(test_data,  test_labels)

# Translate into Dataloader objects (the test set is served as one batch)
batch_size   = 32
numbers_train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                        shuffle=True, drop_last=True)
numbers_test_loader  = DataLoader(dataset=test_data_set,
                        batch_size=len(test_data_set))

In [None]:
# FMNIST data
# Transformation
transform = T.Compose([T.ToTensor(), T.Normalize(0.5, 0.5)])

# Import the data and simutaneously apply the transform
train_data_set = torchvision.datasets.FashionMNIST(root='./data', train=True,   download=True, transform=transform)
test_data_set  = torchvision.datasets.FashionMNIST(root='./data', train=False,  download=True, transform=transform)

# Transform to dataloaders
batch_size           = 32
fashion_train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                        shuffle=True, drop_last=True)
fashion_test_loader  = DataLoader(dataset=test_data_set, 
                        batch_size=len(test_data_set))

In [None]:
# Transformations
# Training data: tensor conversion, random horizontal flip (augmentation), normalization
transform = T.Compose([T.ToTensor(),
                       T.RandomHorizontalFlip(p=0.5),
                       T.Normalize(0.5, 0.5)])

# Dev/test data: same conversion and normalization, but no random flip,
# so evaluation always sees the same unmodified images
eval_transform = T.Compose([T.ToTensor(),
                            T.Normalize(0.5, 0.5)])

# Import the data and simultaneously apply the transform
train_set = torchvision.datasets.FashionMNIST(root='./data', train=True,   download=True, transform=transform)
dev_test  = torchvision.datasets.FashionMNIST(root='./data', train=False,  download=True, transform=eval_transform)

# Split the devtest into two separate sets (first 6000 shuffled indices -> dev, rest -> test).
# The permutation is sized from the dataset instead of a hard-coded 10000.
rand_idx = np.random.permutation(len(dev_test)) # Random permutation of indices
dev_set  = Subset(dev_test, rand_idx[:6000])
test_set = Subset(dev_test, rand_idx[6000:])

# Translate into DataLoader objects (dev and test sets are each served as one batch)
batch_size   = 32
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)
dev_loader   = DataLoader(dataset=dev_set,   batch_size=len(dev_set))
test_loader  = DataLoader(dataset=test_set,  batch_size=len(test_set))


### GAN

In [None]:
# Import dataset (comes with colab!)
# Passing the path lets NumPy open and close the file itself; the previous
# `open(...)` handle was never closed.
data = np.loadtxt('sample_data/mnist_train_small.csv', delimiter=',')

# Drop the first column (the labels); only the pixel columns are kept
data   = data[:, 1:]

# Normalize the data to a range of [-1 1]
data_norm = data / np.max(data)
data_norm = 2 * data_norm - 1

data_T = torch.tensor(data_norm).float()
batch_size = 100

## EMNIST

In [None]:
# Download the dataset
cdata = torchvision.datasets.EMNIST(root='emnist', split='letters', download=True)

# Transform to 4D tensor for conv layer (and convert to float).
# `-1` infers the number of images instead of hard-coding 124800.
images = cdata.data.view(-1, 1, 28, 28).float()

# Eliminate the N/A and subtract 1 from the original
# Remove the first class category
letter_categories = cdata.classes[1:]
# Relabel labels to start at 0. The subtraction already returns a new tensor,
# so `cdata.targets` is left untouched without an explicit deepcopy.
labels = cdata.targets - 1

# Normalize the images by their maximum value
images /= torch.max(images)

### REVIEW

In [None]:
# INSPECT THE DATA

# The categories
print(cdata.classes)
print(f'{len(cdata.classes)} classes')
print(f'\nData size: {cdata.data.shape}')

# Transform to 4D tensor for conv layer (and transform from int8 to float)
images = cdata.data.view([124800, 1, 28, 28]).float()
print(f'Tensor data: {images.shape}')

In [None]:
# Eliminate the N/A and subtract 1 from the original
# Remove the first class category
letter_categories = cdata.classes[1:]

# Relabel labels to start at 0
labels = copy.deepcopy(cdata.targets) - 1
print(labels.shape)

print(torch.sum(labels == 0))
print(torch.unique(labels))

In [None]:
# Next issue: do we need to normalize the images?
# `.flatten()` hands matplotlib one 1-D sample. A (1, N) array can be read by
# `hist` as N separate datasets (one per column), which is slow and wrong.
plt.hist(images[:10, :, :, :].flatten().detach(), 40);
plt.title('Raw values')
plt.show()

# Yes: scale by the maximum pixel value (in place)
images /= torch.max(images)

plt.hist(images[:10, :, :, :].flatten().detach(), 40);
plt.title('After normalization')
plt.show()

In [None]:
# Visualize some images
fig, axs = plt.subplots(3, 7, figsize=(13, 6))

for i, ax in enumerate(axs.flatten()):

    # Pick a random pic
    which_pic = np.random.randint(images.shape[0])

    # Extract the image and its target letter
    I      = np.squeeze(images[which_pic, :, :])
    letter = letter_categories[labels[which_pic]]
    
    # Visualize
    ax.imshow(I.T, cmap='gray')
    ax.set_title(f'The letter {letter}')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

## CIFAR10

In [None]:
c_data = torchvision.datasets.CIFAR10(root='cifar10', download=True)

## GAUSS
Create Gaussian blurs with different widths

In [None]:
n_per_class = 1000 # Total 2000 images
img_size    = 91

x    = np.linspace(start=-4, stop=4, num=img_size)
X, Y = np.meshgrid(x, x)

# The two widths (a.u.) sigma
widths = [1.8, 2.4]

# Initialize tensors containing images and labels
images = torch.zeros(2 * n_per_class, 1, img_size, img_size)
labels = torch.zeros(2 * n_per_class)

for i in range(2 * n_per_class):
    
    # Create the gaussian with random centers
    # Ro = random offset
    # `i % 2`: Even image belong to label 0
    ro = 2 * np.random.randn(2)
    G  = np.exp(-((X - ro[0]) ** 2 + (Y - ro[1]) ** 2) / (2 * widths[i % 2] ** 2))
    
    # And add noise
    G += np.random.randn(img_size, img_size) / 5
    
    # Add to the tensor
    # `.view`: Convert from numpy matrix to PTorch Tensor
    images[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)
    labels[i]          = i % 2

labels = labels[:, None]

In [None]:
# Visualize some images
fig, axs = plt.subplots(3, 7, figsize=(13, 6))

for i, ax in enumerate(axs.flatten()):
    which_pic = np.random.randint(2 * n_per_class)
    G         = np.squeeze(images[which_pic, :, :])
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet')
    ax.set_title(f'Class {int(labels[which_pic].item())}')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

### VARYING WIDTHS AND OCCLUDING BARS

In [None]:
n_gauss     = 1000 # Number of images
img_size    = 91

x    = np.linspace(start=-4, stop=4, num=img_size)
X, Y = np.meshgrid(x, x)

# Vary the width smoothly across the dataset
widths = np.linspace(start=2, stop=20, num=n_gauss)

# Initialize tensor containing the images
images = torch.zeros(n_gauss, 1, img_size, img_size)

for i in range(n_gauss):

    # Create the gaussian with a random center
    # ro = random offset
    ro = 1.5 * np.random.randn(2)
    G  = np.exp(-((X - ro[0]) ** 2 + (Y - ro[1]) ** 2) / widths[i])

    # And add noise
    G += np.random.randn(img_size, img_size) / 5

    # Add an occluding bar at a random position.
    # i1 = start index, i2 = thickness (2-5 pixels). The start index is drawn
    # relative to `img_size` (as in the occluded/non-occluded cell) instead of
    # the fixed 2..27 range, which only covered the first third of a 91-pixel image.
    i1 = np.random.choice(np.arange(10, img_size - 10))
    i2 = np.random.choice(np.arange(2, 6))

    # Horizontal or vertical bar with equal probability
    if np.random.randn() > 0:
        G[i1:i1 + i2, :] = 1
    else:
        G[:, i1:i1 + i2] = 1

    # Add to the tensor
    # `.view`: reshape the converted tensor to 1 x img_size x img_size
    images[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)

In [None]:
# Visualize some images
fig, axs = plt.subplots(3, 7, figsize=(13, 6))

for i, ax in enumerate(axs.flatten()):
    which_pic = np.random.randint(n_gauss)
    G         = np.squeeze(images[which_pic, :, :])
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

### OCC AND NON-OCC

In [None]:
n_gauss     = 1000 # Total 2000 images
img_size    = 91

x    = np.linspace(start=-4, stop=4, num=img_size)
X, Y = np.meshgrid(x, x)

# Vary the weight smoothly
widths = np.linspace(start=2, stop=20, num=n_gauss)

# Initialize 02 tensors containing images
images_occ    = torch.zeros(n_gauss, 1, img_size, img_size)
images_no_occ = torch.zeros(n_gauss, 1, img_size, img_size)


for i in range(n_gauss):
    
    # Create the gaussian with random centers
    # Ro = random offset
    # `i % 2`: Even image belong to label 0
    ro = 1.5 * np.random.randn(2)
    G  = np.exp(-((X - ro[0]) ** 2 + (Y - ro[1]) ** 2) / widths[i])
    G += np.random.randn(img_size, img_size) / 5 # And add noise
    
    # Add the original to the no_occ
    images_no_occ[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)
    
    # Add a random bar randomly
    i1 = np.random.choice(np.arange(10, img_size - 10))
    i2 = np.random.choice(np.arange(2, 6))
    
    if np.random.randn() > 0:   G[i1:i1 + i2, :] = 1
    else:                       G[:, i1:i1 + i2] = 1
    
    # Add to the tensor with occlusion
    # `.view`: Convert from numpy matrix to PTorch Tensor
    images_occ[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)

In [None]:
# Visualize some images
fig, axs = plt.subplots(2, 10, figsize=(15, 3))

for i in range(10):
    which_pic = np.random.randint(n_gauss)
    
    axs[0, i].imshow(np.squeeze(images_no_occ[which_pic, :, :]), vmin=-1, vmax=1, cmap='jet')
    axs[0, i].set_xticks([]), axs[0, i].set_yticks([])
    
    axs[1, i].imshow(np.squeeze(images_occ   [which_pic, :, :]), vmin=-1, vmax=1, cmap='jet')
    axs[1, i].set_xticks([]), axs[1, i].set_yticks([])

plt.show()

### FIND GAUSS

In [None]:
n_gauss  = 1000
img_size = 91

x    = np.linspace(start=-4, stop=4, num=img_size)
X, Y = np.meshgrid(x, x)

# Initialize tensors containing images and labels
images = torch.zeros(n_gauss, 1, img_size, img_size)
labels = torch.zeros(n_gauss, 3)

for i in range(n_gauss):
    # Location and width parameters
    loc = np.max(x) / 2 + np.random.randn(2) # Center coordinate
    wid = np.random.rand() * 10 + 5          # Width of Gaussian
    
    # Create the Gaussian with random centers
    G =  np.exp(-((X - loc[0]) ** 2 + (Y - loc[1]) ** 2) / wid)
    G += np.random.randn(img_size, img_size) / 10
    
    # Add to the tensor
    images[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)
    labels[i, :]       = torch.tensor([loc[0], loc[1], wid])

In [None]:
# Visualize some randomly picked images with their target parameters
fig, axs = plt.subplots(3, 7, figsize=(15, 7))

for ax in axs.flatten():
    which_pic = np.random.randint(n_gauss)
    G         = np.squeeze(images[which_pic, :, :])
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet', extent=[-4, 4, -4, 4], origin='upper')

    # Title: center coordinates and width (closing parenthesis now ends the XY pair)
    loc_x, loc_y, wid = labels[which_pic]
    ax.set_title(f'XY=({loc_x:.0f}, {loc_y:.0f}), W={wid:.0f}')

    # Dashed cross through the origin of the plotted coordinate system
    ax.plot([-4, 4], [0, 0], 'w--')
    ax.plot([0, 0], [-4, 4], 'w--')
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

### GAN

In [None]:
n_images = 3000
img_size = 64

x    = np.linspace(start=-4, stop=4, num=img_size)
X, Y = np.meshgrid(x, x)

# Initialize tensors containing images and labels
images = torch.zeros(n_images, 1, img_size, img_size)

for i in range(n_images):
    
    # Create the gaussian with random centers
    # Ro = random offset
    # `i % 2`: Even image belong to label 0
    ro    = 2 * np.random.randn(2)
    width = np.random.rand() / 0.6 + 1.8 # Random width
    G     = np.exp(-((X - ro[0]) ** 2 + (Y - ro[1]) ** 2) / (2 * width ** 2))
    
    # And add noise
    G += np.random.randn(img_size, img_size) / 5
    
    # Add to the tensor
    # `.view`: Convert from numpy matrix to PTorch Tensor
    images[i, :, :, :] = torch.tensor(G).view(1, img_size, img_size)

In [None]:
# Visualize some images
fig, axs = plt.subplots(3, 7, figsize=(13, 6))

for i, ax in enumerate(axs.flatten()):
    which_pic = np.random.randint(n_images)
    G         = np.squeeze(images[which_pic, :, :])
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

# STL10

In [None]:
# Transformations
transform = T.Compose([T.ToTensor(),    # Normalizes to range [0, 1]
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # Further normalization

# Import the data and simultaneously apply the transform
train_data_set = torchvision.datasets.STL10(root='./data', download=True, split='train', transform=transform)
test_data_set  = torchvision.datasets.STL10(root='./data', download=True, split='test',  transform=transform)

# Translate into Dataloader objects
batch_size   = 32
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                        shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=test_data_set, 
                        batch_size=256)

### REVIEW

In [None]:
# Shape of the datasets
print(f'Data shapes (train/test):\n {train_data_set.data.shape}\n {test_data_set.data.shape}\n')
# Range of pixel intensity values
print(f'Data value range: {np.min(train_data_set.data)} -> {np.max(train_data_set.data)}\n')
# The unique categories
print(f'Data categories: {train_data_set.classes}')

In [None]:
# Histogram of the (transformed) pixel values of one training batch.
# A batch has to be fetched first: without this, `X` still refers to the
# NumPy meshgrid created in the Gaussian cells.
X, y = next(iter(train_loader))
plt.hist(X.data.numpy().flatten(), 100);

In [None]:
# INSPECT A FEW RANDOM IMAGES
# Fetch one batch from the shuffled training loader (images X, labels y)
X, y = next(iter(train_loader))

fig, axs = plt.subplots(4, 4, figsize=(10, 10))
for (i, ax) in enumerate(axs.flatten()):
    # Extract that image and move its first axis last (imshow expects height x width x channels)
    pic = X.data[i].numpy().transpose((1, 2, 0))

    # Undo the normalization for display: rescale to the range [0, 1]
    pic = pic - np.min(pic)
    pic = pic / np.max(pic)

    label = train_data_set.classes[y[i]]

    ax.imshow(pic)
    ax.text(0, 0, label, ha='left', va='top', fontweight='bold', color='k', backgroundcolor='y')
    ax.axis('off')

plt.tight_layout()
plt.show()

# PREPROCESS DATASET

## CREATE TRAIN/TEST GROUPS USING DATALOADER

In [None]:
# Convert to tensor
data_tensor   = torch.tensor(data_norm).float()
labels_tensor = torch.tensor(labels).long()

#  Use scikitlearn to split the data
train_data, test_data, train_labels, test_labels = \
    train_test_split(data_tensor, labels_tensor, test_size=0.1)

# Convert into PyTorch Datasets
train_data_set = TensorDataset(train_data, train_labels)
test_data_set  = TensorDataset(test_data, test_labels)

# Translate into Dataloader objects
batch_size   = 32
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                        shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=test_data_set, 
                        batch_size=test_data_set.tensors[0].shape[0])

# How many batches are there?
# print(f'There are {len(train_loader)} batches, each with { batch_size} samples')

# Sizes of each batch
# for data, label in data_loader:
#     print(f'BATCH INFO: {data.size()} | {label.size()}\n')

# print(train_loader.dataset.tensors) # All the images + All the labels
# print(test_loader.dataset.tensors[0].shape)

In [None]:
# Use scikitlearn to split the data
train_data, test_data, train_labels, test_labels = train_test_split(images, labels, test_size=0.1)

# Convert into PyTorch Datasets
train_data_set = TensorDataset(train_data, train_labels)
test_data_set  = TensorDataset(test_data,  test_labels)

# Translate into Dataloader objects
batch_size   = 32
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size,
                        shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=test_data_set, 
                        batch_size=test_data_set.tensors[0].shape[0])

##  TRAIN/DEV/TEST

In [None]:
# Specify sizes of the partitions
# Order is train, devset, test
partitions = [3 * n_per_clust - 400, 200, 200]

# Split the data: training set vs. everything else
train_data, dev_test_data, train_labels, dev_test_labels = train_test_split(data, labels, train_size=partitions[0])

# Now split the devtest data into devset and test set
dev_data, test_data, dev_labels, test_labels             = train_test_split(dev_test_data, dev_test_labels, train_size=partitions[1])

# Print out the sizes
print(f'   Total data size: {data.shape}')
print('--------------------------------------------')
print(f'Training data size: {train_data.shape}')
print(f'  Devset data size: {dev_data.shape}')
print(f'    Test data size: {test_data.shape}')

# Convert them into PyTorch Datasets
train_data_set = TensorDataset(train_data, train_labels)
dev_data_set   = TensorDataset(dev_data,   dev_labels)
test_data_set  = TensorDataset(test_data,  test_labels)

# Translate into DataLoader objects.
# Dev and test sets are served as one batch each. Their size is taken from the
# Dataset objects: `dev_data` / `test_data` are plain tensors with no `.tensors`.
batch_size   = 30
train_loader = DataLoader(dataset=train_data_set, batch_size=batch_size, shuffle=True, drop_last=True)
dev_loader   = DataLoader(dataset=dev_data_set,   batch_size=len(dev_data_set))
test_loader  = DataLoader(dataset=test_data_set,  batch_size=len(test_data_set))


## CUSTOM DATASET CLASS

In [None]:
from torch.utils.data import Dataset  # base class needed by custom_dataset below


class custom_dataset(Dataset):
    """
    Dataset over a sequence of tensors with an optional transform on the data.

    tensors:   sequence whose first entry holds the data and whose second entry
               holds the labels; all entries must have the same size along dim 0
    transform: optional callable applied to a data item each time it is fetched
    """
    def __init__(self, tensors, transform=None):
        """
        Check that sizes of data and labels match
        All of the images must have corresponding labels
        """
        assert all(tensors[0].size(0) == t.size(0) for t in tensors), "Size mismatch between tensors"

        # Assign inputs
        self.tensors   = tensors
        self.transform = transform

    def __getitem__(self, index):
        """
        Return the (data, label) pair at `index`.
        The data item is passed through the transform when one was supplied.
        """
        x = self.tensors[0][index]

        # Compare against None so that a transform object that happens to be
        # falsy (e.g. an empty container of transforms) is still applied
        if self.transform is not None:
            x = self.transform(x)

        # And return label
        y = self.tensors[1][index]

        # Return the (data,label) tuple
        return x, y

    def __len__(self):
        # Number of items = size of the data tensor along its first dimension
        return self.tensors[0].size(0)

# LOSS FUNCTION

## CNN_CUSTOM_LOSS_FUNC

In [None]:
# L1 Loss Function
class my_L1_Loss(nn.Module):
    """L1 loss: the mean absolute difference between y_hat and y."""

    def forward(self, y_hat, y):
        # Element-wise absolute error, averaged over all elements
        return (y_hat - y).abs().mean()

In [None]:
# L2 + Average Loss Function
class my_L2_Avg_Loss(nn.Module):
    """Mean squared error plus the absolute value of the mean prediction."""

    def forward(self, y_hat, y):
        # MSE term: average squared difference between prediction and target
        squared_error = (y_hat - y).pow(2)

        # Average term: |mean(y_hat)| penalizes predictions whose mean is away from zero
        mean_magnitude = y_hat.mean().abs()

        return squared_error.mean() + mean_magnitude

In [None]:
# Correlation Loss Function
class my_Corr_Loss(nn.Module):
    """Negative correlation coefficient between y_hat and y (lower is better)."""

    def forward(self, y_hat, y):
        # Deviations of each tensor from its own mean
        centered_hat = y_hat - y_hat.mean()
        centered_y   = y - y.mean()

        # Numerator: sum of the products of the deviations
        cross_sum = (centered_hat * centered_y).sum()

        # Denominator: (number of elements in y - 1) times both standard deviations
        scale = (y.numel() - 1) * y_hat.std() * y.std()

        # Negated so that a stronger positive correlation gives a smaller loss
        return -cross_sum / scale

# MODEL

## LOAD

In [None]:
# Create a fresh model instance; [0] selects the network from the
# (net, loss_func, optimizer) tuple returned by create_the_MNIST_net
net = create_the_MNIST_net()[0]
# Overwrite its parameters with the ones stored in 'trainedModel.pt'
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source
net.load_state_dict(torch.load('trainedModel.pt'))

## CLASS

### REGRESSION

In [None]:
# Minimal regression network: one input feature -> one hidden unit -> one output value
ANN_reg = nn.Sequential(    # Object containing the model
    nn.Linear(1, 1),        # Input layer | (number of inputs, number of outputs)
    nn.ReLU(),              # Non-linear activation function
    nn.Linear(1, 1)         # Output layer | prediction of the model
)

# Bare expression: when this is the last statement of a notebook cell the model summary is displayed
ANN_reg

# Learning rate
learning_rate = 0.05

# Loss function | The object that implements the MSE loss function
loss_func = nn.MSELoss()

# Optimizer (the flavor of gradient descent to implement) | Stochastic GD over the model's parameters
optimizer = torch.optim.SGD(params=ANN_reg.parameters(), lr=learning_rate)

### META_PARAM_RELUS

In [None]:
class ANN_wine(nn.Module):
    """
    META_PARAM_RELUS

    Fully connected network (11 -> 16 -> 32 -> 32 -> 1) whose hidden
    activation function is chosen by name.

    act_func: name of an activation class in torch.nn (for example 'ReLU')
    """

    def __init__(self, act_func):
        super().__init__()

        # Layers in input-to-output order ('fc' = fully connected)
        self.input  = nn.Linear(11, 16)
        self.fc1    = nn.Linear(16, 32)
        self.fc2    = nn.Linear(32, 32)
        self.output = nn.Linear(32, 1)

        # Only the name is stored; the class is looked up on every forward pass
        self.act_func = act_func

    def forward(self, x):
        # Resolve torch.nn.<self.act_func> and apply a fresh instance after each hidden layer
        activation_cls = getattr(torch.nn, self.act_func)
        for layer in (self.input, self.fc1, self.fc2):
            x = activation_cls()(layer(x))

        # The output layer is left linear
        return self.output(x)

### DATA_ UNBALANCED DATA | WEIGHTS_ XAVIER_VS._KAIMING

In [None]:
class ANN_wine(nn.Module):
    """
    DATA_ UNBALANCED DATA | WEIGHTS_ XAVIER_VS._KAIMING

    Fully connected network (11 -> 16 -> 32 -> 32 -> 1) with leaky-ReLU
    activations on every layer except the output.
    """

    def __init__(self):
        super().__init__()

        # Layers in input-to-output order ('fc' = fully connected)
        self.input  = nn.Linear(11, 16)
        self.fc1    = nn.Linear(16, 32)
        self.fc2    = nn.Linear(32, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, x):
        hidden = x
        for layer in (self.input, self.fc1, self.fc2):
            hidden = F.leaky_relu(layer(hidden))

        # Linear output, no activation
        return self.output(hidden)

### WEIGHTS_ XAVIER_KAIMING_INITS

In [None]:
class the_net(nn.Module):
    """Five-layer fully connected network: four 100-unit ReLU layers and a 2-unit output."""

    def __init__(self):
        super().__init__()

        # Input layer followed by three hidden layers, all 100 -> 100
        self.input = nn.Linear(100, 100)
        self.fc1   = nn.Linear(100, 100)
        self.fc2   = nn.Linear(100, 100)
        self.fc3   = nn.Linear(100, 100)

        # Output layer
        self.output = nn.Linear(100, 2)

    def forward(self, x):
        # ReLU after every layer except the last
        for layer in (self.input, self.fc1, self.fc2, self.fc3):
            x = F.relu(layer(x))

        return self.output(x)

### GAN_MNIST

In [None]:
class discriminator_net(nn.Module):
    """Dense discriminator: 784 inputs -> 256 -> 256 -> one sigmoid output."""

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 256)
        self.out = nn.Linear(256, 1)

    def forward(self, x):
        hidden = F.leaky_relu(self.fc1(x))
        hidden = F.leaky_relu(self.fc2(hidden))

        # Sigmoid squashes the single output into (0, 1)
        return torch.sigmoid(self.out(hidden))

In [None]:
class generator_net(nn.Module):
    """Dense generator: 64 inputs -> 256 -> 256 -> 784 tanh outputs."""

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(64, 256)
        self.fc2 = nn.Linear(256, 256)
        self.out = nn.Linear(256, 784)

    def forward(self, x):
        hidden = F.leaky_relu(self.fc1(x))
        hidden = F.leaky_relu(self.fc2(hidden))

        # tanh keeps every output value inside (-1, 1)
        return torch.tanh(self.out(hidden))

### GAN_CNN_GAUSS

In [None]:
class discriminator_net(nn.Module):
    """
    Convolutional discriminator.

    Five bias-free convolutions; batchnorm follows the leaky-ReLU of
    conv2..conv4. Returns one sigmoid value per row, shaped (-1, 1).
    """

    def __init__(self):
        super().__init__()

        # Convolution layers: the first four use stride 2, the last one has no padding
        self.conv1 = nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv5 = nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False)

        # Batchnorm for the outputs of conv2..conv4 (conv1 has none)
        self.bn2 = nn.BatchNorm2d(num_features=128)
        self.bn3 = nn.BatchNorm2d(num_features=256)
        self.bn4 = nn.BatchNorm2d(num_features=512)

    def forward(self, x):
        slope = 0.2

        # First stage: convolution + leaky ReLU only
        x = F.leaky_relu(self.conv1(x), negative_slope=slope)

        # Remaining stages: convolution -> leaky ReLU -> batchnorm
        stages = ((self.conv2, self.bn2), (self.conv3, self.bn3), (self.conv4, self.bn4))
        for conv, norm in stages:
            x = norm(F.leaky_relu(conv(x), negative_slope=slope))

        # Final convolution, squashed to (0, 1) and flattened to one column
        scores = torch.sigmoid(self.conv5(x))
        return scores.view(-1, 1)

In [None]:
class generator_net(nn.Module):
    """
    Convolutional generator.

    Five bias-free transposed convolutions; the first four are followed by
    batchnorm and ReLU, the last one by tanh.
    """

    def __init__(self):
        super().__init__()

        # Transposed-convolution layers
        self.conv1 = nn.ConvTranspose2d(100, 512, kernel_size=4, stride=1, padding=0, bias=False)
        self.conv2 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv3 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv4 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=False)
        self.conv5 = nn.ConvTranspose2d(64, 1, kernel_size=4, stride=2, padding=1, bias=False)

        # One batchnorm per hidden stage
        self.bn1 = nn.BatchNorm2d(num_features=512)
        self.bn2 = nn.BatchNorm2d(num_features=256)
        self.bn3 = nn.BatchNorm2d(num_features=128)
        self.bn4 = nn.BatchNorm2d(num_features=64)

    def forward(self, x):
        # Hidden stages: transposed convolution -> batchnorm -> ReLU
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for deconv, norm in stages:
            x = F.relu(norm(deconv(x)))

        # Output stage: tanh keeps values inside (-1, 1)
        return torch.tanh(self.conv5(x))

## CREATE

### META_PARAMS_MULTIOUTPUTS | DATA_SAVE_BEST_MODEL

In [None]:
def create_the_qwety_net():
    """
    META_PARAMS_MULTIOUTPUTS | DATA_SAVE_BEST_MODEL

    Build a 2 -> 8 -> 8 -> 3 fully connected classifier.

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and an SGD optimizer with lr=0.01.
    """
    class qwerty_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(2, 8)   # Input layer
            self.fc1    = nn.Linear(8, 8)   # Hidden layer
            self.output = nn.Linear(8, 3)   # Output layer

        def forward(self, x):
            # ReLU after the input and hidden layers; the output stays linear
            for layer in (self.input, self.fc1):
                x = F.relu(layer(x))
            return self.output(x)

    model     = qwerty_net()
    criterion = nn.CrossEntropyLoss()
    sgd       = torch.optim.SGD(params=model.parameters(), lr=0.01)

    return model, criterion, sgd

### META_PARAMS_OPTIMIZERS_QWERTY

In [None]:
def create_the_qwety_net(optimizer_algo, learning_rate):
    """
    META_PARAMS_OPTIMIZERS_QWERTY

    Build a 2 -> 8 -> 8 -> 3 fully connected classifier with a selectable optimizer.

    optimizer_algo: name of an optimizer class in torch.optim (e.g. 'SGD', 'RMSprop', 'Adam')
    learning_rate:  value passed to the optimizer as lr

    Returns the tuple (net, loss_func, optimizer).
    """
    class qwerty_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(2, 8)   # Input layer
            self.fc1    = nn.Linear(8, 8)   # Hidden layer
            self.output = nn.Linear(8, 3)   # Output layer

        def forward(self, x):
            hidden = F.relu(self.input(x))
            hidden = F.relu(self.fc1(hidden))
            return self.output(hidden)

    model     = qwerty_net()
    criterion = nn.CrossEntropyLoss()

    # Look up the optimizer class by name, then bind it to the model's parameters
    optimizer_cls = getattr(torch.optim, optimizer_algo)
    chosen_optim  = optimizer_cls(model.parameters(), lr=learning_rate)

    return model, criterion, chosen_optim

# # Test the model with optimizer type as input

# # Try 'SGD', 'RMSprop', and 'Adam'
# optim = create_the_qwety_net('RMSprop', 0.01)[2]
# optim

### FFN_NONMNIST, FFN_BINARIZED MNIST, FFN_NO7

In [None]:
def create_the_MNIST_net():
    """
    FFN_NONMNIST | FFN_BINARIZED MNIST | FFN_NO7

    Build a 784 -> 64 -> 32 -> 32 -> 10 fully connected classifier that
    outputs log-probabilities.

    Returns the tuple (net, loss_func, optimizer): the model, NLLLoss
    (which pairs with the log-softmax output) and SGD with lr=0.01.
    """
    class mnist_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784, 64)   # Input layer
            self.fc1    = nn.Linear(64, 32)    # Hidden layers
            self.fc2    = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)    # Output layer

        def forward(self, x):
            for layer in (self.input, self.fc1, self.fc2):
                x = F.relu(layer(x))

            # Log-softmax across the 10 output units of each sample
            logits = self.output(x)
            return torch.log_softmax(logits, dim=1)

    model = mnist_net()

    return model, nn.NLLLoss(), torch.optim.SGD(model.parameters(), lr=0.01)

### FFN_WEIGHTHISTOGRAMS, MODEL_PERFORM_APRF_WINE, WEIGHTS_FREEZE_WEIGHTS, WEIGHTS_WEIGHTS_CHANGES 

In [None]:
def create_the_MNIST_net():
    """
    FFN_WEIGHTHISTOGRAMS | MODEL_PERFORM_APRF_WINE | WEIGHTS_FREEZE_WEIGHTS | WEIGHTS_WEIGHTS_CHANGES

    Build a 784 -> 64 -> 32 -> 32 -> 10 fully connected classifier that
    outputs raw scores.

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and SGD with lr=0.001.
    """
    class mnist_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784, 64)   # Input layer
            self.fc1    = nn.Linear(64, 32)    # Hidden layers
            self.fc2    = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)    # Output layer

        def forward(self, x):
            hidden = F.relu(self.input(x))
            hidden = F.relu(self.fc1(hidden))
            hidden = F.relu(self.fc2(hidden))
            return self.output(hidden)

    model     = mnist_net()
    criterion = nn.CrossEntropyLoss()
    sgd       = torch.optim.SGD(model.parameters(), lr=0.001)

    return model, criterion, sgd


### FFN_ BREADTH VS. DEPTH

In [None]:
def create_the_MNIST_net(n_units, n_layers):
    """
    FFN_ BREADTH VS. DEPTH

    Build a fully connected classifier with configurable width and depth.

    n_units:  number of units in the input layer and in every hidden layer
    n_layers: number of hidden layers (0 gives input -> output only)

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and SGD with lr=0.01.
    """
    class mnist_net(nn.Module):
        def __init__(self, n_units, n_layers):
            super().__init__()

            # Create dictionary to store the layers
            self.layers   = nn.ModuleDict()
            self.n_layers = n_layers

            # Input layer
            self.layers['input'] = nn.Linear(784, n_units)

            # Hidden layers, stored under the keys 'hidden0', 'hidden1', ...
            for i in range(n_layers):
                self.layers[f'hidden{i}'] = nn.Linear(n_units, n_units)

            # Output layer
            self.layers['output'] = nn.Linear(n_units, 10)

        # Forward pass
        def forward(self, x):
            # Input layer. The ReLU is required here: without it the input layer
            # and the layer that follows are two stacked linear maps, which
            # collapse into a single linear map and add no modelling capacity.
            x = F.relu(self.layers['input'](x))

            # Hidden layers
            for i in range(self.n_layers):
                x = F.relu(self.layers[f'hidden{i}'](x))

            # Output layer stays linear (raw scores for CrossEntropyLoss)
            return self.layers['output'](x)

    # Create the model instance
    net = mnist_net(n_units, n_layers)

    # Loss Function
    loss_func = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

    return net, loss_func, optimizer


### FFN_OPTIMIZERS

In [None]:
def create_the_mnist_net(optimizer_algo, learning_rate):
    """
    FFN_OPTIMIZERS

    Build a 784 -> 64 -> 32 -> 32 -> 10 fully connected classifier with a
    selectable optimizer.

    optimizer_algo: name of an optimizer class in torch.optim (e.g. 'SGD', 'RMSprop', 'Adam')
    learning_rate:  value passed to the optimizer as lr

    Returns the tuple (net, loss_func, optimizer).
    """
    class mnist_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784, 64)   # Input layer
            self.fc1    = nn.Linear(64, 32)    # Hidden layers
            self.fc2    = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)    # Output layer

        def forward(self, x):
            for layer in (self.input, self.fc1, self.fc2):
                x = F.relu(layer(x))
            return self.output(x)

    model     = mnist_net()
    criterion = nn.CrossEntropyLoss()

    # Resolve the optimizer class from its name and attach it to the model
    optimizer_cls = getattr(torch.optim, optimizer_algo)
    chosen_optim  = optimizer_cls(model.parameters(), lr=learning_rate)

    return model, criterion, chosen_optim

# # Test the model with optimizer type as input

# # Try 'SGD', 'RMSprop', and 'Adam'
# optim = create_the_mnist_net('RMSprop', 0.01)[2]
# optim

### FFN_SCRAMBLEDMNIST, FFN_SHIFTEDMNIST, DATA_ DATA_OVERSAMPLING, DATA_NOISE_AUGMENTATION

In [None]:
def create_the_MNIST_net():
    """
    FFN_SCRAMBLEDMNIST | FFN_SHIFTEDMNIST | DATA_ DATA_OVERSAMPLING | DATA_NOISE_AUGMENTATION

    Build a 784 -> 64 -> 32 -> 32 -> 10 fully connected classifier that
    outputs raw scores.

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and SGD with lr=0.01.
    """
    class mnist_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784, 64)   # Input layer
            self.fc1    = nn.Linear(64, 32)    # Hidden layers
            self.fc2    = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)    # Output layer

        def forward(self, x):
            activations = F.relu(self.input(x))
            activations = F.relu(self.fc1(activations))
            activations = F.relu(self.fc2(activations))
            return self.output(activations)

    model = mnist_net()

    return model, nn.CrossEntropyLoss(), torch.optim.SGD(model.parameters(), lr=0.01)


### DATA_DATA_VS_DEPTH_QWERTY2

In [None]:
def create_the_qwerty_net(n_units, n_layers):
    """
    DATA_DATA_VS_DEPTH_QWERTY2

    Build a fully connected 3-class classifier for 2-feature inputs with
    configurable width and depth.

    n_units:  number of units in the input layer and in every hidden layer
    n_layers: number of hidden layers (0 gives input -> output only)

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and SGD with lr=0.01.
    """
    class qwerty_net(nn.Module):
        def __init__(self, n_units, n_layers):
            super().__init__()

            # Create dictionary to store the layers
            self.layers = nn.ModuleDict()
            self.n_layers = n_layers

            # Input layer
            self.layers['input'] = nn.Linear(2, n_units)

            # Hidden layers, stored under the keys 'hidden0', 'hidden1', ...
            for i in range(n_layers):
                self.layers[f'hidden{i}'] = nn.Linear(n_units, n_units)

            # Output layer
            self.layers['output'] = nn.Linear(n_units, 3)

        # Forward pass
        def forward(self, x):
            # Input layer. The ReLU is required here: without it the input layer
            # and the layer that follows are two stacked linear maps, which
            # collapse into a single linear map and add no modelling capacity.
            x = F.relu(self.layers['input'](x))

            # Hidden layers
            for i in range(self.n_layers):
                x = F.relu(self.layers[f'hidden{i}'](x))

            # Output layer stays linear (raw scores for CrossEntropyLoss)
            return self.layers['output'](x)

    # Create the model instance
    net = qwerty_net(n_units, n_layers)

    # Loss function
    loss_func = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.SGD(net.parameters(), lr=.01)

    return net, loss_func, optimizer

In [None]:
# Smoke-test the model factory with random input
n_units_per_layer = 12
n_layers = 4

# Build the model; the loss function and optimizer are returned too but not used in this cell
net, loss_func, optimizer = create_the_qwerty_net(n_units_per_layer, n_layers)
print(net)

# Input is 10 samples with 2 features each, drawn uniformly from [0, 1)
# NOTE(review): the name `input` shadows the Python builtin for the rest of the session
input = torch.rand(10, 2)
# Forward pass; as the last expression of a cell the result is displayed
net(input)

### DATA_DATA_FEATURE_AUGMENTATION

In [None]:
def create_the_qwety_net(use_extra_feature=False):
    """
    DATA_DATA_FEATURE_AUGMENTATION

    Build an 8-unit, 3-class fully connected classifier that uses either
    the first two input columns or three input features.

    use_extra_feature: when True the input layer takes 3 features; when
                       False only the first two columns of the input are used

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and SGD with lr=0.001.
    """
    # Width of the input layer depends on whether the extra feature is used
    n_inputs = 3 if use_extra_feature else 2

    class qwerty_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(n_inputs, 8)   # Input layer
            self.fc1    = nn.Linear(8, 8)          # Hidden layer
            self.output = nn.Linear(8, 3)          # Output layer

        def forward(self, x):
            # On request, keep only the first two columns (the XY features)
            features = x if use_extra_feature else x[:, :2]

            hidden = F.relu(self.input(features))
            hidden = F.relu(self.fc1(hidden))
            return self.output(hidden)

    model     = qwerty_net()
    criterion = nn.CrossEntropyLoss()
    sgd       = torch.optim.SGD(params=model.parameters(), lr=0.001)

    return model, criterion, sgd

### MODEL_PERFORM_APRF_WINE

In [None]:
class ANN_wine(nn.Module):
    """
    APRF_WINE

    Fully connected network (11 -> 16 -> 32 -> 32 -> 1) with ReLU
    activations on every layer except the output.
    """

    def __init__(self):
        super().__init__()

        # Layers in input-to-output order ('fc' = fully connected)
        self.input  = nn.Linear(11, 16)
        self.fc1    = nn.Linear(16, 32)
        self.fc2    = nn.Linear(32, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, x):
        hidden = x
        for layer in (self.input, self.fc1, self.fc2):
            hidden = F.relu(input=layer(hidden))

        # Linear output, no activation
        return self.output(hidden)

### MODEL_PERFORM_MNIST_NO7, MODEL_PERFORM_TIME | WEIGHTS_DEMO_INITS | WEIGHTS_VARIANCE_INITS

In [None]:
def create_the_MNIST_net():
    """
    MODEL_PERFORM_MNIST_NO7 | MODEL_PERFORM_TIME | WEIGHTS_DEMO_INITS | WEIGHTS_VARIANCE_INITS

    Build a 784 -> 64 -> 32 -> 32 -> 10 fully connected classifier that
    outputs raw scores.

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and Adam with lr=0.01.
    """
    class mnist_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input  = nn.Linear(784, 64)   # Input layer
            self.fc1    = nn.Linear(64, 32)    # Hidden layers
            self.fc2    = nn.Linear(32, 32)
            self.output = nn.Linear(32, 10)    # Output layer

        def forward(self, x):
            for layer in (self.input, self.fc1, self.fc2):
                x = F.relu(layer(x))
            return self.output(x)

    model     = mnist_net()
    criterion = nn.CrossEntropyLoss()
    adam      = torch.optim.Adam(model.parameters(), lr=0.01)

    return model, criterion, adam

### AUTOENCODER_DENOISING_MNIST

In [None]:
# Create a class for the model
def create_the_MNIST_AE():
    """
    AUTOENCODER_DENOISING_MNIST

    Build a 784 -> 250 -> 50 -> 250 -> 784 fully connected autoencoder
    with a sigmoid output.

    Returns the tuple (net, loss_func, optimizer): the model, an MSE loss
    and Adam with lr=0.001.
    """
    class ae_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input = nn.Linear(784, 250)   # Input layer
            self.enc   = nn.Linear(250, 50)    # Encoder layer (down to the 50-unit code)
            self.lat   = nn.Linear(50, 250)    # Latent layer (expands the code again)
            self.dec   = nn.Linear(250, 784)   # Decoder layer

        def forward(self, x):
            for layer in (self.input, self.enc, self.lat):
                x = F.relu(layer(x))

            # Sigmoid keeps the reconstruction inside (0, 1)
            return torch.sigmoid(self.dec(x))

    model = ae_net()

    return model, nn.MSELoss(), torch.optim.Adam(params=model.parameters(), lr=0.001)

### AUTOENCODER_HOW_MANY_UNIT

In [None]:
# Create a class for the model
def create_the_MNIST_AE(n_enc, n_bottle):
    """
    AUTOENCODER_HOW_MANY_UNIT

    Build a 784 -> n_enc -> n_bottle -> n_enc -> 784 fully connected
    autoencoder with a sigmoid output.

    n_enc:    number of units in the encoding and decoding layers
    n_bottle: number of units in the bottleneck

    Returns the tuple (net, loss_func, optimizer): the model, an MSE loss
    and Adam with lr=0.001.
    """
    class ae_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input      = nn.Linear(784, n_enc)        # Input layer
            self.encoding   = nn.Linear(n_enc, n_bottle)   # Encoder layer
            self.bottleneck = nn.Linear(n_bottle, n_enc)   # Latent layer
            self.decoding   = nn.Linear(n_enc, 784)        # Decoder layer

        def forward(self, x):
            hidden = F.relu(self.input(x))
            hidden = F.relu(self.encoding(hidden))
            hidden = F.relu(self.bottleneck(hidden))

            # Sigmoid keeps the reconstruction inside (0, 1)
            return torch.sigmoid(self.decoding(hidden))

    model     = ae_net()
    criterion = nn.MSELoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### AUTO_ENCODER_OCCLUSION

In [None]:
# Create a class for the model
def create_the_MNIST_AE():
    """
    AUTO_ENCODER_OCCLUSION

    Build a 784 -> 128 -> 50 -> 128 -> 784 fully connected autoencoder
    with a sigmoid output.

    Returns the tuple (net, loss_func, optimizer): the model, an MSE loss
    and Adam with lr=0.001.
    """
    class ae_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input = nn.Linear(784, 128)   # Input layer
            self.enc   = nn.Linear(128, 50)    # Encoder layer (down to the 50-unit code)
            self.lat   = nn.Linear(50, 128)    # Latent layer (expands the code again)
            self.dec   = nn.Linear(128, 784)   # Decoder layer

        def forward(self, x):
            encoded = F.relu(self.enc(F.relu(self.input(x))))
            expanded = F.relu(self.lat(encoded))

            # Sigmoid keeps the reconstruction inside (0, 1)
            return torch.sigmoid(self.dec(expanded))

    model     = ae_net()
    criterion = nn.MSELoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### AUTOENCODER_ LATENT_CODE

In [None]:
# Create a class for the model
def create_the_MNIST_AE():
    """
    AUTOENCODER_ LATENT_CODE

    Build a 784 -> 150 -> 15 -> 150 -> 784 fully connected autoencoder
    whose forward pass also exposes the 15-unit code.

    Returns the tuple (net, loss_func, optimizer): the model, an MSE loss
    and Adam with lr=0.001. Calling the model returns (reconstruction, code).
    """
    class ae_net(nn.Module):
        def __init__(self):
            super().__init__()

            self.input = nn.Linear(784, 150)   # Input layer
            self.enc   = nn.Linear(150, 15)    # Encoder layer (down to the 15-unit code)
            self.lat   = nn.Linear(15, 150)    # Latent layer (expands the code again)
            self.dec   = nn.Linear(150, 784)   # Decoder layer

        def forward(self, x):
            # Encoder half: the ReLU activation of `enc` is the code handed back to the caller
            codex = F.relu(self.enc(F.relu(self.input(x))))

            # Decoder half: expand the code and reconstruct with a sigmoid output
            reconstruction = torch.sigmoid(self.dec(F.relu(self.lat(codex))))

            return reconstruction, codex

    model     = ae_net()
    criterion = nn.MSELoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_MNIST

In [None]:
def create_the_MNIST_net(print_toggle=False):
    """
    CNN_MNIST

    Build a small CNN: two conv -> maxpool -> ReLU stages followed by a
    50-unit fully connected layer and a 10-unit output.

    print_toggle: when True, tensor shapes are printed during the forward pass

    Returns the tuple (net, loss_func, optimizer): the model, a
    cross-entropy loss and Adam with lr=0.001.
    """

    class mnist_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()
            # Size after one conv + pool stage:
            #   (floor((input + 2 * padding - kernel) / stride) + 1) / pool

            # Convolution layers (pooling has no parameters, so it lives in forward)
            # The sizes in the comments assume a 28x28 input
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=1)    # conv + pool -> 13
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=1)   # conv + pool -> 5

            # Units entering fc1: the same size formula with padding 0, kernel 1
            # and stride 1 on a side of 5, times the 20 feature maps of conv2
            side        = (5 + 2 * 0 - 1) // 1 + 1
            expect_size = 20 * side ** 2

            self.fc1 = nn.Linear(expect_size, 50)   # Fully connected layer
            self.out = nn.Linear(50, 10)            # Output layer

            # Toggle for printing out tensor sizes during forward prop
            self.print = print_toggle

        def forward(self, x):
            verbose = self.print

            if verbose:
                print(f'Input: {x.shape}')

            # Stage 1: convolution -> maxpool -> ReLU
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            if verbose:
                print(f'Layer conv1/pool1: {x.shape}')

            # Stage 2: convolution -> maxpool -> ReLU
            x = F.relu(F.max_pool2d(self.conv2(x), 2))
            if verbose:
                print(f'Layer conv2/pool2: {x.shape}')

            # Flatten every sample: total element count divided by the number of samples
            n_units = x.shape.numel() // x.shape[0]
            x = x.view(-1, n_units)
            if verbose:
                print(f'Vectorized: {x.shape}')

            # Fully connected part
            x = F.relu(self.fc1(x))
            if verbose:
                print(f'Layer fc1: {x.shape}')
            x = self.out(x)
            if verbose:
                print(f'Layer out: {x.shape}')

            return x

    model     = mnist_net(print_toggle)
    criterion = nn.CrossEntropyLoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_CLASSIFY_GAUSSIAN_BLURS

In [None]:
def make_the_net():
    """
    CNN_CLASSIFY_GAUSSIAN_BLURS

    Build a CNN with two conv -> ReLU -> average-pool stages followed by
    two linear layers, all held in one nn.Sequential.

    Returns the tuple (net, loss_func, optimizer): the model,
    BCEWithLogitsLoss and Adam with lr=0.001.
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Size comments follow a 91x91 input, as the arithmetic below shows
            conv_stages = [
                nn.Conv2d(1, 6, kernel_size=3, padding=1),  # (91 + 2*1 - 3)/1 + 1 = 91
                nn.ReLU(),                                  # Treated like a "layer"
                nn.AvgPool2d(2, 2),                         # 91 / 2 = 45

                nn.Conv2d(6, 4, kernel_size=3, padding=1),  # (45 + 2*1 - 3)/1 + 1 = 45
                nn.ReLU(),
                nn.AvgPool2d(2, 2),                         # 45 / 2 = 22
            ]
            dense_stages = [
                nn.Flatten(),                               # Vectorize conv output
                nn.Linear(22 * 22 * 4, 50),                 # 50
                nn.Linear(50, 1),                           # 1
            ]

            # All layers in one go using nn.Sequential
            self.enc = nn.Sequential(*conv_stages, *dense_stages)

        def forward(self, x):
            return self.enc(x)

    model     = gauss_net()
    criterion = nn.BCEWithLogitsLoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_GAUSS_FEATURE_MAPS

In [None]:
def make_the_net():
    """
    CNN_GAUSS_FEATURE_MAPS

    Build a CNN with two conv -> ReLU -> average-pool stages and two
    linear layers, whose forward pass also returns the conv activations.

    Returns the tuple (net, loss_func, optimizer): the model,
    BCEWithLogitsLoss and Adam with lr=0.001. Calling the model returns
    (output, conv1_act, conv2_act).
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Size comments follow a 91x91 input, as the arithmetic below shows
            # Conv1 -> (91 + 2*1 - 3)/1 + 1 = 91, pooled to 91 / 2 = 45
            self.conv1 = nn.Conv2d(1, 6, kernel_size=3, padding=1)

            # Conv2 -> (45 + 2*1 - 3)/1 + 1 = 45, pooled to 45 / 2 = 22
            self.conv2 = nn.Conv2d(6, 4, kernel_size=3, padding=1)

            # Fully connected layers: 4 maps of 22x22 -> 50 -> 1
            self.fc1 = nn.Linear(22*22*4, 50)
            self.fc2 = nn.Linear(50, 1)

        def forward(self, x):
            # First conv-pool set; the pre-pooling activation is kept for the caller
            conv1_act = F.relu(self.conv1(x))
            pooled    = F.avg_pool2d(conv1_act, (2, 2))

            # Second conv-pool set
            conv2_act = F.relu(self.conv2(pooled))
            pooled    = F.avg_pool2d(conv2_act, (2, 2))

            # ANN part: flatten each sample, then the two linear layers
            flat   = pooled.reshape(pooled.shape[0], -1)
            output = self.fc2(F.relu(self.fc1(flat)))

            return output, conv1_act, conv2_act

    model     = gauss_net()
    criterion = nn.BCEWithLogitsLoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_SOFTCODE

In [None]:
def make_the_net():
    """
    CNN_SOFTCODE

    Build a CNN with non-square kernels and strides whose first linear
    layer is sized from the convolution settings rather than hard-coded.

    Returns the tuple (net, loss_func, optimizer): the model,
    BCEWithLogitsLoss and Adam with lr=0.001. Calling the model returns
    (output, conv1_act, conv2_act).
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Soft-coded settings shared by both convolutions
            k = (3, 2)  # Kernel size
            s = (2, 3)  # Stride size

            self.conv1 = nn.Conv2d(1, 3, kernel_size=k, stride=s)
            self.conv2 = nn.Conv2d(3, 6, kernel_size=k, stride=s)

            # Track the image size along both axes, starting from 91x91.
            # Each stage is a convolution followed by 2x2 pooling:
            #   conv: floor((size + 2*padding - kernel) / stride) + 1
            #   pool: floor(size / 2)
            size = [91, 91]
            for conv in (self.conv1, self.conv2):
                for axis in (0, 1):
                    after_conv = (size[axis] + 2 * conv.padding[axis] - k[axis]) // s[axis] + 1
                    size[axis] = after_conv // 2

            # Fc1 takes every unit of the final feature maps
            self.fc1 = nn.Linear(size[1] * size[0] * self.conv2.out_channels, 50)

            # Fc2 (output)
            self.fc2 = nn.Linear(50, 1)

        def forward(self, x):
            # First conv-pool set; the pre-pooling activation is kept for the caller
            conv1_act = F.relu(self.conv1(x))
            pooled    = F.avg_pool2d(conv1_act, (2, 2))

            # Second conv-pool set
            conv2_act = F.relu(self.conv2(pooled))
            pooled    = F.avg_pool2d(conv2_act, (2, 2))

            # ANN part: flatten each sample, then the two linear layers
            flat   = pooled.reshape(pooled.shape[0], -1)
            output = self.fc2(F.relu(self.fc1(flat)))

            return output, conv1_act, conv2_act

    model     = gauss_net()
    criterion = nn.BCEWithLogitsLoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_LINEAR_UNITS

In [None]:
def make_the_net(fc_units):
    """
    CNN_LINEAR_UNITS

    Build a CNN with two conv -> ReLU -> average-pool stages followed by
    three linear layers whose widths depend on fc_units.

    fc_units: width of the second linear layer; the first one is twice as wide

    Returns the tuple (net, loss_func, optimizer): the model,
    BCEWithLogitsLoss and Adam with lr=0.001.
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Size comments follow a 91x91 input, as the arithmetic below shows
            conv_stages = [
                nn.Conv2d(1, 6, kernel_size=3, padding=1),  # (91 + 2*1 - 3)/1 + 1 = 91
                nn.ReLU(),                                  # Treated like a "layer"
                nn.AvgPool2d(2, 2),                         # 91 / 2 = 45

                nn.Conv2d(6, 4, kernel_size=3, padding=1),  # (45 + 2*1 - 3)/1 + 1 = 45
                nn.ReLU(),
                nn.AvgPool2d(2, 2),                         # 45 / 2 = 22
            ]
            dense_stages = [
                nn.Flatten(),                               # Vectorize conv output
                nn.Linear(22 * 22 * 4, 2 * fc_units),       # 2 * fc_units
                nn.Linear(2 * fc_units, fc_units),          # fc_units
                nn.Linear(fc_units, 1),                     # 1
            ]

            # All layers in one go using nn.Sequential
            self.enc = nn.Sequential(*conv_stages, *dense_stages)

        def forward(self, x):
            return self.enc(x)

    model     = gauss_net()
    criterion = nn.BCEWithLogitsLoss()
    adam      = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_GAUSS_AUTOENCODER|GAUSS_AE_OCCLUSION

In [None]:
def make_the_net():
    """
    CNN_GAUSS_AUTOENCODER|GAUSS_AE_OCCLUSION

    Build a convolutional autoencoder with an MSE loss and an Adam optimizer.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an `nn.MSELoss`
        instance and Adam with lr=0.001.
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Encoder: two conv -> ReLU -> 2x2 max-pool stages (1 -> 6 -> 4 channels)
            encoder_layers = [
                nn.Conv2d(1, 6, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(6, 4, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            self.enc = nn.Sequential(*encoder_layers)

            # Decoder: two stride-2 transposed convolutions (4 -> 6 -> 1 channels)
            decoder_layers = [
                nn.ConvTranspose2d(4, 6, kernel_size=3, stride=2),
                nn.ReLU(),
                nn.ConvTranspose2d(6, 1, kernel_size=3, stride=2),
            ]
            self.dec = nn.Sequential(*decoder_layers)

        def forward(self, x):
            latent = self.enc(x)
            return self.dec(latent)

    model = gauss_net()
    criterion = nn.MSELoss()
    adam = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_CUSTOM_LOSS_FUNC

In [None]:
def make_the_net():
    """
    CNN_CUSTOM_LOSS_FUNC

    Build the convolutional autoencoder and pair it with a custom loss
    object (`my_L1_Loss`, defined elsewhere in the notebook) and Adam.

    Returns:
        Tuple `(net, loss_func, optimizer)`.
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Encoder: two conv -> ReLU -> 2x2 max-pool stages (1 -> 6 -> 4 channels)
            encoder_layers = [
                nn.Conv2d(1, 6, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(6, 4, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            self.enc = nn.Sequential(*encoder_layers)

            # Decoder: two stride-2 transposed convolutions (4 -> 6 -> 1 channels)
            decoder_layers = [
                nn.ConvTranspose2d(4, 6, kernel_size=3, stride=2),
                nn.ReLU(),
                nn.ConvTranspose2d(6, 1, kernel_size=3, stride=2),
            ]
            self.dec = nn.Sequential(*decoder_layers)

        def forward(self, x):
            latent = self.enc(x)
            return self.dec(latent)

    model = gauss_net()

    # Swap in my_L2_Avg_Loss() or my_Corr_Loss() here to try the other
    # custom losses.
    criterion = my_L1_Loss()

    adam = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_FIND_GAUSS

In [None]:
def make_the_net():
    """
    CNN_FIND_GAUSS

    Build a CNN that regresses three values per image, with an MSE loss and
    an Adam optimizer.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an `nn.MSELoss`
        instance and Adam with lr=0.001.
    """
    class gauss_net(nn.Module):
        def __init__(self):
            super().__init__()

            # Convolutional feature extractor. The size notes assume 91x91
            # inputs, which is what the 22 * 22 * 4 flattened width implies.
            feature_layers = [
                nn.Conv2d(1, 6, kernel_size=3, padding=1),  # 91 -> 91
                nn.ReLU(),
                nn.AvgPool2d(2, 2),                         # 91 -> 45
                nn.Conv2d(6, 4, kernel_size=3, padding=1),  # 45 -> 45
                nn.ReLU(),
                nn.AvgPool2d(2, 2),                         # 45 -> 22
            ]

            # Linear head: flatten -> 50 hidden units -> 3 outputs
            head_layers = [
                nn.Flatten(),
                nn.Linear(22 * 22 * 4, 50),
                nn.Linear(50, 3),
            ]

            # One Sequential keeps the layer indices (enc.0 ... enc.8)
            self.enc = nn.Sequential(*feature_layers, *head_layers)

        def forward(self, x):
            return self.enc(x)

    model = gauss_net()
    criterion = nn.MSELoss()
    adam = torch.optim.Adam(params=model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_EMNIST, TRANSFER_LETTER2NUMBER

In [None]:
def make_the_net(print_toggle=False):
    """
    CNN_EMNIST, TRANSFER_LETTER2NUMBER

    Build a two-block CNN classifier with 26 output units, plus its loss
    function and optimizer.

    Args:
        print_toggle: When True, the forward pass prints the tensor shape
            after every stage.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an
        `nn.CrossEntropyLoss` instance and Adam with lr=0.001.
    """
    class emnist_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()

            # Verbosity switch read by forward()
            self.print = print_toggle

            # Feature-map layers. Size notes assume 28x28 inputs:
            #   conv1 (k=3, p=1) keeps 28, the 2x2 max-pool halves it to 14
            #   conv2 (k=3, p=1) keeps 14, the 2x2 max-pool halves it to 7
            self.conv1 = nn.Conv2d(1, 6, kernel_size=3, padding=1)
            self.bnorm1 = nn.BatchNorm2d(num_features=6)
            self.conv2 = nn.Conv2d(6, 6, kernel_size=3, padding=1)
            self.bnorm2 = nn.BatchNorm2d(num_features=6)

            # Linear decision layers: 7 * 7 * 6 features -> 50 -> 26 outputs
            self.fc1 = nn.Linear(7 * 7 * 6, 50)
            self.fc2 = nn.Linear(50, 26)

        def forward(self, x):
            # Each block: convolution -> max-pool -> batch-norm -> leaky ReLU
            if self.print:
                print(f'Input:            {list(x.shape)}')

            pooled = F.max_pool2d(self.conv1(x), 2)
            x = F.leaky_relu(self.bnorm1(pooled))
            if self.print:
                print(f'First CPR Block:  {list(x.shape)}')

            pooled = F.max_pool2d(self.conv2(x), 2)
            x = F.leaky_relu(self.bnorm2(pooled))
            if self.print:
                print(f'Second CPR Block: {list(x.shape)}')

            # Flatten to (samples, features); features = total elements / samples
            x = x.view(-1, x.shape.numel() // x.shape[0])
            if self.print:
                print(f'Vectorized:       {list(x.shape)}')

            # Hidden linear layer, then the output layer
            hidden = F.leaky_relu(self.fc1(x))
            x = self.fc2(hidden)
            if self.print:
                print(f'Final output:     {list(x.shape)}')

            return x

    model = emnist_net(print_toggle)
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.001)

    return model, criterion, adam

### CNN_ HOW_LOW

In [None]:
def make_the_net(print_toggle=False):
    """
    CNN_ HOW_LOW

    Build a three-block CNN classifier with dropout and 26 output units,
    plus its loss function and optimizer.

    Args:
        print_toggle: When True, the forward pass prints the tensor shape
            after every stage.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an
        `nn.CrossEntropyLoss` instance and Adam with lr=0.001.
    """
    class emnist_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()

            # Verbosity switch read by forward()
            self.print = print_toggle

            ######################## FEATURE MAP LAYERS ########################
            # Size notes assume 28x28 inputs.
            self.conv1  = nn.Conv2d(1, 64, kernel_size=3, padding=1)
            self.bnorm1 = nn.BatchNorm2d(num_features=64)
                # (28 + 2*1 - 3)/1 + 1 = 28, pooled /2 = 14

            self.conv2  = nn.Conv2d(64, 128, kernel_size=3)
            self.bnorm2 = nn.BatchNorm2d(num_features=128)
                # (14 + 2*0 - 3)/1 + 1 = 12, pooled /2 = 6

            self.conv3  = nn.Conv2d(128, 256, kernel_size=3)
            self.bnorm3 = nn.BatchNorm2d(num_features=256)
                # (6 + 2*0 - 3)/1 + 1 = 4, pooled /2 = 2

            ######################## LINEAR DECISION LAYERS ####################
            self.fc1 = nn.Linear(2 * 2 * 256, 256)
            self.fc2 = nn.Linear(256, 64)
            self.fc3 = nn.Linear(64, 26)

        def forward(self, x):
            # Each block: convolution -> max-pool -> batch-norm -> leaky ReLU -> dropout
            if self.print:    print(f'Input:            {list(x.shape)}')

            x = F.max_pool2d(self.conv1(x), 2)
            x = F.leaky_relu(self.bnorm1(x))
            x = F.dropout(input=x, p=0.25, training=self.training)
            if self.print:    print(f'First CPR Block:  {list(x.shape)}')

            x = F.max_pool2d(self.conv2(x), 2)
            x = F.leaky_relu(self.bnorm2(x))
            x = F.dropout(input=x, p=0.25, training=self.training)
            if self.print:    print(f'Second CPR Block: {list(x.shape)}')

            x = F.max_pool2d(self.conv3(x), 2)
            x = F.leaky_relu(self.bnorm3(x))
            x = F.dropout(input=x, p=0.25, training=self.training)
            if self.print:    print(f'Third CPR Block:  {list(x.shape)}')

            # Flatten to (samples, features); features = total elements / samples
            n_units = x.shape.numel() / x.shape[0]
            x       = x.view(-1, int(n_units))
            x = F.dropout(input=x, p=0.25, training=self.training)
            if self.print:    print(f'Vectorized:       {list(x.shape)}')

            # Linear layers. Both hidden layers get a nonlinearity: without
            # one after fc2, fc2 -> fc3 is a single linear map at inference
            # time (dropout is the identity in eval mode) and the extra
            # layer adds no representational power.
            x = F.leaky_relu(self.fc1(x))
            x = F.dropout(input=x, p=0.5, training=self.training)
            x = F.leaky_relu(self.fc2(x))
            x = F.dropout(input=x, p=0.5, training=self.training)
            x = self.fc3(x)  # raw logits for CrossEntropyLoss
            if self.print:    print(f'Final output:     {list(x.shape)}')

            return x

    # Create the model instance
    net = emnist_net(print_toggle)

    # Loss Function
    loss_func = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    return net, loss_func, optimizer

### CNN_ NUM_CHANS

In [None]:
def make_the_net(num_chans=(6, 6)):
    """
    CNN_ NUM_CHANS

    Build the two-block CNN classifier with a configurable number of
    feature maps per convolution layer.

    Args:
        num_chans: Pair `(conv1_channels, conv2_channels)`.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an
        `nn.CrossEntropyLoss` instance and Adam with lr=0.001.
    """
    class emnist_net(nn.Module):
        def __init__(self, num_chans):
            super().__init__()

            first_chans = num_chans[0]
            second_chans = num_chans[1]

            # Feature-map layers. Size notes assume 28x28 inputs:
            #   conv1 (k=3, p=1) keeps 28, the 2x2 max-pool halves it to 14
            #   conv2 (k=3, p=1) keeps 14, the 2x2 max-pool halves it to 7
            self.conv1 = nn.Conv2d(1, first_chans, kernel_size=3, padding=1)
            self.bnorm1 = nn.BatchNorm2d(num_features=first_chans)
            self.conv2 = nn.Conv2d(first_chans, second_chans, kernel_size=3, padding=1)
            self.bnorm2 = nn.BatchNorm2d(num_features=second_chans)

            # Linear decision layers: 7 * 7 maps per channel -> 50 -> 26 outputs
            self.fc1 = nn.Linear(7 * 7 * second_chans, 50)
            self.fc2 = nn.Linear(50, 26)

        def forward(self, x):
            # Each block: convolution -> max-pool -> batch-norm -> leaky ReLU
            pooled = F.max_pool2d(self.conv1(x), 2)
            x = F.leaky_relu(self.bnorm1(pooled))

            pooled = F.max_pool2d(self.conv2(x), 2)
            x = F.leaky_relu(self.bnorm2(pooled))

            # Flatten to (samples, features); features = total elements / samples
            x = x.view(-1, x.shape.numel() // x.shape[0])

            # Hidden linear layer, then the output layer
            hidden = F.leaky_relu(self.fc1(x))
            return self.fc2(hidden)

    model = emnist_net(num_chans)
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.001)

    return model, criterion, adam

### TRANSFER_FMNIST

In [None]:
def create_the_MNIST_net(print_toggle=False):
    """
    TRANSFER_ FMNIST

    Build a two-convolution CNN with 10 output units, a cross-entropy loss
    and a plain SGD optimizer.

    Args:
        print_toggle: When True, the forward pass prints the tensor size
            after every stage.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an
        `nn.CrossEntropyLoss` instance and SGD with lr=0.005.
    """

    class mnist_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()
            # Spatial size after a conv + pool stage:
            #   (floor((input + 2 * padding - kernel) / stride) + 1) / pool

            # Convolution layers (pooling has no parameters, so it is applied
            # functionally in forward()). Assuming 28x28 inputs:
            #   conv1: 28 -> 26, pooled -> 13
            #   conv2: 13 -> 11, pooled -> 5
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=1)
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=1)

            # Width of the flattened conv2 output: 20 feature maps of 5x5
            side = int(np.floor((5 + 2 * 0 - 1) / 1) + 1)
            flat_width = 20 * side ** 2

            # Fully connected hidden layer and output layer
            self.fc1 = nn.Linear(flat_width, 50)
            self.out = nn.Linear(50, 10)

            # Toggle for printing tensor sizes during forward propagation
            self.print = print_toggle

        def forward(self, x):
            if self.print:
                print(f'Input: {x.shape}')

            # First block: convolution -> max-pool -> ReLU
            x = F.max_pool2d(self.conv1(x), 2)
            x = F.relu(x)
            if self.print:
                print(f'Layer conv1/pool1: {x.shape}')

            # Second block: convolution -> max-pool -> ReLU
            x = F.max_pool2d(self.conv2(x), 2)
            x = F.relu(x)
            if self.print:
                print(f'Layer conv2/pool2: {x.shape}')

            # Flatten to (samples, features); features = total elements / samples
            x = x.view(-1, x.shape.numel() // x.shape[0])
            if self.print:
                print(f'Vectorized: {x.shape}')

            # Hidden linear layer, then the output layer
            x = F.relu(self.fc1(x))
            if self.print:
                print(f'Layer fc1: {x.shape}')
            x = self.out(x)
            if self.print:
                print(f'Layer out: {x.shape}')

            return x

    model = mnist_net(print_toggle)
    criterion = nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(params=model.parameters(), lr=0.005)

    return model, criterion, sgd

### TRANSFER_ PRETRAIN_FMNIST

In [None]:
def make_the_AE_net(print_toggle=False):
    """
    TRANSFER_ PRETRAIN_FMNIST

    Build a convolutional autoencoder (two strided convolutions followed by
    two transposed convolutions) with an MSE loss and an Adam optimizer.

    Args:
        print_toggle: When True, the forward pass prints the tensor shape
            after every layer.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an `nn.MSELoss`
        instance and Adam with lr=0.001.
    """
    class AE_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()

            # Verbosity switch read by forward()
            self.print = print_toggle

            # Encoder: stride-2 convolutions halve the spatial size.
            # Assuming 28x28 inputs: (28 + 2*1 - 3)//2 + 1 = 14, then 7.
            self.enc_conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1, stride=2)
            self.enc_conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1, stride=2)

            # Decoder: stride-2 transposed convolutions double it again.
            # (7 - 1)*2 - 2*1 + 4 = 14, then (14 - 1)*2 - 2*1 + 4 = 28.
            self.dec_conv1 = nn.ConvTranspose2d(32, 16, kernel_size=4, padding=1, stride=2)
            self.dec_conv2 = nn.ConvTranspose2d(16, 1, kernel_size=4, padding=1, stride=2)

        def forward(self, x):
            # Every layer is followed by a leaky ReLU, including the last one
            if self.print:
                print(f'Input:                {list(x.shape)}')

            x = self.enc_conv1(x)
            x = F.leaky_relu(x)
            if self.print:
                print(f'First Encoder layer:  {list(x.shape)}')

            x = self.enc_conv2(x)
            x = F.leaky_relu(x)
            if self.print:
                print(f'Second Encoder layer: {list(x.shape)}')

            x = self.dec_conv1(x)
            x = F.leaky_relu(x)
            if self.print:
                print(f'First Decoder layer:  {list(x.shape)}')

            x = self.dec_conv2(x)
            x = F.leaky_relu(x)
            if self.print:
                print(f'Second Decoder layer: {list(x.shape)}')

            return x

    model = AE_net(print_toggle)
    criterion = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.001)

    return model, criterion, adam

In [None]:
def make_the_class_net(print_toggle=False):
    """
    TRANSFER_ PRETRAIN_FMNIST

    Build a CNN classifier with two strided encoder convolutions and a
    two-layer linear head with 10 outputs, plus its loss and optimizer.

    Args:
        print_toggle: When True, the forward pass prints the tensor shape
            after every layer.

    Returns:
        Tuple `(net, loss_func, optimizer)`: the model, an
        `nn.CrossEntropyLoss` instance and Adam with lr=0.001.
    """
    class cnn_net(nn.Module):
        def __init__(self, print_toggle):
            super().__init__()

            # Verbosity switch read by forward()
            self.print = print_toggle

            ########################## ENCODER LAYERS ##########################
            # Size notes assume 28x28 inputs.
            self.enc_conv1 = nn.Conv2d(1, 16,  kernel_size=3, padding=1, stride=2)
                # (28 + 2 * 1 - 3)//2 + 1 = 14
            self.enc_conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1, stride=2)
                # (14 + 2 * 1 - 3)//2 + 1 = 7

            ######################## LINEAR DECISION LAYERS ####################
            self.fc1 = nn.Linear(7 * 7 * 32, 50)
            self.fc2 = nn.Linear(50, 10)

        def forward(self, x):
            # Strided convolution -> leaky ReLU, twice, then the linear head
            if self.print:    print(f'Input:              {list(x.shape)}')

            # First encoder layer
            x = F.leaky_relu(self.enc_conv1(x))
            if self.print:  print(f'First Encoder layer:  {list(x.shape)}')

            # Second encoder layer
            x = F.leaky_relu(self.enc_conv2(x))
            if self.print:  print(f'Second Encoder layer: {list(x.shape)}')

            # Flatten to (samples, features); features = total elements / samples
            n_units = x.shape.numel() / x.shape[0]
            x       = x.view(-1, int(n_units))
            if self.print: print(f'Vectorized: {list(x.shape)}')

            # Linear layers
            x = F.leaky_relu(self.fc1(x))
            if self.print: print(f'First Linear layer:    {list(x.shape)}')

            # Return raw logits: CrossEntropyLoss applies log-softmax itself,
            # and a leaky ReLU here would shrink every negative logit by
            # 100x and weaken the gradient signal for wrong classes.
            x = self.fc2(x)
            if self.print: print(f'Second Linear layer:   {list(x.shape)}')

            return x

    # Create the model instance
    net = cnn_net(print_toggle)

    # Loss Function
    loss_func = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    return net, loss_func, optimizer

## TEST

### FFN_NONMNIST

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = create_the_MNIST_net()

# DataLoader iterators follow the iterator protocol, so fetch a batch with
# the builtin next(); the Python-2-style `.next()` method raises
# AttributeError on current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# exp() turns the outputs into probabilities per digit (0 - 9), provided the
# network ends in log-softmax (network is defined elsewhere - TODO confirm)
print(torch.exp(y_hat))

# Compute the loss
loss = loss_func(y_hat, y)
print(f'Loss: {loss}')

### FFN_BINARIZED MNIST

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = create_the_MNIST_net()

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Confirm the inputs are really binary: only two unique values should show up
torch.unique(X)

### FFN_ BREADTH VS. DEPTH

In [None]:
# Generate an instance of the model and confirm that it returns the expected network.
n_units_per_layer = 12
n_layers          = 4

# The factory is unpacked into (net, loss_func, optimizer) by the training
# function for this experiment, so unpack it here too; otherwise `net` would
# be bound to the whole tuple instead of the network.
net, loss_func, optimizer = create_the_MNIST_net(n_units_per_layer, n_layers)
net

### CNN_MNIST

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = create_the_MNIST_net(print_toggle=True)

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y  = next(iter(train_loader))
y_hat = net(X)

# Check sizes of model outputs and target variable
print('')
print(y_hat.shape)
print(y.shape)

# Now let's compute the loss
loss = loss_func(y_hat, y)
print('')
print(f'Loss: {loss}')

### CNN_CLASSIFY_GAUSSIAN_BLURS|FIND_GAUSS

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = make_the_net()

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Separate print statements: a trailing `print(...), print(...)` tuple makes
# the notebook echo `(None, None)` as the cell result.
print('')
print(y_hat.shape)

loss = loss_func(y_hat, y)
print('')
print(f'Loss: {loss}')

### CNN_GAUSS_FEATURE_MAPS

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = make_the_net()

# Test that the model runs and can compute a loss.
# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y                          = next(iter(train_loader))
y_hat, feat_map_1, feat_map_2 = net(X)  # this variant also returns both conv activations
loss                          = loss_func(y_hat, y)

# Check sizes of outputs
print(f'Predicted category:      {y_hat.shape}')
print(f'Feature map after conv1: {feat_map_1.shape}')
print(f'Feature map after conv2: {feat_map_2.shape}')


### CNN_GAUSS_AUTOENCODER

In [None]:
# Smoke-test the untrained autoencoder on the first ten images
net, loss_func, optimizer = make_the_net()

y_hat = net(images[:10])

# Check size of output
print('')
print(y_hat.shape)

# Show the first input image next to its reconstruction
fig, ax = plt.subplots(1, 2, figsize=(8, 3))

ax[0].imshow(images[0, 0].squeeze().detach(), cmap='jet')
ax[0].set_title('Model input')

ax[1].imshow(y_hat[0, 0].squeeze().detach(), cmap='jet')
ax[1].set_title('Model output')

plt.show()

### CNN_GAUSS_AE_OCCLUSION

In [None]:
# Smoke-test the untrained autoencoder on the first ten non-occluded images
net, loss_func, optimizer = make_the_net()

y_hat = net(images_no_occ[:10])

# Check size of output
print('')
print(y_hat.shape)

# Show the first input image next to its reconstruction
fig, ax = plt.subplots(1, 2, figsize=(8, 3))

ax[0].imshow(images_no_occ[0, 0].squeeze().detach(), cmap='jet')
ax[0].set_title('Model input')

ax[1].imshow(y_hat[0, 0].squeeze().detach(), cmap='jet')
ax[1].set_title('Model output')

plt.show()

### CNN_EMNIST

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = make_the_net(print_toggle=True)

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Separate print statements: a trailing `print(...), print(...)` tuple makes
# the notebook echo `(None, None)` as the cell result.
print('')
print(f'Output size: {y_hat.shape}')

# Labels are squeezed before being handed to the loss function
loss = loss_func(y_hat, torch.squeeze(y))
print('')
print(f'Loss: {loss}')

### CNN_ NUM_CHANS

In [None]:
# Smoke-test the model with one training batch
net, loss_func, optimizer = make_the_net(num_chans=(6, 12))

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Separate print statements: a trailing `print(...), print(...)` tuple makes
# the notebook echo `(None, None)` as the cell result.
print('')
print(f'Output size: {y_hat.shape}')

# Labels are squeezed before being handed to the loss function
loss = loss_func(y_hat, torch.squeeze(y))
print('')
print(f'Loss: {loss}')

### TRANSFER_ PRETRAIN_FMNIST

In [None]:
# Smoke-test the autoencoder with one training batch
net, loss_func, optimizer = make_the_AE_net(print_toggle=True)

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Separate print statements: a trailing `print(...), print(...)` tuple makes
# the notebook echo `(None, None)` as the cell result.
print('')
print(f'Output size: {y_hat.shape}')

# Autoencoder: the reconstruction is compared against the input itself
loss = loss_func(y_hat, X)
print('')
print(f'Loss: {loss}')

In [None]:
# Smoke-test the classifier with one training batch
net, loss_func, optimizer = make_the_class_net(print_toggle=True)

# Use the builtin next(); `.next()` raises AttributeError on DataLoader
# iterators in current PyTorch releases.
X, y = next(iter(train_loader))
y_hat = net(X)

# Separate print statements: a trailing `print(...), print(...)` tuple makes
# the notebook echo `(None, None)` as the cell result.
print('')
print(f'Output size: {y_hat.shape}')

loss = loss_func(y_hat, y)
print('')
print(f'Loss: {loss}')

## TRAIN

### META_PARAMS_MULTIOUTPUTS

In [None]:
def train_the_model():
    """
    META_PARAMS_MULTIOUTPUTS

    Train a fresh network from `create_the_qwety_net()` for 100 epochs on the
    notebook-level `train_loader`, scoring one batch drawn from the
    notebook-level `test_loader` after every epoch.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
    """
    num_epochs = 100

    net, loss_func, optimizer = create_the_qwety_net()

    # Per-epoch bookkeeping
    train_acc = []
    test_acc = []
    losses = torch.zeros(num_epochs)

    for epoch_i in range(num_epochs):
        # Training mode (matters for dropout / batch-norm layers)
        net.train()

        batch_acc = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accuracy and loss for this batch, stored as plain floats
            batch_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())
            batch_loss.append(loss.item())

        # Epoch averages across the batches
        train_acc.append(np.mean(batch_acc))
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy on a single batch from the test loader
        X, y = next(iter(test_loader))
        net.eval()                      # evaluation mode
        with torch.no_grad():           # no autograd bookkeeping needed
            pred_labels = net(X)

        # .item() keeps test_acc a list of floats, like train_acc, instead of
        # a list of 0-d tensors
        test_acc.append(100 * torch.mean((torch.argmax(pred_labels, dim=1) == y).float()).item())

    return train_acc, test_acc, losses, net

### META_PARAMS_OPTIMIZERS_QWERTY

In [None]:
num_epochs = 50

def train_the_model(optimizer_type, learning_rate):
    """
    META_PARAMS_OPTIMIZERS_QWERTY

    Train a fresh network for `num_epochs` (module-level) epochs using the
    requested optimizer and learning rate.

    Args:
        optimizer_type: Forwarded to `create_the_qwety_net`.
        learning_rate: Forwarded to `create_the_qwety_net`.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
    """
    net, loss_func, optimizer = create_the_qwety_net(optimizer_type, learning_rate)

    def pct_correct(outputs, labels):
        # Share of samples whose arg-max output matches the label, in percent
        hits = (torch.argmax(outputs, axis=1) == labels).float()
        return 100 * torch.mean(hits).item()

    train_acc, test_acc = [], []
    losses = torch.zeros(num_epochs)

    for epoch in range(num_epochs):
        # --- training pass over all batches ---
        net.train()
        acc_log, loss_log = [], []

        for X, y in train_loader:
            outputs = net(X)
            batch_loss = loss_func(outputs, y)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            acc_log.append(pct_correct(outputs, y))
            loss_log.append(batch_loss.item())

        # Epoch averages across the batches
        train_acc.append(np.mean(acc_log))
        losses[epoch] = np.mean(loss_log)

        # --- evaluation on one batch from the test loader ---
        X, y = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            test_outputs = net(X)

        test_acc.append(pct_correct(test_outputs, y))

    return train_acc, test_acc, losses, net

### META_PARAM_RELUS

In [None]:
# Number of training epochs, shared by every run in this experiment
num_epochs = 600

def train_the_model():
    """
    META_PARAM_RELUS

    Train the notebook-level `wine_net` in place for `num_epochs` epochs
    with SGD (lr=0.01) and a binary cross-entropy-with-logits loss.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
    """
    criterion = nn.BCEWithLogitsLoss()
    sgd = torch.optim.SGD(wine_net.parameters(), lr=0.01)

    def pct_correct(logits, targets):
        # A positive logit counts as predicting the positive class
        return 100 * torch.mean(((logits > 0) == targets).float()).item()

    train_acc, test_acc = [], []
    losses = torch.zeros(num_epochs)

    for epoch in range(num_epochs):
        # --- training pass over all batches ---
        wine_net.train()
        acc_log, loss_log = [], []

        for X, y in train_loader:
            logits = wine_net(X)
            batch_loss = criterion(logits, y)

            sgd.zero_grad()
            batch_loss.backward()
            sgd.step()

            acc_log.append(pct_correct(logits, y))
            loss_log.append(batch_loss.item())

        # Epoch averages across the batches
        train_acc.append(np.mean(acc_log))
        losses[epoch] = np.mean(loss_log)

        # --- evaluation on one batch from the test loader ---
        X, y = next(iter(test_loader))
        wine_net.eval()
        with torch.no_grad():
            test_logits = wine_net(X)

        test_acc.append(pct_correct(test_logits, y))

    return train_acc, test_acc, losses

### FFN_NONMNIST, FFN_BINARIZED MNIST, CNN_MNIST, CNN_MNIST_SHIFTED

In [None]:
def train_the_model():
    """
    FFN_NONMNIST, FFN_BINARIZED MNIST, CNN_MNIST, CNN_MNIST_SHIFTED

    Train a fresh network from `create_the_MNIST_net()` for 60 epochs on the
    notebook-level `train_loader`, scoring one batch drawn from the
    notebook-level `test_loader` after every epoch.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
    """

    num_epochs = 60

    # Create a new model
    net, loss_func, optimizer = create_the_MNIST_net()

    # Per-epoch bookkeeping
    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch_i in range(num_epochs):

        # Training mode (re-enabled every epoch because testing switches it off)
        net.train()
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss from this batch
            batch_loss.append(loss.item())

            # Accuracy for this batch, stored as a plain float so np.mean
            # does not have to convert a list of tensors
            matches = (torch.argmax(y_hat, dim=1) == y).float()
            batch_acc.append(100 * torch.mean(matches).item())

        # Epoch averages across the batches
        train_acc.append(np.mean(batch_acc))
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy on a single batch. Evaluate in eval mode so dropout /
        # batch-norm layers behave deterministically, and without autograd so
        # no graph is built for a pass that is never back-propagated.
        X, y = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            y_hat = net(X)
        test_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())

    return train_acc, test_acc, losses, net

### FFN_WEIGHTHISTOGRAMS

In [None]:
def train_the_model():
    """
    FFN_WEIGHTHISTOGRAMS

    Train a fresh network from `create_the_MNIST_net()` for 100 epochs and
    record the output of `weights_histogram(net)` at the start of every epoch.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
        hist_x: first value returned by the last `weights_histogram` call
            (presumably the histogram bin positions - TODO confirm).
        hist_y: array of shape (num_epochs, 100), one row per epoch holding
            the second value returned by `weights_histogram`.
    """

    num_epochs = 100

    # Create a new model
    net, loss_func, optimizer = create_the_MNIST_net()

    # Per-epoch bookkeeping
    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    # One histogram row per epoch. hist_x is not pre-allocated: it is simply
    # rebound to whatever weights_histogram returns on every epoch.
    hist_y = np.zeros((num_epochs, 100))

    for epoch_i in range(num_epochs):
        # Weight distribution at the start of this epoch
        hist_x, hist_y[epoch_i, :] = weights_histogram(net)

        # Training mode (re-enabled every epoch because testing switches it off)
        net.train()
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss from this batch
            batch_loss.append(loss.item())

            # Accuracy for this batch, stored as a plain float
            matches = (torch.argmax(y_hat, dim=1) == y).float()
            batch_acc.append(100 * torch.mean(matches).item())

        # Epoch averages across the batches
        train_acc.append(np.mean(batch_acc))
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy on a single batch, in eval mode and without autograd
        X, y = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            y_hat = net(X)

        test_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())

    return train_acc, test_acc, losses, net, hist_x, hist_y

### FFN_ BREADTH VS. DEPTH

In [None]:
def train_the_model(n_units, n_layers):
    """
    FFN_ BREADTH VS. DEPTH

    Train a fresh network of the requested size for 60 epochs on the
    notebook-level `train_loader`, scoring one batch drawn from the
    notebook-level `test_loader` after every epoch.

    Args:
        n_units: Forwarded to `create_the_MNIST_net`.
        n_layers: Forwarded to `create_the_MNIST_net`.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
    """

    num_epochs = 60

    # Create a new model
    net, loss_func, optimizer = create_the_MNIST_net(n_units, n_layers)

    # Per-epoch bookkeeping
    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch_i in range(num_epochs):

        # Training mode (re-enabled every epoch because testing switches it off)
        net.train()
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss from this batch
            batch_loss.append(loss.item())

            # Accuracy for this batch, stored as a plain float
            matches = (torch.argmax(y_hat, dim=1) == y).float()
            batch_acc.append(100 * torch.mean(matches).item())

        # Epoch averages across the batches
        train_acc.append(np.mean(batch_acc))
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy on a single batch, in eval mode and without autograd
        X, y = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            y_hat = net(X)

        test_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())

    return train_acc, test_acc, losses, net

### FFN_OPTIMIZERS

In [None]:
num_epochs = 50

def train_the_model(optimizer_type, learning_rate):
    """
    FFN_OPTIMIZERS

    Train a fresh network for `num_epochs` (module-level) epochs using the
    requested optimizer and learning rate.

    Args:
        optimizer_type: Forwarded to `create_the_mnist_net`.
        learning_rate: Forwarded to `create_the_mnist_net`.

    Returns:
        train_acc: list with the mean batch training accuracy (%) per epoch.
        test_acc: list of floats with the test-batch accuracy (%) per epoch.
        losses: 1-D tensor with the mean batch loss per epoch.
        net: the trained model.
    """
    net, loss_func, optimizer = create_the_mnist_net(optimizer_type, learning_rate)

    # Per-epoch bookkeeping
    train_acc = []
    test_acc  = []
    losses = torch.zeros(num_epochs)

    for epoch_i in range(num_epochs):
        # Training mode (re-enabled every epoch because testing switches it
        # off), matching the other optimizer experiment in this notebook
        net.train()

        batch_acc = []
        batch_loss = []

        for X, y in train_loader:

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accuracy and loss for this batch, stored as plain floats
            batch_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())
            batch_loss.append(loss.item())

        # Epoch averages across the batches
        train_acc.append(np.mean(batch_acc))
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy on a single batch, in eval mode and without autograd
        X, y = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            y_hat = net(X)

        test_acc.append(100 * torch.mean((torch.argmax(y_hat, dim=1) == y).float()).item())

    return train_acc, test_acc, losses, net

### FFN_SCRAMBLEDMNIST | DATA_ DATA_OVERSAMPLING

In [None]:
def train_the_model():
    """
    FFN_SCRAMBLEDMNIST | DATA_ DATA_OVERSAMPLING

    Create a model with create_the_MNIST_net() and train it for 50 epochs
    on `train_loader`.

    Returns (train_acc, test_acc, losses, net):
      * train_acc -- per epoch, np.mean of the per-batch accuracies (%)
      * test_acc  -- per epoch, accuracy (%) on the first batch drawn from
                     `test_loader`, stored as a 0-d tensor
      * losses    -- tensor with the mean training-batch loss of each epoch
      * net       -- the trained model
    """
    num_epochs = 50

    net, loss_func, optimizer = create_the_MNIST_net()

    train_acc = []
    test_acc  = []
    losses    = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_accs, epoch_losses = [], []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Batch accuracy in percent (kept as a tensor, not converted with .item())
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        # Epoch-level averages
        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Accuracy on a single batch of the test loader, gradients disabled
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            outputs = net(inputs)
        correct = (torch.argmax(outputs, axis=1) == labels).float()
        test_acc.append(100 * torch.mean(correct))

    return train_acc, test_acc, losses, net

### FFN_SHIFTEDMNIST

In [None]:
def train_the_model():
    """
    FFN_SHIFTEDMNIST

    Train a model obtained from create_the_MNIST_net() for 50 epochs over
    `train_loader`, checking accuracy after every epoch on the first batch
    drawn from `test_loader`.

    Returns
    -------
    train_acc : list  -- np.mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- test-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the trained model
    """
    num_epochs = 50

    # Fresh model, loss function and optimizer
    net, loss_func, optimizer = create_the_MNIST_net()

    # Result containers
    test_acc  = []
    train_acc = []
    losses    = torch.zeros(num_epochs)

    for ep in range(num_epochs):

        acc_per_batch  = []
        loss_per_batch = []

        for batch_X, batch_y in train_loader:
            # Forward pass and loss
            preds = net(batch_X)
            loss  = loss_func(preds, batch_y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_per_batch.append(loss.item())

            # Fraction of correct arg-max predictions, scaled to percent
            is_correct = torch.argmax(preds, axis=1) == batch_y
            acc_per_batch.append(100 * torch.mean(is_correct.float()))

        # Average over the batches of this epoch
        train_acc.append(np.mean(acc_per_batch))
        losses[ep] = np.mean(loss_per_batch)

        # One test batch, evaluated without autograd
        batch_X, batch_y = next(iter(test_loader))
        with torch.no_grad():
            preds = net(batch_X)
        is_correct = torch.argmax(preds, axis=1) == batch_y
        test_acc.append(100 * torch.mean(is_correct.float()))

    return train_acc, test_acc, losses, net

### FFN_NO7

In [None]:
def train_the_model():
    """
    FFN_NO7

    Training-only variant: builds a model with create_the_MNIST_net() and
    trains it for 100 epochs on `train_loader`. No test-set evaluation is
    performed here.

    Returns (train_acc, losses, net):
      * train_acc -- per epoch, np.mean of the per-batch accuracies (%)
      * losses    -- tensor with the mean batch loss of each epoch
      * net       -- the trained model
    """
    num_epochs = 100

    net, loss_func, optimizer = create_the_MNIST_net()

    train_acc = []
    losses    = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_accs, epoch_losses = [], []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Percent of samples classified correctly in this batch (tensor)
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

    return train_acc, losses, net

### DATA_DATA_VS_DEPTH_QWERTY2

In [None]:
def train_the_model(n_units, n_layers):
    """
    DATA_DATA_VS_DEPTH_QWERTY2

    Build a model with create_the_qwerty_net(n_units, n_layers) and train
    it for 50 epochs, iterating over `train_data` for the mini-batches and
    taking the first batch of `test_data` for the per-epoch test accuracy.

    Returns
    -------
    train_acc : list  -- np.mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- test-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the trained model
    """
    num_epochs = 50

    net, loss_func, optimizer = create_the_qwerty_net(n_units, n_layers)

    train_acc = []
    test_acc  = []
    losses    = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_data:
            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Accuracy of this batch in percent (tensor)
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Evaluate on one test batch with autograd switched off
        inputs, labels = next(iter(test_data))
        with torch.no_grad():
            outputs = net(inputs)
        correct = (torch.argmax(outputs, axis=1) == labels).float()
        test_acc.append(100 * torch.mean(correct))

    return train_acc, test_acc, losses, net

### DATA_ UNBALANCED DATA

In [None]:
# Global parameter
num_epochs = 500

def train_the_model():
    """
    DATA_ UNBALANCED DATA

    Train the module-level `wine_net` for `num_epochs` epochs on
    `train_loader`, using BCEWithLogitsLoss and Adam (lr=0.001).
    A prediction counts as correct when (output > 0) equals the label.

    Returns (train_acc, test_acc, losses):
      * train_acc -- per epoch, mean of the per-batch accuracies (%)
      * test_acc  -- per epoch, accuracy (%) on the first batch drawn from
                     `test_loader`
      * losses    -- tensor with the mean training-batch loss of each epoch
    """
    loss_func = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(wine_net.parameters(), lr=0.001)

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            logits = wine_net(inputs)
            loss   = loss_func(logits, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Thresholding the raw output at 0 gives the predicted class
            agree = (logits > 0) == labels
            epoch_accs.append(100 * agree.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # One batch from the test loader, no gradient tracking
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            logits = wine_net(inputs)

        agree = (logits > 0) == labels
        test_acc.append(100 * agree.float().mean().item())

    return train_acc, test_acc, losses

### DATA_NOISE_AUGMENTATION

In [None]:
def train_the_model():
    """
    DATA_NOISE_AUGMENTATION

    Train a model from create_the_MNIST_net() for 50 epochs on
    `train_loader`; after each epoch, measure accuracy on the first batch
    drawn from `dev_loader`.

    Returns
    -------
    train_acc : list  -- np.mean of the per-batch training accuracies (%), per epoch
    dev_acc   : list  -- dev-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the trained model
    """
    num_epochs = 50

    net, loss_func, optimizer = create_the_MNIST_net()

    train_acc = []
    dev_acc   = []
    losses    = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_accs, epoch_losses = [], []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Batch accuracy in percent (tensor)
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Dev-set check on a single batch, autograd disabled
        inputs, labels = next(iter(dev_loader))
        with torch.no_grad():
            outputs = net(inputs)
        correct = (torch.argmax(outputs, axis=1) == labels).float()
        dev_acc.append(100 * torch.mean(correct))

    return train_acc, dev_acc, losses, net

### DATA_DATA_FEATURE_AUGMENTATION

In [None]:
def train_the_model(use_extra_feature=False):
    """
    DATA_DATA_FEATURE_AUGMENTATION

    Build a model with create_the_qwety_net(use_extra_feature) and train
    it for 200 epochs on `train_loader`.

    Returns (train_acc, test_acc, losses, net):
      * train_acc -- per epoch, mean of the per-batch accuracies (%)
      * test_acc  -- per epoch, accuracy (%) on the first batch drawn from
                     `test_loader`
      * losses    -- tensor with the mean training-batch loss of each epoch
      * net       -- the trained model
    """
    num_epochs = 200

    net, loss_func, optimizer = create_the_qwety_net(use_extra_feature)

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Percent of arg-max predictions matching the labels
            hits = torch.argmax(outputs, axis=1) == labels
            epoch_accs.append(100 * hits.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # One test batch, evaluated without autograd
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            outputs = net(inputs)

        hits = torch.argmax(outputs, axis=1) == labels
        test_acc.append(100 * hits.float().mean().item())

    return train_acc, test_acc, losses, net

### DATA_SAVE_BEST_MODEL

In [None]:
def train_the_model():
    """
    DATA_SAVE_BEST_MODEL

    Train a model from create_the_qwety_net() for 100 epochs on
    `train_loader` and remember the parameters of the epoch with the
    highest dev accuracy (measured on the first batch of `dev_loader`).

    Returns
    -------
    train_acc : list  -- mean of the per-batch training accuracies (%), per epoch
    dev_acc   : list  -- dev-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    the_best_model : dict
        'Accuracy' -> best dev accuracy seen (0 if none exceeded 0),
        'net'      -> deep copy of the model's state_dict at that epoch
                      (None if no epoch exceeded the initial 0).
    """
    num_epochs = 100

    # Tracks the best dev accuracy so far and the matching weights
    the_best_model = {'Accuracy': 0, 'net': None}

    net, loss_func, optimizer = create_the_qwety_net()

    losses    = torch.zeros(num_epochs)
    train_acc = []
    dev_acc   = []

    for epoch in range(num_epochs):

        # Training mode for the mini-batch loop
        net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            hits = torch.argmax(outputs, axis=1) == labels
            epoch_accs.append(100 * hits.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Dev evaluation: one batch, evaluation mode, no gradients
        inputs, labels = next(iter(dev_loader))
        net.eval()
        with torch.no_grad():
            outputs = net(inputs)

        hits        = torch.argmax(outputs, axis=1) == labels
        current_acc = 100 * torch.mean(hits.float())
        dev_acc.append(current_acc)

        # Strictly better than the best so far -> snapshot the weights
        if current_acc > the_best_model['Accuracy']:
            the_best_model['Accuracy'] = current_acc.item()
            the_best_model['net']      = copy.deepcopy(net.state_dict())

    return train_acc, dev_acc, losses, the_best_model

### APRF_WINE

In [None]:
# Global parameter
num_epochs = 1000

def train_the_model():
    """
    APRF_WINE

    Train the module-level `wine_net` for `num_epochs` epochs on
    `train_loader` with BCEWithLogitsLoss and SGD (lr=0.01).
    A prediction counts as correct when (output > 0) equals the label.

    Returns (train_acc, test_acc, losses):
      * train_acc -- per epoch, mean of the per-batch accuracies (%)
      * test_acc  -- per epoch, accuracy (%) on the first batch drawn from
                     `test_loader`
      * losses    -- tensor with the mean training-batch loss of each epoch
    """
    # Loss operating on the raw (continuous) model output
    loss_func = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(wine_net.parameters(), lr=0.01)

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        # Put the model in training mode at the start of every epoch
        wine_net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            logits = wine_net(inputs)
            loss   = loss_func(logits, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Threshold the raw output at 0 and compare with the labels
            agree = (logits > 0) == labels
            epoch_accs.append(100 * agree.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # One batch from the test loader, no gradient tracking
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            logits = wine_net(inputs)

        agree = (logits > 0) == labels
        test_acc.append(100 * agree.float().mean().item())

    return train_acc, test_acc, losses

### MODEL_PERFORM_APRF_MNIST, MODEL_PERFORM_MNIST_NO7

In [None]:
def train_the_model():
    """
    MODEL_PERFORM_APRF_MNIST | MODEL_PERFORM_MNIST_NO7

    Train a model from create_the_MNIST_net() for 10 epochs on
    `train_loader`; after each epoch, measure accuracy on the first batch
    drawn from `test_loader`.

    Returns
    -------
    train_acc : list  -- np.mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- test-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the trained model
    """
    num_epochs = 10

    net, loss_func, optimizer = create_the_MNIST_net()

    train_acc = []
    test_acc  = []
    losses    = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_accs, epoch_losses = [], []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Batch accuracy in percent (tensor)
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Single test batch, autograd disabled
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            outputs = net(inputs)
        correct = (torch.argmax(outputs, axis=1) == labels).float()
        test_acc.append(100 * torch.mean(correct))

    return train_acc, test_acc, losses, net

### MODEL_PERFORM_TIME

In [None]:
def train_the_model():
    """
    MODEL_PERFORM_TIME

    Same 10-epoch training loop as the other MNIST variants (model from
    create_the_MNIST_net(), batches from `train_loader`, one test batch
    from `test_loader` per epoch), plus a progress line printed after each
    epoch. The reported time is the difference of time.process_time()
    readings taken at function entry and at the end of the epoch.

    Returns
    -------
    train_acc : list  -- np.mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- test-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the trained model
    """
    # Reference point for the elapsed-time report
    time_in_function = time.process_time()

    num_epochs = 10

    net, loss_func, optimizer = create_the_MNIST_net()

    train_acc = []
    test_acc  = []
    losses    = torch.zeros(num_epochs)

    for epoch_i in range(num_epochs):

        epoch_accs, epoch_losses = [], []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Batch accuracy in percent (tensor)
            correct = (torch.argmax(outputs, axis=1) == labels).float()
            epoch_accs.append(100 * torch.mean(correct))

        train_acc.append(np.mean(epoch_accs))
        losses[epoch_i] = np.mean(epoch_losses)

        # Single test batch, autograd disabled
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            outputs = net(inputs)
        correct = (torch.argmax(outputs, axis=1) == labels).float()
        test_acc.append(100 * torch.mean(correct))

        # Progress report: epoch counter, time since function entry, latest test accuracy
        comp_time = time.process_time() - time_in_function
        print(f'Epoch {epoch_i + 1}/{num_epochs}, elapsed time: {comp_time:.2f} sec, test accuracy: {test_acc[-1]:.0f}')

    return train_acc, test_acc, losses, net

### WEIGHTS_DEMO_INITS | WEIGHTS_VARIANCE_INITS

In [None]:
def train_the_model(net, loss_func, optimizer):
    """
    WEIGHTS_DEMO_INITS | WEIGHTS_VARIANCE_INITS

    Train the supplied `net` for 10 epochs on `train_loader` using the
    given loss function and optimizer. The model is switched to training
    mode before each epoch's batches and to evaluation mode before the
    per-epoch test on the first batch of `test_loader`.

    Returns
    -------
    train_acc : list  -- mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- test-batch accuracy (%) per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the same model object, after training
    """
    num_epochs = 10

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            hits = torch.argmax(outputs, axis=1) == labels
            epoch_accs.append(100 * hits.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Test on one batch: evaluation mode, no gradients
        inputs, labels = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            outputs = net(inputs)

        hits = torch.argmax(outputs, axis=1) == labels
        test_acc.append(100 * torch.mean(hits.float()))

    return train_acc, test_acc, losses, net

### WEIGHTS_ XAVIER_VS._KAIMING

In [None]:
# Global parameter
num_epochs = 600

def train_the_model(wine_net):
    """
    WEIGHTS_ XAVIER_VS._KAIMING

    Train the supplied `wine_net` for the module-level `num_epochs` epochs
    on `train_loader` with BCEWithLogitsLoss and SGD (lr=0.01).
    A prediction counts as correct when (output > 0) equals the label.

    Returns (train_acc, test_acc, losses):
      * train_acc -- per epoch, mean of the per-batch accuracies (%)
      * test_acc  -- per epoch, accuracy (%) on the first batch drawn from
                     `test_loader`, evaluated in eval mode
      * losses    -- tensor with the mean training-batch loss of each epoch
    """
    loss_func = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(wine_net.parameters(), lr=0.01)

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        wine_net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            logits = wine_net(inputs)
            loss   = loss_func(logits, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Threshold the raw output at 0 and compare with the labels
            agree = (logits > 0) == labels
            epoch_accs.append(100 * agree.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Test on one batch: evaluation mode, no gradients
        inputs, labels = next(iter(test_loader))
        wine_net.eval()
        with torch.no_grad():
            logits = wine_net(inputs)

        agree = (logits > 0) == labels
        test_acc.append(100 * agree.float().mean().item())

    return train_acc, test_acc, losses

### WEIGHTS_FREEZE_WEIGHTS

In [None]:
def train_the_model(net, loss_func, optimizer):
    """
    WEIGHTS_FREEZE_WEIGHTS

    Train `net` for 100 epochs on `train_loader`. During the first half of
    the epochs every parameter whose name does not contain 'output' has
    requires_grad set to False; from the halfway point on, requires_grad is
    set to True for all parameters.

    Returns
    -------
    train_acc : list  -- mean of the per-batch training accuracies (%), per epoch
    test_acc  : list  -- accuracy (%) on the first batch of `test_loader`
                         per epoch, as 0-d tensors
    losses    : torch.Tensor -- mean training-batch loss per epoch
    net       : the same model object, after training
    """
    num_epochs = 100

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    for epoch in range(num_epochs):

        # First half: freeze everything except parameters named '*output*'
        freeze_phase = epoch < (num_epochs / 2)
        for name, param in net.named_parameters():
            if freeze_phase:
                if 'output' not in name:
                    param.requires_grad = False
            else:
                param.requires_grad = True

        net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            hits = torch.argmax(outputs, axis=1) == labels
            epoch_accs.append(100 * hits.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Test on one batch: evaluation mode, no gradients
        inputs, labels = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            outputs = net(inputs)

        hits = torch.argmax(outputs, axis=1) == labels
        test_acc.append(100 * torch.mean(hits.float()))

    return train_acc, test_acc, losses, net

### WEIGHTS_WEIGHTS_CHANGES

In [None]:
def train_the_model(net, loss_func, optimizer):
    """
    WEIGHTS_WEIGHTS_CHANGES

    Train `net` for 60 epochs on `train_loader` and, for every epoch,
    record two statistics for each parameter whose name contains 'weight':
    its condition number after the epoch, and the Frobenius norm of its
    change relative to the copy taken at the start of the epoch.

    The statistics arrays have 4 columns, and the column for a weight is
    int(i / 2) where i is its position in net.named_parameters().
    NOTE(review): this presumably expects 4 layers with weight/bias pairs
    alternating -- confirm against the model definition.

    Returns
    -------
    train_acc     : list  -- mean of the per-batch training accuracies (%), per epoch
    test_acc      : list  -- accuracy (%) on the first batch of `test_loader`
                             per epoch, as 0-d tensors
    losses        : torch.Tensor -- mean training-batch loss per epoch
    net           : the same model object, after training
    weight_change : np.ndarray (num_epochs, 4) -- Frobenius norms of the changes
    weight_conds  : np.ndarray (num_epochs, 4) -- condition numbers
    pre_W         : list -- the weight copies taken before the final epoch
    """
    num_epochs = 60

    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    # One row per epoch, one column per layer (4 layers)
    weight_conds  = np.zeros((num_epochs, 4))
    weight_change = np.zeros((num_epochs, 4))

    for epoch in range(num_epochs):

        # Snapshot of every weight matrix before this epoch's updates
        pre_W = [
            copy.deepcopy(param.data.numpy())
            for name, param in net.named_parameters()
            if 'weight' in name
        ]

        net.train()

        epoch_accs   = []
        epoch_losses = []

        for inputs, labels in train_loader:

            # Forward pass
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward pass and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            hits = torch.argmax(outputs, axis=1) == labels
            epoch_accs.append(100 * hits.float().mean().item())

            epoch_losses.append(loss.item())

        train_acc.append(np.mean(epoch_accs))
        losses[epoch] = np.mean(epoch_losses)

        # Test on one batch: evaluation mode, no gradients
        inputs, labels = next(iter(test_loader))
        net.eval()
        with torch.no_grad():
            outputs = net(inputs)

        hits = torch.argmax(outputs, axis=1) == labels
        test_acc.append(100 * torch.mean(hits.float()))

        # Compare the post-epoch weights with the snapshot
        for i, (name, param) in enumerate(net.named_parameters()):
            if 'weight' not in name:
                continue

            layer = int(i / 2)  # parameter position -> column index

            # Condition number of the current weight matrix
            weight_conds[epoch, layer] = np.linalg.cond(param.data)

            # Frobenius norm of the difference to the pre-epoch weights
            delta = pre_W[layer] - param.data.numpy()
            weight_change[epoch, layer] = np.linalg.norm(delta, ord='fro')

    return train_acc, test_acc, losses, net, weight_change, weight_conds, pre_W

### AUTOENCODER_DENOISING_MNIST|LATENT_CODE

In [None]:
def train_the_model():
    """
    AUTOENCODER_DENOISING_MNIST|LATENT_CODE

    Train a model from create_the_MNIST_AE() for 10000 iterations. Each
    iteration draws 32 row indices of `data_tensor` with np.random.choice,
    runs the selected rows through the model, and uses those same rows as
    the target of the loss.

    Returns (losses, net):
      * losses -- tensor with the loss of every iteration
      * net    -- the trained model
    """
    num_epochs = 10000

    net, loss_func, optimizer = create_the_MNIST_AE()

    losses = torch.zeros(num_epochs)

    for step in range(num_epochs):

        # Random mini-batch of 32 rows
        picks = np.random.choice(data_tensor.shape[0], size=32)
        batch = data_tensor[picks, :]

        # Reconstruct the batch and compare it with the input itself
        reconstruction = net(batch)
        loss = loss_func(reconstruction, batch)

        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses[step] = loss.item()

    return losses, net

### AUTOENCODER_HOW_MANY_UNIT

In [None]:
def train_the_model(n_enc, n_bottle):
    """
    AUTOENCODER_HOW_MANY_UNIT

    Train a model from create_the_MNIST_AE(n_enc, n_bottle) for 3 epochs
    on `data_tensor`, using shuffled mini-batches of 32 rows. Each batch is
    both the input and the target of the loss.

    Returns
    -------
    losses : list
        One value per epoch: the mean loss over the last three mini-batches
        of that epoch.
    net
        The trained model.
    """
    num_epochs = 3

    # Create a new model
    net, loss_func, optimizer = create_the_MNIST_AE(n_enc, n_bottle)

    # Initialize losses
    losses = []

    # Batch size and number of full batches (a trailing partial batch is dropped)
    batch_size = 32
    n_samples  = data_tensor.shape[0]
    num_batch  = n_samples // batch_size

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Fresh random ordering of all row indices for this epoch
        rand_idx = np.random.permutation(n_samples).astype(int)

        # Losses during batches
        batch_losses = []

        for batch_i in range(num_batch):

            # Batch k covers permuted positions [k * batch_size, (k + 1) * batch_size).
            # Starting at (k - 1) * batch_size would make batch 0 a negative
            # range that only works through index wraparound.
            start       = batch_i * batch_size
            samps_2_use = rand_idx[start:start + batch_size]

            # Select those images
            X = data_tensor[samps_2_use, :]

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, X)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Losses in this batch
            batch_losses.append(loss.item())
        # End minibatch loop

        # Summarise the epoch by its final three batches
        losses.append(np.mean(batch_losses[-3:]))

    # End epochs
    # Function output
    return losses, net

### AUTO_ENCODER_OCCLUSION

In [None]:
def train_the_model():
    """
    AUTO_ENCODER_OCCLUSION

    Train a model from create_the_MNIST_net() for 5 epochs on
    `data_tensor`, using shuffled mini-batches of 32 rows. Each batch is
    both the input and the target of the loss.

    Returns
    -------
    losses : list
        The loss of every mini-batch, in training order across all epochs.
    net
        The trained model.
    """
    num_epochs = 5

    # Create a new model
    net, loss_func, optimizer = create_the_MNIST_net()

    # Initialize losses
    losses = []

    # Batch size and number of full batches (a trailing partial batch is dropped)
    batch_size = 32
    n_samples  = data_tensor.shape[0]
    num_batch  = n_samples // batch_size

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Fresh random ordering of all row indices for this epoch
        rand_idx = np.random.permutation(n_samples).astype(int)

        for batch_i in range(num_batch):

            # Batch k covers permuted positions [k * batch_size, (k + 1) * batch_size).
            # Starting at (k - 1) * batch_size would make batch 0 a negative
            # range that only works through index wraparound.
            start       = batch_i * batch_size
            samps_2_use = rand_idx[start:start + batch_size]

            # Select those images
            X = data_tensor[samps_2_use, :]

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, X)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Losses in this batch
            losses.append(loss.item())

        # End minibatch loop

    # End epochs

    # Function output
    return losses, net

### CNN_CLASSIFY_GAUSSIAN_BLURS

In [None]:
def train_the_model():
    """
    CNN_CLASSIFY_GAUSSIAN_BLURS

    Train a model from make_the_net() for 10 epochs on `train_loader`.
    A prediction counts as correct when (output > 0) equals the label.
    After each epoch the loss and accuracy are also computed on the first
    batch drawn from `test_loader`.

    Returns (train_loss, test_loss, train_acc, test_acc, net); the first
    four are tensors of length 10 with one value per epoch, accuracies in
    percent.
    """
    num_epochs = 10

    net, loss_func, optimizer = make_the_net()

    # One slot per epoch for each tracked quantity
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    test_acc   = torch.zeros(num_epochs)

    for epoch in range(num_epochs):

        epoch_losses = []
        epoch_accs   = []

        for inputs, labels in train_loader:
            # Forward
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

            # Backward + update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())

            # Fraction (not yet percent) of correct thresholded outputs
            agree = (outputs > 0) == labels
            epoch_accs.append(torch.mean(agree.float()).item())

        # Epoch averages; accuracy is scaled to percent here
        train_loss[epoch] = np.mean(epoch_losses)
        train_acc[epoch]  = 100 * np.mean(epoch_accs)

        # Loss and accuracy on one test batch, without gradients
        inputs, labels = next(iter(test_loader))
        with torch.no_grad():
            outputs = net(inputs)
            loss    = loss_func(outputs, labels)

        agree = (outputs > 0) == labels
        test_loss[epoch] = loss.item()
        test_acc[epoch]  = 100 * torch.mean(agree.float()).item()

    return train_loss, test_loss, train_acc, test_acc, net

### CNN_GAUSS_FEATURE_MAPS|SOFTCODE

In [None]:
def train_the_model():
    """
    CNN_GAUSS_FEATURE_MAPS|SOFTCODE

    Build a fresh model with `make_the_net()`, train it for 10 epochs on
    `train_loader`, and score it on the first batch of `test_loader` after
    every epoch. The model returns several outputs; only the first one is
    used as the prediction here.

    Uses the notebook-level names `make_the_net`, `train_loader` and
    `test_loader`.

    Returns:
        train_loss: mean batch loss per epoch.
        test_loss:  loss on the test batch per epoch.
        train_acc:  mean batch accuracy per epoch, in percent.
        test_acc:   accuracy on the test batch per epoch, in percent.
        net:        the trained model (left in eval mode).
    """

    num_epochs = 10

    # Create a new model
    net, loss_func, optimizer = make_the_net()

    # Initialize
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    test_acc   = torch.zeros(num_epochs)

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Training mode, so layers such as dropout / batch-norm (if the model
        # has any) behave as intended while the weights are updated
        net.train()

        # Loop over training data batches
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)[0] # Only need the first output
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch; an output above 0 is read as
            # a prediction of class 1
            batch_loss.append(loss.item())
            batch_acc .append(torch.mean(((y_hat > 0) == y).float()).item())

        # End of batch loop.

        # Average loss and accuracy (in percent) across the batches
        train_loss[epoch_i] = np.mean(batch_loss)
        train_acc [epoch_i] = 100 * np.mean(batch_acc)

        # Test accuracy: evaluation mode, first batch of the test loader only
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader
        with torch.no_grad():
            y_hat = net(X)[0]
            loss  = loss_func(y_hat, y)

        test_loss[epoch_i] = loss.item()
        test_acc [epoch_i] = 100 * torch.mean(((y_hat > 0) == y).float()).item()

    # End epochs

    return train_loss, test_loss, train_acc, test_acc, net

### CNN_LINEAR_UNITS

In [None]:
def train_the_model(fc_units):
    """
    CNN_LINEAR_UNITS

    Build a model with `make_the_net(fc_units)`, train it for 10 epochs on
    `train_loader`, and score it on the first batch of `test_loader` after
    every epoch. Model and data are moved to the notebook-level `device`.

    Args:
        fc_units: forwarded unchanged to `make_the_net`.

    Returns:
        train_loss: mean batch loss per epoch.
        test_loss:  loss on the test batch per epoch.
        train_acc:  mean batch accuracy per epoch, in percent.
        test_acc:   accuracy on the test batch per epoch, in percent.
        net:        the trained model (left in eval mode).
    """

    num_epochs = 10

    # Create a new model
    net, loss_func, optimizer = make_the_net(fc_units)

    # Send the model to the GPU
    net.to(device)

    # Initialize
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    test_acc   = torch.zeros(num_epochs)

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Training mode, so layers such as dropout / batch-norm (if the model
        # has any) behave as intended while the weights are updated
        net.train()

        # Loop over training data batches
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Push data to GPU
            X = X.to(device)
            y = y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch; an output above 0 is read as
            # a prediction of class 1
            batch_loss.append(loss.item())
            batch_acc .append(torch.mean(((y_hat > 0) == y).float()).item())

        # End of batch loop.

        # Average loss and accuracy (in percent) across the batches
        train_loss[epoch_i] = np.mean(batch_loss)
        train_acc [epoch_i] = 100 * np.mean(batch_acc)

        # Test accuracy: evaluation mode, first batch of the test loader only
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader

        # Push data to GPU
        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        test_loss[epoch_i] = loss.item()
        test_acc [epoch_i] = 100 * torch.mean(((y_hat > 0) == y).float()).item()

    # End epochs

    return train_loss, test_loss, train_acc, test_acc, net

### CNN_CLASSIFY_GAUSSIAN_BLURS

In [None]:
def train_the_model():
    """
    CNN_CLASSIFY_GAUSSIAN_BLURS

    Build a fresh model with `make_the_net()`, train it for 10 epochs on
    `train_loader`, and score it on the first batch of `test_loader` after
    every epoch.

    Uses the notebook-level names `make_the_net`, `train_loader` and
    `test_loader`.

    Returns:
        train_loss: mean batch loss per epoch.
        test_loss:  loss on the test batch per epoch.
        train_acc:  mean batch accuracy per epoch, in percent.
        test_acc:   accuracy on the test batch per epoch, in percent.
        net:        the trained model (left in eval mode).
    """

    num_epochs = 10

    # Create a new model
    net, loss_func, optimizer = make_the_net()

    # Initialize
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    test_acc   = torch.zeros(num_epochs)

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Training mode, so layers such as dropout / batch-norm (if the model
        # has any) behave as intended while the weights are updated
        net.train()

        # Loop over training data batches
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch; an output above 0 is read as
            # a prediction of class 1
            batch_loss.append(loss.item())
            batch_acc .append(torch.mean(((y_hat > 0) == y).float()).item())

        # End of batch loop.

        # Average loss and accuracy (in percent) across the batches
        train_loss[epoch_i] = np.mean(batch_loss)
        train_acc [epoch_i] = 100 * np.mean(batch_acc)

        # Test accuracy: evaluation mode, first batch of the test loader only
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader
        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        test_loss[epoch_i] = loss.item()
        test_acc [epoch_i] = 100 * torch.mean(((y_hat > 0) == y).float()).item()

    # End epochs

    return train_loss, test_loss, train_acc, test_acc, net

### CNN_GAUSS_AE_OCCLUSION

In [None]:
def train_the_model():
    """
    CNN_GAUSS_AE_OCCLUSION

    Train a new model from `make_the_net()` for 1000 steps. Each step draws 32
    distinct image indices at random, feeds the occluded versions
    (`images_occ`) to the model and compares the output with the matching
    unoccluded images (`images_no_occ`).

    Uses the notebook-level names `make_the_net`, `n_gauss`, `images_occ` and
    `images_no_occ`.

    Returns:
        losses: tensor with the loss of every step.
        net:    the trained model.
    """

    num_epochs = 1000

    # Fresh model, loss function and optimizer
    net, loss_func, optimizer = make_the_net()

    # One loss value per step
    losses = torch.zeros(num_epochs)

    for epoch_i in range(num_epochs):

        # Random mini-batch of 32 different images
        batch_idx = np.random.choice(n_gauss, size=32, replace=False)

        # Model input has occlusions, the target does not
        occluded = images_occ   [batch_idx, :, :, :]
        clean    = images_no_occ[batch_idx, :, :, :]

        # Forward pass; record the loss before the weights are updated
        loss = loss_func(net(occluded), clean)
        losses[epoch_i] = loss.item()

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses, net

### CNN_GAUSS_AUTOENCODER|CUSTOM_LOSS_FUNC

In [None]:
def train_the_model():
    """
    CNN_GAUSS_AUTOENCODER|CUSTOM_LOSS_FUNC

    Train a new autoencoder from `make_the_net()` for 1000 steps. Each step
    draws 32 distinct image indices at random from `images` and asks the model
    to reproduce its own input.

    Uses the notebook-level names `make_the_net`, `n_gauss` and `images`.

    Returns:
        losses: tensor with the loss of every step.
        net:    the trained model.
    """

    num_epochs = 1000

    # Fresh model, loss function and optimizer
    net, loss_func, optimizer = make_the_net()

    # One loss value per step
    losses = torch.zeros(num_epochs)

    for epoch_i in range(num_epochs):

        # Random mini-batch of 32 different images
        batch_idx = np.random.choice(n_gauss, size=32, replace=False)
        batch     = images[batch_idx, :, :, :]

        # The input doubles as the target; record the loss before the update
        loss = loss_func(net(batch), batch)
        losses[epoch_i] = loss.item()

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses, net

### CNN_FIND_GAUSS

In [None]:
def train_the_model():
    """
    CNN_FIND_GAUSS

    Build a fresh model with `make_the_net()`, train it for 30 epochs on
    `train_loader`, and compute the loss on the first batch of `test_loader`
    after every epoch. Only losses are tracked (no accuracy).

    Uses the notebook-level names `make_the_net`, `train_loader` and
    `test_loader`.

    Returns:
        train_loss: mean batch loss per epoch.
        test_loss:  loss on the test batch per epoch.
        net:        the trained model (left in eval mode).
    """

    num_epochs = 30

    # Create a new model
    net, loss_func, optimizer = make_the_net()

    # Initialize
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Training mode, so layers such as dropout / batch-norm (if the model
        # has any) behave as intended while the weights are updated
        net.train()

        # Loop over training data batches
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss from this batch
            batch_loss.append(loss.item())

        # End of batch loop.

        # Average training loss across the batches
        train_loss[epoch_i] = np.mean(batch_loss)

        # Test loss: evaluation mode, first batch of the test loader only
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader
        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        test_loss[epoch_i] = loss.item()

    # End epochs

    return train_loss, test_loss, net

### CNN_EMNIST|HOW_LOW

In [None]:
def train_the_model():
    """
    CNN_EMNIST|HOW_LOW

    Train a new model from `make_the_net()` for 10 epochs on `device` and
    track, per epoch, the loss and the error rate (percentage of wrong argmax
    predictions) on the training batches and on the first batch of
    `test_loader`.

    Uses the notebook-level names `make_the_net`, `device`, `train_loader`
    and `test_loader`.

    Returns:
        train_loss, test_loss, train_err, test_err: one value per epoch.
        net: the trained model.
    """

    num_epochs = 10

    # Fresh model, loss function and optimizer
    net, loss_func, optimizer = make_the_net()

    # Per-epoch result containers
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_err  = torch.zeros(num_epochs)
    test_err   = torch.zeros(num_epochs)

    # Move the model to `device`
    net.to(device)

    for epoch_i in range(num_epochs):

        # ---- Training pass ----
        net.train()
        batch_err, batch_loss = [], []

        for X, y in train_loader:

            # Data has to sit on the same device as the model
            X, y = X.to(device), y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Book-keeping: loss and fraction of misclassified samples
            wrong = torch.argmax(y_hat, axis=1) != y
            batch_loss.append(loss.item())
            batch_err.append(wrong.float().mean().item())

        # Epoch averages (error rate converted to percent)
        train_loss[epoch_i] = np.mean(batch_loss)
        train_err [epoch_i] = 100 * np.mean(batch_err)

        # ---- Evaluation pass on the first test batch ----
        net.eval()
        X, y = next(iter(test_loader))
        X, y = X.to(device), y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        wrong = torch.argmax(y_hat, axis=1) != y
        test_loss[epoch_i] = loss.item()
        test_err [epoch_i] = 100 * wrong.float().mean().item()

    return train_loss, test_loss, train_err, test_err, net

### CNN_ NUM_CHANS

In [None]:
def train_the_model(num_chans):
    """
    CNN_ NUM_CHANS

    Train a new model from `make_the_net(num_chans)` for 5 epochs on `device`
    and track, per epoch, the loss and the error rate (percentage of wrong
    argmax predictions) on the training batches and on the first batch of
    `test_loader`.

    Args:
        num_chans: forwarded unchanged to `make_the_net`.

    Returns:
        train_loss, test_loss, train_err, test_err: one value per epoch.
        net: the trained model.
    """

    num_epochs = 5

    # Fresh model, loss function and optimizer for this channel setting
    net, loss_func, optimizer = make_the_net(num_chans)

    # Per-epoch result containers
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_err  = torch.zeros(num_epochs)
    test_err   = torch.zeros(num_epochs)

    # Move the model to `device`
    net.to(device)

    for epoch_i in range(num_epochs):

        # ---- Training pass ----
        net.train()
        batch_err, batch_loss = [], []

        for X, y in train_loader:

            # Data has to sit on the same device as the model
            X, y = X.to(device), y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Book-keeping: loss and fraction of misclassified samples
            misclassified = torch.argmax(y_hat, axis=1) != y
            batch_loss.append(loss.item())
            batch_err.append(misclassified.float().mean().item())

        # Epoch averages (error rate converted to percent)
        train_loss[epoch_i] = np.mean(batch_loss)
        train_err [epoch_i] = 100 * np.mean(batch_err)

        # ---- Evaluation pass on the first test batch ----
        net.eval()
        X, y = next(iter(test_loader))
        X, y = X.to(device), y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        misclassified = torch.argmax(y_hat, axis=1) != y
        test_loss[epoch_i] = loss.item()
        test_err [epoch_i] = 100 * misclassified.float().mean().item()

    return train_loss, test_loss, train_err, test_err, net

### TRANSFER_ FMNIST

In [None]:
def train_the_model(net, train_loader, test_loader, num_epochs=10):
    """
    TRANSFER_ FMNIST

    Train `net` on `train_loader` and measure accuracy on the first batch of
    `test_loader` after every epoch.

    The loss function and the optimizer are NOT parameters: the notebook-level
    names `loss_func` and `optimizer` are used.
    NOTE(review): `optimizer` must have been built over the parameters of the
    `net` passed in -- confirm at the call site.

    Args:
        net: model to train (updated in place).
        train_loader: iterable of (X, y) training batches.
        test_loader: iterable of (X, y) test batches; only the first is used.
        num_epochs: number of passes over `train_loader`.

    Returns:
        train_acc: list with the mean batch accuracy (percent) of each epoch.
        test_acc:  list with the test-batch accuracy (percent) of each epoch.
        losses:    tensor with the mean batch loss of each epoch.
        net:       the trained model.
    """

    # Initialize
    losses    = torch.zeros(num_epochs)
    train_acc = []
    test_acc  = []

    # Loop over epochs
    for epoch_i in range(num_epochs):

        # Loop over training data batches
        net.train()
        batch_acc  = []
        batch_loss = []

        for X, y in train_loader:
            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss from this batch
            batch_loss.append(loss.item())

            # Compute accuracy
            matches = torch.argmax(y_hat, axis=1) == y       # Booleans (True/False)
            matches_numeric = matches.float()                # Convert to numbers (1/0)
            accuracy_pct = 100 * torch.mean(matches_numeric) # Average and *100
            # Store a plain Python float: np.mean below cannot average a list
            # of tensors that live on a GPU, and the other training loops in
            # this notebook store `.item()` values as well
            batch_acc.append(accuracy_pct.item())
        # End of batch loop.

        # Get the average training accuracy of the batches
        train_acc.append(np.mean(batch_acc))

        # The average losses across the batches
        losses[epoch_i] = np.mean(batch_loss)

        # Test accuracy
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader
        with torch.no_grad():
            y_hat = net(X)
        test_acc.append(100 * torch.mean((torch.argmax(y_hat, axis=1) == y).float()))

    # End epochs

    return train_acc, test_acc, losses, net

### TRANSFER_LETTER2NUMBER

In [None]:
def train_the_model(net, optimizer, train_loader, test_loader, num_epochs=10):
    """
    TRANSFER_LETTER2NUMBER

    Train the supplied `net` with the supplied `optimizer` on `train_loader`
    and track, per epoch, the loss and the error rate (percentage of wrong
    argmax predictions) on the training batches and on the first batch of
    `test_loader`.

    Args:
        net: model to train (e.g. a pretrained network); updated in place.
        optimizer: optimizer that updates `net`.
        train_loader: iterable of (X, y) training batches.
        test_loader: iterable of (X, y) test batches; only the first is used.
        num_epochs: number of passes over `train_loader`.

    Returns:
        train_loss, test_loss, train_err, test_err: one value per epoch.
        net: the trained model.
    """

    # Only the loss function is taken from make_the_net(). The network and
    # optimizer it builds are discarded so that the caller's `net`,
    # `optimizer` and `num_epochs` are the ones actually used.
    _, loss_func, _ = make_the_net()

    # Initialize
    train_loss = torch.zeros(num_epochs)
    test_loss  = torch.zeros(num_epochs)
    train_err  = torch.zeros(num_epochs)
    test_err   = torch.zeros(num_epochs)

    # Send the model to the GPU
    net.to(device)

    # Loop over epochs
    for epoch_i in range(num_epochs):
        net.train()

        # Loop over training data batches
        batch_err  = []
        batch_loss = []

        for X, y in train_loader:

            # Push data to GPU
            X = X.to(device)
            y = y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and Error from this batch
            batch_loss.append(loss.item())
            batch_err .append(torch.mean((torch.argmax(y_hat, axis=1) != y).float()).item())

        # End of batch loop.

        # Average loss and error rate (in percent) across the batches
        train_loss[epoch_i] = np.mean(batch_loss)
        train_err [epoch_i] = 100 * np.mean(batch_err)

        # Test error on the first batch of the test loader
        net.eval()
        X, y = next(iter(test_loader)) # Extract X, y from dataloader

        # Push data to GPU
        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        test_loss[epoch_i] = loss.item()
        test_err [epoch_i] = 100 * torch.mean((torch.argmax(y_hat, axis=1) != y).float()).item()

    # End epochs

    return train_loss, test_loss, train_err, test_err, net

### TRANSFER_ RESNET

In [None]:
# Fine-tune `resnet` for a fixed number of epochs, recording the loss and
# accuracy on the training set and on the full test set after every epoch.
num_epochs = 10

# Initialize
train_loss = torch.zeros(num_epochs)
test_loss  = torch.zeros(num_epochs)
train_acc  = torch.zeros(num_epochs)
test_acc   = torch.zeros(num_epochs)

# Loop over epochs
for epoch_i in range(num_epochs):

    resnet.train()

    # Loop over training data batches
    batch_acc  = []
    batch_loss = []

    for X, y in train_loader:
        # Push data to GPU
        X = X.to(device)
        y = y.to(device)

        # Forward pass and loss
        y_hat = resnet(X)
        loss  = loss_func(y_hat, y)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Loss and accuracy from this batch
        batch_loss.append(loss.item())
        batch_acc .append(torch.mean((torch.argmax(y_hat, axis=1) == y).float()).item())

    # End of batch loop.

    # Average loss and accuracy (in percent) across the training batches
    train_loss[epoch_i] = np.mean(batch_loss)
    train_acc [epoch_i] = 100 * np.mean(batch_acc)

    # Test accuracy: the lists are reset and refilled with test batches
    resnet.eval()
    batch_acc  = []
    batch_loss = []

    for X, y in test_loader:

        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():
            y_hat = resnet(X)
            loss  = loss_func(y_hat, y)

        batch_loss.append(loss.item())
        batch_acc .append(torch.mean((torch.argmax(y_hat, axis=1) == y).float()).item())

    # Average over ALL test batches (not just the last one left in y_hat / y)
    test_loss[epoch_i] = np.mean(batch_loss)
    test_acc [epoch_i] = 100 * np.mean(batch_acc)

    print(f'Finished epoch {epoch_i + 1} / {num_epochs}. Test accuracy = {test_acc[epoch_i]:.2f}%')

### TRANSFER_ PRETRAIN_FMNIST

In [None]:
def train_the_class_model(net, loss_func, optimizer):
    """
    TRANSFER_ PRETRAIN_FMNIST

    Train `net` for 10 epochs on the notebook-level `train_loader` and track,
    per epoch, the loss and accuracy on the training batches and on the first
    batch of the notebook-level `dev_loader`. Model and data are moved to the
    notebook-level `device`.

    Args:
        net: model to train (updated in place).
        loss_func: callable `loss_func(y_hat, y)` returning the loss tensor.
        optimizer: optimizer that updates `net`.

    Returns:
        train_loss, dev_loss: loss per epoch.
        train_acc, dev_acc:   accuracy per epoch, in percent.
        net: the trained model.
    """

    num_epochs = 10

    # Initialize
    train_loss = torch.zeros(num_epochs)
    dev_loss   = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    dev_acc    = torch.zeros(num_epochs)

    # Send the model to the GPU
    net.to(device)

    # Loop over epochs
    for epoch_i in range(num_epochs):
        net.train()

        # Loop over training data batches
        batch_loss = []
        batch_acc  = []

        for X, y in train_loader:

            # Push data to GPU
            X = X.to(device)
            y = y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Loss and accuracy from this batch
            batch_loss.append(loss.item())
            batch_acc.append(torch.mean((torch.argmax(y_hat, axis=1) == y).float()).item())

        # End of batch loop.

        # Average loss and accuracy (in percent) across the batches
        train_loss[epoch_i] = np.mean(batch_loss)
        train_acc [epoch_i] = 100 * np.mean(batch_acc)

        # Dev-set accuracy on the first batch of the dev loader
        net.eval()
        X, y = next(iter(dev_loader)) # Extract X, y from dataloader

        # Push data to GPU
        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        dev_loss[epoch_i] = loss.item()
        # `.item()` belongs on the averaged scalar, not on the per-sample vector
        dev_acc [epoch_i] = 100 * torch.mean((torch.argmax(y_hat, axis=1) == y).float()).item()

    # End epochs

    return train_loss, dev_loss, train_acc, dev_acc, net

In [None]:
def train_the_class_model(net, loss_func, optimizer):
    """
    TRANSFER_ PRETRAIN_FMNIST

    Train `net` for 10 epochs on the notebook-level `train_loader` and track,
    per epoch, the loss and accuracy on the training batches and on the first
    batch of the notebook-level `dev_loader`. Model and data are moved to the
    notebook-level `device`.

    Args:
        net: model to train (updated in place).
        loss_func: callable `loss_func(y_hat, y)` returning the loss tensor.
        optimizer: optimizer that updates `net`.

    Returns:
        train_loss, dev_loss: loss per epoch.
        train_acc, dev_acc:   accuracy per epoch, in percent.
        net: the trained model.
    """

    num_epochs = 10

    # Per-epoch result containers
    train_loss = torch.zeros(num_epochs)
    dev_loss   = torch.zeros(num_epochs)
    train_acc  = torch.zeros(num_epochs)
    dev_acc    = torch.zeros(num_epochs)

    # Move the model to `device`
    net.to(device)

    for epoch_i in range(num_epochs):

        # ---- Training pass ----
        net.train()
        batch_loss, batch_acc = [], []

        for X, y in train_loader:

            # Data has to sit on the same device as the model
            X, y = X.to(device), y.to(device)

            # Forward pass and loss
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Book-keeping: loss and fraction of correctly classified samples
            correct = torch.argmax(y_hat, axis=1) == y
            batch_loss.append(loss.item())
            batch_acc.append(correct.float().mean().item())

        # Epoch averages (accuracy converted to percent)
        train_loss[epoch_i] = np.mean(batch_loss)
        train_acc [epoch_i] = 100 * np.mean(batch_acc)

        # ---- Evaluation pass on the first dev batch ----
        net.eval()
        X, y = next(iter(dev_loader))
        X, y = X.to(device), y.to(device)

        with torch.no_grad():
            y_hat = net(X)
            loss  = loss_func(y_hat, y)

        correct = torch.argmax(y_hat, axis=1) == y
        dev_loss[epoch_i] = loss.item()
        dev_acc [epoch_i] = 100 * correct.float().mean().item()

    return train_loss, dev_loss, train_acc, dev_acc, net

## BUILD AND TRAIN

### REGRESSION

In [None]:
def build_and_train_the_model(x, y):
  """
  Regression slopes

  Fit a tiny network (Linear(1, 1) -> ReLU -> Linear(1, 1)) to (x, y) using
  MSE loss and SGD (lr=.05) for 500 epochs on the full data set.

  Args:
    x: inputs passed directly to the model (one feature per row).
    y: regression targets compared against the model output.

  Returns:
    predictions: model output for `x` after training (still attached to the
      autograd graph, so callers may `.detach()` it).
    losses: tensor holding the loss value of each epoch.
  """
  # Build the model
  ANN_reg = nn.Sequential(
    nn.Linear(in_features=1, out_features=1), # Input layer
    nn.ReLU(),                                # Activation Function
    nn.Linear(in_features=1, out_features=1), # Output layer
  )

  # Loss and optimizer functions
  loss_func = nn.MSELoss()
  optimizer = torch.optim.SGD(params=ANN_reg.parameters(), lr=.05)

  ## Train the model
  num_epochs  = 500
  losses      = torch.zeros(num_epochs)

  for epoch in range(num_epochs):
    # Forward pass
    y_hat = ANN_reg(x)
    # Compute loss
    loss          = loss_func(y_hat, y)
    # Store only the number; assigning the tensor itself would tie `losses`
    # to the autograd graph of every epoch
    losses[epoch] = loss.item()
    # Backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  ## End training loop

  ## Compute model predictions
  predictions = ANN_reg(x)

  # output:
  return predictions, losses

## SAVE

In [None]:
# Save the trained model's state to disk
# `net`: The trained model
# `state_dict()`: Dictionary of the model's current state (per the PyTorch docs: its
#                 parameters and persistent buffers; the class definition is not stored)
# `'trained_model.pt'`: File name, written relative to the current working directory

torch.save(net.state_dict(),'trained_model.pt')

# EXPERIMENTS

## REGRESSION

In [None]:
# Train `ANN_reg` on the full data set (x, y) for a fixed number of epochs
num_epochs  = 500
losses      = torch.zeros(num_epochs)

for epoch in range(num_epochs):
  # Forward pass -> Output (prediction) of the model
  y_hat = ANN_reg(x)
  # Compute loss
  loss          = loss_func(y_hat, y)
  # Store only the number; assigning the tensor itself would tie `losses`
  # to the autograd graph of every epoch
  losses[epoch] = loss.item()
  # Backprop
  optimizer.zero_grad() # Set all the derivatives in the model to zero
  loss.backward()       # Run backprop based on the loss just computed
  optimizer.step()      # Update the weights using the new gradients

In [None]:
# Plot the per-epoch losses together with the loss after training

# One last forward pass through the trained model
predictions = ANN_reg(x)

# Mean squared error, computed by hand
test_loss = torch.mean((predictions - y) ** 2)

# `detach()` separates the values from the autograd graph before plotting
plt.plot(losses.detach(), 'o', markerfacecolor='w', linewidth=0.1)
# Final loss as a single red dot just past the last epoch
plt.plot(num_epochs, test_loss.detach(), 'ro')
plt.title(f'Final Loss = {np.round(test_loss.item(), 3)}')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

## REGRESSION SLOPES

In [None]:
# m: Slope
def create_the_data(m, N=50):
  """
  Create noisy linear data y = m * x + noise.

  Args:
    m: slope of the linear relationship.
    N: number of samples to draw (default 50).

  Returns:
    x: (N, 1) tensor of standard-normal inputs.
    y: (N, 1) tensor, `m * x` plus standard-normal noise divided by 2.
  """
  x = torch.randn(N, 1)
  y = m * x + torch.randn(N, 1) / 2
  return x, y

In [None]:
# Create a dataset with slope 0.8
x, y = create_the_data(m=.8)

# Build and train a model on it; returns its predictions for x and the per-epoch losses
y_hat, losses = build_and_train_the_model(x, y)

# Two panels side by side: losses (left) and data vs. predictions (right)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# Left panel: loss per epoch
ax[0].plot(losses.detach(), 'o', markerfacecolor='w', linewidth=.1)
ax[0].set_xlabel('Epoch')
ax[0].set_title('Loss')
ax[0].grid()

# Right panel: real data (blue circles) and model predictions (red squares)
ax[1].plot(x, y, 'bo', label='Real Data')
ax[1].plot(x, y_hat.detach(), 'rs', label='Predictions')
ax[1].set_xlabel('x')
ax[1].set_ylabel('y')
# Title reports the correlation coefficient between targets and predictions
ax[1].set_title(f'Prediction-data corrr = {np.corrcoef(y.T, y_hat.detach().T)[0, 1]:.2f}')
ax[1].legend()
ax[1].grid()

plt.show()

## META_PARAMS_RELUS

In [None]:
# Activation functions to compare (names are handed to ANN_wine)
activation_funcs = ['ReLU', 'ReLU6', 'LeakyReLU']

# Result matrices: one row per epoch, one column per activation function
train_acc_by_act = np.zeros((num_epochs, len(activation_funcs)))
test_acc_by_act  = np.zeros_like(train_acc_by_act)

for i, act_func_i in enumerate(activation_funcs):
  # Fresh model with this activation function; train_the_model() takes no
  # arguments here, so it presumably trains the notebook-level `wine_net` -- TODO confirm
  wine_net = ANN_wine(act_func=act_func_i)
  train_acc_by_act[:, i], test_acc_by_act[:, i], losses = train_the_model()

In [None]:
# Plot train (left) and test (right) accuracy per epoch, one line per activation function
fig, ax = plt.subplots(1 ,2, figsize=(20,7))

ax[0].plot(train_acc_by_act)
ax[0].set_title('Train accuracy')
ax[1].plot(test_acc_by_act)
ax[1].set_title('Test accuracy')

# Common features
for i in range(2):
  ax[i].legend(activation_funcs)   # Column order matches activation_funcs
  ax[i].set_xlabel('Epoch')
  ax[i].set_ylabel('Accuracy (%)')
  ax[i].set_ylim([50, 100])        # Fixed range; accuracies below 50% are cut off
  ax[i].grid()

plt.show()

### META_PARAMS_MULTIOUTPUTS

In [None]:
# Run the data through the model to get the categorical predictions:
# the predicted category is the index of the largest output per sample
y_hat       = net(data)
predictions = torch.argmax(y_hat, axis=1)

# And plot those against the real data
plt.figure(figsize=(18, 7))
plt.plot(predictions, 'o', label='Predicted values', alpha=0.4)
# True labels are shifted up by 0.2 so they do not hide the predictions
plt.plot(labels + 0.2, 's', label='True value', alpha=0.4)
plt.xlabel('Qwerty number')
plt.ylabel('Category')
plt.yticks([0, 1, 2])
# Leave some room above and below the three categories
plt.ylim([-1, 3])
plt.legend()
plt.show()

In [None]:
# Score every prediction: 1.0 where it matches the label, 0.0 otherwise
accuracy = (predictions == labels).float()

# Overall accuracy in percent
total_acc = (100 * accuracy).mean().item()

# Accuracy in percent within each of the three groups
accuracy_by_group = np.zeros(3)

for i in range(3):
  accuracy_by_group[i] = 100 * accuracy[labels == i].mean()

# Bar chart of the per-group accuracy; the y-axis is fixed to 80-100%
plt.bar(range(3), accuracy_by_group)
plt.title(f'Final Accuracy = {total_acc:.2f}')
plt.xlabel('Group')
plt.ylabel('Accuracy (%)')
plt.xticks([0, 1, 2])
plt.ylim([80, 100])
plt.show()

In [None]:
# Finally, show the data in their native space, labeled by accuracy
number_classes = 3
# Data markers: matplotlib format strings, one per class
colors_shapes = ['bs', 'ko', 'g^']

# Show the data
fig = plt.figure(figsize=(7, 7))

# Plot correct and incorrect labeled data
for i in range(number_classes):
    # Plot all data points for this label (first two columns of `data` as x and y)
    plt.plot(data[labels == i, 0], data[labels == i, 1], colors_shapes[i], alpha=0.3, label=f'Group {i}')
    
    # Cross out the incorrect ones: samples of this class whose prediction was wrong
    idx_error = (accuracy == 0) & (labels == i)
    plt.plot(data[idx_error, 0], data[idx_error, 1], 'rx')

plt.title(f'The qwerties! ({total_acc:.2f}% accurately labeled)')
plt.xlabel('qwerty dimension 1')
plt.ylabel('qwerty dimension 2')
plt.legend()
plt.show()

## META_PARAMS_OPTIMIZERS_QWERTY

In [None]:
# Variables to loop over
# 20 learning rates, logarithmically spaced between 0.0001 and 0.1
learning_rates  = np.logspace(start=np.log10(0.0001), stop=np.log10(0.1), num=20)
optimizer_types = ['SGD', 'RMSprop', 'Adam']

# Initialize performance matrix: one row per learning rate, one column per optimizer
final_performance = np.zeros(shape=(len(learning_rates), len(optimizer_types)))

# Now for the experiment! One training run per (optimizer, learning rate) pair
for idx_optimizer, optimizer_i in enumerate(optimizer_types):
  for idx_l_rate, l_rate_i in enumerate(learning_rates):
    train_acc, test_acc, losses, net = train_the_model(optimizer_type=optimizer_i, learning_rate=l_rate_i)
    # Summarize the run by the mean of the last 10 test-accuracy values
    final_performance[idx_l_rate, idx_optimizer] = np.mean(test_acc[-10:])

In [None]:
# Plot the results: one line per optimizer (columns of final_performance),
# learning rate on a logarithmic x-axis
plt.plot(learning_rates, final_performance, 'o-', linewidth=2)
plt.legend(optimizer_types)
plt.xscale('log')
plt.xlabel('Learning rates')
plt.ylabel('Test accuracy (ave. last 10 epochs)')
plt.title('Comparison of optimizers by learning rate')
plt.show()

## FFN_DISTRIBUTIONS OF WEIGHTS PRE- AND POST-LEARNING

In [None]:
# EXPLORING THE "INNARDS" OF THE MODEL
# Create a temp model to explore (only the first item returned by
# create_the_MNIST_net() is kept; it is used as the model below)
net = create_the_MNIST_net()[0]

# Summary of the entire model
print('Summary of model: ')
print(net, '\n')

# Explore one of the layers: vars() lists the attributes of the `input` layer
print('Summary of input layer: ')
print(vars(net.input), '\n')

# Check out the matrix of weights
print('Input layer weights: ')
print(net.input.weight.shape)
print(net.input.weight, '\n')

# Finally, extract the weights and make a histogram
# (detached from autograd and flattened to one vector; 40 bins)
w = net.input.weight.detach().flatten()
plt.hist(w, 40)
plt.xlabel('Weight value')
plt.ylabel('Count')
plt.title('Distribution of initialized input-layer weights')
plt.show()

In [None]:
# FUNCTION RETURNING A HISTOGRAM OF ALL WEIGHTS (ACROSS ALL LAYERS)
def weights_histogram(net, bin_edges=None):
  """Return a density histogram over every parameter value of ``net``.

  All tensors yielded by ``net.parameters()`` are flattened and pooled into
  one vector before the histogram is computed.

  Args:
    net: object exposing ``parameters()`` that yields torch tensors.
    bin_edges: optional 1-D array of bin edges. Defaults to 100 equal-width
      bins spanning [-0.8, 0.8]. Values outside the edges are ignored by
      ``np.histogram``.

  Returns:
    (hist_x, hist_y): bin centers and the density in each bin.
  """
  if bin_edges is None:
    # Default range: 100 bins between -0.8 & 0.8
    bin_edges = np.linspace(-0.8, 0.8, 101)

  # Collect every parameter as a flat array and concatenate once, instead of
  # re-copying the growing vector on each iteration. The leading empty float64
  # array keeps the result float64 and makes a parameter-free net valid input.
  # .cpu() lets this work for models that live on an accelerator.
  chunks = [np.array([])]
  chunks.extend(p.detach().cpu().flatten().numpy() for p in net.parameters())
  W = np.concatenate(chunks)

  # Compute their histogram and convert bin edges to bin centers
  hist_y, hist_x = np.histogram(W, bins=bin_edges, density=True)
  hist_x = (hist_x[1:] + hist_x[:-1]) / 2

  return hist_x, hist_y

# Test: histogram the parameters of the current `net` and plot the density
# against the bin centers
hist_x, hist_y = weights_histogram(net)
plt.plot(hist_x, hist_y)

In [None]:
# SHOW THE HISTOGRAM OF THE WEIGHTS
# hist_y is indexed as a 2-D array here: one row per training epoch, one
# column per histogram bin (hist_x holds the bin centers).
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Derive the epoch count from the data so the color ramp and the image extent
# stay valid for any number of epochs (identical to the old values for 100)
n_epochs = hist_y.shape[0]

# Left panel: one line per epoch, color fading from red (early) to blue (late)
for i in range(n_epochs):
  ax[0].plot(hist_x, hist_y[i, :], color=[1 - i / n_epochs, 0.3, i / n_epochs])

ax[0].set_title('Histogram of Weights')
ax[0].set_xlabel('Weight value')
ax[0].set_ylabel('Density')

# Right panel: the same data as an image (x = weight value, y = epoch)
ax[1].imshow(hist_y, vmin=0, vmax=3, extent=[hist_x[0], hist_x[-1], 0, n_epochs - 1],
             aspect='auto', origin='lower', cmap='hot')
ax[1].set_xlabel('Weight value')
# Was a second set_xlabel call, which overwrote the x label and left y unlabeled
ax[1].set_ylabel('Training epoch')
ax[1].set_title('Image of weight histogram')

plt.show()

## FFN_NONMNIST, FFN_BINARIZED MNIST

In [None]:
# Push one batch from the test loader through the model; detach so the
# outputs can be plotted directly
X, y        = next(iter(test_loader))
predictions = net(X).detach()
print(torch.exp(predictions))

# Index of the sample whose per-digit outputs are shown
sample_2_show = 120

# Two bar charts for that sample: the raw model outputs first, then their
# exponentials
digits = range(10)
for evidence in (predictions[sample_2_show], torch.exp(predictions[sample_2_show])):
  plt.bar(digits, evidence)
  plt.xticks(digits)
  plt.title(f'True number was {y[sample_2_show].item()}')
  plt.xlabel('Number')
  plt.ylabel('Evidence for that number')
  plt.show()

In [None]:
# Find the errors: indices where the arg-max of the model output differs from
# the true label
errors = np.where(torch.max(predictions, axis=1)[1] != y)[0]
print(errors)

# Which of the misclassified samples to show (position within `errors`)
sample_2_show = 14
err_idx       = errors[sample_2_show]

fig, ax = plt.subplots(1, 2, figsize=(14, 5))

# Left: exponentiated model outputs for that sample (torch.exp, consistent
# with the previous cell, since `predictions` is a torch tensor)
ax[0].bar(range(10), torch.exp(predictions[err_idx]))
ax[0].set_xticks(range(10))
ax[0].set_xlabel('Number')
ax[0].set_ylabel('Evidence for that number')
# Built from two adjacent literals: the former backslash continuation inside
# the string embedded the next line's indentation into the title
ax[0].set_title(f'True number: {y[err_idx].item()}, model guesses: '
                f'{torch.argmax(predictions[err_idx]).item()}')

# Right: the misclassified image itself, reshaped to 28x28
ax[1].imshow(np.reshape(X[err_idx, :], (28, 28)), cmap='gray')

## FFN_ BREADTH VS. DEPTH

In [None]:
# Model sizes to compare
num_layers = range(1, 4)            # 1, 2, 3 hidden layers
num_units  = np.arange(50, 251, 50) # 50, 100, ..., 250 units per hidden layer

# accuracies[0] holds train results, accuracies[1] test results;
# remaining axes are (units, layers)
accuracies = np.zeros((2, len(num_units), len(num_layers)))

for unit_i, n_units in enumerate(num_units):
  for layer_i, n_layers in enumerate(num_layers):

    # Train a brand-new model for this (units, layers) combination
    train_acc, test_acc, losses, net = train_the_model(n_units, n_layers)

    # Keep the mean of the last 5 recorded accuracies
    accuracies[0, unit_i, layer_i] = np.mean(train_acc[-5:])
    accuracies[1, unit_i, layer_i] = np.mean(test_acc[-5:])

    # Progress report
    print(f'Finished {unit_i + 1} / {len(num_units)} unit and layers {layer_i + 1} / {len(num_layers)}')


In [None]:
# SHOW ACCURACY AS A FUNCTION OF MODEL DEPTH
# accuracies[0] = train, accuracies[1] = test; one line per number of layers
fig, ax = plt.subplots(1, 2, figsize=(15, 6))

for i, panel_title in enumerate(('Train', 'Test')):
  # Each panel plots its own slice (the 'Test' panel used to re-plot the train
  # slice). 'o-' adds the markers that markerfacecolor/markersize apply to.
  ax[i].plot(num_units, accuracies[i, :, :], 'o-', markerfacecolor='w', markersize=9)
  ax[i].legend(num_layers)
  ax[i].set_ylabel('Accuracy')
  ax[i].set_xlabel('Number of hidden units')
  ax[i].set_title(panel_title)

plt.show()

## FFN_OPTIMIZERS

In [None]:
# Experiment grid: 6 log-spaced learning rates (1e-4 .. 1e-1) x 3 optimizers
optimizer_types = ['SGD', 'RMSprop', 'Adam']
learning_rates  = np.logspace(np.log10(0.0001), np.log10(0.1), num=6)

# Result matrix: one row per learning rate, one column per optimizer
final_performance = np.zeros((len(learning_rates), len(optimizer_types)))

# Train one model for every (optimizer, learning rate) combination
for col, opt_name in enumerate(optimizer_types):
  for row, lr in enumerate(learning_rates):
    train_acc, test_acc, losses, net = train_the_model(optimizer_type=opt_name, learning_rate=lr)

    # Score of this run: mean of the last 10 entries of test_acc
    final_performance[row, col] = np.mean(test_acc[-10:])

In [None]:
# One curve per optimizer (columns of final_performance) against the
# learning rate, drawn on a logarithmic x-axis
plt.semilogx(learning_rates, final_performance, 'o-', linewidth=2)
plt.legend(optimizer_types)
plt.title('Comparison of optimizers by learning rate')
plt.xlabel('Learning rates')
plt.ylabel('Test accuracy (ave. last 10 epochs)')
plt.show()

## FFN_MNIST_NO7

In [None]:
# Feed one batch of test images through the model
# (element [0] of the batch is the image tensor; labels are not needed)
X           = next(iter(test_loader))[0]
predictions = net(X).detach()

# The model's guess is the index of the largest output in each row
guesses = torch.argmax(predictions, dim=1)

# Draw 12 random samples (with replacement) and show each image together
# with the label the model assigned to it
fig, axs = plt.subplots(3, 4, figsize=(10, 6))
some_random_7s = np.random.choice(len(X), size=12)

for ax, sample_idx in zip(axs.flatten(), some_random_7s):
  this_img = X[sample_idx].view(28, 28)
  ax.imshow(this_img, cmap='gray')
  ax.set_title(f'The number {guesses[sample_idx]}')
  ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Which numbers are most likely to be confused with 7?
print(np.unique(guesses))

fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# Proportion of samples that the model labeled as each digit
prob_mistaken = np.zeros(10)
for i in range(10):
  prob_mistaken[i] = torch.mean((guesses == i).float())

# Left panel. Axes objects use set_* methods; the pyplot-style names
# (xticks / xlabel / ylabel / show) do not exist on Axes and raised
# AttributeError.
ax[0].bar(range(10), prob_mistaken)
ax[0].set_xticks(range(10))
ax[0].set_xlabel('Number')
ax[0].set_ylabel('Proportion of times "7" was labeled')

# Evidence for all numbers from one sample (right panel)
sample_2_show = 30

ax[1].bar(range(10), torch.exp(predictions[sample_2_show]))
ax[1].set_xticks(range(10))
ax[1].set_xlabel('Number')
ax[1].set_ylabel('Evidence for that number')

plt.tight_layout()
plt.show()

## DATA_DATA_VS_DEPTH_QWERTY2

In [None]:
# Metaparameters of the experiment. The node budget is split evenly across
# the hidden layers, so deeper models get narrower layers.
n_nodes_in_model = 80
layers_range     = [1, 5, 10, 20]
n_data_points    = np.arange(start=50, stop=551, step=50)

# Legend entries, reused by the plotting cell
legend = []

# Build each architecture once to report its size
for layers in layers_range:
    # Units per layer (fraction truncated)
    units_per_layer = int(n_nodes_in_model / layers)
    net             = create_the_qwerty_net(n_units=units_per_layer, n_layers=layers)[0]

    # Number of trainable parameters
    n_params = np.sum([p.numel() for p in net.parameters() if p.requires_grad])

    legend.append(f'{layers} layers, {units_per_layer} units, {n_params} params')
    print(f'This model will have {layers} layers, each with {units_per_layer} units, totalling {n_params} parameters')

In [None]:
# results[data size, depth, 0] = accuracy, results[data size, depth, 1] = loss
results = np.zeros((len(n_data_points), len(layers_range), 2))

for size_idx, n_per_clust in enumerate(n_data_points):
    # One dataset per sample size, shared by every depth tested below.
    # NOTE(review): train_the_model() receives no data argument, so it
    # presumably reads train_data / test_data from module scope - confirm.
    the_data   = create_some_data(n_per_clust=n_per_clust)
    train_data = the_data['train_data']
    test_data  = the_data['test_data']

    for depth_idx, n_layers in enumerate(layers_range):
        units_per_layer = int(n_nodes_in_model / n_layers)
        train_acc, test_acc, losses, net = train_the_model(n_units=units_per_layer, n_layers=n_layers)

        # Mean of the last 5 accuracies and of the last 5 losses
        results[size_idx, depth_idx, 0] = np.mean(test_acc[-5:])
        results[size_idx, depth_idx, 1] = torch.mean(losses[-5:]).item()

In [None]:
# Two panels: losses (left) and accuracy (right), one line per architecture
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# (slice of results' last axis, line format, y label, title) for each panel
panels = (
    (1, 's-', 'Loss',         'Losses'),
    (0, 'o-', 'Accuracy (%)', 'Accuracy'),
)

for panel, (result_slot, line_fmt, y_label, title) in zip(ax, panels):
    panel.plot(n_data_points, results[:, :, result_slot], line_fmt)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Number of data points')
    panel.legend(legend)
    panel.set_title(title)

plt.show()

## DATA_ UNBALANCED DATA

In [None]:
# Build the loaders with the quality threshold set to 5
train_loader, test_loader = create_a_dataset(qual_threshold=5)

# Fresh model, then train it (train_the_model takes no arguments here)
wine_net = ANN_wine()
train_acc, test_acc, losses = train_the_model()

# Per-item correctness on one test batch: a model output above 0 counts as
# a prediction of class 1
X, y = next(iter(test_loader))
y_hat = wine_net(X)
item_accuracy = ((y_hat > 0) == y).float()

# Accuracy (%) computed separately for the items labeled 0 and 1
per_qual_acc = [100 * torch.mean(item_accuracy[y == quality_class]) for quality_class in (0, 1)]

per_qual_acc

In [None]:
# Setup the figure: one row per threshold; columns = losses, accuracy,
# per-class accuracy
fig, ax = plt.subplots(3, 3, figsize=(18, 12))

# The quality thresholds
quality_thresholds = [4, 5, 6]

# Loop over quality thresholds
for qual_thres_idx, threshold in enumerate(quality_thresholds):

    # Create the data and model, and train it.
    # The result is bound to `losses` (previously `loss`), because `losses`
    # is what gets plotted below; before, every row re-plotted stale losses
    # left over from an earlier cell.
    train_loader, test_loader   = create_a_dataset(qual_threshold=threshold)
    wine_net                    = ANN_wine()
    train_acc, test_acc, losses = train_the_model()

    # Compute accuracy per quality type (output > 0 counts as class 1)
    X, y          = next(iter(test_loader))
    item_accuracy = ((wine_net(X) > 0) == y).float()
    per_qual_acc  = [100 * torch.mean(item_accuracy[y == 0]),
                     100 * torch.mean(item_accuracy[y == 1])]

    # Plot losses
    ax[qual_thres_idx, 0].plot(losses)
    ax[qual_thres_idx, 0].set_title(f'Losses with Threshold = {threshold + 0.5}')
    ax[qual_thres_idx, 0].set_xlabel('Epoch')
    ax[qual_thres_idx, 0].grid()

    # Plot overall accuracy
    ax[qual_thres_idx, 1].plot(train_acc, label='Train')
    ax[qual_thres_idx, 1].plot(test_acc,  label='Test')
    ax[qual_thres_idx, 1].set_title(f'Accuracy with Threshold = {threshold + 0.5}')
    ax[qual_thres_idx, 1].legend()
    ax[qual_thres_idx, 1].set_xlabel('Epoch')
    ax[qual_thres_idx, 1].set_ylim([0, 100])
    ax[qual_thres_idx, 1].grid()

    # Plot the per-quality accuracy
    bh = ax[qual_thres_idx, 2].bar(['Bad', 'Good'], per_qual_acc)
    ax[qual_thres_idx, 2].set_ylim([0, 100])
    ax[qual_thres_idx, 2].set_xlabel('Wine quality')
    ax[qual_thres_idx, 2].set_ylabel('Test Accuracy')
    ax[qual_thres_idx, 2].set_title(f'Per-Qual acc. with Threshold = {threshold + 0.5}')

    # Print the counts on top of each bar (number of TRAINING samples
    # carrying label i)
    for i, r in enumerate(bh):
        N = torch.sum(train_loader.dataset.tensors[1] == i).item()
        ax[qual_thres_idx, 2].text(r.get_x() + r.get_width()/2, r.get_height() + 1,
                                    f'N = {N}', ha='center', va='bottom', fontsize=14)

plt.tight_layout()
plt.show()

## DATA_ DATA_OVERSAMPLING

### FUNC - RETURNS A DATASET WITH A SPECIFIED SIZE

In [None]:
# Load the MNIST sample CSV. Passing the path (instead of an open() handle
# that was never closed) lets NumPy open and close the file itself.
data_full = np.loadtxt('sample_data/mnist_train_small.csv', delimiter=',')

# Now for the function
def make_the_dataset(N, double_the_data=False):
    """Build train/test DataLoaders from the first N rows of ``data_full``.

    Column 0 of ``data_full`` is used as the label; the remaining columns are
    the features, scaled by their maximum.

    Args:
        N: number of rows taken from the top of ``data_full``.
        double_the_data: when True, every sample (and its label) is
            duplicated BEFORE the train/test split, so copies of the same
            sample can land on both sides of the split.

    Returns:
        (train_loader, test_loader): the train loader shuffles, uses batches
        of 20 and drops the last incomplete batch; the test loader serves the
        whole test split as one batch.
    """
    # Separate labels (number IDs) from the features
    subset = data_full[:N]
    labels = subset[:, 0]
    pixels = subset[:, 1:]

    # Scale to a range of [0 1]
    data_norm = pixels / np.max(pixels)

    # Optionally append an exact copy of ALL the data
    if double_the_data:
        data_norm = np.concatenate((data_norm, data_norm), axis=0)
        labels    = np.concatenate((labels, labels), axis=0)

    # Tensor conversion followed by a 90/10 split via scikit-learn
    train_data, test_data, train_labels, test_labels = train_test_split(
        torch.tensor(data_norm).float(),
        torch.tensor(labels).long(),
        train_size=0.9,
    )

    # Disabled alternative: duplicate only the TRAIN split, after splitting
    # if double_the_data:
    #   train_data   = torch.cat((train_data, train_data), axis=0)
    #   train_labels = torch.cat((train_labels, train_labels), axis=0)

    # Wrap the splits as datasets, then as loaders
    train_loader = DataLoader(dataset=TensorDataset(train_data, train_labels),
                              batch_size=20, shuffle=True, drop_last=True)
    test_loader  = DataLoader(dataset=TensorDataset(test_data, test_labels),
                              batch_size=test_data.shape[0])

    return train_loader, test_loader

In [None]:
# Compare split sizes without and with doubling (train shape, then test shape)
for doubled in (False, True):
    r, t = make_the_dataset(N=200, double_the_data=doubled)
    print(r.dataset.tensors[0].shape)
    print(t.dataset.tensors[0].shape)

In [None]:
# Sample sizes to test: 500, 1000, ..., 4000
sample_sizes = np.arange(start=500, stop=4001, step=500)

# One row per sample size; columns = train accuracy, test accuracy, loss
results_single = np.zeros(shape=(len(sample_sizes), 3))
results_double = np.zeros(shape=(len(sample_sizes), 3))

for sample_size_idx, sample_size_i in enumerate(sample_sizes):

    # Same procedure twice per sample size: first the original data, then
    # the doubled data, each stored in its own matrix
    for result_store, doubled in ((results_single, False), (results_double, True)):

        # Generate a dataset and train the model
        train_loader, test_loader        = make_the_dataset(N=sample_size_i, double_the_data=doubled)
        train_acc, test_acc, losses, net = train_the_model()

        # Keep the mean of the last 5 values of each measure
        result_store[sample_size_idx, 0] = np.mean(train_acc[-5:])
        result_store[sample_size_idx, 1] = np.mean(test_acc[-5:])
        result_store[sample_size_idx, 2] = torch.mean(losses[-5:]).item()


In [None]:
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Per-panel title and y-axis label (panel i shows result column i)
titles      = ['Train', 'Devset', 'Losses']
y_ax_labels = ['Accuracy', 'Accuracy', 'Losses']

for i, (panel_title, y_label) in enumerate(zip(titles, y_ax_labels)):
    panel = ax[i]

    # Original vs. doubled data as a function of sample size
    panel.plot(sample_sizes, results_single[:, i], 's-', label='Original')
    panel.plot(sample_sizes, results_double[:, i], 's-', label='Doubled')

    # Cosmetics shared by all panels
    panel.set_title(panel_title)
    panel.set_xlabel('Unique sample size')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid('on')

    # The two accuracy panels share a fixed y-range
    if i < 2:
        panel.set_ylim([20, 102])

plt.tight_layout()
plt.show()

## DATA_NOISE_AUGMENTATION

In [None]:
# Load the MNIST sample CSV. Passing the path (instead of an open() handle
# that was never closed) lets NumPy open and close the file itself.
data_full = np.loadtxt('sample_data/mnist_train_small.csv', delimiter=',')

# Normalize the pixel columns to a range of [0 1]. Column 0 is left untouched
# and excluded from the maximum, so the scale depends on pixel values only.
data_full[:, 1:] = data_full[:, 1:] / np.max(data_full[:, 1:])

# Now for the function
def make_the_dataset(N, double_the_data=False):
    """Build train/dev DataLoaders from the first N rows of ``data_full`` plus a held-out test set.

    Column 0 of ``data_full`` is used as the label, the remaining columns as
    features. No scaling happens here; the features are used as stored.

    Args:
        N: number of rows used for train/dev; all rows from N onward become
            the test set.
        double_the_data: when True, a noisy copy of every sample (uniform
            noise in [0, 0.5) added to each feature) is appended BEFORE the
            train/dev split, so a sample and its noisy copy can fall on
            different sides of that split.

    Returns:
        (train_loader, dev_loader, (test_data, test_labels)): the train
        loader shuffles, uses batches of 20 and drops the last incomplete
        batch; the dev loader serves the whole dev split as one batch; the
        test set is returned as plain tensors.
    """
    # Separate labels (number IDs) from the features
    head   = data_full[:N]
    labels = head[:, 0]
    data   = head[:, 1:]

    # Optionally append a noisy copy of ALL the data
    if double_the_data:
        data_noise = data + np.random.random_sample(size=data.shape) / 2
        data       = np.concatenate((data, data_noise), axis=0)
        labels     = np.concatenate((labels, labels), axis=0)

    # Tensor conversion followed by a 90/10 split via scikit-learn
    train_data, dev_data, train_labels, dev_labels = train_test_split(
        torch.tensor(data).float(),
        torch.tensor(labels).long(),
        train_size=0.9,
    )

    # Disabled alternative: duplicate only the TRAIN split, after splitting
    # if double_the_data:
    #   train_data   = torch.cat((train_data, train_data), axis=0)
    #   train_labels = torch.cat((train_labels, train_labels), axis=0)

    # Wrap the splits as datasets, then as loaders
    train_loader = DataLoader(dataset=TensorDataset(train_data, train_labels),
                              batch_size=20, shuffle=True, drop_last=True)
    dev_loader   = DataLoader(dataset=TensorDataset(dev_data, dev_labels),
                              batch_size=dev_data.shape[0])

    # Test set: every row not used above (no dataloader needed)
    held_out    = data_full[N:]
    test_data   = torch.tensor(held_out[:, 1:]).float()
    test_labels = torch.tensor(held_out[:, 0]).long()

    return train_loader, dev_loader, (test_data, test_labels)

In [None]:
# VISUALIZE THE IMAGES
# Get some sample data (12 originals plus their noisy copies, then split)
train_loader, dev_loader, test_data_set = make_the_dataset(N=12, double_the_data=True)

# Pop out the data matrix of the training split
img = train_loader.dataset.tensors[0].detach()

# Show the first 12 training images, each reshaped to 28x28.
# The loop variable is named `panel` so it no longer shadows the `ax` array
# returned by plt.subplots.
fig, ax = plt.subplots(3, 4, figsize=(12, 8))
for i, panel in enumerate(ax.flatten()):
    panel.imshow(np.reshape(img[i, :], (28, 28)), cmap='gray')
    panel.axis('off')

plt.show()

In [None]:
# Sample sizes to test: 500, 1000, ..., 4000
sample_sizes = np.arange(start=500, stop=4001, step=500)

# One row per sample size; columns = train accuracy, dev accuracy, loss
results_single = np.zeros(shape=(len(sample_sizes), 3))
results_double = np.zeros(shape=(len(sample_sizes), 3))

for sample_size_idx, sample_size_i in enumerate(sample_sizes):

    # Same procedure twice per sample size: first the original data, then
    # the noise-augmented data, each stored in its own matrix
    for result_store, doubled in ((results_single, False), (results_double, True)):

        # Generate a dataset and train the model
        train_loader, dev_loader, test_data_set = make_the_dataset(N=sample_size_i, double_the_data=doubled)
        train_acc, dev_acc, losses, net         = train_the_model()

        # Keep the mean of the last 5 values of each measure
        result_store[sample_size_idx, 0] = np.mean(train_acc[-5:])
        result_store[sample_size_idx, 1] = np.mean(dev_acc[-5:])
        result_store[sample_size_idx, 2] = torch.mean(losses[-5:]).item()


In [None]:
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Per-panel title and y-axis label (panel i shows result column i)
titles      = ['Train', 'Devset', 'Losses']
y_ax_labels = ['Accuracy', 'Accuracy', 'Losses']

for i, (panel_title, y_label) in enumerate(zip(titles, y_ax_labels)):
    panel = ax[i]

    # Original vs. augmented data as a function of sample size
    panel.plot(sample_sizes, results_single[:, i], 's-', label='Original')
    panel.plot(sample_sizes, results_double[:, i], 's-', label='Augmented')

    # Cosmetics shared by all panels
    panel.set_title(panel_title)
    panel.set_xlabel('Unique sample size')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid('on')

    # The two accuracy panels share a fixed y-range
    if i < 2:
        panel.set_ylim([20, 102])

plt.tight_layout()
plt.show()

In [None]:
# TEST ACCURACY
# Train one model on the original data and one on the augmented data, then
# score both on the held-out test set.
sample_size = 500

train_loader, dev_loader, test_data_set = make_the_dataset(N=sample_size, double_the_data=False)
train_acc_O, dev_acc_O, losses_O, net_O = train_the_model() # O = original

train_loader, dev_loader, test_data_set = make_the_dataset(N=sample_size, double_the_data=True)
train_acc_A, dev_acc_A, losses_A, net_A = train_the_model() # A = augmented

# Extract the test data (rows from sample_size onward, so both calls above
# return the same test set)
X, y = test_data_set

# Run the original model.
# Scaled by 100 because the value is printed with a '%' sign; the bare mean
# is a proportion in [0, 1].
# NOTE(review): assumes train_acc / dev_acc are already percentages - confirm
# against train_the_model.
y_hat  = net_O(X)
test_O = 100 * torch.mean((torch.argmax(y_hat, axis=1) == y).float())


# And the augmented model
y_hat  = net_A(X)
test_A = 100 * torch.mean((torch.argmax(y_hat, axis=1) == y).float())

# Print the results!
print(f'ORIGINAL MODEL  (N = {sample_size})\n  Train: {train_acc_O[-1]:.2f}%, Devset: {dev_acc_O[-1]:.2f}%, Test: {test_O:.2f}%\n\n')
print(f'AUGMENTED MODEL (N = {sample_size})\n  Train: {train_acc_A[-1]:.2f}%, Devset: {dev_acc_A[-1]:.2f}%, Test: {test_A:.2f}%\n\n')


## DATA_DATA_FEATURE_AUGMENTATION

In [None]:
def plot_the_results():
    """Draw a 2x2 summary figure for the most recently trained model.

    Takes no arguments: reads ``net``, ``data_aug``, ``labels``, ``losses``,
    ``train_acc`` and ``test_acc`` from the enclosing (module) scope.

    Panels: losses, train/test accuracy, accuracy per group (3 groups), and
    a scatter of the first two data columns with misclassified points
    crossed out in red.
    """
    # Predicted class per sample over the whole dataset, and whether it is correct
    predictions = torch.argmax(net(data_aug), axis=1)
    accuracy    = (predictions == labels).float()

    # Accuracy (%) within each of the three groups
    accuracy_by_group = np.zeros(3)
    for group in range(3):
        accuracy_by_group[group] = 100 * torch.mean(accuracy[labels == group])

    fig, ax = plt.subplots(2, 2, figsize=(10, 6))
    (ax_loss, ax_acc), (ax_group, ax_scatter) = ax

    # Top-left: loss curve
    ax_loss.plot(losses.detach())
    ax_loss.set_title('Losses')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')

    # Top-right: train and test accuracy curves
    ax_acc.plot(train_acc, label='Train')
    ax_acc.plot(test_acc,  label='Test')
    ax_acc.set_title('Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy (%)')
    ax_acc.legend()

    # Bottom-left: per-group accuracy, y-range padded by 5 around the extremes
    ax_group.bar(range(3), accuracy_by_group)
    ax_group.set_ylim([np.min(accuracy_by_group) - 5, np.max(accuracy_by_group) + 5])
    ax_group.set_xticks([0, 1, 2])
    ax_group.set_title('Accuracy by group')
    ax_group.set_xlabel('Group')
    ax_group.set_ylabel('Accuracy (%)')

    # Bottom-right: all points by group, with the wrongly labeled ones marked
    color_shapes = ['bs', 'ko', 'g^'] # Data markers
    for group, marker in enumerate(color_shapes):
        in_group = labels == group
        ax_scatter.plot(data_aug[in_group, 0], data_aug[in_group, 1], marker,
                        alpha=0.3, label=f'Group {group}')

        # Red cross over every misclassified member of this group
        idx_err = (accuracy == 0) & in_group
        ax_scatter.plot(data_aug[idx_err, 0], data_aug[idx_err, 1], 'rx')

    ax_scatter.set_title('All groups')
    ax_scatter.set_xlabel('qwerty dimension 1')
    ax_scatter.set_ylabel('qwerty dimension 2')
    ax_scatter.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Train and visualize twice: first without, then with the extra feature
for use_extra in (False, True):
    train_acc, test_acc, losses, net = train_the_model(use_extra_feature=use_extra)
    print(f'Final accuracy: {test_acc[-1]:.2f}')
    plot_the_results()

In [None]:
# Repeat the experiment 10 times and compare the two conditions with a t-test

n_runs = 10
final_acc_2 = np.zeros(n_runs)  # without the extra feature
final_acc_3 = np.zeros(n_runs)  # with the extra feature

for run in range(n_runs):
    # Element [1] of the returned tuple is the test-accuracy history;
    # its last entry is kept
    final_acc_2[run] = train_the_model(use_extra_feature=False)[1][-1]
    final_acc_3[run] = train_the_model(use_extra_feature=True)[1][-1]

# Show the numbers side by side (one row per run)
side_by_side = np.vstack((final_acc_2, final_acc_3)).T
print(np.round(side_by_side, decimals=2))

# Independent-samples t-test (with-feature vs. without-feature)
t, p = stats.ttest_ind(final_acc_3, final_acc_2)
print(f't = {t:.2f}, p = {p:.2f}')
    

## DATA_SAVE_BEST_MODEL

In [None]:
# Run the model. The fourth return value is kept as `the_best_model`; the
# next cell reads its 'net' entry as a state dict.
train_acc, dev_acc, losses, the_best_model = train_the_model()

In [None]:
# Rebuild a fresh network and load the saved parameters into it
best_net = create_the_qwerty_net()[0]
best_net.load_state_dict(the_best_model['net'])

# Score it on one batch from the TEST loader: percentage of samples whose
# arg-max output matches the label
X, y       = next(iter(test_loader))
y_hat      = best_net(X)
is_correct = (torch.argmax(y_hat, dim=1) == y).float()
best_acc   = 100 * torch.mean(is_correct)
  

In [None]:
fig = plt.figure(figsize=(10, 5))

# Accuracy curves recorded during training
for curve, curve_label in ((train_acc, 'Train'), (dev_acc, 'Devset')):
    plt.plot(curve, 'o-', label=curve_label)

# Horizontal reference line: test accuracy of the restored model
plt.plot([0, len(dev_acc)], [best_acc, best_acc], 'r--', label='Best Dev model on TEST')

# Zoom the y-axis to +/- 5 around that reference
plt.ylim([best_acc - 5, best_acc + 5])
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.show()

## MODEL_PERFORM_APRF_WINE

In [None]:
# Create and train a model.
# NOTE(review): train_the_model() takes no arguments here, so it presumably
# uses the module-level wine_net and data loaders - confirm.
wine_net = ANN_wine()
train_acc, test_acc, losses = train_the_model()

In [None]:
# Raw model outputs for the full training set ...
train_predictions = wine_net(train_loader.dataset.tensors[0])

# ... and for the full test set
test_predictions  = wine_net(test_loader.dataset.tensors[0])

# Using scikitlearn to compute APRF: accuracy, precision, recall, F1 (in that order)
aprf_scorers = (skm.accuracy_score, skm.precision_score, skm.recall_score, skm.f1_score)

# Training (an output above 0 counts as a positive prediction)
train_metrics = [
    scorer(y_true=train_loader.dataset.tensors[1], y_pred=(train_predictions > 0))
    for scorer in aprf_scorers
]

# Test
test_metrics = [
    scorer(y_true=test_loader.dataset.tensors[1], y_pred=(test_predictions > 0))
    for scorer in aprf_scorers
]

In [None]:
# Paired bars per metric: train shifted slightly left, test slightly right
metric_pos = np.arange(4)
plt.bar(x=metric_pos - 0.1, height=train_metrics, width=0.5, label='Train')
plt.bar(x=metric_pos + 0.1, height=test_metrics, width=0.5, label='Test')
plt.xticks(metric_pos, ['Accuracy', 'Precision', 'Recall', 'F1-score'])
plt.ylim([0.6, 1])
plt.title('Performance Metrics')
plt.legend()
plt.show()

In [None]:
# CONFUSION MATRICES
# Rows are true classes, columns are predicted classes (output > 0 -> class 1)
train_conf  = skm.confusion_matrix(y_true=train_loader.dataset.tensors[1], y_pred=(train_predictions > 0))
test_conf   = skm.confusion_matrix(y_true=test_loader.dataset.tensors[1], y_pred=(test_predictions > 0))

fig, ax = plt.subplots(1, 2, figsize=(10, 4))

# One panel per split. Each panel is now titled with its own split name and
# color-scaled by its own sample count; the test panel used to be titled
# 'TRAIN' and scaled by the number of training predictions.
panels = (
    (ax[0], train_conf, train_predictions, 'TRAIN'),
    (ax[1], test_conf,  test_predictions,  'TEST'),
)

for panel, conf, predictions_for_split, split_name in panels:
    panel.imshow(conf, 'Blues', vmax=len(predictions_for_split) / 2)
    panel.set_xticks([0, 1])
    panel.set_yticks([0, 1])
    panel.set_xticklabels(['bad', 'good'])
    panel.set_yticklabels(['bad', 'good'])
    panel.set_xlabel('Prediction Quality')
    panel.set_ylabel('True Quality')
    panel.set_title(f'{split_name} Confusion Matrix')

    # Add text labels (text x = predicted class, text y = true class)
    panel.text(0, 0, f'True Negatives:\n{conf[0, 0]}',   ha='center', va='center')
    panel.text(0, 1, f'False Negatives:\n{conf[1, 0]}',  ha='center', va='center')
    panel.text(1, 1, f'True Positives:\n{conf[1, 1]}',   ha='center', va='center')
    panel.text(1, 0, f'False Positives:\n{conf[0, 1]}',  ha='center', va='center')

plt.show()


## MODEL_PERFORM_APRF_MNIST

In [None]:
# Full image tensors of both splits
train_images = train_loader.dataset.tensors[0]
test_images  = test_loader.dataset.tensors[0]

# Predicted digit = index of the largest model output, training data first
y_hat             = net(train_images)
train_predictions = torch.argmax(y_hat, dim=1)

# Same for the test data (y_hat is reused and ends up holding the test outputs)
y_hat             = net(test_images)
test_predictions  = torch.argmax(y_hat, dim=1)

In [None]:
# Metric order in both lists: accuracy, precision, recall, F1.
# Precision / recall / F1 are averaged across classes with 'weighted'
# averaging; accuracy takes no averaging argument.
weighted_scorers = (skm.precision_score, skm.recall_score, skm.f1_score)

# Training
train_metrics = [skm.accuracy_score(y_true=train_loader.dataset.tensors[1], y_pred=train_predictions)]
train_metrics += [
    scorer(y_true=train_loader.dataset.tensors[1], y_pred=train_predictions, average='weighted')
    for scorer in weighted_scorers
]

# Test
test_metrics = [skm.accuracy_score(y_true=test_loader.dataset.tensors[1], y_pred=test_predictions)]
test_metrics += [
    scorer(y_true=test_loader.dataset.tensors[1], y_pred=test_predictions, average='weighted')
    for scorer in weighted_scorers
]

In [None]:
# Paired bars per metric: train shifted slightly left, test slightly right
metric_pos = np.arange(4)
plt.bar(x=metric_pos - 0.1, height=train_metrics, width=0.5, label='Train')
plt.bar(x=metric_pos + 0.1, height=test_metrics,  width=0.5, label='Test')
plt.xticks(metric_pos, ['Accuracy', 'Precision', 'Recall', 'F1-score'])
plt.ylim([0.9, 1])
plt.title('Performance Metrics')
plt.legend()
plt.show()

In [None]:
# Bar graphs of class-specific precision and recall for test data
# (average=None makes scikit-learn return one score per class)

precision = skm.precision_score(y_true=test_loader.dataset.tensors[1], y_pred=test_predictions, average=None)
recall    = skm.recall_score   (y_true=test_loader.dataset.tensors[1], y_pred=test_predictions, average=None)

fig = plt.figure(figsize=(12, 3))
plt.bar(x=np.arange(10) - 0.15, height=precision, width=0.5)
plt.bar(x=np.arange(10) + 0.15, height=recall,    width=0.5)
plt.xticks(range(10), range(10))
plt.ylim([0.5, 1])
plt.xlabel('Number')
plt.legend(['Precision', 'Recall'])
# Title spelling corrected (was 'Categoty-specific')
plt.title('Category-specific performance metrics')
plt.show()

In [None]:
# CONFUSION MATRICES
# normalize='true' divides each row by the number of true samples of that class
train_conf  = skm.confusion_matrix(y_true=train_loader.dataset.tensors[1], y_pred=train_predictions, normalize='true')
test_conf   = skm.confusion_matrix(y_true=test_loader.dataset.tensors[1],  y_pred=test_predictions,  normalize='true')

fig, ax = plt.subplots(1, 2, figsize=(10, 4))

# Same layout for both splits; the color scale is capped at 0.05
for panel, conf, split_name in ((ax[0], train_conf, 'TRAIN'), (ax[1], test_conf, 'TEST')):
    a = panel.imshow(conf, 'Blues', vmax=0.05)
    panel.set_xticks(range(10))
    panel.set_yticks(range(10))
    panel.set_xlabel('Predicted number')
    panel.set_ylabel('True Number')
    panel.set_title(f'{split_name} Confusion Matrix')

# Colorbar taken from the last image drawn (the TEST panel)
fig.colorbar(a)
plt.show()


## MODEL_PERFORM_MNIST_NO7

In [None]:
# Check whether each category holds a roughly equal number of elements

# np.unique with return_counts=True gives (unique labels, count per label)
cat_count = np.unique(labels, return_counts=True)

# Bar chart: one bar per label, height = number of occurrences
plt.bar(*cat_count)
plt.xticks(range(10))
plt.xlabel('Digit')
plt.ylabel('N occurrences')
plt.show()

In [None]:
# Find all the 7's (row indices whose label equals 7)
where7s = np.where(labels == 7)[0]

# How many to remove, to leave 500 in the data. Clamped at 0: when fewer than
# 500 sevens are present the difference is negative, which np.random.choice
# rejects as a size - in that case nothing is removed.
n_7s_to_keep = 500
N_2_remove   = max(where7s.shape[0] - n_7s_to_keep, 0)

# Pick that number of 7's at random (without replacement)
remove_7s = np.random.choice(where7s, size=N_2_remove, replace=False)

# And remove them from the data!
print(f'Sizes before removing: {data_norm.shape} | {labels.shape}')

data_norm = np.delete(data_norm, remove_7s, axis=0)
labels    = np.delete(labels,    remove_7s)

print(f'Sizes after removing: {data_norm.shape} | {labels.shape}')

In [None]:
# Recount the labels after the removal and plot them again as a visual check
cat_count = np.unique(labels, return_counts=True)
plt.bar(*cat_count)
plt.xticks(range(10))
plt.xlabel('Digit')
plt.ylabel('N occurrences')
plt.show()

## MODEL_PERFORM_TIME

In [None]:
# Start the timer!
# perf_counter measures wall-clock time, which is what the message below
# reports as "elapsed". process_time counts CPU time of this process only
# (summed over threads, excluding time spent waiting).
timer_outside_function = time.perf_counter()

# Train 10 models back to back
for i in range(10):
    train_the_model()

total_experiment_time = time.perf_counter() - timer_outside_function
print(f'\n\nTotal elapsed experiment time: {total_experiment_time/60:.2f} minutes')

## WEIGHTS_DEMO_INITS

In [None]:
# Baseline: train with the default weight initialization, untouched.
# The model is created here and handed to the training function, which makes
# it possible to modify the weights before training in the cells that follow.
net_base, loss_func, optimizer                  = create_the_MNIST_net()
train_acc_base, test_acc_base, losses, net_base = train_the_model(net_base, loss_func, optimizer)

# Train and test accuracy per epoch
plt.plot(train_acc_base, 'o-', label='Train')
plt.plot(test_acc_base,  's-', label='Test')
plt.legend()
plt.title('Accuracy over epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.show()

### SET ALL THE WEIGHTS OF LAYER 1 TO ZERO

In [None]:
# Fresh model whose fc1 weights are overwritten before training starts
net_zero, loss_func, optimizer = create_the_MNIST_net()

# Replace the fc1 weight data with zeros of the same shape
net_zero.fc1.weight.data = torch.zeros_like(net_zero.fc1.weight.data)

# Train it
train_acc_zero, test_acc_zero, losses, net_zero = train_the_model(net_zero, loss_func, optimizer)

# Baseline in blue, zero-initialized fc1 in red; solid = train, dotted = test
curves = (
    (train_acc_base, 'b-'),
    (test_acc_base,  'b:'),
    (train_acc_zero, 'r-'),
    (test_acc_zero,  'r:'),
)
for curve, line_fmt in curves:
    plt.plot(range(len(curve)), curve, line_fmt)

plt.legend(['Train base', 'Test base', 'Train fc1=zero', 'Test fc1=zero'])
plt.title('Accuracy comparison with layer FC1 init to zeros')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.show()

In [None]:
# Are the weights still zeros after training?
print(net_zero.fc1.weight.data)

# Compare the fc2 weight distributions of both models as line histograms
for model, fmt, tag in ((net_base, 'r', 'Baseline'), (net_zero, 'b', 'FC1=zeros')):
    y, x = np.histogram(a=model.fc2.weight.data.flatten(), bins=30)
    # Plot the counts against the bin centres (mean of adjacent bin edges)
    plt.plot((x[1:] + x[:-1]) / 2, y, fmt, label=tag)

plt.legend()
plt.xlabel('Weight value')
plt.ylabel('Count')
plt.show()

### ALL LEARNABLE PARAMETERS SET TO ZERO

In [None]:
# Experiment: set every learnable parameter (weights and biases) to zero
net_all_zero, loss_func, optimizer = create_the_MNIST_net()

# Overwrite each parameter tensor with zeros of the same shape
for param in net_all_zero.parameters():
    param.data = torch.zeros_like(param.data)

# Confirm for a few select parameters; each series is shifted on the y-axis
# (by 0, 1 and 2) purely so that the three sets of zeros do not overlap
checks = (
    (0, net_all_zero.fc1.weight, 'bo'),
    (1, net_all_zero.fc2.weight, 'rx'),
    (2, net_all_zero.fc1.bias,   'g^'),
)
for offset, tensor, fmt in checks:
    plt.plot(offset + tensor.data.flatten(), fmt)

plt.xlabel('Parameter index')
plt.ylabel('Parameter value')
plt.ylim([-1, 3])
plt.show()

In [None]:
# Train the all-zeros model
train_acc_all_zero, test_acc_all_zero, losses, net_all_zero = train_the_model(net_all_zero, loss_func, optimizer)

# Baseline in blue, all-zeros model in red; solid = train, dotted = test
plt.plot(train_acc_base,     'b-', label='Train base')
plt.plot(test_acc_base,      'b:', label='Test base')
plt.plot(train_acc_all_zero, 'r-', label='Train all zero')
plt.plot(test_acc_all_zero,  'r:', label='Test all zero')
plt.legend()
plt.title('Accuracy comparison with all layer init to zeros')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.show()

In [None]:
# Compare the fc1 weight distributions (baseline vs. all-zeros init) as line histograms
for model, fmt, tag in ((net_base, 'r', 'Baseline'), (net_all_zero, 'b', 'All zeros')):
    y, x = np.histogram(a=model.fc1.weight.data.flatten(), bins=30)
    # Plot the counts against the bin centres (mean of adjacent bin edges)
    plt.plot((x[1:] + x[:-1]) / 2, y, fmt, label=tag)

plt.legend()
plt.xlabel('Weight value')
plt.ylabel('Count')
plt.show()

### INITIALIZING TO 1'S

In [None]:
# Experiment: set every learnable parameter (weights and biases) to one
net_all_one, loss_func, optimizer = create_the_MNIST_net()

# Loop over parameters and set them all to ones
for p in net_all_one.named_parameters():
    p[1].data = torch.ones_like(p[1].data)

# Run the model and show the results
train_acc_all_one, test_acc_all_one, losses, net_all_one = train_the_model(net_all_one, loss_func, optimizer)

# Plot the results: baseline in blue, all-ones model in red; solid = train, dotted = test
plt.plot(range(len(train_acc_base)), train_acc_base, 'b-', range(len(test_acc_base)), test_acc_base, 'b:')
plt.plot(range(len(train_acc_all_one)), train_acc_all_one, 'r-', range(len(test_acc_all_one)), test_acc_all_one, 'r:')
plt.legend(['Train base', 'Test base', 'Train all ones', 'Test all ones'])
plt.title('Accuracy comparison with all layer init to ones')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.show()

## WEIGHTS_VARIANCE_INITS

In [None]:
# Experiment: initialize ALL parameters from a zero-mean normal distribution with a
# given standard deviation, train, and record test accuracy plus the post-training
# parameter histogram for each standard deviation.

# Range of Standard Deviations to use (25 log-spaced values from 0.001 to 10)
stdevs = np.logspace(start=np.log10(0.001), stop=np.log10(10), num=25)

# Number of histogram bins for plotting distributions
n_hist_bins = 80

# Initialize results output matrix
acc_results = np.zeros(len(stdevs))
# histo_data[k, 0, :] holds bin centres and histo_data[k, 1, :] holds counts for stdev k
histo_data  = np.zeros((len(stdevs), 2, n_hist_bins))

# Start the timer! (process_time measures CPU time, not wall-clock time)
start_time = time.process_time()

# Start the experiment
for stdev_idx, stdev_i in enumerate(stdevs):

    # Create the network
    net, loss_func, optimizer = create_the_MNIST_net()

    # Set all parameters according to the standard deviation
    # (randn_like draws from N(0, 1); scaling by stdev_i sets the spread)
    for p in net.named_parameters():
        p[1].data = torch.randn_like(p[1].data) * stdev_i

    # Train the model
    train_acc, test_acc, losses, net = train_the_model(net, loss_func, optimizer)

    # Get test accuracy on final 3 runs (averaging increases stability)
    acc_results[stdev_idx] = np.mean(test_acc[-3:])

    # Collect all parameters into one vector and compute its histogram
    # (.numpy() assumes the parameters live on the CPU)
    temp_params = np.array([])
    for p in net.named_parameters():
        temp_params = np.concatenate((temp_params, p[1].data.numpy().flatten()), axis=0)

    # Compute their histogram; store bin centres (mean of adjacent edges) and counts
    y, x = np.histogram(temp_params, n_hist_bins)
    histo_data[stdev_idx, 0, :] = (x[1:] + x[:-1]) / 2
    histo_data[stdev_idx, 1, :] = y

    # Status report
    time_elapsed = time.process_time() - start_time
    print(f'Finished {stdev_idx + 1}/{len(stdevs)} after {time_elapsed:3.0f}s. Model accuracy: {acc_results[stdev_idx]:.2f}%')

In [None]:
# Show the results: averaged final test accuracy as a function of the init standard deviation
plt.plot(stdevs, acc_results, 's-')
plt.xlabel('Standard deviation for weight initializations')
plt.ylabel('Final-3 test accuracy (ave %)')
plt.xscale('log') # the standard deviations are log-spaced
plt.ylim([80, 100]) # NOTE: any accuracy below 80% falls outside the visible range
plt.show()

In [None]:
# Show the post-training parameter distributions, one line per standard deviation

n_stdevs = len(stdevs)
for i, (bin_centres, counts) in enumerate(histo_data):
    # The red and blue channels fade with the index; green stays fixed at 0.2
    fade = 1 - i / n_stdevs
    plt.plot(bin_centres, counts, color=[fade, 0.2, fade])

plt.xlabel('Weight value')
plt.ylabel('Count')
plt.legend(np.round(stdevs, 4), bbox_to_anchor=(1, 1), loc='upper left')
plt.xlim([-1, 1])
plt.show()

## WEIGHTS_ XAVIER_KAIMING_INITS

### EXPLORE THE INITIALIZED WEIGHTS

In [None]:
# Create an instance of the model and print it to inspect its layers
net = the_net()
print(net)

In [None]:
# Collect all initial weights and biases of `net` into two flat vectors and
# visualize their distributions.
all_weight = np.array([])
all_biases = np.array([])

# Parameter names containing 'bias' go to one vector, those containing 'weight' to the other
for p in net.named_parameters():
    if ('bias' in p[0]):
        all_biases = np.concatenate((all_biases, p[1].data.numpy().flatten()), axis=0)
    elif ('weight' in p[0]):
        all_weight = np.concatenate((all_weight, p[1].data.numpy().flatten()), axis=0)

# How many?
print('There are {:<10} bias parameters.'.format(len(all_biases)))
print('There are {:<10} weight parameters.'.format(len(all_weight)))

# Histograms
fig, ax = plt.subplots(1, 3, figsize=(18, 4))

ax[0].hist(all_biases, 40)
ax[0].set_title('Histogram of initial biases')
ax[0].set_ylabel('Count')

ax[1].hist(all_weight, 40)
ax[1].set_title('Histogram of initial weights')
ax[1].set_ylabel('Count')

# Collect histogram data to show as line plots
y_B, x_B = np.histogram(all_biases, bins=30)
y_W, x_W = np.histogram(all_weight, bins=30)

# x-values are the bin centres; the parentheses matter, otherwise only the
# second term is halved and the curves are shifted/stretched along x.
# Counts are divided by their sum so both curves are on a probability scale.
ax[2].plot((x_B[1:] + x_B[:-1]) / 2, y_B / np.sum(y_B), label='Bias')
ax[2].plot((x_W[1:] + x_W[:-1]) / 2, y_W / np.sum(y_W), label='Weights')
ax[2].set_title('Density estimate for both')
ax[2].legend()
ax[2].set_ylabel('Probability')

# Plot adjustments common to all subplots (y-labels are set per subplot above,
# because the third panel shows probabilities rather than counts)
for i in range(3):
    ax[i].set_xlabel('Initial value')

plt.show()

### LAYER-SPECIFIC DISTRIBUTIONS

In [None]:
# Plot a normalized 10-bin histogram for each parameter tensor of `net`:
# biases on the left axis, weights on the right axis.
fig, ax = plt.subplots(1, 2, figsize=(15, 4))

for p in net.named_parameters():

    # Get the data and compute their histogram
    params_data = p[1].data.numpy().flatten()
    y, x = np.histogram(params_data, 10)

    # For the bias
    # p[0][:-5] drops the last 5 characters of the name (presumably the '.bias' suffix)
    if ('bias' in p[0]):
        ax[0].plot((x[1:] + x[:-1]) / 2, y / np.sum(y), label=f'{p[0][:-5]} bias ')
    
    # For the weight
    # p[0][:-7] drops the last 7 characters of the name (presumably the '.weight' suffix)
    elif ('weight' in p[0]):
        ax[1].plot((x[1:] + x[:-1]) / 2, y / np.sum(y), label=f'{p[0][:-7]} weight ')
    
ax[0].set_title('Biases per layer')
ax[0].legend()
ax[1].set_title('Weights per layer')
# Place the legend outside the axes, anchored to the upper-right corner
ax[1].legend(bbox_to_anchor=(1, 1), loc='upper left')

plt.show()

In [None]:
# Let's test whether the numbers match our prediction from the formula.
# Empirical bias range (min and max of the fc1 biases)
bias_range = [torch.min(net.fc1.bias.data).item(), torch.max(net.fc1.bias.data).item()]
bias_count = len(net.fc1.bias.data)

# Theoretical expected value.
# PyTorch's default nn.Linear initialization draws biases uniformly from
# [-1/sqrt(fan_in), 1/sqrt(fan_in)], where fan_in is the number of INPUTS to the
# layer (second dimension of the weight matrix), not the number of biases.
# NOTE(review): assumes fc1 is an nn.Linear with a 2-D weight — confirm against the_net().
fan_in = net.fc1.weight.shape[1]
sigma  = np.sqrt(1 / fan_in)

# Drum rolllllll.....
print(f'Theoretical sigma = {sigma}')
print(f'Empirical range   = {bias_range}')

### INITIALIZE THE WEIGHTS USING THE XAVIER METHOD

In [None]:
# Create a new instance of the model
net = the_net()

# Change the weights to Xavier-normal (biases keep their default initialization)
for p in net.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(tensor=p[1].data)

# Test whether the numbers match our predictions from the formula
# Empirical weight variance of fc1
weight_var   = torch.var(net.fc1.weight.data.flatten()).item()
weight_count = len(net.fc1.weight.data)

# Theoretical expected value: Xavier-normal uses variance = 2 / (fan_in + fan_out).
# For a 2-D weight matrix the shape is (fan_out, fan_in); using the first
# dimension twice is only correct when the layer is square.
# NOTE(review): assumes fc1.weight is 2-D (an nn.Linear layer) — confirm against the_net().
fan_out, fan_in = net.fc1.weight.shape
sigma_2 = 2 / (fan_in + fan_out)

# Both numbers are variances, so label them as such
print(f'Theoretical variance = {sigma_2}')
print(f'Empirical variance   = {weight_var}')

## WEIGHTS_ XAVIER_VS._KAIMING

In [None]:
# Train two wine models that differ only in how their weights are initialized:
# Xavier-normal vs. Kaiming-uniform. Biases are left at their defaults in both.

# Create a model
wine_net_xavier = ANN_wine()

# Change the weights (leave biases as Kaiming [default])
for p in wine_net_xavier.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(p[1].data)

# Train the model and record its output
train_acc_X, test_acc_X, losses_X = train_the_model(wine_net=wine_net_xavier)

#==========================================

# Create a model
wine_net_kaiming = ANN_wine()

# Change the weights (leave biases as Kaiming [default])
# nonlinearity='relu' selects the gain used for the init — presumably matching the
# activation used inside ANN_wine; verify against the model definition
for p in wine_net_kaiming.named_parameters():
    if ('weight' in p[0]):
        nn.init.kaiming_uniform_(p[1].data, nonlinearity='relu')

# Train the model and record its output
train_acc_K, test_acc_K, losses_K = train_the_model(wine_net=wine_net_kaiming)

In [None]:
# Plot loss, train accuracy and test accuracy for both initializations side by side
fig, ax = plt.subplots(1, 3, figsize=(18, 4))

# One (Xavier, Kaiming) pair of curves and one title per panel
curve_pairs  = [(losses_X, losses_K), (train_acc_X, train_acc_K), (test_acc_X, test_acc_K)]
panel_titles = ['Loss', 'TRAIN', 'TEST']

for i in range(3):
    xavier_curve, kaiming_curve = curve_pairs[i]
    ax[i].plot(xavier_curve,  label='Xavier')
    ax[i].plot(kaiming_curve, label='Kaiming')
    ax[i].set_title(panel_titles[i])

    # Only the two accuracy panels carry a y-label
    if i > 0:
        ax[i].set_ylabel('Accuracy (%)')

    ax[i].legend()
    ax[i].grid('on')
    ax[i].set_xlabel('Epochs')

plt.show()

### REPEAT THE EXPERIMENT TO GET MORE STABLE RESULTS

In [None]:
# Repeat the Xavier-vs-Kaiming comparison several times to get more stable results.

# Number of experiment iterations
num_exps = 10

# Dimensions of results
# 1 - Experiment run
# 2 - Metric (loss/train/test)
# 3 - Weight init (X/K)
results = np.zeros((num_exps, 3, 2))

for exp_idx in range(num_exps):
    ## XAVIER
    # Create a model
    wine_net_xavier = ANN_wine()

    # Change the weights (leave biases as Kaiming [default])
    for p in wine_net_xavier.named_parameters():
        if ('weight' in p[0]):
            nn.init.xavier_normal_(p[1].data)

    # Train the model and record its output
    train_acc_X, test_acc_X, losses_X = train_the_model(wine_net=wine_net_xavier)

    ## KAIMING
    # Create a model
    wine_net_kaiming = ANN_wine()

    # Change the weights (leave biases as Kaiming [default])
    for p in wine_net_kaiming.named_parameters():
        if ('weight' in p[0]):
            nn.init.kaiming_uniform_(p[1].data, nonlinearity='relu')

    # Train the model and record its output
    train_acc_K, test_acc_K, losses_K = train_the_model(wine_net=wine_net_kaiming)

    ## Collect the results: average of the last 5 epochs of each metric
    # (torch.mean assumes the losses are returned as a tensor)
    results[exp_idx, 0, 0] = torch.mean(losses_X[-5:]).item()
    results[exp_idx, 0, 1] = torch.mean(losses_K[-5:]).item()

    results[exp_idx, 1, 0] = np.mean(train_acc_X[-5:])
    results[exp_idx, 1, 1] = np.mean(train_acc_K[-5:])

    results[exp_idx, 2, 0] = np.mean(test_acc_X[-5:])
    results[exp_idx, 2, 1] = np.mean(test_acc_K[-5:])

    # exp_idx is zero-based; report a one-based count so the last line reads N / N
    print(f'Finished run {exp_idx + 1} / {num_exps}')

In [None]:
# Plot the per-run results for each metric and compare the two inits with a t-test
fig, ax = plt.subplots(1, 3, figsize=(15, 4))

# Plot titles (one per metric, in the order of the second dimension of `results`)
metric = ['Loss', 'Train acc.', 'Test acc.']

for i in range(3):

    # Plot the results: Xavier runs at x=0 (blue), Kaiming runs at x=1 (red)
    ax[i].plot(np.zeros(num_exps), results[:, i, 0], 'bo')
    ax[i].plot(np.ones(num_exps),  results[:, i, 1], 'ro')

    # Run a t-test to formalize the comparison
    # (ttest_ind is the independent two-sample t-test; note that `p` is rebound here)
    t, p = stats.ttest_ind(results[:, i, 0], results[:, i, 1])
    title = f'{metric[i]}, (t = {t:.2f}, p = {p:.3f})'

    # Make the plot nicer
    ax[i].set_xlim([-1, 2])
    ax[i].set_xticks([0, 1])
    ax[i].set_xticklabels(['Xavier', 'Kaiming'])
    ax[i].set_title(title)

plt.show()

## WEIGHTS_ IDENTICALLY_RANDOM_WEIGHTS

In [None]:
# Create a small template model: three stacked linear layers (2 -> 8 -> 1 -> 1)
# with no activation functions in between
net = nn.Sequential(
    nn.Linear(2, 8),
    nn.Linear(8, 1),
    nn.Linear(1, 1)
)

### FOUR NETWORKS

In [None]:
# Build four copies of the template network and re-initialize their weights with
# Xavier-normal: one without seeding, one after seed 1, one after seed 2, and one
# more after seed 1 again. The order of these statements matters, because each
# init call consumes numbers from torch's global random generator.

## NO RANDOMSEED
# Copy the "template" network
net_no_seed = copy.deepcopy(net)

# Update the weights (drawn from whatever state the random generator is currently in)
for p in net_no_seed.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(p[1].data)

## RANDOM SEED 1a
# Copy the "template" network
net_rs1a = copy.deepcopy(net)

# Set the seed to 1
torch.manual_seed(1)

# Update the weights
for p in net_rs1a.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(p[1].data)

## RANDOM SEED 2
# Copy the "template" network
net_rs2 = copy.deepcopy(net)

# Set the seed to 2
torch.manual_seed(2)

# Update the weights
for p in net_rs2.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(p[1].data)

## RANDOM SEED 1b
# Copy the "template" network
net_rs1b = copy.deepcopy(net)

# Set the seed to 1 again, so the same sequence of init calls as for net_rs1a
# starts from the same generator state
torch.manual_seed(1)

# Update the weights
for p in net_rs1b.named_parameters():
    if ('weight' in p[0]):
        nn.init.xavier_normal_(p[1].data)


### EXTRACT ALL WEIGHTS FROM ALL NETWORKS

In [None]:
# Flatten the weights of all four networks into vectors and overlay them in one
# plot, so identical initializations show up as overlapping markers.

# Initialize empty arrays
w_ns = np.array([])
w_1a = np.array([])
w_2  = np.array([])
w_1b = np.array([])

# Loop over layers in the models (each network is indexed at positions 0, 1 and 2)
for i in range(3):
    
    # Extract the vectorized weights matrices
    # .view(-1): Reshape a Pytorch matrix to vector
    # .detach(): drop the autograd tracking so the tensor can be converted to numpy
    w_ns = np.append(arr=w_ns, values=net_no_seed[i].weight.view(-1).detach().numpy())
    w_1a = np.append(arr=w_1a, values=net_rs1a[i].   weight.view(-1).detach().numpy())
    w_2  = np.append(arr=w_2 , values=net_rs2[i].    weight.view(-1).detach().numpy())
    w_1b = np.append(arr=w_1b, values=net_rs1b[i].   weight.view(-1).detach().numpy())

# Plotting
fig = plt.figure(figsize=(15, 5))

plt.plot(w_ns, 'ro', markersize=12, label='no seed')
plt.plot(w_1a, 'ks', markersize=12, label='rs1a')
# NOTE(review): the leading space in ' m^' appears to be read by matplotlib as the
# "draw no line" style, so this likely renders the same as 'm^' — confirm
plt.plot(w_2, ' m^', markersize=12, label='rs2')
plt.plot(w_1b, 'g+', markersize=12, label='rs1b')
plt.legend()
plt.xlabel('Vectorized weight index')
plt.ylabel('Weight value')

plt.show()

## WEIGHTS_FREEZE_WEIGHTS

In [None]:
# Create the network
net, loss_func, optimizer = create_the_MNIST_net()

# Train the model
train_acc, test_acc, losses, net = train_the_model(net, loss_func, optimizer)

# Accuracy per epoch for both data splits
plt.plot(train_acc, label='Train')
plt.plot(test_acc,  label='Test')
# Vertical dashed marker at the halfway epoch, drawn from y=10 to y=80
# (presumably where train_the_model switches learning on; verify in the function)
plt.plot([len(train_acc) / 2, len(train_acc) / 2], [10, 80], 'k--', label='Learning switched on')
plt.legend()
plt.show()

## WEIGHTS_WEIGHTS_CHANGES

In [None]:
# Create the network
net, loss_func, optimizer = create_the_MNIST_net()

# Train the model
# This version of train_the_model returns seven values; besides accuracies, losses
# and the trained net it also provides weight_change, weight_conds and pre_W
# (their exact contents are defined by train_the_model, not visible here)
train_acc, test_acc, losses, net, weight_change, weight_conds, pre_W = train_the_model(net, loss_func, optimizer)

In [None]:
# Layer names for the legends: parameter names containing 'weight', with their
# last 7 characters removed
layer_name = [name[:-7] for name, _ in net.named_parameters() if 'weight' in name]

# Three panels: accuracy, per-layer weight change, per-layer condition number
fig, ax = plt.subplots(1, 3, figsize=(16, 3))

# Accuracy
ax[0].plot(train_acc)
ax[0].plot(test_acc)
ax[0].set_ylabel('Accuracy (%)')
ax[0].set_title('Accuracy')
ax[0].legend(['Train', 'Test'])

# Weight changes and weight condition numbers share the same layout
per_layer_panels = (
    (ax[1], weight_change, 'Weight change from previous epoch'),
    (ax[2], weight_conds,  'Condition number'),
)
for panel, curves, heading in per_layer_panels:
    panel.plot(curves)
    panel.set_title(heading)
    panel.legend(layer_name)

# Limit the condition-number axis
ax[2].set_ylim([0, 20])

# Common x-label
for panel in ax:
    panel.set_xlabel('Epochs')

plt.show()

In [None]:
# Final inspection: check the derivative of accuracy against the weight change
# Normalize for scaling offsets (z-scoring puts both curves on a comparable scale)
from scipy.stats import zscore

# np.diff returns one element fewer than train_acc (epoch-to-epoch differences)
plt.plot(zscore(np.diff(train_acc)),             label='d(train_acc)')
# Average the weight change over axis 1 (presumably the layers; rows presumably epochs)
# NOTE(review): if weight_change has one row per epoch, the two curves differ in
# length by one and are offset by one index — confirm the intended alignment
plt.plot(zscore(np.mean(weight_change, axis=1)), label='Weight change')
plt.legend()
plt.title('Change in weights by change in accuracy')
plt.xlabel('Epoch')
plt.show()

In [None]:
# See what the model did: inputs on the top row, model outputs on the bottom row
# NOTE(review): X and y_hat are not assigned in the visible cells of this section;
# this cell looks identical to the one in the autoencoder section — confirm that it
# belongs here
fig, axs = plt.subplots(2, 5, figsize=(10, 3))

for i in range(5):
    # Each row of X / y_hat is reshaped to a 28x28 image for display
    axs[0, i].imshow(X    [i, :].view(28, 28).detach(), cmap='gray')
    axs[1, i].imshow(y_hat[i, :].view(28, 28).detach(), cmap='gray')
    # Hide the tick marks on both rows
    axs[0, i].set_xticks([]), axs[0, i].set_yticks([])
    axs[1, i].set_xticks([]), axs[1, i].set_yticks([])

plt.suptitle('Yikes!')
plt.show()

## AUTOENCODER_DENOISING_MNIST

In [None]:
# Test the model with a bit of data (the model is freshly created, i.e. untrained)
net, loss_func, optimizer = create_the_MNIST_AE()

# Push the first five samples through the model
X     = data_tensor[:5, :]
y_hat = net(X)

# Compare input and output shapes
print(X.shape)
print(y_hat.shape)

In [None]:
# See what the model did: inputs on the top row, model outputs on the bottom row
fig, axs = plt.subplots(2, 5, figsize=(10, 3))

for i in range(5):
    for row, batch in enumerate((X, y_hat)):
        # Reshape the flat sample to a 28x28 image and hide the ticks
        axs[row, i].imshow(batch[i, :].view(28, 28).detach(), cmap='gray')
        axs[row, i].set_xticks([])
        axs[row, i].set_yticks([])

plt.suptitle('Yikes!')
plt.show()

### RUN THE MODEL

In [None]:
# Train the model (this train_the_model returns the losses and the trained net)
losses, net = train_the_model()
print(f'Final loss: {losses[-1]:.4f}')

# Visualize the losses
plt.plot(losses, '.-')
plt.xlabel('Epochs')
plt.ylabel('Model loss')
plt.title('OK, but what dis it actually learn?')
plt.show()

In [None]:
# Repeat the visualization, this time with the trained model
X     = data_tensor[:5, :]
y_hat = net(X)

# Inputs on the top row, reconstructions on the bottom row
fig, axs = plt.subplots(2, 5, figsize=(10, 3))

for i in range(5):
    for row, batch in enumerate((X, y_hat)):
        # Reshape the flat sample to a 28x28 image and hide the ticks
        axs[row, i].imshow(batch[i, :].view(28, 28).detach(), cmap='gray')
        axs[row, i].set_xticks([])
        axs[row, i].set_yticks([])

plt.suptitle('Disregard the Yikes!')
plt.show()

### ADD NOISE TO SEE A USE CASE OF AN AUTOENCODER

In [None]:
# Grab a small set of images
X = data_tensor[:10, :]

# Add uniform noise in [0, 0.25) to every pixel
X_noise = X + torch.rand_like(X) / 4

# Clip at 1 (in place)
X_noise.clamp_(max=1)

# Show the clean images (top row) next to their noisy versions (bottom row)
fig, axs = plt.subplots(2, 5, figsize=(10, 3))

for i in range(5):
    for row, batch in enumerate((X, X_noise)):
        # Reshape the flat sample to a 28x28 image and hide the ticks
        axs[row, i].imshow(batch[i, :].view(28, 28).detach(), cmap='gray')
        axs[row, i].set_xticks([])
        axs[row, i].set_yticks([])

plt.show()

In [None]:
# Run the noisy images through the model
Y = net(X_noise)

# Rows: original images, noisy inputs, model outputs
fig, axs = plt.subplots(3, 10, figsize=(12, 5))

for i in range(10):
    for row, batch in enumerate((X, X_noise, Y)):
        # Reshape the flat sample to a 28x28 image and hide the ticks
        axs[row, i].imshow(batch[i, :].view(28, 28).detach(), cmap='gray')
        axs[row, i].set_xticks([])
        axs[row, i].set_yticks([])

plt.suptitle('Neato.')
plt.show()

## AUTOENCODER_HOW_MANY_UNIT

In [None]:
# Grid experiment: train one model per combination of encoder/decoder width and
# bottleneck width, and store the final loss of each.

# Specify the number of units (12 encoder/decoder sizes, 8 bottleneck sizes)
N_encdec_units = np.linspace(start=10, stop=500, num=12).astype(int)
N_bottle_units = np.linspace(start=5,  stop=100, num=8) .astype(int)

# Initialize results matrix (rows: encoder/decoder sizes, columns: bottleneck sizes)
exp_results = np.zeros((len(N_encdec_units), len(N_bottle_units)))

# Start the experiment
for N_encdec_idx, N_encdec_i in enumerate(N_encdec_units):
    for N_bottle_idx, N_bottle_i in enumerate(N_bottle_units):

        # Build/Train the model
        losses = train_the_model(N_encdec_i, N_bottle_i)[0] # Only need the first output
        # NOTE(review): losses[-1] is a single element, so np.mean has no averaging
        # effect here; a slice such as losses[-3:] may have been intended — confirm
        exp_results[N_encdec_idx, N_bottle_idx] = np.mean(losses[-1])

        # Send update message ('\r' returns to the line start, so the message is overwritten in place)
        current_iter = N_encdec_idx * len(N_bottle_units) + N_bottle_idx + 1
        total_iter   = len(N_bottle_units) * len(N_encdec_units)
        msg          = 'Finished experiment {:<2}/{:<2}'.format(current_iter, total_iter)
        sys.stdout.write('\r' + msg)

In [None]:
# Show the result matrix as an image (rows: encoder/decoder sizes, columns: bottleneck sizes)
fig = plt.figure(figsize=(6, 6))

plt.imshow(exp_results, aspect='auto',              # Data and aspect ratio
           vmin=0.01, vmax=0.04, cmap='Purples',    # Color range and palette
           extent=[N_bottle_units[0], N_bottle_units[-1], N_encdec_units[-1], N_encdec_units[0]]) # xy axis ticks
# extent is [left, right, bottom, top]; the largest encoder/decoder size is placed
# at the bottom because imshow draws the first row at the top by default

plt.xlabel('Number of bottleneck units')
plt.ylabel('Number of encoder/decoder units')
plt.colorbar()

plt.show()

In [None]:
# Same results as line plots: one line per bottleneck size (one per column of
# exp_results), plotted against the number of encoder/decoder units
plt.plot(N_encdec_units, exp_results)
plt.legend(N_bottle_units, loc=(1.01, 0)) # legend placed just outside the right edge of the axes
plt.xlabel('Number of enc/dec units')
plt.title('Loss by bottleneck units')
plt.ylabel('Loss')
plt.show()

## AUTO_ENCODER_OCCLUSION

In [None]:
# Train the model (this train_the_model returns the losses and the trained net)
losses, net = train_the_model()
print(f'Final loss: {losses[-1]:.4f}')

# Visualize the losses
plt.plot(losses, '.-')
plt.xlabel('Epochs')
plt.ylabel('Model loss')
plt.show()

### ADD OCCLUSION TO SOME IMAGES

In [None]:
# Grab a small set of images (deep copy, so data_tensor itself is not modified below)
X = copy.deepcopy(data_tensor[:10, :])

# Add occlusion: a one-pixel-wide bar of ones across each image
for i in range(X.shape[0]):

    # Reshape the image
    # (.view shares memory with X, so writing into img changes X in place)
    img = X[i, :].view(28, 28)

    # Occlude random rows or columns (bar position drawn from 10..20 inclusive)
    start_loc = np.random.choice(range(10, 21))

    # Even -> Horizontal occlusion
    if (i % 2 == 0):
        img[start_loc:start_loc + 1, :] = 1
    # Odd -> Vertical occlusion
    else:
        img[:, start_loc:start_loc + 1] = 1
    
# Run the samples through the model
de_occluded = net(X)

# Show the original images (top), the occluded images (middle) and the model outputs (bottom)
fig, axs = plt.subplots(3, 10, figsize=(15, 5))

for i in range(10):
    axs[0, i].imshow(data_tensor[i, :].view(28, 28).detach(), cmap='gray')
    axs[1, i].imshow(X          [i, :].view(28, 28).detach(), cmap='gray')
    axs[2, i].imshow(de_occluded[i, :].view(28, 28).detach(), cmap='gray')
    # Hide the tick marks on all three rows
    axs[0, i].set_xticks([]), axs[0, i].set_yticks([])
    axs[1, i].set_xticks([]), axs[1, i].set_yticks([])
    axs[2, i].set_xticks([]), axs[2, i].set_yticks([])

plt.show()

### SOMETHING MORE QUANTITATIVE

In [None]:
# Quantify the performance of the "de-occluder" by correlating the sample with the original
# (only sample index 9 is inspected here)

# np.corrcoef returns a 2x2 matrix; element [0, 1] is the correlation between the two vectors
in_out_corr = np.corrcoef(data_tensor[9, :].detach(), de_occluded[9, :].detach())

# Scatter of original vs. reconstructed pixel values
plt.plot(data_tensor[9, :].detach(), de_occluded[9, :].detach(), '.')
plt.xlabel('Original pixel values')
plt.ylabel('Reconstructed pixel values')
plt.title(f'Correlation r = {in_out_corr[0, 1]:.3f}')
plt.show()

In [None]:
# Try again without the zero-valued pixels
# Extract to variables for convenience
orig  = data_tensor[9, :].detach()
recon = de_occluded[9, :].detach()

# Boolean vector that indicates pixels>0 (with some tolerance)
# A pixel is kept only if it exceeds the tolerance in BOTH the original and the reconstruction
tol             = 1e-4
non_zero_pixels = (orig > tol) & (recon > tol)

# Then re-compute the correlation on the selected pixels only
in_out_corr = np.corrcoef(orig[non_zero_pixels], recon[non_zero_pixels])

plt.plot(orig[non_zero_pixels], recon[non_zero_pixels], '.')
plt.xlabel('Original pixel values')
plt.ylabel('Reconstructed pixel values')
plt.title(f'Correlation r = {in_out_corr[0, 1]:.3f}')
plt.show()

In [None]:
# Compare how well the model reconstructs the originals with and without occlusion.

# Number of samples to compare; taken from de_occluded so that the model input,
# the results matrix and the loop below always agree in size
n_samples = de_occluded.shape[0]

# Get data with no occlusion
no_occlusion = net(data_tensor[:n_samples, :])

# Compare deoccluded-original to noocclusion-original correlation
# Column 0: original vs. reconstruction without occlusion
# Column 1: original vs. reconstruction of the occluded image
r = np.zeros((n_samples, 2))
for i in range(n_samples):

    # Pixel selection (note: tolerance `tol` is defined in a previous cell).
    # A pixel is used only if it exceeds the tolerance in all three images.
    non_zero_pixels = (data_tensor[i, :] > tol) & (no_occlusion[i, :] > tol) & (de_occluded[i, :] > tol)

    # Now compute the correlations ([0, 1] picks the off-diagonal element of the 2x2 matrix)
    r[i, 0] = np.corrcoef(data_tensor[i, non_zero_pixels].detach(), no_occlusion[i, non_zero_pixels].detach())[0, 1]
    r[i, 1] = np.corrcoef(data_tensor[i, non_zero_pixels].detach(), de_occluded [i, non_zero_pixels].detach())[0, 1]

# Plot the correlation coefficients
plt.plot(r, 'o', markersize=10)
plt.legend(['No occlusion', 'Occlusion'])
plt.xlabel('Sample number')
plt.ylabel('Correlation with original')
plt.show()

## AUTOENCODER_ LATENT_CODE

In [None]:
# Test the model with a bit of data (the model is freshly created, i.e. untrained)
net, loss_func, optimizer = create_the_MNIST_AE()

X     = data_tensor[:5, :]
y_hat = net(X)

# In this section the model returns a sequence: element 0 is printed as the model
# output and element 1 as the encoding-layer output
print(f'Input shape: {X.shape}', '\n')
print(type(y_hat), len(y_hat), '\n')
print(f'Shape of model output: {y_hat[0].shape}', '\n')
print(f'Shape of encoding layer output: {y_hat[1].shape}', '\n')

In [None]:
# Train the model (this train_the_model returns the losses and the trained net)
losses, net = train_the_model()
print(f'Final loss: {losses[-1]:.4f}')

# Visualize the losses
plt.plot(losses, '.-')
plt.xlabel('Epochs')
plt.ylabel('Model loss')
plt.show()

### INSPECT THE LATENT "CODE" OF THE MODEL

In [None]:
# Output the latent layer
# Push through the entire dataset (the model returns the output and the latent activations)
y_hat, latent = net(data_tensor)

# Print sizes
print(f'{y_hat.shape}, {latent.shape}')

fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Left: histogram (100 bins) of all latent activations pooled together
ax[0].hist(latent.flatten().detach(), 100)
ax[0].set_xlabel('Latent Activation value')
ax[0].set_ylabel('Count')
ax[0].set_title('Distribution of latent units activation')

# Right: activation matrix as an image (rows: images, columns: latent nodes);
# the color scale is fixed to the range 0..10
ax[1].imshow(latent.detach(), aspect='auto', vmin=0, vmax=10)
ax[1].set_xlabel('Latent node')
ax[1].set_ylabel('Image number')
ax[1].set_title('All latent activations')

plt.show()

## CNN_CLASSIFY_GAUSSIAN_BLURS

In [None]:
# Visualize some images: show the first 20 test images with true and predicted labels
X, y = next(iter(test_loader))
y_hat = net(X)

fig, axs = plt.subplots(2, 10, figsize=(20, 4))

for i, ax in enumerate(axs.flatten()):
    # First (index 0) channel of image i
    G = torch.squeeze(X[i, 0, :, :]).detach()
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet')
    # (true label, predicted label); the prediction is 1 when the model output is positive
    # NOTE(review): a threshold of 0 presumably means the model outputs raw logits — confirm
    t = (int(y[i].item()), int(y_hat[i].item() > 0))
    ax.set_title(f'T: {t[0]}, P: {t[1]}')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

In [None]:
# Look at the filters
print(net)

# Weights of the modules at positions 0 and 3 of net.enc
# (presumably the two convolution layers; check the printed architecture)
layer_1W = net.enc[0].weight
layer_3W = net.enc[3].weight

print('')
print(layer_1W.shape)
print(layer_3W.shape)

In [None]:
# Show the first six filters of the first layer, one per subplot
fig, axs = plt.subplots(1, 6, figsize=(15, 3))

for i, ax in enumerate(axs.flatten()):
    # squeeze removes size-1 dimensions so the filter can be shown as a 2-D image
    # (assumes a single input channel — TODO confirm from the printed shape)
    ax.imshow(torch.squeeze(layer_1W[i, :, :, :]).detach(), cmap='Purples')
    ax.axis('off')

plt.suptitle('First convolution layer filters')
plt.show()

In [None]:
# Show a 4x6 grid of second-layer kernels: the subplot at (row, col) displays
# layer_3W[row, col], i.e. the first two weight dimensions index the grid
fig, axs = plt.subplots(4, 6, figsize=(15, 9))

# np.ndindex walks the grid positions in row-major order
for idx in np.ndindex(4, 6):
    kernel = torch.squeeze(layer_3W[idx[0], idx[1], :, :]).detach()
    axs[idx].imshow(kernel, cmap='Purples')
    axs[idx].axis('off')

plt.suptitle('Second convolution layer filters')
plt.show()

## CNN_GAUSS_FEATURE_MAPS

In [None]:
# Visualize some images: show the first 20 test images with true and predicted labels.
# In this section the model returns the prediction plus the two sets of feature maps.
X, y                          = next(iter(test_loader))
y_hat, feat_map_1, feat_map_2 = net(X)

fig, axs = plt.subplots(2, 10, figsize=(20, 4))

for i, ax in enumerate(axs.flatten()):
    # First (index 0) channel of image i
    G = torch.squeeze(X[i, 0, :, :]).detach()
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet')
    # (true label, predicted label); the prediction is 1 when the model output exceeds 0.5
    # NOTE(review): 0.5 is the natural cut-off for probabilities; if the model outputs
    # raw logits the cut-off should be 0 — confirm against the model definition
    t = (int(y[i].item()), int(y_hat[i].item() > 0.5))
    ax.set_title(f'T: {t[0]}, P: {t[1]}')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

### DRAW THE FEATURE MAPS

In [None]:
# Feature maps from the conv1 layer: one column per test image, with the image
# itself in the top row and its six feature maps in the rows below
fig, axs = plt.subplots(7, 10, figsize=(12, 6))

for pic_i in range(10):
    # Top row: the original picture (channel 0) with its true label written on it
    top_ax = axs[0, pic_i]
    img    = X[pic_i, 0, :, :].detach()
    top_ax.imshow(img, cmap='jet', vmin=0, vmax=1)
    top_ax.axis('off')
    top_ax.text(2, 2, f'T: {int(y[pic_i].item())}', ha='left', va='top', color='w', fontweight='bold')

    # Rows 1..6: the six feature maps of this picture
    for feat_i in range(6):
        map_ax = axs[feat_i + 1, pic_i]

        # Color range runs from 0 to 90% of this map's maximum
        img = feat_map_1[pic_i, feat_i, :, :].detach()
        map_ax.imshow(img, cmap='inferno', vmin=0, vmax=torch.max(img) * 0.9)
        map_ax.axis('off')

        # Write the feature-map index next to the first column only
        if pic_i == 0:
            map_ax.text(-5, 45, feat_i, ha='right')

plt.tight_layout()
plt.suptitle('First set of feature map activations for 10 test images', x=0.5, y=1.01)
plt.show()

In [None]:
# Feature maps from the conv2 layer: one column per test image, with the image
# itself in the top row and its four feature maps in the rows below
fig, axs = plt.subplots(5, 10, figsize=(12, 6))

for pic_i in range(10):
    # Top row: the original picture (channel 0) with its true label written on it
    top_ax = axs[0, pic_i]
    img    = X[pic_i, 0, :, :].detach()
    top_ax.imshow(img, cmap='jet', vmin=0, vmax=1)
    top_ax.axis('off')
    top_ax.text(2, 2, f'T: {int(y[pic_i].item())}', ha='left', va='top', color='w', fontweight='bold')

    # Rows 1..4: the four feature maps of this picture
    for feat_i in range(4):
        map_ax = axs[feat_i + 1, pic_i]

        # Color range runs from 0 to 90% of this map's maximum
        img = feat_map_2[pic_i, feat_i, :, :].detach()
        map_ax.imshow(img, cmap='inferno', vmin=0, vmax=torch.max(img) * 0.9)
        map_ax.axis('off')

        # Write the feature-map index next to the first column only
        if pic_i == 0:
            map_ax.text(-5, 45, feat_i, ha='right')

plt.tight_layout()
plt.suptitle('Second set of feature map activations for 10 test images', x=0.5, y=1.01)
plt.show()

### SPATIAL CORRELATIONS ACROSS THE FEATURE MAPS

In [None]:
# Correlations across the SECOND convolution layer:
# for every image, correlate each pair of feature maps across their pixels.

# Convenient variables
n_stim = feat_map_2.shape[0]             # number of images
n_maps = feat_map_2.shape[1]             # number of feature maps per image
n_cors = (n_maps * (n_maps - 1)) // 2    # number of unique map pairs

# Initialize the matrix of all correlation values
allrs = np.zeros((n_stim, n_cors))
Call  = np.zeros((n_maps, n_maps))

# Row/column indices of the strict upper triangle, i.e. the unique map pairs.
# Taking them from the matrix SHAPE (rather than from the non-zero entries of a
# particular correlation matrix) keeps the number of pairs fixed even if some
# correlation happens to be exactly zero.
idx = np.triu_indices(n_maps, k=1)

# Loop over each stimulus/image
for i in range(n_stim):

    # Extract the vectorized feature maps from this image
    # `.view(n_maps, -1)`: one row per feature map, all of its pixels in the columns
    feat_map = feat_map_2[i, :, :, :].view(n_maps, -1).detach()

    # Compute the correlation matrix (each pair of feature maps of this image)
    C     = np.corrcoef(feat_map)
    Call += C

    # Extract the unique correlations from the matrix
    allrs[i, :] = C[idx]

# Define the x-axis labels ("row - column" of each map pair)
x_lab = [f'{row} - {col}' for row, col in zip(*idx)]

# Now visualize the correlations
fig = plt.figure(figsize=(16, 5))
ax0 = fig.add_axes([0.1, 0.1, 0.55, 0.9]) # [left, bottom, width, height]
ax1 = fig.add_axes([0.68, 0.1, 0.3, 0.9])
cax = fig.add_axes([0.98, 0.1, 0.01, 0.9])

# One column of points per map pair; small random x-jitter keeps the points from overlapping
for i in range(n_cors):
    ax0.plot(i + np.random.randn(n_stim) / 30, allrs[:, i], 'o', markerfacecolor='w', markersize=10)

# Make the plot more interpretable
ax0.set_xlim([-.5, n_cors - .5])
ax0.set_ylim([-1.05, 1.05])
ax0.set_xticks(range(n_cors))
ax0.set_xticklabels(x_lab)
ax0.set_xlabel('Feature map pair')
ax0.set_ylabel('Correlation coefficient')
ax0.set_title('Correlations for each image')

# Now show the average correlation matrix (sum over images divided by their number)
h = ax1.imshow(Call / n_stim, vmin=-1, vmax=1)
ax1.set_title('Correlation matrix')
ax1.set_xlabel('Feature map')
ax1.set_ylabel('Feature map')

# Add a colorbar
fig.colorbar(h, cax=cax)
plt.show()


## CNN_LINEAR_UNITS

In [None]:
# Number of hidden units: 20 values from 5 to 500, rounded
# (np.round returns floats, hence the int() conversion when calling the model)
number_of_linear_units = np.round(np.linspace(5, 500, 20))

# Initialize results matrix
# Columns: final train loss, final test loss, final train accuracy, final test accuracy
results = np.zeros((len(number_of_linear_units), 4))

for units_idx, units_i in enumerate(number_of_linear_units):
    train_loss, test_loss, train_acc, test_acc, net = train_the_model(fc_units=int(units_i))
    # Keep only the value of the last epoch of each metric
    results[units_idx, :] = [train_loss[-1], test_loss[-1], train_acc[-1], test_acc[-1]] 

In [None]:
# Plot final loss (left) and final accuracy (right) against the number of linear units
fig, ax = plt.subplots(1, 2, figsize=(16, 5))

# First two columns of results (plotted as train and test loss)
ax[0].plot(number_of_linear_units, results[:, :2], 's-')
ax[0].set_xlabel('Number of units in final linear layer')
ax[0].set_ylabel('Loss (MSE)')
ax[0].set_title('Final model loss')
ax[0].legend(['Train', 'Test'])

# Last two columns of results (plotted as train and test accuracy)
# NOTE(review): the title says "test accuracy" although both train and test are shown
ax[1].plot(number_of_linear_units, results[:, 2:], 's-')
ax[1].set_xlabel('Number of units in final linear layer')
ax[1].set_ylabel('Accuracy (%)')
ax[1].set_title('Final model test accuracy')
ax[1].legend(['Train', 'Test'])

plt.show()

## CNN_GAUSS_AUTOENCODER|GAUSS_AE_OCCLUSION|CUSTOM_LOSS_FUNC

In [None]:
# Plot the training loss per epoch; the final loss is shown in the title
# (the 'Train' label is only displayed if a legend is added)
plt.plot(losses, 's-', label='Train')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.title(f'Model loss (Final loss = {losses[-1]:.3f})')
plt.show()

In [None]:
# Pick 32 distinct random image indices and push those images through the model
# (only the first 10 of them are displayed below)
pics_2_use = np.random.choice(n_gauss, size=32, replace=False)
X          = images[pics_2_use, :, :, :]
# X          = images_occ[pics_2_use, :, :, :]
y_hat      = net(X)

# Top row: model inputs, bottom row: model outputs
fig, axs = plt.subplots(2, 10, figsize=(18, 4))
for i in range(10):
    # Channel 0 of input and output image i
    G = torch.squeeze(X    [i, 0, :, :]).detach()
    O = torch.squeeze(y_hat[i, 0, :, :]).detach()
    
    axs[0, i].imshow(G, vmin=-1, vmax=1, cmap='jet')
    axs[0, i].axis('off')
    axs[0, i].set_title('Model input')
    
    axs[1, i].imshow(O, vmin=-1, vmax=1, cmap='jet')
    axs[1, i].axis('off')
    axs[1, i].set_title('Model output')

plt.show()

## CNN_FIND_GAUSS

In [None]:
# Visualize some images: overlay the model's predicted centre and circle on 20 test images
X, Y  = next(iter(test_loader))
y_hat = net(X)

fig, axs = plt.subplots(2, 10, figsize=(16, 4))
# Angles used to trace a full circle (np.linspace defaults to 50 points)
theta    = np.linspace(start=0, stop=2 * np.pi)

for i, ax in enumerate(axs.flatten()):
    # Get the Gaussian and draw it, and draw the white-guide-line
    # (extent maps the image onto the coordinate range -4..4 on both axes)
    G = torch.squeeze(X[i, 0, :, :]).detach()
    ax.imshow(G, vmin=-1, vmax=1, cmap='jet', extent=[-4, 4, -4, 4], origin='lower')
    ax.plot([-4, 4], [0, 0], 'w--')
    ax.plot([0, 0], [-4, 4], 'w--')
    
    # Compute the model's prediction (three outputs per image)
    cx = y_hat[i][0].item() # Center X
    cy = y_hat[i][1].item() # Center Y
    rd = y_hat[i][2].item() # Radius
    
    # Draw it
    # The circle is drawn with radius sqrt(rd); a negative rd yields NaN and no circle
    x = cx + np.cos(theta) * np.sqrt(rd)
    y = cy + np.sin(theta) * np.sqrt(rd)
    ax.plot(x,  y,  'b')
    ax.plot(cx, cy, 'bo')
    
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim([-4, 4])
    ax.set_ylim([-4, 4])

plt.tight_layout()
plt.show()

In [None]:
# Scatter the true vs. predicted value of each of the three output parameters and
# report their correlation in the legend
fig         = plt.figure(figsize=(5, 5))
param_names = ['Cx', 'Cy', 'rad.']

for i in range(3):
    # Extract parameters and compute correlation
    # (column i of the targets and of the predictions; [0, 1] is the off-diagonal
    # element of the 2x2 correlation matrix)
    yy = Y    [:, i].detach()
    yh = y_hat[:, i].detach()
    cr = np.corrcoef(yy, yh)[0, 1]
    
    plt.plot(yy, yh, 'o', label=f'{param_names[i]}, r = {cr:.3f}')

plt.legend()
plt.xlabel('True values')
plt.ylabel('Predicted values')
plt.grid()
plt.show()

## CNN_EMNIST

In [None]:
# Visualize some images

# Extract X, y from test dataloader
X, y = next(iter(test_loader))
# Move the data to `device` (presumably the GPU when one is available)
X     = X.to(device)
y     = y.to(device)
y_hat = net(X)

# Pick some examples at random to show (21 distinct indices, one per subplot)
rand_idx = np.random.choice(len(y), size=21, replace=False)

fig, axs = plt.subplots(3, 7, figsize=(15, 6))
for i, ax in enumerate(axs.flatten()):
    # Extract the image and its target letter
    # (.cpu() brings the image back to host memory for plotting)
    I           = np.squeeze(X[rand_idx[i], 0, :, :]).cpu()
    true_letter = letter_categories[y[rand_idx[i]]]
    # The predicted letter is the category with the largest model output
    pred_letter = letter_categories[torch.argmax(y_hat[rand_idx[i], :])]

    # Color-code the accuracy: gray colormap when correct, 'hot' colormap when wrong
    col = 'gray' if (true_letter == pred_letter) else 'hot'

    # Visualize (the image is transposed for display)
    ax.imshow(I.T, cmap=col)
    ax.set_title(f'True: {true_letter}|Predicted: {pred_letter}', fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

In [None]:
# ACCURACY BY LETTER
import sklearn.metrics as skm

# Confusion matrix
# normalize='true' divides each row by its total, so the entries are proportions
# of each true category
C = skm.confusion_matrix(y_true=y.cpu(), y_pred=torch.argmax(y_hat.cpu(), axis=1), normalize='true')

# Visualize
# vmax=0.05 saturates the color scale at 5%, which makes the small off-diagonal
# confusions visible
fig = plt.figure(figsize=(10, 10))
plt.imshow(C, 'Blues', vmax=0.05)

# The categories are letters, so the ticks and axis labels refer to letters
plt.xticks(range(26), labels=letter_categories)
plt.yticks(range(26), labels=letter_categories)
plt.title('TEST confusion matrix')
plt.ylabel('True letter')
plt.xlabel('Predicted letter')
plt.show()

## CNN_ NUM_CHANS

In [None]:
# Sweep the channel counts passed to train_the_model and record the final error rates
# NOTE(review): `enum` is not referenced in this cell; looks like an editor auto-import — confirm before removing
import enum
conv_chans = [2, 5, 8]

# Initialize results matrix
# results[i, j, 0] receives the last train error, results[i, j, 1] the last test error
results     = np.zeros((len(conv_chans), len(conv_chans), 2))
conv_params = np.zeros((len(conv_chans), len(conv_chans)))

# NOTE: the loop indices stay defined at notebook scope after the loop; other cells may rely on them
for i, n_chan_i in enumerate(conv_chans):
    for j, n_chan_j in enumerate(conv_chans):
        # Train with this pair of channel counts (train_the_model is defined elsewhere in the notebook)
        train_loss, test_loss, train_err, test_err, net = train_the_model(num_chans=(n_chan_i, n_chan_j))

        # Keep only the final-epoch error rates
        results    [i, j, :] = train_err[-1], test_err[-1]
        conv_params[i, j]    = n_chan_i + n_chan_j # Total number of conv layer channels

        # Progress indicator
        print(i, j)      

In [None]:
# Show the results matrix (left panel: train error, right panel: test error)

fig, ax = plt.subplots(1, 2, figsize=(10, 4))

# One tick per entry of conv_chans, derived from the list itself rather than
# from a loop index left behind by another cell
tick_pos = range(len(conv_chans))

for i in range(2):
    # Shared colour limits so the two panels are directly comparable
    h = ax[i].imshow(results[:, :, i], vmin=np.min(results), vmax=np.max(results))
    # NOTE(review): imshow draws the first array axis vertically; if the first index
    # of `results` corresponds to conv1, these two labels are swapped — TODO confirm
    ax[i].set_xlabel('Channels in conv1')
    ax[i].set_ylabel('Channels in conv2')
    ax[i].set_xticks(tick_pos)
    ax[i].set_yticks(tick_pos)
    ax[i].set_xticklabels(conv_chans)
    ax[i].set_yticklabels(conv_chans)
    title = 'Train' if (i == 0) else 'Test'
    ax[i].set_title(f'Error rates {title}', fontweight='bold')

# Add a colorbar right of the plot (positioned relative to the second panel)
axpos = ax[1].get_position()
cax = fig.add_axes([axpos.x1+.01,axpos.y0,.01,.75])
hh = fig.colorbar(h,cax=cax)
hh.set_label('Error rate (%)',rotation=270,labelpad=10)

plt.show()

In [None]:
# Error rate as a function of the total number of conv channels

# Flatten the grids once so every (channel pair) becomes one point
total_chans = conv_params.flatten()
train_errs  = results[:, :, 0].flatten()
test_errs   = results[:, :, 1].flatten()

# 2x2 correlation matrices; the off-diagonal entry is the coefficient
corr_train = np.corrcoef(total_chans, train_errs)
corr_test  = np.corrcoef(total_chans, test_errs)

plt.plot(total_chans, train_errs, 'o', label=f'Train (r = {corr_train[0, 1]:.2f})')
plt.plot(total_chans, test_errs,  's', label=f'Test  (r = {corr_test[0, 1]:.2f})')

plt.legend()
plt.xlabel('Total number of convolution channels')
plt.ylabel('Error rate (%)')
plt.show()

## TRANSFER_ FMNIST

In [None]:
# Create a new model
# (create_the_MNIST_net is defined elsewhere in the notebook; its three return values are unpacked here)
number_net, loss_func, optimizer = create_the_MNIST_net()

# Train it on numbers data
# for 5 epochs; the network returned by train_the_model is bound back to number_net
train_acc, test_acc, losses, number_net = train_the_model(net=number_net, train_loader=numbers_train_loader, test_loader=numbers_test_loader, num_epochs=5)

In [None]:
# Test the model with fashion data
# Extract X, y from FASHION test dataloader
X, y = next(iter(fashion_test_loader))

# Inference only: evaluation mode and no autograd bookkeeping,
# matching the other evaluation cells in this notebook
number_net.eval()
with torch.no_grad():
    y_hat = number_net(X)

# The test: percentage of samples whose arg-max prediction equals the label
fashion_acc = 100 * torch.mean((torch.argmax(y_hat, axis=1) == y).float())
print(f'NUMBER_NET performance on FASHION data: {fashion_acc:.2f}%')

In [None]:
## NOTE about this cell: the two print() calls demonstrate that the copy works.
## Before the copy the difference between the first conv1 filters of the two
## models is printed (non-zero, because the models were initialized separately);
## after the copy the same difference is printed again and is all zeros.
## See Q&A for this lecture.

# Create the target model
fashion_net, loss_func, optimizer = create_the_MNIST_net()
print(fashion_net.conv1.weight[0] - number_net.conv1.weight[0])

# Overwrite every parameter of the TARGET model with a deep copy of the
# matching SOURCE parameter (the two iterators are walked in lockstep)
paired_params = zip(fashion_net.named_parameters(), number_net.named_parameters())
for (_, target_param), (_, source_param) in paired_params:
    target_param.data = copy.deepcopy(source_param.data)

print(fashion_net.conv1.weight[0] - number_net.conv1.weight[0])

In [None]:
# Now re-train the copied network on the FASHION data (a single epoch)
train_acc, test_acc, losses, fashion_net = train_the_model(net=fashion_net, train_loader=fashion_train_loader, test_loader=fashion_test_loader, num_epochs=1)


## TRANSFER_LETTER2NUMBER

In [None]:
# Build the network (make_the_net is defined elsewhere in the notebook) and train it on the letter data for 5 epochs
letter_net, loss_func, optimizer = make_the_net()
# NOTE: the network returned by train_the_model is bound to `net` here, not back to `letter_net`
train_loss, test_loss, train_err, test_err, net = train_the_model(net=letter_net, optimizer=optimizer, train_loader=letter_train_loader, test_loader=letter_test_loader, num_epochs=5)

In [None]:
## TEST THE LETTER MODEL ON THE NUMBER DATA
# Extract X, y from NUMBER test dataloader
X, y = next(iter(number_test_loader))
X    = X.to(device)
y    = y.to(device)

# Inference only: evaluation mode, no autograd bookkeeping
letter_net.eval()
with torch.no_grad():
    y_hat = letter_net(X)

# Test
# NOTE: despite its name, `number_acc` holds an ERROR rate (share of predictions
# that differ from the label); the name is kept so other cells that read it still work
number_acc = 100 * torch.mean((torch.argmax(y_hat, axis=1) != y).float())
# The model evaluated here is letter_net, so the message names it
print(f'letter_net error rate on NUMBER data: {number_acc:.2f}%')

In [None]:
## Fine-tune the model with one training batch
# Create the target model
number_net, loss_func, optimizer = make_the_net()

# Replace all the weights in TARGET model from SOURCE model
for target, source in zip(number_net.named_parameters(), letter_net.named_parameters()):
    target[1].data = copy.deepcopy(source[1].data)

# Check out the network
print(number_net, '\n')
# The final layer
print(number_net.fc2, '\n')

# Replace the final layer to have 10 outputs instead of 26.
# The input width and the device are read from the layer being replaced
# instead of being hard-coded.
old_head       = number_net.fc2
number_net.fc2 = nn.Linear(old_head.in_features, 10).to(old_head.weight.device)

# The optimizer was created before the layer swap, so it presumably still holds
# the OLD fc2 parameters and would never update the new layer (assumes
# make_the_net() builds a torch.optim optimizer over net.parameters() — TODO confirm).
# Rebuild an optimizer of the same class and hyper-parameters over the
# current parameters.
optimizer = type(optimizer)(number_net.parameters(), **optimizer.defaults)

# Check again
print(number_net)

In [None]:
## Re-train the network on the numbers data
# A single epoch; the network returned by train_the_model is bound back to number_net
train_loss, test_loss, train_err, test_err, number_net = train_the_model(net=number_net, optimizer=optimizer, train_loader=number_train_loader, test_loader=number_test_loader, num_epochs=1)

In [None]:
## Try again, only train the output layer
# Create the target model
number_net_2, loss_func, optimizer = make_the_net()

# Replace all the weights in TARGET model from SOURCE model
for target, source in zip(number_net_2.named_parameters(), letter_net.named_parameters()):
    target[1].data = copy.deepcopy(source[1].data)

# Replace the final layer to have 10 outputs instead of 26
# (input width and device are read from the layer being replaced)
old_head         = number_net_2.fc2
number_net_2.fc2 = nn.Linear(old_head.in_features, 10).to(old_head.weight.device)

# Freeze all layers except output
for name, param in number_net_2.named_parameters():
    if 'fc2' not in name:
        param.requires_grad = False

# The optimizer returned by make_the_net() predates the layer swap, so it presumably
# tracks the old fc2 and not the new one (assumes a torch.optim optimizer over
# net.parameters() — TODO confirm). Rebuild it, with the same class and
# hyper-parameters, over the parameters that are still trainable.
trainable = [p for p in number_net_2.parameters() if p.requires_grad]
optimizer = type(optimizer)(trainable, **optimizer.defaults)

## Re-train the network on the numbers data
# (the name is number_net_2 throughout; the previous `number_net2` spelling was never defined)
train_loss, test_loss, train_err, test_err, number_net_2 = train_the_model(net=number_net_2, optimizer=optimizer, train_loader=number_train_loader, test_loader=number_test_loader, num_epochs=1)

print(f'number_net TRAIN error rate: {train_err[-1]:.2f}%')
print(f'number_net TEST  error rate: {test_err [-1]:.2f}%')

## TRANSFER_ RESNET

In [None]:
# Load ResNet-18 with pretrained weights
resnet = torchvision.models.resnet18(pretrained=True)

# Freeze every existing parameter so it receives no gradient
for weight in resnet.parameters():
    weight.requires_grad_(False)

# Swap in a new final layer with 10 outputs; it is created after the freeze,
# so its parameters keep the default requires_grad=True
resnet.fc = nn.Linear(512, 10)

# Push the model to the GPU (trailing semicolon suppresses the notebook echo)
resnet.to(device);

In [None]:
## TRAIN THE MODEL
# NOTE(review): this cell only creates the loss function and the optimizer; no training loop is visible here
loss_func = nn.CrossEntropyLoss()
# SGD with momentum over everything resnet.parameters() yields
optimizer = torch.optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Inspect the first 16 images of one test batch with the network's predictions
X, y = next(iter(test_loader))
X, y = X.to(device), y.to(device)

resnet.eval()
predictions = torch.argmax(resnet(X), axis=1)

fig, axs = plt.subplots(4, 4, figsize=(10, 10))
for i, ax in enumerate(axs.flatten()):
    # Move the first axis last for imshow, then rescale to the range [0, 1]
    pic = X.data[i].cpu().numpy().transpose((1, 2, 0))
    pic = pic - pic.min()
    pic = pic / pic.max()

    ax.imshow(pic)

    # Class names of the prediction and of the ground truth
    label = train_data_set.classes[predictions[i]]
    truec = train_data_set.classes[y[i]]
    title = f'Pred: {label} - True: {truec}'

    # Green background for a correct prediction, red for a wrong one
    if truec == label:
        title_color = 'g'
    else:
        title_color = 'r'
    ax.text(44, 90, title, ha='center', va='top', fontweight='bold', color='k', backgroundcolor=title_color, fontsize=8)
    ax.axis('off')

plt.tight_layout()
plt.show()

## TRANSFER_ PRETRAIN_FMNIST

In [None]:
## Show the first ten examples of one dev batch next to their reconstructions

# Get some data
X, y = next(iter(dev_loader))

# Forward pass on the CPU
ae_net.cpu()
ae_net.eval() # Switch to test mode
y_hat = ae_net(X)

fig, axs = plt.subplots(2, 10, figsize=(15, 3))
for i in range(10):
    # Row 0 shows the network output, row 1 the input it was computed from
    for row, batch in enumerate((y_hat, X)):
        pic = batch[i, 0, :, :].detach()
        pic = pic / 2 + 0.5 # Undo normalization
        axs[row, i].imshow(pic, cmap='gray')
        axs[row, i].axis('off')

# Row captions, written left of the first column
axs[0, 0].text(-6, 14, 'Reconstructed', rotation=90, va='center')
axs[1, 0].text(-6, 14, 'Original',      rotation=90, va='center')

plt.show()

In [None]:
pre_train_net, loss_func, optimizer = make_the_class_net()

### Note about the code below: the parameters of the two networks are paired
#    purely by position, which relies on both networks listing their layers in
#    the same order; other applications may need explicit layer matching.
# Copy the SOURCE (autoencoder) weights into every TARGET parameter whose name contains 'enc'
paired_params = zip(pre_train_net.named_parameters(), ae_net.named_parameters())
for (target_name, target_param), (source_name, source_param) in paired_params:
    print(f'PRETRAIN: {target_name}, AENET: {source_name}')
    if 'enc' in target_name:
        target_param.data = copy.deepcopy(source_param.data)

In [None]:
# TRAIN THE PRETRAINED MODEL
train_loss_pre, dev_loss_pre, train_acc_pre, dev_acc_pre, pre_train_net = train_the_class_model(pre_train_net, loss_func, optimizer)

# Evaluate on one batch from the test loader
pre_train_net.eval() # Switch to test mode
X, y = next(iter(test_loader))

# Push data to GPU
X, y = X.to(device), y.to(device)

# Forward pass and loss, without tracking gradients
with torch.no_grad():
    y_hat = pre_train_net(X)
    loss  = loss_func(y_hat, y)

# Scalar loss, and accuracy as the percentage of matching arg-max predictions
test_loss_pre = loss.item()
is_correct    = (torch.argmax(y_hat, axis=1) == y).float()
test_acc_pre  = 100 * torch.mean(is_correct).item()


In [None]:
# Compare the pretrained (red) and naive (blue) models: loss on the left, accuracy on the right
fig, ax = plt.subplots(1, 2, figsize=(16, 5))

# Each entry: (train curve, dev curve, single test value, colour, test marker, legend prefix)
loss_runs = [
    (train_loss_pre,   dev_loss_pre,   test_loss_pre,   'r', 'p', 'PRE'),
    (train_loss_naive, dev_loss_naive, test_loss_naive, 'b', '*', 'NAIVE'),
]
acc_runs = [
    (train_acc_pre,   dev_acc_pre,   test_acc_pre,   'r', 'p', 'PRE'),
    (train_acc_naive, dev_acc_naive, test_acc_naive, 'b', '*', 'NAIVE'),
]

for panel, runs in zip(ax, (loss_runs, acc_runs)):
    for train_curve, dev_curve, test_value, color, test_marker, tag in runs:
        panel.plot(train_curve, f'{color}s-',  label=f'{tag} Train')
        panel.plot(dev_curve,   f'{color}o--', label=f'{tag} Dev')
        # The single test value is drawn at the x-position of the last dev point
        panel.plot(len(dev_curve) - 1, test_value, f'{color}{test_marker}', markersize=15, label=f'{tag} Test')
    panel.set_xlabel('Epochs')

ax[0].set_ylabel('Loss')
ax[0].set_title('Model Loss')
ax[0].legend()

ax[1].set_ylabel('Accuracy (%)')
ax[1].set_title(f'Final NaiveTest/PreTest accuracy: {test_acc_naive:.2f}%/{test_acc_pre:.2f}%')
ax[1].legend()

plt.show()

In [None]:
# Grab one image and give it an explicit (batch, channel, height, width) = (1, 1, 28, 28) layout
x = X[10, :, :, :].view(1, 1, 28, 28)

# Compute the activations of the first layer (excluding the bias b/c this is simply a constant)
# The pretrained model is called `pre_train_net` in this notebook (see the cells above)
layer1ActPre = F.relu(F.conv2d(x, pre_train_net.encconv1.weight))
# NOTE(review): no `naivenet` is defined in the visible part of the notebook;
# confirm the name of the naive model before running this line
layer1ActNai = F.relu(F.conv2d(x, naivenet.encconv1.weight))

## Show the feature map activations, one figure per model
for acts, fig_title in ((layer1ActPre, 'Pretrained activations'),
                        (layer1ActNai, 'Naive activations')):
    fig, axs = plt.subplots(2, 8, figsize=(14, 4))
    for i, ax in enumerate(axs.flatten()):
        # i-th feature map of the single image
        act = torch.squeeze(acts[0, i, :, :]).detach().cpu()
        ax.imshow(act, cmap='gray')
        ax.axis('off')

    plt.suptitle(fig_title, y=.9)
    plt.show()

## GAN_MNIST

In [None]:
# Loss Function (Same for both phases of training)
# Binary cross-entropy
loss_func = nn.BCELoss()

# Create instance of the models
# (discriminator_net and generator_net are defined elsewhere in the notebook)
d_net = discriminator_net().to(device)
g_net = generator_net()    .to(device)

# Optimizers (Same algo but different variables b/c different parameters)
# Each optimizer only updates the parameters of its own network
d_optimizer = torch.optim.Adam(d_net.parameters(), lr=0.0003)
g_optimizer = torch.optim.Adam(g_net.parameters(), lr=0.0003)

In [None]:
## TRAINING ##
num_epochs = 50000

# Column 0: discriminator, column 1: generator
losses                  = np.zeros((num_epochs, 2))
# Column 0: share of REAL images called real, column 1: share of FAKE images called real
discriminator_decisions = np.zeros((num_epochs, 2))

for epoch_i in range(num_epochs):

    # Create minibatches of REAL and FAKE images
    rand_idx    = torch.randint(data_T.shape[0], (batch_size, ))
    real_images = data_T[rand_idx, :].to(device)
    fake_images = g_net(torch.randn(batch_size, 64).to(device)) # Output of generator

    # Labels used for REAL and FAKE images
    real_labels = torch.ones (batch_size, 1).to(device)
    fake_labels = torch.zeros(batch_size, 1).to(device)

    ########################### Train the Discriminator ########################
    # Forward pass and loss for REAL images
    pred_real   = d_net(real_images)                # REAL images into Discriminator
    d_loss_real = loss_func(pred_real, real_labels) # All labels are 1

    # Forward pass and loss for FAKE images.
    # The fakes are detached: only the discriminator is updated in this phase, so
    # backpropagating into the generator would be wasted work (its gradients are
    # zeroed before the generator step anyway).
    pred_fake   = d_net(fake_images.detach())       # FAKE images into Discriminator
    d_loss_fake = loss_func(pred_fake, fake_labels) # All labels are 0

    # Collect loss (Using combined loss)
    d_loss                              = d_loss_real + d_loss_fake
    losses                 [epoch_i, 0] = d_loss.item()
    # .item() stores a plain Python float, whatever device the tensor lives on
    discriminator_decisions[epoch_i, 0] = torch.mean((pred_real > 0.5).float()).item()

    # Backprop
    d_optimizer.zero_grad()
    d_loss.backward()
    d_optimizer.step()

    ############################ Train the Generator ###########################
    # Create fresh fake images and score them with the just-updated discriminator
    fake_images = g_net(torch.randn(batch_size, 64).to(device))
    pred_fake   = d_net(fake_images)

    # Compute and collect loss and accuracy.
    # The generator is scored against the REAL labels: its loss is low when
    # the discriminator calls the fakes real.
    g_loss = loss_func(pred_fake, real_labels)
    losses                 [epoch_i, 1] = g_loss.item()
    discriminator_decisions[epoch_i, 1] = torch.mean((pred_fake > 0.5).float()).item()

    # Backprop
    g_optimizer.zero_grad()
    g_loss.backward()
    g_optimizer.step()

    ############################################################################
    # Print out a status message (every 500 epochs, overwriting the same line)
    if ((epoch_i + 1) % 500 == 0):
        msg = 'Finished epoch {:<5}/{:<5}'.format(epoch_i + 1, num_epochs)
        sys.stdout.write('\r' + msg)

In [None]:
# Training diagnostics: losses over time, loss-vs-loss scatter, discriminator decisions
fig, ax = plt.subplots(1, 3, figsize=(18, 5))

ax[0].plot(losses)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].set_title('Model Loss')
# Legend entry spelled correctly (was 'Discrimator')
ax[0].legend(['Discriminator', 'Generator'])
# Zoom in on a 1000-epoch window of the run
ax[0].set_xlim([4000, 5000])

# Every 5th epoch only, to thin out the scatter
ax[1].plot(losses[::5, 0], losses[::5, 1], 'k.', alpha=0.1)
ax[1].set_xlabel('Discriminator Loss')
ax[1].set_ylabel('Generator Loss')

ax[2].plot(discriminator_decisions)
ax[2].set_xlabel('Epochs')
ax[2].set_ylabel('Probability ("Real")')
ax[2].set_title('Discriminator Output')
ax[2].legend(['Real', 'Fake'])

plt.show()

In [None]:
## See some fake digits

# Generate 12 images from the generator network, one per subplot
g_net.eval()
noise     = torch.randn(12, 64).to(device)
fake_data = g_net(noise).cpu()

fig, axs = plt.subplots(3, 4, figsize=(8, 6))
for i, ax in enumerate(axs.flatten()):
    # Reshape the i-th output into a 28x28 image
    digit = fake_data[i, :].detach().view(28, 28)
    ax.imshow(digit, cmap='gray')
    ax.axis('off')

plt.show()

## GAN_CNN_GAUSS

In [None]:
# Loss Function (Same for both phases of training)
# Binary cross-entropy
loss_func = nn.BCELoss()

# Create instance of the models
# (discriminator_net and generator_net are defined elsewhere in the notebook)
d_net = discriminator_net().to(device)
g_net = generator_net()    .to(device)

# Optimizers (Same algo but different variables b/c different parameters)
# Each optimizer only updates the parameters of its own network
d_optimizer = torch.optim.Adam(d_net.parameters(), lr=0.0002, betas=(0.5, 0.999))
g_optimizer = torch.optim.Adam(g_net.parameters(), lr=0.0002, betas=(0.5, 0.999))

In [None]:
## TRAINING ##
num_epochs              = 1500
batch_size              = 86
losses                  = []  # One [discriminator loss, generator loss] pair per epoch
discriminator_decisions = []  # One [share of reals called real, share of fakes called real] pair per epoch

for epoch_i in range(num_epochs):

    # Create a minibatch of REAL images
    rand_idx = torch.randint(images.shape[0], (batch_size, ))
    data     = images[rand_idx, :].to(device)

    # Labels used for REAL and FAKE images
    real_labels = torch.ones (batch_size, 1).to(device)
    fake_labels = torch.zeros(batch_size, 1).to(device)

    ########################### Train the Discriminator ########################
    # Forward pass and loss for REAL images
    pred_real   = d_net(data)                       # REAL images into Discriminator
    d_loss_real = loss_func(pred_real, real_labels) # All labels are 1

    # Forward pass and loss for FAKE images
    fake_data   = torch.randn(batch_size, 100, 1, 1).to(device)
    fake_images = g_net(fake_data)                  # Output of generator
    # The discriminator must see the GENERATED images here (the real batch was
    # passed by mistake before). They are detached because only the
    # discriminator is updated in this phase.
    pred_fake   = d_net(fake_images.detach())       # FAKE images into Discriminator
    d_loss_fake = loss_func(pred_fake, fake_labels) # All labels are 0

    d_loss = d_loss_real + d_loss_fake

    # Backprop
    d_optimizer.zero_grad()
    d_loss.backward()
    d_optimizer.step()

    ############################ Train the Generator ###########################
    # Create fresh fake images and score them with the just-updated discriminator
    fake_images = g_net(torch.randn(batch_size, 100, 1, 1).to(device))
    pred_fake   = d_net(fake_images)

    # Compute the loss against the REAL labels: the generator's loss is low
    # when the discriminator calls its output real
    g_loss = loss_func(pred_fake, real_labels)

    # Backprop
    g_optimizer.zero_grad()
    g_loss.backward()
    g_optimizer.step()

    # Collect losses and discriminator decisions as plain Python floats so the
    # lists convert to NumPy arrays whatever device the tensors live on
    losses.append([d_loss.item(), g_loss.item()])
    d1 = torch.mean((pred_real > 0.5).float()).item()
    d2 = torch.mean((pred_fake > 0.5).float()).item()
    discriminator_decisions.append([d1, d2])

    ############################################################################
    # Print out a status message (every 50 epochs, overwriting the same line)
    if ((epoch_i + 1) % 50 == 0):
        msg = 'Finished epoch {:<4}/{:<4}'.format(epoch_i + 1, num_epochs)
        sys.stdout.write('\r' + msg)

# Convert performance from list to Numpy array
losses                  = np.array(losses)
discriminator_decisions = np.array(discriminator_decisions)

In [None]:
# Training diagnostics: smoothed losses, loss-vs-loss scatter, smoothed discriminator decisions
# (`smooth` is a helper defined elsewhere in the notebook)
fig, ax = plt.subplots(1, 3, figsize=(18, 5))

ax[0].plot(smooth(losses[:, 0]))
ax[0].plot(smooth(losses[:, 1]))
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].set_title('Model Loss')
ax[0].legend(['Discriminator', 'Generator'])
# No x-limits: this section trains for 1500 epochs, so the [4000, 5000] window
# used before left this panel empty

# Skip the first 200 epochs in the scatter
ax[1].plot(losses[200:, 0], losses[200:, 1], 'k.', alpha=0.1)
ax[1].set_xlabel('Discriminator Loss')
ax[1].set_ylabel('Generator Loss')

# Only the smoothed curves are drawn; the raw traces that used to be plotted on
# top of them hid the smoothing and had no legend entries
ax[2].plot(smooth(discriminator_decisions[:, 0]))
ax[2].plot(smooth(discriminator_decisions[:, 1]))
ax[2].set_xlabel('Epochs')
ax[2].set_ylabel('Probability ("Real")')
ax[2].set_title('Discriminator Output')
ax[2].legend(['Real', 'Fake'])

plt.show()

In [None]:
# Generate one batch of images from the generator network
g_net.eval()
noise     = torch.randn(batch_size, 100, 1, 1).to(device)
fake_data = g_net(noise).cpu()

# Show the first 18 of them, one per subplot
fig, axs = plt.subplots(3, 6, figsize=(12, 6))
for i, ax in enumerate(axs.flatten()):
    sample = fake_data[i, :].detach().squeeze()
    ax.imshow(sample, cmap='jet')
    ax.axis('off')

plt.show()

# RESULTS

## REGRESSION

In [None]:
# Plot the observed data and the model's predictions over the same x values
y_pred = predictions.detach()
plt.plot(x, y,      'bo', label='Real Data')
plt.plot(x, y_pred, 'rs', label='Predictions')

# Correlation coefficient between the observed data and the predicted data
r = np.corrcoef(y.T, y_pred.T)[0, 1]
plt.title(f'Prediction-data r = {r:.3f}')
plt.legend()
plt.show()

## LOSS, ACCURACY - TRAIN/TEST 

In [None]:
# Left: loss per epoch (y-axis fixed to 0..3); right: train/test accuracy (y-axis fixed to 10..100)
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, acc_ax = ax

loss_ax.plot(losses, 's-')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_ylim([0, 3])
loss_ax.set_title('Model Loss')

for curve, fmt, name in ((train_acc, 's-', 'Train'), (test_acc, 'o-', 'Test')):
    acc_ax.plot(curve, fmt, label=name)
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_ylim([10, 100])
acc_ax.set_title(f'Final model test accuracy: {test_acc[-1]:.2f}%')
acc_ax.legend()

plt.show()

In [None]:
# Left: loss per epoch (losses is detached first); right: train/test accuracy
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, acc_ax = ax

loss_ax.plot(losses.detach())
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')

for curve, name in ((train_acc, 'Train'), (test_acc, 'Test')):
    acc_ax.plot(curve, label=name)
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Accuracy')
acc_ax.legend()

plt.show()

In [None]:
# Left: train/test loss; right: train/test accuracy with the final values in the title
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, acc_ax = ax

# Same markers and legend names on both panels
for panel, train_curve, test_curve in ((loss_ax, train_loss, test_loss),
                                       (acc_ax,  train_acc,  test_acc)):
    panel.plot(train_curve, 's-', label='Train')
    panel.plot(test_curve,  'o-', label='Test')
    panel.set_xlabel('Epochs')

loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()

acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title(f'Final model train/test accuracy: {train_acc[-1]:.2f}%/{test_acc[-1]:.2f}%')
acc_ax.legend()

plt.show()

## LOSS, ACCURACY - TRAIN/DEV 

In [None]:
# Left: loss per epoch (losses is detached first); right: train/dev accuracy
fig, ax = plt.subplots(1, 2, figsize=(16, 5))

ax[0].plot(losses.detach(), 'o-')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].set_title('Losses')

ax[1].plot(train_acc, 'o-', label='Train')
ax[1].plot(dev_acc,   'o-', label='Devset')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy (%)')
# The value is a percentage (see the y-label), so the title carries the '%'
# sign like the titles of the other result cells
ax[1].set_title(f'Final model Dev accuracy: {dev_acc[-1]:.2f}%')
ax[1].legend()

plt.show()

In [None]:
# Left: train/dev loss plus the single test loss; right: the same for accuracy
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, acc_ax = ax

loss_ax.plot(train_loss, 's-', label='Train')
loss_ax.plot(dev_loss,   'o-', label='Dev')
# The single test value is drawn as a star at the x-position of the last dev point
last_epoch = len(dev_loss) - 1
loss_ax.plot(last_epoch, test_loss, 'r*', markersize=15, label='Test')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Losses')
loss_ax.legend()

acc_ax.plot(train_acc, 's-', label='Train')
acc_ax.plot(dev_acc,   'o-', label='Devset')
last_epoch = len(dev_acc) - 1
acc_ax.plot(last_epoch, test_acc, 'r*', markersize=15, label='Test')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title(f'Final model Dev/Test accuracy: {dev_acc[-1]:.2f}%/{test_acc:.2f}%')
acc_ax.legend()

plt.show()

## LOSS - TRAIN/DEV

In [None]:
# Train and dev loss per epoch on a single axis
for curve, fmt, name in ((train_loss, 's-', 'Train'), (dev_loss, 'o-', 'Dev')):
    plt.plot(curve, fmt, label=name)

plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Losses')
plt.legend()
plt.show()

## LOSS - TRAIN/TEST 

In [None]:
# Train and test loss per epoch, with the final test loss in the title
for curve, fmt, name in ((train_loss, 's-', 'Train'), (test_loss, 'o-', 'Test')):
    plt.plot(curve, fmt, label=name)

plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend()

final_test_loss = test_loss[-1]
plt.title(f'Model loss (Final Test loss: {final_test_loss:.2f})')
plt.show()

## LOSS, ACCURACY - TRAIN/TEST 

In [None]:
# Left: train/test loss; right: train/test accuracy with the final test value in the title
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, acc_ax = ax

# Both loss curves use the same square marker here
for curve, name in ((train_loss, 'Train'), (test_loss, 'Test')):
    loss_ax.plot(curve, 's-', label=name)
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()

for curve, fmt, name in ((train_acc, 's-', 'Train'), (test_acc, 'o-', 'Test')):
    acc_ax.plot(curve, fmt, label=name)
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title(f'Final model test accuracy: {test_acc[-1]:.2f}%')
acc_ax.legend()

plt.show()

## LOSS, ERROR - TRAIN/TEST

In [None]:
# Left: train/test loss; right: train/test error rate with the final test value in the title
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
loss_ax, err_ax = ax

# Same markers and legend names on both panels
for panel, train_curve, test_curve in ((loss_ax, train_loss, test_loss),
                                       (err_ax,  train_err,  test_err)):
    panel.plot(train_curve, 's-', label='Train')
    panel.plot(test_curve,  'o-', label='Test')
    panel.set_xlabel('Epochs')

loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()

err_ax.set_ylabel('Error rates (%)')
err_ax.set_title(f'Final model test error rate: {test_err[-1]:.2f}%')
err_ax.legend()

plt.show()

___

# GPU USING

In [None]:
# Use GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Send model and data to the GPU
net.to(device)
data   = data  .to(device)
# Rebind `labels` itself; the earlier `lables` spelling created a new
# variable and left `labels` on its original device
labels = labels.to(device)

# Get result from the model
output = net(data)

# Transfer output back to the CPU, cut from the autograd graph
output = output.detach().cpu()

In [None]:
# Same steps as the GPU cell, but forced onto the CPU
device = 'cpu'
net.to(device)
data   = data.to(device)
labels = labels.to(device)
# Forward pass; the result is detached from the autograd graph
output = net(data).detach().cpu()