In [1]:
%%bash

pip install torchinfo torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Collecting torchmetrics
  Downloading torchmetrics-0.10.0-py3-none-any.whl (529 kB)
Installing collected packages: torchmetrics, torchinfo
Successfully installed torchinfo-1.7.1 torchmetrics-0.10.0


In [2]:
import numpy as np
import pandas as pd
from pandas.api import types
from sklearn import compose, impute, model_selection, pipeline, preprocessing
import torch
from torch import nn, optim, utils
import torchinfo
import torchmetrics
from torchvision import datasets, transforms

## Storing and updating model parameters using PyTorch Tensors

In [None]:
torch.tensor?

In [11]:
t0 = torch.tensor([1.2, 3.2, 0.5, 6])
t0

tensor([1.2000, 3.2000, 0.5000, 6.0000])

In [13]:
t0.requires_grad

False

In [14]:
t1 = torch.tensor([1.2, 3.2, 0.5, 6], requires_grad=True)
t1

tensor([1.2000, 3.2000, 0.5000, 6.0000], requires_grad=True)

In [17]:
_ = t0.requires_grad_()

In [18]:
t0

tensor([1.2000, 3.2000, 0.5000, 6.0000], requires_grad=True)

In [None]:
nn.init.xavier_normal_?

In [21]:
prng = torch.manual_seed(42)

weights = torch.empty(10, 5)
print("Default tensor values...")
print(weights)

print("Tensor values after initialization...")
nn.init.xavier_normal_(weights)
print(weights)

Default tensor values...
tensor([[2.9408e-35, 0.0000e+00, 2.9287e-43, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.8788e+31, 1.7220e+22, 2.1715e-18, 3.4003e-06, 1.0723e-08],
        [4.1577e+21, 1.3601e-05, 4.4203e-05, 2.1353e+20, 2.6407e-06],
        [2.6581e-06, 6.4891e-10, 1.4580e-19, 1.1495e+24, 3.0881e+29],
        [1.5766e-19, 7.3313e+22, 7.2151e+22, 2.8404e+29, 2.3089e-12],
        [1.9421e+31, 2.7491e+20, 6.1949e-04, 1.9421e+31, 2.7491e+20],
        [2.3078e-12, 7.1760e+22, 7.2250e+28, 1.5766e-19, 1.3002e+22],
        [4.3681e-05, 2.6307e+20, 1.3089e+22, 2.1952e-04, 1.7186e-04],
        [2.6540e-09, 1.3177e-08, 2.3049e-12, 2.6302e+20, 6.1949e-04]])
Tensor values after initialization...
tensor([[ 0.7036,  0.5431,  0.3289, -0.7688,  0.2477],
        [-0.4508, -0.0157, -0.5859, -0.2746,  0.6020],
        [-0.1433, -0.5125, -0.2658, -0.2043, -0.2807],
        [ 0.2784,  0.5997, -0.0583, -0.1816,  0.1605],
        [-0.2768

In [3]:
class MultiLayerPerceptron(nn.Module):
    
    def __init__(
        self,
        input_size,
        hidden_sizes,
        hidden_activation,
        output_size,
        output_activation
    ):
        super().__init__()
        
        modules = []
        for hidden_size in hidden_sizes:
            hidden_block = self._make_hidden_block(
                input_size,
                hidden_size,
                hidden_activation
            )
            modules.extend(hidden_block)
            input_size = hidden_size
        output_block = self._make_hidden_block(
            hidden_size,
            output_size,
            output_activation
        )
        modules.extend(output_block)
        
        self.module_list = nn.ModuleList(modules)

    @staticmethod
    def _make_hidden_block(in_features, out_features, activation=None):
        module = nn.Linear(in_features, out_features)
        nn.init.xavier_normal_(module.weight)
        if activation is not None:
            block = [module, activation]
        else:
            block = [module]
        return block

    def forward(self, X):
      for f in self.module_list:
          X = f(X)
      return X


In [4]:
hyperparameters = {
    "input_size": 10,
    "hidden_sizes": [8, 8, 8],
    "output_size": 1,
    "hidden_activation": nn.ReLU(),
    "output_activation": None,
}
model_fn = MultiLayerPerceptron(**hyperparameters)

## Computing gradients via automatic differentiation

### Computing the gradient of the loss function with respect to model parameters

In [5]:
prng = torch.manual_seed(42)
m = 100
X = torch.normal(mean=1.0, std=1.0, size=(m, 1), generator=prng)
error = torch.normal(mean=0.0, std=5e-1, size=(m, 1), generator=prng)
beta = torch.tensor([[3.0]])
y = X @ beta + error

In [6]:
weight = torch.tensor([[2.0]], requires_grad=True)
bias = torch.zeros((1, 1), requires_grad=True)
z = X @ weight + bias

In [7]:
rmse_loss = torch.sqrt(torch.mean((y - z)**2))

In [8]:
rmse_loss.backward()

In [9]:
weight.grad

tensor([[-1.3826]])

In [10]:
torch.mean((y - z)**2)**(-0.5) * (-torch.mean((y - z) * X))

tensor(-1.3826, grad_fn=<MulBackward0>)

In [11]:
bias.grad

tensor([[-0.7059]])

In [12]:
torch.mean((y - z)**2)**(-0.5) * (-torch.mean(y - z))

tensor(-0.7059, grad_fn=<MulBackward0>)

## Simplifing implementations of common neural networks

### Implementing models using nn.Sequential

In [None]:
nn.Sequential?

In [13]:
model_fn = nn.Sequential(
    nn.Linear(m, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 1)
)

In [14]:
model_fn

Sequential(
  (0): Linear(in_features=100, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): ReLU()
  (4): Linear(in_features=10, out_features=1, bias=True)
)

### Writing custom layers in PyTorch

In [None]:
nn.Parameter?

In [15]:
class NoisyLinear(nn.Module):

    def __init__(self, input_size, output_size, noise_stddev, generator):
        super().__init__()

        w = torch.zeros(input_size, output_size)
        nn.init.xavier_uniform_(w)
        self.w = nn.Parameter(w)
        
        b = torch.zeros(output_size)
        self.b = nn.Parameter(b)

        self.noise_stddev = noise_stddev
        self.generator = generator

    def forward(self, X, training=False):
        if training:
            noise = torch.normal(0.0, self.noise_stddev, X.shape, generator=self.generator)
            noisy_X = torch.add(X, noise)
        else:
            noisy_X = X
        Z = noisy_X @ self.w + self.b
        return Z


In [16]:
generator = torch.manual_seed(1)
noisy_layer = NoisyLinear(4, 2, 0.1, generator)

In [17]:
X = torch.zeros((1, 4))
print(noisy_layer(X, training=True))

tensor([[ 0.1154, -0.0598]], grad_fn=<AddBackward0>)


In [18]:
print(noisy_layer(X, training=True))

tensor([[ 0.0432, -0.0375]], grad_fn=<AddBackward0>)


In [19]:
print(noisy_layer(X, training=False))

tensor([[0., 0.]], grad_fn=<AddBackward0>)


In [20]:
model_fn = nn.Sequential(
    NoisyLinear(10, 10, 0.1, generator),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 1)
)

### Writing a custom loss function

In [23]:
class RMSELoss(torch.nn.Module):

    def __init__(self):
        super().__init__()

        self._mse_loss = nn.MSELoss()

    def forward(self, x, y):
        rmse = torch.sqrt(self._mse_loss(x, y))
        return rmse

In [27]:
prng = torch.manual_seed(42)
m = 100
X = torch.normal(mean=1.0, std=1.0, size=(m, 1), generator=prng)
error = torch.normal(mean=0.0, std=5e-1, size=(m, 1), generator=prng)
beta = torch.tensor([[3.0]])
y = X @ beta + error

In [28]:
weight = torch.tensor([[2.0]], requires_grad=True)
bias = torch.zeros((1, 1), requires_grad=True)
z = X @ weight + bias

In [29]:
rmse_loss = RMSELoss()

In [30]:
loss = rmse_loss(z, y)
loss.backward()

In [31]:
weight.grad

tensor([[-1.3826]])

In [32]:
bias.grad

tensor([[-0.7059]])

## Predicting the fuel efficiency of a car

In [33]:
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"

column_names = [
    "MPG",
    "Cylinders",
    "Displacement",
    "Horsepower",
    "Weight",
    "Acceleration",
    "Model Year",
    "Origin",
]

df = pd.read_csv(
    url,
    names=column_names,
    na_values = "?",
    comment='\t',
    sep=" ",
    skipinitialspace=True
)

In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MPG           398 non-null    float64
 1   Cylinders     398 non-null    int64  
 2   Displacement  398 non-null    float64
 3   Horsepower    392 non-null    float64
 4   Weight        398 non-null    float64
 5   Acceleration  398 non-null    float64
 6   Model Year    398 non-null    int64  
 7   Origin        398 non-null    int64  
dtypes: float64(5), int64(3)
memory usage: 25.0 KB


In [35]:
df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [36]:
_random_state = np.random.RandomState(42)
train_df, test_df = model_selection.train_test_split(
    df,
    test_size=0.1,
    random_state=_random_state
)

In [37]:
train_features = train_df.drop("MPG", axis=1)
train_target = train_df.loc[:, ["MPG"]]

test_features = test_df.drop("MPG", axis=1)
test_target = test_df.loc[:, ["MPG"]]


In [38]:
float_pipeline = pipeline.make_pipeline(
    impute.SimpleImputer(strategy="mean"),
    preprocessing.StandardScaler()
)

categorical_pipeline = pipeline.make_pipeline(
    preprocessing.OneHotEncoder()
)

ordinal_pipeline = pipeline.make_pipeline(
    preprocessing.OrdinalEncoder(),
)

model_year_pipeline = pipeline.make_pipeline(
    preprocessing.KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="quantile")
)

encoding_pipeline = compose.make_column_transformer(
    (float_pipeline, compose.make_column_selector(dtype_include=np.float64)),
    (categorical_pipeline, ["Origin"]),
    (ordinal_pipeline, ["Cylinders"]),
    (model_year_pipeline, ["Model Year"])
)

type_conversion_pipeline = pipeline.make_pipeline(
    preprocessing.FunctionTransformer(lambda X: X.astype(np.float32)),
    preprocessing.FunctionTransformer(lambda X: torch.from_numpy(X))
)

preprocessing_pipeline = pipeline.make_pipeline(
    encoding_pipeline,
    type_conversion_pipeline
)

In [39]:
train_features_tensor = preprocessing_pipeline.fit_transform(train_features)
train_target_tensor = type_conversion_pipeline.fit_transform(train_target.to_numpy())

In [40]:
train_features_tensor

tensor([[-0.1803,  0.3066, -0.3129,  ...,  0.0000,  3.0000,  2.0000],
        [-0.9192, -1.1655, -1.3492,  ...,  0.0000,  1.0000,  0.0000],
        [-0.6858,  0.2263, -0.0341,  ...,  0.0000,  1.0000,  0.0000],
        ...,
        [-0.5594, -0.2288, -0.5343,  ...,  1.0000,  1.0000,  2.0000],
        [-0.9969, -1.1120, -1.0907,  ...,  1.0000,  1.0000,  2.0000],
        [-0.9192, -1.5403, -1.2104,  ...,  0.0000,  1.0000,  0.0000]])

In [41]:
train_features_tensor.shape

torch.Size([358, 9])

In [42]:
train_target_tensor.shape

torch.Size([358, 1])

In [43]:
train_dataset = utils.data.TensorDataset(train_features_tensor, train_target_tensor)
train_dataloader = utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True)

In [61]:
model_fn = MultiLayerPerceptron(
    input_size = train_features_tensor.shape[1],
    hidden_sizes=[100],
    output_size=1,
    hidden_activation=nn.ReLU(),
    output_activation=None,
)

In [62]:
model_fn

MultiLayerPerceptron(
  (module_list): ModuleList(
    (0): Linear(in_features=9, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=1, bias=True)
  )
)

In [63]:
loss_fn = RMSELoss()

optimizer = optim.Adam(
    model_fn.parameters(),
    lr=1e-3,
)

In [64]:
epochs = 200
log_epochs = 20

for epoch in range(epochs):
    loss_hist_train = 0
    for X, y in train_dataloader:

        # forward pass
        y_hat = model_fn(X)
        loss = loss_fn(y_hat, y)
        loss_hist_train += loss.item()

        # backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    if epoch % log_epochs==0:
        print(f"Epoch {epoch} Loss {loss_hist_train / len(train_dataloader):.4f}")

Epoch 0 Loss 11.7564
Epoch 20 Loss 1.9949
Epoch 40 Loss 1.9066
Epoch 60 Loss 1.7738
Epoch 80 Loss 1.7248
Epoch 100 Loss 1.7254
Epoch 120 Loss 1.6916
Epoch 140 Loss 1.6410
Epoch 160 Loss 1.6772
Epoch 180 Loss 1.6166


In [67]:
y_hat

tensor([[23.1306]], grad_fn=<AddmmBackward0>)

In [68]:
test_features_tensor = preprocessing_pipeline.transform(test_features)
test_target_tensor = type_conversion_pipeline.transform(test_target.to_numpy())

In [70]:
with torch.no_grad():
    y_hat = model_fn(test_features_tensor)
    loss = loss_fn(y_hat, test_target_tensor)

mae = nn.L1Loss()
print(f'Test RMSE: {loss.item():.4f}')
print(f'Test MAE: {mae(y_hat, test_target_tensor).item():.4f}')

Test RMSE: 2.1587
Test MAE: 1.5549


## Classifying images



In [71]:
_transform = transforms.Compose(
    [
        transforms.ToTensor(),
     ]
)

_train_dataset = datasets.MNIST(
    "./sample_data",
    download=True,
    train=True,
    transform=_transform,
)

train_dataset, val_dataset = utils.data.random_split(
    _train_dataset,
    lengths = [48000, 12000]
)

test_dataset = datasets.MNIST(
    "./sample_data",
    download=True,
    train=False,
    transform=_transform,
)

train_dataloader = utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)

val_dataloader = utils.data.DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
)

test_dataloader = utils.data.DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./sample_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/train-images-idx3-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./sample_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./sample_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./sample_data/MNIST/raw



### Create a model

In [104]:
model_fn = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1 * 28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
    nn.Softmax(dim=-1)
)

In [105]:
model_fn

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=100, bias=True)
  (2): ReLU()
  (3): Linear(in_features=100, out_features=100, bias=True)
  (4): ReLU()
  (5): Linear(in_features=100, out_features=10, bias=True)
  (6): Softmax(dim=-1)
)

In [106]:
torchinfo.summary(model_fn, input_size=(1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1, 10]                   --
├─Flatten: 1-1                           [1, 784]                  --
├─Linear: 1-2                            [1, 100]                  78,500
├─ReLU: 1-3                              [1, 100]                  --
├─Linear: 1-4                            [1, 100]                  10,100
├─ReLU: 1-5                              [1, 100]                  --
├─Linear: 1-6                            [1, 10]                   1,010
├─Softmax: 1-7                           [1, 10]                   --
Total params: 89,610
Trainable params: 89,610
Non-trainable params: 0
Total mult-adds (M): 0.09
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.36
Estimated Total Size (MB): 0.36

### Defining a loss function and an optimizer

In [107]:
loss_fn = nn.NLLLoss()

In [108]:
_optimizer_hyperparameters = {
    "lr": 1e-3,
}
optimizer = optim.Adam(model_fn.parameters(), **_optimizer_hyperparameters)

### Training and evaluating the model


In [96]:
torchmetrics.classification.MulticlassAccuracy?

In [109]:
train_accuracy = torchmetrics.classification.MulticlassAccuracy(num_classes=10)
val_accuracy = torchmetrics.classification.MulticlassAccuracy(num_classes=10)
test_accuracy = torchmetrics.classification.MulticlassAccuracy(num_classes=10)

In [110]:
def validate(model_fn, loss_fn, data_loader, metric):

    batch_metrics = []
    batch_losses = []
    
    for X, y in data_loader:
        predictions = model_fn(X)
        batch_metrics.append(metric(predictions, y))
        batch_losses.append(loss_fn(predictions, y))
    
    avg_metric = (torch.stack(batch_metrics)
                       .mean())
    avg_loss = (torch.stack(batch_losses)
                     .mean())

    return avg_metric, avg_loss

In [112]:
number_epochs = 5

for epoch in range(number_epochs):
        
    model_fn.train()
    batch_metrics = []
    batch_losses = []
    for X, y in train_dataloader:
     
        # forward pass
        predictions = model_fn(X)
        batch_metrics.append(train_accuracy(predictions, y))
        loss = loss_fn(predictions, y)
        batch_losses.append(loss)
  
        # back propagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # compute training metrics
    train_metric = (torch.stack(batch_metrics)
                         .mean())
    train_loss = (torch.stack(batch_losses)
                       .mean()) 

    # compute validation metrics
    model_fn.eval()
    val_metric, val_loss = validate(model_fn, loss_fn, val_dataloader, val_accuracy)
    message = (f"Epoch: {epoch}, "
               f"Train accuracy: {train_metric:.4f}, "
               f"Train loss: {train_loss:.4f}, "
               f"Val accuracy: {val_metric:.4f}, "
               f"Val loss: {val_loss:.4f}")
    print(message)


Epoch: 0, Train accuracy: 0.9021, Train loss: -0.9312, Val accuracy: 0.9402, Val loss: -0.9381
Epoch: 1, Train accuracy: 0.9161, Train loss: -0.9469, Val accuracy: 0.9476, Val loss: -0.9475
Epoch: 2, Train accuracy: 0.9235, Train loss: -0.9562, Val accuracy: 0.9585, Val loss: -0.9566
Epoch: 3, Train accuracy: 0.9306, Train loss: -0.9627, Val accuracy: 0.9570, Val loss: -0.9562
Epoch: 4, Train accuracy: 0.9311, Train loss: -0.9672, Val accuracy: 0.9630, Val loss: -0.9624


In [113]:
test_acc, test_loss = validate(model_fn, loss_fn, test_dataloader, test_accuracy)
print(f"Test accuracy: {test_acc:.4f}, Test loss: {test_loss:.4f}")


Test accuracy: 0.9656, Test loss: -0.9641
