In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%config InlineBackend.figure_format = 'retina'

In [None]:
RANDOM_SEED = 666

In [None]:
pd.set_option("display.max_columns", None)

In [None]:
import torch

torch.manual_seed(RANDOM_SEED)

## Tensors

PyTorch supports many of the same operations as NumPy.

In [None]:
X = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
X

In [None]:
X.shape

In [None]:
y = torch.tensor([1, 2, 3])
y

In [None]:
# Scalar operations
y * 2 + 1

In [None]:
# Elementwise multiplication
X * X

In [None]:
# Matrix multiplication
X @ X

In [None]:
# Create a column vector (a 2-D tensor of shape (3, 1)) by using `reshape`
beta = torch.tensor([0.1, 0.2, 0.3]).reshape(-1, 1)
beta.shape

In [None]:
# NOTE: X was built from Python ints (int64) while beta was built from floats
# (float32). Matrix multiplication does not promote dtypes, so this line is
# expected to raise a RuntimeError about mismatched dtypes.
y_pred = X @ beta

In [None]:
# Cast X to float so both operands share a dtype; (3, 3) @ (3, 1) gives a
# result of shape (3, 1).
y_pred = X.float() @ beta


## Gradients

We can take gradients with respect to any free parameters. A parameter is any `torch.nn.Parameter` object.

In [None]:
beta = torch.nn.Parameter(torch.tensor([3.0]))

In [None]:
beta

In [None]:
loss = 2 * beta

In [None]:
# Calculate the gradient
loss.backward()

In [None]:
beta.grad

In [None]:
# Set the gradient back to zero. `backward()` accumulates into `.grad`, so
# without this reset the next gradient would be added on top of this one.
beta.grad.detach_()  # detach the gradient tensor from any autograd graph
beta.grad.zero_()  # overwrite its values with zeros in place

In [None]:
loss = 3 * beta ** 2

In [None]:
# derivative = 3 * (2 * beta) = 3 * (2 * 3) = 18

In [None]:
loss.backward()

In [None]:
beta.grad

We rarely work directly with `torch.nn.Parameter`. Instead, we work with `torch.nn.Module` objects which contain lots of parameters.

In [None]:
X = torch.rand(10, 5)

In [None]:
layer = torch.nn.Linear(5, 5)
layer

In [None]:
layer.weight

In [None]:
layer.bias

In [None]:
output = layer(X)
output

Let's construct a feed-forward neural network with activation functions.

In [None]:
# Create some fake data: a (num_samples, num_features) feature matrix and a
# 1-D target vector of length num_samples, both drawn uniformly from [0, 1).
num_samples = 10
num_features = 30
X = torch.rand(num_samples, num_features)
y = torch.rand(num_samples)

In [None]:
num_layers = 2
hidden_size = 10

# Input block: project the raw features onto the hidden width.
net = [torch.nn.Linear(num_features, hidden_size), torch.nn.ReLU()]
# The remaining num_layers - 1 hidden blocks, each a Linear followed by a ReLU.
net.extend(
    module
    for _ in range(num_layers - 1)
    for module in (torch.nn.Linear(hidden_size, hidden_size), torch.nn.ReLU())
)
# Put a linear layer at the end to convert to a single output.
net.append(torch.nn.Linear(hidden_size, 1))

In [None]:
def predict(net, X):
    """Feed `X` through every callable in `net`, in order, and return the result.

    Each element of `net` is called with the output of the previous one; the
    first element receives `X` itself. An empty `net` returns `X` unchanged.
    """
    activations = X
    for module in net:
        activations = module(activations)
    return activations

In [None]:
predict(net, X)

It's often desirable to package all of our free parameters together into a single class, similar to scikit-learn. That's what `torch.nn.Module` is for.

In [None]:
class MyNet(torch.nn.Module):
    """Feed-forward network mapping `num_features` inputs to a single output.

    The stack is one Linear + ReLU block from `num_features` to `hidden_size`,
    then `num_layers - 1` Linear + ReLU blocks of width `hidden_size`, then a
    final Linear layer with one output unit.
    """

    def __init__(self, num_layers: int, hidden_size: int, num_features: int):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.num_features = num_features

        # Input width of each Linear + ReLU block, in order: the first block
        # reads the raw features, every later block reads hidden units.
        block_inputs = [num_features]
        block_inputs.extend(hidden_size for _ in range(num_layers - 1))

        modules = []
        for width in block_inputs:
            modules.append(torch.nn.Linear(width, hidden_size))
            modules.append(torch.nn.ReLU())

        # Put a linear layer at the end to convert to a single output.
        modules.append(torch.nn.Linear(hidden_size, 1))
        self.net = torch.nn.Sequential(*modules)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Run `X` through the layer stack and return its output."""
        return self.net(X)

In [None]:
net = MyNet(num_layers, hidden_size, num_features)

In [None]:
net

In [None]:
net(X)

## Optimization

Thanks to backpropagation, we can optimize essentially any differentiable loss function we can think up.

In [None]:
net = MyNet(num_layers, hidden_size, num_features)
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)

In [None]:
for epoch in range(20):
    # The network outputs shape (num_samples, 1). Take column 0 so the
    # predictions line up with the 1-D target `y`; otherwise `y - y_pred`
    # broadcasts to a (num_samples, num_samples) matrix of all pairwise
    # differences and the loss is computed over the wrong quantity.
    y_pred = net(X)[:, 0]
    # An unusual loss on purpose: the median of the cubed absolute errors.
    loss = ((y - y_pred).abs() ** 3).median()
    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch:>2}: loss = {loss.item():2.5f}")

We have access to all sorts of other optimizers and modifications, like weight decay (a form of L2 regularization that shrinks the weights toward zero).

In [None]:
net = MyNet(num_layers, hidden_size, num_features)
optimizer = torch.optim.Adam(net.parameters(), lr=0.05, weight_decay=1e-6)

In [None]:
for epoch in range(20):
    # The network outputs shape (num_samples, 1). Take column 0 so the
    # predictions line up with the 1-D target `y`; otherwise `y - y_pred`
    # broadcasts to a (num_samples, num_samples) matrix of all pairwise
    # differences and the loss is computed over the wrong quantity.
    y_pred = net(X)[:, 0]
    # An unusual loss on purpose: the median of the cubed absolute errors.
    loss = ((y - y_pred).abs() ** 3).median()
    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch:>2}: loss = {loss.item():2.5f}")

## GPUs

(GPU acceleration requires an Apple-silicon Mac (MPS) or a CUDA-capable GPU; without one, the code below falls back to the CPU.)

In [None]:
# Pick the first available backend, in order of preference: Apple's Metal
# (MPS), then the first CUDA GPU, and finally the CPU.
device = (
    "mps"
    if torch.backends.mps.is_available()
    else "cuda:0"
    if torch.cuda.is_available()
    else "cpu"
)

We can send tensors and models to the selected device (the GPU, when one is available).

In [None]:
X = X.to(device)
y = y.to(device)
y

In [None]:
net = net.to(device)

In [None]:
y_pred = net(X)

## Logistic Regression

In [None]:
data = pd.read_csv("../../04/data/airline_satisfaction/train.csv", index_col=0)
data.head()

In [None]:
# Train a simple model using only numerical features
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, MaxAbsScaler

In [None]:
# Numeric columns that should be left out of the feature set.
ignore_features = ["id"]
# Keep every remaining column with a numeric dtype, in DataFrame column order.
features = [
    name
    for name, dtype in data.dtypes.items()
    if np.issubdtype(dtype, np.number) and name not in ignore_features
]
# Name of the label column.
target = "satisfaction"

In [None]:
X = data[features]
# y is 0s and 1s
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data[target])

In [None]:
# Fill in missing values, then rescale each feature by its maximum absolute
# value. Both transformers are fit on X itself; the result is a NumPy array.
imputer = SimpleImputer()  # default settings (mean imputation per the scikit-learn docs)
scaler = MaxAbsScaler()
X_trans = scaler.fit_transform(imputer.fit_transform(X))

In [None]:
y = torch.from_numpy(y).float()
X_trans = torch.from_numpy(X_trans).float()

In [None]:
num_samples, num_features = X_trans.shape

In [None]:
# Logistic regression written as a network: one linear layer mapping the
# features to a single score, followed by a sigmoid that squashes it into (0, 1).
model = torch.nn.Sequential(torch.nn.Linear(num_features, 1), torch.nn.Sigmoid())

In [None]:
model

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)

In [None]:
loss_function = torch.nn.BCELoss()

In [None]:
batch_size = 32
# Each epoch is one full pass over the data; lower this if training is too slow.
num_epochs = 200
for epoch in range(1, num_epochs + 1):
    epoch_loss = []
    # Walk over the rows (samples) in mini-batches. The upper bound must be
    # num_samples: bounding the range by num_features would stop after the
    # first few rows and leave almost all of the data unused.
    for start_idx in range(0, num_samples, batch_size):
        # Slicing past the end is safe; the last batch is simply shorter.
        batch_slice = slice(start_idx, start_idx + batch_size)
        # The model outputs shape (batch, 1); take column 0 so the predictions
        # match the 1-D target expected by the loss.
        y_pred = model(X_trans[batch_slice, :])[:, 0]
        loss = loss_function(y_pred, y[batch_slice].float())
        # Clear old gradients, backpropagate, then take one optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss.append(loss.item())
    # Report the mean batch loss for the epoch.
    print(f"Epoch {epoch:>2}: loss = {np.mean(epoch_loss):2.5f}")

In [None]:
model.eval()
with torch.inference_mode():
    # Threshold the predicted probabilities at 0.5. Take column 0 so the
    # predictions are 1-D, like the labels, instead of an (N, 1) column.
    y_pred = (model(X_trans)[:, 0] >= 0.5).numpy()
# Convert the labels to NumPy for scikit-learn. The guard keeps this cell
# re-runnable: once `y` is already an ndarray it has no `.detach()` to call.
if isinstance(y, torch.Tensor):
    y = y.detach().numpy()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# NOTE: despite the variable name, `from_predictions` returns a
# ConfusionMatrixDisplay object (not a matplotlib Axes); it also draws the plot.
ax = ConfusionMatrixDisplay.from_predictions(y, y_pred)


In [None]:
from sklearn.metrics import classification_report


In [None]:
# Quick view of metrics:
print(classification_report(y, y_pred))