In [None]:
%pip install --upgrade git+https://github.com/GRAAL-Research/poutyne.git@dev
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from poutyne import Model, EpochProgressionCallback, SKLearnMetrics, BestModelRestore

# Render every matplotlib figure of the notebook at 150 dots per inch.
plt.rcParams['figure.dpi'] = 150

Some constants we will need.

In [None]:
# Index of the GPU to use when CUDA is available; otherwise everything runs on the CPU.
cuda_device = 0
device = torch.device("cuda:%d" % cuda_device if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global random number generator so that the weight
# initialization and the shuffling of the training batches are repeatable
# when the notebook is restarted and run from the top.
random_seed = 42
torch.manual_seed(random_seed)

# Input size of the first layer of every network below; it must match the
# number of columns of the feature matrix `X`.
num_features = 13

# Optimization hyperparameters shared by all the training runs.
batch_size = 32
learning_rate = 0.001
num_epochs = 1000
# The training and validation losses are reported once every this many epochs.
log_every_n_epochs = 100

Let's load a dataset from scikit-learn.

In [None]:
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs with an older scikit-learn release.
# `return_X_y=True` yields the pair (features, targets); both arrays are cast
# to float32 before being bound to `X` and `y`.
X, y = (array.astype('float32') for array in load_boston(return_X_y=True))
X.shape, y.shape

In [None]:
# Display the feature matrix.
X

In [None]:
# Display the regression targets.
y

Let's split our data. For neural networks, we need 3 datasets: training, validation and testing datasets.

In [None]:
# First split: 80% of the examples are kept for training + validation and the
# remaining 20% are held out for testing.
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

# The standardization statistics are estimated on the training + validation
# examples only, and then applied to both that set and the test set.
scaler = StandardScaler().fit(X_train_valid)
X_train_valid, X_test = scaler.transform(X_train_valid), scaler.transform(X_test)

# Second split: 75% of the training + validation examples are used for
# training and the remaining 25% for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid, y_train_valid, train_size=0.75, random_state=42
)

In [None]:
# Report the shapes of the features and targets of each of the three splits.
shape_report = [
    ("Training set shapes (X, y):", X_train, y_train),
    ("Validation set shapes (X, y):", X_valid, y_valid),
    ("Testing set shapes (X, y):", X_test, y_test),
]
for label, features, targets in shape_report:
    print(label, features.shape, targets.shape)

Let's train a simple SVM for regression.

In [None]:
# Baseline: a support vector regressor with scikit-learn's default settings.
# It has no epochs to monitor, so it is fitted on the training and validation
# examples together and scored on the test set.
clf = SVR().fit(X_train_valid, y_train_valid)
prediction = clf.predict(X_test)
print("MSE:", mean_squared_error(y_test, prediction))

Now, we will train neural networks using the PyTorch library.

In [None]:
# Wrap the NumPy arrays of each split into a PyTorch dataset of
# (features, target) pairs.
train_dataset, valid_dataset, test_dataset = (
    TensorDataset(torch.from_numpy(features), torch.from_numpy(targets))
    for features, targets in ((X_train, y_train), (X_valid, y_valid), (X_test, y_test))
)

# Only the training batches are shuffled; the validation and test loaders
# iterate over their examples in order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size)
    for dataset in (valid_dataset, test_dataset)
)

In [None]:
def pytorch_train_one_epoch(pytorch_network, optimizer, loss_function, loader=None):
    """
    Trains the neural network for one epoch on a DataLoader.

    Args:
        pytorch_network (torch.nn.Module): The neural network to train.
        optimizer (torch.optim.Optimizer): The optimizer of the neural network.
        loss_function: The loss function. It is called as
            ``loss_function(y_pred, y)`` and must return a scalar tensor.
        loader (torch.utils.data.DataLoader, optional): The DataLoader to
            train on. Defaults to the notebook-level ``train_loader``.

    Returns:
        float: The average of the loss over all the examples seen during the
        epoch.

    Raises:
        ZeroDivisionError: If the DataLoader yields no example.
    """
    if loader is None:
        # Keep the original behavior: train on the notebook's training set.
        loader = train_loader

    pytorch_network.train(True)
    with torch.enable_grad():
        loss_sum = 0.
        example_count = 0
        for (x, y) in loader:
            # Transfer batch on GPU if needed.
            x = x.to(device)
            y = y.to(device)

            # We need to zero the gradient before every batch because the new
            # gradients would otherwise be summed with the previous gradients.
            optimizer.zero_grad()

            # Compute the predictions of the neural network on the batch.
            y_pred = pytorch_network(x)

            loss = loss_function(y_pred, y)

            # Do the backpropagation to compute the gradients of the parameters.
            loss.backward()

            # Update our parameters with the gradient.
            optimizer.step()

            # Since the loss is an average over the batch, we multiply it by
            # the number of examples of the batch so that we can compute the
            # right average at the end of the epoch (the last batch may be
            # smaller than the others).
            loss_sum += float(loss) * len(x)
            example_count += len(x)

    avg_loss = loss_sum / example_count
    return avg_loss

def pytorch_test(pytorch_network, loader, loss_function):
    """
    Evaluates the neural network on every batch of a DataLoader.

    The network is put in evaluation mode and no gradient is computed.

    Args:
        pytorch_network (torch.nn.Module): The neural network to evaluate.
        loader (torch.utils.data.DataLoader): The DataLoader providing the
            (features, targets) batches.
        loss_function: The loss function, called as
            ``loss_function(predictions, targets)``.

    Returns:
        The average of the loss over all the examples of the DataLoader.
    """
    pytorch_network.eval()

    total_loss = 0.
    examples_seen = 0
    with torch.no_grad():
        for inputs, targets in loader:
            # Move the batch to the device the computations run on.
            inputs, targets = inputs.to(device), targets.to(device)

            batch_loss = loss_function(pytorch_network(inputs), targets)

            # The loss is averaged over the batch: weight it by the batch
            # length so that a smaller last batch does not bias the mean.
            examples_in_batch = len(inputs)
            total_loss += float(batch_loss) * examples_in_batch
            examples_seen += examples_in_batch

    return total_loss / examples_seen
        
    
def pytorch_train(pytorch_network):
    """
    Runs the whole training procedure on a network and prints its progress.

    The network is printed and moved to `device`, then optimized with SGD on
    the mean squared error for `num_epochs` epochs. The validation loss is
    computed after each epoch, and both losses are printed every
    `log_every_n_epochs` epochs. Once training is over, the loss on the test
    set is printed.

    Args:
        pytorch_network (torch.nn.Module): The neural network to train.
    """
    print("Network:")
    print(pytorch_network)
    print()

    # The parameters are moved to the device before the optimizer is built.
    pytorch_network.to(device)

    criterion = nn.MSELoss()
    sgd = optim.SGD(pytorch_network.parameters(), lr=learning_rate)

    for epoch in range(1, num_epochs + 1):
        # One pass of backpropagation over the training set...
        train_loss = pytorch_train_one_epoch(pytorch_network, sgd, criterion)

        # ...followed by an evaluation on the validation set.
        valid_loss = pytorch_test(pytorch_network, valid_loader, criterion)

        if epoch % log_every_n_epochs != 0:
            continue
        message = "Epoch {}/{}: loss: {}, val_loss: {}".format(
            epoch, num_epochs, train_loss, valid_loss
        )
        print(message)

    # The test set is only used once, after the last epoch.
    test_loss = pytorch_test(pytorch_network, test_loader, criterion)
    print('Test Loss: {}'.format(test_loss))

A simple linear regression: a network made of a single linear layer.

In [None]:
# One linear layer mapping the features to a single output.
# `nn.Flatten(0)` then flattens the network output into a 1-D tensor holding
# one prediction per example.
layers = [nn.Linear(num_features, 1), nn.Flatten(0)]
fc_net = nn.Sequential(*layers)
pytorch_train(fc_net)

A neural network with 2 layers and no activation function.

In [None]:
# Two linear layers stacked directly on top of each other: a hidden layer of
# 100 units followed by the output layer, with nothing in between.
layers = [
    nn.Linear(num_features, 100),
    nn.Linear(100, 1),
    nn.Flatten(0),  # 1-D output: one prediction per example
]
fc_net = nn.Sequential(*layers)
pytorch_train(fc_net)

Let's add an activation function.

In [None]:
# Same two layers as before, with a ReLU applied to the 100 hidden units.
layers = [
    nn.Linear(num_features, 100), nn.ReLU(),
    nn.Linear(100, 1),
    nn.Flatten(0),  # 1-D output: one prediction per example
]
fc_net = nn.Sequential(*layers)
pytorch_train(fc_net)

Let's add a third layer.

In [None]:
# Two hidden layers of 100 units, each followed by a ReLU, and the output layer.
layers = [
    nn.Linear(num_features, 100), nn.ReLU(),
    nn.Linear(100, 100), nn.ReLU(),
    nn.Linear(100, 1),
    nn.Flatten(0),  # 1-D output: one prediction per example
]
fc_net = nn.Sequential(*layers)
pytorch_train(fc_net)

And a fourth.

In [None]:
# Three hidden layers of 100 units, each followed by a ReLU, and the output layer.
layers = [
    nn.Linear(num_features, 100), nn.ReLU(),
    nn.Linear(100, 100), nn.ReLU(),
    nn.Linear(100, 100), nn.ReLU(),
    nn.Linear(100, 1),
    nn.Flatten(0),  # 1-D output: one prediction per example
]
fc_net = nn.Sequential(*layers)
pytorch_train(fc_net)

Let's use the Poutyne library instead of our own functions.

In [None]:
def poutyne_train(pytorch_network):
    """
    Trains a network with Poutyne instead of the hand-written training loops.

    A Poutyne Model (see https://poutyne.org/model.html) is built from the
    network, an SGD optimizer and the mean squared error loss, and is given
    `device` to run on. Its `fit` method trains on the training arrays for
    `num_epochs` epochs while validating on the validation arrays, and its
    `evaluate` method is then called on the test arrays.

    Args:
        pytorch_network (torch.nn.Module): The neural network to train.

    Returns:
        A tuple (history, test_loss): the value returned by `fit` and the
        value returned by `evaluate` on the test set, respectively.
    """
    print(pytorch_network)

    criterion = nn.MSELoss()
    sgd = optim.SGD(pytorch_network.parameters(), lr=learning_rate)

    # Uncomment the metrics below to have them computed in addition to the loss.
    model = Model(pytorch_network, sgd, criterion,
                  #batch_metrics=['l1'],
                  #epoch_metrics=[SKLearnMetrics(r2_score)],
                  device=device)

    # Poutyne's own output is turned off (`verbose=False`); progress is
    # reported by the callback every `log_every_n_epochs` epochs instead.
    training_callbacks = [
        EpochProgressionCallback(show_every_n_epochs=log_every_n_epochs),
        #BestModelRestore(verbose=True)
    ]
    history = model.fit(X_train, y_train,
                        validation_data=(X_valid, y_valid),
                        epochs=num_epochs,
                        verbose=False,
                        callbacks=training_callbacks)

    # Evaluate on the test set once training is over.
    test_progress_options = dict(show_every_n_test_steps='none')
    test_loss = model.evaluate(X_test, y_test, progress_options=test_progress_options)

    return history, test_loss

In [None]:
# Three hidden layers of 100 units with ReLU activations and the output layer,
# trained this time with Poutyne.
layers = [
    nn.Linear(num_features, 100), nn.ReLU(),
    nn.Linear(100, 100), nn.ReLU(),
    nn.Linear(100, 100), nn.ReLU(),
    nn.Linear(100, 1),
    nn.Flatten(0),  # 1-D output: one prediction per example
]
fc_net = nn.Sequential(*layers)
history, test_loss = poutyne_train(fc_net)

Let's plot our losses.

In [None]:
# `history` holds one dict of logged values per epoch; pull out the epoch
# numbers and the training / validation losses.
epochs = [log['epoch'] for log in history]
train_losses = [log['loss'] for log in history]
valid_losses = [log['val_loss'] for log in history]
plt.plot(epochs, train_losses, label='Training loss')
plt.plot(epochs, valid_losses, label='Validation loss')
# Label the axes so that the figure can be read on its own.
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# The loss is drawn on a logarithmic axis.
plt.yscale('log')
plt.show()

Let's plot our metrics.

In [None]:
# Key of the metric in the `history` logs and the name to show on the figure.
# The alternatives in the trailing comments require enabling the matching
# metrics when the Poutyne Model is created.
metric = 'loss'  # 'l1', 'r2_score'
metric_name = 'loss'  # 'L1', '$R^2$'
epochs = [log['epoch'] for log in history]
train_losses = [log[metric] for log in history]
valid_losses = [log['val_'+ metric] for log in history]
plt.plot(epochs, train_losses, label='Training ' + metric_name)
plt.plot(epochs, valid_losses, label='Validation ' + metric_name)
# Label the axes so that the figure can be read on its own.
plt.xlabel('Epoch')
plt.ylabel(metric_name)
plt.legend()
# A logarithmic axis cannot represent zero or negative values (an R^2 score
# can be negative, for instance), so it is only used when every plotted value
# is strictly positive; otherwise the default linear axis is kept.
if all(value > 0 for value in train_losses + valid_losses):
    plt.yscale('log')
plt.show()