In [None]:
import gzip
import time

import numpy

import torch
import torch.nn as nn

from torch.optim import SGD, Adam
from torch.utils.data import Dataset, DataLoader

import matplotlib
matplotlib.rcParams['figure.figsize'] = (9.0, 7.0)
from matplotlib import pyplot

In [None]:
# Load data
class RAMQDataset(Dataset):
    """
    In-memory dataset read from a gzip-compressed ``torch.save`` file.

    The file is expected to hold an indexable collection of samples whose
    second element is the target (e.g. ``(x, y)`` pairs).

    Args:
        data: path of the gzip-compressed file to load. The parameter keeps
            its historical name; it is a path, not the data itself.

    Attributes:
        path: the path received as ``data``.
        data: the object returned by ``torch.load``.
        targets: numpy array built from the second element of every sample.
    """

    def __init__(self, data):
        super().__init__()

        self.path = data
        # Load the data. ``torch.load`` unpickles the file content, so only
        # open files that come from a trusted source.
        with gzip.open(self.path, 'rb') as f:
            self.data = torch.load(f)
        # Keep the targets aside so they are easy to read; building the list
        # sample by sample also yields an empty array for an empty file.
        self.targets = numpy.array([sample[1] for sample in self.data])

    def __getitem__(self, index):
        """Return the sample stored at position ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of loaded samples."""
        return len(self.data)

In [None]:
# Clustering model
# Ref tied weights:
# https://discuss.pytorch.org/t/how-to-create-and-train-a-tied-autoencoder/2585/13
# https://gist.github.com/InnovArul/500e0c57e88300651f8005f9bd0d12bc

class Autoencoder(nn.Module):
    """
    Fully connected autoencoder, meant to be trained to reduce the
    reconstruction error on the data set.

    The encoder maps an input of width ``input_len`` through layers of
    30 and 15 units down to a 5-unit code; the decoder mirrors those widths
    back up to ``input_len``. Every linear layer, including the last one of
    each half, is followed by an in-place ReLU.
    """
    def __init__(self, input_len):
        super().__init__()

        # Layer widths from input to code; the decoder walks them backwards.
        widths = (input_len, 30, 15, 5)
        self.encoder = self._stack(widths)
        self.decoder = self._stack(widths[::-1])

    @staticmethod
    def _stack(widths):
        """Chain Linear -> ReLU(inplace) pairs over consecutive widths."""
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(n_in, n_out), nn.ReLU(inplace=True)]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` to the 5-unit code, then decode it back."""
        return self.decoder(self.encoder(x))

In [None]:
# AE Training
# Pretrain the autoencoder to minimise the reconstruction loss
# and keep the model for clustering

# Setup cuda if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Training parameters
nb_epoch = 5
batch_size = 100
learning_rate = 0.01
momentum = 0.9       # SGD optimizer
weight_decay = 1e-5  # Adam optimizer

# Paths of the gzip-compressed files read by RAMQDataset.
# NOTE(review): placeholder values -- RAMQDataset requires a path argument
# that this cell never supplied; point these at the real files.
train_path = 'data/train.pt.gz'
test_path = 'data/test.pt.gz'

# Load data sets and create dataloaders
train_set = RAMQDataset(train_path)
test_set = RAMQDataset(test_path)

train_loader = DataLoader(train_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# Autoencoder requires the input width; read it from the first sample.
# assumes every sample is an (x, y) pair whose x exposes `.shape` with the
# features on the last axis -- TODO confirm against the data files
input_len = train_set[0][0].shape[-1]

# Load model and setup proper device
model = Autoencoder(input_len)
model.to(device)

# Reconstruction loss and optimizer
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# optimizer = Adam(model.parameters(), weight_decay=weight_decay)

model.train()

for i_epoch in range(nb_epoch):

    start_time, train_losses = time.time(), []
    for batch in train_loader:
        # Read batch data. An autoencoder reconstructs its own input, so the
        # targets are not used here.
        # NOTE(review): the previous version overwrote x with y and trained
        # on the targets; confirm that x holds the features to reconstruct.
        x, _ = batch
        x = x.float().to(device)

        # Reset gradients to zero
        optimizer.zero_grad()

        # Reconstruction and loss
        x_pred = model(x)
        loss = criterion(x_pred, x)

        # Backpropagate and gradient descent
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

    print(' [-] epoch {:4}/{:}, train loss {:.6f} in {:.2f}s'.format(
        i_epoch+1, nb_epoch, numpy.mean(train_losses), time.time()-start_time))

# Display final score: mean reconstruction loss over the test batches.
# (Classification accuracy is not defined for a reconstruction model.)
model.eval()
test_losses = []
with torch.no_grad():
    for x, _ in test_loader:
        x = x.float().to(device)
        test_losses.append(criterion(model(x), x).item())
print(' [-] test loss {:.6f}'.format(numpy.mean(test_losses)))

In [None]:
# AE clustering optimisation
# Must retrain the autoencoder from pretrained weights
# now optimising weights to minimise clustering loss
# AND reconstruction loss
