# TD5

In [1]:
#importer toutes les librairies nécessaire
import torch
import torch.nn as nn
from utils import *
import time
import matplotlib.pyplot as plt 
import matplotlib.ticker as ticker
import seaborn as sns
from torch.utils.tensorboard import SummaryWriter


La tâche consiste à prédire la langue d'origine d'un nom de famille. Pour cela, nous disposons de données sous la forme d'une liste de noms pour 18 langues différentes (données téléchargeables [ici](https://download.pytorch.org/tutorial/data.zip) ; télécharger les données et décompresser le zip). 
Le modèle souhaité prend un nom de famille en entrée et prédit l'index de l'une des 18 classes en sortie. Un nom de famille peut être vu comme une séquence de lettres dans un alphabet fixé. 
Questions :

- Comment modéliser les entrées ?
- Quel modèle statistique/neuronal choisir ?
- Quelle prédiction peut être envisagée ?

### Modélisation et apprentissage

L'alphabet est composé de 57 caractères : 26 lettres majuscules, 26 lettres minuscules, 5 caractères spéciaux (espace, point, virgule, apostrophe, point-virgule).
`all_letters = string.ascii_letters + " .,;'"`

Les accents sont ignorés grâce à la fonction `unicodeToAscii`, qui remplace les caractères accentués par leur équivalent non accentué. De plus, on latinise les noms arabes, chinois, japonais, coréens, ... (voir dans les données, les noms ont été latinisés).

In [None]:
# Load the per-language name lists together with the list of language labels.
DATAPATH = 'data/names/'
train_data, all_categories = get_language_data(DATAPATH)
n_categories = len(all_categories)

# Summarise the alphabet, then the number of names available for each language.
print(f"There are {n_letters} letters \n{all_letters}")
print(f'There are {n_categories} languages.\nNumber of family name per language:')
for categ in train_data:
    print('   {}\t {}'.format(categ, len(train_data[categ])))

# Total number of names over all languages.
n_data = sum(len(train_data[key]) for key in train_data)
print(f"The dataset contains {n_data} family names.")

In [None]:
# Show the first five names of a few languages to check the latinisation.
for _language in ('Arabic', 'Chinese', 'French'):
    print(train_data[_language][:5])

## RNN

In [20]:
class RNN(nn.Module):
    """Single-step recurrent cell used to classify a sequence of vectors.

    Each call consumes one input vector together with the previous hidden
    state. The next hidden state and the class log-probabilities are both
    linear functions of the concatenation ``[input, hidden]`` (the class
    scores additionally go through a log-softmax).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        # Both linear maps read the concatenated (input, hidden) vector.
        joint_size = input_size + hidden_size
        self.i2h = nn.Linear(joint_size, hidden_size)
        self.i2o = nn.Linear(joint_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Process one time step.

        Args:
            input: 2-D tensor whose second dimension has size ``input_size``.
            hidden: 2-D tensor whose second dimension has size ``hidden_size``.

        Returns:
            Tuple ``(output, hidden)``: the log-probabilities over the output
            classes and the next hidden state.
        """
        joint = torch.cat([input, hidden], dim=1)
        next_hidden = self.i2h(joint)
        log_probs = self.softmax(self.i2o(joint))
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)


In [24]:
def train_rnn_wo_optimizer(model, criterion, learning_rate, category_tensor, line_tensor):
    """Run one plain-SGD training step of a step-wise recurrent model.

    The model is unrolled over the first dimension of ``line_tensor``; the
    loss is computed on the output of the last step only, then every
    parameter is updated in place with ``p <- p - learning_rate * grad``.

    Args:
        model: module exposing ``initHidden()`` and called as
            ``model(step_input, hidden) -> (output, hidden)``.
        criterion: callable ``criterion(output, category_tensor)`` returning
            a scalar loss tensor.
        learning_rate: step size of the gradient update.
        category_tensor: target passed to ``criterion``.
        line_tensor: sequence tensor; iterated along its first dimension.

    Returns:
        Tuple ``(output, loss)``: the model output at the last step and the
        loss as a Python float.

    Raises:
        ValueError: if ``line_tensor`` has no time step (there would be no
            output to score).
    """
    if line_tensor.size(0) == 0:
        raise ValueError("line_tensor must contain at least one time step")

    hidden = model.initHidden()
    model.zero_grad()

    for step_input in line_tensor:
        output, hidden = model(step_input, hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    # Update the weights without recording the operation in autograd.
    with torch.no_grad():
        for p in model.parameters():
            # Parameters that did not take part in the forward pass have no
            # gradient; leave them untouched instead of crashing.
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    return output, loss.item()


# Train on randomly drawn examples and keep windowed loss averages for plotting
def training_loop(model, criterion, learning_rate, train_data, all_categories, n_iters, plot_every=1000, print_every=5000):
    """Train ``model`` for ``n_iters`` single-example SGD steps.

    Every step draws one example with ``randomTrainingExample`` and applies
    ``train_rnn_wo_optimizer``. Every ``print_every`` steps a progress line
    (step, percentage, ``timeSince`` output, loss, name, guess, correctness)
    is printed; every ``plot_every`` steps the mean loss of the window is
    stored.

    Returns:
        List of mean losses, one entry per completed ``plot_every`` window.
    """
    began = time.time()
    window_means = []
    window_total = 0

    for step in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample(all_categories, train_data)
        output, loss = train_rnn_wo_optimizer(model, criterion, learning_rate, category_tensor, line_tensor)
        window_total += loss

        # Progress report: step, percentage done, elapsed time, loss, name, guess.
        if step % print_every == 0:
            guess, _ = categoryFromOutput(output, all_categories)
            if guess == category:
                correct = '✓'
            else:
                correct = '✗ (%s)' % category
            percent = step / n_iters * 100
            print('%d %d%% (%s) %.4f %s / %s %s' % (step, percent, timeSince(began), loss, line, guess, correct))

        # Close the current averaging window.
        if step % plot_every == 0:
            window_means.append(window_total / plot_every)
            window_total = 0

    return window_means


In [None]:
# Number of single-example training steps.
n_iters = 100000

### create model
# Size of the hidden state carried from one character to the next.
n_hidden = 128
model = RNN(n_letters, n_hidden, n_categories)

#### training
# NLLLoss expects log-probabilities, which RNN.forward produces with LogSoftmax.
criterion = torch.nn.NLLLoss()
learning_rate = 0.005 
# all_losses holds one averaged loss per `plot_every` window of training_loop.
all_losses = training_loop(model, criterion, learning_rate, train_data, all_categories, n_iters=n_iters)
### save model
# The whole module object is pickled (not only its state_dict), so the RNN
# class has to be defined again before the file can be loaded back.
torch.save(model, 'char-rnn-classification.pt')

In [30]:
@torch.no_grad()
def evaluate_rnn(model, line):
    """Unroll a step-wise recurrent ``model`` over ``line`` without gradients.

    ``line`` is consumed along its first dimension, starting from
    ``model.initHidden()``; the output of the last step is returned.
    """
    state = model.initHidden()
    n_steps = line.shape[0]
    for t in range(n_steps):
        output, state = model(line[t], state)
    return output

@torch.no_grad()
def evaluate_lstm(model, line):
    """Run a whole-sequence model on ``line`` without tracking gradients.

    The model is called once as ``model(line)`` and must return a pair
    ``(output, hidden)``; only ``output`` is returned. The model builds its
    own initial state, so no hidden state is created here.
    """
    output, _ = model(line)
    return output

def get_confusion_matrix(model, train_data, all_categories, n_categories):
    """Evaluate ``model`` on every name of ``train_data`` and print the scores.

    Args:
        model: an ``RNN`` instance (evaluated step by step with
            ``evaluate_rnn``) or any other model (evaluated with
            ``evaluate_lstm``).
        train_data: mapping ``category -> list of names``.
        all_categories: list of category labels; the position of a label is
            its row/column index in the matrix.
        n_categories: number of rows and columns of the matrix.

    Returns:
        ``(n_categories, n_categories)`` tensor whose rows are the true
        categories and whose columns are the predicted ones. Each row is
        divided by its sum, so the diagonal holds the per-category accuracy.

    Prints the number of names per category, the per-category accuracy, the
    unweighted mean of these accuracies ("flat") and their mean weighted by
    the true size of each category ("wght"), i.e. the overall accuracy.
    """
    # The evaluation routine depends only on the model type: choose it once.
    evaluate = evaluate_rnn if isinstance(model, RNN) else evaluate_lstm

    confusion = torch.zeros(n_categories, n_categories)
    print("----------------\n   Effectifs\n----------------")
    for categ, names in train_data.items():
        print("   {}\t {}".format(categ, len(names)))
        # Row index of the true category; constant for all names of `categ`.
        category_i = all_categories.index(categ)
        for name in names:
            output = evaluate(model, lineToTensor(name))
            guess, guess_i = categoryFromOutput(output, all_categories)  # index of the max log-probability
            confusion[category_i][guess_i] += 1

    # Number of evaluated names per TRUE category = sum of each row. It must
    # be taken before the rows are normalised below. (Summing the columns
    # would count predictions per category, which is not the right weight.)
    effectif = confusion.sum(dim=1)
    print("----------------\n   Scores\n----------------")
    for i in range(n_categories):
        # The epsilon keeps an empty category at 0 instead of producing NaN.
        confusion[i] = confusion[i] / (1e-16 + confusion[i].sum())
        print("   {} \t {:2.1%}".format(all_categories[i], (confusion[i][i]).item()))
    print("------")
    print("Global (flat) \t {:2.1%}".format(confusion.diag().mean().item()))
    weighted_conf = confusion.diag() * (effectif / (1e-16 + effectif.sum()))
    print("Global (wght) \t {:2.1%}".format(weighted_conf.sum().item()))
    print("-----------\n")
    return confusion

def plot_confusion_matrix(confusion, all_categories):
    """Show ``confusion`` as a seaborn heatmap with category names on both axes."""
    heatmap_options = {
        "fmt": "g",
        "xticklabels": all_categories,
        "yticklabels": all_categories,
    }
    sns.heatmap(confusion, **heatmap_options)
    plt.show()

def plot_all_losses(all_losses):
    """Draw the sequence of recorded training losses in a new figure."""
    figure = plt.figure()
    axes = figure.gca()
    axes.plot(all_losses)
    axes.set_xlabel('iterations')
    axes.set_ylabel('training loss')
    plt.show()

In [None]:
# Plot the windowed average losses returned by training_loop.
plot_all_losses(all_losses)

### Evaluer les performances 

In [None]:
# The checkpoint was written with torch.save(model, ...), i.e. it is a fully
# pickled module rather than a state_dict. Since PyTorch 2.6 torch.load
# defaults to weights_only=True, which refuses such files, so the full
# unpickler is requested explicitly. Only do this for files you created
# yourself: unpickling can execute arbitrary code.
model_rnn = torch.load('char-rnn-classification.pt', weights_only=False)
# Row-normalised confusion matrix of the reloaded model over the whole data set.
confusion_matrix = get_confusion_matrix(model_rnn, train_data, all_categories, n_categories)

In [None]:
# Heatmap of the row-normalised confusion matrix (rows: true language, columns: prediction).
plot_confusion_matrix(confusion_matrix, all_categories)

On utilise la matrice de confusion pour évaluer les performances du modèle. La matrice de confusion est une matrice de taille 18x18 où chaque ligne correspond à la classe réelle et chaque colonne à la classe prédite. La diagonale de la matrice correspond aux prédictions correctes.

### Monitoring avec tensorboard

Nous allons maintenant journaliser (log) les informations d'entraînement dans TensorBoard.

In [42]:
def training_loop(model, criterion, learning_rate, train_data, all_categories, n_iters, tb_writer, plot_every=1000, print_every=5000):
    """Train ``model`` with manual SGD steps and log the loss to TensorBoard.

    Every step draws one example with ``randomTrainingExample`` and applies
    ``train_rnn_wo_optimizer``. Every ``print_every`` steps a progress line
    is printed; every ``plot_every`` steps the mean loss of the window is
    stored and written to ``tb_writer`` under the tag ``'Training loss'``.
    ``tb_writer`` is flushed and closed before returning.

    Returns:
        List of mean losses, one entry per completed ``plot_every`` window.
    """
    began = time.time()
    window_means = []
    window_total = 0

    for step in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample(all_categories, train_data)
        output, loss = train_rnn_wo_optimizer(model, criterion, learning_rate, category_tensor, line_tensor)
        window_total += loss

        # Progress report: step, percentage done, elapsed time, loss, name, guess.
        if step % print_every == 0:
            guess, _ = categoryFromOutput(output, all_categories)
            verdict = '✓' if guess == category else '✗ (%s)' % category
            percent = step / n_iters * 100
            print('%d %d%% (%s) %.4f %s / %s %s' % (step, percent, timeSince(began), loss, line, guess, verdict))

        # Close the averaging window: log it and keep it for plotting.
        if step % plot_every == 0:
            window_mean = window_total / plot_every
            tb_writer.add_scalar('Training loss', window_mean, step)
            window_means.append(window_mean)
            window_total = 0

    tb_writer.flush()
    tb_writer.close()
    return window_means

In [None]:
# Event files for this run are written under log/nlp_scratch_exp_1.
tb_writer = SummaryWriter('log/nlp_scratch_exp_1')

# Fresh, untrained RNN with a hidden state of size n_hidden.
n_hidden = 128
model = RNN(n_letters, n_hidden, n_categories)

### Tensorboard visualization of the network
# - create (any valid) input data
# - visualize the built model in tensorboard
category, line, category_tensor, line_tensor = randomTrainingExample(all_categories,train_data)
hidden = model.initHidden()
# The graph is traced on a single step: first element of the sequence + initial hidden state.
tb_writer.add_graph(model, (line_tensor[0], hidden  ))

#### training
criterion = torch.nn.NLLLoss() # NLLLoss takes log-probabilities; the RNN already ends with LogSoftmax
learning_rate = 0.005

# training_loop flushes and closes tb_writer when it is done; its returned loss list is not kept here.
training_loop(model, criterion, learning_rate, train_data, all_categories, n_iters = 100000, tb_writer=tb_writer)
# Pickles the whole module object (not only its state_dict).
torch.save(model, 'char-rnn-classification.pt')

### Visualiser le réseau

Lancer tensorboard avec `tensorboard --logdir=6_nlp_scratch/log`

In [None]:
# The checkpoint is a fully pickled module (written with torch.save(model, ...)).
# Since PyTorch 2.6 torch.load defaults to weights_only=True, which refuses
# such files, so the full unpickler is requested explicitly. Only do this for
# files you created yourself: unpickling can execute arbitrary code.
model = torch.load('char-rnn-classification.pt', weights_only=False)
# Any valid example works: it is only used to trace one step of the network.
category, line, category_tensor, line_tensor = randomTrainingExample(all_categories, train_data)
hidden = model.initHidden()
tb_writer.add_graph(model, (line_tensor[0], hidden))
tb_writer.close()

## Implémentation LSTM

In [45]:
class LSTMNet(nn.Module):
    """LSTM sequence classifier.

    The whole sequence goes through an ``nn.LSTM``; the output of the last
    time step is mapped by a linear layer to ``nb_classes`` scores, which
    are turned into log-probabilities. Because the LSTM is built with
    ``batch_first=False``, inputs are laid out as (seq, batch, feature).
    """

    def __init__(self,in_size,hidden_size, nb_layer, nb_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.nb_layer = nb_layer
        self.nb_classes = nb_classes
        # NOTE: switching `bidirectional` to True is not enough to get a
        # working BiLSTM here: the LSTM output then has 2 * hidden_size
        # features and the initial states need 2 * nb_layer slices, so `fc`
        # and `forward` would have to be adapted as well.
        # batch_first=False --> input and output tensors are provided as (seq, batch, feature)
        self.lstm = nn.LSTM(in_size,hidden_size,nb_layer,batch_first=False,bidirectional=False)
        self.fc = nn.Linear(hidden_size,nb_classes)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self,x):
        """Classify the sequence(s) in ``x``.

        Args:
            x: tensor of shape (seq, batch, in_size).

        Returns:
            Tuple ``(out, hn)``: log-probabilities of shape
            (batch, nb_classes) and the final hidden state of the LSTM.
        """
        # Zero initial states; x.size(1) is the batch size with batch_first=False.
        # They are allocated from `x` so that they follow its device and
        # dtype (torch.zeros would always give a CPU tensor in the default dtype).
        state_shape = (self.nb_layer, x.size(1), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out,(hn,cn) = self.lstm(x, (h0,c0))
        # Keep only the output of the last time step of each sequence.
        out = self.fc(out[-1,:,:])
        out = self.softmax(out)
        return out,hn

    def initHidden(self):
        """Return a zero tensor of shape ``(1, hidden_size)``.

        ``forward`` does not use it (it builds its own initial states); the
        method only mirrors the interface of the step-wise RNN.
        """
        return torch.zeros(1, self.hidden_size)


In [46]:
def n_params(model):
    """Count the scalar entries of all parameters of ``model`` that require a gradient."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def training_loop(model, optimizer, n_iters, tb_writer = None, plot_every = 1000, print_every= 5000):
    """Train ``model`` with ``optimizer`` on randomly drawn single examples.

    An ``RNN`` instance is unrolled one element at a time from
    ``model.initHidden()``; any other model is called once on the whole
    sequence and must return ``(output, hidden)``.

    NOTE: this function reads the module-level names ``criterion``,
    ``all_categories`` and ``train_data``; they must be defined before it
    is called.

    Args:
        model: the network to train.
        optimizer: optimizer built on the parameters of ``model``.
        n_iters: number of single-example training steps.
        tb_writer: optional TensorBoard writer; when given, the windowed mean
            loss is logged under ``'Training loss'`` and the writer is
            flushed and closed at the end.
        plot_every: size (in steps) of the loss-averaging window.
        print_every: a progress line is printed every ``print_every`` steps.

    Returns:
        List of mean losses, one entry per completed ``plot_every`` window.
    """
    start = time.time()
    all_losses = []
    current_loss = 0
    # The model type does not change during training: test it once.
    is_rnn = isinstance(model, RNN)
    print(f"Training with a model of {n_params(model)} params ")

    for step in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample(all_categories,train_data)

        if is_rnn:
            hidden = model.initHidden()
            for i in range(line_tensor.size()[0]):
                output, hidden = model(line_tensor[i], hidden)
        else:
            output, _ = model(line_tensor)

        loss = criterion(output,category_tensor)
        # A single reset per step is enough: the optimizer owns the gradients
        # of the parameters it updates.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert once to a Python float for both the accumulation and the printout.
        loss_value = loss.item()
        current_loss += loss_value

        if step % print_every == 0:
            guess, guess_i = categoryFromOutput(output,all_categories)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('%d %d%% (%s) %.4f %s / %s %s' % (step, step / n_iters * 100, timeSince(start), loss_value, line, guess, correct))

        if step % plot_every == 0:
            mean_loss = current_loss / plot_every
            if tb_writer is not None:
                tb_writer.add_scalar('Training loss', mean_loss, step)
            all_losses.append(mean_loss)
            current_loss = 0

    if tb_writer is not None:
        tb_writer.flush()
        tb_writer.close()
    return all_losses


In [None]:
# Event files for the LSTM run are written under log/lstm.
tb_writer = SummaryWriter('log/lstm')
n_hidden = 64
num_layers = 1
model = LSTMNet(n_letters, n_hidden, num_layers, n_categories)#.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# Read as a global by training_loop; LSTMNet ends with LogSoftmax, hence NLLLoss.
criterion = torch.nn.NLLLoss()
n_iters = 100000
# training_loop flushes and closes tb_writer at the end; the returned loss list is not kept here.
training_loop(model, optimizer, n_iters, tb_writer)
### save model
# Pickles the whole module object (not only its state_dict).
torch.save(model, 'char-lstm-classification.pt')

In [None]:
# Load the trained LSTM. The checkpoint is a fully pickled module (written
# with torch.save(model, ...)); since PyTorch 2.6 torch.load defaults to
# weights_only=True, which refuses such files, so the full unpickler is
# requested explicitly. Only do this for files you created yourself:
# unpickling can execute arbitrary code.
model = torch.load('char-lstm-classification.pt', weights_only=False)
tb_writer = SummaryWriter('log/lstm')
# Log the network graph; any valid example works, and the LSTM takes the
# whole sequence as its only input (no explicit hidden state is needed).
category, line, category_tensor, line_tensor = randomTrainingExample(all_categories, train_data)
tb_writer.add_graph(model, line_tensor)
tb_writer.close()
# Row-normalised confusion matrix of the reloaded model over the whole data set.
confusion = get_confusion_matrix(model, train_data, all_categories, n_categories)

In [None]:
# Heatmap of the LSTM confusion matrix computed in the previous cell.
plot_confusion_matrix(confusion, all_categories)

### Comparaison RNN / LSTM

In [None]:
# Same RNN architecture as before, now trained with Adam so that it can be
# compared with the LSTM run; events go to log/rnn_adam.
tb_writer = SummaryWriter('log/rnn_adam')
n_hidden = 128
model = RNN(n_letters, n_hidden, n_categories)
#### training
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# Read as a global by training_loop.
criterion = torch.nn.NLLLoss()
# NOTE(review): this global is not read by training_loop, which keeps its own
# local `current_loss`; it looks like a leftover.
current_loss = 0
n_iters = 100000

# training_loop flushes and closes tb_writer at the end.
all_losses = training_loop(model, optimizer, n_iters, tb_writer)
# Pickles the whole module object (not only its state_dict).
torch.save(model, 'char-rnn-adam-classification.pt')

In [None]:
# Loss curve of the Adam-trained RNN, then its confusion matrix over the whole data set.
plot_all_losses(all_losses)
confusion = get_confusion_matrix(model, train_data, all_categories, n_categories)

In [None]:
# Heatmap of the confusion matrix computed in the previous cell.
plot_confusion_matrix(confusion, all_categories)