In [7]:
%load_ext autoreload
%autoreload 2
import os

from  partie1 import GRUEncoderGD, GRUDecoderGD
from  partie1 import GRUEncoderDG, GRUDecoderDG
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchmetrics.classification import Accuracy
import itertools
import pandas as pd


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
input_size = 10
output_size = 10
batch_size = 16
seq_len = 10
epochs = 10000
learning_rate = 0.001
num_layers = 1

device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")
precision= Accuracy(task="multiclass", num_classes=input_size).to(device) 

def generate_data(batch_size, seq_len, input_size):
    '''
    input_size : pour la plage de valeur 
    '''
    data = torch.randint(0, input_size, (batch_size, seq_len))  # fillers
    roles = torch.arange(seq_len).unsqueeze(0).expand(batch_size, -1)  # rôles
    return data, roles

# **Grid Search GRU**

## **Grid Search Gauche Droite**

In [None]:


os.makedirs("./models/GD/", exist_ok=True)
os.makedirs("./resultats/GD/", exist_ok=True)


emb_sizes = [8, 16, 32, 64]  
hidden_sizes = [8, 16, 32, 64, 128] 

results = []
for emb_size, hidden_size in itertools.product(emb_sizes, hidden_sizes):
    print(f"\n--- emb_size={emb_size}, hidden_size={hidden_size} ---\n")
    
    # Définition du modèle
    # encoder = GRUEncoderGD(input_size, emb_size, hidden_size)
    # decoder = GRUDecoderGD(emb_size, hidden_size, output_size)
    encoder = GRUEncoderGD(input_size, emb_size, hidden_size, num_layers).to(device)
    decoder = GRUDecoderGD(emb_size, hidden_size, output_size, num_layers).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=learning_rate)

    losses = []
    accuracies = []


    for epoch in range(epochs):
        data , _ = generate_data(batch_size, seq_len, input_size)
        data = data.to(device)
        

        _, hidden = encoder(data)
        hidden = hidden.to(device)
        # print(hidden.shape)
        hidden = hidden[-1].unsqueeze(0).repeat(num_layers, 1, 1) 
        # on fait ca pour ne garder que la sortie de la couche finale et la copier pour avoir le bon nombre de couches pour le décodeur
        # si on met une seule couche dans le décodeur ca bug et on veut pas la suite de couches différentes sinon on peut pas l'utiliser avec le tpdn

        # print(hidden.shape)
        reconstructed_x = decoder(hidden, seq_len) #.argmax(dim=1) pour la loss on prend les logits et pas les classes directement

        loss = criterion(reconstructed_x.view(-1, output_size), data.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accuracy = precision(reconstructed_x.argmax(dim=-1), data)
        losses.append(loss.item())
        accuracies.append(accuracy.cpu())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy*100:.2f}%")



    torch.save(encoder, f"./models/GD/gru_enc_emb{emb_size}_hid{hidden_size}.pth")
    torch.save(decoder, f"./models/GD/gru_dec_emb{emb_size}_hid{hidden_size}.pth")

    fig, ax1 = plt.subplots()
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Loss', color='tab:blue')
    ax1.plot(range(epochs), losses, color='tab:blue', label='Loss')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    ax2 = ax1.twinx()
    ax2.set_ylabel('Accuracy (%)', color='tab:orange')
    ax2.plot(range(epochs), accuracies, color='tab:orange', label='Accuracy')
    ax2.tick_params(axis='y', labelcolor='tab:orange')

    plt.title(f'Loss and Accuracy (emb={emb_size}, hid={hidden_size})')
    plt.savefig(f"./resultats/GD/training_gru_emb{emb_size}_hid{hidden_size}.png")
    plt.close()

    results.append({
        "emb_size": emb_size,
        "hidden_size": hidden_size,
        "final_loss": losses[-1],
        "final_accuracy": accuracies[-1]
    })

df_results = pd.DataFrame(results)
df_results.to_csv("./resultats/GD/grid_search_results.csv", index=False)

print("\n--- tableau svg ---")

In [6]:
df_results = pd.read_csv("./resultats/GD/grid_search_results.csv")
df_results

Unnamed: 0,emb_size,hidden_size,final_loss,final_accuracy
0,8,8,1.840307,tensor(0.3438)
1,8,16,1.487727,tensor(0.3875)
2,8,32,1.045227,tensor(0.6062)
3,8,64,0.146321,tensor(0.9438)
4,8,128,0.048828,tensor(0.9875)
5,16,8,1.792798,tensor(0.3250)
6,16,16,1.418317,tensor(0.4375)
7,16,32,0.748182,tensor(0.7250)
8,16,64,0.093686,tensor(0.9812)
9,16,128,0.015651,tensor(0.9937)


## **Grid Search Droite Gauche**

In [8]:


os.makedirs("./models/DG/", exist_ok=True)
os.makedirs("./resultats/DG/", exist_ok=True)


emb_sizes = [8, 16, 32, 64]  
hidden_sizes = [8, 16, 32, 64, 128] 

results = []
for emb_size, hidden_size in itertools.product(emb_sizes, hidden_sizes):
    print(f"\n--- emb_size={emb_size}, hidden_size={hidden_size} ---\n")
    

    encoder = GRUEncoderDG(input_size, emb_size, hidden_size, num_layers).to(device)
    decoder = GRUDecoderDG(emb_size, hidden_size, output_size, num_layers).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=learning_rate)

    losses = []
    accuracies = []


    for epoch in range(epochs):
        data , _ = generate_data(batch_size, seq_len, input_size)
        data = data.to(device)
        

        _, hidden = encoder(data)
        hidden = hidden.to(device)
        # print(hidden.shape)
        hidden = hidden[-1].unsqueeze(0).repeat(num_layers, 1, 1) 
        # on fait ca pour ne garder que la sortie de la couche finale et la copier pour avoir le bon nombre de couches pour le décodeur
        # si on met une seule couche dans le décodeur ca bug et on veut pas la suite de couches différentes sinon on peut pas l'utiliser avec le tpdn

        # print(hidden.shape)
        reconstructed_x = decoder(hidden, seq_len) #.argmax(dim=1) pour la loss on prend les logits et pas les classes directement

        loss = criterion(reconstructed_x.view(-1, output_size), data.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accuracy = precision(reconstructed_x.argmax(dim=-1), data)
        losses.append(loss.item())
        accuracies.append(accuracy.cpu())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy*100:.2f}%")



    torch.save(encoder, f"./models/DG/gru_enc_emb{emb_size}_hid{hidden_size}.pth")
    torch.save(decoder, f"./models/DG/gru_dec_emb{emb_size}_hid{hidden_size}.pth")

    fig, ax1 = plt.subplots()
    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Loss', color='tab:blue')
    ax1.plot(range(epochs), losses, color='tab:blue', label='Loss')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    ax2 = ax1.twinx()
    ax2.set_ylabel('Accuracy (%)', color='tab:orange')
    ax2.plot(range(epochs), accuracies, color='tab:orange', label='Accuracy')
    ax2.tick_params(axis='y', labelcolor='tab:orange')

    plt.title(f'Loss and Accuracy (emb={emb_size}, hid={hidden_size})')
    plt.savefig(f"./resultats/DG/training_gru_emb{emb_size}_hid{hidden_size}.png")
    plt.close()

    results.append({
        "emb_size": emb_size,
        "hidden_size": hidden_size,
        "final_loss": losses[-1],
        "final_accuracy": accuracies[-1]
    })

df_results = pd.DataFrame(results)
df_results.to_csv("./resultats/DG/grid_search_results.csv", index=False)

print("\n--- tableau svg ---")


--- emb_size=8, hidden_size=8 ---

Epoch [10/10000], Loss: 2.3210, Accuracy: 12.50%
Epoch [20/10000], Loss: 2.3462, Accuracy: 10.62%
Epoch [30/10000], Loss: 2.3236, Accuracy: 8.75%
Epoch [40/10000], Loss: 2.3232, Accuracy: 5.62%
Epoch [50/10000], Loss: 2.3099, Accuracy: 11.25%
Epoch [60/10000], Loss: 2.3001, Accuracy: 9.38%
Epoch [70/10000], Loss: 2.3197, Accuracy: 10.00%
Epoch [80/10000], Loss: 2.3024, Accuracy: 14.38%
Epoch [90/10000], Loss: 2.2971, Accuracy: 7.50%
Epoch [100/10000], Loss: 2.2932, Accuracy: 15.62%
Epoch [110/10000], Loss: 2.3005, Accuracy: 11.25%
Epoch [120/10000], Loss: 2.2879, Accuracy: 11.88%
Epoch [130/10000], Loss: 2.2835, Accuracy: 11.25%
Epoch [140/10000], Loss: 2.2784, Accuracy: 13.75%
Epoch [150/10000], Loss: 2.2842, Accuracy: 14.38%
Epoch [160/10000], Loss: 2.2543, Accuracy: 18.12%
Epoch [170/10000], Loss: 2.2543, Accuracy: 17.50%
Epoch [180/10000], Loss: 2.2671, Accuracy: 15.62%
Epoch [190/10000], Loss: 2.2579, Accuracy: 16.25%
Epoch [200/10000], Loss: 2.

In [9]:
df_results = pd.read_csv("./resultats/DG/grid_search_results.csv")
df_results

Unnamed: 0,emb_size,hidden_size,final_loss,final_accuracy
0,8,8,1.640913,tensor(0.3187)
1,8,16,1.198593,tensor(0.5875)
2,8,32,0.374944,tensor(0.8875)
3,8,64,0.009178,tensor(1.)
4,8,128,0.013296,tensor(0.9937)
5,16,8,1.840205,tensor(0.3000)
6,16,16,1.07951,tensor(0.5875)
7,16,32,0.451516,tensor(0.8375)
8,16,64,0.041421,tensor(0.9812)
9,16,128,0.009103,tensor(1.)


# **Grid Search TPDN**

D'après les résultats précédent nous utiliserons les RNN avec une taille d'embedding de 8 et un hiddensize de 64 pour entraîner les TPDN (ratio performance/ temps de train). 

## TPDN gauche droite

In [None]:
## faire grid search en faisant varier la taille des embedding des fillers et des roles 

## TPDN droite gauche 

In [None]:
## faire grid search en faisant varier la taille des embedding des fillers et des roles 