[![Open in Colab](hhtps://colab.research.google.com/assets/cloab-badge.svg)](https://colab.research.google.com/drive/1DvqoamIBAQ9weAOnIW7ST8e0NHPNam_x)

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.nn as nn
!pip install torchdiffeq
import torchdiffeq

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')




In [2]:
!pip install torchtext==0.6.0



### Construction du modèle

In [3]:
from models import ODEBlock
from plots import single_feature_plt
from dataloaders import ConcentricSphere
from training import Trainer
from plots import get_feature_history
from torch.utils.data import DataLoader
from plots import multi_feature_plt
from plots import trajectory_plt


In [39]:
import math
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_len):
        super().__init__()

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        self.pe = torch.zeros(max_len, 1, d_model)
        self.pe[:, 0, 0::2] = torch.sin(position * div_term)
        self.pe[:, 0, 1::2] = torch.cos(position * div_term)

    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return x

In [40]:
class EncoderMHAttentionODEFunc(nn.Module):
    """MLP modeling the derivative of FeedForward ODE system.
    device : torch.device
    data_dim : int
        Dimension of data.
    hidden_dim : int
        Dimension of hidden layers.
    augment_dim: int
        Dimension of augmentation. If 0 does not augment ODE, otherwise augments
        it with augment_dim dimensions.
    """
    def __init__(self, device, data_dim,  augment_dim=0, n_heads=1):
        super(EncoderMHAttentionODEFunc, self).__init__()
        self.device = device
        self.augment_dim = augment_dim
        self.data_dim = data_dim
        self.input_dim = data_dim + augment_dim
        self.nfe = 0  # Number of function evaluations

        self.n_heads = n_heads
        self.att = nn.MultiheadAttention(self.input_dim, self.n_heads)

    def forward(self, t, x):
        """
        t : torch.Tensor (not used here)
            Current time. Shape (1,).
        x : torch.Tensor
            Shape (batch_size, input_dim)
        """
        # Forward pass of model corresponds to one function evaluation, so
        # increment counter
        self.nfe += 1
        x = self.att(x, x, x)[0]
        return x

class FeedForwardODEFunc(nn.Module):
    """MLP modeling the derivative of FeedForward ODE system.
    device : torch.device
    data_dim : int
        Dimension of data.
    hidden_dim : int
        Dimension of hidden layers.
    augment_dim: int
        Dimension of augmentation. If 0 does not augment ODE, otherwise augments
        it with augment_dim dimensions.
    """
    def __init__(self, device, data_dim, hidden_dim, augment_dim=0):
        super(FeedForwardODEFunc, self).__init__()
        self.device = device
        self.augment_dim = augment_dim
        self.data_dim = data_dim
        self.input_dim = data_dim + augment_dim
        self.hidden_dim = hidden_dim
        self.nfe = 0  # Number of function evaluations

        self.layers = nn.Sequential(
            nn.Linear(self.input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, self.input_dim),
        )

    def forward(self, t, x):
        """
        t : torch.Tensor (not used here)
            Current time. Shape (1,).
        x : torch.Tensor
            Shape (batch_size, input_dim)
        """
        # Forward pass of model corresponds to one function evaluation, so
        # increment counter
        self.nfe += 1
        return self.layers(x)

In [41]:
class EncoderBlockFunc(nn.Module):
  def __init__(self, device, data_dim, hidden_dim, augment_dim=0, n_heads = 1):
        super(EncoderBlockFunc, self).__init__()
        self.device = device
        self.augment_dim = augment_dim
        self.data_dim = data_dim
        self.input_dim = data_dim + augment_dim
        self.hidden_dim = hidden_dim
        self.nfe = 0  # Number of function evaluations
        self.n_heads = n_heads
        self.mha = EncoderMHAttentionODEFunc(device, data_dim, augment_dim = augment_dim, n_heads = n_heads)
        self.ffd = FeedForwardODEFunc(device, data_dim, hidden_dim, augment_dim=augment_dim)
        self.layn1 = nn.LayerNorm(self.input_dim)
        self.layn2 = nn.LayerNorm(self.input_dim)

  def forward(self, t, x):
        """
        t : torch.Tensor (not used here)
            Current time. Shape (1,).
        x : torch.Tensor
            Shape (batch_size, input_dim)
        """
        # Forward pass of model corresponds to one function evaluation, so
        # increment counter
        self.nfe += 1
        x = self.mha(t,x)
        x = self.layn1(x)
        x = self.ffd(t,x)
        x = self.layn2(x)
        return x

In [42]:
class ODETransformerClassification(nn.Module):
  def __init__(self, device, seq_len, emb_size,  hidden_dim, n_blocks = 1,  augment_dim=0, n_heads = 1):
    super(ODETransformerClassification, self).__init__()
    self.device = device
    self.seq_len = seq_len
    self.emb_size = emb_size
    self.hidden_dim = hidden_dim
    self.augment_dim = augment_dim
    self.n_heads = n_heads
    ode_blocks = []
    for i in range(n_blocks):
      encoder_func = EncoderMHAttentionODEFunc(device, emb_size+ 2*i*augment_dim , augment_dim=augment_dim, n_heads=n_heads)
      ode_blocks.append(ODEBlock(device, encoder_func, is_seq=True))
      ode_blocks.append(nn.LayerNorm(emb_size+ (2*i+1)*augment_dim))
      ff_func = FeedForwardODEFunc(device, emb_size+ (2*i+1)*augment_dim, hidden_dim, augment_dim=augment_dim)
      ode_blocks.append(ODEBlock(device, ff_func, is_seq=True))
      ode_blocks.append(nn.LayerNorm(emb_size+ (2*i+2)*augment_dim))

    self.block_layers = nn.Sequential(*ode_blocks)
    self.final_layer = nn.Sequential(nn.Linear((emb_size+2*augment_dim*n_blocks)*seq_len, 1), nn.Sigmoid())
    self.pos_encoding = PositionalEncoding(emb_size, seq_len)

  def forward(self, x):
    x = self.pos_encoding(x)
    x = self.block_layers(x).view(x.shape[0], -1)
    return self.final_layer(x)

class ODETransformerClassificationAllinOne(nn.Module):
  def __init__(self, device, seq_len, emb_size,  hidden_dim, n_blocks = 1,  augment_dim=0, n_heads = 1):
    super(ODETransformerClassification, self).__init__()
    self.device = device
    self.seq_len = seq_len
    self.emb_size = emb_size
    self.hidden_dim = hidden_dim
    self.augment_dim = augment_dim
    self.n_heads = n_heads
    ode_blocks = []
    for i in range(n_blocks):
      encoder_block = EncoderBlockFunc(device,emb_size+i*augment_dim, hidden_dim,augment_dim=augment_dim, n_heads = n_heads)
      ode_blocks.append(ODEBlock(device, encoder_block, is_seq=True))

    self.block_layers = nn.Sequential(*ode_blocks)
    self.final_layer = nn.Sequential(nn.Linear((emb_size+augment_dim*n_blocks)*seq_len, 1), nn.Sigmoid())
    self.pos_encoding = PositionalEncoding(emb_size, seq_len)

  def forward(self, x):
    x = self.pos_encoding(x)
    x = self.block_layers(x).view(x.shape[0], -1)
    return self.final_layer(x)


### Importation des données

In [6]:
import torchtext as tt

emb_dim = 50
# Load pre-trained GloVe embeddings
glove = tt.vocab.GloVe(name='6B', dim=emb_dim)

# Define tokenizer
tokenizer = tt.data.utils.get_tokenizer("basic_english")

# Define fields for text and label
TEXT = tt.data.Field(lower=True, include_lengths=True, batch_first=True, tokenize=tokenizer)
LABEL = tt.data.Field(sequential=False)

# Define batch size, maximum review length, and maximum vocabulary words
batch_size = 64
max_review_len = 100
max_vocab_words = 3500

# Load IMDb dataset
train_ds, test_ds = tt.datasets.IMDB.splits(TEXT, LABEL)

# Build vocabulary
TEXT.build_vocab(train_ds, max_size=max_vocab_words-2)
LABEL.build_vocab(train_ds)

# Split train dataset into train and dev sets
train_ds, dev_ds = train_ds.split(split_ratio=0.8)

# Create data iterators
train_loader, dev_loader, test_loader = tt.data.BucketIterator.splits(
    (train_ds, dev_ds, test_ds),
    batch_sizes=(batch_size, batch_size, batch_size),
    shuffle=True,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

In [7]:
max_length = 128
# Convert tokenized text into embedded tensors
def get_embedded_text(loader):
    embedded_texts = []
    targets = []
    for batch in loader:
        text, lengths = batch.text
        target = batch.label.unsqueeze(1)
        target = torch.where(target == 1, torch.tensor(0.), torch.tensor(1.))  # Add dimension for concatenation


        embedded_text = []
        for sentence, length in zip(text, lengths):
            sentence_emb = []
            for i in range(min(length.item(), max_length)):
                word = TEXT.vocab.itos[sentence[i]]
                try:
                    word_emb = glove.vectors[glove.stoi[word]]
                except KeyError:
                    word_emb = glove.vectors[glove.stoi['unk']]
                sentence_emb.append(word_emb)
            # Padding
            sentence_emb += [torch.zeros(glove.vectors.shape[1])] * (max_length - length)
            embedded_text.append(torch.stack(sentence_emb))

        embedded_texts.append(torch.stack(embedded_text))
        targets.append(target)

    embedded_texts = torch.cat(embedded_texts)
    targets = torch.cat(targets).squeeze(1)
    return embedded_texts, targets


# Get embedded tensors for train, dev, and test datasets
train_embedded = get_embedded_text(train_loader)
print("Train embedded text tensor shape:", train_embedded[0].shape)


dev_embedded = get_embedded_text(dev_loader)
print("Dev embedded text tensor shape:", dev_embedded[0].shape)

test_embedded = get_embedded_text(test_loader)
print("Test embedded text tensor shape:", test_embedded[0].shape)

Train embedded text tensor shape: torch.Size([20000, 128, 50])
Dev embedded text tensor shape: torch.Size([5000, 128, 50])
Test embedded text tensor shape: torch.Size([25000, 128, 50])


In [8]:
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    def __init__(self, data):
        self.text = data[0]
        self.target = data[1]

    def __len__(self):
        return self.text.size(0)

    def __getitem__(self, idx):
        return self.text[idx], self.target[idx]


trainset_embedded = CustomDataset(train_embedded)
devset_embedded = CustomDataset(dev_embedded)
testset_embedded = CustomDataset(test_embedded)

traindataloader_embedded = DataLoader(trainset_embedded, batch_size=32, shuffle=True)
testdataloader_embedded = DataLoader(testset_embedded, batch_size=64, shuffle=True)
devdataloader_embedded = DataLoader(devset_embedded, batch_size=64, shuffle=True)

### Entrainement et test

In [43]:
model = ODETransformerClassification(device, max_length, emb_dim, 32, augment_dim=16)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [44]:
from tqdm import tqdm

def train(dataloader, model, optimizer, criterion):
    model.train()
    total_loss = 0.0
    total_samples = 0

    for batch in tqdm(dataloader):
        inputs, targets = batch
        inputs = inputs.to(device)  # Move inputs to GPU if available
        targets = targets.to(device)  # Move targets to GPU if available

        optimizer.zero_grad()  # Clear gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs.view(-1), targets.view(-1))  # Calculate loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        total_loss += loss.item() * len(inputs)
        total_samples += len(inputs)

    return total_loss / total_samples



for i in range(10):
  loss = train(devdataloader_embedded, model, optimizer, criterion)
  print(f"Epoch {i+1} : Loss = {loss}")

100%|██████████| 79/79 [03:54<00:00,  2.96s/it]


Epoch 1 : Loss = 135.11691789855956


100%|██████████| 79/79 [03:55<00:00,  2.98s/it]


Epoch 2 : Loss = 134.96739652404784


100%|██████████| 79/79 [03:54<00:00,  2.97s/it]


Epoch 3 : Loss = 135.1670230606079


100%|██████████| 79/79 [04:11<00:00,  3.18s/it]


Epoch 4 : Loss = 135.01730262145995


100%|██████████| 79/79 [04:32<00:00,  3.45s/it]


Epoch 5 : Loss = 134.96739613342285


100%|██████████| 79/79 [04:21<00:00,  3.32s/it]


Epoch 6 : Loss = 135.2169287666321


100%|██████████| 79/79 [04:27<00:00,  3.38s/it]


Epoch 7 : Loss = 134.9673974029541


100%|██████████| 79/79 [04:29<00:00,  3.41s/it]


Epoch 8 : Loss = 135.0173027191162


100%|██████████| 79/79 [04:24<00:00,  3.35s/it]


Epoch 9 : Loss = 135.01730262145995


100%|██████████| 79/79 [04:20<00:00,  3.30s/it]

Epoch 10 : Loss = 135.06720950012206





In [45]:
with torch.no_grad():
  model.eval()
  correct = 0
  total_loss = 0
  total = 0
  for inputs, targets in tqdm(devdataloader_embedded):
    outputs = model(inputs.to(device))
    total_loss += criterion(outputs.view(-1), targets.view(-1))*len(inputs)
    preds = torch.where(outputs.view(-1) <0.5, torch.tensor(1.), torch.tensor(1.))
    correct += torch.sum(preds==targets.view(-1)).item()
    total += len(inputs)

print(f"Dev loss : {total_loss/total}")
print(f"Dev accuracy : {correct/total}")

100%|██████████| 79/79 [01:47<00:00,  1.36s/it]

Dev loss : 135.01731872558594
Dev accuracy : 0.5082



