In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import math
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, TensorDataset


In [5]:
### Load the data
def load_data(input_file, output_file):
    """Read the input and output sequence tables from CSV.

    Args:
        input_file: Location of the input-sequence CSV (anything accepted by
            ``pd.read_csv``).
        output_file: Location of the output-sequence CSV.

    Returns:
        tuple: ``(input_sequences, output_sequences)``, two DataFrames in
        that order.
    """
    sources = (input_file, output_file)
    return tuple(pd.read_csv(source) for source in sources)


In [6]:
### Prepare the data for the Transformer
def prepare_transformer_data(input_sequences, output_sequences, test_size=0.2, val_size=0.1, random_state=42):
    """Split the sequence tables into train, validation and test arrays.

    The first two columns of ``input_sequences`` are kept as identifiers
    (per the original comment: ``video_id`` and ``object_id``); every
    remaining column of each table is used as features / targets.

    Args:
        input_sequences: DataFrame of encoder inputs.
        output_sequences: DataFrame of decoder targets. Rows are paired with
            ``input_sequences`` purely by position; its own first two
            columns are dropped without being compared.
        test_size: Passed to the first ``train_test_split`` call.
        val_size: Passed as ``test_size`` to the second split, i.e. it is
            applied to what is left after the test set was removed, not to
            the full dataset.
        random_state: Seed used for both splits.

    Returns:
        tuple: ``(X_train, X_val, X_test, y_train, y_val, y_test,
        ids_train, ids_val, ids_test)``.
    """
    # Identifier columns, encoder frames and decoder frames as plain arrays.
    identifiers = input_sequences.iloc[:, :2].values
    encoder_frames = input_sequences.iloc[:, 2:].values
    decoder_frames = output_sequences.iloc[:, 2:].values

    # First cut: hold out the test set.
    holdout_split = train_test_split(
        encoder_frames,
        decoder_frames,
        identifiers,
        test_size=test_size,
        random_state=random_state,
    )
    X_rest, X_test, y_rest, y_test, ids_rest, ids_test = holdout_split

    # Second cut: carve the validation set out of the remainder.
    validation_split = train_test_split(
        X_rest,
        y_rest,
        ids_rest,
        test_size=val_size,
        random_state=random_state,
    )
    X_train, X_val, y_train, y_val, ids_train, ids_val = validation_split

    return X_train, X_val, X_test, y_train, y_val, y_test, ids_train, ids_val, ids_test


In [7]:
class VideoDataset(Dataset):
    """Map-style dataset pairing input sequences, target sequences and ids.

    Each item is a dict with the keys ``'input'``, ``'target'`` and
    ``'meta'``.

    Args:
        X: Input sequences (array-like or tensor); stored as float32.
        y: Output sequences (array-like or tensor); stored as float32.
        ids: Identifiers for each row (array-like or tensor); stored as long.

    Raises:
        ValueError: If ``X``, ``y`` and ``ids`` do not have the same length.
    """

    def __init__(self, X, y, ids):
        self.X = self._as_owned_tensor(X, torch.float32)    # input sequences
        self.y = self._as_owned_tensor(y, torch.float32)    # output sequences
        self.ids = self._as_owned_tensor(ids, torch.long)   # identifiers

        if not (len(self.X) == len(self.y) == len(self.ids)):
            raise ValueError(
                "X, y and ids must have the same length, got "
                f"{len(self.X)}, {len(self.y)} and {len(self.ids)}"
            )

    @staticmethod
    def _as_owned_tensor(values, dtype):
        """Return an independent, detached tensor copy of ``values``.

        ``torch.tensor(existing_tensor)`` emits a UserWarning recommending
        ``clone().detach()``, so tensors are copied that way; any other
        input is converted with ``torch.tensor`` as before.
        """
        if isinstance(values, torch.Tensor):
            return values.detach().clone().to(dtype)
        return torch.tensor(values, dtype=dtype)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {
            'input': self.X[idx],
            'target': self.y[idx],
            'meta': self.ids[idx]  # 'meta' carries the identifiers
        }

In [8]:
### Add a positional encoding
def add_positional_encoding(data, sequence_length, d_model):
    """Add a sinusoidal positional encoding to ``data``.

    A ``(sequence_length, d_model)`` table is built with sines in the even
    columns and cosines in the odd columns, then added to ``data`` using
    normal broadcasting.

    Args:
        data: Tensor to which the encoding is added. The table is created
            on this tensor's device so the addition also works for CUDA
            inputs.
        sequence_length: Number of positions (rows) in the table.
        d_model: Width (columns) of the table. Odd values are supported:
            the last sine column then has no cosine partner.

    Returns:
        ``data + pos_encoding``.
    """
    # Build the table where `data` lives; non-tensor input keeps the default device.
    target_device = data.device if isinstance(data, torch.Tensor) else None

    position = torch.arange(sequence_length, device=target_device).unsqueeze(1).float()
    div_term = torch.exp(
        torch.arange(0, d_model, 2, device=target_device)
        * (-torch.log(torch.tensor(10000.0)) / d_model)
    )
    angles = position * div_term  # one column per sine/cosine pair

    pos_encoding = torch.zeros(sequence_length, d_model, device=target_device)
    pos_encoding[:, 0::2] = torch.sin(angles)
    # There are d_model // 2 odd columns, one fewer than the even columns
    # when d_model is odd, so trim the angles to match.
    pos_encoding[:, 1::2] = torch.cos(angles[:, : d_model // 2])
    return data + pos_encoding

In [9]:
def convert_to_tensors(X_train, X_val, X_test, y_train, y_val, y_test, device='cpu'):
    """Convert the six split arrays to float32 tensors on ``device``.

    Args:
        X_train, X_val, X_test: Input arrays of the three splits.
        y_train, y_val, y_test: Target arrays of the three splits.
        device: Device on which every tensor is created.

    Returns:
        tuple: The six tensors, in the same order as the arguments.
    """
    splits = (X_train, X_val, X_test, y_train, y_val, y_test)
    return tuple(
        torch.tensor(split, dtype=torch.float32, device=device)
        for split in splits
    )


In [10]:
def create_dataloaders(X_train, y_train, ids_train, X_val, y_val, ids_val, X_test, y_test, ids_test, batch_size=32):
    """Wrap the three splits in ``VideoDataset`` objects and DataLoaders.

    Only the training loader shuffles; the validation and test loaders keep
    the order of their datasets.

    Args:
        X_train, y_train, ids_train: Inputs, targets and ids of the training split.
        X_val, y_val, ids_val: Same for the validation split.
        X_test, y_test, ids_test: Same for the test split.
        batch_size: Batch size shared by the three loaders.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``.
    """
    train_loader = DataLoader(
        VideoDataset(X_train, y_train, ids_train),
        batch_size=batch_size,
        shuffle=True,
    )
    # Evaluation loaders: fixed order.
    val_loader, test_loader = (
        DataLoader(VideoDataset(inputs, targets, ids), batch_size=batch_size, shuffle=False)
        for inputs, targets, ids in (
            (X_val, y_val, ids_val),
            (X_test, y_test, ids_test),
        )
    )
    return train_loader, val_loader, test_loader

In [11]:
### Load the data
# NOTE(review): these are absolute, machine-specific paths; the cell only runs
# where this exact directory layout exists.
input_file = 'C:/Users/h/Desktop/MASTER IAAD/S3/projet2/Data/annotations_transformers/input_sequences.csv'
output_file = 'C:/Users/h/Desktop/MASTER IAAD/S3/projet2/Data/annotations_transformers/output_sequences.csv'

# Read both CSV files into DataFrames.
input_sequences, output_sequences = load_data(input_file, output_file)

# Split into train / validation / test using the function's default sizes and seed.
X_train, X_val, X_test, y_train, y_val, y_test, ids_train, ids_val, ids_test = prepare_transformer_data(
    input_sequences, output_sequences
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Convert the six feature/target arrays to float32 tensors on `device`
# (the id arrays are not converted here).
X_train_tensor, X_val_tensor, X_test_tensor, y_train_tensor, y_val_tensor, y_test_tensor = convert_to_tensors(
    X_train, X_val, X_test, y_train, y_val, y_test, device
)

In [12]:
# Build the train / validation / test loaders from the tensors created above
# together with the identifier arrays returned by the split.
batch_size = 32
train_loader, val_loader, test_loader = create_dataloaders(
    X_train_tensor, y_train_tensor, ids_train,
    X_val_tensor, y_val_tensor, ids_val,
    X_test_tensor, y_test_tensor, ids_test,
    batch_size=batch_size
)

  self.X = torch.tensor(X, dtype=torch.float32)  # Séquences d'entrée
  self.y = torch.tensor(y, dtype=torch.float32)  # Séquences de sortie


In [13]:
# ADE / FDE computation
def compute_ade_fde(predictions, targets, coord_dim=None):
    """Compute ADE (Average Displacement Error) and FDE (Final Displacement Error).

    The displacement is the Euclidean norm of ``predictions - targets`` over
    the last axis. ADE is the mean of all displacements; FDE is computed
    from the slice ``[:, -1]``, i.e. the last index along axis 1.

    For inputs shaped ``(batch, steps, coord)`` this gives the usual
    per-step ADE and last-step FDE. For 2-D inputs shaped
    ``(batch, features)`` and ``coord_dim=None`` the original behaviour is
    kept: the whole feature axis is treated as a single point for ADE, and
    the FDE expression reduces over the batch axis. Pass ``coord_dim`` to
    score flat trajectories step by step instead.

    Args:
        predictions: Predicted trajectories (tensor with at least 2 dims).
        targets: Ground-truth trajectories, same shape as ``predictions``.
        coord_dim: Optional number of coordinates per time step. When given,
            the last axis of both tensors is reshaped to
            ``(steps, coord_dim)`` before computing the metrics; this
            assumes the coordinates of one step are stored contiguously
            (``x1, y1, x2, y2, ...``).

    Returns:
        tuple[float, float]: ``(ade, fde)``.

    Raises:
        ValueError: If ``coord_dim`` is given and does not divide the size
            of the last axis.
    """
    # Metrics are reporting values only; keep them out of the autograd graph.
    predictions = predictions.detach()
    targets = targets.detach()

    if coord_dim is not None:
        if coord_dim <= 0 or predictions.shape[-1] % coord_dim != 0:
            raise ValueError(
                f"coord_dim={coord_dim} does not divide the last dimension "
                f"({predictions.shape[-1]})"
            )
        predictions = predictions.reshape(*predictions.shape[:-1], -1, coord_dim)
        targets = targets.reshape(*targets.shape[:-1], -1, coord_dim)

    ade = torch.mean(torch.sqrt(torch.sum((predictions - targets) ** 2, dim=-1)))
    fde = torch.sqrt(torch.sum((predictions[:, -1] - targets[:, -1]) ** 2, dim=-1)).mean()
    return ade.item(), fde.item()

In [14]:
# Improved Transformer model
class TransformerModel(nn.Module):
    """Encoder-decoder network: linear embeddings, an LSTM, then a Transformer.

    The source goes through a linear embedding, a positional encoding, an
    LSTM and a Transformer encoder. The target goes through its own linear
    embedding and positional encoding and is decoded against the encoder
    memory. A final linear layer projects back to ``output_dim``.

    Args:
        input_dim: Size of the last axis of ``src``.
        output_dim: Size of the last axis of ``tgt`` and of the output.
        d_model: Width of the embeddings and of every Transformer layer.
        nhead: Number of attention heads per layer.
        num_encoder_layers: Number of stacked encoder layers.
        num_decoder_layers: Number of stacked decoder layers.
        dim_feedforward: Hidden width of the feed-forward sub-layers.
        dropout: Dropout probability of the Transformer layers.

    NOTE(review): the LSTM is built with ``batch_first=True`` while the
    Transformer layers are built without it, so the two disagree on the
    axis order of 3-D inputs. Confirm the intended input layout.
    """

    def __init__(self, input_dim, output_dim, d_model=128, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.2):
        super().__init__()

        self.d_model = d_model

        # Hyper-parameters shared by every encoder and decoder layer.
        layer_options = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )

        # Linear embeddings for the source and target features.
        self.embedding = nn.Linear(input_dim, d_model)
        self.output_embedding = nn.Linear(output_dim, d_model)

        # LSTM applied to the embedded source before the Transformer encoder.
        self.lstm_encoder = nn.LSTM(d_model, d_model, batch_first=True)

        # Transformer encoder stack.
        encoder_layer = nn.TransformerEncoderLayer(**layer_options)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=num_encoder_layers
        )

        # Transformer decoder stack (multi-head attention over the memory).
        decoder_layer = nn.TransformerDecoderLayer(**layer_options)
        self.transformer_decoder = nn.TransformerDecoder(
            decoder_layer, num_layers=num_decoder_layers
        )

        # Output head.
        self.fc_out = nn.Linear(d_model, output_dim)

    def forward(self, src, tgt):
        """Run the network on a source / target pair.

        Args:
            src: Source tensor whose last axis has size ``input_dim``.
            tgt: Target tensor whose last axis has size ``output_dim``.

        Returns:
            Tensor with the leading shape of ``tgt`` and a last axis of size
            ``output_dim``.

        NOTE(review): the positional encoding length is ``size(0)`` of the
        embedded tensors, so the first axis is what gets treated as the
        sequence axis. Confirm that matches the data fed by the loaders.
        NOTE(review): the decoder is called without any target mask, so each
        output position can attend to the whole of ``tgt``.
        """
        scale = math.sqrt(self.d_model)

        # Embed both streams and rescale by sqrt(d_model).
        encoder_in = self.embedding(src) * scale
        decoder_in = self.output_embedding(tgt) * scale

        # Add the positional encoding along the first axis.
        encoder_in = add_positional_encoding(encoder_in, encoder_in.size(0), self.d_model)
        decoder_in = add_positional_encoding(decoder_in, decoder_in.size(0), self.d_model)

        # LSTM, then Transformer encoder, produce the memory.
        recurrent_out, _ = self.lstm_encoder(encoder_in)
        memory = self.transformer_encoder(recurrent_out)

        # Decode the target stream against the memory and project out.
        decoded = self.transformer_decoder(decoder_in, memory)
        return self.fc_out(decoded)

In [15]:

# Model initialisation
# Feature widths are read from the second axis of the training tensors.
input_dim = X_train_tensor.shape[1]
output_dim = y_train_tensor.shape[1]
# Move the model to `device`: the training and validation loops move every
# batch there, so the parameters must live on the same device.
model = TransformerModel(input_dim, output_dim).to(device)




In [16]:
# Define the loss function and the optimizer
# Mean-squared error between model outputs and targets.
criterion = nn.MSELoss()
# Adam over all model parameters, with a small weight decay.
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)


In [17]:
# Validation function
def validate(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` and return averaged metrics.

    Uses the notebook-level ``criterion`` and ``output_dim``. Batches are
    moved to the device the model's parameters are on, so evaluation works
    whether or not the model was moved to the global ``device``.

    NOTE(review): the ground-truth ``targets`` are passed to the model as
    its second argument, so these metrics are measured with the targets
    available to the network. Confirm this is the intended evaluation.

    Args:
        model: Network called as ``model(inputs, targets)``.
        val_loader: Iterable of dict batches with ``'input'`` and
            ``'target'`` entries; must support ``len()``.

    Returns:
        tuple[float, float, float]: ``(val_loss, ade, fde)``, each averaged
        over the number of batches.

    Raises:
        ValueError: If ``val_loader`` contains no batches.
    """
    num_batches = len(val_loader)
    if num_batches == 0:
        raise ValueError("val_loader contains no batches; nothing to validate")

    # Follow the model's own device; fall back to the global one for a
    # parameter-free model.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device(device)

    model.eval()
    val_loss = 0.0
    ade, fde = 0.0, 0.0
    with torch.no_grad():
        for batch in val_loader:
            inputs = batch['input'].to(model_device)
            targets = batch['target'].to(model_device)

            outputs = model(inputs, targets)

            loss = criterion(outputs.view(-1, output_dim), targets.view(-1, output_dim))
            val_loss += loss.item()

            batch_ade, batch_fde = compute_ade_fde(outputs, targets)
            ade += batch_ade
            fde += batch_fde

    # Per-batch averages (not weighted by batch size).
    val_loss /= num_batches
    ade /= num_batches
    fde /= num_batches

    return val_loss, ade, fde

In [18]:
# Model training
def train_model(model, train_loader, val_loader, epochs=10):
    """Train ``model`` and print training / validation metrics every epoch.

    Uses the notebook-level ``criterion``, ``optimizer`` and ``output_dim``.
    Batches are moved to the device the model's parameters are on, so
    training works whether or not the model was moved to the global
    ``device``.

    Args:
        model: Network called as ``model(inputs, targets)``.
        train_loader: Iterable of dict batches with ``'input'`` and
            ``'target'`` entries; must support ``len()``.
        val_loader: Loader passed to ``validate`` after each epoch.
        epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    # Follow the model's own device; fall back to the global one for a
    # parameter-free model.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device(device)

    for epoch in range(epochs):
        num_batches = len(train_loader)
        if num_batches == 0:
            raise ValueError("train_loader contains no batches; nothing to train on")

        model.train()
        running_loss = 0.0
        ade, fde = 0.0, 0.0

        for batch in train_loader:
            inputs = batch['input'].to(model_device)
            targets = batch['target'].to(model_device)

            optimizer.zero_grad()

            outputs = model(inputs, targets)

            loss = criterion(outputs.view(-1, output_dim), targets.view(-1, output_dim))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Reporting only: detach so the metric does not extend the autograd graph.
            batch_ade, batch_fde = compute_ade_fde(outputs.detach(), targets)
            ade += batch_ade
            fde += batch_fde

        # Per-batch averages (not weighted by batch size).
        running_loss /= num_batches
        ade /= num_batches
        fde /= num_batches

        val_loss, val_ade, val_fde = validate(model, val_loader)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {running_loss:.4f}, Train ADE: {ade:.4f}, Train FDE: {fde:.4f}")
        print(f"Validation Loss: {val_loss:.4f}, Validation ADE: {val_ade:.4f}, Validation FDE: {val_fde:.4f}")

In [19]:
# Train for 10 epochs; train_model prints the train and validation metrics after each one.
train_model(model, train_loader, val_loader, epochs=10)

Epoch 1/10, Train Loss: 0.0422, Train ADE: 0.7153, Train FDE: 0.3268
Validation Loss: 0.0345, Validation ADE: 0.6494, Validation FDE: 0.0154
Epoch 2/10, Train Loss: 0.0343, Train ADE: 0.6499, Train FDE: 0.1061
Validation Loss: 0.0353, Validation ADE: 0.6650, Validation FDE: 0.0593
Epoch 3/10, Train Loss: 0.0050, Train ADE: 0.1957, Train FDE: 0.1025
Validation Loss: 0.0008, Validation ADE: 0.0846, Validation FDE: 0.0187
Epoch 4/10, Train Loss: 0.0006, Train ADE: 0.0794, Train FDE: 0.0491
Validation Loss: 0.0003, Validation ADE: 0.0513, Validation FDE: 0.0211
Epoch 5/10, Train Loss: 0.0003, Train ADE: 0.0574, Train FDE: 0.0307
Validation Loss: 0.0002, Validation ADE: 0.0435, Validation FDE: 0.0181
Epoch 6/10, Train Loss: 0.0002, Train ADE: 0.0492, Train FDE: 0.0216
Validation Loss: 0.0002, Validation ADE: 0.0453, Validation FDE: 0.0092
Epoch 7/10, Train Loss: 0.0002, Train ADE: 0.0423, Train FDE: 0.0143
Validation Loss: 0.0001, Validation ADE: 0.0321, Validation FDE: 0.0076
Epoch 8/10, T

In [20]:
# Save the model
# Only the state_dict (the weights) is written, to 'model.pth' in the current working directory.
import torch
torch.save(model.state_dict(), 'model.pth')


In [21]:

# Save a checkpoint holding both the model weights and the optimizer state,
# under the keys 'model_state_dict' and 'optimizer_state_dict'.
torch.save({ 
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),  
}, 'model_checkpoint.pth')


In [5]:
import torch
# Reload the model and optimizer state from the checkpoint.
# weights_only=True restricts unpickling to tensors and plain containers.
# This cell needs `model` and `optimizer` to already exist in the running
# kernel (re-create them first after a restart).
checkpoint = torch.load('model_checkpoint.pth', weights_only=True)
# The checkpoint was saved with the keys 'model_state_dict' and
# 'optimizer_state_dict'; the file name is not a key.
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Optional: if the epoch was saved as well, it can be restored here
# epoch = checkpoint['epoch']
model.eval()  # switch to evaluation mode before making predictions


NameError: name 'model' is not defined

In [9]:
# Rebuild the model structure
# Needs TransformerModel, input_dim, output_dim and device to be defined in
# the running kernel (run the earlier cells first after a restart).
model = TransformerModel(input_dim=input_dim, output_dim=output_dim)

# Load the saved weights
# weights_only=True matches the checkpoint-loading cell and avoids
# unpickling arbitrary objects.
# NOTE(review): the weights were saved to the relative path 'model.pth';
# this absolute path assumes the notebook ran from this Codes folder — confirm.
model.load_state_dict(
    torch.load(
        'C:/Users/h/Desktop/MASTER IAAD/S3/projet2/Codes/model.pth',
        map_location=device,
        weights_only=True,
    )
)

# Move the model to the right device (CPU or GPU)
model.to(device)

# Switch the model to evaluation mode
model.eval()


NameError: name 'TransformerModel' is not defined