# Intégration d'une Architecture Hybride GCN + Transformer dans DialogueGCN

## Introduction
Cette documentation décrit comment intégrer dans DialogueGCN une architecture hybride combinant un réseau de neurones sur graphe (le GCN relationnel d'origine, remplacé ici par un Graph Attention Network, GAT) et des couches Transformer.

## 1. Remplacement de RGCNConv par GATConv
**Fichier**: `models.py`

**Lignes concernées**: ~723-875 (classe `GraphNetwork`)

### Modification du réseau de graphe
```python
from torch_geometric.nn import GATConv  # Remplacer l'import RGCNConv

class GraphNetwork(torch.nn.Module):
    """Graph encoder over utterance nodes followed by the emotion classifier.

    With ``use_gat=True`` the node features go through two ``GATConv``
    layers (multi-head, then single-head).  Otherwise the original
    ``RGCNConv`` + ``GraphConv`` stack is used.  In both cases the graph
    output is concatenated with the input features and handed to
    ``classify_node_features``.

    Args:
        num_features: Size of each input node feature vector.
        num_classes: Output size of the final ``smax_fc`` layer.
        num_relations: Number of edge relation types (RGCN branch only).
        max_seq_len: Accepted for interface compatibility; not used here.
        hidden_size: Output width of each graph layer (per head for GAT).
        dropout: Dropout probability for the GAT layers, the dropout applied
            between them, and the ``nn.Dropout`` passed to the classifier.
        no_cuda: Forwarded unchanged to ``classify_node_features``.
        use_gat: Select the GAT branch (default) or the RGCN branch.
    """

    def __init__(self, num_features, num_classes, num_relations, max_seq_len,
                 hidden_size=64, dropout=0.5, no_cuda=False, use_gat=True):
        super().__init__()

        self.use_gat = use_gat

        if self.use_gat:
            # Graph Attention Networks: the first layer concatenates its heads,
            # so the second layer receives hidden_size * gat_heads features.
            gat_heads = 4
            self.conv1 = GATConv(num_features, hidden_size, heads=gat_heads, dropout=dropout)
            self.conv2 = GATConv(hidden_size * gat_heads, hidden_size, heads=1, dropout=dropout)
        else:
            # Original RGCN version. Imported locally so this fallback keeps
            # working even if the module-level import was switched to GATConv only.
            from torch_geometric.nn import GraphConv, RGCNConv

            self.conv1 = RGCNConv(num_features, hidden_size, num_relations, num_bases=30)
            self.conv2 = GraphConv(hidden_size, hidden_size)

        self.matchatt = MatchingAttention(num_features+hidden_size, num_features+hidden_size, att_type='general2')
        self.linear = nn.Linear(num_features+hidden_size, hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.smax_fc = nn.Linear(hidden_size, num_classes)
        self.no_cuda = no_cuda

    def forward(self, x, edge_index, edge_norm, edge_type, seq_lengths, umask, nodal_attn, avec):
        """Run the graph layers and classify every node.

        ``edge_norm`` is accepted but not used by either branch, and the GAT
        branch also ignores ``edge_type`` (relation types are not modelled).

        Returns:
            Whatever ``classify_node_features`` returns for the concatenation
            of the input features and the graph-layer output.
        """
        if self.use_gat:
            out = F.elu(self.conv1(x, edge_index))
            out = F.dropout(out, p=self.dropout.p, training=self.training)
            out = self.conv2(out, edge_index)
        else:
            out = self.conv1(x, edge_index, edge_type)
            out = self.conv2(out, edge_index)

        # Skip connection: keep the pre-graph features next to the graph output.
        emotions = torch.cat([x, out], dim=-1)
        log_prob = classify_node_features(emotions, seq_lengths, umask,
                                        self.matchatt, self.linear,
                                        self.dropout, self.smax_fc,
                                        nodal_attn, avec, self.no_cuda)
        return log_prob
```

## 2. Ajout d'une Couche Transformer
### Nouvelle classe `TransformerEncoderLayer`
```python
class TransformerEncoderLayer(nn.Module):
    """Post-norm Transformer encoder layer: self-attention, then feed-forward.

    Each sub-layer is wrapped as ``LayerNorm(x + Dropout(sublayer(x)))``.
    The attention module is built with its default layout, so inputs are
    sequence-first: ``(seq_len, batch, d_model)``.

    Args:
        d_model: Feature size of the input and output.
        nhead: Number of attention heads.
        dim_feedforward: Hidden width of the feed-forward sub-layer.
        dropout: Dropout probability used in attention and both sub-layers.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        # Self-attention sub-layer.
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Position-wise feed-forward sub-layer.
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)
        # One LayerNorm and one residual dropout per sub-layer.
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.activation = F.relu

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        """Encode ``src`` and return a tensor of the same shape.

        Args:
            src: Input sequence.
            src_mask: Optional attention mask, passed as ``attn_mask``.
            src_key_padding_mask: Optional padding mask, passed as
                ``key_padding_mask``.
        """
        attn_out, _ = self.self_attn(
            src, src, src,
            attn_mask=src_mask,
            key_padding_mask=src_key_padding_mask,
        )
        attended = self.norm1(src + self.dropout1(attn_out))

        hidden = self.activation(self.linear1(attended))
        ff_out = self.linear2(self.dropout(hidden))
        return self.norm2(attended + self.dropout2(ff_out))
```

## 3. Modification de `DialogueGCNModel`
**Lignes concernées**: ~878-1030 (classe `DialogueGCNModel`)

```python
class DialogueGCNModel(nn.Module):
    def __init__(self, ..., use_transformer=False, transformer_layers=2):
        ...
        self.use_transformer = use_transformer
        
        if self.use_transformer:
            self.transformer = nn.ModuleList([
                TransformerEncoderLayer(2*D_e, nhead=4, 
                                      dim_feedforward=4*2*D_e, 
                                      dropout=dropout)
                for _ in range(transformer_layers)
            ])
            self.pos_encoder = PositionalEncoding(2*D_e, dropout)
        
    def forward(self, U, qmask, umask, seq_lengths):
        ...
        if self.use_transformer:
            emotions = emotions.permute(1, 0, 2)  # (batch, seq_len, dim)
            src_key_padding_mask = torch.zeros(emotions.size(0), emotions.size(1)).bool()
            for i, length in enumerate(seq_lengths):
                src_key_padding_mask[i, length:] = True
            if not self.no_cuda:
                src_key_padding_mask = src_key_padding_mask.cuda()
            emotions = self.pos_encoder(emotions)
            for layer in self.transformer:
                emotions = layer(emotions, src_key_padding_mask=src_key_padding_mask)
            emotions = emotions.permute(1, 0, 2)
        ...
        return log_prob, edge_index, edge_norm, edge_type, edge_index_lengths
```

## 4. Ajout de `PositionalEncoding`
```python
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor.

    The table is stored as a non-trainable buffer ``pe`` of shape
    ``(max_len, 1, d_model)``: even feature indices hold sines, odd indices
    hold cosines. Both even and odd ``d_model`` are supported.

    Args:
        d_model: Feature size of the inputs.
        dropout: Dropout probability applied after adding the encodings.
        max_len: Number of positions precomputed; inputs must not be longer.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # An odd d_model has one cosine column fewer than sine columns, so
        # trim the angles to the number of odd feature indices.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:x.size(0)])``.

        The table is sliced along dim 0 and broadcast over dim 1, so ``x`` is
        expected as ``(seq_len, batch, d_model)``.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)
```

## 5. Configuration Recommandée
```python
# Example instantiation with the Transformer stage enabled.
# NOTE(review): `use_gat` is a GraphNetwork argument and is not passed here;
# how it reaches GraphNetwork depends on constructor code not shown — confirm.
model = DialogueGCNModel(
    base_model='GRU',
    # The Transformer width is 2*D_e = 200, which is divisible by nhead=4.
    D_m=100, D_g=150, D_p=150, D_e=100, D_h=100, D_a=100,
    graph_hidden_size=64,
    n_speakers=2,
    max_seq_len=100,
    window_past=10,
    window_future=10,
    n_classes=7,
    dropout_rec=0.5,
    dropout=0.5,  # also used as the Transformer/positional-encoding dropout
    use_transformer=True,
    transformer_layers=2  # number of stacked TransformerEncoderLayer modules
)
```

## Avantages de l'Architecture Hybride
- **GAT (Graph Attention Networks)**:
  - Meilleure modélisation des relations locales
  - Attention dynamique sur les voisins dans le graphe

- **Transformer**:
  - Capture les dépendances longue portée
  - Attention globale sur toute la conversation

- **Combinaison des forces**:
  - Le GAT gère les relations locales entre utterances
  - Le Transformer capture les motifs globaux
  - Meilleure performance sur les conversations longues