# Detection d'anomalies CAN - CNN + LSTM
Projet CANlock - Session H26

In [1]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tqdm import tqdm

from canlock.db.database import get_session, init_db
from canlock.decoder import SessionDecoder

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Device: {device}")

Device: mps


## Chargement des donnees

In [3]:
init_db()

with get_session() as session:
    decoder = SessionDecoder(db=session)
    sessions = decoder.list_sessions()

print(f"Sessions disponibles: {len(sessions)}")

Sessions disponibles: 228


In [4]:
with get_session() as session:
    decoder = SessionDecoder(db=session)
    df_raw = decoder.decode(session_id=sessions[12].id)

df_raw.set_index("timestamp", inplace=True)
print(f"Donnees: {df_raw.shape}")

Decoding messages: 100%|██████████| 1600032/1600032 [00:30<00:00, 52450.83it/s]


Donnees: (1288461, 329)


In [5]:
fill_rates = df_raw.notna().mean().sort_values(ascending=False)
top_cols = fill_rates.head(15).index.tolist()
df = df_raw[top_cols].copy()
print(f"Colonnes selectionnees: {len(top_cols)}")
print(f"Shape: {df.shape}")

Colonnes selectionnees: 15
Shape: (1288461, 15)


## Preparation des donnees

In [6]:
df_clean = df.ffill().bfill().dropna()
print(f"Apres nettoyage: {df_clean.shape}")

SAMPLE_SIZE = 50000
df_sample = df_clean.iloc[:SAMPLE_SIZE]
print(f"Echantillon: {df_sample.shape}")

Apres nettoyage: (1288461, 15)
Echantillon: (50000, 15)


In [7]:
scaler = StandardScaler()
data_scaled = scaler.fit_transform(df_sample.values)

SEQ_LEN = 30
STRIDE = 30

sequences = []
for i in range(0, len(data_scaled) - SEQ_LEN + 1, STRIDE):
    sequences.append(data_scaled[i:i + SEQ_LEN])
    
sequences = np.array(sequences)
print(f"Sequences: {sequences.shape}")

Sequences: (1666, 30, 15)


In [8]:
n_seq = len(sequences)
labels = np.zeros(n_seq, dtype=np.int64)

n_anomalies = int(n_seq * 0.15)
anomaly_idx = np.random.choice(n_seq, n_anomalies, replace=False)

for idx in anomaly_idx:
    attack = np.random.choice(['spike', 'noise', 'replay'])
    if attack == 'spike':
        col = np.random.randint(0, sequences.shape[2])
        sequences[idx, 10:20, col] = np.random.choice([-4, 4])
    elif attack == 'noise':
        sequences[idx] += np.random.normal(0, 1.5, sequences[idx].shape)
    else:
        pattern = sequences[idx, :5, :]
        for j in range(5, SEQ_LEN, 5):
            sequences[idx, j:min(j+5, SEQ_LEN), :] = pattern[:min(5, SEQ_LEN-j), :]
    labels[idx] = 1

print(f"Normal: {(labels==0).sum()}, Anomalies: {(labels==1).sum()}")

Normal: 1417, Anomalies: 249


In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    sequences, labels, test_size=0.2, random_state=42, stratify=labels
)

X_train_t = torch.FloatTensor(X_train)
X_test_t = torch.FloatTensor(X_test)
y_train_t = torch.LongTensor(y_train)
y_test_t = torch.LongTensor(y_test)

print(f"Train: {X_train_t.shape}, Test: {X_test_t.shape}")

Train: torch.Size([1332, 30, 15]), Test: torch.Size([334, 30, 15])


## Modele CNN + LSTM

In [10]:
class CNN_LSTM(nn.Module):
    def __init__(self, n_features, seq_len):
        super().__init__()
        
        self.conv1 = nn.Conv1d(n_features, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(2)
        
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(2)
        
        self.lstm = nn.LSTM(64, 32, num_layers=1, batch_first=True, bidirectional=True)
        
        self.fc1 = nn.Linear(64, 16)
        self.fc2 = nn.Linear(16, 2)
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = x.permute(0, 2, 1)
        
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)
        
        x = x.permute(0, 2, 1)
        lstm_out, _ = self.lstm(x)
        x = lstm_out[:, -1, :]
        
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

In [11]:
n_features = X_train.shape[2]
seq_len = X_train.shape[1]

model = CNN_LSTM(n_features, seq_len).to(device)
print(model)

n_params = sum(p.numel() for p in model.parameters())
print(f"\nParametres: {n_params:,}")

CNN_LSTM(
  (conv1): Conv1d(15, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (lstm): LSTM(64, 32, batch_first=True, bidirectional=True)
  (fc1): Linear(in_features=64, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=2, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)

Parametres: 34,034


## Entrainement

In [12]:
BATCH_SIZE = 32
EPOCHS = 30
LR = 0.001

train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_t, y_test_t), batch_size=BATCH_SIZE)

weights = torch.FloatTensor([1.0, (labels==0).sum()/(labels==1).sum()]).to(device)
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [13]:
history = {'loss': [], 'val_loss': [], 'val_acc': []}

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        
        optimizer.zero_grad()
        out = model(batch_x)
        loss = criterion(out, batch_y)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            out = model(batch_x)
            val_loss += criterion(out, batch_y).item()
            _, pred = torch.max(out, 1)
            correct += (pred == batch_y).sum().item()
            total += batch_y.size(0)
    
    history['loss'].append(train_loss/len(train_loader))
    history['val_loss'].append(val_loss/len(test_loader))
    history['val_acc'].append(correct/total)
    
    if (epoch+1) % 5 == 0:
        print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {history['loss'][-1]:.4f} - Val Loss: {history['val_loss'][-1]:.4f} - Val Acc: {history['val_acc'][-1]:.4f}")

Epoch 5/30 - Loss: 0.3894 - Val Loss: 0.3733 - Val Acc: 0.9371
Epoch 10/30 - Loss: 0.2020 - Val Loss: 0.4445 - Val Acc: 0.9192
Epoch 15/30 - Loss: 0.1116 - Val Loss: 0.5409 - Val Acc: 0.8713
Epoch 20/30 - Loss: 0.0751 - Val Loss: 0.3215 - Val Acc: 0.9461
Epoch 25/30 - Loss: 0.0469 - Val Loss: 0.5057 - Val Acc: 0.9281
Epoch 30/30 - Loss: 0.0503 - Val Loss: 0.6127 - Val Acc: 0.9521


## Evaluation

In [14]:
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        out = model(batch_x)
        _, pred = torch.max(out, 1)
        all_preds.extend(pred.cpu().numpy())
        all_targets.extend(batch_y.numpy())

all_preds = np.array(all_preds)
all_targets = np.array(all_targets)

In [None]:
print(classification_report(all_targets, all_preds, target_names=['Normal', 'Anomalie']))

Rapport de classification:
              precision    recall  f1-score   support

      Normal       0.95      1.00      0.97       284
    Anomalie       1.00      0.68      0.81        50

    accuracy                           0.95       334
   macro avg       0.97      0.84      0.89       334
weighted avg       0.95      0.95      0.95       334



In [16]:
cm = confusion_matrix(all_targets, all_preds)
tn, fp, fn, tp = cm.ravel()

print(f"Matrice de confusion:")
print(f"  TN={tn}  FP={fp}")
print(f"  FN={fn}  TP={tp}")
print(f"\nTaux de faux positifs: {fp/(fp+tn)*100:.2f}%")

Matrice de confusion:
  TN=284  FP=0
  FN=16  TP=34

Taux de faux positifs: 0.00%


In [17]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=1, cols=2, subplot_titles=('Loss', 'Accuracy'))

fig.add_trace(go.Scatter(y=history['loss'], name='Train'), row=1, col=1)
fig.add_trace(go.Scatter(y=history['val_loss'], name='Val'), row=1, col=1)
fig.add_trace(go.Scatter(y=history['val_acc'], name='Val Acc'), row=1, col=2)

fig.update_layout(height=400, title_text="Courbes d'entrainement")
fig.show()

In [18]:
models_dir = Path.cwd().parent / "models"
models_dir.mkdir(exist_ok=True)

torch.save(model.state_dict(), models_dir / "cnn_lstm.pth")
print(f"Modele sauvegarde dans {models_dir / 'cnn_lstm.pth'}")

Modele sauvegarde dans /Users/nikova/Desktop/canLock/CANlock/models/cnn_lstm.pth
