In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [3]:
import pandas as pd

training_df = pd.read_parquet("/kaggle/input/esa-telemetry-anomaly-detection/train.parquet")
test_df = pd.read_parquet("/kaggle/input/esa-telemetry-anomaly-detection/test.parquet")

target_channels = pd.read_csv("/kaggle/input/esa-telemetry-anomaly-detection/target_channels.csv")
target_channels = target_channels.iloc[:,0].tolist()
all_channels = training_df.columns.tolist()
all_channels = [col for col in training_df.columns if col != 'id' and not col.startswith('telecommand') and col != 'is_anomaly']

In [7]:
import torch
import math
from torch.utils.data import Dataset
from torch.utils.data import Sampler

class EsaDataset(Dataset):
    def __init__(self, dataframe, target_channels, window_len, split_type):
        self.df = dataframe
        self.target_channels = target_channels
        self.window_len = window_len
        self.split_type = split_type

    def __len__(self):
        return len(self.df)
        
    def __getitem__(self, idx):
        points = self.df.iloc[idx: idx + self.window_len]
        x = torch.tensor(points[self.target_channels].values, dtype=torch.float32)
        y = None
        
        if self.split_type == "train":
            y = torch.tensor(points["is_anomaly"].values, dtype=torch.float32)
        
        return x, y

class WindowedSampler(Sampler):
    def __init__(self, data_source, window_size):
        self.data_source = data_source
        self.window_size = window_size

    def __iter__(self):
        return iter(range(0, len(self.data_source), self.window_size))

    def __len__(self):
        return math.ceil(len(self.data_source) / self.window_size)


In [4]:
# Can rewrite the lower methods with tensor operations and probably refactor them in a single one

def build_temporal_edges(batch_size, seq_len, device):
    source, target = [], []
    
    for i in range(1, seq_len):
        for j in range(max(0, i - seq_len), i):
            source.append(j)
            target.append(i)
    
    edge_index_single = torch.tensor([source, target], dtype=torch.long, device=device)
    edges = []
    offsets = torch.arange(batch_size, device=device) * seq_len  
    edge_index_batch = edge_index_single.unsqueeze(2) + offsets.view(1, 1, -1)  # [2, num_edges_single, batch_size]
    
    return edge_index_batch.permute(2, 0, 1).reshape(2, -1)
    

def build_variable_edges(num_channels, batch_size, device):
    num_nodes = num_channels * batch_size
    single_edge_index = torch.cartesian_prod(
        torch.arange(num_channels, device=device), 
        torch.arange(num_channels, device=device)
    ).t()

    edge_indices = []
    
    for b in range(batch_size):
        offset = b * num_channels
        ei = single_edge_index + offset 
        edge_indices.append(ei)

    edge_index = torch.cat(edge_indices, dim=1)
    
    return edge_index

In [5]:
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax
import torch.nn.functional as f
import torch
import torch.nn.utils.weight_norm as weight_norm
import torch.nn as nn

class GraphAttentionBlock(MessagePassing):
    def __init__(self, in_channels):
        super().__init__(aggr="sum")
        self.sigmoid = nn.Sigmoid() 
        self.leaky_relu = nn.LeakyReLU()
        self.linear = nn.Linear(in_channels, in_channels)
        self.a = nn.Parameter(torch.empty(in_channels*2,in_channels))       
        nn.init.xavier_uniform_(self.a.data)                       
       
    def forward(self, X, edge_index):
        X = self.linear(X)
        return self.sigmoid(self.propagate(edge_index, x=X))

    def message(self, x_j, x_i, index):
        out = torch.cat([x_j, x_i], dim=1)
        out = self.leaky_relu(out)
        out = out @ self.a
        alpha = softmax(out, index)
        return x_j * alpha 

class DGATnet(nn.Module):
    def __init__(self, in_channels, num_layers):
        super().__init__()
        self.layers = nn.ModuleList()

        for i in range(num_layers):
            self.layers.append(GraphAttentionBlock(in_channels))        
    
    def forward(self, x, edge_index):
        out = x
        for layer in self.layers:
            out = layer(out, edge_index)
            
        return out

class DilatedCausalConvBlock(nn.Module):
    def __init__(self, in_channels, kernel_size):
        super().__init__()
        dilation_values = [1,2,4]
        curr_channels = in_channels
        self.blocks = nn.ModuleList()
        self.relu = nn.ReLU()
        
        for d in dilation_values:
            conv = weight_norm(nn.Conv1d(curr_channels, curr_channels, kernel_size, dilation=d))
            # eventualmente aggiungere la batch norm
            self.blocks.append(nn.Sequential(conv, self.relu))

    def forward(self, X):
        out = X
        for i in range(len(self.blocks)):
            padding = (self.blocks[i][0].kernel_size[0] - 1) * self.blocks[i][0].dilation[0]
            out = f.pad(out, (padding, 0))
            out = self.blocks[i](out)
            
        return out

class TCNblock(nn.Module):
    def __init__(self, in_channels, kernel_size):
        super().__init__()
        self.dilated_causal_conv_block = DilatedCausalConvBlock(in_channels, kernel_size)
        self.dropout = nn.Dropout(p=0.2)
    
    def forward(self, X):
        out = self.dilated_causal_conv_block(X)
        out = self.dropout(out)
        return X + out

class TCNet(nn.Module):
    def __init__(self, in_channels, num_layers, kernel_size):
        super().__init__()
        self.layers = nn.ModuleList()
        for i in range (num_layers):
            self.layers.append(TCNblock(in_channels, kernel_size))
    
    def forward(self, X):
        out = X
        for layer in self.layers:
            out = layer(out)
             
        return out

import torch.nn.functional as F

# Can probably rewrite DGATnet to have one network for temporal and variable attention
class ATCNet(nn.Module):
    def __init__(self, seq_len, channels, kernel_size, num_layers_gat, num_layers_tcn, device):
        super().__init__()
        self.device = device
        self.graph_temporal_att = DGATnet(channels, num_layers_gat)
        self.graph_variable_att = DGATnet(seq_len, num_layers_gat)
        self.conv1d = nn.Conv1d(in_channels=channels*2, out_channels=channels, kernel_size=1)
        self.temp_conv_net = TCNet(channels, num_layers_tcn, kernel_size)
    
    # shape di X : batch_size,seq_len,channels
    def forward(self, X):
        batch_size, seq_len, channels = X.shape
        
        X_temporal = X.view(batch_size*seq_len, channels)
        X_variable = X.permute(0,2,1)
        X_variable = X_variable.reshape(channels*batch_size, seq_len)

        temporal_edge_index = build_temporal_edges(batch_size, seq_len, self.device)
        variable_edge_index = build_variable_edges(channels, batch_size, self.device)

        temporal_att_embeddings = self.graph_temporal_att(X_temporal, temporal_edge_index)
        variable_att_embeddings = self.graph_variable_att(X_variable, variable_edge_index)
        
        variable_att_embeddings = variable_att_embeddings.view(batch_size, channels, seq_len)
        temporal_att_embeddings = temporal_att_embeddings.view(batch_size, seq_len, channels)
        temporal_att_embeddings = temporal_att_embeddings.permute(0, 2, 1)
    
        features_matrix = torch.cat([temporal_att_embeddings, variable_att_embeddings], dim=1)
        # dimensionality reduction
        features_matrix = self.conv1d(features_matrix)
        
        predicted_sequence = self.temp_conv_net(features_matrix)
        predicted_sequence = predicted_sequence.permute(0, 2, 1)
    
        return predicted_sequence
        

In [9]:
batch_size, seq_len, channels = 64, 100, len(target_channels)
    
num_of_training_samples_atcnet = int(len(training_df)*0.3)
num_of_training_samples_classifier = int(len(training_df) * 0.7)

training_samples_df_atcnet = training_df[:num_of_training_samples_atcnet]
training_samples_df_classifier = training_df[num_of_training_samples_atcnet: num_of_training_samples_atcnet + num_of_training_samples_classifier]

In [10]:
import torch.nn as nn
import torch.optim as optim
import itertools
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm


train_dataset_atcnet = EsaDataset(training_samples_df_atcnet, target_channels, seq_len, "train")
sampler = WindowedSampler(train_dataset_atcnet, seq_len)
train_loader_atcnet = DataLoader(
    train_dataset_atcnet,
    batch_size=batch_size,       
    shuffle=False,        
    num_workers=0, 
    sampler=sampler,
    drop_last=True
)
    
device = "cuda" if torch.cuda.is_available() else "cpu"
atcnet = ATCNet(seq_len=seq_len, channels=channels, kernel_size=4, num_layers_gat=3, num_layers_tcn=3, device=device)              
atcnet = atcnet.to(device)            
         
optimizer = optim.Adam(atcnet.parameters(), lr=1e-3)
num_epochs = 1
iteration_counter = 0
    
batch_losses = []

for epoch in range(num_epochs):
    atcnet.train()  
    train_loss = 0.
    last_predicted_window = torch.zeros(1, seq_len, channels).to(device)

    for batch in tqdm(train_loader_atcnet): 
        inputs, _ = batch
        inputs = inputs.to(device)
        predicted_sequences = atcnet(inputs).to(device)

        current_batch_size = inputs.shape[0]
        predicted_sequences = torch.cat([last_predicted_window, predicted_sequences[:current_batch_size-1]], dim=0)
        last_predicted_window = predicted_sequences[current_batch_size-1].unsqueeze(0).detach()

        error_abs = torch.abs(inputs - predicted_sequences)
        mse_loss = ((inputs - predicted_sequences)**2).mean()
        mae_loss = error_abs.mean()

        train_loss += mse_loss.item()
        batch_losses.append(mse_loss.item())

        if iteration_counter % 50 == 0:
            print(f"\n[Iteration {iteration_counter}] Batch Loss: {mae_loss.item():.6f}")

        optimizer.zero_grad()
        mse_loss.backward()
        optimizer.step()
        
        iteration_counter += 1


  WeightNorm.apply(module, name, dim)
  0%|          | 0/690 [00:00<?, ?it/s]


[Iteration 0] Batch Loss: 0.572052


  8%|▊         | 52/690 [00:09<01:46,  6.02it/s]


[Iteration 50] Batch Loss: 0.030643


 15%|█▍        | 102/690 [00:18<01:37,  6.01it/s]


[Iteration 100] Batch Loss: 0.014587


 22%|██▏       | 152/690 [00:26<01:29,  6.02it/s]


[Iteration 150] Batch Loss: 0.008916


 29%|██▉       | 202/690 [00:34<01:21,  6.01it/s]


[Iteration 200] Batch Loss: 0.007060


 37%|███▋      | 252/690 [00:43<01:12,  6.01it/s]


[Iteration 250] Batch Loss: 0.005181


 44%|████▍     | 302/690 [00:51<01:04,  5.99it/s]


[Iteration 300] Batch Loss: 0.004545


 51%|█████     | 352/690 [00:59<00:56,  6.02it/s]


[Iteration 350] Batch Loss: 0.006408


 58%|█████▊    | 402/690 [01:07<00:47,  6.02it/s]


[Iteration 400] Batch Loss: 0.005056


 66%|██████▌   | 452/690 [01:16<00:39,  6.01it/s]


[Iteration 450] Batch Loss: 0.005418


 73%|███████▎  | 502/690 [01:24<00:31,  6.02it/s]


[Iteration 500] Batch Loss: 0.005792


 80%|████████  | 552/690 [01:32<00:22,  6.02it/s]


[Iteration 550] Batch Loss: 0.003944


 87%|████████▋ | 602/690 [01:41<00:14,  6.01it/s]


[Iteration 600] Batch Loss: 0.003245


 94%|█████████▍| 652/690 [01:49<00:06,  6.01it/s]


[Iteration 650] Batch Loss: 0.003896


100%|██████████| 690/690 [01:55<00:00,  5.96it/s]


In [11]:
import torch
import numpy as np 
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm

from torch.nn.utils.rnn import pad_sequence
import torch

training_dataset_cls = EsaDataset(training_samples_df_classifier, target_channels, seq_len, "train")
cls_sampler = WindowedSampler(training_dataset_cls, seq_len)

train_loader_cls = DataLoader(
    training_dataset_cls,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    sampler=cls_sampler,
    drop_last=True)

feature_matrix_train_cls = []
train_labels = []

atcnet.eval()

last_predicted_window = torch.zeros(1, seq_len, channels).to(device)
last_window_input = torch.zeros(channels, seq_len).to(device)

for inputs, targets in tqdm(train_loader_cls):
    inputs = inputs.to(device)  # (batch_size, seq_len, channels)
    targets = targets.to(device)
    actual_batch_size = inputs.shape[0]

    with torch.no_grad():
        predicted_sequences = atcnet(inputs)  # (batch_size, seq_len, channels)

        predicted_sequences = torch.cat(
            [last_predicted_window, predicted_sequences[:actual_batch_size - 1]],
            dim=0
        )
        
        last_predicted_window = predicted_sequences[actual_batch_size - 1].unsqueeze(0).detach()

        model_errors = torch.abs(inputs - predicted_sequences)  
        model_errors = model_errors.view(actual_batch_size * seq_len, channels).cpu().numpy()
        
        feature_matrix_train_cls.append(model_errors)
        train_labels.append(targets.view(batch_size*seq_len).cpu().numpy())

feature_matrix_train_cls = np.concatenate(feature_matrix_train_cls, axis=0)
train_labels = np.concatenate(train_labels, axis=0)

100%|██████████| 1610/1610 [02:46<00:00,  9.69it/s]


In [12]:
import numpy as np

np.random.seed(42)

idx_1_full = np.where(train_labels == 1)[0]
idx_0_full = np.where(train_labels == 0)[0]

np.random.shuffle(idx_1_full)
np.random.shuffle(idx_0_full)

n_samples   = min(len(idx_1_full), len(idx_0_full))
idx_1_train = idx_1_full[:n_samples]
idx_0_train = idx_0_full[:n_samples]

selected_indices   = np.concatenate([idx_1_train, idx_0_train])
np.random.shuffle(selected_indices)

balanced_features  = feature_matrix_train_cls[selected_indices]
balanced_labels    = train_labels[selected_indices]

In [26]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42, n_estimators=200, max_depth=10)
model.fit(balanced_features, balanced_labels)

In [15]:
def collate_fn_test(batch):
    if isinstance(batch[0], tuple):
        inputs = [item[0] for item in batch]
    else:
        inputs = batch

    padded_inputs = pad_sequence(inputs, batch_first=True, padding_value=0.0)  # shape (batch_size, max_seq_len, channels)
    return padded_inputs

In [18]:
import torch
import numpy as np 
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm

test_dataset = EsaDataset(test_df, target_channels, seq_len, "test")
test_sampler = WindowedSampler(test_dataset, seq_len)

test_loader_real = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    sampler=test_sampler,
    drop_last=False,
    collate_fn=collate_fn_test
)

feature_matrix_test = []
atcnet.eval()

last_predicted_window = torch.zeros(1, seq_len, channels).to(device)
last_window_input = torch.zeros(channels, seq_len).to(device)

for inputs in tqdm(test_loader_real, desc="Test Evaluation"):
    inputs = inputs.to(device)  # inputs shape: (batch_size, seq_len, channels)
    actual_batch_size = inputs.shape[0]

    with torch.no_grad():
        predicted_sequences = atcnet(inputs)  # output shape (batch_size, seq_len, channels)
        predicted_sequences = torch.cat(
            [last_predicted_window, predicted_sequences[:actual_batch_size - 1]],
            dim=0
        )
        last_predicted_window = predicted_sequences[actual_batch_size - 1].unsqueeze(0).detach()

        model_errors = torch.abs(inputs - predicted_sequences)  # shape (batch_size, seq_len, channels)
        
        # Flatten e salva
        model_errors = model_errors.view(actual_batch_size * seq_len, channels).cpu().numpy()
        feature_matrix_test.append(model_errors)

feature_matrix_test = np.concatenate(feature_matrix_test, axis=0)


Test Evaluation: 100%|██████████| 82/82 [00:07<00:00, 10.26it/s]


In [29]:
import pandas as pd

y_pred = model.predict(feature_matrix_test_real)
start_id = 14728321
ids = range(start_id, start_id + len(y_pred))

# Crea DataFrame
df_out = pd.DataFrame({
    "id": ids,
    "is_anomaly": y_pred
})

df_out = df_out.iloc[:521280]
# Salva in file parquet
df_out.to_parquet("predictions.parquet", index=False)


model = RandomForestClassifier(random_state=42, n_estimators=200, max_depth=10)
atcnet = ATCNet(seq_len=seq_len, channels=channels, kernel_size=4, num_layers_gat=6, num_layers_tcn=6, device=device)              


In [30]:
df_out['is_anomaly'].sum()

5.0