In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import torch
from torch.utils.data import DataLoader, TensorDataset

# Load the dataset
df = pd.read_csv('../data/preprocessed_gnss_data.csv', delimiter=';')

# Handle missing values: Fill numeric columns with mean
numeric_columns = df.select_dtypes(include=['number']).columns
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

# Convert target to binary
df['class'] = df['class'].map({'legitimate': 0, 'jammed': 1})

# Split features (X) and target (y)
X = df.drop(['class', 'timestamp'], axis=1)  # Drop timestamp if not relevant
y = df['class']

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape X for PyTorch (sequence_length=1 in this case)
X = X.reshape((X.shape[0], 1, X.shape[1]))

# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.long)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.3, random_state=42, stratify=y)

# Create DataLoaders for batch processing
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [15]:
import torch.nn as nn

# Define the RNN model
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNClassifier, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        out, _ = self.rnn(x)  # RNN output
        out = out[:, -1, :]   # Take the last output (many-to-one)
        out = self.fc(out)    # Fully connected layer
        return self.softmax(out)

# Hyperparameters
input_size = X.shape[2]
hidden_size = 64
output_size = 2  # Binary classification

# Instantiate the model
model = RNNClassifier(input_size, hidden_size, output_size)


In [16]:
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()          # Clear gradients
        outputs = model(X_batch)       # Forward pass
        loss = criterion(outputs, y_batch)  # Compute loss
        loss.backward()                # Backward pass
        optimizer.step()               # Update weights
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1/20, Loss: 0.5579
Epoch 2/20, Loss: 0.4560
Epoch 3/20, Loss: 0.4319
Epoch 4/20, Loss: 0.4159
Epoch 5/20, Loss: 0.4061
Epoch 6/20, Loss: 0.3984
Epoch 7/20, Loss: 0.3924
Epoch 8/20, Loss: 0.3877
Epoch 9/20, Loss: 0.3845
Epoch 10/20, Loss: 0.3813
Epoch 11/20, Loss: 0.3794
Epoch 12/20, Loss: 0.3778
Epoch 13/20, Loss: 0.3767
Epoch 14/20, Loss: 0.3756
Epoch 15/20, Loss: 0.3743
Epoch 16/20, Loss: 0.3749
Epoch 17/20, Loss: 0.3733
Epoch 18/20, Loss: 0.3730
Epoch 19/20, Loss: 0.3727
Epoch 20/20, Loss: 0.3721


In [17]:
# Evaluation
model.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(y_batch.cpu().numpy())

# Classification report and confusion matrix
print("Classification Report:")
print(classification_report(y_true, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      1011
           1       0.89      0.86      0.87       327

    accuracy                           0.94      1338
   macro avg       0.92      0.91      0.92      1338
weighted avg       0.94      0.94      0.94      1338

Confusion Matrix:
[[975  36]
 [ 46 281]]
