In [40]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

# Load and preprocess multivariate time series data
def preprocess_data(df):
    # Drop two columns from the DataFrame
    df = df.drop(columns=['timestamp_(min)', 'feature_21'])
    
    # Perform any necessary preprocessing (e.g., normalization)
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df)
    return scaled_data

# Load your multivariate time series data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Preprocess data
train_data = preprocess_data(train_df)
test_data = preprocess_data(test_df)

# Convert data to PyTorch tensors and create DataLoader
train_tensor = torch.tensor(train_data, dtype=torch.float32)
test_tensor = torch.tensor(test_data, dtype=torch.float32)

# Create DataLoader
batch_size = 32
train_loader = DataLoader(TensorDataset(train_tensor), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(test_tensor), batch_size=batch_size, shuffle=False)


In [41]:
class TransformerAutoencoder(nn.Module):
    def __init__(self, input_dim, embedding_dim=24, num_heads=4, ff_dim=128, num_layers=2, dropout_rate=0.1):
        super(TransformerAutoencoder, self).__init__()
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate

        # Adjust input_dim to be divisible by num_heads for Transformer
        self.input_dim = (input_dim // num_heads) * num_heads

        # Transformer Encoder
        self.encoder = nn.TransformerEncoder(
            encoder_layer=nn.TransformerEncoderLayer(
                d_model=self.input_dim,
                nhead=num_heads,
                dim_feedforward=ff_dim,
                dropout=dropout_rate
            ),
            num_layers=num_layers
        )

        # Transformer Decoder (with Linear output layer)
        self.decoder = nn.Linear(self.input_dim, input_dim)  # Adjust to match input_dim

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
# Instantiate Transformer-based Autoencoder model
input_dim = train_data.shape[1]
print("Input Dimension:", input_dim)
autoencoder = TransformerAutoencoder(input_dim=24, embedding_dim=24, num_heads=4, ff_dim=128, num_layers=2, dropout_rate=0.1)


Input Dimension: 24




In [43]:
# Define the GAN model (Generator and Discriminator)
# This is a basic example, you can use more complex GAN architectures
class Generator(nn.Module):
    def __init__(self, latent_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()  # Apply Sigmoid activation to output
        )

    def forward(self, x):
        return self.model(x)


# Instantiate GAN components
latent_dim = 32
generator = Generator(latent_dim, input_dim)
discriminator = Discriminator(input_dim)

# Define contrastive loss function
def contrastive_loss(y_true, y_pred):
    # Implement your contrastive loss function
    margin = 1.0
    
    # Calculate the positive loss (squared difference)
    squared_pred = torch.square(y_pred)
    positive_loss = y_true * squared_pred
    
    # Calculate the negative loss (squared margin)
    diff = margin - y_pred
    zeros = torch.zeros_like(diff)
    squared_margin = torch.square(torch.max(zeros, diff))  # Use torch.max to ensure non-negative values
    
    # Combine positive and negative losses
    loss = y_true * positive_loss + (1 - y_true) * squared_margin
    
    # Calculate the mean loss
    mean_loss = torch.mean(loss)
    
    return mean_loss

# Training loop
optimizer_ae = Adam(autoencoder.parameters(), lr=0.001)
optimizer_gan = Adam(list(generator.parameters()) + list(discriminator.parameters()), lr=0.001)

num_epochs = 50

for epoch in range(num_epochs):
    autoencoder.train()
    generator.train()
    discriminator.train()

    for data in train_loader:
        inputs = data[0]

        # Train Autoencoder
        optimizer_ae.zero_grad()
        reconstructions = autoencoder(inputs)
        ae_loss = contrastive_loss(inputs, reconstructions)
        ae_loss.backward()
        optimizer_ae.step()

        # Train GAN
        z = torch.randn(len(inputs), latent_dim)
        fake_data = generator(z)

        real_labels = discriminator(inputs)
        fake_labels = discriminator(fake_data.detach())

        gan_loss = nn.BCELoss()(real_labels, torch.ones_like(real_labels)) + nn.BCELoss()(fake_labels, torch.zeros_like(fake_labels))
        optimizer_gan.zero_grad()
        gan_loss.backward()
        optimizer_gan.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], AE Loss: {ae_loss.item()}, GAN Loss: {gan_loss.item()}")

# Anomaly Detection using the trained autoencoder
def detect_anomalies(data_loader, autoencoder, threshold):
    autoencoder.eval()
    anomalies = []

    with torch.no_grad():
        for data in data_loader:
            inputs = data[0]
            reconstructions = autoencoder(inputs)

            # Calculate reconstruction loss (e.g., MSE)
            reconstruction_loss = nn.MSELoss(reduction='none')(inputs, reconstructions).mean(dim=1)

            # Detect anomalies based on reconstruction loss and threshold
            anomalies.extend(data[0][reconstruction_loss > threshold].numpy().tolist())

    return anomalies

# Set threshold for anomaly detection
threshold = 0.5

# Detect anomalies in the test data
anomalies = detect_anomalies(test_loader, autoencoder, threshold)


RuntimeError: all elements of input should be between 0 and 1