In [1]:
import pandas as pd
import numpy as np

data = pd.read_csv('train.csv')

normalized_data = (data - data.mean()) / data.std()
normalized_data = np.expand_dims(normalized_data, axis=-1)

In [2]:
def apply_geometric_mask(data, p=0.1):
    mask = np.random.geometric(p, size=data.shape) > 1
    return data * mask

augmented_data = apply_geometric_mask(normalized_data)

In [9]:
normalized_data.shape

(132481, 26, 1)

In [10]:
from tensorflow import keras
from tensorflow.keras import layers

def build_autoencoder(input_shape):
    # Encoder
    input_layer = keras.Input(shape=input_shape)
    x = layers.Normalization()(input_layer)
    x = layers.MultiHeadAttention(num_heads=2, key_dim=2)(x, x)
    x = layers.GlobalAveragePooling1D()(x)
    encoded = layers.Dense(64, activation='relu')(x)
    encoder = keras.Model(inputs=input_layer, outputs=encoded, name="encoder")

    # Decoder
    decoder_input = keras.Input(shape=(64,))
    x = layers.RepeatVector(input_shape[0])(decoder_input)
    x = layers.MultiHeadAttention(num_heads=2, key_dim=2)(x, x)
    x = layers.TimeDistributed(layers.Dense(input_shape[-1]))(x)
    decoder = keras.Model(inputs=decoder_input, outputs=x, name="decoder")
    
    # Autoencoder
    autoencoder_output = decoder(encoder(input_layer))
    autoencoder = keras.Model(inputs=input_layer, outputs=autoencoder_output, name="autoencoder")
    autoencoder.compile(optimizer='adam', loss='mse')

    return autoencoder, encoder, decoder

input_shape = (normalized_data.shape[1], normalized_data.shape[2])  # Modify as per your actual input shape
autoencoder, encoder, decoder = build_autoencoder(input_shape)


In [27]:
# Generator
generator = encoder

# Discriminator
discriminator = keras.Sequential([
    keras.Input(shape=64),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
discriminator.compile(optimizer='adam', loss='binary_crossentropy')

discriminator.trainable = False
combined_input = keras.Input(shape=input_shape)
combined_output = discriminator(generator(combined_input))
combined_model = keras.Model(inputs=combined_input, outputs=combined_output)
combined_model.compile(loss='binary_crossentropy', optimizer='adam')


generator.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 26, 1)]      0           []                               
                                                                                                  
 normalization_2 (Normalization  (None, 26, 1)       3           ['input_7[0][0]']                
 )                                                                                                
                                                                                                  
 multi_head_attention_4 (MultiH  (None, 26, 1)       29          ['normalization_2[0][0]',        
 eadAttention)                                                    'normalization_2[0][0]']        
                                                                                            

In [38]:
def train_gan(generator, discriminator, combined_model, data, epochs=100, batch_size=32):
    for epoch in range(epochs):
        for _ in range(data.shape[0] // batch_size):
            # Sampling and generating data
            idx = np.random.randint(0, data.shape[0], batch_size)
            real_data = data[idx].reshape(batch_size, 64)  # Correct reshaping to (batch_size, 64)

            # Generate noise for the generator
            noise = np.random.normal(0, 1, (batch_size, generator.input_shape[1]))

            # Generate fake data from noise
            fake_data = generator.predict(noise)

            # Discriminator training on real and fake data
            d_loss_real = discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
            d_loss_fake = discriminator.train_on_batch(fake_data, np.zeros((batch_size, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator training to fool the discriminator
            g_loss = combined_model.train_on_batch(noise, np.ones((batch_size, 1)))

        print(f"Epoch {epoch + 1}/{epochs}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")



train_gan(generator, discriminator, combined_model, augmented_data, epochs=10, batch_size=64)

ValueError: cannot reshape array of size 1664 into shape (64,64)

In [16]:
reconstructions = autoencoder.predict(normalized_data)
losses = np.mean(np.square(normalized_data - reconstructions), axis=-1)
threshold = np.percentile(losses, 95)
anomalies = losses > threshold

