In [None]:
import sys
import os

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam


sys.path.append(os.path.abspath('../classes/'))
from feature_factory import FeatureFactory

In [None]:
# Read the training and test splits; both paths are relative to the
# directory the notebook is run from.
train_df, test_df = map(pd.read_csv, ('../data/train.csv', '../data/test.csv'))

In [None]:
# Example usage of FeatureFactory: wrap both splits, then run the
# feature-building steps one after another.
factory = FeatureFactory(train_df, test_df)

# Each entry pairs a factory method with the keyword arguments it is called
# with; the steps run in exactly the order listed here.
feature_steps = [
    (factory.add_time_features, {}),
    (factory.add_rolling_features, {'window_size': 3}),
    (factory.add_ratio_and_diff_features, {}),
    (factory.add_aFRR_activation_request_ratio, {}),
    (factory.add_FRCE_LFCInput_difference, {}),
    (factory.add_participation_state, {}),
    (factory.add_demand_FRCE_interaction, {}),
]
for add_step, step_kwargs in feature_steps:
    add_step(**step_kwargs)

# Display the columns now present in the training frame.
factory.train_data.columns

In [None]:
# Columns fed to the model, in the order they appear in the input matrices.
# NOTE(review): the original note said the engineered features begin at 'day';
# 'hour' directly before it looks engineered as well - confirm against
# FeatureFactory.
features = [
    'Demand',
    'correction',
    'correctedDemand',
    'FRCE',
    'controlBandPos',
    'controlBandNeg',
    'LFCInput',
    'aFRRactivation',
    'aFRRrequest',
    'participationCMO',
    'participationIN',
    'correctionEcho',
    'BandLimitedCorrectedDemand',
    'controlArea',
    'hour',
    'day',
    'weekday',
    'month',
    'Demand_RollingMean',
    'Demand_RollingStd',
    'Demand_CorrectedDemand_Ratio',
    'Demand_CorrectedDemand_Diff',
    'aFRR_Activation_Request_Ratio',
    'FRCE_LFCInput_Diff',
    'Participation_State',
    'Demand_FRCE_Interaction',
]

# Select the same feature columns from both splits.
X_train = factory.train_data.loc[:, features]
X_test = factory.test_data.loc[:, features]

In [None]:
# Count of missing values in each training feature column.
X_train.isna().sum(axis=0)

In [None]:
# Standardize the features: the scaler is fitted on the training split only
# and the fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)

In [None]:
# Width of the input/output layers: one unit per scaled feature column.
input_dim = X_train_normalized.shape[1]

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and batch shuffling (and therefore the
# anomaly flags derived from the model) are repeatable across
# Restart & Run All. `keras.utils.set_random_seed` is available in
# TensorFlow 2.7+.
keras.utils.set_random_seed(42)

# Autoencoder model: input_dim -> 64 -> 32 -> 64 -> input_dim.
# The output layer is linear because the targets are standardized values,
# which are not confined to a fixed range.
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(input_layer, decoded)

# Reconstruction quality is measured as mean squared error.
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [None]:
# Train the autoencoder to reproduce its own input. The scaled test matrix is
# passed as validation data (again with itself as the target).
autoencoder.fit(
    x=X_train_normalized,
    y=X_train_normalized,
    epochs=5,
    batch_size=128,
    validation_data=(X_test_normalized, X_test_normalized),
)

In [None]:
# Run the scaled test rows through the trained autoencoder.
reconstructed = autoencoder.predict(x=X_test_normalized)

In [None]:
# Reconstruction error per test row: squared difference between the scaled
# input and its reconstruction, averaged along axis 1.
residual = X_test_normalized - reconstructed
mse = np.power(residual, 2).mean(axis=1)

# The threshold is the 95th percentile of the test errors; every row whose
# error lies above it is flagged as an anomaly and stored as a 0/1 integer.
threshold = np.percentile(mse, q=95)
test_df['anomaly'] = (mse > threshold).astype(int)

# Quick look at the first flagged/unflagged rows.
print(test_df[['Datum_Uhrzeit_CET', 'Demand', 'correctedDemand', 'anomaly']].head())

In [24]:
# Submission file: the id and anomaly columns of the test frame, written
# without the DataFrame index.
submission_df = test_df.loc[:, ['id', 'anomaly']]
submission_df.to_csv('submission.csv', index=False)