In [None]:
%load_ext autoreload
%autoreload 2

import os

import numpy as np
import pandas as pd

import tensorflow as tf
import keras
from keras import layers, models, metrics, optimizers, losses, callbacks

from sklearn.metrics import classification_report

from utils.model_inference_plots import *

In [None]:
# Source table for the whole notebook; the path is relative to the working directory.
# NOTE(review): the file name suggests the columns are already scaled — confirm upstream.
scaled_data_path = 'data/final_model_data_all_scaled.parquet'
df = pd.read_parquet(scaled_data_path)

In [None]:
# Columns fed to the model. Their order here fixes the feature order along the
# last axis of X.
feature_columns = [
    'Bx', 'By', 'Bz',
    'Bx_lag_1', 'Bx_lag_2',
    'By_lag_1', 'By_lag_2',
    'Bz_lag_1', 'Bz_lag_2',
    'Bx_conditional_vol', 'By_conditional_vol', 'Bz_conditional_vol',
    'Bx_rolling_stdev', 'By_rolling_stdev', 'Bz_rolling_stdev',
]
X = df[feature_columns].values

# Per-row target column.
y = df['Event_label_80'].values

In [None]:
# X is a 2-D (rows, feature columns) array, so its shape unpacks directly.
total_samples, n_features = X.shape

# Positional split: the leading 80% of rows train the model, the rest is the test range.
train_fraction = 0.8
train_size = int(train_fraction * total_samples)
test_size = total_samples - train_size

In [None]:
# Windowing parameters shared by the generator, the dataset signature, the
# steps-per-epoch computation and the prediction stitching below.
batch_size = 256  # windows per yielded batch
n_timesteps = 500  # rows per sliding window
stride = 40  # row offset between consecutive windows

def generate_timeseries(X, y, n_timesteps, batch_size, start_idx, end_idx, stride):
    """Endlessly yield batches of sliding windows taken from a row range.

    Each window holds the `n_timesteps` rows that end just before row `i`, where
    `i` runs over ``range(start_idx + n_timesteps, end_idx, stride)``. Once a pass
    over that range is finished (including a final, possibly smaller batch) the
    generator starts again from the first window, so it never terminates on its own.

    Args:
        X: 2-D array indexed as ``X[row, feature]``.
        y: NumPy array of per-row labels, sliced along its first axis.
        n_timesteps: Number of rows per window.
        batch_size: Number of windows per yielded batch (the last batch of a
            pass may be smaller).
        start_idx: First row a window may include.
        end_idx: Exclusive upper bound of the window end positions.
        stride: Step between consecutive window end positions.

    Yields:
        ``(features, targets)``: `features` is a float32 tensor of stacked
        windows; `targets` is a dict with ``"time_output"`` (per-row labels with
        a trailing axis of size 1) and ``"sequence_output"`` (the mean label of
        each window, one value per window), both float32.

    Raises:
        ValueError: If the arguments produce no window at all. Without this
            check the endless loop would spin forever without yielding anything.
    """
    # range() itself rejects stride == 0; an empty range covers both an index
    # span shorter than one window and a negative stride.
    window_ends = range(start_idx + n_timesteps, end_idx, stride)
    if len(window_ends) == 0:
        raise ValueError(
            f"No windows available: rows [{start_idx}, {end_idx}) with "
            f"n_timesteps={n_timesteps} and stride={stride} give an empty range."
        )

    def _as_batch(windows, step_labels, window_means):
        """Convert the accumulated Python lists into the yielded tensor structure."""
        return (
            tf.convert_to_tensor(np.array(windows), dtype=tf.float32),
            {
                "time_output": tf.convert_to_tensor(np.array(step_labels), dtype=tf.float32),
                "sequence_output": tf.convert_to_tensor(np.array(window_means), dtype=tf.float32),
            },
        )

    while True:
        X_batch, y_batch, y_seq_batch = [], [], []

        for i in window_ends:
            window = slice(i - n_timesteps, i)
            X_batch.append(X[window, :])
            y_batch.append(y[window].reshape(-1, 1))
            y_seq_batch.append([np.mean(y[window]).astype(np.float32)])

            if len(X_batch) == batch_size:
                yield _as_batch(X_batch, y_batch, y_seq_batch)
                X_batch, y_batch, y_seq_batch = [], [], []

        # Flush the final partial batch of this pass before starting over.
        if X_batch:
            yield _as_batch(X_batch, y_batch, y_seq_batch)


# (start, end) row bounds of the training and test ranges; the test range
# begins exactly where the training range stops.
train_idx = (0, train_size)
test_idx = (train_size, total_samples)

In [None]:
def create_dataset(start_idx, end_idx):
    """Wrap `generate_timeseries` for rows [start_idx, end_idx) in a prefetching tf.data.Dataset.

    Window length, batch size, stride and the X / y arrays are read from the
    notebook-level variables of the same names.
    """
    def _window_batches():
        # from_generator needs a zero-argument callable that builds a fresh generator.
        return generate_timeseries(
            X, y,
            n_timesteps=n_timesteps,
            batch_size=batch_size,
            start_idx=start_idx,
            end_idx=end_idx,
            stride=stride,
        )

    # The batch axis is left as None because the last batch of a pass can be smaller.
    feature_spec = tf.TensorSpec(shape=(None, n_timesteps, X.shape[1]), dtype=tf.float32)
    target_spec = {
        "time_output": tf.TensorSpec(shape=(None, n_timesteps, 1), dtype=tf.float32),
        "sequence_output": tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
    }

    dataset = tf.data.Dataset.from_generator(
        _window_batches,
        output_signature=(feature_spec, target_spec),
    )
    return dataset.prefetch(tf.data.AUTOTUNE)

# Each *_idx is a (start, end) pair, so it unpacks straight into the factory.
train_dataset = create_dataset(*train_idx)
test_dataset = create_dataset(*test_idx)

In [None]:
def _batches_per_pass(n_rows):
    """Return ceil((n_rows - n_timesteps) / (stride * batch_size)) as an int.

    This is the number of batches in one pass over an `n_rows`-long range when
    windows are `stride` rows apart and grouped `batch_size` at a time.
    """
    return int(np.ceil((n_rows - n_timesteps) / (stride * batch_size)))


steps_train_epoch = _batches_per_pass(train_size)
steps_test_epoch = _batches_per_pass(test_size)

In [None]:
# batch_ratios = []

# for _, outputs in test_dataset.take(steps_train_epoch):
#     y_time = outputs['time_output'].numpy()
#     has_event = (np.sum(y_time, axis=1) > 0).astype(np.float32)
    
#     batch_ratios.append(np.mean(has_event))

# plt.hist(batch_ratios, bins=30, edgecolor='black')
# plt.xlabel("Percentage of 1s in Sequence")
# plt.ylabel("Count")
# plt.title("Distribution of Sequences with 1s per Batch (Train Set)")
# plt.show()

In [None]:
@tf.keras.utils.register_keras_serializable(package='Custom', name='TverskyBCEPerSequence')
class TverskyBCEPerSequence(losses.Loss):
    """Per-sequence loss that switches between a Tversky term and focal BCE.

    Every sequence in the batch is flattened to one axis. Sequences whose labels
    sum to more than zero contribute ``event_weight * (1 - Tversky index)``;
    all other sequences contribute the binary focal cross-entropy instead.

    Args:
        alpha_t: Weight of the false-positive mass in the Tversky denominator.
        beta_t: Weight of the false-negative mass in the Tversky denominator.
        alpha_f: Forwarded as `alpha` to `binary_focal_crossentropy`.
        gamma_f: Forwarded as `gamma` to `binary_focal_crossentropy`.
        event_weight: Multiplier applied to the Tversky term.
        smooth: Constant added to the Tversky numerator and denominator.
        reduction: Reduction handed to the base `Loss`.
        name: Name handed to the base `Loss`.
    """

    def __init__(
        self,
        alpha_t=0.5,
        beta_t=0.5,
        alpha_f=0.5,
        gamma_f=0.0,
        event_weight=1.0,
        smooth=1e-6,
        reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE,
        name="tversky_bce_per_sequence",
    ):
        super().__init__(reduction=reduction, name=name)
        # Tversky parameters.
        self.alpha_t = alpha_t
        self.beta_t = beta_t
        self.smooth = smooth
        self.event_weight = event_weight
        # Focal cross-entropy parameters.
        self.alpha_f = alpha_f
        self.gamma_f = gamma_f

    def call(self, y_true, y_pred):
        """Return one loss value per sequence in the batch."""
        def _per_sequence(tensor):
            # Keep the batch axis and collapse everything else into one axis.
            return tf.reshape(tensor, [tf.shape(tensor)[0], -1])

        labels = _per_sequence(y_true)
        # Clip predictions away from exactly 0 and 1.
        probs = tf.clip_by_value(_per_sequence(y_pred), 1e-7, 1. - 1e-7)

        # Soft confusion counts per sequence.
        tp = tf.reduce_sum(labels * probs, axis=1)
        fn = tf.reduce_sum(labels * (1 - probs), axis=1)
        fp = tf.reduce_sum((1 - labels) * probs, axis=1)

        denominator = tp + self.alpha_t * fp + self.beta_t * fn + self.smooth
        tversky_index = (tp + self.smooth) / denominator

        # NOTE(review): Keras documents `alpha` as only applied when
        # `apply_class_balancing=True`, which is not passed here — confirm
        # whether alpha_f is meant to have an effect.
        focal_bce = losses.binary_focal_crossentropy(labels, probs, alpha=self.alpha_f, gamma=self.gamma_f)

        # 1.0 for sequences containing at least one positive label, else 0.0.
        contains_event = tf.cast(tf.reduce_sum(labels, axis=1) > 0, tf.float32)

        event_term = contains_event * self.event_weight * (1 - tversky_index)
        no_event_term = (1 - contains_event) * focal_bce
        return event_term + no_event_term

    def get_config(self):
        """Return the base config extended with this loss's hyperparameters."""
        return {
            **super().get_config(),
            "alpha_t": self.alpha_t,
            "beta_t": self.beta_t,
            "alpha_f": self.alpha_f,
            "gamma_f": self.gamma_f,
            "event_weight": self.event_weight,
            "smooth": self.smooth,
        }

In [None]:
# Seed TensorFlow's global random generator before any layer is created.
tf.random.set_seed(42)

# Input: one window of n_timesteps rows with n_features columns each.
input_layer = layers.Input(shape=(n_timesteps, n_features))
# Convolution along the time axis ('same' padding keeps the window length), then normalisation.
x = layers.Conv1D(kernel_size=5, filters=64, padding='same', activation='gelu')(input_layer)
x = layers.LayerNormalization()(x)
# First recurrent stage; return_sequences=True keeps one output per timestep.
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
# Self-attention: x is passed as both query and value. The attention scores are
# returned as well but are not used in the cells shown here.
attention, attention_weights = layers.MultiHeadAttention(num_heads=4, key_dim=64)(x, x, return_attention_scores=True)
# Residual connection around the attention block, followed by normalisation.
x = layers.Add()([x, attention])
x = layers.LayerNormalization()(x)
# Second recurrent stage.
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
# Two branches from the same tensor: a two-layer GELU path and a single linear
# projection, concatenated along the feature axis.
skip = x
x = layers.Dense(128, activation='gelu')(x)
x = layers.Dense(64, activation='gelu')(x)
skip = layers.Dense(64)(skip)
x = layers.Concatenate()([x, skip])
x = layers.Dense(32, activation='gelu')(x) 
# Head 1: one sigmoid output per timestep.
output_time_layer = layers.TimeDistributed(layers.Dense(1, activation='sigmoid'), name="time_output")(x)
# Head 2: average the per-timestep features, then one sigmoid output per window.
x_seq = layers.GlobalAveragePooling1D()(x)
output_seq_layer = layers.Dense(1, activation='sigmoid', name="sequence_output")(x_seq)
# Output order matters downstream: index 0 is time_output, index 1 is sequence_output.
model = models.Model(inputs=input_layer, outputs=[output_time_layer, output_seq_layer])

# The dict keys below must match the output layer names and the target keys
# produced by the data generator.
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss={
        'time_output': TverskyBCEPerSequence(
            alpha_t=0.6,
            beta_t=0.7,
            alpha_f=0.25,
            gamma_f=1.5,
            event_weight=1.75), 
        'sequence_output': losses.Huber()
    },
    # Both heads contribute equally to the total loss.
    loss_weights={
        'time_output': 1.0,
        'sequence_output': 1.0
    },
    # Metrics are tracked for the per-timestep head only.
    metrics={
        'time_output': ['accuracy', metrics.Precision(), metrics.Recall()]
    }
)

In [None]:
# Halve the learning rate when the monitored training loss ('loss') has not
# improved for `patience` epochs, never going below min_lr.
lr_schedule = callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=2,
    verbose=0,
    min_lr=1e-6
)

In [None]:
# The training generator loops forever, so steps_per_epoch is what defines the
# length of one epoch. No validation data is passed to this call.
model.fit(
    train_dataset,
    epochs=10,
    steps_per_epoch=steps_train_epoch,
    callbacks=[lr_schedule],
    verbose=1
)

In [None]:
# keras.utils.plot_model(
#     model,
#     to_file="model.png",
#     show_shapes=True,
#     show_dtype=False,
#     show_layer_names=False,
#     rankdir="TD",
#     expand_nested=False,
#     dpi=200,
#     show_layer_activations=False,
#     show_trainable=False,
# )

In [None]:
# Make sure the output folder exists before saving, so that a missing directory
# cannot cause the freshly trained model to be lost.
model_path = "models/mosrl_80_all_model.keras"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

In [None]:
# Run inference over steps_test_epoch batches of the test dataset. The model has
# two outputs, so the result holds one array per output (time_output first).
y_pred_probas_raw = model.predict(test_dataset, steps=steps_test_epoch, verbose=1)

In [None]:
# Per-timestep probabilities of the time_output head: one row per window.
y_pred_probas_sqzd = y_pred_probas_raw[0].squeeze(-1)
num_windows, window_size = y_pred_probas_sqzd.shape

# Rows actually covered by the windows: the last window starts at
# (num_windows - 1) * stride and spans window_size rows.
covered_len = (num_windows - 1) * stride + window_size

# Size the stitched output to the test range itself so it lines up row for row
# with y[test_idx[0]:test_idx[1]], whatever the stride and data length are.
output_len = test_idx[1] - test_idx[0]
assert covered_len <= output_len, (
    f"windows cover {covered_len} rows but the test range has only {output_len}"
)

sum_preds = np.zeros(output_len, dtype=y_pred_probas_sqzd.dtype)
count_preds = np.zeros(output_len, dtype=int)

# Accumulate every window's predictions at its position in the test range.
for win_num in range(num_windows):
    start = win_num * stride
    end = start + window_size
    sum_preds[start:end] += y_pred_probas_sqzd[win_num]
    count_preds[start:end] += 1

# Average the overlapping predictions. `out` is given explicitly because
# np.divide leaves the positions excluded by `where` uninitialised otherwise;
# rows at the tail that no window covers therefore get probability 0.
y_pred_probas = np.divide(
    sum_preds,
    count_preds,
    out=np.zeros(output_len, dtype=np.result_type(sum_preds.dtype, count_preds.dtype)),
    where=count_preds != 0,
)

In [None]:
# Binarise the stitched probabilities and compare them with the true labels of
# the test range.
threshold = 0.5
y_pred = (y_pred_probas >= threshold).astype(int)
y_test = y[test_idx[0]:test_idx[1]]
# y_test and y_pred must have the same length for the report to be computed.
print(classification_report(y_test, y_pred))

In [None]:
# Store the stitched per-row probabilities next to the saved model file.
np.save('models/mosrl_80_all_pred_probas.npy', y_pred_probas)