In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from lib.reproduction import major_oxides
from sklearn.metrics import mean_squared_error
from lib import full_flow_dataloader
import mlflow
import numpy as np
import datetime
import os
os.environ["KERAS_BACKEND"] = "torch"

import torch
import keras


In [None]:
# Show the version string of the imported Keras package in the cell output.
print(keras.__version__)

In [None]:
import torch.nn as nn
import torch.optim as optim

# Pick the torch device: CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Fetch the processed train and test tables from the project's full-flow data loader.
# NOTE(review): the keyword is spelled `load_cache_if_exits` here — presumably that matches the
# loader's own parameter name; what caching and `average_shots` do exactly should be confirmed
# in lib.full_flow_dataloader.
train_processed, test_processed = full_flow_dataloader.load_full_flow_data(load_cache_if_exits=True, average_shots=True)

In [None]:
from keras.layers import Input, BatchNormalization, Conv1D, MaxPooling1D, Add, Flatten, Dropout, Dense
from keras.models import Model

# Residual 1-D CNN over a single-channel input of 6144 points per sample.
# Shape notes below rely on Keras defaults: a 'same'-padded Conv1D yields ceil(L / stride)
# steps and MaxPooling1D(pool_size=2) halves the number of steps.
inputs = Input(shape=(6144, 1))
x = BatchNormalization()(inputs)

# Two conv + pool stages: 6144 -> 3072 -> 1536 steps, 64 filters each.
x1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x)
x1 = MaxPooling1D(pool_size=2)(x1)

x2 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x1)
x2 = MaxPooling1D(pool_size=2)(x2)

# Shortcut 1: a stride-8 convolution maps the normalized input directly to (768, 64),
# which is the shape x3 has after its pooling step, so the two can be summed.
residual1 = Conv1D(filters=64, kernel_size=5, strides=8, padding='same', activation='relu')(x)

x3 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x2)
x3 = MaxPooling1D(pool_size=2)(x3)
x3 = Add()([x3, residual1])

# Widen to 256 filters and pool once more: (768, 64) -> (384, 256).
x4 = Conv1D(filters=256, kernel_size=5, strides=1, padding='same', activation='relu')(x3)
x4 = MaxPooling1D(pool_size=2)(x4)

x5 = Conv1D(filters=256, kernel_size=5, strides=1, padding='same', activation='relu')(x4)

# Shortcut 2: projects x4 to 512 filters at unchanged length so it matches x6, i.e. (384, 512).
residual2 = Conv1D(filters=512, kernel_size=5, strides=1, padding='same', activation='relu')(x4)

x6 = Conv1D(filters=512, kernel_size=5, strides=1, padding='same', activation='relu')(x5)
x6 = Add()([x6, residual2])

# Regression head: flatten (384 * 512 values), dropout, one hidden dense layer.
x7 = Flatten()(x6)
x7 = Dropout(0.3)(x7)
x7 = Dense(512, activation='relu')(x7)

# One linear unit per target column. The targets are selected with `major_oxides`, so the
# head width is derived from that list instead of being hard-coded.
output = Dense(len(major_oxides), activation='linear')(x7)
model = keras.Model(inputs=inputs, outputs=output)

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, and mean absolute error
# reported as an additional metric.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Everything that is not a model input: the oxide target columns plus "ID" and "Sample Name".
drop_cols = major_oxides + ["ID", "Sample Name"]

# For each split, the features are the remaining columns and the targets are the oxide columns.
X_train, y_train = train_processed.drop(columns=drop_cols), train_processed[major_oxides]
X_test, y_test = test_processed.drop(columns=drop_cols), test_processed[major_oxides]

In [None]:
# The network input is declared with 6144 points per sample. Check the feature width first:
# reshape(-1, 6144, 1) infers the sample count, so a wrong column count could otherwise be
# silently regrouped across samples whenever the total element count is divisible by 6144.
assert X_train.shape[1] == 6144, f"X_train has {X_train.shape[1]} feature columns, expected 6144"
assert X_test.shape[1] == 6144, f"X_test has {X_test.shape[1]} feature columns, expected 6144"

# Add the trailing channel axis expected by Conv1D: (n_samples, 6144) -> (n_samples, 6144, 1).
X_train_reshaped = X_train.to_numpy().reshape(-1, 6144, 1)
X_test_reshaped = X_test.to_numpy().reshape(-1, 6144, 1)

In [None]:
def run_cnn_experiment(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: Model,
    epochs: int,
    batch_size: int,
    callbacks: "list | None" = None,
    major_oxides: "list | None" = None,
):
    """Train ``model`` inside an MLflow run named "CNN" and log one test RMSE per oxide.

    Args:
        X_train: Training inputs passed straight to ``model.fit``.
        y_train: Training targets passed straight to ``model.fit``.
        X_test: Inputs passed to ``model.predict`` after training.
        y_test: 2-D array of true targets; column ``i`` is compared against
            column ``i`` of the predictions.
        model: Compiled Keras model to train.
        epochs: Maximum number of epochs for ``model.fit``.
        batch_size: Batch size for ``model.fit``.
        callbacks: Keras callbacks forwarded to ``model.fit``. ``None`` (the
            default) means no callbacks.
        major_oxides: Names of the target columns, in column order. One metric
            ``rmse_<name>`` is logged per entry; ``None`` (the default) logs no
            RMSE metrics.

    Returns:
        None. Results are recorded through MLflow only.
    """
    # Use None sentinels rather than mutable list defaults, which are shared between calls.
    if callbacks is None:
        callbacks = []
    if major_oxides is None:
        major_oxides = []

    with mlflow.start_run(run_name="CNN"):
        # validation_split=0.1 has Keras hold out part of the training arrays for val_* metrics.
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1, callbacks=callbacks)
        y_pred = model.predict(X_test)
        for i, oxide in enumerate(major_oxides):
            y_test_oxide = y_test[:, i]
            y_pred_oxide = y_pred[:, i]
            # Take the square root explicitly: the `squared=False` shortcut of
            # mean_squared_error is deprecated and absent from recent scikit-learn releases.
            rmse = np.sqrt(mean_squared_error(y_test_oxide, y_pred_oxide))
            mlflow.log_metric(f"rmse_{oxide}", float(rmse))

In [None]:
# Early stopping on validation loss: give up after 6 epochs without improvement and
# restore the weights from the best epoch.
callback = keras.callbacks.EarlyStopping(monitor="val_loss", patience=6, restore_best_weights=True)
class MLFlowCallback(keras.callbacks.Callback):
    """Keras callback that forwards the values Keras reports each epoch to MLflow."""

    def on_epoch_end(self, epoch, logs=None):
        """Log every entry of ``logs`` as an MLflow metric, using ``epoch`` as the step.

        Nothing is logged when ``logs`` is None.
        """
        if logs is None:
            return
        for key, metric in logs.items():
            mlflow.log_metric(f"{key}", metric, step=epoch)


# Every execution of this cell selects a new MLflow experiment whose name ends in the
# current timestamp.
mlflow.set_experiment(f'CNN_Residual_{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}')

# Train with per-epoch MLflow logging plus the early-stopping callback, then log test RMSEs.
run_cnn_experiment(
    X_train=X_train_reshaped,
    y_train=y_train.to_numpy(),
    X_test=X_test_reshaped,
    y_test=y_test.to_numpy(),
    model=model,
    epochs=1000,
    batch_size=32,
    callbacks=[MLFlowCallback(), callback],
    major_oxides=major_oxides,
)

