In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from lib.reproduction import major_oxides
import mlflow
import numpy as np
import datetime
import os
os.environ["KERAS_BACKEND"] = "torch"

import torch
import keras


# Single source of truth for every random seed set in this cell.
SEED = 42

# Seeding only torch and NumPy leaves Python's built-in `random` module
# unseeded. keras.utils.set_random_seed seeds Python, NumPy and the active
# Keras backend (torch, selected via KERAS_BACKEND above) in one call.
keras.utils.set_random_seed(SEED)

# Kept explicit so the torch / NumPy seeding stays visible in the notebook;
# both generators are simply re-seeded with the same value.
torch.manual_seed(SEED)
np.random.seed(SEED)


In [None]:
# Show which Keras version this kernel imported.
print(keras.__version__)

In [None]:
import torch.nn as nn
import torch.optim as optim

# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# cnn_regression_optimized.py
from keras import layers, optimizers, regularizers

def build_model(input_dim, output_dim, grid_shape=(48, 128)):
    """Build and compile a 2-D CNN regressor over a flat feature vector.

    The flat input is reshaped into a single-channel ``grid_shape`` image and
    passed through four ``Conv2D -> BatchNormalization -> MaxPooling2D`` stages
    (32, 32, 64 and 128 filters). The result is flattened, fed through a
    256-unit ReLU layer with 50% dropout, and mapped to the outputs by one
    linear, L2-regularised dense layer.

    Args:
        input_dim: Number of features per sample. Must equal
            ``grid_shape[0] * grid_shape[1]``.
        output_dim: Number of continuous values the model predicts.
        grid_shape: ``(rows, cols)`` of the image the flat input is reshaped
            into. Defaults to ``(48, 128)``, i.e. 6144 features.

    Returns:
        A compiled ``keras.models.Sequential`` model using Adam
        (learning rate 0.001), MSE loss, and RMSE / MAE metrics.

    Raises:
        ValueError: If ``input_dim`` does not match ``grid_shape``.
    """
    rows, cols = grid_shape
    # Fail early with a readable message instead of an opaque Reshape error
    # when the model is built.
    if rows * cols != input_dim:
        raise ValueError(
            f"input_dim={input_dim} cannot be reshaped to {rows}x{cols} "
            f"(expected {rows * cols} features)"
        )

    model = keras.models.Sequential()
    model.add(layers.Input(shape=(input_dim,)))
    model.add(layers.Reshape((rows, cols, 1)))

    # Four identical feature-extraction stages that differ only in width.
    for filters in (32, 32, 64, 128):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu', padding='same'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.5))

    # One linear output layer carrying the L2 penalty. Stacking a second
    # Dense(output_dim) on top of a linear layer would add parameters without
    # adding expressiveness.
    model.add(layers.Dense(output_dim, kernel_regularizer=regularizers.l2(0.01)))

    # Optimizer with a custom learning rate
    optimizer = optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse', metrics=['root_mean_squared_error', 'mae'])
    return model

In [None]:
# Each sample is a flat vector of 6144 features (48 * 128, the grid that
# build_model reshapes its input into).
INPUT_DIM: int = 6_144
# Number of continuous values each network outputs.
OUTPUT_DIM: int = 1

In [None]:
def build_model_2(INPUT_DIM, OUTPUT_DIM):
    """Build and compile a 1-D convolutional regressor with two additive skip connections.

    Layout: batch normalisation, a 64-filter stem convolution with pooling, a
    64-filter skip stage, a 128-filter convolution with pooling, a 128-filter
    skip stage, then flatten -> dropout (0.5) -> 512-unit ReLU layer -> linear
    output.

    Args:
        INPUT_DIM: Length of the input sequence; each sample has shape
            ``(INPUT_DIM, 1)``.
        OUTPUT_DIM: Width of the final linear layer, i.e. how many values the
            model predicts per sample.

    Returns:
        A compiled ``keras.Model`` using Adam (learning rate 0.001), MSE loss,
        and RMSE / MAE metrics.
    """
    kl = keras.layers

    def relu_conv(filters, kernel_size, strides=1):
        # Every convolution in this network uses 'same' padding and a ReLU.
        return kl.Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding='same',
            activation='relu',
        )

    def halve(tensor):
        # Max-pool with window 2.
        return kl.MaxPooling1D(pool_size=2)(tensor)

    def skip_stage(tensor, filters):
        # Shortcut branch: 1-wide convolution with stride 2, created before
        # the main branch so layers are instantiated in a fixed order.
        shortcut = relu_conv(filters, 1, strides=2)(tensor)
        # Main branch: 3-wide convolution followed by pooling.
        main = halve(relu_conv(filters, 3)(tensor))
        return kl.Add()([main, shortcut])

    inputs = keras.Input(shape=(INPUT_DIM, 1))
    features = kl.BatchNormalization()(inputs)

    features = halve(relu_conv(64, 5)(features))
    features = skip_stage(features, 64)

    features = halve(relu_conv(128, 3)(features))
    features = skip_stage(features, 128)

    # Regression head.
    features = kl.Flatten()(features)
    features = kl.Dropout(0.5)(features)
    features = kl.Dense(512, activation='relu')(features)
    prediction = kl.Dense(OUTPUT_DIM, activation='linear')(features)

    model = keras.Model(inputs=inputs, outputs=prediction)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['root_mean_squared_error', 'mae'],
    )
    return model

In [None]:
# Passed as the second argument to get_preprocess_fn in the training cell:
# every major-oxide column plus the "ID" and "Sample Name" columns.
# NOTE(review): presumably these are excluded from the feature matrix — confirm
# against lib.deep_learning_utils.get_preprocess_fn.
drop_cols = major_oxides + ["ID", "Sample Name"]
# Alias for the full list of oxides.
# NOTE(review): not referenced by the visible cells; the training loop iterates
# major_oxides directly.
target_cols = major_oxides

In [None]:
from lib.cross_validation import (
    get_cross_validation_metrics,
)
from lib.metrics import rmse_metric, std_dev_metric
from functools import partial
from lib.deep_learning_utils import get_data_nn, get_preprocess_fn, MLFlowCallback


# Factory rather than an instance: calling early_stopping_callback() yields a
# fresh EarlyStopping for every fit, so no callback state is shared between
# folds or targets. Stops after 25 epochs without val_loss improvement and
# restores the best weights.
early_stopping_callback = partial(
    keras.callbacks.EarlyStopping, monitor="val_loss", patience=25, restore_best_weights=True
)


# The experiment name carries a timestamp, so each execution of this cell logs
# to a differently named experiment.
mlflow.set_experiment(f'CNN_{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}')


# Keyword arguments forwarded to every model.fit call below and logged as the
# run's parameters. Early stopping normally ends training well before 1000 epochs.
args = {
    "epochs": 1000,
    "batch_size": 32,
}

# One MLflow run (and one independent set of models) per oxide.
for target in major_oxides:
    # NOTE(review): presumably "Sample Name" is the grouping column used when
    # splitting — confirm in lib.deep_learning_utils.get_data_nn.
    folds, train, val, test = get_data_nn(target, "Sample Name")

    with mlflow.start_run(run_name=f"CNN_{target}"):
        # == CROSS VALIDATION ==
        # Collects one [rmse, std_dev] pair per fold.
        cv_metrics = []
        for cv_train_data, cv_test_data in folds:
            # Fresh, untrained model for every fold.
            model = build_model_2(INPUT_DIM, OUTPUT_DIM)

            # NOTE(review): build_model_2 declares inputs of shape (INPUT_DIM, 1);
            # confirm preprocess_fn returns arrays compatible with that.
            preprocess_fn = get_preprocess_fn([target], drop_cols)
            X_train, y_train = preprocess_fn(cv_train_data)
            # The same `val` split drives early stopping in every fold.
            X_valid, y_valid = preprocess_fn(val)
            X_test, y_test = preprocess_fn(cv_test_data)

            model.fit(
                X_train, y_train, **args, callbacks=[early_stopping_callback()], validation_data=(X_valid, y_valid)
            )  # MLFlowCallback is deliberately omitted for the per-fold fits
            y_pred = model.predict(X_test)

            rmse = rmse_metric(y_test, y_pred)
            std_dev = std_dev_metric(y_test, y_pred)
            cv_metrics.append([rmse, std_dev])


        # Aggregate the per-fold pairs and log them on this target's run.
        mlflow.log_metrics(get_cross_validation_metrics(cv_metrics).as_dict())

        # == FINAL MODEL: fit on the `train` split, score on the `test` split ==
        model = build_model_2(INPUT_DIM, OUTPUT_DIM)
        preprocess_fn = get_preprocess_fn([target], drop_cols)

        X_train, y_train = preprocess_fn(train)
        X_valid, y_valid = preprocess_fn(val)
        X_test, y_test = preprocess_fn(test)

        # NOTE(review): MLFlowCallback presumably logs per-epoch training
        # metrics to the active run — confirm in lib.deep_learning_utils.
        model.fit(
            X_train,
            y_train,
            **args,
            callbacks=[MLFlowCallback(), early_stopping_callback()],
            validation_data=(X_valid, y_valid),
        )
        y_pred = model.predict(X_test)

        std_dev = std_dev_metric(y_test, y_pred)
        rmse = rmse_metric(y_test, y_pred)
        mlflow.log_metrics({"rmse": rmse, "std_dev": std_dev})

        # NOTE(review): params are logged only after training finishes, so a
        # run that fails mid-training records no parameters.
        mlflow.log_params(args)