In [None]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to project modules take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from lib.reproduction import major_oxides
from sklearn.metrics import mean_squared_error
from lib import full_flow_dataloader
from sklearn.svm import SVR
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Add, BatchNormalization, Conv1D, MaxPooling1D, Flatten, Dropout, Dense

import datetime
import mlflow
import numpy as np

In [None]:
# Load the train/test frames produced by the project's full-flow pipeline.
# NOTE(review): `load_cache_if_exits=False` presumably forces the pipeline to
# recompute rather than read a cached copy, and `average_shots=True` presumably
# averages repeated shots per sample -- confirm against `full_flow_dataloader`.
train_processed, test_processed = full_flow_dataloader.load_full_flow_data(
    load_cache_if_exits=False,
    average_shots=True,
)

In [None]:
# Columns that must not be fed to the model as features: the regression
# targets themselves plus the two identifier columns.
drop_cols = major_oxides + ["ID", "Sample Name"]

X_train = train_processed.drop(columns=drop_cols)
y_train = train_processed[major_oxides]

X_test = test_processed.drop(columns=drop_cols)
y_test = test_processed[major_oxides]

# Conv1D consumes (samples, steps, channels). Derive the dimensions from the
# data instead of hard-coding row/column counts, so the cell keeps working
# when the dataset size changes; `-1` lets NumPy infer the sample count.
n_features = X_train.shape[1]
assert X_test.shape[1] == n_features, (
    "train and test must have the same number of feature columns"
)

X_train_reshaped = X_train.to_numpy().reshape(-1, n_features, 1)
X_test_reshaped = X_test.to_numpy().reshape(-1, n_features, 1)

In [None]:
# Build a 1-D convolutional network with two residual connections and use it
# to turn each reshaped sample into a 512-dimensional feature vector.
#
# NOTE(review): the model is never compiled or fitted in this cell, and no
# random seed is set in the visible cells, so `predict` runs with freshly
# initialized weights that differ between kernel restarts -- confirm that an
# untrained extractor is intended.

# Take the (steps, channels) shape from the data rather than a literal.
# The first Add() below requires the step count to be divisible by 8.
inputs = Input(shape=X_train_reshaped.shape[1:])
x = BatchNormalization()(inputs)

# Stage 1. The convolution consumes the normalized tensor `x`; previously a
# second, duplicate assignment fed from `inputs` and silently discarded this
# layer, so the main path bypassed BatchNormalization.
x1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x)
x1 = MaxPooling1D(pool_size=2)(x1)

# Stage 2.
x2 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x1)
x2 = MaxPooling1D(pool_size=2)(x2)

# Shortcut branch: stride 8 matches the three pool-by-2 steps of the main path,
# so both tensors have the same length and channel count at the Add().
residual1 = Conv1D(filters=64, kernel_size=5, strides=8, padding='same', activation='relu')(x)

# Stage 3, merged with the first shortcut.
x3 = Conv1D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')(x2)
x3 = MaxPooling1D(pool_size=2)(x3)
x3 = Add()([x3, residual1])

# Stage 4.
x4 = Conv1D(filters=256, kernel_size=5, strides=1, padding='same', activation='relu')(x3)
x4 = MaxPooling1D(pool_size=2)(x4)

x5 = Conv1D(filters=256, kernel_size=5, strides=1, padding='same', activation='relu')(x4)

# Second shortcut: projects x4 to 512 channels so it can be added to x6.
residual2 = Conv1D(filters=512, kernel_size=5, strides=1, padding='same', activation='relu')(x4)

x6 = Conv1D(filters=512, kernel_size=5, strides=1, padding='same', activation='relu')(x5)
x6 = Add()([x6, residual2])

# Head: flatten, regularize, and project to the 512-d feature vector.
x7 = Flatten()(x6)
x7 = Dropout(0.3)(x7)
x7 = Dense(512, activation='relu')(x7)

feature_extractor = Model(inputs=inputs, outputs=x7)

# Dropout is inactive in `predict`, so the extracted features are deterministic
# for a given set of weights.
X_train_features = feature_extractor.predict(X_train_reshaped)
X_test_features = feature_extractor.predict(X_test_reshaped)

In [None]:
# SVR hyperparameters shared by every per-target model.
kernel = "poly"
C = 100
eps = 0.1
gamma = "scale"
degree = 2
coef0 = 1.0

# One experiment per notebook run, made unique by a timestamp.
mlflow.set_experiment(f'CNN-SVM_{kernel}_{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}')

# Fit one independent SVR per target column on the CNN-extracted features,
# and record its test RMSE and hyperparameters as a separate MLflow run.
for target in y_train.columns:
    with mlflow.start_run(run_name=f"CNN_SVR_{target}"):
        # `degree` must be passed explicitly: it was previously logged but not
        # forwarded, so the model silently used scikit-learn's default of 3.
        svm_reg = SVR(
            kernel=kernel,
            C=C,
            epsilon=eps,
            degree=degree,
            coef0=coef0,
            gamma=gamma,
        )

        # Train on the extracted features rather than the raw inputs.
        svm_reg.fit(X_train_features, y_train[target])

        # Evaluate on the features extracted from the test set.
        y_pred = svm_reg.predict(X_test_features)
        rmse = np.sqrt(mean_squared_error(y_test[target], y_pred))

        mlflow.log_metric("rmse", float(rmse))
        mlflow.log_params(
            {
                "target": target,
                "kernel": kernel,
                "degree": degree,
                "coef0": coef0,
                "C": C,
                "epsilon": eps,
                "gamma": gamma,
            }
        )

        print(f"RMSE for {target}: {rmse}")
