In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import mlflow
import numpy as np
import pandas as pd
import warnings

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from lib.full_flow_dataloader import load_full_flow_data
from lib.reproduction import major_oxides
from sklearn.preprocessing import RobustScaler

In [None]:
# Load the train/test splits and work on copies so the frames returned by the
# loader are left untouched by the in-place scaling below.
train, test = load_full_flow_data()
train = train.copy()
test = test.copy()

# Robust scaling based on the 10th-90th percentile range, with unit_variance
# enabled.
robust_scaler = RobustScaler(quantile_range=(10.0, 90.0), unit_variance=True)

# Spectral (wavelength) columns are the ones whose header parses as a number.
# Select them from the ORIGINAL labels with a boolean mask; converting the
# parsed floats back to strings can yield labels that no longer match the
# frame (e.g. "240" turning into "240.0").
cols = pd.to_numeric(train.columns, errors="coerce")
wavelength_cols = train.columns[~cols.isna()]

# Fit the scaler on the training spectra only, then apply the same transform
# to the test spectra.
#
# The scaled values are written back with direct column assignment on purpose:
# DataFrame.update() aligns on labels, and a bare ndarray handed to it is
# wrapped in a frame with integer column labels that share nothing with the
# wavelength labels, so update() would silently leave the data unscaled.
train[wavelength_cols] = robust_scaler.fit_transform(train[wavelength_cols])
test[wavelength_cols] = robust_scaler.transform(test[wavelength_cols])

# Not needed in this cell; kept in case later cells reference these names.
train_columns = train.columns
test_columns = test.columns

# Everything that is not a regression target or a sample identifier is used
# as a model input.
drop_cols = major_oxides + ["ID", "Sample Name"]

X_train = train.drop(columns=drop_cols)
y_train = train[major_oxides]

X_test = test.drop(columns=drop_cols)
y_test = test[major_oxides]

In [None]:
import datetime

# The experiment name embeds the current local time at second resolution, so
# executing this cell at a later time selects a differently named experiment.
started_at = datetime.datetime.now()
experiment_name = "Robust_scaler_SVR_" + started_at.strftime("%Y%m%d-%H%M%S")
mlflow.set_experiment(experiment_name)

In [None]:

# Install process-wide "ignore" filters for FutureWarning and UserWarning so
# the training output stays readable.
for ignored_category in (FutureWarning, UserWarning):
    warnings.simplefilter(action="ignore", category=ignored_category)

# Fitted estimators, appended in the column order of y_train.
models = []

# SVR hyperparameters shared by every per-target model.
kernel, degree, coef0 = "poly", 2, 1.0
C, eps, gamma = 100, 0.1, "scale"

# One mapping feeds both the estimator constructor and the MLflow parameter
# log, so the logged values are exactly the ones the model was built with.
svr_params = {
    "kernel": kernel,
    "degree": degree,
    "coef0": coef0,
    "C": C,
    "epsilon": eps,
    "gamma": gamma,
}

# Fit one independent SVR per target column, each tracked in its own MLflow run.
for target in y_train.columns:
    with mlflow.start_run(run_name=f"SVM_{target}"):
        svr_reg = SVR(**svr_params).fit(X_train, y_train[target])

        # Evaluate on the test split using root-mean-squared error.
        y_pred = svr_reg.predict(X_test)
        squared_error = mean_squared_error(y_test[target], y_pred)
        rmse = np.sqrt(squared_error)

        mlflow.log_metric("rmse", float(rmse))
        mlflow.log_param("target", target)
        for param_name, param_value in svr_params.items():
            mlflow.log_param(param_name, param_value)

        models.append(svr_reg)
        mlflow.sklearn.log_model(svr_reg, f"model_{target}")
        