In [1]:
import mlflow
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from lib import full_flow_dataloader
import os

from lib.norms import Norm1Scaler, Norm3Scaler
from lib.reproduction import major_oxides


# Move the working directory to the parent of the directory this cell was
# first run from. The starting directory is remembered on the first execution
# so that re-running the cell does not climb one more level each time, which a
# bare `os.chdir(os.path.dirname(os.getcwd()))` would do.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_DIR))

# Load the processed train / test data through the project data loader.
train_processed, test_processed = full_flow_dataloader.load_full_flow_data()

In [5]:
from sklearn.model_selection import train_test_split
from lib.utils import custom_train_test_split

# --- Normalisation -----------------------------------------------------------
# Keep the original column labels so the scaler output can be rebuilt as
# DataFrames with the same columns below.
train_cols = train_processed.columns
test_cols = test_processed.columns
norm = 1  # 1 selects Norm1Scaler; any other value selects Norm3Scaler

scaler = Norm1Scaler() if norm == 1 else Norm3Scaler()

# Fit the scaler on the training data only, then apply the already-fitted
# scaler to the test data. Calling fit_transform on the test frame would
# re-fit on held-out data, so train and test could end up scaled differently
# and test-set statistics could leak into preprocessing.
# NOTE(review): assumes the project scalers expose a scikit-learn style
# `transform` method alongside `fit_transform` - confirm in lib.norms.
train = scaler.fit_transform(train_processed)
test = scaler.transform(test_processed)

# turn back into dataframe
train = pd.DataFrame(train, columns=train_cols)
test = pd.DataFrame(test, columns=test_cols)

# Columns removed from the feature matrices: the regression targets
# (major_oxides) plus the "ID" and "Sample Name" columns.
drop_cols = major_oxides + ["ID", "Sample Name"]

## - VALIDATION -
# Optional hold-out split grouped on "Sample Name"; uncomment this section and
# comment out the "Converting train set" section below to use it.
# split_train, split_val = custom_train_test_split(train, "Sample Name", test_size=0.2, random_state=42)

# X_train = split_train.drop(columns=drop_cols)
# y_train = split_train[major_oxides]
# X_val = split_val.drop(columns=drop_cols)
# y_val = split_val[major_oxides]

# Converting train set - comment out if using validation
X_train = train.drop(columns=drop_cols)
y_train = train[major_oxides]

# Converting test set
X_test = test.drop(columns=drop_cols)
y_test = test[major_oxides]

In [8]:
import datetime

# Build the experiment name from the normalisation variant and a
# second-resolution timestamp, then make it the active MLflow experiment.
# The call is kept as the last expression so the cell still displays the
# returned experiment object.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
experiment_name = f'SVM_Norm{norm}_{run_timestamp}'
mlflow.set_experiment(experiment_name)

2024/02/12 14:21:03 INFO mlflow.tracking.fluent: Experiment with name 'SVM_Norm1_20240212-142102' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/890257248564760444', creation_time=1707744063225, experiment_id='890257248564760444', last_update_time=1707744063225, lifecycle_stage='active', name='SVM_Norm1_20240212-142102', tags={}>

In [9]:
from sklearn.svm import SVR
import warnings

# Ignore FutureWarning and UserWarning messages from this point on.
for ignored_category in (FutureWarning, UserWarning):
    warnings.simplefilter(action='ignore', category=ignored_category)

# Fitted regressors, appended in the order of y_train.columns.
models = []

# SVR hyperparameters shared by every per-target model.
kernel = "poly"
degree = 2
coef0 = 1.0
C = 100
eps = 0.1
gamma = "scale"

# Keyword arguments handed to SVR. The same mapping is logged to MLflow, so
# the keys are the SVR argument names (`eps` is passed and logged as "epsilon").
svr_kwargs = {
    "kernel": kernel,
    "degree": degree,
    "coef0": coef0,
    "C": C,
    "epsilon": eps,
    "gamma": gamma,
}

# One MLflow run per target column: fit on the training features, score on the
# test set, log the metric / parameters / model, and keep the fitted regressor.
for target in y_train.columns:
    with mlflow.start_run(run_name=f"SVM_{target}"):
        svm_reg = SVR(**svr_kwargs)
        svm_reg.fit(X_train, y_train[target])

        y_pred = svm_reg.predict(X_test)
        mse = mean_squared_error(y_test[target], y_pred)
        rmse = np.sqrt(mse)
        mlflow.log_metric("rmse", float(rmse))

        # Parameters are logged one by one: target, norm, then the SVR settings.
        run_params = {"target": target, "norm": norm, **svr_kwargs}
        for param_name, param_value in run_params.items():
            mlflow.log_param(param_name, param_value)

        models.append(svm_reg)
        mlflow.sklearn.log_model(svm_reg, f"model_{target}")
        