In [1]:
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
import numpy as np
from tune_models import tune_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, f1_score

In [2]:
random_state = 42

In [3]:
path = os.path.join("data", "pmsm_temperature_data.csv")

In [4]:
df = pd.read_csv(path)

In [5]:
df["stator_yoke"] = df["stator_yoke"].map({"Positive":1, "Negative":0})

In [6]:
df.isnull().sum()

ambient           0
coolant           0
u_d               0
u_q               0
motor_speed       1
torque            1
i_d               1
i_q               1
pm                1
stator_yoke       1
stator_winding    1
dtype: int64

In [7]:
df.dropna(inplace=True)

In [8]:
num_targets = ['pm', 'stator_winding'] 
cat_targets = ['stator_yoke']

In [9]:
X = df.drop(num_targets+cat_targets, axis=1)
y = df[num_targets+cat_targets]

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=random_state)

In [11]:
mlflow_run_name =  "XGBoost" 
model_name = "xgboost"
direction = "minimize"
task = "regression"
# time allocated for each target in seconds
timeout = 10*60 
mlflow_model_name = "xgboost"
n_splits = 3
# to be specified for LGBM and XGBoost
model_objective = "reg:squarederror" 
# base score function in cross validation
base_score_function = mean_squared_error
# LGBM and XGBoost cross validation score function
def cv_score_function(y_true, pred):
    """Returns score

    Args:
        y_true (Series): ground truth data
        y_pred (DMatrix): DMatrix XGBoost object
    
    """
    return(base_score_function.__name__, base_score_function(y_true, pred.get_label()))
cv_score_function.__name__ = base_score_function.__name__
targets = num_targets
# calling main function
score_dict, y_model_dict, opt_model_dict = tune_model(
            run_name=mlflow_run_name, 
            model_name=model_name,
            direction=direction,
            task=task,
            timeout=timeout,
            targets=targets,
            X_train=X_train, 
            y_train=y_train, 
            X_test=X_test, 
            y_test=y_test,
            n_splits=n_splits,
            random_state=random_state, 
            model_objective=model_objective,
            base_score_function=base_score_function,
            cv_score_function=cv_score_function,
            mlflow_model_name=mlflow_model_name,
        )

[I 2023-12-08 20:57:02,277] A new study created in memory with name: regression


Started tuning xgboost for pm...


[I 2023-12-08 20:57:08,742] Trial 0 finished with value: 0.42094933333333334 and parameters: {'n_estimators': 7, 'learning_rate': 0.43775200014817905, 'subsample': 0.6, 'colsample_bytree': 1.0, 'min_child_weight': 0.8491983767203796, 'reg_alpha': 0.0070689749506246055, 'reg_lambda': 0.005337032762603957, 'max_leaves': 5, 'colsample_bylevel': 0.6521211214797689, 'max_depth': 5}. Best is trial 0 with value: 0.42094933333333334.
[I 2023-12-08 20:57:14,770] Trial 1 finished with value: 1.050309 and parameters: {'n_estimators': 8, 'learning_rate': 0.00644285859857343, 'subsample': 0.6, 'colsample_bytree': 0.6, 'min_child_weight': 0.6467903667112945, 'reg_alpha': 0.004809461967501573, 'reg_lambda': 0.0018205657658407262, 'max_leaves': 20, 'colsample_bylevel': 0.9828160165372797, 'max_depth': 6}. Best is trial 0 with value: 0.42094933333333334.
[I 2023-12-08 20:57:20,817] Trial 2 finished with value: 1.0929330000000002 and parameters: {'n_estimators': 6, 'learning_rate': 0.0018678754137262702

Tuning finished! 



[I 2023-12-08 21:07:05,931] A new study created in memory with name: regression


Started tuning xgboost for stator_winding...


[I 2023-12-08 21:07:12,511] Trial 0 finished with value: 0.29152 and parameters: {'n_estimators': 7, 'learning_rate': 0.43775200014817905, 'subsample': 0.6, 'colsample_bytree': 1.0, 'min_child_weight': 0.8491983767203796, 'reg_alpha': 0.0070689749506246055, 'reg_lambda': 0.005337032762603957, 'max_leaves': 5, 'colsample_bylevel': 0.6521211214797689, 'max_depth': 5}. Best is trial 0 with value: 0.29152.
[I 2023-12-08 21:07:19,561] Trial 1 finished with value: 1.0404006666666665 and parameters: {'n_estimators': 8, 'learning_rate': 0.00644285859857343, 'subsample': 0.6, 'colsample_bytree': 0.6, 'min_child_weight': 0.6467903667112945, 'reg_alpha': 0.004809461967501573, 'reg_lambda': 0.0018205657658407262, 'max_leaves': 20, 'colsample_bylevel': 0.9828160165372797, 'max_depth': 6}. Best is trial 0 with value: 0.29152.
[I 2023-12-08 21:07:26,302] Trial 2 finished with value: 1.0883033333333334 and parameters: {'n_estimators': 6, 'learning_rate': 0.0018678754137262702, 'subsample': 0.6, 'colsa

Tuning finished! 





In [12]:
score_dict

{'pm': {'xgboost': {'RMSE': {'train': 0.46967198531308074,
    'test': 0.4695499996884248},
   'MAE': {'train': 0.3505366642714987, 'test': 0.3507050327158062},
   'R2': {'train': 0.8010777730194634, 'test': 0.8009848659294391}}},
 'stator_winding': {'xgboost': {'RMSE': {'train': 0.45203377958038365,
    'test': 0.45343238299089145},
   'MAE': {'train': 0.32043690223923843, 'test': 0.32220857906207134},
   'R2': {'train': 0.8160442929427676, 'test': 0.8149035860358}}}}

In [13]:
mlflow_run_name =  "LGBM" 
model_name = "lgbm"
direction = "maximize"
task = "classification"
# time allocated for each target in seconds
timeout = 10*60 
mlflow_model_name = "lgbm"
n_splits = 3
# to be specified for LGBM and XGBoost
model_objective = "binary" 
# base score function in cross validation
base_score_function = f1_score
# LGBM and XGBoost cross validation score function
def cv_score_function(preds, eval_data):
    """Returns score

    Args:
        eval_data (LGBM Dataset): ground truth data
        preds (series): predictions
    
    """
    return(base_score_function.__name__, base_score_function(eval_data.get_label().astype(int), preds.astype(int)), True)
cv_score_function.__name__ = base_score_function.__name__
targets = cat_targets
# calling main function
score_dict, y_model_dict, opt_model_dict = tune_model(
            run_name=mlflow_run_name, 
            model_name=model_name,
            direction=direction,
            task=task,
            timeout=timeout,
            targets=targets,
            X_train=X_train, 
            y_train=y_train, 
            X_test=X_test, 
            y_test=y_test,
            n_splits=n_splits,
            random_state=random_state, 
            model_objective=model_objective,
            base_score_function=base_score_function,
            cv_score_function=cv_score_function,
            mlflow_model_name=mlflow_model_name,
        )

[I 2023-12-08 21:17:22,876] A new study created in memory with name: classification


Started tuning lgbm for stator_yoke...


[I 2023-12-08 21:17:33,022] Trial 0 finished with value: 0.0 and parameters: {'n_estimators': 13, 'reg_alpha': 6.351221010640703, 'reg_lambda': 0.8471801418819978, 'colsample_bytree': 0.7, 'subsample': 0.4, 'learning_rate': 0.014648955132800727, 'max_depth': 4, 'num_leaves': 15, 'min_child_samples': 5}. Best is trial 0 with value: 0.0.
[I 2023-12-08 21:17:40,657] Trial 1 finished with value: 0.0 and parameters: {'n_estimators': 11, 'reg_alpha': 0.029204338471814112, 'reg_lambda': 0.06672367170464207, 'colsample_bytree': 0.3, 'subsample': 0.5, 'learning_rate': 0.0021348999901951954, 'max_depth': 4, 'num_leaves': 6, 'min_child_samples': 19}. Best is trial 0 with value: 0.0.
[I 2023-12-08 21:17:48,361] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 10, 'reg_alpha': 0.4467752817973907, 'reg_lambda': 0.017654048052495083, 'colsample_bytree': 0.6, 'subsample': 0.4, 'learning_rate': 0.17247957710046016, 'max_depth': 4, 'num_leaves': 10, 'min_child_samples': 12}. Best is tri

Tuning finished! 





In [14]:
score_dict

{'stator_yoke': {'lgbm': {'Accuracy': {'train': 0.8832098962773515,
    'test': 0.8839929135641309},
   'F1-micro': {'train': 0.8832098962773515, 'test': 0.8839929135641309},
   'F1-macro': {'train': 0.8759150300177194, 'test': 0.8768837083633496},
   'F1-weighted': {'train': 0.880229225528052, 'test': 0.8810881490666276}}}}