In [1]:
from fowt_ml.pipeline import Pipeline

In [2]:
example_config_file = "../../src/example_config.yml"

In [3]:
my_pipeline = Pipeline(example_config_file)

In [4]:
# set correct path for mat file
my_pipeline.config["data"]["exp699"]["mat_file"] = "/home/sarah/temp/hybridlabs/data_example/exp699.mat"

In [5]:
# get the data
df = my_pipeline.get_data("exp699")
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 327702 entries, 0 to 327701
Columns: 125 entries, time to wind_speed
dtypes: float64(125)
memory usage: 312.5 MB


Unnamed: 0,time,acc_calc6[0],acc_calc6[1],acc_calc6[2],acc_calc6[3],acc_calc6[4],acc_calc6[5],acc_tb_meas3[0],acc_tb_meas3[1],acc_tb_meas3[2],...,spd_rot_cmd,time_wave_precalc,tq_mot_act,trig_Hex_in,trig_PIV_in,trig_PIV_out,wtm2_spd_rot_act,wtm2_spd_tor_cmd,wtm2_tq_mot_act,wind_speed
0,0.0,-1.580219,0.135577,0.070342,-0.757244,-2.295318,-0.619591,-1.032135,0.21481,9.635009,...,474.0,29.406,-17.928282,0.999695,-0.000488,0.0,-470.274464,-474.0,40.714927,4.0
1,0.001,-2.035639,0.70547,0.066672,-4.331129,-5.214741,-0.16552,-2.268602,-0.45881,7.425536,...,474.0,29.407,-29.986389,0.999969,-0.000336,0.0,-471.326827,-474.0,24.651387,4.0
2,0.001999,-2.1144,-0.785931,-0.044466,5.023345,-5.750528,-0.520829,-1.588994,-1.485706,6.823769,...,474.0,29.408,-14.466384,0.999695,-0.00061,0.0,-499.477522,-474.0,38.891995,4.0
3,0.003,-2.179736,0.08617,0.448392,-0.444052,-6.337631,0.078678,0.279178,-1.889878,6.000455,...,474.0,29.409,-22.535783,0.999084,-0.000214,0.0,-470.800646,-474.0,25.997681,4.0
4,0.004,-1.792539,-0.31519,0.25357,2.065519,-3.999853,0.206758,-1.349485,-1.129436,8.111131,...,474.0,29.41,-14.566729,0.999695,-9.2e-05,0.0,-471.589917,-474.0,29.576647,4.0


In [6]:
# inspect ML setup
my_pipeline.config["ml_setup"]

{'target': ['acc_tb_meas3[0]',
  'acc_tb_meas3[1]',
  'acc_tb_meas3[2]',
  'acc_tt_meas3[0]',
  'acc_tt_meas3[1]',
  'acc_tt_meas3[2]',
  'force_aero_est6[0]',
  'force_aero_est6[1]',
  'force_aero_est6[2]',
  'force_aero_est6[3]',
  'force_aero_est6[4]',
  'force_aero_est6[5]',
  'force_tt_meas6[0]',
  'force_tt_meas6[1]',
  'force_tt_meas6[2]',
  'force_tt_meas6[3]',
  'force_tt_meas6[4]',
  'force_tt_meas6[5]'],
 'predictors': ['pos_act6[0]',
  'pos_act6[1]',
  'pos_act6[2]',
  'pos_act6[3]',
  'pos_act6[4]',
  'pos_act6[5]',
  'spd_rot_act',
  'wind_speed'],
 'save_grid_scores': True,
 'save_best_model': True,
 'n_jobs': 2,
 'use_gpu': False,
 'train_size': 0.7,
 'models': ['en', 'lar', 'llar', 'lasso', 'lr', 'ridge', 'omp', 'ransac'],
 'metrics_sort': 'R2',
 'system_log': './logs.log'}

In [7]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from fowt_ml.linear_models import LinearModels

In [11]:
def train_and_log_model(model_name):
    with mlflow.start_run():
        process = LinearModels(model_name)
        rmse = process.calculate_metric(X_train, X_test, y_train, y_test, "root_mean_squared_error") 
        model = process.model
        
        # Log model parameters and metrics
        mlflow.log_param("model_name", model_name)
        mlflow.log_param("n_estimators", model.n_estimators if hasattr(model, 'n_estimators') else None)
        mlflow.log_param("max_depth", model.max_depth if hasattr(model, 'max_depth') else None)
        mlflow.log_metric("rmse", rmse)
        
        # Log the model itself
        mlflow.sklearn.log_model(model, model_name)
        
        return rmse

In [12]:
# Dictionary to store model performances
model_performances = {}

predictors_labels = my_pipeline.config["ml_setup"]["predictors"]
target_labels = my_pipeline.config["ml_setup"]["target"]

X_data = df[predictors_labels]
Y_data = df[target_labels]

X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, test_size=0.75, shuffle=False, random_state=123)

In [13]:
%%time
# Train, log models, and compare performance
for name in ["LeastAngleRegression","LinearRegression", "LassoRegression", "RidgeRegression"]:
    rmse = train_and_log_model(name)
    model_performances[name] = rmse
print(model_performances)



{'LeastAngleRegression': 3.5914625711937265, 'LinearRegression': 3.5914560337640307, 'LassoRegression': 3.733306142704239, 'RidgeRegression': 3.5914586161568707}
CPU times: user 6.68 s, sys: 203 ms, total: 6.88 s
Wall time: 10.7 s
