# Notebook for training the ML models

Get datasets

In [1]:
from os import listdir
from os.path import isfile, join

# Directory that is scanned for training datasets (relative to the working
# directory the notebook runs in).
dataset_loc = 'training-data/'

# File names (not full paths) of the regular files directly inside
# `dataset_loc`; sub-directories are skipped by the isfile() check.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the processing order the same on every run and every machine.
datasets_names = sorted(
    f for f in listdir(dataset_loc) if isfile(join(dataset_loc, f))
)

Build and train the ML models:

In [2]:
from flaml import AutoML
import pandas as pd
from sklearn.model_selection import train_test_split

# Settings passed to every train_test_split call: 20% of the rows are held
# out for testing, and the fixed random_state makes the split repeatable.
kwargs = dict(test_size=0.2, random_state=42)

# Maps each dataset's short name to a record holding its data, the
# train/test splits, the fitted AutoML object and its test score.
automl_models = dict()


for dataset_name in datasets_names:
  # Build the path with os.path.join (as the directory listing does) rather
  # than string concatenation, so it does not rely on `dataset_loc` ending
  # with a path separator.
  df = pd.read_csv(join(dataset_loc, dataset_name))

  # Key for this dataset: the file name without its last 9 characters.
  # NOTE(review): presumably this strips a fixed 9-character file-name
  # suffix -- confirm against the naming scheme of the training files.
  dataset_name_short = dataset_name[0:-9]

  # The last column is used as the target; every other column is a feature.
  X, y = df.iloc[:, :-1], df.iloc[:, -1]
  X_train, X_test, y_train, y_test = train_test_split(X, y, **kwargs)

  # Fit a fresh AutoML instance per dataset, with a time budget of 60 for
  # the search.
  automl = AutoML()
  automl.fit(X_train, y_train, task="classification", time_budget=60)

  automl_models[dataset_name_short] = {
    'name': dataset_name_short,
    'df': df,
    'X': X,
    'y': y,
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
    'model': automl,
    # Evaluated on the held-out split, which the model never saw in fit().
    'score': automl.score(X_test, y_test)
  }

[flaml.automl: 09-02 16:51:46] {2427} INFO - task = classification
[flaml.automl: 09-02 16:51:46] {2429} INFO - Data split method: stratified
[flaml.automl: 09-02 16:51:46] {2432} INFO - Evaluation method: cv
[flaml.automl: 09-02 16:51:46] {1227} INFO - class 1 augmented from 7 to 21
[flaml.automl: 09-02 16:51:46] {2551} INFO - Minimizing error metric: 1-roc_auc
[flaml.automl: 09-02 16:51:46] {2691} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 09-02 16:51:46] {2993} INFO - iteration 0, current learner lgbm
[flaml.automl: 09-02 16:51:46] {3126} INFO - Estimated sufficient time budget=944s. Estimated necessary time budget=22s.
[flaml.automl: 09-02 16:51:46] {3173} INFO -  at 0.1s,	estimator lgbm's best error=0.1366,	best estimator lgbm's best error=0.1366
[flaml.automl: 09-02 16:51:46] {2993} INFO - iteration 1, current learner lgbm
[flaml.automl: 09-02 16:51:46] {3173} INFO -  at 0.2s,	estimator lgbm's best er

Save the AutoML models

In [3]:
import pickle

# Serialize the whole `automl_models` dictionary into a single pickle file,
# using the highest pickle protocol this Python version supports.
with open(r'../app/ml-models.pickle', "wb") as model_file:
  pickle.dump(automl_models, model_file, protocol=pickle.HIGHEST_PROTOCOL)