# Script for training the ML-Models

Get datasets

In [6]:
from os import listdir
from os.path import isfile, join

# Directory that holds the training files; each regular file in it is
# treated as one dataset by the training cell below.
dataset_loc = 'training-data/'

# os.listdir returns entries in arbitrary order, so sort them to keep the
# training order (and the order of the resulting model dict) reproducible.
# Hidden files (e.g. .DS_Store, .gitkeep) are skipped because they are not datasets.
datasets_names = sorted(
  f for f in listdir(dataset_loc)
  if isfile(join(dataset_loc, f)) and not f.startswith('.')
)

Build and train the ML models

In [7]:
import os
from datetime import datetime

import pandas as pd
from flaml import AutoML

# Maps a short dataset name to {'model': <fitted AutoML instance>}.
automl_models = dict()

# The log files are written into this directory; create it up front so the
# first fit() call does not fail on a fresh checkout.
log_dir = 'logs'
os.makedirs(log_dir, exist_ok=True)

for dataset_name in datasets_names:
  df = pd.read_csv(os.path.join(dataset_loc, dataset_name))
  # The model key is the file name minus its last 9 characters.
  # NOTE(review): presumably this strips a fixed suffix plus the extension;
  # confirm against the actual file names in training-data/.
  dataset_name_short = dataset_name[0:-9]
  # Every column except the last is used as a feature; the last one is the label.
  X_train, y_train = df.iloc[:, :-1], df.iloc[:, -1]

  # str(datetime.now()) contains a space and colons, which are not valid in
  # file names on every platform, so format the timestamp explicitly.
  run_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

  automl_settings = {
    "task": "classification",
    "time_budget": 2*60,  # seconds of search per dataset
    "metric": 'accuracy',
    "log_file_name": os.path.join(log_dir, '{}_{}.log'.format(dataset_name, run_stamp))
  }

  # A fresh AutoML instance per dataset, so each stored model is independent.
  automl = AutoML()
  automl.fit(X_train, y_train, **automl_settings)

  automl_models[dataset_name_short] = {
    'model': automl
  }

[flaml.automl: 09-09 18:50:58] {2427} INFO - task = classification
[flaml.automl: 09-09 18:50:58] {2429} INFO - Data split method: stratified
[flaml.automl: 09-09 18:50:58] {2432} INFO - Evaluation method: cv
[flaml.automl: 09-09 18:50:58] {1227} INFO - class 1 augmented from 9 to 27
[flaml.automl: 09-09 18:50:58] {2551} INFO - Minimizing error metric: 1-accuracy
[flaml.automl: 09-09 18:50:59] {2691} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 09-09 18:50:59] {2993} INFO - iteration 0, current learner lgbm
[flaml.automl: 09-09 18:50:59] {3126} INFO - Estimated sufficient time budget=814s. Estimated necessary time budget=20s.
[flaml.automl: 09-09 18:50:59] {3173} INFO -  at 0.7s,	estimator lgbm's best error=0.2368,	best estimator lgbm's best error=0.2368
[flaml.automl: 09-09 18:50:59] {2993} INFO - iteration 1, current learner lgbm
[flaml.automl: 09-09 18:50:59] {3173} INFO -  at 0.7s,	estimator l

Save the AutoML models

In [8]:
import pickle

# Serialize the whole dict of fitted models into a single pickle file.
model_store_path = r'../app/ml-models.pickle'
with open(model_store_path, mode="wb") as output_file:
  # HIGHEST_PROTOCOL is the protocol a negative version number selects.
  pickle.dump(automl_models, output_file, protocol=pickle.HIGHEST_PROTOCOL)