# Script to evaluate the ML-Models

Setup: load the quality names from `../app/_meta.csv` and the pickled AutoML model from `../app/automl.pkl`.

In [None]:
import pickle
import csv

# Collect the quality names from the first column of the meta file.
# newline='' is what the csv module documentation asks for, so that the reader
# (not the file object) decides how line endings inside fields are handled.
with open(r"../app/_meta.csv", newline='') as csv_file:
  csv_reader = csv.reader(csv_file, delimiter=',')
  next(csv_reader, None)  # drop the header row; harmless if the file is empty
  # csv.reader yields an empty list for a blank line; skip those rows instead
  # of failing on row[0].
  list_qualities = [row[0] for row in csv_reader if row]

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load automl.pkl when it comes from a trusted source.
with open(r"../app/automl.pkl", "rb") as input_file:
  automl = pickle.load(input_file)

### Inspect the score:

In [None]:
# Print the best estimator, its configuration, the validation accuracy and the
# training time stored on the loaded automl object.
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
# NOTE(review): reporting 1 - best_loss as accuracy assumes the optimised
# metric was accuracy - confirm against the training script.
print('Best accuracy on validation data: {0:.4g}'.format(1 - automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

Test with an example OpenML dataset (ID 12):

In [None]:
import openml as oml
import pandas as pd

# Fetch OpenML dataset 12 and build a one-row DataFrame with one column per
# name in list_qualities, filled from the dataset's qualities.
# NOTE(review): presumably dataset.qualities is a dict, so a missing quality
# becomes None - verify.
dataset = oml.datasets.get_dataset(12)

t = {quality: [dataset.qualities.get(quality)] for quality in list_qualities}
test_df = pd.DataFrame(t)

# Probabilities for the single row, keyed by class, then the predicted class.
class_probabilities = automl.predict_proba(test_df)[0]
print(dict(zip(automl.classes_, class_probabilities)))

predicted_class = automl.predict(test_df)[0]
print(predicted_class)

In [None]:
# Show the class labels exposed by the loaded AutoML object.
automl.classes_

In [None]:
import lightgbm as lgb
import matplotlib.pyplot as plt

https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.plot_importance.html

In [None]:
# Draw the top-10 feature importances once per importance type and save each
# figure as a PNG under graphs/.
# NOTE(review): assumes automl.model.estimator is a LightGBM model and that the
# graphs/ directory already exists - confirm.
importance_types = ['split', 'gain']
for importance_type in importance_types:
  plot_title = 'Feature importance ({})'.format(importance_type)
  output_path = 'graphs/feature_importance ({}).png'.format(importance_type)
  lgb.plot_importance(
    automl.model.estimator,
    importance_type=importance_type,
    max_num_features=10,
    title=plot_title,
    height=0.5,
    grid=False,
    figsize=(5, 5),
  )
  plt.savefig(output_path, dpi=300, bbox_inches="tight")

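Tree visualisation, see https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.plot_tree.html#lightgbm.plot_tree (the cell below uses the related `lightgbm.create_tree_digraph`: https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.create_tree_digraph.html)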

In [None]:
# Build a digraph for one tree of the fitted estimator and open it.
# NOTE(review): max_index is not used in this cell; presumably it records the
# highest valid tree index - confirm.
max_index = 31
index = 31

ax = lgb.create_tree_digraph(
  automl.model.estimator,
  name='Tree{}'.format(index),
  tree_index=index,
  orientation='vertical',
)
ax.view()

In [None]:
from flaml.data import get_output_from_log
# Read the search history back from the given log file, passing a time budget
# of two hours (in seconds).
(
    time_history,
    best_valid_loss_history,
    valid_loss_history,
    config_history,
    metric_history,
) = get_output_from_log(
    filename='logs/automl2022-09-09 15_59_18.040128.log',
    time_budget=60*60*2,
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Learning curve: every logged trial as a point and the best-so-far value as a
# step line, both plotted as 1 - loss against wall-clock time.
valid_accuracy = 1 - np.array(valid_loss_history)
best_valid_accuracy = 1 - np.array(best_valid_loss_history)

plt.scatter(time_history, valid_accuracy)
plt.step(time_history, best_valid_accuracy, where='post')

plt.title('Learning Curve')
plt.xlabel('Wall Clock Time (s)')
plt.ylabel('Validation Accuracy')
plt.savefig('graphs/learning-curve.png', dpi=300, bbox_inches="tight")