In [1]:
pip install "flaml[automl]"

Collecting flaml[automl]
  Downloading FLAML-1.2.4-py3-none-any.whl (260 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.5/260.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: flaml
Successfully installed flaml-1.2.4
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from flaml import AutoML
from sklearn.model_selection import train_test_split

In [3]:
# Common prefix shared by the processed quality-of-life CSV files.
base_path = '/kaggle/input/self-reported-qol/20230625-processed-'

# Load both domains and discard the 'id' and 'day' columns straight away.
df_physical = pd.read_csv(base_path + 'physical-qol.csv').drop(columns=['id', 'day'])
df_psychological = pd.read_csv(base_path + 'psychological-qol.csv').drop(columns=['id', 'day'])

# Physical domain: the full table plus two subsets selected on "group".
# The "group" column itself is removed from every resulting frame.
phy_all_tmp = df_physical.drop(columns="group")
phy_init_set_tmp = df_physical.query("group in ('Initial Set')").drop(columns="group")
phy_ufpi_ufc_tmp = df_physical.query("group in ('UFPI', 'UFC')").drop(columns="group")

# Psychological domain: same three views.
psy_all_tmp = df_psychological.drop(columns="group")
psy_init_set_tmp = df_psychological.query("group in ('Initial Set')").drop(columns="group")
psy_ufpi_ufc_tmp = df_psychological.query("group in ('UFPI', 'UFC')").drop(columns="group")

# Separate the predictors from the value to be predicted.
# The *_tmp frames are left untouched; the target series are independent copies.
phy_target = "phy_ref_score"
phy_all = phy_all_tmp.drop(columns=phy_target)
phy_all_pred = phy_all_tmp[phy_target].copy()
phy_init_set = phy_init_set_tmp.drop(columns=phy_target)
phy_init_set_pred = phy_init_set_tmp[phy_target].copy()
phy_ufpi_ufc = phy_ufpi_ufc_tmp.drop(columns=phy_target)
phy_ufpi_ufc_pred = phy_ufpi_ufc_tmp[phy_target].copy()

psy_target = "psy_ref_score"
psy_all = psy_all_tmp.drop(columns=psy_target)
psy_all_pred = psy_all_tmp[psy_target].copy()
psy_init_set = psy_init_set_tmp.drop(columns=psy_target)
psy_init_set_pred = psy_init_set_tmp[psy_target].copy()
psy_ufpi_ufc = psy_ufpi_ufc_tmp.drop(columns=psy_target)
psy_ufpi_ufc_pred = psy_ufpi_ufc_tmp[psy_target].copy()

In [4]:
def getDataset(domain_group):
    """Look up the (predictors, target) pair registered under ``domain_group``.

    Recognised names are 'phy_all', 'psy_all', 'phy_init_set', 'psy_init_set',
    'phy_ufpi_ufc' and 'psy_ufpi_ufc'; each maps to the module-level
    predictor frame and its matching ``*_pred`` target.

    Raises:
        KeyError: if ``domain_group`` is not one of the names above.
    """
    datasets = dict(
        phy_all=(phy_all, phy_all_pred),
        psy_all=(psy_all, psy_all_pred),
        phy_init_set=(phy_init_set, phy_init_set_pred),
        psy_init_set=(psy_init_set, psy_init_set_pred),
        phy_ufpi_ufc=(phy_ufpi_ufc, phy_ufpi_ufc_pred),
        psy_ufpi_ufc=(psy_ufpi_ufc, psy_ufpi_ufc_pred),
    )
    return datasets[domain_group]

In [5]:
# Select the dataset to model, then hold out 30% of the rows for testing.
# The fixed random_state keeps the split identical across runs.
X, y = getDataset('phy_all')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.30,
)

In [6]:
# Initialize an AutoML instance
# https://microsoft.github.io/FLAML/docs/Getting-Started
automl = AutoML()

# Specify automl goal and constraint
automl_settings = {
    "time_budget": 60*10,  # in seconds
    "metric": 'mae',      # mae, rmse, r2
    "task": 'regression',
    "log_file_name": "phy_all.log",
}

# Train with labeled input data
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

# Predict
# print(automl.predict(X_train))

# Print the best model
#print(automl.model.estimator)

# Summarise the search result: winning learner, its configuration, and how long
# that configuration took to train.
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
# The template must be formatted explicitly; passing the value as a second
# print() argument would emit the literal "{0:.4g}" placeholder.
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

[flaml.automl.logger: 06-29 19:06:52] {1693} INFO - task = regression
[flaml.automl.logger: 06-29 19:06:52] {1700} INFO - Data split method: uniform
[flaml.automl.logger: 06-29 19:06:52] {1703} INFO - Evaluation method: cv
[flaml.automl.logger: 06-29 19:06:52] {1801} INFO - Minimizing error metric: mae
[flaml.automl.logger: 06-29 19:06:52] {1911} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl.logger: 06-29 19:06:52] {2221} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 06-29 19:06:53] {2347} INFO - Estimated sufficient time budget=1761s. Estimated necessary time budget=15s.
[flaml.automl.logger: 06-29 19:06:53] {2394} INFO -  at 0.5s,	estimator lgbm's best error=10.7734,	best estimator lgbm's best error=10.7734
[flaml.automl.logger: 06-29 19:06:53] {2221} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 06-29 19:06:53] {2394} INFO -  at 0.7s,	estimator lgbm's best error=10.7734,