In [5]:
import os
import random
from datetime import datetime, timezone

import pandas as pd
from flaml import AutoML
from sklearn.metrics import mean_squared_error

In [6]:
# Regression target and the input columns fed to the model as features.
column_target = "pressure"
columns_features = [
    "R",
    "C",
    "u_in",
    "u_out",
]

# Directory holding train.csv / test.csv. The trailing slash is required:
# file names are appended to this string directly when the data is loaded.
folder = "./data/20210921/"

In [7]:
# Load the train and test splits (in that order) from the data folder.
dfp_train, dfp_test = (
    pd.read_csv(f"{folder}{csv_name}") for csv_name in ("train.csv", "test.csv")
)

In [8]:
# Training inputs: the selected feature columns.
X_train = dfp_train[columns_features]
# Training labels: the target column.
y_train = dfp_train[column_target]
# Only the feature columns are taken from the test split.
X_test = dfp_test[columns_features]

In [18]:
# Search budget: six hours, expressed in seconds.
time_budget = 6 * 60 * 60

# Goal and constraints that are unpacked into the AutoML fit call.
automl_settings = dict(
    time_budget=time_budget,  # in seconds
    metric="r2",
    task="regression",
    log_file_name=f"flaml_{time_budget}.log",
)

# The AutoML instance that will be fitted with the settings above.
automl = AutoML()

In [19]:
# Run the AutoML search on the training data, bounded by `automl_settings`.
automl.fit(
    X_train=X_train,
    y_train=y_train,
    **automl_settings,
)

[flaml.automl: 09-28 22:50:52] {1432} INFO - Evaluation method: holdout
2021-09-28 22:50:52,245 flaml.automl INFO Evaluation method: holdout
[flaml.automl: 09-28 22:50:53] {1478} INFO - Minimizing error metric: 1-r2
2021-09-28 22:50:53,664 flaml.automl INFO Minimizing error metric: 1-r2
[flaml.automl: 09-28 22:50:53] {1515} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree']
2021-09-28 22:50:53,665 flaml.automl INFO List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree']
[flaml.automl: 09-28 22:50:53] {1748} INFO - iteration 0, current learner lgbm
2021-09-28 22:50:53,668 flaml.automl INFO iteration 0, current learner lgbm
[flaml.automl: 09-28 22:50:53] {1866} INFO - Estimated sufficient time budget=316624s. Estimated necessary time budget=681s.
2021-09-28 22:50:53,728 flaml.automl INFO Estimated sufficient time budget=316624s. Estimated necessary time budget=681s.
[flaml.automl: 09-28 22:50:53] {1944} INFO -  at 

In [20]:
# Write the submission file for the fitted FLAML model: one row per test id
# with the predicted target value.
submissions_dir = "./data/submissions/"
# to_csv does not create missing parent directories, so create the folder up
# front instead of failing after prediction has already run.
os.makedirs(submissions_dir, exist_ok=True)

# Start from the id column only (no need to copy every test column) and name
# the prediction column after `column_target` so it stays in sync with the
# column the model was trained on.
dfp_submissions = dfp_test[["id"]].copy()
dfp_submissions[column_target] = automl.predict(X_test)

# UTC date stamp for the file name; datetime.now(timezone.utc) replaces the
# deprecated datetime.utcnow() and formats to the same YYYYMMDD string.
stamp = datetime.now(timezone.utc).strftime('%Y%m%d')
# index=False: `index` is a boolean flag; the row index is not part of the output.
dfp_submissions.to_csv(f"{submissions_dir}{stamp}_flaml_{time_budget}.csv", index=False)

In [12]:
# Fit one AutoML model per mode and write a submission file for each.
#
# NOTE(review): the only AutoML imported in this file is flaml's, but this cell
# passes `mode=` and the output recorded under it ("AutoML directory",
# "AutoML steps", ...) together with the "_mljar_" file names point at a
# different AutoML class — presumably mljar-supervised's. Confirm which import
# this cell needs before re-running on a fresh kernel.
# NOTE(review): the loop rebinds `automl`, which the FLAML cells also use;
# running cells out of order will predict with whichever model was fitted last.
submissions_dir = "./data/submissions/"
# to_csv does not create missing parent directories; create the folder before
# any training starts so a missing folder cannot waste a finished fit.
os.makedirs(submissions_dir, exist_ok=True)

for mode in ["Perform", "Compete"]:
    automl = AutoML(mode=mode)  # mode=Explain, Perform, Compete
    automl.fit(X_train, y_train)

    # Keep only the id column and add the prediction under the target's name,
    # so the output column cannot drift from `column_target`.
    dfp_submissions = dfp_test[["id"]].copy()
    dfp_submissions[column_target] = automl.predict(X_test)

    # Stamp is taken after the fit, as in the original, so it reflects the
    # date the file is written. datetime.now(timezone.utc) replaces the
    # deprecated datetime.utcnow() and yields the same YYYYMMDD string.
    stamp = datetime.now(timezone.utc).strftime('%Y%m%d')
    # index=False: `index` is a boolean flag; the row index is not written.
    dfp_submissions.to_csv(f"{submissions_dir}{stamp}_mljar_{mode.lower()}.csv", index=False)

Linear algorithm was disabled.
AutoML directory: AutoML_2
The task is regression with evaluation metric rmse
AutoML will use algorithms: ['Random Forest', 'LightGBM', 'Xgboost', 'CatBoost', 'Neural Network']
AutoML will ensemble availabe models
AutoML steps: ['simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'ensemble']
Skip simple_algorithms because no parameters were generated.
* Step default_algorithms will try to check up to 5 models
1_Default_LightGBM rmse 5.662249 trained in 1022.52 seconds (1-sample predict time 0.0413 seconds)
2_Default_Xgboost rmse 5.671929 trained in 479.58 seconds (1-sample predict time 0.0414 seconds)
* Step not_so_random will try to check up to 20 models
7_LightGBM not trained. Force to stop the training. Total time for AutoML training already exceeded.
Skip golden_features because no parameters were generated.
Skip insert_random_feature becaus