In [None]:
import os
import random
from datetime import datetime, timezone

import pandas as pd
from flaml import AutoML
from sklearn.metrics import mean_squared_error

In [None]:
# Directory prefix of the prepared CSV datasets. File names are appended
# directly to this string, so the trailing slash is significant.
folder = "./data/updated_datasets/"

# Name of the column the model is trained to predict.
column_target = "pressure"

# Names of the columns used as model inputs.
columns_features = [
    "R-C",
    "max_u_in",
    "min_u_in",
    "u_in_norm",
    "u_in_norm_trend_1",
    "u_in_norm_trend_2",
    "u_in_norm_trend_4",
    "u_out",
    "u_out_threshold_reached",
    "time_step",
]

In [None]:
# Read the `20211004` train and test CSV files from the dataset folder.
dfp_train = pd.read_csv(f"{folder}train_20211004.csv")
dfp_test = pd.read_csv(f"{folder}test_20211004.csv")

In [None]:
# Training inputs (feature columns) and training target (single column).
X_train = dfp_train[columns_features]
y_train = dfp_train[column_target]

# Test inputs: the same feature columns, taken from the test frame.
X_test = dfp_test[columns_features]

In [None]:
# Search budget in seconds (one hour). Also embedded in the log file name
# below and in the submission file name written later in the notebook.
time_budget = 60 * 60

# Create the AutoML instance; its settings are supplied when `fit` is called.
automl = AutoML()

# Goal and constraints passed to `automl.fit` as keyword arguments.
automl_settings = dict(
    time_budget=time_budget,  # in seconds
    metric="r2",
    task="regression",
    log_file_name=f"flaml_{time_budget}_nf20211004.log",
)

In [None]:
# Run the AutoML search on the training data; the goal and constraints come
# from `automl_settings`.
automl.fit(
    X_train=X_train,
    y_train=y_train,
    **automl_settings
)

In [None]:
# Build the submission frame: the test `id` column plus the fitted model's
# predictions. Only `id` is selected, so the full test frame is not copied.
dfp_submissions = dfp_test[["id"]].assign(pressure=automl.predict(X_test))

# Make sure the output directory exists so the write cannot fail on a missing
# folder after the long AutoML search has already finished.
os.makedirs("./data/submissions", exist_ok=True)

# `datetime.utcnow()` is deprecated since Python 3.12; a timezone-aware UTC
# timestamp produces the same YYYYMMDD stamp.
date_stamp = datetime.now(timezone.utc).strftime("%Y%m%d")

# `index=False` is the documented way to leave the row index out of the CSV.
dfp_submissions.to_csv(
    f"./data/submissions/{date_stamp}_flaml_{time_budget}_nf20211004.csv",
    index=False,
)

In [None]:
# Fit one AutoML model per mode and write a separate submission file for each.
#
# NOTE(review): `AutoML(mode=...)` and the `_mljar_` file name suggest this
# cell was written for a different AutoML class than the `flaml.AutoML` that
# the notebook imports — confirm which library is intended and import it
# explicitly; as written, the cell depends on whichever `AutoML` is bound in
# the kernel when it runs.
# NOTE(review): the loop rebinds `automl`, so a model fitted in an earlier
# cell is no longer reachable under that name afterwards.

# Make sure the output directory exists so a long fit is not lost to a
# missing folder at write time.
os.makedirs("./data/submissions", exist_ok=True)

for mode in ["Perform", "Compete"]:
    automl = AutoML(mode=mode)  # mode=Explain, Perform, Compete
    automl.fit(X_train, y_train)

    # Only `id` is carried over from the test frame, so it is not copied whole.
    dfp_submissions = dfp_test[["id"]].assign(pressure=automl.predict(X_test))

    # The stamp is taken at write time for each file. `datetime.utcnow()` is
    # deprecated since Python 3.12; the aware UTC timestamp formats the same.
    date_stamp = datetime.now(timezone.utc).strftime("%Y%m%d")

    # `index=False` is the documented way to leave the row index out of the CSV.
    dfp_submissions.to_csv(
        f"./data/submissions/{date_stamp}_mljar_{mode.lower()}.csv",
        index=False,
    )