In [None]:
import datetime as dt

import polars as pl

from wtg_power_prediction.dataset import load_submission_dataset, load_training_dataset, load_turbine_metadata
from wtg_power_prediction.model import WtgPowerPredictionModel

# --- Training data ---------------------------------------------------------
# Collect the training frame once, then separate the label from the features.
df_train = load_training_dataset().collect()
y_train = df_train.get_column("target")
X_train = df_train.drop("target")

# --- Submission features ---------------------------------------------------
X_test = load_submission_dataset().collect()

# --- Wind-farm location ----------------------------------------------------
# One-row frame: the mean of the "Latitude" and "Longitude" metadata columns.
wf_lat_lon = (
    load_turbine_metadata()
    .select(pl.col("Latitude", "Longitude").mean())
    .collect()
)

# --- Model -----------------------------------------------------------------
model = WtgPowerPredictionModel(
    latitude=wf_lat_lon.item(0, "Latitude"),
    longitude=wf_lat_lon.item(0, "Longitude"),
    # Original author's note: reserve the last year of data for validation.
    validation_start=dt.datetime(2019, 1, 1, tzinfo=dt.UTC),
    # Both budgets are 300 seconds, i.e. five minutes each.
    time_budget_engineering_s=300,
    time_budget_power_s=300,
)

model.fit(X_train, y_train)

In [None]:
# Display the `fig_corr` attribute of the "wind_speed" sub-model.
# NOTE(review): presumably a feature-correlation figure produced during fit — confirm.
model.models["wind_speed"].fig_corr

In [None]:
# Display the `fig_fi` attribute of the "wind_speed" sub-model.
# NOTE(review): presumably a feature-importance figure — confirm.
model.models["wind_speed"].fig_fi

In [None]:
# Show `automl.best_config` for the "wind_speed" sub-model.
# NOTE(review): presumably the configuration selected by the AutoML search — confirm.
model.models["wind_speed"].automl.best_config

In [None]:
# Display the `fig_corr` attribute of the "power" sub-model.
# NOTE(review): presumably a feature-correlation figure produced during fit — confirm.
model.models["power"].fig_corr

In [None]:
# Display the `fig_fi` attribute of the "power" sub-model.
# NOTE(review): presumably a feature-importance figure — confirm.
model.models["power"].fig_fi

In [None]:
# Show `automl.best_config` for the "power" sub-model.
# NOTE(review): presumably the configuration selected by the AutoML search — confirm.
model.models["power"].automl.best_config

In [None]:
# Predict on the submission features with the model fitted earlier.
prediction = model.predict(X_test)

# Attach the predictions as a "prediction" column and keep only the two
# columns written to the submission file.
submission = (
    X_test
    .with_columns(prediction=prediction)
    .select(["id", "prediction"])
)

# Write the result to the working directory, with a header row.
submission.write_csv("submission.csv", include_header=True)