In [21]:
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

#!pip install autogluon
# Remember to use a Python version lower than 3.11.0.

In [22]:
# Load the pre-cleaned feature/target tables (train features, train targets, test features).
x_train, y_train, x_test = map(
    pd.read_csv,
    (
        'cleaned_and_combined_data/x_train_combined.csv',
        'cleaned_and_combined_data/y_train_combined.csv',
        'cleaned_and_combined_data/x_test_combined.csv',
    ),
)

# Attach the target column to the training features, side by side (aligned on the row index).
target_column = y_train['pv_measurement']
x_train_merged = pd.concat([x_train, target_column], axis="columns")

In [23]:
def location_labels(frame):
    """Collapse the one-hot location columns of ``frame`` into one label per row.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain the columns 'location_A' and 'location_B'.

    Returns
    -------
    pd.Series
        'A' where ``location_A == 1``, otherwise 'B' where ``location_B == 1``,
        otherwise 'C'. Indexed like ``frame``.
    """
    labels = pd.Series('C', index=frame.index)
    labels = labels.mask(frame['location_B'] == 1, 'B')
    # Applied last so that 'A' takes precedence when both flags are set,
    # i.e. the same priority as testing location_A first.
    return labels.mask(frame['location_A'] == 1, 'A')


# Convert the three one-hot columns into one column named 'location'.
# x_test is labelled from its own one-hot columns so the labels line up with the test rows.
x_train_merged['location'] = location_labels(x_train)
x_test['location'] = location_labels(x_test)

# Drop the original three columns
x_train_merged.drop(['location_A', 'location_B', 'location_C'], axis=1, inplace=True)
x_test.drop(['location_A', 'location_B', 'location_C'], axis=1, inplace=True)

x_train_merged.head()

Unnamed: 0,date_forecast,absolute_humidity_2m:gm3,air_density_2m:kgm3,ceiling_height_agl:m,clear_sky_energy_1h:J,clear_sky_rad:W,cloud_base_agl:m,dew_or_rime:idx,dew_point_2m:K,diffuse_rad:W,...,forecast_year,forecast_month,forecast_day,forecast_hour,calc_year,calc_month,calc_day,calc_hour,pv_measurement,location
0,2019-01-01 00:00:00,5.525,1.23975,1200.675,0.0,0.0,1200.675,0.0,275.15,0.0,...,2019,1,1,0,,,,,0.0,B
1,2019-01-01 01:00:00,5.425,1.23975,1131.4249,0.0,0.0,1131.4249,0.0,274.825,0.0,...,2019,1,1,1,,,,,0.0,B
2,2019-01-01 02:00:00,5.4,1.2385,1061.0,0.0,0.0,1061.0,0.0,274.8,0.0,...,2019,1,1,2,,,,,0.0,B
3,2019-01-01 03:00:00,5.35,1.23975,1021.15,0.0,0.0,1021.15,0.0,274.675,0.0,...,2019,1,1,3,,,,,0.0,B
4,2019-01-01 04:00:00,5.675,1.2375,1033.7,0.0,0.0,1033.7,0.0,275.5,0.0,...,2019,1,1,4,,,,,0.0,B


In [24]:
# Columns that are replaced by the single 'observed' flag below.
calc_columns = ['calc_year', 'calc_month', 'calc_day', 'calc_hour']

for frame in (x_test, x_train_merged):
    # observed == 1 exactly where calc_year is missing, 0 otherwise.
    frame['observed'] = frame['calc_year'].isna().astype(int)
    frame.drop(columns=calc_columns, inplace=True)

x_train_merged.head()

Unnamed: 0,date_forecast,absolute_humidity_2m:gm3,air_density_2m:kgm3,ceiling_height_agl:m,clear_sky_energy_1h:J,clear_sky_rad:W,cloud_base_agl:m,dew_or_rime:idx,dew_point_2m:K,diffuse_rad:W,...,wind_speed_u_10m:ms,wind_speed_v_10m:ms,wind_speed_w_1000hPa:ms,forecast_year,forecast_month,forecast_day,forecast_hour,pv_measurement,location,observed
0,2019-01-01 00:00:00,5.525,1.23975,1200.675,0.0,0.0,1200.675,0.0,275.15,0.0,...,1.175,2.55,0.0,2019,1,1,0,0.0,B,1
1,2019-01-01 01:00:00,5.425,1.23975,1131.4249,0.0,0.0,1131.4249,0.0,274.825,0.0,...,1.525,3.2,0.0,2019,1,1,1,0.0,B,1
2,2019-01-01 02:00:00,5.4,1.2385,1061.0,0.0,0.0,1061.0,0.0,274.8,0.0,...,2.15,3.425,0.0,2019,1,1,2,0.0,B,1
3,2019-01-01 03:00:00,5.35,1.23975,1021.15,0.0,0.0,1021.15,0.0,274.675,0.0,...,3.425,3.5,0.0,2019,1,1,3,0.0,B,1
4,2019-01-01 04:00:00,5.675,1.2375,1033.7,0.0,0.0,1033.7,0.0,275.5,0.0,...,5.575,2.2,0.0,2019,1,1,4,0.0,B,1


In [35]:
# Build the AutoGluon time-series frame: 'location' identifies each series,
# 'date_forecast' supplies the timestamps.
series_columns = dict(id_column="location", timestamp_column="date_forecast")
train_data = TimeSeriesDataFrame.from_data_frame(x_train_merged, **series_columns)

# Number of missing entries per column.
missing_values = train_data.isna().sum()
print(missing_values)


absolute_humidity_2m:gm3             72
air_density_2m:kgm3                  72
ceiling_height_agl:m              14830
clear_sky_energy_1h:J                 0
clear_sky_rad:W                      72
cloud_base_agl:m                   6190
dew_or_rime:idx                      72
dew_point_2m:K                       72
diffuse_rad:W                        72
diffuse_rad_1h:J                      0
direct_rad:W                         72
direct_rad_1h:J                       0
effective_cloud_cover:p              72
elevation:m                          72
fresh_snow_12h:cm                     0
fresh_snow_1h:cm                      0
fresh_snow_24h:cm                     0
fresh_snow_3h:cm                      0
fresh_snow_6h:cm                      0
is_day:idx                           72
is_in_shadow:idx                     72
msl_pressure:hPa                     72
precip_5min:mm                        0
precip_type_5min:idx                 72
pressure_100m:hPa                    72


In [33]:
# Predictor configuration: forecast `pv_measurement` 24*30 steps ahead, scored by RMSE.
# NOTE(review): 24*30 reads as 30 days of hourly steps — confirm against the data frequency.
predictor_settings = dict(
    prediction_length=24 * 30,
    path="autogluon-ts",
    target="pv_measurement",
    eval_metric="RMSE",
    ignore_time_index=True,
)
predictor = TimeSeriesPredictor(**predictor_settings)

# Training budget is passed straight to AutoGluon (time_limit presumably in seconds — confirm).
predictor.fit(train_data, presets="best_quality", time_limit=4500)

