In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor
import pandas as pd
import utils
import numpy as np
import random
import string
from autogluon.common import space


def do_prediction(location, limit, name, percentage, trials):
    """Fit an AutoGluon regressor for one location and save its test predictions.

    The data comes from ``utils.preprocess_category_estimated_observed``, which
    returns a training frame, a tuning frame and a test frame. Only the first
    ``percentage`` percent of the tuning rows are handed to AutoGluon as
    ``tuning_data``. Predictions are written to a CSV file in the current
    working directory.

    Args:
        location: Location identifier passed to the preprocessing helper and
            embedded in the output file name (the caller uses 'A', 'B', 'C').
        limit: Time limit in seconds forwarded to ``TabularPredictor.fit``.
        name: Free-form run label embedded in the output file name.
        percentage: Share (0-100) of the tuning frame, taken from its start,
            that is used as tuning data.
        trials: Value passed as ``num_trials`` in ``hyperparameter_tune_kwargs``.

    Returns:
        None. The predictions are printed and written to disk.
    """
    x_train, tuning_data, x_test = utils.preprocess_category_estimated_observed(location)

    # Timestamp columns are not used as features.
    x_train.drop(["time", 'date_forecast'], axis=1, inplace=True)
    tuning_data.drop(["time", 'date_forecast'], axis=1, inplace=True)

    # Keep the forecast timestamps so they can be attached to the predictions.
    x_test_date_forecast = x_test['date_forecast']
    x_test.drop(['date_forecast'], axis=1, inplace=True)

    x_test.fillna(0, inplace=True)

    label = 'pv_measurement'
    train_data = TabularDataset(x_train)

    # Use only the leading `percentage` percent of the tuning rows.
    tuning_fraction = percentage / 100
    tuning_data = TabularDataset(tuning_data)
    tuning_row_count = int(len(tuning_data) * tuning_fraction)
    tuning_data = tuning_data.iloc[:tuning_row_count]

    test_data = TabularDataset(x_test)

    # NOTE: every call saves to the same directory, so an earlier run's
    # artifacts in "AutoGluonTesting" may be overwritten - TODO confirm.
    predictor = TabularPredictor(label=label,
                                 path="AutoGluonTesting",
                                 eval_metric='mean_absolute_error')

    # HPO is not performed unless hyperparameter_tune_kwargs is specified.
    hyperparameter_tune_kwargs = {
        'num_trials': trials,  # upper bound on configurations tried per model type
        'scheduler': 'local',
        'searcher': 'auto',  # presumably resolves to random search - verify against AutoGluon docs
    }

    predictor.fit(train_data,
                  time_limit=limit,
                  tuning_data=tuning_data,
                  hyperparameter_tune_kwargs=hyperparameter_tune_kwargs, )

    y_pred = predictor.predict(test_data)

    print(y_pred)
    preds = pd.DataFrame()
    preds['date_forecast'] = x_test_date_forecast
    # Assign positionally so the prediction index cannot misalign the rows.
    preds['predicted'] = np.asarray(y_pred)

    # Build the path once so the log message always names the file actually
    # written. The on-disk name (including the double underscore) is kept as
    # it was so that existing consumers of these files are not broken.
    output_path = f"{percentage}{name}{trials}__{location}.csv"
    preds.to_csv(output_path)
    print('Saved this file: ' + output_path)

# Sweep over the HPO trial budget: ten rounds, starting at 20 trials and
# adding 10 per round, each round fitting all three locations in order.
# These settings do not change between rounds, so they are set once up front.
time_limit = 60 * 60
percentage = 30
name = "tuning_HPO"

for i in range(10):
    trials = 20 + 10 * i
    print('Starting run with percentage tuning= ' + str(percentage))
    for location in ('A', 'B', 'C'):
        do_prediction(location, time_limit, name, percentage, trials)
    print('Done with run with percentage tuning= ' + str(percentage))


No model was trained during hyperparameter tuning NeuralNetTorch... Skipping this model.
Fitting model: LightGBMLarge ... Training model for up to 294.44s of the 1504.43s of remaining time.
	-5.1223	 = Validation score   (-mean_absolute_error)
	119.87s	 = Training   runtime
	0.36s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 360.0s of the 1377.11s of remaining time.
	-3.8581	 = Validation score   (-mean_absolute_error)
	0.59s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2223.77s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutoGluonTesting/")


0        0.006862
1        0.007724
2        0.008100
3        2.155264
4       27.009487
          ...    
1531    62.562881
1532     8.083497
1533    10.109064
1534     0.002251
1535     0.001569
Name: pv_measurement, Length: 1536, dtype: float32
Saved this file: tuning_HPO_30_C.csv
Done with run with percentage tuning= 30
Starting run with percentage tuning= 30


  estimated_df = estimated_df.resample('H').mean()
  test_df = test_df.resample('H').mean()


Total data points: 34085
Data points to be removed: 0


Beginning AutoGluon training ... Time limit = 3600s
AutoGluon will save models to "AutoGluonTesting/"
AutoGluon Version:  0.8.2
Python Version:     3.9.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Nov 9 20:13:27 UTC 2022
Disk Space Avail:   317099.03 GB / 618408.77 GB (51.3%)
Train Data Rows:    29667
Train Data Columns: 79
Tuning Data Rows:    1325
Tuning Data Columns: 79
Label Column: pv_measurement
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (5733.42, 0.0, 674.14552, 1195.53172)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:

  0%|          | 0/80 [00:00<?, ?it/s]

[1000]	valid_set's l1: 72.2881


	Ran out of time, early stopping on iteration 1084. Best iteration is:
	[1079]	valid_set's l1: 72.0934
	Stopping HPO to satisfy time limit...
Fitted model: LightGBMXT/T1 ...
	-72.311	 = Validation score   (-mean_absolute_error)
	21.32s	 = Training   runtime
	0.12s	 = Validation runtime
Fitted model: LightGBMXT/T2 ...
	-71.3606	 = Validation score   (-mean_absolute_error)
	69.06s	 = Training   runtime
	0.35s	 = Validation runtime
Fitted model: LightGBMXT/T3 ...
	-70.827	 = Validation score   (-mean_absolute_error)
	32.64s	 = Training   runtime
	0.22s	 = Validation runtime
Fitted model: LightGBMXT/T4 ...
	-72.0934	 = Validation score   (-mean_absolute_error)
	165.98s	 = Training   runtime
	0.52s	 = Validation runtime
Hyperparameter tuning model: LightGBM ... Tuning model for up to 294.44s of the 3293.54s of remaining time.


  0%|          | 0/80 [00:00<?, ?it/s]

	Ran out of time, early stopping on iteration 551. Best iteration is:
	[551]	valid_set's l1: 92.048
	Stopping HPO to satisfy time limit...
Fitted model: LightGBM/T1 ...
	-70.7593	 = Validation score   (-mean_absolute_error)
	48.06s	 = Training   runtime
	0.2s	 = Validation runtime
Fitted model: LightGBM/T2 ...
	-67.4734	 = Validation score   (-mean_absolute_error)
	71.19s	 = Training   runtime
	0.28s	 = Validation runtime
Fitted model: LightGBM/T3 ...
	-71.2332	 = Validation score   (-mean_absolute_error)
	55.55s	 = Training   runtime
	0.25s	 = Validation runtime
Fitted model: LightGBM/T4 ...
	-92.048	 = Validation score   (-mean_absolute_error)
	113.74s	 = Training   runtime
	0.56s	 = Validation runtime
Hyperparameter tuning model: RandomForestMSE ... Tuning model for up to 294.44s of the 2999.37s of remaining time.
	No hyperparameter search space specified for RandomForestMSE. Skipping HPO. Will train one model based on the provided hyperparameters.
