In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor
import pandas as pd
import utils
import numpy as np
import random
import string
from autogluon.common import space


def do_prediction(location, limit, name):
    """Fit an AutoGluon regressor with hyperparameter tuning and export forecasts.

    Args:
        location: Location identifier handed to
            ``utils.preprocess_category_estimated_observed`` and embedded in
            the output file name.
        limit: Forwarded to ``TabularPredictor.fit`` as ``time_limit``; also
            used as the leading part of the output file name.
        name: Run label inserted into the output file name.

    Side effects:
        Mutates the frames returned by the preprocessing helper in place,
        stores the predictor under ``AutoGluonTesting`` and writes
        ``<limit><name>_<location>.csv`` with the columns ``date_forecast``
        and ``predicted``. Nothing is returned.
    """
    target = 'pv_measurement'

    def tree_variants():
        # RF and XT share the same per-problem-type criterion variants; a
        # fresh list is built per call so the two entries never share objects.
        return [
            {'criterion': 'gini',
             'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}},
            {'criterion': 'entropy',
             'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}},
            {'criterion': 'squared_error',
             'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}},
        ]

    x_train, tuning_data, x_test = utils.preprocess_category_estimated_observed(location)

    # Both labelled frames get identical clean-up: drop "time" and parse the
    # forecast timestamp column.
    for frame in (x_train, tuning_data):
        frame.drop(columns=["time"], inplace=True)
        frame['date_forecast'] = pd.to_datetime(frame['date_forecast'])

    x_test.fillna(0, inplace=True)

    train_data = TabularDataset(x_train)
    test_data = TabularDataset(x_test)

    # Only the leading 30% of the tuning rows are used for validation.
    tuning_data = TabularDataset(tuning_data)
    cutoff = int(len(tuning_data) * 0.3)
    tuning_data = tuning_data.iloc[:cutoff]

    # Search space for the neural network models.
    # NOTE(review): 'softrelu' is taken from the upstream tutorial; confirm
    # the NN_TORCH backend maps it to an actual activation.
    nn_options = {
        'num_epochs': 100,
        'learning_rate': space.Real(1e-4, 1e-2, default=5e-4, log=True),
        'activation': space.Categorical('relu', 'softrelu', 'tanh'),
        'dropout_prob': space.Real(0.0, 0.5, default=0.1),
    }

    # Search space for the LightGBM models.
    gbm_options = {
        'num_boost_round': 500,
        'num_leaves': space.Int(lower=20, upper=80, default=36),
        'extra_trees': True,
    }

    # Model families to train; a family absent from this dict is not trained.
    hyperparameters = {
        'NN_TORCH': nn_options,
        'GBM': [gbm_options, 'GBMLarge'],
        'CAT': {},
        'XGB': {},
        'FASTAI': {},
        'RF': tree_variants(),
        'XT': tree_variants(),
        'KNN': [
            {'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}},
            {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}},
        ],
    }

    # HPO only runs when these kwargs are supplied: at most 50 trials per
    # model type on the local scheduler, with the searcher left at 'auto'.
    hyperparameter_tune_kwargs = {
        'num_trials': 50,
        'scheduler': 'local',
        'searcher': 'auto',
    }

    predictor = TabularPredictor(
        label=target,
        path="AutoGluonTesting",
        eval_metric='mean_absolute_error',
    )
    predictor.fit(
        train_data,
        time_limit=limit,
        tuning_data=tuning_data,
        hyperparameters=hyperparameters,
        hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
    )

    y_pred = predictor.predict(test_data)
    print(y_pred)

    # Pair each forecast timestamp with its prediction and persist the result.
    preds = pd.DataFrame({
        'date_forecast': x_test['date_forecast'],
        'predicted': np.asarray(y_pred),
    })
    preds.to_csv(f"{limit}{name}_{location}.csv")
    print('Done with Location: ' + location + "================================================================")


def do_prediction_no_tuning(location, limit):
    """Fit an AutoGluon regressor with the ``high_quality`` preset and export forecasts.

    No separate tuning set and no hyperparameter search are used here.

    Args:
        location: Location identifier handed to ``utils.preprocess_category``
            and embedded in the output file name.
        limit: Forwarded to ``TabularPredictor.fit`` as ``time_limit``; also
            used as the leading part of the output file name.

    Side effects:
        Mutates the frames returned by the preprocessing helper in place,
        stores the predictor under ``AutoGluonTesting`` and writes
        ``<limit><RAND>_<location>.csv`` (``RAND`` being four random
        uppercase letters) with the columns ``date_forecast`` and
        ``predicted``. Nothing is returned.
    """
    x_train, x_test = utils.preprocess_category(location)
    x_train.drop(["time"], axis=1, inplace=True)

    x_train['date_forecast'] = pd.to_datetime(x_train['date_forecast'])

    x_test.fillna(0, inplace=True)

    label = 'pv_measurement'
    train_data = TabularDataset(x_train)

    test_data = TabularDataset(x_test)

    predictor = TabularPredictor(label=label,
                                 path="AutoGluonTesting",
                                 eval_metric='mean_absolute_error')

    # Train within the caller-supplied budget; the function must not depend
    # on a module-level ``time_limit`` happening to exist.
    predictor.fit(train_data,
                  time_limit=limit,
                  presets=['high_quality'])

    y_pred = predictor.predict(test_data)

    print(y_pred)
    preds = pd.DataFrame()
    preds['date_forecast'] = x_test['date_forecast']
    preds['predicted'] = np.asarray(y_pred)
    # A random tag makes it less likely that repeated runs with the same
    # limit and location overwrite each other's output.
    random_string = ''.join(random.choices(string.ascii_uppercase, k=4))
    preds.to_csv(str(limit) + random_string + '_' + location + '.csv')
    print('Done with Location: ' + location + "================================================================")


# Per-location training budget handed to ``do_prediction`` (180 * 60).
time_limit = 180 * 60
# Run label that ends up in every output file name.
name = "tester_long_12hours"
# Train and export forecasts for each location in turn.
for site in ('A', 'B', 'C'):
    do_prediction(site, time_limit, name)


Fitted model: NeuralNetFastAI/e13e3a3b ...
	-87.6851	 = Validation score   (-mean_absolute_error)
	733.54s	 = Training   runtime
	0.43s	 = Validation runtime
Fitted model: NeuralNetFastAI/da8be29f ...
	-134.25	 = Validation score   (-mean_absolute_error)
	330.46s	 = Training   runtime
	2.02s	 = Validation runtime
Fitted model: NeuralNetFastAI/c635b2f7 ...
	-115.7485	 = Validation score   (-mean_absolute_error)
	595.27s	 = Training   runtime
	0.72s	 = Validation runtime
Fitted model: NeuralNetFastAI/2a8d83bc ...
	-111.3848	 = Validation score   (-mean_absolute_error)
	440.89s	 = Training   runtime
	0.8s	 = Validation runtime
Fitted model: NeuralNetFastAI/34eb8114 ...
	-87.7022	 = Validation score   (-mean_absolute_error)
	591.14s	 = Training   runtime
	0.76s	 = Validation runtime
Fitted model: NeuralNetFastAI/0e64fe53 ...
	-85.2408	 = Validation score   (-mean_absolute_error)
	552.82s	 = Training   runtime
	0.76s	 = Validation runtime
Fitted model: NeuralNetFastAI/66c4db81 ...
	-279.617

  0%|          | 0/50 [00:00<?, ?it/s]