# LightGBM: Oracle

### Setup

In [1]:
import sys
sys.path.append('../')
from src.load_data import *
from src.realtime_utils import *
from src.hp_tuning_functions import *

In [2]:
from darts.models.forecasting.lgbm import LightGBMModel

  "ds": pd.date_range(start="1949-01-01", periods=len(AirPassengers), freq="M"),


# Load data

In [9]:
# Load the two objects returned by the project helper; they are kept as
# notebook-level globals (`targets`, `covariates`).
# NOTE(review): presumably target series and matching covariate series for
# the real-time setting -- confirm against src.load_data.
targets, covariates = load_realtime_training_data()

# Forecast

In [31]:
# Model identifier; compute_ensemble interpolates it into both the submission
# sub-directory and the exported CSV file name.
NAME = 'lightgbm_oracle'

In [29]:
# Same call as in the "Load data" cell above; re-running it rebinds the
# globals `targets` and `covariates` that compute_ensemble reads.
targets, covariates = load_realtime_training_data()

In [30]:
def compute_forecast(model, target_series, covariates, forecast_date, horizon, num_samples, vincentization=True, probabilistic_nowcast=True, local=False, oracle_nowcast=True):
    '''
    Predict `horizon` steps ahead with `model` and return the reshaped forecast.

    Two input modes are supported:

    * oracle_nowcast=True: the target series is sliced up to
      pd.Timestamp(forecast_date) and used directly; `covariates` is passed to
      the model unchanged.
    * oracle_nowcast=False: the indicator is taken from the first component
      name of `target_series` (second '-'-separated token), the nowcast is
      loaded with load_nowcast, and make_target_paths builds the target paths
      from it. Every path is passed through encode_static_covariates with
      ordinal=False, and `covariates` is repeated once per path (or replaced
      by None when it is falsy).

    The model prediction is handed to reshape_forecast and that result is
    returned.
    NOTE(review): combining the per-path predictions into a single forecast
    is presumably done inside reshape_forecast -- confirm.

    `vincentization` is accepted for call compatibility but is not used in
    this function.
    '''
    if not oracle_nowcast:
        # Nowcast mode: build one input series per nowcast path.
        indicator_name = target_series.components[0].split('-')[1]
        nowcast = load_nowcast(forecast_date, probabilistic_nowcast, indicator_name, local)
        model_input = [
            encode_static_covariates(path, ordinal=False)
            for path in make_target_paths(target_series, nowcast)
        ]
        # The model needs one covariate entry per input series.
        covariates = [covariates] * len(model_input) if covariates else None
    else:
        # Oracle mode: use the target series itself, cut at the forecast date.
        model_input = target_series[:pd.Timestamp(forecast_date)]

    prediction = model.predict(
        n=horizon,
        series=model_input,
        past_covariates=covariates,
        num_samples=num_samples,
    )

    return reshape_forecast(prediction)

In [32]:
def compute_ensemble(forecast_date, export=False):
    '''
    Average the forecasts of all seed-specific LightGBM models for one date.

    For each seed in RANDOM_SEEDS the saved model for `forecast_date` is
    loaded and compute_forecast is called in oracle mode on the notebook-level
    `targets` and `covariates`. The per-seed frames are concatenated, the
    `value` column is averaged within each
    (location, age_group, forecast_date, target_end_date, horizon, type,
    quantile) group, and the result is sorted by location, age_group, horizon
    and quantile.

    When `export` is true the frame is also written as CSV (without index)
    into the submissions folder for NAME. The averaged frame is returned in
    either case.

    NOTE(review): groupby is called with pandas defaults; if any grouping
    column can contain NaN (e.g. `quantile`), those rows would not appear in
    the result -- confirm against the output of reshape_forecast.
    '''
    group_keys = ['location', 'age_group', 'forecast_date', 'target_end_date', 'horizon', 'type', 'quantile']

    member_forecasts = []
    for seed in RANDOM_SEEDS:
        print(seed)
        member = LightGBMModel.load(
            f'../models/post-covid/{forecast_date}/{forecast_date}-lightgbm-{seed}.pt'
        )
        member_forecasts.append(
            compute_forecast(
                member, targets, covariates, forecast_date, HORIZON, NUM_SAMPLES,
                vincentization=False, probabilistic_nowcast=True, local=True, oracle_nowcast=True,
            )
        )

    ensemble = (
        pd.concat(member_forecasts)
        .groupby(group_keys)
        .agg({'value': 'mean'})
        .reset_index()
        .sort_values(['location', 'age_group', 'horizon', 'quantile'])
    )

    if export:
        ensemble.to_csv(f'../data/post-covid/submissions/{NAME}/{forecast_date}-icosari-sari-{NAME}.csv', index=False)

    return ensemble

In [None]:
# Run the seed ensemble for every date in FORECAST_DATES. export=True makes
# compute_ensemble write one CSV per date; the returned frames are also
# collected in `forecasts` for later use in the notebook.
forecasts = []
for forecast_date in FORECAST_DATES:
    print(forecast_date)  # progress output
    forecast = compute_ensemble(forecast_date, export=True)
    forecasts.append(forecast)