# LightGBM: Train without covariates

### Setup

In [1]:
import sys
sys.path.append('../')
from src.load_data import *
from src.realtime_utils import *
from src.hp_tuning_functions import *

In [2]:
from darts.models.forecasting.lgbm import LightGBMModel

  "ds": pd.date_range(start="1949-01-01", periods=len(AirPassengers), freq="M"),


In [3]:
# Authenticate with Weights & Biases before the tuning results are queried.
# NOTE(review): get_best_parameters (used in the next section) presumably reads
# its runs from W&B, which is why the login happens first — confirm.
import wandb
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mdwolffram[0m ([33mdwolffram-karlsruhe-institute-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# Load best model

In [4]:
# Fetch the hyperparameters of the best tuning run for the no-covariates setup.
# The helper prints the winning run's WIS (see the cell output).
# NOTE(review): 'sari-lightgbm' / 'lightgbm' / 'WIS' are presumably the W&B
# project, the model name and the ranking metric — confirm in src.hp_tuning_functions.
params = get_best_parameters('sari-lightgbm', 'lightgbm', 'WIS', use_covariates=False)

WIS of best run: 453.6010618302779


In [5]:
# Show the selected parameters in alphabetical key order for easier reading.
dict(sorted(params.items()))

{'colsample_bytree': 0.8,
 'lags': 8,
 'lags_future_covariates': [0, 1],
 'lags_past_covariates': {'agi-are-05-14': 8,
  'agi-are-35-59': 8,
  'agi-are-15-34': 8,
  'agi-are-60+': 8,
  'agi-are-DE': 8,
  'agi-are-00-04': 8,
  'default_lags': 1},
 'learning_rate': 0.01,
 'max_bin': 1024,
 'max_depth': -1,
 'min_child_samples': 40,
 'min_split_gain': 0,
 'model': 'lightgbm',
 'multiple_series': False,
 'n_estimators': 1000,
 'num_leaves': 20,
 'reg_alpha': 0,
 'reg_lambda': 0.5,
 'sample_weight': 'linear',
 'subsample': 0.8,
 'subsample_freq': 1,
 'use_covariates': False,
 'use_encoders': True,
 'use_features': False,
 'use_static_covariates': False}

In [6]:
# Split the run-level settings off from the hyperparameters: after this cell,
# `params` holds only what is forwarded to the LightGBMModel constructor.
# NOTE: this mutates `params` in place, so the cell cannot be re-run on its own;
# re-run the `get_best_parameters` cell first.
name, multiple_series, use_covariates, use_features, use_encoders, sample_weight = [
    params.pop(key)
    for key in (
        'model',
        'multiple_series',
        'use_covariates',
        'use_features',
        'use_encoders',
        'sample_weight',
    )
]

# Without past covariates the corresponding lag specification must not be passed on.
if not use_covariates:
    params.pop('lags_past_covariates')

# Build one model from the cleaned parameters (the training loop below
# creates a fresh model per forecast date and seed).
model = LightGBMModel(
    likelihood="quantile",
    quantiles=QUANTILES,
    output_chunk_length=HORIZON,
    add_encoders=ENCODERS if use_encoders else None,
    verbose=-1,
    **params,
)

# Load data

In [7]:
# Load the realtime training data without an `as_of` cut-off.
# Note: the training loop below rebinds `targets` and `covariates` to
# per-forecast-date snapshots.
targets, covariates = load_realtime_training_data()

# Train model

In [8]:
# Train one model per (forecast date, seed) pair and store each under
# ../models/post-covid/<forecast_date>/.
for forecast_date in FORECAST_DATES:

    # Every forecast date gets its own output directory.
    path = f'../models/post-covid/{forecast_date}/'
    os.makedirs(path, exist_ok=True)

    # Use the data snapshot requested via `as_of` for this forecast date.
    targets, covariates = load_realtime_training_data(as_of=forecast_date)

    # Arguments for `fit` that are identical for every seed of this forecast date.
    # NOTE(review): `custom_weights` is not defined in this notebook; presumably it
    # comes from one of the star-imports — confirm.
    fit_kwargs = dict(
        past_covariates=covariates if use_covariates else None,
        sample_weight=custom_weights if sample_weight == "no-covid" else sample_weight,
    )

    # Constructor arguments shared by all seeds; only `random_state` varies.
    shared_model_kwargs = dict(
        **params,
        output_chunk_length=HORIZON,
        add_encoders=ENCODERS if use_encoders else None,
        likelihood="quantile",
        quantiles=QUANTILES,
        verbose=-1,
    )

    for seed in RANDOM_SEEDS:
        model_path = path + f'{forecast_date}-lightgbm_noCovariates-{seed}.pt'
        print(model_path)

        model = LightGBMModel(random_state=seed, **shared_model_kwargs)
        model.fit(targets, **fit_kwargs)
        model.save(model_path)

../models/post-covid/2023-11-16/2023-11-16-lightgbm_noCovariates-1.pt



KeyboardInterrupt



# Forecast

In [9]:
# Model identifier used by compute_ensemble for the submission directory and file name.
NAME = 'lightgbm_noCovariates'

In [10]:
# Reload the data without an `as_of` cut-off: the training loop above leaves
# `targets`/`covariates` bound to the snapshot of the last forecast date it processed.
targets, covariates = load_realtime_training_data()

In [11]:
# Display the flag: compute_ensemble passes covariates to compute_forecast only if it is truthy.
use_covariates

False

In [13]:
def compute_ensemble(forecast_date, export=False):
    """Average the per-seed LightGBM forecasts for a single forecast date.

    For every seed in ``RANDOM_SEEDS`` the model saved under
    ``../models/post-covid/<forecast_date>/`` is loaded and handed to
    ``compute_forecast``. The per-seed frames are stacked and ``value`` is
    averaged across seeds within each (location, age_group, forecast_date,
    target_end_date, horizon, type, quantile) group.

    Reads the notebook globals ``targets``, ``covariates``, ``use_covariates``,
    ``HORIZON``, ``NUM_SAMPLES`` and (when exporting) ``NAME``.

    Parameters
    ----------
    forecast_date
        Forecast date; interpolated into the model and submission paths and
        forwarded to ``compute_forecast``.
    export : bool, default False
        If true, additionally write the ensemble to
        ``../data/post-covid/submissions/<NAME>/<forecast_date>-icosari-sari-<NAME>.csv``.
        The directory is created if it does not exist yet.

    Returns
    -------
    pandas.DataFrame
        The seed-averaged forecasts, sorted by location, age_group, horizon
        and quantile. Returned regardless of ``export``.
    """
    # Local import so the function does not depend on a star-import exposing `os`.
    import os

    group_keys = ['location', 'age_group', 'forecast_date', 'target_end_date', 'horizon', 'type', 'quantile']
    past_covariates = covariates if use_covariates else None

    seed_forecasts = []
    for seed in RANDOM_SEEDS:
        model_path = f'../models/post-covid/{forecast_date}/{forecast_date}-lightgbm_noCovariates-{seed}.pt'
        model = LightGBMModel.load(model_path)
        seed_forecasts.append(
            compute_forecast(
                model,
                targets,
                past_covariates,
                forecast_date,
                HORIZON,
                NUM_SAMPLES,
                vincentization=False,
                probabilistic_nowcast=True,
                local=True,
            )
        )

    # Ensemble = mean over seeds within each forecast cell.
    ensemble = (
        pd.concat(seed_forecasts)
        .groupby(group_keys)
        .agg({'value': 'mean'})
        .reset_index()
        .sort_values(['location', 'age_group', 'horizon', 'quantile'])
    )

    if export:
        out_dir = f'../data/post-covid/submissions/{NAME}'
        # to_csv does not create missing directories; mirror the makedirs
        # call the training cell uses for the model directory.
        os.makedirs(out_dir, exist_ok=True)
        ensemble.to_csv(f'{out_dir}/{forecast_date}-icosari-sari-{NAME}.csv', index=False)

    return ensemble

In [None]:
# Compute and export (export=True writes one CSV per date) the seed ensemble
# for every forecast date, keeping the resulting frames in `forecasts`.
forecasts = []
for forecast_date in FORECAST_DATES:
    print(forecast_date)  # progress indicator: one line per forecast date
    forecast = compute_ensemble(forecast_date, export=True)
    forecasts.append(forecast)

2023-11-16
2023-11-23
2023-11-30
2023-12-07
2023-12-14
2023-12-21
2023-12-28
2024-01-04
2024-01-11
2024-01-18
2024-01-25
2024-02-01
2024-02-08
2024-02-15
