# Motivation 

The purpose of this training task is to predict the next 3 days of claims for each city.

# Libraries

In [1]:
from itertools import product

import lightning.pytorch as pl
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.loggers import TensorBoardLogger

import os

import pandas as pd

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting.data import TorchNormalizer
from pytorch_forecasting.metrics import MAE, QuantileLoss

from sklearn.model_selection import train_test_split

# Add the ../src directory to the Python path
import sys
sys.path.append(os.path.abspath(os.path.join('..')))

from src import (
    adjust_column_names)

# Data Import

In [2]:
# Load the raw insurance sample from ../data.
# 'Date' is parsed into datetimes on read; 'Phone' is forced to str so pandas
# does not infer a numeric type for it.
source_data = pd.read_csv(
    os.path.join('..', 'data', 'insurance_data_sample.csv'),
    dtype={'Phone': str},
    parse_dates=['Date'],
    encoding='utf-8',
    sep=',',
)

# Preprocess

In [3]:
# Work on a copy so that preprocessing never modifies the frame held in source_data.
prep_data = source_data.copy()

## Adjust column names

In [4]:
# adjust_column_names is a project helper imported from src; its body is not visible here.
# NOTE(review): presumably it normalizes the headers — the CSV is read with 'Date' /
# 'Phone', while later cells use lowercase 'date', 'city', 'claim_amount'. Confirm in src.
prep_data = adjust_column_names(prep_data)

## Calculate monthly claims

In [5]:
# Aggregate claims to one row per (month, city).
# The time index built further down in this notebook counts calendar months, and
# TimeSeriesDataSet expects at most one row per (group, time_idx). Grouping on the
# raw 'date' keeps one row per distinct date, i.e. several rows per city and month
# whenever the data holds more than one date per month. Every date is therefore
# snapped to the first day of its month before grouping; the output keeps the same
# columns as before: 'date', 'city', 'sum_of_claims'.
month_start = prep_data['date'].dt.to_period('M').dt.to_timestamp().rename('date')

prep_data = (
    prep_data
    .groupby([month_start, 'city'])
    .agg(sum_of_claims=pd.NamedAgg('claim_amount', 'sum'))
    .reset_index()
)

## Calculate date features

In [9]:
# Derive calendar features from 'date'. All of them are stored as strings:
# the dataset definition further down lists 'year' and 'month' as categoricals.
prep_data = prep_data.assign(
    day_of_week=lambda frame: frame['date'].dt.dayofweek.astype(str),
    day=lambda frame: frame['date'].dt.day.astype(str),
    month=lambda frame: frame['date'].dt.month.astype(str),
    year=lambda frame: frame['date'].dt.year.astype(str),
    yyyymm=lambda frame: frame['date'].dt.strftime('%Y%m'),
)

## Add time index

In [None]:
# Build 'time_idx': an integer month counter that is 0 for the earliest month in the
# data and grows by 1 per calendar month.
# year * 12 + month is a plain-integer month ordinal. It avoids casting a Period
# column with .astype(int), which depends on the platform's default integer width
# (pandas 2.x rejects datetime-like -> non-int64 integer casts), and it avoids the
# string round trip through 'yyyymm'.
month_ordinal = (
    prep_data['date'].dt.year.astype('int64') * 12
    + prep_data['date'].dt.month.astype('int64')
)
prep_data['time_idx'] = month_ordinal - month_ordinal.min()

# Analysis

## Train-Test split

In [None]:
# Forecast horizon and look-back window, both counted in time_idx steps.
max_prediction_length = 2
max_encoder_length = 2
# NOTE(review): training_cutoff is computed but not used anywhere in this cell.
training_cutoff = prep_data['time_idx'].max() - max_prediction_length

# Split on the distinct time indices rather than on rows. shuffle=False keeps the
# order of all_idx, so the three parts are contiguous runs of that sequence:
# 30% of the indices go to test, then 15% of the remainder goes to validation.
all_idx = prep_data['time_idx'].unique()
train_idx, test_idx = train_test_split(all_idx, test_size=0.30, shuffle=False)
train_idx, val_idx = train_test_split(train_idx, test_size=0.15, shuffle=False)

train_data = prep_data[prep_data['time_idx'].isin(train_idx)]
val_data = prep_data[prep_data['time_idx'].isin(val_idx)]
test_data = prep_data[prep_data['time_idx'].isin(test_idx)]

# Training dataset: one series per city, target is the aggregated claim amount.
training = TimeSeriesDataSet(
    train_data,
    # series layout
    time_idx='time_idx',
    target='sum_of_claims',
    group_ids=['city'],
    # window sizes
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # feature roles
    static_categoricals=['city'],
    static_reals=[],
    time_varying_known_categoricals=['year', 'month'],
    time_varying_known_reals=['time_idx'],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=['sum_of_claims'],
    # scaling and derived features
    target_normalizer=TorchNormalizer(method='standard'),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

# Validation and test datasets reuse the configuration of the training dataset.
validation = TimeSeriesDataSet.from_dataset(
    training, val_data, predict=True, stop_randomization=True
)

test = TimeSeriesDataSet.from_dataset(
    training, test_data, predict=True, stop_randomization=True
)

## Create data loaders

In [None]:
# Wrap the three datasets in dataloaders that share one batch size.
# Only the training loader is created with train=True; validation and test use train=False.
batch_size = 16
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=4)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=4)
test_dataloader = test.to_dataloader(train=False, batch_size=batch_size, num_workers=4)

In [None]:
# Pull batches from the test loader and stop at the fourth one (or at the last
# batch if the loader yields fewer). x and y keep the contents of the batch the
# loop stopped on; i ends up as the number of batches fully stepped past.
i = 0
for batch in test_dataloader:
    # Split the batch into model inputs and targets
    x, y = batch

    if i < 3:
        i += 1
        continue

    break

i

In [None]:
# List the keys of x, the input part of the batch kept from the loop above.
x.keys()

In [None]:
# Show the first element of y from that batch.
# NOTE(review): y is presumably a (target, weight) pair — confirm against the dataloader output.
y[0]

In [None]:
# Shape of the 'encoder_cat' entry of x.
x['encoder_cat'].shape

## Baseline model

In [None]:
# Run pytorch-forecasting's Baseline model on the validation loader and compute its
# mean absolute error; the TFT model is scored with the same metric later in the notebook.
# return_y=True makes the result carry the targets (.y) next to the predictions (.output).
baseline_predictions = Baseline().predict(val_dataloader, return_y=True)
mae_baseline = MAE()(baseline_predictions.output, baseline_predictions.y).item()

print(f'Mean absolute error of baseline model: {mae_baseline}')

In [None]:
# Inspect the baseline predictions.
baseline_predictions.output

In [None]:
# Inspect the targets returned alongside the baseline predictions (return_y=True).
baseline_predictions.y

## Temporal fusion transformer model

In [None]:
# Architecture hyperparameters shared by both TemporalFusionTransformer instances
# created below (the one used for the learning-rate search and the one that is trained).
HIDDEN_SIZE = 8              # passed as hidden_size
ATTENTION_HEAD_SIZE = 1      # passed as attention_head_size
DROPOUT_RATE = 0.1           # passed as dropout
HIDDEN_CONTINOUS_SIZE = 8    # passed as hidden_continuous_size

In [None]:
# Configure a throwaway trainer and network; the next cell hands both to the
# learning-rate finder. The seed is fixed first so the run is repeatable.
pl.seed_everything(42)
trainer = pl.Trainer(
    accelerator='gpu',
    gradient_clip_val=0.1)

# Build the TFT from the training dataset definition. learning_rate=0.03 is only a
# starting value here; the training cell later uses the finder's suggestion instead.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=HIDDEN_SIZE, 
    attention_head_size=ATTENTION_HEAD_SIZE,
    dropout=DROPOUT_RATE,
    hidden_continuous_size=HIDDEN_CONTINOUS_SIZE,
    loss=QuantileLoss(),
    optimizer='Ranger')
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

### Hyperparameter tuning

In [None]:
# Run Lightning's learning-rate finder on the network, searching between
# min_lr=1e-6 and max_lr=10.0.
res = Tuner(trainer).lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6)

# Report the suggested rate and plot the search curve with the suggestion marked.
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()

### Train the model

In [None]:
# Callbacks and logger for the real training run:
# stop when 'val_loss' has not improved by at least 1e-4 for 10 validation checks.
early_stop_callback = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=10, verbose=False, mode='min')
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")

# Fresh trainer, capped at 100 epochs (early stopping may end the run sooner).
trainer = pl.Trainer(
    max_epochs=100,
    accelerator='gpu',
    enable_model_summary=True,
    gradient_clip_val=0.1,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger)

# Fresh network with the same architecture constants as the one used for the search,
# now initialised with the learning rate suggested by the finder.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=res.suggestion(),
    hidden_size=HIDDEN_SIZE,
    attention_head_size=ATTENTION_HEAD_SIZE,
    dropout=DROPOUT_RATE,
    hidden_continuous_size=HIDDEN_CONTINOUS_SIZE,
    loss=QuantileLoss(),
    log_interval=10,
    optimizer="Ranger",
    reduce_on_plateau_patience=4)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

In [None]:
# Calculate the mean absolute error of the trained TFT on the validation set,
# using the same MAE metric as for the baseline model.
predictions = tft.predict(val_dataloader, return_y=True, trainer_kwargs=dict(accelerator='gpu'))
mae_tft = MAE()(predictions.output, predictions.y).item()

print(f'mean absolute error of tft model: {mae_tft}')

In [None]:
# Display the prediction object returned by tft.predict in the previous cell.
predictions