# Vi prøver med PyTorch / NeuralForecast :o

#### Imports

In [1]:
import pandas as pd
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import LSTM
from neuralforecast.losses.pytorch import DistributionLoss
from sklearn.metrics import mean_absolute_error
import optuna

In [2]:
# gather data set and split in x and y
def read_dataset(filename):
    df = pd.read_csv(filename)
    df = df.loc[:, ~df.columns.str.match('Unnamed')]
    df = df.replace('DC', 1)
    df = df.replace('LTE', 0)
    return df

def vertical_handover(y):
    new_y = []
    for seq in y:
        if 0 in seq and 1 in seq:
            new_y.append(1)
        else:
            new_y.append(0)
    return np.array(new_y)

def accuracy(y_pred, y_true):
    return np.mean(y_pred == y_true)

def precision_recall(y_pred, y_true):
    tp, fp, fn =  0, 0, 0

    # Loop through true and predicted labels to count TP, FP, and FN
    for yt, yp in zip(y_true, y_pred):
        if yt == 1 and yp == 1:
            tp += 1
        elif yt == 0 and yp == 1:
            fp += 1
        elif yt == 1 and yp == 0:
            fn += 1
    print(tp, fp, fn)

    # Precision calculation
    if tp + fp == 0:
        precision = 0
    else:
        precision = tp / (tp + fp)

    # Recall calculation
    if tp + fn == 0:
        recall = 0
    else:
        recall = tp / (tp + fn)

    return precision, recall

In [3]:
# df_dict = {elem : pd.DataFrame() for elem in campaigns}

# for key in df_dict.keys():
#     df_dict[key] = df[:][df.campaign == key]

# for key in df_dict.keys():
#     time_diffs = df_dict[key]['Timestamp'].diff().dt.total_seconds()
#     time_diffs = time_diffs.fillna(0)
#     df_dict[key]['Timestamp'] = time_diffs

# df_list = []
# for scenario, d in df_dict.items():
#     d['campaign'] = scenario
#     df_list.append(d)

# combined_df = pd.concat(df_list).reset_index(drop=True)
# combined_df = combined_df.loc[:, ~combined_df.columns.str.match('campaign')]

In [4]:
df = read_dataset('datasets/Op1_merged.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.fillna(0)

df = df.rename(columns={'Timestamp': 'ds', 'Mode': 'y', 'campaign': 'unique_id'})

test_ids = [
    'Capacity_Ookla_4G5G_TIM_Driving_Tuscolana_Stat_Appia_No_Flight_Mode',
    'Ping_UNIDATA_4G5G_TIM_Walking_DIET_to_Termini',
    'Ping_UNIDATA_4G5G_TIM_Driving_Viale_Marconi'
]

train = df.loc[~df['unique_id'].isin(test_ids)]
test = df.loc[df['unique_id'].isin(test_ids)]

In [5]:
def objective(trial):
    encoder_n_layers = trial.suggest_int('encoder_n_layers', 1, 3)
    encoder_hidden_size = trial.suggest_categorical('encoder_hidden_size', [64, 128, 256])
    decoder_layers = trial.suggest_int('decoder_layers', 1, 3)
    decoder_hidden_size = trial.suggest_categorical('decoder_hidden_size', [64, 128, 256])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    context_size = trial.suggest_int('context_size', 1, 60)
    scaler_type = trial.suggest_categorical('scaler_type', ['standard', 'robust'])

    print(encoder_n_layers, decoder_layers, context_size)

    models = [LSTM(h=10,
                   loss=DistributionLoss(distribution='Normal', level=[90]),
                   max_steps=2000,
                   encoder_n_layers=encoder_n_layers,
                   encoder_hidden_size=encoder_hidden_size,
                   context_size=context_size,
                   decoder_hidden_size=decoder_hidden_size,
                   decoder_layers=decoder_layers,
                   learning_rate=learning_rate,
                   scaler_type=scaler_type)]

    model = NeuralForecast(models=models, freq='D')
    model.fit(train)

    p = model.predict(test).reset_index()
    p = p.merge(test[['ds', 'unique_id', 'y']], on=['ds', 'unique_id'], how='left')

    loss = mean_absolute_error(p['y'], p['LSTM']) 

    return loss

In [6]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print(study.best_params)
print(study.best_value)

[I 2024-10-14 12:24:08,400] A new study created in memory with name: no-name-d6ff2804-809a-4bb2-ac9c-4a852c6b314f
Seed set to 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


1 2 8



  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | loss            | DistributionLoss | 3      | train
1 | padder          | ConstantPad1d    | 0      | train
2 | scaler          | TemporalNorm     | 0      | train
3 | hist_encoder    | LSTM             | 17.2 K | train
4 | context_adapter | Linear           | 5.2 K  | train
5 | mlp_decoder     | MLP              | 2.8 K  | train
-------------------------------------------------------------
25.2 K    Trainable params
3         Non-trainable params
25.2 K    Total params
0.101     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=2000` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[W 2024-10-14 12:26:22,247] Trial 0 failed with parameters: {'encoder_n_layers': 1, 'encoder_hidden_size': 64, 'decoder_layers': 2, 'decoder_hidden_size': 256, 'learning_rate': 2.25333004292643e-05, 'context_size': 8, 'scaler_type': 'standard'} because of the following error: ValueError('Input contains NaN.').
Traceback (most recent call last):
  File "/Users/ihneskarbovik/Library/Python/3.9/lib/python/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/np/z3d7fsz132vckb3gj8c2j2c00000gn/T/ipykernel_52896/3902971467.py", line 29, in objective
    loss = mean_absolute_error(p['y'], p['LSTM'])
  File "/Users/ihneskarbovik/Library/Python/3.9/lib/python/site-packages/sklearn/utils/_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/Users/ihneskarbovik/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_regression.py", line 204, in mean_absolute_error
    y_type, y_true, y_pred, mu

ValueError: Input contains NaN.