## Run `TimeGPT-1` on the Santos offshore dataset

In [8]:
import sys
import os
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from chronos import ChronosPipeline
import numpy as np
from nixtla import NixtlaClient
from IPython.display import clear_output
import time

import logging

sys.path.append('../src/')
from utils.nexdata import *
from utils.nexutil import *

# Simulate command-line arguments: a notebook has no real CLI, so sys.argv
# is overwritten before parse_args() reads it.
sys.argv = ['timegpt.py', '-v']
# Alternative without the '-v' flag:
#sys.argv = ['timegpt.py']

# Parse the (simulated) arguments and derive the logging level from them.
args = parse_args()
log_level = get_log_level(args.verbose)

# Configure the root logger with a timestamped record format.
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=log_fmt, level=log_level)

# Project-wide parameter object; later cells read params.data_params,
# params.model_params, params.features and params.logger from it.
params = NexData(log_level=log_level, nexus_folder='../')

# Seed the random number generators with the configured default seed.
set_random_seeds(params.data_params['default_seed'])

# Experiment identifier; a later cell uses it as the output file-name prefix.
id_experiment = 'timegpt_forecast_target_features'

INFO:utils.nexdata: Defining paths...


#### Configure the model, run the predictions, and save the outputs to be used by the `student` model

In [3]:
# Create the Nixtla (TimeGPT) API client. The API key is read from a local
# key file; max_retries / retry_interval are passed through to the client.
try:
    nixtla_client = NixtlaClient(
        api_key=load_api_key("../config/nixtla_api.key"),
        max_retries=30,
        retry_interval=30,
    )
    params.logger.info(' TimeGPT model load with successfull o/')
except Exception as err:
    # The message previously named "Chronos", which is not the model that
    # this cell loads; report the right one, then propagate the error.
    params.logger.critical(f' TimeGPT model cannot be loaded. {err}')
    raise

INFO:utils.nexdata: TimeGPT model load with successfull o/


In [3]:
# Assemble the "composed" train and test DataFrames: one shared datetime
# column plus one column per (ocean variable, feature) pair, named
# "<ocean_variable>_<feature>". While doing so, record which columns are
# endogenous, exogenous and forecast targets.
datetime_col = params.data_params['datetime_col']
target_freq = params.data_params['target_freq']
interp_method = params.data_params['interp_method']

df_train_composed = pd.DataFrame()

train_range = pd.date_range(start=params.data_params['train_start_date'],
                            end=params.data_params['train_end_date'],
                            freq=target_freq)

df_train_composed[datetime_col] = train_range

df_test_composed = pd.DataFrame()

# Column-name registries. The datetime column is listed in both the
# endogenous and the exogenous list.
endg_list = [datetime_col]
exog_list = [datetime_col]
target_feature_list = []

test_range = pd.date_range(start=params.data_params['test_start_date'],
                           end=params.data_params['test_end_date'],
                           freq=target_freq)

df_test_composed[datetime_col] = test_range

# Windowing configuration does not depend on the ocean variable, so it is
# read once instead of on every iteration.
context_len = params.model_params['context_window_len']
forecast_len = params.model_params['forecast_len']
shift = params.model_params['shift']
mode = params.model_params['windowing_mode']

# Iterate over each ocean variable defined in the parameters
for ocean_variable in params.features.keys():
    params.logger.info(f' ################## Ocean variable: {ocean_variable}')
    # Retrieve target features and experiment IDs
    target_features = params.features[ocean_variable]
    params.logger.debug(f" {target_features}")
    # NOTE(review): this overwrites the 'timegpt_*' id_experiment set in the
    # setup cell, so the output file written later is prefixed 'chronos_'.
    # Looks like a leftover from the Chronos notebook -- confirm before
    # changing, since downstream readers may rely on the file name.
    id_experiment = 'chronos_forecast_composed'
    id_experiment_ioa = 'chronos_ioa_composed'

    # Load train and test data for the target feature
    df_train_target = pd.read_parquet(target_features['train_filepath'])
    df_test_target = pd.read_parquet(target_features['test_filepath'])

    try:
        # Process the training dataframe with specified parameters
        df_train_processed_target = process_dataframe(
            df_train_target,
            target_features['train_start_date'],
            target_features['train_end_date'],
            target_freq,
            interp_method,
            datetime_col,
            target_features['freq'])
        params.logger.debug(
            f' Train cols: {df_train_processed_target.columns}')

        # Process the test dataframe with specified parameters
        df_test_processed_target = process_dataframe(
            df_test_target,
            target_features['test_start_date'],
            target_features['test_end_date'],
            target_freq,
            interp_method,
            datetime_col,
            target_features['freq'])
        params.logger.debug(
            f' Test cols: {df_test_processed_target.columns}')
    except Exception as e:
        # Do not continue after a processing failure: the code below would
        # either raise NameError (first variable) or silently reuse the
        # processed frames of the previous ocean variable.
        params.logger.error(f" Error {e} on {ocean_variable}")
        raise

    # Generate indices for the test set using the context and forecast lengths
    X_test_index, y_test_index = generate_indices(
        df_test_processed_target, context_len, forecast_len,
        shift, mode)
    # Number of test windows; read by the forecasting cell.
    len_X_test_index = len(X_test_index)

    # Initialize DataFrames for predictions and index of agreement (IOA) values
    # NOTE: both are rebuilt for every ocean variable, so later cells only
    # see the frames produced by the last iteration.
    df_y_hat = pd.DataFrame()
    df_ioa = pd.DataFrame()

    # Index df_y_hat by every forecast-window label, then pull the matching
    # timestamps from the processed test frame.
    df_y_hat.index = np.concatenate(y_test_index)
    df_y_hat[datetime_col] = df_test_processed_target.loc[
        df_y_hat.index, datetime_col]

    # Copy each feature of this ocean variable into the composed frames
    for target_feature in target_features['list_features']:
        train_signal = df_train_processed_target[target_feature]
        test_signal = df_test_processed_target[target_feature]

        df_target_col_name = f'{ocean_variable}_{target_feature}'
        params.logger.debug(
            f" Target feature: {target_feature} | {df_target_col_name}")

        # Assigning .values ignores the source index; pandas raises
        # ValueError if the length differs from the composed date range.
        df_train_composed[df_target_col_name] = train_signal.values
        df_test_composed[df_target_col_name] = test_signal.values

        # Every feature is endogenous; the 'exog' and 'target_feature'
        # flags of the ocean variable decide the other two lists.
        endg_list.append(df_target_col_name)
        if target_features['exog']:
            exog_list.append(df_target_col_name)
        if target_features['target_feature']:
            target_feature_list.append(df_target_col_name)

params.logger.info(f' endg features:   {endg_list}')
params.logger.info(f' exog features:   {exog_list}')
params.logger.info(f' target features: {target_feature_list}')

INFO:utils.nexdata: ################## Ocean variable: current_praticagem
INFO:utils.nexdata: ################## Ocean variable: ssh_praticagem
INFO:utils.nexdata: ################## Ocean variable: wind_praticagem
INFO:utils.nexdata: ################## Ocean variable: waves_palmas
INFO:utils.nexdata: ################## Ocean variable: sofs_praticagem
INFO:utils.nexdata: ################## Ocean variable: astronomical_tide
INFO:utils.nexdata: endg features:   ['datetime', 'current_praticagem_cross_shore_current', 'ssh_praticagem_ssh', 'wind_praticagem_vx', 'wind_praticagem_vy', 'waves_palmas_hs', 'waves_palmas_tp', 'waves_palmas_ws', 'sofs_praticagem_cross_shore_current', 'sofs_praticagem_ssh', 'astronomical_tide_astronomical_tide']
INFO:utils.nexdata: exog features:   ['datetime', 'sofs_praticagem_cross_shore_current', 'sofs_praticagem_ssh', 'astronomical_tide_astronomical_tide']
INFO:utils.nexdata: target features: ['current_praticagem_cross_shore_current', 'waves_palmas_hs', 'waves_

In [4]:
# Rolling-window forecasting with TimeGPT: for every target feature and every
# test window, send the training history plus the window's context rows to
# the API, collect the forecast, and store all forecasts in fcst_df_full.
datetime_col = params.data_params['datetime_col']

fcst_df_full = pd.DataFrame()

# Forecast rows start right after the first context window of the test set.
fcst_df_full.index = pd.RangeIndex(start=context_len,
                                   stop=df_test_composed.shape[0],
                                   step=1)
# Index-aligned assignment of the forecast timestamps.
fcst_df_full[datetime_col] = df_y_hat[datetime_col]

# Loop-invariant: number of API attempts allowed per window.
attempts = params.model_params['attempts_after_failure']

for tgt_feature in target_feature_list:
    params.logger.info(f' target feature to forecast: {tgt_feature}')
    dt_list = []
    fcst_list = []
    for X_idx, y_idx in tqdm(zip(X_test_index, y_test_index),
                             total=len_X_test_index):
        params.logger.info(f' \ntarget feature: {tgt_feature}')
        # Context rows and forecast-horizon rows of the current window
        X_test_df = df_test_composed.loc[X_idx, :]
        y_test_df = df_test_composed.loc[y_idx, :]

        # Concatenate training and test signals
        combined_df = pd.concat([df_train_composed, X_test_df],
                                axis=0).reset_index(drop=True)

        for attempt in range(1, attempts + 1):
            try:
                fcst_df = nixtla_client.forecast(
                    df=combined_df,
                    h=params.model_params['forecast_len'],
                    freq=params.data_params['target_freq'],
                    time_col=datetime_col,
                    target_col=tgt_feature,
                    X_df=y_test_df[exog_list],  # exogenous features only
                    model=params.model_params['timegpt_model'],
                    #finetune_steps=params.model_params[
                    # 'timegpt_finetune_steps'],
                )
                break  # forecast succeeded, stop retrying
            except Exception as e:
                params.logger.info(
                    f'Error at attempt {attempt} for {tgt_feature}: {e}')
                if attempt < attempts:
                    time.sleep(10)  # wait 10 seconds before trying again
        else:
            # Reached only when no attempt succeeded. The previous check
            # (`attempt <= attempts`) was always true, so this branch never
            # ran and the loop fell through with a stale or undefined
            # fcst_df.
            params.logger.error(f'Fail after {attempts} attempts')
            sys.exit(1)

        dt_list.extend(fcst_df[datetime_col].values)
        fcst_list.extend(fcst_df['TimeGPT'].values)
        clear_output(wait=True)
        #TODO: also store the measured value in the DataFrame

    # Positional assignment: pandas raises ValueError unless the collected
    # forecasts have exactly as many entries as fcst_df_full has rows.
    fcst_df_full[tgt_feature] = fcst_list

# Save the predictions DataFrame to a parquet file
# NOTE(review): the file name ends in ".pkl" although the content is Parquet.
# The name is kept unchanged because downstream readers may depend on it --
# confirm and rename to ".parquet".
filename = os.path.join(
    params.forecasted_dir,
    f"{id_experiment}_"
    f"{params.timestamp}.pkl")

params.logger.info(f'Output file: {filename}')

fcst_df_full.to_parquet(filename)

display(fcst_df_full)

100%|██████████| 179/179 [33:55<00:00, 11.37s/it]


Unnamed: 0,datetime,current_praticagem_cross_shore_current,waves_palmas_hs,waves_palmas_tp,waves_palmas_ws
168,2022-01-08 00:00:00,-0.496241,1.205139,10.518562,-0.015064
169,2022-01-08 01:00:00,-0.475252,1.117056,10.122596,0.002176
170,2022-01-08 02:00:00,-0.221233,1.495980,10.170714,0.200205
171,2022-01-08 03:00:00,-0.304862,1.322694,9.801672,0.191701
172,2022-01-08 04:00:00,-0.258456,1.231174,10.200695,0.186209
...,...,...,...,...,...
8755,2022-12-31 19:00:00,0.117516,0.660472,9.674246,0.078386
8756,2022-12-31 20:00:00,0.144268,0.662330,9.598036,0.114077
8757,2022-12-31 21:00:00,-0.021904,0.564333,9.813744,0.033776
8758,2022-12-31 22:00:00,0.150519,0.658883,9.335948,0.162465


In [None]:
# Visual sanity check: for a fixed sample of test windows (150..159), print
# the index of agreement (IOA) between measured and forecast values and plot
# both series against time.
datetime_col = params.data_params['datetime_col']

for tgt_feature in target_feature_list:
    for idx in range(150, 160):
        window = y_test_index[idx]
        measured = df_test_composed.loc[window, tgt_feature]
        forecast = fcst_df_full.loc[window, tgt_feature]

        # Computed outside the f-string: a replacement field spanning
        # several lines inside a single-quoted f-string is a SyntaxError
        # before Python 3.12.
        ioa = calculate_ioa(measured.values, forecast.values)
        print(f'ioa: {ioa}')

        plt.plot(fcst_df_full.loc[window, datetime_col],
                 forecast,
                 label=tgt_feature)
        plt.plot(df_test_composed.loc[window, datetime_col],
                 measured,
                 label='Measured')
        plt.legend()
        plt.show()