## Run `chronos` on the Santos offshore dataset

In [7]:
import sys
import os
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from chronos import ChronosPipeline
import numpy as np

import logging

sys.path.append('../src/')
from utils.nexdata import *
from utils.nexutil import *

# Simulate the command-line arguments that the script version
# ('chronos.py') would receive (presumably read by parse_args() below).
sys.argv = ['chronos.py', '-v']

# Parse the arguments and translate the verbosity flag into a log level
args = parse_args()
log_level = get_log_level(args.verbose)

# Configure the root logger
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=log_fmt, level=log_level)

# Project-wide parameters object; '../' is passed as the project folder
# (NOTE(review): presumably the repository root -- confirm against NexData).
params = NexData(log_level=log_level, nexus_folder='../')

# Seed the random number generators with the configured default seed
set_random_seeds(params.data_params['default_seed'])

INFO:utils.nexdata: Defining paths...


#### Configure the model, run the predictions, and save the outputs to be used by the `student` model

In [None]:
# Load the pretrained 'amazon/chronos-t5-large' model as a ChronosPipeline.
# The model is requested on the 'cuda' device in bfloat16 precision, so this
# cell requires a CUDA-capable GPU.
try:
    chronos_pipeline = ChronosPipeline.from_pretrained(
        'amazon/chronos-t5-large',
        device_map='cuda',
        torch_dtype=torch.bfloat16,
    )
except Exception as err:
    # Record the failure in the project log, then re-raise the original
    # exception so that the notebook stops at this cell.
    params.logger.critical(f'Chronos model cannot be loaded. {err}')
    raise
else:
    # Logged outside the ``try`` body so that only a genuine loading failure
    # is ever reported as "cannot be loaded".
    params.logger.info('Chronos model load with successfull o/')

# Main forecasting loop.
#
# For every ocean variable configured in ``params.features`` this cell:
#   1. loads and preprocesses the train and test parquet files;
#   2. builds the context/forecast windows over the test set;
#   3. for every target feature and every window, forecasts ``forecast_len``
#      steps with Chronos and scores them with the index of agreement (IOA);
#   4. saves the forecasts and the IOA values under ``params.forecasted_dir``.

# Experiment identifiers used in the output file names
id_experiment = 'chronos_forecast_composed'
id_experiment_ioa = 'chronos_ioa_composed'

# Windowing configuration (identical for every ocean variable)
context_len = params.model_params['context_window_len']
forecast_len = params.model_params['forecast_len']
shift = params.model_params['shift']
mode = params.model_params['windowing_mode']

# Name of the datetime column in the processed dataframes
datetime_col = params.data_params['datetime_col']

# Iterate over each ocean variable defined in the parameters
for ocean_variable in params.features.keys():
    params.logger.debug(f'Ocean variable: {ocean_variable}')

    # Configuration entry (file paths, date ranges, feature list, ...)
    target_features = params.features[ocean_variable]

    # Load train and test data for the target feature.
    # ``head()`` must be *called*: interpolating the bound method ``head``
    # would dump the repr of the entire dataframe into the log.
    df_train_target = pd.read_parquet(
        target_features['train_filepath'])
    params.logger.debug(f' df_train_target:\n{df_train_target.head()}')
    df_test_target = pd.read_parquet(
        target_features['test_filepath'])
    params.logger.debug(f' df_test_target:\n{df_test_target.head()}')

    # Process the training dataframe with specified parameters
    df_train_processed_target = process_dataframe(
        df_train_target,
        target_features['train_start_date'],
        target_features['train_end_date'],
        params.data_params['target_freq'],
        params.data_params['interp_method'],
        datetime_col,
        params.data_params['round_freq'])
    params.logger.debug(' df_train_target are processed.')

    # Process the test dataframe with specified parameters
    df_test_processed_target = process_dataframe(
        df_test_target,
        target_features['test_start_date'],
        target_features['test_end_date'],
        params.data_params['target_freq'],
        params.data_params['interp_method'],
        datetime_col,
        params.data_params['round_freq'])
    params.logger.debug(' df_test_target are processed.')

    # Generate indices for the test set using the context and forecast lengths
    X_test_index, y_test_index = generate_indices(
        df_test_processed_target, context_len, forecast_len,
        shift, mode)
    params.logger.debug(' X_test_index, y_test_index are created.')

    # Initialize DataFrames for predictions and index of agreement (IOA) values
    df_y_hat = pd.DataFrame()
    df_ioa = pd.DataFrame()

    # Index the predictions by the concatenated forecast-window indices and
    # carry over the matching timestamps from the processed test set.
    df_y_hat.index = np.concatenate(y_test_index)
    df_y_hat[datetime_col] = (
        df_test_processed_target.loc[df_y_hat.index, datetime_col])

    params.logger.debug(' start loop from df features')
    len_X_test_index = len(X_test_index)

    # Iterate over each target feature for prediction
    for target_feature in target_features['list_features']:
        y_hat = []
        ioa_list = []

        # The full training signal is prepended to every test window to
        # increase the amount of context available at inference time.
        train_signal = df_train_processed_target.loc[
            :, target_feature].values

        # Iterate over each test window to generate predictions
        for idx, (x_index, y_index) in enumerate(
                zip(X_test_index, y_test_index)):
            # Context values and ground truth for the current window
            test_signal = df_test_processed_target.loc[
                x_index, target_feature].values
            y_test_signal = df_test_processed_target.loc[
                y_index, target_feature].values

            # Concatenate training and test signals into one context tensor
            composed_signal = np.concatenate(
                (train_signal, test_signal))
            batch_context = torch.tensor(composed_signal)

            # Generate forecast samples using the Chronos pipeline.
            # Per the Chronos documentation the result is shaped
            # (batch, num_samples, prediction_length); the median over the
            # sample axis is used as the point forecast.
            forecast = chronos_pipeline.predict(
                batch_context, forecast_len)
            predictions = np.quantile(
                forecast.numpy(), 0.5, axis=1)

            # A single series is submitted, so take the first batch entry
            median_forecast = predictions[0]
            y_hat.extend(median_forecast)

            # Calculate the index of agreement (IOA) for the predictions
            ioa = calculate_ioa(y_test_signal, median_forecast)
            ioa_list.append(ioa)

            # Print the progress and IOA value for the current window
            print(f'Window {idx+1} from {len_X_test_index} | '
                    f'target feature: {target_feature} |  ioa: {round(ioa,3)}')

        # Store the predictions and IOA values in the DataFrames
        df_y_hat[target_feature] = y_hat
        df_ioa[target_feature] = ioa_list

    # Save the predictions DataFrame.
    # NOTE(review): the file is written in parquet format but named '.pkl';
    # the name is kept unchanged because downstream consumers may rely on
    # it -- confirm before renaming.
    filename = os.path.join(
        params.forecasted_dir,
        f"{target_features['name']}_{id_experiment}_"
        f"{params.timestamp}.pkl")
    df_y_hat.to_parquet(filename)

    # Save the IOA DataFrame (also parquet content under a '.pkl' name).
    # NOTE(review): this name is prefixed with ``ocean_variable`` whereas the
    # forecast file uses ``target_features['name']`` -- confirm this
    # difference is intended.
    filename_ioa = os.path.join(
        params.forecasted_dir,
        f"{ocean_variable}_{id_experiment_ioa}_"
        f"{params.timestamp}.pkl")
    df_ioa.to_parquet(filename_ioa)

    # Print the file paths of the saved files
    print(filename)
    print(filename_ioa)
