In [6]:
import numpy as np
import pandas as pd
import polars as pl
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [40]:
data = pl.read_parquet('../data/3_primary/complete_dataset.parquet')
data = data.filter(pl.col('uf') != 'ES')

data = data.rename({'casos': 'target'})
data = data.with_columns(
    np.log1p(pl.col("target")).alias("log_casos")
)

static_features = data.group_by('uf').agg(pl.col('uf_latitude').first(),pl.col('uf_longitude').first())

In [189]:
def train_test_sprint_split(data,validation_set=1):
    exo = ['enso','log_casos']
    train = data.filter(pl.col('train_'+str(validation_set)))[['uf','date','target',*exo]]
    train = TimeSeriesDataFrame.from_data_frame(
        train.to_pandas(),id_column='uf',
        timestamp_column='date',
        static_features_df=static_features.to_pandas()
    )
    test =  data.filter(
        pl.col('target_'+str(validation_set)) | (
            (pl.col('date') < np.max(np.array(data.filter(pl.col('target_'+str(validation_set))).sort('date')['date'])))
        )
    )[['uf','date','target',*exo]]
    test = TimeSeriesDataFrame.from_data_frame(
        test.to_pandas(),
        id_column='uf',
        timestamp_column='date',
        static_features_df=static_features.to_pandas()
    )
    return train,test

def train_model(train, validation_set=1):
    prediction_length = 68
    predictor = TimeSeriesPredictor(
        prediction_length=prediction_length,
        path='./SprintModels/chronos_bolt_small'+str(validation_set)
        ).fit(
        train_data=train,
        hyperparameters={
            "Chronos": [
                # {"model_path": "bolt_small", "fine_tune": True, "ag_args": {"name_suffix": ""}},
                {"model_path": "bolt_small", "fine_tune": False, "ag_args": {"name_suffix": ""}},
            ]
        },
        # time_limit=5,  # time limit in seconds
        enable_ensemble=False,
    )
    return predictor

def validation_sprint(data, train, predictor, validation_set=1):
    predictions = predictor.predict(data=train,model='Chronos[bolt_small]')

    dates_target =data.filter(pl.col('target_'+str(validation_set)))[['date','epiweek', 'target_'+str(validation_set)]].unique().sort('date')
    validation = pl.DataFrame(predictions.reset_index()).rename({'item_id': 'uf','timestamp': 'date'})

    validation = validation.with_columns(
        pl.col('date').dt.date().alias('date')
    )

    validation = validation.join(dates_target, on='date', how='left').filter(pl.col('epiweek').is_not_null())
    return validation


In [190]:
for v in [1,2,3]:
    train, test = train_test_sprint_split(data,3)
    predictor = train_model(train,validation_set=3)
    validation =validation_sprint(data, train, predictor,validation_set=3)
    validation.write_parquet('../data/4_model_output/validation_sprint_'+str(v)+'.parquet')

Beginning AutoGluon training...
AutoGluon will save models to '/Users/davibarreira/Documents/DSProjects/Mosqlimate/jbd-mosqlimate-sprint/train_model/SprintModels/chronos_bolt_small3'
AutoGluon Version:  1.4.0
Python Version:     3.11.11
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:33 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8122
CPU Count:          8
GPU Count:          1
Memory Avail:       4.36 GB / 16.00 GB (27.3%)
Disk Space Avail:   379.58 GB / 926.35 GB (41.0%)

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': WQL,
 'hyperparameters': {'Chronos': [{'ag_args': {'name_suffix': ''},
                                  'fine_tune': False,
                                  'model_path': 'bolt_small'}]},
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 68,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_wind