In [6]:
import numpy as np
import pandas as pd
import polars as pl
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [40]:
# Load the prepared dataset, drop the 'ES' rows, and derive the modelling columns.
data = (
    pl.read_parquet('../data/3_primary/complete_dataset.parquet')
    .filter(pl.col('uf') != 'ES')
    # The predictor below is built without a `target=` argument, so the
    # case-count column is renamed to AutoGluon's default name, 'target'.
    .rename({'casos': 'target'})
    # Native polars log1p: same values as np.log1p, but stays inside the
    # polars engine instead of round-tripping through a NumPy ufunc.
    .with_columns(pl.col('target').log1p().alias('log_casos'))
)

# One row per 'uf' holding the first latitude/longitude seen for it.
# maintain_order=True makes the row order reproducible across runs.
static_features = data.group_by('uf', maintain_order=True).agg(
    pl.col('uf_latitude').first(),
    pl.col('uf_longitude').first(),
)

In [78]:
# Extra columns carried next to the target into both frames.
exo = ['enso', 'log_casos']
model_columns = ['uf', 'date', 'target', *exo]


def _to_tsdf(frame):
    """Convert a polars frame into a TimeSeriesDataFrame keyed by 'uf' and 'date'.

    A fresh pandas copy of ``static_features`` is attached on every call, so
    the resulting frames never share the same static-features object.
    """
    return TimeSeriesDataFrame.from_data_frame(
        frame.to_pandas(),
        id_column='uf',
        timestamp_column='date',
        static_features_df=static_features.to_pandas(),
    )


# Training frame: rows flagged by the boolean 'train_1' column.
train_1 = _to_tsdf(data.filter(pl.col('train_1')).select(model_columns))

# Last date of the 'target_1' window, read straight from the column
# (no sort and no NumPy round trip needed to get a maximum).
target_1_end = data.filter(pl.col('target_1'))['date'].max()
if target_1_end is None:
    # Keeps the failure mode of the previous np.max-on-empty-array version.
    raise ValueError("no rows flagged 'target_1'; cannot build test_1")

# Test frame: the 'target_1' rows plus every row dated before the end of
# that window, i.e. the history together with the evaluation window.
test_1 = _to_tsdf(
    data.filter(pl.col('target_1') | (pl.col('date') < target_1_end)).select(model_columns)
)

In [63]:
# Visual check of the training frame: 'target' plus the columns listed in `exo`.
train_1

Unnamed: 0_level_0,Unnamed: 1_level_0,target,enso,log_casos
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RJ,2022-03-06,198,-0.713715,5.293305
RS,2015-03-29,179,1.650125,5.192957
AM,2015-10-11,31,2.957763,3.465736
RS,2012-08-19,1,-0.138392,0.693147
SP,2017-04-16,236,0.143325,5.468060
...,...,...,...,...
MT,2018-02-04,385,-0.436455,5.955837
AP,2019-12-01,1,0.942436,0.693147
PB,2012-05-27,290,-0.587082,5.673323
AP,2014-08-17,8,0.478978,2.197225


In [67]:
# Number of future time steps each model is asked to forecast.
prediction_length = 68

# Two runs of the same "bolt_small" Chronos checkpoint: one used as-is and
# one fine-tuned on train_1. The suffix is appended to the model name.
chronos_variants = [
    {"model_path": "bolt_small", "fine_tune": fine_tune, "ag_args": {"name_suffix": suffix}}
    for fine_tune, suffix in ((False, "ZeroShot"), (True, "FineTuned"))
]

predictor = TimeSeriesPredictor(prediction_length=prediction_length)
# No time limit is set; pass time_limit=<seconds> to fit() to cap training time.
predictor = predictor.fit(
    train_data=train_1,
    hyperparameters={"Chronos": chronos_variants},
    enable_ensemble=False,
)

Beginning AutoGluon training...
AutoGluon will save models to '/Users/davibarreira/Documents/DSProjects/Mosqlimate/jbd-mosqlimate-sprint/train_model/AutogluonModels/ag-20250731_040059'
AutoGluon Version:  1.4.0
Python Version:     3.11.11
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:33 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8122
CPU Count:          8
GPU Count:          1
Memory Avail:       3.67 GB / 16.00 GB (23.0%)
Disk Space Avail:   378.04 GB / 926.35 GB (40.8%)

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': WQL,
 'hyperparameters': {'Chronos': [{'ag_args': {'name_suffix': 'ZeroShot'},
                                  'fine_tune': False,
                                  'model_path': 'bolt_small'},
                                 {'ag_args': {'name_suffix': 'FineTuned'},
                                  'fine_tune': True,
                                  'model_path': 'bol

In [82]:
# Compare the trained models on test_1 (history plus the 'target_1' window).
predictor.leaderboard(test_1)

Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).


Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time_marginal,fit_order
0,ChronosZeroShot[bolt_small],-0.57018,-0.656624,2.007558,1.802824,0.007992,1
1,ChronosFineTuned[bolt_small],-0.582592,-0.694272,1.313095,0.804088,389.012009,2


In [107]:
# Forecast from the end of train_1 with the fine-tuned Chronos model and
# flatten the (item_id, timestamp) index into ordinary columns.
forecast_pd = predictor.predict(
    data=train_1,
    model='ChronosFineTuned[bolt_small]',
).reset_index()

# Back to polars, using the dataset's own column names; timestamps are cut
# down to plain dates (presumably to match the dtype of data['date'] for the
# join further down; confirm).
predictions = (
    pl.DataFrame(forecast_pd)
    .rename({'item_id': 'uf', 'timestamp': 'date'})
    .with_columns(pl.col('date').dt.date())
)

In [102]:
# Distinct (date, epiweek) pairs inside the 'target_1' window, oldest first.
dates_target_1 = (
    data
    .filter(pl.col('target_1'))
    .select('date', 'epiweek')
    .unique()
    .sort('date')
)

In [112]:
# Which 'target_1' epiweeks do the forecast dates cover?
# Forecast rows with no matching target date get a null epiweek from the
# left join and are dropped before the distinct epiweeks are listed.
(
    predictions
    .join(dates_target_1, on='date', how='left')
    .drop_nulls('epiweek')
    .get_column('epiweek')
    .unique()
    .sort()
)

epiweek
i64
202241
202242
202243
202244
202245
…
202336
202337
202338
202339
