In [1]:
import pandas as pd
from autogluon.timeseries import TimeSeriesPredictor
from autogluon.timeseries.dataset import TimeSeriesDataFrame
from time import time
import torch

torch.cuda.empty_cache()


context_df = pd.read_parquet(
    "https://autogluon.s3.amazonaws.com/datasets/timeseries/electricity_price/train.parquet"
)

test_df = pd.read_parquet(
    "https://autogluon.s3.amazonaws.com/datasets/timeseries/electricity_price/test.parquet"
)

# AutoGluon richiede un TimeSeriesDataFrame per il training (solo inizializzazione)
train_data = TimeSeriesDataFrame.from_data_frame(
    context_df,
    id_column="id",
    timestamp_column="timestamp"
)

predictor = TimeSeriesPredictor(
    prediction_length=24,
    target="target",
    eval_metric="MASE"
)


predictor.fit(
    train_data,
    hyperparameters={
        "Chronos-2": {
            "model_path": "autogluon/chronos-2-small",
            "pretrained": True
        }
    }
)


start = time()
forecast = predictor.predict(TimeSeriesDataFrame.from_data_frame(
    test_df,
    id_column="id",
    timestamp_column="timestamp"
))
elapsed = time() - start

print(forecast)
print(f"Tempo per la predizione: {elapsed:.2f} secondi")

Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\utente\Desktop\Magistrale\Anomaly-detection-with-chronos-2\Nunzio\AutogluonModels\ag-20260122_144657'
AutoGluon Version:  1.5.0
Python Version:     3.12.0
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.19045
CPU Count:          8
Pytorch Version:    2.9.1+cu126
CUDA Version:       12.6
GPU Memory:         GPU 0: 2.00/2.00 GB
Total GPU Memory:   Free: 2.00 GB, Allocated: 0.00 GB, Total: 2.00 GB
GPU Count:          1
Memory Avail:       0.59 GB / 15.77 GB (3.7%)
Disk Space Avail:   54.91 GB / 953.07 GB (5.8%)

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'hyperparameters': {'Chronos-2': {'model_path': 'autogluon/chronos-2-small',
                                   'pretrained': True}},
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 24,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit

                                  mean        0.1        0.2        0.3  \
item_id timestamp                                                         
DE      2017-12-13 00:00:00  19.656765  15.060280  17.051603  18.071985   
        2017-12-13 01:00:00  16.955845  10.652727  13.306093  14.737389   
        2017-12-13 02:00:00  14.808838   6.956022  10.534309  12.282492   
        2017-12-13 03:00:00  14.433514   5.223080   9.088501  11.346428   
        2017-12-13 04:00:00  16.571810   6.003214  10.017591  12.601194   
        2017-12-13 05:00:00  19.502583   7.608965  11.904921  14.618399   
        2017-12-13 06:00:00  24.648396  10.149460  15.229380  18.778980   
        2017-12-13 07:00:00  28.532583  12.165825  18.319229  22.326290   
        2017-12-13 08:00:00  30.739872  14.219173  20.473824  24.423676   
        2017-12-13 09:00:00  32.692192  15.768797  22.360641  26.586618   
        2017-12-13 10:00:00  33.855614  16.768263  23.397976  27.669096   
        2017-12-13 11:00:

In [3]:
test_df = pd.read_parquet(
    "https://autogluon.s3.amazonaws.com/datasets/timeseries/electricity_price/test.parquet"
)

In [6]:
torch.cuda.empty_cache()

In [2]:
TimeSeriesDataFrame.from_data_frame(
    test_df,
    id_column="id",
    timestamp_column="timestamp"
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Ampirion Load Forecast,PV+Wind Forecast,target
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DE,2017-12-12 00:00:00,20483.0,22284.005859,24.52
DE,2017-12-12 01:00:00,19849.75,22878.673828,22.209999
DE,2017-12-12 02:00:00,19638.25,23632.283203,14.58
DE,2017-12-12 03:00:00,19895.25,24635.945312,12.31
DE,2017-12-12 04:00:00,20338.0,25584.935547,14.2
DE,2017-12-12 05:00:00,21395.25,26489.929688,26.139999
DE,2017-12-12 06:00:00,24072.75,27345.125,30.889999
DE,2017-12-12 07:00:00,26332.5,27729.585938,32.32
DE,2017-12-12 08:00:00,27329.75,27705.193359,35.189999
DE,2017-12-12 09:00:00,27344.25,28046.484375,34.700001


In [1]:
import pandas as pd  # requires: pip install 'pandas[pyarrow]'
from chronos import Chronos2Pipeline

pipeline = Chronos2Pipeline.from_pretrained("amazon/chronos-2", device_map="cuda")

# Load historical target values and past values of covariates
# context_df = pd.read_parquet("https://autogluon.s3.amazonaws.com/datasets/timeseries/electricity_price/train.parquet")

# # (Optional) Load future values of covariates
# test_df = pd.read_parquet("https://autogluon.s3.amazonaws.com/datasets/timeseries/electricity_price/test.parquet")
# future_df = test_df.drop(columns="target")

# # Generate predictions with covariates
# pred_df = pipeline.predict_df(
#     context_df,
#     future_df=future_df,
#     prediction_length=24,  # Number of steps to forecast
#     quantile_levels=[0.1, 0.5, 0.9],  # Quantiles for probabilistic forecast
#     id_column="id",  # Column identifying different time series
#     timestamp_column="timestamp",  # Column with datetime information
#     target="target",  # Column(s) with time series values to predict
# )


In [2]:
context_df = pd.read_csv(".././TSB-AD-M/001_Genesis_id_1_Sensor_tr_4055_1st_15538.csv", index_col=None, header=0)
context_df['timestamp'] = pd.to_datetime(pd.date_range(start='2026-01-01 00:00:00', periods=len(context_df), freq='min'))
context_df['item_id'] = 0

context_df, test_df = context_df.iloc[0:1024], context_df.iloc[1024:1024+128]

In [7]:
context_df.columns

Index(['MotorData.ActCurrent', 'MotorData.ActPosition', 'MotorData.ActSpeed',
       'MotorData.IsAcceleration', 'MotorData.IsForce',
       'MotorData.Motor_Pos1reached', 'MotorData.Motor_Pos2reached',
       'MotorData.Motor_Pos3reached', 'MotorData.Motor_Pos4reached',
       'NVL_Recv_Ind.GL_Metall', 'NVL_Recv_Ind.GL_NonMetall',
       'NVL_Recv_Storage.GL_I_ProcessStarted',
       'NVL_Recv_Storage.GL_I_Slider_IN', 'NVL_Recv_Storage.GL_I_Slider_OUT',
       'NVL_Recv_Storage.GL_LightBarrier', 'NVL_Send_Storage.ActivateStorage',
       'PLC_PRG.Gripper', 'PLC_PRG.MaterialIsMetal', 'Label', 'timestamp',
       'item_id'],
      dtype='object')

In [25]:
import numpy as np
np.expand_dims(context_df.loc[:, list(set(context_df.columns).difference({'Label', 'timestamp', 'item_id'}))].values, axis=0)


array([[[    0,     0,     0, ...,     0,     0,    -5],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,    -5],
        ...,
        [    1,     0,     0, ...,     0,     0, -1084],
        [    1,     0,     0, ...,  1015,     0,  -284],
        [    1,     0,     0, ...,     0,     0,  -349]]], dtype=int64)

In [48]:
enbeddings = pipeline.embed(
    np.expand_dims(
        context_df.loc[:, list(set(context_df.columns).difference({'Label', 'timestamp', 'item_id'}))].values.T,
        axis=1
    ),
    batch_size=256,
    context_length=64
 )

ValueError: When the input is a torch tensor or numpy array, it should be 3-d with shape (n_series, n_variates, history_length).  Found shape: (1024, 18).

In [51]:
"""
Script per ottenere embeddings da serie temporali multivariate usando Chronos-2.

Input: DataFrame NxD dove N = numero di osservazioni, D = numero di serie temporali
Output: Embeddings per ciascuna serie, considerando le correlazioni tra serie.
"""

import torch
import numpy as np
import pandas as pd
from chronos import Chronos2Pipeline


def get_multivariate_embeddings(
    df: pd.DataFrame,
    model_name: str = "amazon/chronos-2",
    device: str = None,
    batch_size: int = 256,
    context_length: int = None,
 ) -> tuple[list[torch.Tensor], list[tuple[torch.Tensor, torch.Tensor]]]:
    """
    Ottiene gli embeddings per serie temporali multivariate usando Chronos-2.
    Ogni serie viene codificata in modo indipendente come (n_series=D, n_variates=1, history_length=N).
    
    Args:
        df: DataFrame di forma (N, D) dove N = osservazioni, D = numero di serie
        model_name: Nome del modello Chronos-2 da usare
        device: Device per l'inferenza ("cuda", "cpu", o None per auto)
        batch_size: Batch size per la generazione degli embeddings
        context_length: Lunghezza del contesto (None = default del modello)
    
    Returns:
        embeddings: Lista di tensori, uno per ogni serie temporale di input.
                   Ogni tensore ha shape (n_variates, num_patches + 2, d_model)
                   dove +2 è per i token [REG] e masked output patch.
        loc_scales: Lista di tuple (loc, scale) per la normalizzazione di ogni serie.
    """
    # Carica la pipeline
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    
    pipeline = Chronos2Pipeline.from_pretrained(model_name, device_map=device)
    print(f"Modello caricato su: {device}")
    
    # Converti il DataFrame in formato richiesto da Chronos-2
    # Per serie indipendenti, usa un array 3D di shape (n_series=D, n_variates=1, history_length=N)
    data = df.values.T  # (D, N)
    data = np.expand_dims(data, axis=1)  # (D, 1, N)
    
    print(
        "Shape input: (n_series={}, n_variates={}, history_length={})".format(
            data.shape[0], data.shape[1], data.shape[2]
        )
    )
    
    # Ottieni embeddings
    embeddings, loc_scales = pipeline.embed(
        inputs=torch.tensor(data, dtype=torch.float32),
        batch_size=batch_size,
        context_length=context_length
    )
    
    print(f"Numero di output embeddings: {len(embeddings)}")
    if embeddings:
        print(f"Shape primo embedding: {embeddings[0].shape}")
    
    return embeddings, loc_scales


def _aggregate_single_embedding(emb: torch.Tensor, aggregation: str) -> torch.Tensor:
    """Aggrega un singolo embedding (n_variates, num_patches+2, d_model) -> (n_variates, d_model)."""
    if aggregation == "mean":
        return emb.mean(dim=1)
    if aggregation == "last":
        return emb[:, -2, :]
    if aggregation == "first":
        return emb[:, 1, :]
    if aggregation == "max":
        return emb.max(dim=1).values
    raise ValueError(f"Aggregazione non supportata: {aggregation}")


def get_series_level_embeddings(
    df: pd.DataFrame,
    model_name: str = "amazon/chronos-2",
    device: str = None,
    batch_size: int = 256,
    context_length: int = None,
    aggregation: str = "mean",
 ) -> np.ndarray:
    """
    Ottiene un singolo vettore di embedding per ogni serie nel DataFrame.
    
    Args:
        df: DataFrame di forma (N, D)
        model_name: Nome del modello Chronos-2
        device: Device per inferenza
        batch_size: Batch size
        context_length: Lunghezza contesto
        aggregation: Metodo di aggregazione per i patch ("mean", "last", "first", "max")
    
    Returns:
        embeddings_matrix: Array di shape (D, d_model) con un embedding per serie
    """
    embeddings, loc_scales = get_multivariate_embeddings(
        df, model_name, device, batch_size, context_length
    )
    
    # Ogni elemento di embeddings corrisponde a una serie (n_variates=1)
    series_vectors = []
    for emb in embeddings:
        # emb: (1, num_patches+2, d_model) -> (1, d_model)
        vec = _aggregate_single_embedding(emb, aggregation)
        series_vectors.append(vec.squeeze(0))
    
    series_embeddings = torch.stack(series_vectors, dim=0)  # (D, d_model)
    print(f"Shape embeddings dopo l'aggregazione: {series_embeddings.shape}")
    
    return series_embeddings.numpy()


def get_sliding_window_embeddings(
    df: pd.DataFrame,
    window_size: int,
    step_size: int = 1,
    model_name: str = "amazon/chronos-2",
    device: str = None,
    batch_size: int = 256,
 ) -> list[tuple[list[torch.Tensor], list[tuple[torch.Tensor, torch.Tensor]]]]:
    """
    Ottiene embeddings usando una finestra scorrevole sul DataFrame.
    Utile per serie lunghe che superano il context_length del modello.
    
    Args:
        df: DataFrame di forma (N, D)
        window_size: Dimensione della finestra
        step_size: Passo della finestra
        model_name: Nome del modello
        device: Device
        batch_size: Batch size
    
    Returns:
        Lista di (embeddings, loc_scales) per ogni finestra
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    
    pipeline = Chronos2Pipeline.from_pretrained(model_name, device_map=device)
    
    results = []
    n_obs = len(df)
    
    for start in range(0, n_obs - window_size + 1, step_size):
        end = start + window_size
        window_df = df.iloc[start:end]
        
        data = window_df.values.T  # (D, window_size)
        data = np.expand_dims(data, axis=1)  # (D, 1, window_size)
        
        embeddings, loc_scales = pipeline.embed(
            inputs=torch.tensor(data, dtype=torch.float32),
            batch_size=batch_size,
        )
        results.append((embeddings, loc_scales))
    
    return results


 # =====================================================
# ESEMPIO DI UTILIZZO
# =====================================================
if __name__ == "__main__":
    # Crea dati di esempio: 500 osservazioni, 5 serie temporali correlate
    np.random.seed(42)
    N, D = 500, 5
    
    # Genera serie correlate
    base = np.cumsum(np.random.randn(N))  # Serie base (random walk)
    noise = np.random.randn(N, D) * 0.3  # Rumore per ogni serie
    data = base[:, np.newaxis] + noise  # Tutte le serie seguono la base + rumore
    
    df = context_df.iloc[:, :-3]

    print(f"DataFrame shape: {df.shape}")
    print(df.head())
    
    # Esempio 1: Ottieni embeddings grezzi
    print("\n" + "="*50)
    print("Esempio 1: Embeddings grezzi")
    print("="*50)
    embeddings, loc_scales = get_multivariate_embeddings(df)
    print(f"Embeddings shape: {embeddings[0].shape}")
    # Shape: (n_variates=1, num_patches+2, d_model=256 o 512 a seconda del modello)
    
    # Esempio 2: Ottieni un vettore embedding per serie
    print("\n" + "="*50)
    print("Esempio 2: Un embedding per serie")
    print("="*50)
    series_emb = get_series_level_embeddings(df, aggregation="mean")
    print(f"Series embeddings shape: {series_emb.shape}")  # (D, d_model)
    
    # Esempio 3: Calcola similarità tra serie usando gli embeddings
    print("\n" + "="*50)
    print("Esempio 3: Matrice di similarità tra serie")
    print("="*50)
    from sklearn.metrics.pairwise import cosine_similarity
    similarity_matrix = cosine_similarity(series_emb)
    print("Matrice di similarità (coseno):")
    print(pd.DataFrame(
        similarity_matrix, 
        index=df.columns, 
        columns=df.columns
    ).round(3))

DataFrame shape: (1024, 18)
   MotorData.ActCurrent  MotorData.ActPosition  MotorData.ActSpeed  \
0                    -5                 268975                   0   
1                     0                 268975                   0   
2                    -5                 268975                   0   
3                     0                 268975                   0   
4                     0                 268975                   0   

   MotorData.IsAcceleration  MotorData.IsForce  MotorData.Motor_Pos1reached  \
0                         0                 -1                            0   
1                         0                 -1                            0   
2                         0                 -3                            0   
3                         0                  4                            0   
4                         0                  4                            0   

   MotorData.Motor_Pos2reached  MotorData.Motor_Pos3reached  \
0            

In [None]:
data = df.values.T  # (D, N)
data = np.expand_dims(data, axis=0)  # (1, D, N)

1

In [65]:
pipeline = Chronos2Pipeline.from_pretrained("amazon/chronos-2", device_map="cuda")

data = df.values.T  # (D, N)
data = np.expand_dims(data, axis=0)  # (1, D, N)

# Ottieni embeddings
embeddings, loc_scales = pipeline.embed(
    inputs=torch.tensor(data, dtype=torch.float32),
    batch_size=1,
    context_length=data.shape[2]
)

print(f"Numero di output embeddings: {len(embeddings)}")
if embeddings:
    print(f"Shape primo embedding: {embeddings[0].shape}")

Numero di output embeddings: 1
Shape primo embedding: torch.Size([18, 66, 768])


In [42]:
context_df.dtypes

MotorData.ActCurrent                             int64
MotorData.ActPosition                            int64
MotorData.ActSpeed                               int64
MotorData.IsAcceleration                         int64
MotorData.IsForce                                int64
MotorData.Motor_Pos1reached                      int64
MotorData.Motor_Pos2reached                      int64
MotorData.Motor_Pos3reached                      int64
MotorData.Motor_Pos4reached                      int64
NVL_Recv_Ind.GL_Metall                           int64
NVL_Recv_Ind.GL_NonMetall                        int64
NVL_Recv_Storage.GL_I_ProcessStarted             int64
NVL_Recv_Storage.GL_I_Slider_IN                  int64
NVL_Recv_Storage.GL_I_Slider_OUT                 int64
NVL_Recv_Storage.GL_LightBarrier                 int64
NVL_Send_Storage.ActivateStorage                 int64
PLC_PRG.Gripper                                  int64
PLC_PRG.MaterialIsMetal                          int64
Label     