In [21]:
###################################
# 1) IMPORTS
###################################
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding, PositionalEmbedding, TokenEmbedding, TimeFeatureEmbedding
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



###################################
# 2) LOADING THE DATA
###################################
def load_data():
    # Adjust paths as needed if they are in a different directory
    train = pd.read_csv("playground-series-s5e1/train.csv")
    test = pd.read_csv("playground-series-s5e1/test.csv")
    submission = pd.read_csv("playground-series-s5e1/sample_submission.csv")
    return train, test, submission

###################################
# 3) BASIC PREPROCESS & IMPUTATION
###################################
def preprocess_data(train: pd.DataFrame):
    # Convert date to datetime
    train['date'] = pd.to_datetime(train['date'])

    # Impute missing num_sold by group means (country, product, store, month)
    train['month'] = train['date'].dt.month
    group_cols = ['country','product','store','month']
    group_means = train.groupby(group_cols)['num_sold'].transform('mean')
    train['num_sold'] = train['num_sold'].fillna(group_means)

    # Fill any remaining NaNs with overall mean
    train['num_sold'].fillna(train['num_sold'].mean(), inplace=True)
    return train

###################################
# 4) AGGREGATE INTO A SINGLE SERIES
###################################
def aggregate_timeseries(train: pd.DataFrame):
    """
    Aggregate all (country,store,product) combos into one daily series
    Return a DataFrame with [date, num_sold] plus time features
    """
    agg_df = train.groupby('date', as_index=False)['num_sold'].sum().sort_values('date')

    # Create 4 time features so freq='h' in TimeFeatureEmbedding matches (month, day, weekday, year_mod)
    agg_df['month']   = agg_df['date'].dt.month
    agg_df['day']     = agg_df['date'].dt.day
    agg_df['weekday'] = agg_df['date'].dt.weekday
    agg_df['year']    = agg_df['date'].dt.year
    agg_df['year_mod'] = agg_df['year'] - 2000

    return agg_df


In [22]:

###################################
# 5) TRAIN/VALID SPLIT BY DATE
###################################
def split_data_by_date(agg_df: pd.DataFrame, date_str="2015-01-01"):
    """
    Splits into train/valid based on date_str boundary
    """
    split_date = pd.to_datetime(date_str)
    train_mask = agg_df['date'] < split_date
    valid_mask = agg_df['date'] >= split_date

    train_data = agg_df[train_mask].reset_index(drop=True)
    valid_data = agg_df[valid_mask].reset_index(drop=True)
    return train_data, valid_data


In [23]:

###################################
# 6) MAKE WINDOWS (SLIDING)
###################################
def make_windows(data, time_feat, input_size=96, pred_size=24):
    """
    data: 1D array of the target
    time_feat: 2D array of shape (N, 4) containing [month, day, weekday, year_mod]
    Return: x_enc, x_mark_enc, x_dec, x_mark_dec, y arrays
    """
    x_enc_list, x_mark_enc_list = [], []
    x_dec_list, x_mark_dec_list = [], []
    y_list = []
    L = len(data)
    for i in range(L - input_size - pred_size):
        x_enc = data[i : i+input_size]
        t_enc = time_feat[i : i+input_size]
        x_dec = data[i+input_size : i+input_size+pred_size]
        t_dec = time_feat[i+input_size : i+input_size+pred_size]
        y    = data[i+input_size : i+input_size+pred_size]

        x_enc_list.append(x_enc)
        x_mark_enc_list.append(t_enc)
        x_dec_list.append(x_dec)
        x_mark_dec_list.append(t_dec)
        y_list.append(y)

    return (
        np.array(x_enc_list),
        np.array(x_mark_enc_list),
        np.array(x_dec_list),
        np.array(x_mark_dec_list),
        np.array(y_list)
    )


In [24]:

###################################
# 7) CUSTOM DATASET
###################################
class TimeSeriesWindowDataset(Dataset):
    def __init__(self, x_enc, x_mark_enc, x_dec, x_mark_dec, y):
        super().__init__()
        self.x_enc = x_enc
        self.x_mark_enc = x_mark_enc
        self.x_dec = x_dec
        self.x_mark_dec = x_mark_dec
        self.y = y
        
    def __len__(self):
        return len(self.x_enc)
    
    def __getitem__(self, idx):
        # Convert to torch and return
        # x_enc, x_dec => (Length, 1)
        return (
            torch.tensor(self.x_enc[idx],      dtype=torch.float32),
            torch.tensor(self.x_mark_enc[idx], dtype=torch.float32),
            torch.tensor(self.x_dec[idx],      dtype=torch.float32),
            torch.tensor(self.x_mark_dec[idx], dtype=torch.float32),
            torch.tensor(self.y[idx],          dtype=torch.float32)
        )

###################################
# 8) EMBEDDING + FAN LAYER + TRANSFORMER
###################################


In [25]:

# --- (B) FAN Layer
class FANLayer(nn.Module):
    def __init__(self, input_dim, output_dim, bias=True, with_gate=True):
        super(FANLayer, self).__init__()
        self.input_linear_p = nn.Linear(input_dim, output_dim//4, bias=bias)
        self.input_linear_g = nn.Linear(input_dim, (output_dim - output_dim//2))
        self.activation = nn.GELU()
        if with_gate:
            self.gate = nn.Parameter(torch.randn(1, dtype=torch.float32))

    def forward(self, src):
        # src: [B, L, d_model]
        g = self.activation(self.input_linear_g(src))  # => [B, L, out_dim - out_dim//2]
        p = self.input_linear_p(src)                   # => [B, L, out_dim//4]
        if not hasattr(self, 'gate'):
            output = torch.cat((torch.cos(p), torch.sin(p), g), dim=-1)
        else:
            gate = torch.sigmoid(self.gate)
            output = torch.cat((gate*torch.cos(p), gate*torch.sin(p), (1-gate)*g), dim=-1)
        return output


In [26]:

###################################
# 9) FANTimeSeriesModel (Transformer + FANLayer)
###################################
class FANTimeSeriesModel(nn.Module):
    """
    Transformer-based model for time-series forecasting with FAN layer integration.
    """
    def __init__(self, configs):
        super(FANTimeSeriesModel, self).__init__()
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Embedding
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout
        )
        self.dec_embedding = DataEmbedding(
            configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout
        )

        # FAN Layer
        self.fan_layer = FANLayer(configs.d_model, configs.d_model)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(
                            False, configs.factor,
                            attention_dropout=configs.dropout,
                            output_attention=configs.output_attention,
                        ),
                        configs.d_model,
                        configs.n_heads,
                    ),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                    exp_setting=configs.exp_setting,
                )
                for _ in range(configs.e_layers)
            ],
            norm_layer=nn.LayerNorm(configs.d_model),
        )

        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(
                        FullAttention(
                            True, configs.factor,
                            attention_dropout=configs.dropout,
                            output_attention=False,
                        ),
                        configs.d_model,
                        configs.n_heads,
                    ),
                    AttentionLayer(
                        FullAttention(
                            False, configs.factor,
                            attention_dropout=configs.dropout,
                            output_attention=False,
                        ),
                        configs.d_model,
                        configs.n_heads,
                    ),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                    exp_setting=configs.exp_setting,
                )
                for _ in range(configs.d_layers)
            ],
            norm_layer=nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """
        x_enc:      [B, seq_len, enc_in]
        x_mark_enc: [B, seq_len, 4]
        x_dec:      [B, pred_len, dec_in]
        x_mark_dec: [B, pred_len, 4]
        """
        #print(f"x_enc shape: {x_enc.shape}")
        #print(f"x_mark_enc shape: {x_mark_enc.shape}")
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        #print(f"enc_out shape after embedding: {enc_out.shape}")

        #print(f"x_dec shape: {x_dec.shape}")
        #print(f"x_mark_dec shape: {x_mark_dec.shape}")
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        #print(f"dec_out shape after embedding: {dec_out.shape}")

        # FAN layer
        enc_out = self.fan_layer(enc_out)
        #print(f"enc_out shape after FAN layer: {enc_out.shape}")

        # Encoder
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
        #print(f"enc_out shape after encoder: {enc_out.shape}")

        # Decoder
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        #print(f"dec_out shape after decoder: {dec_out.shape}")

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]

###################################
# 10) CONFIGS
###################################
class Configs:
    # Because we have only one aggregated series, we set enc_in=1 and dec_in=1.
    enc_in = 1
    dec_in = 1
    c_out = 1
    d_model = 64
    embed = 'timeF'
    freq = 'h'        # We'll keep 'h' so we have 4 time features in x_mark
    dropout = 0.1
    e_layers = 2
    d_layers = 1
    d_ff = 256
    n_heads = 8
    factor = 5
    activation = 'gelu'
    output_attention = False
    pred_len = 24
    exp_setting = 0


In [27]:
def main():
    # 1) Load Data
    train, test, submission = load_data()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 2) Preprocess/impute
    train = preprocess_data(train)

    # 3) Aggregate daily
    agg_df = aggregate_timeseries(train)

    # 4) Split train/valid (ending at 2016-12-31 for example)
    train_data, valid_data = split_data_by_date(agg_df, date_str="2017-01-01")
    #   Now train_data covers up to 2016-12-31. valid_data is 2017+ (but you might or might not use it)

    # 5) Build arrays
    y_train = train_data['num_sold'].values  # aggregator from 2010–2016
    dates_train = train_data['date'].values  # keep the matching dates
    
    # 6) Log transform & scale the target
    #    log1p(x) = log(1 + x), handles zeros gracefully, reduces explosive growth
    y_train_log = np.log1p(y_train)  # shape (N,)

    scaler = StandardScaler()
    # We'll scale the log values instead of the raw aggregator
    y_train_log_scaled = scaler.fit_transform(y_train_log.reshape(-1,1)).ravel()
    
    # 7) Time features for training
    time_feat_train = train_data[['month','day','weekday','year_mod']].values

    # 8) Build windows
    input_size = 96
    pred_size = 24
    x_enc_tr, x_mark_enc_tr, x_dec_tr, x_mark_dec_tr, y_tr = make_windows(
        y_train_log_scaled, time_feat_train, input_size, pred_size
    )
    # ^ now y_tr is also in log-scale (scaled)

    # 9) Reshape (N, length, 1)
    x_enc_tr = x_enc_tr[..., None]
    x_dec_tr = x_dec_tr[..., None]

    # 10) Dataset, loader
    train_dataset = TimeSeriesWindowDataset(
        x_enc_tr, x_mark_enc_tr, x_dec_tr, x_mark_dec_tr, y_tr
    )
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # 11) Model
    configs = Configs()
    model = FANTimeSeriesModel(configs).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # 12) Train loop (short version)
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_data in train_loader:
            x_enc, x_mark_enc, x_dec, x_mark_dec, y_true_log_scaled = [bd.to(device) for bd in batch_data]
            optimizer.zero_grad()
            y_pred_log_scaled = model(x_enc, x_mark_enc, x_dec, x_mark_dec).squeeze(-1)
            loss = criterion(y_pred_log_scaled, y_true_log_scaled)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * len(x_enc)
        train_loss = total_loss / len(train_dataset)
        print(f"[Epoch {epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f}")

    # 13) Save model weights
    torch.save(model.state_dict(), "fan_model.pth")
    print("Saved model weights to fan_model.pth")

    # 14) Also save arrays & scaler
    #     We'll store the *log-scaled* aggregator so we can continue from that point in forecast_future
    np.save("dates_train.npy", dates_train)                         # shape (N,)
    np.save("y_train_scaled.npy", y_train_log_scaled)              # log-scale, scaled aggregator
    np.save("time_feat_train.npy", time_feat_train)                # shape (N,4)

    import joblib
    joblib.dump(scaler, "scaler.pkl")
    print("Saved aggregator log-scaler & data.")


In [28]:
def forecast_future_aggregator(start_date="2017-01-01", end_date="2019-12-31"):
    """
    1) Load aggregator data & model from disk
    2) Iteratively forecast daily aggregator from start_date to end_date in chunks of pred_len=24 days
    3) Return a DataFrame with [date, aggregator_pred_original]
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # A) Load previously saved aggregator data
    dates_train = np.load("dates_train.npy", allow_pickle=True)  # shape (N,)
    y_train_scaled = np.load("y_train_scaled.npy", allow_pickle=True)  # shape (N,)
    time_feat_train = np.load("time_feat_train.npy", allow_pickle=True)  # shape (N,4)
    
    # B) Load model & scaler
    configs = Configs()
    model = FANTimeSeriesModel(configs).to(device)
    model.load_state_dict(torch.load("fan_model.pth", map_location=device))
    model.eval()

    import joblib
    scaler = joblib.load("scaler.pkl")
    
    input_size = configs.pred_len*4  # or 96 as you set in training
    pred_size  = configs.pred_len    # 24

    # For clarity, ensure these match what you used in main:
    input_size = 96
    pred_size  = 24

    # C) Build a "full" aggregator history array that we can extend
    #    For now, let's store them in lists, then convert to numpy
    full_dates = list(dates_train)          # up to 2016-12-31
    full_scaled = list(y_train_scaled)      # aggregator scaled
    full_timefeat = list(time_feat_train)   # shape (N,4), but store as list of arrays

    # Convert start_date & end_date to datetime
    start_dt = pd.to_datetime(start_date)
    end_dt   = pd.to_datetime(end_date)

    # We'll forecast in increments of 24 days until end_dt
    current_forecast_start = start_dt

    # We'll keep a "future" array of date-> aggregator
    # But we store it in full_dates / full_scaled / full_timefeat
    while True:
        # 1) If current_forecast_start > end_dt, break
        if current_forecast_start > end_dt:
            break

        # 2) Our next chunk end
        chunk_end = current_forecast_start + pd.Timedelta(days=pred_size - 1)
        if chunk_end > end_dt:
            chunk_end = end_dt
            # we might have a partial chunk if there's fewer than 24 days left
            chunk_size = (chunk_end - current_forecast_start).days + 1
        else:
            chunk_size = pred_size

        # 3) We'll take the last input_size days from the aggregator as x_enc
        #    We do that from full_scaled
        #    Note we must ensure we have at least input_size in full_scaled
        if len(full_scaled) < input_size:
            raise ValueError("Not enough history to do the first forecast! Check code.")
        
        # Slice out the last input_size from full_scaled/timefeat
        x_enc_arr = np.array(full_scaled[-input_size:])  # shape (96,)
        x_mark_enc_arr = np.array(full_timefeat[-input_size:])  # shape (96,4)

        # Next chunk of future dates
        future_dates_chunk = pd.date_range(current_forecast_start, periods=chunk_size, freq='D')
        
        # Build time features for that future chunk
        # shape => (chunk_size,4)
        t_dec_list = []
        for dt in future_dates_chunk:
            month   = dt.month
            day     = dt.day
            weekday = dt.weekday()
            year_mod= dt.year - 2000
            t_dec_list.append([month, day, weekday, year_mod])
        x_mark_dec_arr = np.array(t_dec_list, dtype=np.float32)  # (24,4) or partial

        # For x_dec, we don't have "known future" aggregator if we're purely forecasting.
        # Usually we feed zeros or just an empty input. We'll feed zeros:
        x_dec_arr = np.zeros((chunk_size,), dtype=np.float32)

        # 4) Reshape to 3D for model
        x_enc = torch.tensor(x_enc_arr[None, :, None], device=device, dtype=torch.float32)  # [1,96,1]
        x_mark_enc = torch.tensor(x_mark_enc_arr[None,:,:], device=device, dtype=torch.float32)  # [1,96,4]
        x_dec = torch.tensor(x_dec_arr[None, :, None], device=device, dtype=torch.float32)  # [1,chunk_size,1]
        x_mark_dec = torch.tensor(x_mark_dec_arr[None,:,:], device=device, dtype=torch.float32) # [1,chunk_size,4]

        with torch.no_grad():
            pred = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
            # pred shape => [1, chunk_size, 1]
            pred = pred.squeeze(0).squeeze(-1).cpu().numpy()  # => shape (chunk_size,)

        # 5) Append these forecast values to full_scaled/timefeat
        for i in range(chunk_size):
            full_dates.append(future_dates_chunk[i])
            full_scaled.append(pred[i])
            full_timefeat.append(x_mark_dec_arr[i])

        # 6) Move the forecast start forward by chunk_size days
        current_forecast_start = chunk_end + pd.Timedelta(days=1)
        
        # If chunk_end == end_dt, we are done
        if chunk_end >= end_dt:
            break

    # Now full_scaled/timefeat includes the entire aggregator from 2010-01-01 to 2019-12-31
    # The portion from index [len(y_train_scaled): ] => 2017-01-01 to 2019-12-31 is forecast

    # Convert to arrays
    full_dates = np.array(full_dates)
    full_scaled = np.array(full_scaled)

    # D) Build a dataframe for the entire period
    df_full = pd.DataFrame({
        'date': full_dates,
        'aggregator_scaled': full_scaled
    })

    # E) Invert scaling for aggregator
    aggregator_original = scaler.inverse_transform(full_scaled.reshape(-1,1)).ravel()
    df_full['aggregator_original'] = aggregator_original

    # Finally, filter only the forecast range (>= 2017-01-01)
    df_forecast = df_full[df_full['date'] >= start_dt].copy()
    df_forecast = df_forecast[df_forecast['date'] <= end_dt]

    return df_forecast[['date','aggregator_original']]


In [29]:
def create_submission():
    # 1) Forecast aggregator from 2017-01-01 to 2019-12-31
    df_forecast = forecast_future_aggregator(start_date="2017-01-01", end_date="2019-12-31")
    # => columns: [date, aggregator_original]

    # 2) Load test.csv
    test = pd.read_csv("playground-series-s5e1/test.csv")
    test['date'] = pd.to_datetime(test['date'])

    # 3) Merge test with df_forecast on 'date'
    #    So each test row gets aggregator_original for that date
    merged = pd.merge(test, df_forecast, how='left', on='date')
    # merged => columns: ['id','date','country','store','product','aggregator_original']

    # 4) The single-series aggregator forecast is "num_sold" for each row
    #    Because the model doesn't differentiate country/store/product
    merged.rename(columns={'aggregator_original': 'num_sold'}, inplace=True)

    # 5) Output the required columns in Kaggle format: 'id','num_sold'
    submission = merged[['id','num_sold']].copy()

    # If you have any missing predictions outside the forecast range, fill with something
    submission['num_sold'] = submission['num_sold'].fillna(0)  # or an average

    # 6) Save
    submission.to_csv("submission.csv", index=False)
    print("submission.csv created!")

In [30]:
if __name__ == "__main__":
    # 1) Train aggregator model and save everything
    main()

    # 2) Create the submission
    create_submission()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['num_sold'].fillna(train['num_sold'].mean(), inplace=True)


[Epoch 1/50] Train Loss: 0.2547
[Epoch 2/50] Train Loss: 0.0640
[Epoch 3/50] Train Loss: 0.0481
[Epoch 4/50] Train Loss: 0.0390
[Epoch 5/50] Train Loss: 0.0334
[Epoch 6/50] Train Loss: 0.0289
[Epoch 7/50] Train Loss: 0.0269
[Epoch 8/50] Train Loss: 0.0237
[Epoch 9/50] Train Loss: 0.0211
[Epoch 10/50] Train Loss: 0.0199
[Epoch 11/50] Train Loss: 0.0184
[Epoch 12/50] Train Loss: 0.0168
[Epoch 13/50] Train Loss: 0.0168
[Epoch 14/50] Train Loss: 0.0150
[Epoch 15/50] Train Loss: 0.0142
[Epoch 16/50] Train Loss: 0.0136
[Epoch 17/50] Train Loss: 0.0125
[Epoch 18/50] Train Loss: 0.0120
[Epoch 19/50] Train Loss: 0.0114
[Epoch 20/50] Train Loss: 0.0105
[Epoch 21/50] Train Loss: 0.0101
[Epoch 22/50] Train Loss: 0.0098
[Epoch 23/50] Train Loss: 0.0092
[Epoch 24/50] Train Loss: 0.0088
[Epoch 25/50] Train Loss: 0.0084
[Epoch 26/50] Train Loss: 0.0083
[Epoch 27/50] Train Loss: 0.0077
[Epoch 28/50] Train Loss: 0.0078
[Epoch 29/50] Train Loss: 0.0074
[Epoch 30/50] Train Loss: 0.0070
[Epoch 31/50] Train

  model.load_state_dict(torch.load("fan_model.pth", map_location=device))


submission.csv created!


In [None]:
configs = Configs()
model = FANTimeSeriesModel(configs).to(device)
    model.load_state_dict(torch.load("fan_model.pth", map_location=device))
    model.eval()

    import joblib
    scaler = joblib.load("scaler.pkl")
    
    input_size = configs.pred_len*4  # or 96 as you set in training
    pred_size  = configs.pred_len    # 24

    # For clarity, ensure these match what you used in main:
    input_size = 96
    pred_size  = 24

    # C) Build a "full" aggregator history array that we can extend
    #    For now, let's store them in lists, then convert to numpy
    full_dates = list(dates_train)          # up to 2016-12-31
    full_scaled = list(y_train_scaled)      # aggregator scaled
    full_timefeat = list(time_feat_train)   # shape (N,4), but store as list of arrays

    # Convert start_date & end_date to datetime
    start_dt = pd.to_datetime(start_date)
    end_dt   = pd.to_datetime(end_date)

    # We'll forecast in increments of 24 days until end_dt
    current_forecast_start = start_dt

    # We'll keep a "future" array of date-> aggregator
    # But we store it in full_dates / full_scaled / full_timefeat
    while True:
        # 1) If current_forecast_start > end_dt, break
        if current_forecast_start > end_dt:
            break

        # 2) Our next chunk end
        chunk_end = current_forecast_start + pd.Timedelta(days=pred_size - 1)
        if chunk_end > end_dt:
            chunk_end = end_dt
            # we might have a partial chunk if there's fewer than 24 days left
            chunk_size = (chunk_end - current_forecast_start).days + 1
        else:
            chunk_size = pred_size

        # 3) We'll take the last input_size days from the aggregator as x_enc
        #    We do that from full_scaled
        #    Note we must ensure we have at least input_size in full_scaled
        if len(full_scaled) < input_size:
            raise ValueError("Not enough history to do the first forecast! Check code.")
        
        # Slice out the last input_size from full_scaled/timefeat
        x_enc_arr = np.array(full_scaled[-input_size:])  # shape (96,)
        x_mark_enc_arr = np.array(full_timefeat[-input_size:])  # shape (96,4)

        # Next chunk of future dates
        future_dates_chunk = pd.date_range(current_forecast_start, periods=chunk_size, freq='D')
        
        # Build time features for that future chunk
        # shape => (chunk_size,4)
        t_dec_list = []
        for dt in future_dates_chunk:
            month   = dt.month
            day     = dt.day
            weekday = dt.weekday()
            year_mod= dt.year - 2000
            t_dec_list.append([month, day, weekday, year_mod])
        x_mark_dec_arr = np.array(t_dec_list, dtype=np.float32)  # (24,4) or partial

        # For x_dec, we don't have "known future" aggregator if we're purely forecasting.
        # Usually we feed zeros or just an empty input. We'll feed zeros:
        x_dec_arr = np.zeros((chunk_size,), dtype=np.float32)

        # 4) Reshape to 3D for model
        x_enc = torch.tensor(x_enc_arr[None, :, None], device=device, dtype=torch.float32)  # [1,96,1]
        x_mark_enc = torch.tensor(x_mark_enc_arr[None,:,:], device=device, dtype=torch.float32)  # [1,96,4]
        x_dec = torch.tensor(x_dec_arr[None, :, None], device=device, dtype=torch.float32)  # [1,chunk_size,1]
        x_mark_dec = torch.tensor(x_mark_dec_arr[None,:,:], device=device, dtype=torch.float32) # [1,chunk_size,4]

        with torch.no_grad():
            pred = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
            # pred shape => [1, chunk_size, 1]
            pred = pred.squeeze(0).squeeze(-1).cpu().numpy()  # => shape (chunk_size,)

        # 5) Append these forecast values to full_scaled/timefeat
        for i in range(chunk_size):
            full_dates.append(future_dates_chunk[i])
            full_scaled.append(pred[i])
            full_timefeat.append(x_mark_dec_arr[i])

        # 6) Move the forecast start forward by chunk_size days
        current_forecast_start = chunk_end + pd.Timedelta(days=1)
        
        # If chunk_end == end_dt, we are done
        if chunk_end >= end_dt:
            break

    # Now full_scaled/timefeat includes the entire aggregator from 2010-01-01 to 2019-12-31
    # The portion from index [len(y_train_scaled): ] => 2017-01-01 to 2019-12-31 is forecast

    # Convert to arrays
    full_dates = np.array(full_dates)
    full_scaled = np.array(full_scaled)

    # D) Build a dataframe for the entire period
    df_full = pd.DataFrame({
        'date': full_dates,
        'aggregator_scaled': full_scaled
    })

    # E) Invert scaling for aggregator
    aggregator_original = scaler.inverse_transform(full_scaled.reshape(-1,1)).ravel()
    df_full['aggregator_original'] = aggregator_original

    # Finally, filter only the forecast range (>= 2017-01-01)
    df_forecast = df_full[df_full['date'] >= start_dt].copy()
    df_forecast = df_forecast[df_forecast['date'] <= end_dt]

    return df_forecast[['date','aggregator_original']]


In [None]:
model = FANTimeSeriesModel(configs).to(device)
    model.load_state_dict(torch.load("fan_model.pth", map_location=device))
    model.eval()

    import joblib
    scaler = joblib.load("scaler.pkl")
    
    input_size = configs.pred_len*4  # or 96 as you set in training
    pred_size  = configs.pred_len    # 24

    # For clarity, ensure these match what you used in main:
    input_size = 96
    pred_size  = 24

    # C) Build a "full" aggregator history array that we can extend
    #    For now, let's store them in lists, then convert to numpy
    full_dates = list(dates_train)          # up to 2016-12-31
    full_scaled = list(y_train_scaled)      # aggregator scaled
    full_timefeat = list(time_feat_train)   # shape (N,4), but store as list of arrays

    # Convert start_date & end_date to datetime
    start_dt = pd.to_datetime(start_date)
    end_dt   = pd.to_datetime(end_date)

    # We'll forecast in increments of 24 days until end_dt
    current_forecast_start = start_dt

    # We'll keep a "future" array of date-> aggregator
    # But we store it in full_dates / full_scaled / full_timefeat
    while True:
        # 1) If current_forecast_start > end_dt, break
        if current_forecast_start > end_dt:
            break

        # 2) Our next chunk end
        chunk_end = current_forecast_start + pd.Timedelta(days=pred_size - 1)
        if chunk_end > end_dt:
            chunk_end = end_dt
            # we might have a partial chunk if there's fewer than 24 days left
            chunk_size = (chunk_end - current_forecast_start).days + 1
        else:
            chunk_size = pred_size

        # 3) We'll take the last input_size days from the aggregator as x_enc
        #    We do that from full_scaled
        #    Note we must ensure we have at least input_size in full_scaled
        if len(full_scaled) < input_size:
            raise ValueError("Not enough history to do the first forecast! Check code.")
        
        # Slice out the last input_size from full_scaled/timefeat
        x_enc_arr = np.array(full_scaled[-input_size:])  # shape (96,)
        x_mark_enc_arr = np.array(full_timefeat[-input_size:])  # shape (96,4)

        # Next chunk of future dates
        future_dates_chunk = pd.date_range(current_forecast_start, periods=chunk_size, freq='D')
        
        # Build time features for that future chunk
        # shape => (chunk_size,4)
        t_dec_list = []
        for dt in future_dates_chunk:
            month   = dt.month
            day     = dt.day
            weekday = dt.weekday()
            year_mod= dt.year - 2000
            t_dec_list.append([month, day, weekday, year_mod])
        x_mark_dec_arr = np.array(t_dec_list, dtype=np.float32)  # (24,4) or partial

        # For x_dec, we don't have "known future" aggregator if we're purely forecasting.
        # Usually we feed zeros or just an empty input. We'll feed zeros:
        x_dec_arr = np.zeros((chunk_size,), dtype=np.float32)

        # 4) Reshape to 3D for model
        x_enc = torch.tensor(x_enc_arr[None, :, None], device=device, dtype=torch.float32)  # [1,96,1]
        x_mark_enc = torch.tensor(x_mark_enc_arr[None,:,:], device=device, dtype=torch.float32)  # [1,96,4]
        x_dec = torch.tensor(x_dec_arr[None, :, None], device=device, dtype=torch.float32)  # [1,chunk_size,1]
        x_mark_dec = torch.tensor(x_mark_dec_arr[None,:,:], device=device, dtype=torch.float32) # [1,chunk_size,4]

        with torch.no_grad():
            pred = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
            # pred shape => [1, chunk_size, 1]
            pred = pred.squeeze(0).squeeze(-1).cpu().numpy()  # => shape (chunk_size,)

        # 5) Append these forecast values to full_scaled/timefeat
        for i in range(chunk_size):
            full_dates.append(future_dates_chunk[i])
            full_scaled.append(pred[i])
            full_timefeat.append(x_mark_dec_arr[i])

        # 6) Move the forecast start forward by chunk_size days
        current_forecast_start = chunk_end + pd.Timedelta(days=1)
        
        # If chunk_end == end_dt, we are done
        if chunk_end >= end_dt:
            break

    # Now full_scaled/timefeat includes the entire aggregator from 2010-01-01 to 2019-12-31
    # The portion from index [len(y_train_scaled): ] => 2017-01-01 to 2019-12-31 is forecast

    # Convert to arrays
    full_dates = np.array(full_dates)
    full_scaled = np.array(full_scaled)

    # D) Build a dataframe for the entire period
    df_full = pd.DataFrame({
        'date': full_dates,
        'aggregator_scaled': full_scaled
    })

    # E) Invert scaling for aggregator
    aggregator_original = scaler.inverse_transform(full_scaled.reshape(-1,1)).ravel()
    df_full['aggregator_original'] = aggregator_original

    # Finally, filter only the forecast range (>= 2017-01-01)
    df_forecast = df_full[df_full['date'] >= start_dt].copy()
    df_forecast = df_forecast[df_forecast['date'] <= end_dt]

    return df_forecast[['date','aggregator_original']]
