## Import Dataset

In [None]:
!pip install scikit-learn pandas numpy torchview visualtorch gdown torch-geometric-temporal torch-cluster # then restart

In [None]:
!gdown --folder https://drive.google.com/drive/folders/1dydbU9HlSIgGQBzYMLogDNI27uO6wga7?usp=drive_link

## Load & Clean the Data

In [1]:
import os
import re
import pandas as pd
from datetime import datetime
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

# --------- CONFIGURATION ---------
ROOT_XLSX_DIR = "Load-data"
CLEANED_CSV_DIR = "cleaned_data"
PREPROCESSED_CSV_DIR = "preprocessed_data"
FINAL_WIDE_CSV = "all_data_df.csv"
FINAL_LONG_CSV = "all_data_timeseries.csv"
# ---------------------------------

def clean_header_and_drop_unused_rows(tmp_df):
    tmp_df.columns = tmp_df.iloc[0]
    tmp_df = tmp_df[1:].reset_index(drop=True)
    if 'Date' in tmp_df.columns:
        tmp_df = tmp_df[~pd.isna(tmp_df['Date'])]
    return tmp_df

def process_excel_file(file_info):
    file_path, rel_path = file_info
    try:
        tmp_df = pd.read_excel(file_path)
        cleaned_df = clean_header_and_drop_unused_rows(tmp_df)
        output_path = os.path.join(CLEANED_CSV_DIR, rel_path).replace(".xlsx", ".csv")
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        cleaned_df.to_csv(output_path, index=False)
        return f"✅ Excel Processed: {file_path}"
    except Exception as e:
        return f"❌ Excel Error in {file_path}: {str(e)}"

def preprocess_and_add_datetime(tmp_df, filename):
    match = re.search(r"(\d{2})-(\d{4})", filename)
    if not match:
        raise ValueError(f"❌ Cannot extract date from filename: {filename}")

    start_month = int(match.group(1))
    start_year = int(match.group(2))
    tmp_df = tmp_df.reset_index(drop=True)

    date_range = pd.date_range(start=datetime(start_year, start_month, 1), periods=len(tmp_df), freq='D')
    tmp_df['Date'] = date_range

    time_cols = [col for col in tmp_df.columns if col != 'Date']
    tmp_df[time_cols] = tmp_df[time_cols].apply(pd.to_numeric, errors='coerce')
    return tmp_df

def process_csv_file(file_info):
    file_path, rel_path = file_info
    try:
        tmp_df = pd.read_csv(file_path)
        processed_df = preprocess_and_add_datetime(tmp_df, os.path.basename(file_path))

        station_name = os.path.normpath(rel_path).split(os.sep)[0]
        processed_df.insert(0, 'station_name', station_name)

        output_path = os.path.join(PREPROCESSED_CSV_DIR, rel_path)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        processed_df.to_csv(output_path, index=False)
        return f"✅ CSV Processed: {file_path}"
    except Exception as e:
        return f"❌ CSV Error in {file_path}: {str(e)}"

def gather_files(root_dir, extension):
    files = []
    for subdir, _, filenames in os.walk(root_dir):
        for f in filenames:
            if f.endswith(extension):
                full = os.path.join(subdir, f)
                rel = os.path.relpath(full, root_dir)
                files.append((full, rel))
    return files

def concatenate_preprocessed_data(output_dir):
    all_data = []
    for subdir, _, files in os.walk(output_dir):
        for file in files:
            if file.endswith(".csv"):
                try:
                    df = pd.read_csv(os.path.join(subdir, file))
                    all_data.append(df)
                except Exception as e:
                    print(f"❌ Failed to read {file}: {e}")
    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()

def convert_to_timeseries_long_format(df):
    time_columns = [col for col in df.columns if re.match(r"^\d{1,2}:\d{2}$", str(col))]
    long_df = df.melt(id_vars=['station_name', 'Date'], value_vars=time_columns,
                      var_name='Time', value_name='Electricity(kW)')
    long_df['Date'] = pd.to_datetime(long_df['Date'].astype(str) + ' ' + long_df['Time'])
    long_df.drop(columns=['Time'], inplace=True)
    long_df.sort_values(by=['station_name', 'Date'], inplace=True)
    return long_df

# ----------- MAIN EXECUTION FLOW -----------
if __name__ == "__main__":
    # Step 1: Clean Excel files to CSV
    xlsx_files = gather_files(ROOT_XLSX_DIR, ".xlsx")
    with Pool(cpu_count()) as pool:
        results = list(tqdm(pool.imap_unordered(process_excel_file, xlsx_files), total=len(xlsx_files)))
    for res in results:
        print(res)

    # Step 2: Preprocess cleaned CSVs
    csv_files = gather_files(CLEANED_CSV_DIR, ".csv")
    with Pool(cpu_count()) as pool:
        results = list(tqdm(pool.imap_unordered(process_csv_file, csv_files), total=len(csv_files)))
    for res in results:
        print(res)

    # Step 3: Concatenate all preprocessed CSVs
    all_data_df = concatenate_preprocessed_data(PREPROCESSED_CSV_DIR)
    if not all_data_df.empty:
        all_data_df.to_csv(FINAL_WIDE_CSV, index=False)
        print(f"✅ Wide-format saved to {FINAL_WIDE_CSV}")

        # Step 4: Convert to long time series format
        long_df = convert_to_timeseries_long_format(all_data_df)
        long_df.to_csv(FINAL_LONG_CSV, index=False)
        print(f"✅ Long-format saved to {FINAL_LONG_CSV}")
    else:
        print("⚠️ No data found for concatenation.")

100%|██████████| 71/71 [00:02<00:00, 28.99it/s]

✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-09-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-01-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-12-2023.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-08-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-07-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-04-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-10-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-02-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-03-2024.xlsx
✅ Excel Processed: Load-data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-05-


100%|██████████| 71/71 [00:00<00:00, 86.91it/s]


✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-12-2023.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-11-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-01-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-03-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-04-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-10-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-06-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-05-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-08-2024.csv
✅ CSV Processed: cleaned_data/Data_อาคารจามจุรี 9/รายงานสรุป-Demand-รายวัน-อาคารจามจุรี9-07

## Define Weight

In [2]:
import pandas as pd

# Create the DataFrame
station_weights_df = pd.DataFrame({
    "station_name": [
        "Data_สถานีชาร์จ",
        "Data_อาคารจามจุรี 9",
        "Data_อาคารวิทยนิเวศน์",
        "Data_อาคารจุลจักรพงษ์",
        "Data_อาคารบรมราชกุมารี",
        "Data_อาคารจามจุรี4",
    ],
    "normalized_reverse_weight": [
        1.000000,
        1.000000,
        1.000000,        1.002786,
        1.002786,
        1.094225,
    ]
})

## Experiment [Clean Data]

In [3]:
def preprocess(long_df):
    long_df.loc[long_df['Electricity(kW)'] < 0, 'Electricity(kW)'] = 0
    return long_df
# long_df_tmp = preprocess(long_df_new)
long_df = preprocess(long_df)

  return op(a, b)


## Split train,valid and test

In [4]:
def split_train_test_data(long_df,long_df_new):
    # Define ratios
    train_ratio = 0.8
    test_ratio = 0.2  # Optional, just for clarity (1 - train_ratio)
    
    # Create empty lists to collect per-station splits
    train_list = []
    test_list = []
    
    # Split per station
    for station, station_df in long_df_new.groupby('station_name'):
        station_df = station_df.sort_values('Date')
        n = len(station_df)
    
        train_end = int(n * train_ratio)
    
        train_list.append(station_df.iloc[:train_end])
        test_list.append(station_df.iloc[train_end:])
    
    # Combine all stations back into global sets
    train_df = pd.concat(train_list).reset_index(drop=True)
    # Create empty lists to collect per-station splits
    train_list = []
    test_list = []
    for station, station_df in long_df.groupby('station_name'):
        station_df = station_df.sort_values('Date')
        n = len(station_df)
    
        train_end = int(n * train_ratio)
    
        train_list.append(station_df.iloc[:train_end])
        test_list.append(station_df.iloc[train_end:])
    
    test_df_new = pd.concat(test_list).reset_index(drop=True)
    
    return train_df,test_df_new
train_df,test_df = split_train_test_data(long_df,long_df)

In [5]:
locations = {
    "Data_สถานีชาร์จ": (13.73624, 100.52995), #Station_name, latitude,longitude
    "Data_อาคารจามจุรี4": (13.73260, 100.53177),
    "Data_อาคารจามจุรี 9": (13.73380, 100.53045),
    "Data_อาคารจุลจักรพงษ์": (13.73684, 100.52852),
    "Data_อาคารบรมราชกุมารี": (13.73800, 100.52905),
    "Data_อาคารวิทยนิเวศน์": (13.73723, 100.53015),
}
train_df.tail(),test_df.tail(),train_df.info(),test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163353 entries, 0 to 163352
Data columns (total 3 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   station_name     163353 non-null  object        
 1   Date             163353 non-null  datetime64[ns]
 2   Electricity(kW)  163231 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 3.7+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40839 entries, 0 to 40838
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   station_name     40839 non-null  object        
 1   Date             40839 non-null  datetime64[ns]
 2   Electricity(kW)  40839 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 957.3+ KB


(                 station_name                Date  Electricity(kW)
 163348  Data_อาคารวิทยนิเวศน์ 2024-09-15 22:45:00              0.0
 163349  Data_อาคารวิทยนิเวศน์ 2024-09-15 23:00:00              0.0
 163350  Data_อาคารวิทยนิเวศน์ 2024-09-15 23:15:00              0.0
 163351  Data_อาคารวิทยนิเวศน์ 2024-09-15 23:30:00              0.0
 163352  Data_อาคารวิทยนิเวศน์ 2024-09-15 23:45:00              0.0,
                 station_name                Date  Electricity(kW)
 40834  Data_อาคารวิทยนิเวศน์ 2024-11-27 22:45:00             0.00
 40835  Data_อาคารวิทยนิเวศน์ 2024-11-27 23:00:00             0.00
 40836  Data_อาคารวิทยนิเวศน์ 2024-11-27 23:15:00             0.00
 40837  Data_อาคารวิทยนิเวศน์ 2024-11-27 23:30:00             0.00
 40838  Data_อาคารวิทยนิเวศน์ 2024-11-27 23:45:00             0.03,
 None,
 None)

## Create Graph

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal import ASTGCN
from torch_geometric_temporal.signal import temporal_signal_split
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.cuda.amp import autocast, GradScaler
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric_temporal import ASTGCN
from torch_geometric.utils import dense_to_sparse  # for converting dense A → sparse edges
# 1. Prepare device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Prepare the graph
locations = {
    "Data_สถานีชาร์จ": (13.73624, 100.52995),
    "Data_อาคารจามจุรี4": (13.73260, 100.53177),
    "Data_อาคารจามจุรี 9": (13.73380, 100.53045),
    "Data_อาคารจุลจักรพงษ์": (13.73684, 100.52852),
    "Data_อาคารบรมราชกุมารี": (13.73800, 100.52905),
    "Data_อาคารวิทยนิเวศน์": (13.73723, 100.53015),
}
station_names = list(locations.keys())
num_nodes = len(station_names)

# Build a fully-connected edge_index
edge_index = torch.tensor(
    [[i, j] for i in range(num_nodes) for j in range(num_nodes) if i != j],
    dtype=torch.long,
).t().contiguous().to(device)  # ← move to GPU here

# 3. Pivot utility
def pivot_to_tensor(df, seq_len):
    df_pivot = df.pivot(index='Date', columns='station_name', values='Electricity(kW)')
    df_pivot = df_pivot[station_names].fillna(0.)
    windows = []
    for start in range(len(df_pivot) - seq_len + 1):
        win = df_pivot.iloc[start:start+seq_len].values  # (seq_len, N)
        windows.append(win.T)                            # (N, seq_len)
    arr = np.stack(windows, axis=0)                     # (T, N, seq_len)
    return torch.tensor(arr, dtype=torch.float)

# 4. Data preparation
len_input = 96
prediction_length = 96
X = pivot_to_tensor(train_df, len_input + prediction_length)
X_input = X[:, :, :len_input]      # (T, N, len_input)
X_target = X[:, :, len_input:]     # (T, N, prediction_length)

class TemporalDataset(torch.utils.data.Dataset):
    def __init__(self, X_in, X_out):
        self.X_in = X_in
        self.X_out = X_out
    def __len__(self):
        return len(self.X_in)
    def __getitem__(self, idx):
        return self.X_in[idx], self.X_out[idx]

dataset = TemporalDataset(X_input, X_target)
loader = DataLoader(dataset, batch_size=512, shuffle=True)
# Config dictionary for ASTGCN hyperparameters



class ASTGCN_V2(nn.Module):
    def __init__(self, num_nodes: int, **kwargs):
        super().__init__()
        self.astgcn = ASTGCN(**kwargs)
        # learnable factors to build A_adp = softmax(ReLU(emb1 @ emb2))
        self.node_emb1 = nn.Parameter(torch.randn(num_nodes, 10))
        self.node_emb2 = nn.Parameter(torch.randn(10, num_nodes))

    def forward(self, x, edge_index=None):
        # 1) build adaptive adjacency matrix (dense)
        A_inter = F.relu(self.node_emb1 @ self.node_emb2)   # (N, N)
        A_adp   = F.softmax(A_inter, dim=1)                 # row-normalize

        # 2) convert to sparse edge_index (drop weights)
        edge_index_adp, _ = dense_to_sparse(A_adp)          #  [oai_citation:0‡pytorch-geometric-temporal.readthedocs.io](https://pytorch-geometric-temporal.readthedocs.io/en/latest/modules/root.html?utm_source=chatgpt.com)

        # 3) feed into ASTGCN (which expects only edge_index)  [oai_citation:1‡pytorch-geometric-temporal.readthedocs.io](https://pytorch-geometric-temporal.readthedocs.io/en/latest/modules/root.html?utm_source=chatgpt.com)
        out = self.astgcn(x, edge_index_adp)

        return F.relu(out)
# 5. Instantiate and move model to GPU
# Define your config as before
astgcn_config = {
    "nb_block": 2,
    "in_channels": 1,
    "K": 2,
    "nb_chev_filter": 64,
    "nb_time_filter": 64,
    "time_strides": 1,
    "num_for_predict": prediction_length,
    "len_input": len_input,
    "num_of_vertices": num_nodes,
    "normalization": "sym",
    "bias": True,
}
# 1) Model, loss, optimizer, scheduler, scaler
model     = ASTGCN_V2(**astgcn_config, num_nodes=num_nodes).to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-2, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=3e-2,
    steps_per_epoch=len(loader),
    epochs=30,
    pct_start=0.3   # optional: gentle warm-up
)
scaler    = GradScaler()

# 2) Training loop
model.train()
for epoch in range(30):
    total_loss = 0.0
    # iterate over DataLoader directly
    for X_batch, Y_batch in tqdm(loader, desc=f"Epoch {epoch+1:02d}"):
        X = X_batch.unsqueeze(2).to(device)  # [B, N, 1, len_input]
        Y = Y_batch.to(device)               # [B, N, pred_len]

        optimizer.zero_grad()
        with autocast():
            y_pred = model(X, edge_index)
            loss   = criterion(y_pred, Y)

        # 1) backward with scaler
        scaler.scale(loss).backward()
        # 2) un-scale then clip
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # 3) optimizer step + scaler update
        scaler.step(optimizer)
        scaler.update()
        # 4) scheduler step (per-batch)
        scheduler.step()

        total_loss += loss.item()

    avg = total_loss / len(loader)
    print(f"Epoch {epoch+1:02d} — Avg Loss: {avg:.4f}")

Using device: cuda


  scaler    = GradScaler()


Epoch 01:   0%|          | 0/55 [00:00<?, ?it/s]

  with autocast():


Epoch 01 — Avg Loss: 15610.5403


Epoch 02:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 02 — Avg Loss: 8847.7999


Epoch 03:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 03 — Avg Loss: 6087.0939


Epoch 04:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 04 — Avg Loss: 5691.5266


Epoch 05:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 05 — Avg Loss: 5238.5119


Epoch 06:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 06 — Avg Loss: 4940.6096


Epoch 07:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 07 — Avg Loss: 5571.9517


Epoch 08:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 08 — Avg Loss: 5914.4908


Epoch 09:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 09 — Avg Loss: 5287.7232


Epoch 10:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 10 — Avg Loss: 4648.4655


Epoch 11:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 11 — Avg Loss: 4457.2845


Epoch 12:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 12 — Avg Loss: 4339.9532


Epoch 13:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 13 — Avg Loss: 4337.7640


Epoch 14:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 14 — Avg Loss: 4507.5241


Epoch 15:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 15 — Avg Loss: 4304.1267


Epoch 16:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 16 — Avg Loss: 4124.8091


Epoch 17:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 17 — Avg Loss: 3920.1993


Epoch 18:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 18 — Avg Loss: 3860.9607


In [None]:
# class ASTGCNWrapper(nn.Module):
#     def __init__(self, model, edge_index):
#         super().__init__()
#         self.model = model
#         self.edge_index = edge_index

#     def forward(self, x):
#         return self.model(x, self.edge_index)
# from torchview import draw_graph

# # Wrap the model with fixed edge_index
# wrapped_model = ASTGCNWrapper(model, edge_index)

# # Provide the correct input shape: (batch_size, num_nodes, 1, len_input)
# draw_graph(
#     wrapped_model,
#     input_size=(1, num_nodes, 1, len_input),
#     expand_nested=True,
#     roll=True,
#     show_shapes=True,
# ).visual_graph.render("astgcn_graph_v1.5", format="png")

## Evaluate

In [13]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Pivot and concatenate train+test
df_all = pd.concat([train_df, test_df], ignore_index=True)
pivot = (df_all
         .pivot(index='Date', columns='station_name', values='Electricity(kW)')
         .reindex(columns=station_names)      # ensure correct station order
         .fillna(0.0))
dates = pivot.index
T = len(dates)

# 2. Build every possible sliding window of length `len_input`
max_start = T - len_input - prediction_length + 1  # total windows
windows = []
for t0 in range(max_start):
    arr = pivot.iloc[t0:t0+len_input].values       # (len_input, N)
    windows.append(arr.T)                          # → (N, len_input)
X_all = np.stack(windows, axis=0)                  # (W, N, len_input)
X_all = torch.from_numpy(X_all).float().unsqueeze(2)  # (W, N, 1, len_input)

# 3. Batch through the model in eval mode
batch_size = 512
loader = DataLoader(TensorDataset(X_all), batch_size=batch_size, shuffle=False)

model.eval()
preds = []
with torch.no_grad():
    for (Xb,) in tqdm(loader,desc="batch"):
        Xb = Xb.to(device)
        yb = model(Xb, edge_index)               # → (B, N, prediction_length)
        preds.append(yb.cpu().numpy())
preds = np.concatenate(preds, axis=0)            # (W, N, pred_len)

# 4. Take only the *first-step* forecast (you can slice other horizons similarly)
first_step = preds[:, :, 0]                      # (W, N)

# 5. Build a long DataFrame of all predictions
#    window w predicts for date = dates[w + len_input]
pred_dates = dates[len_input : len_input + first_step.shape[0]]
records = []
for w, pd_dt in enumerate(pred_dates):
    for i, station in enumerate(station_names):
        records.append((pd_dt, station, first_step[w, i]))
df_preds = pd.DataFrame(records, columns=['Date','station_name','Predicted(kW)'])

# 6. Merge with test_df (this yields exactly len(test_df)=40 839 rows)
df_merged = (test_df
             .merge(df_preds, on=['Date','station_name'], how='left')
             .sort_values(['Date','station_name'])
             .reset_index(drop=True))

# after your merge:
df_eval = df_merged.dropna(subset=['Predicted(kW)']).copy()

# compute MAE only on the non‐NaN rows
mae = mean_absolute_error(
    df_eval['Electricity(kW)'].values,
    df_eval['Predicted(kW)'].values
)
print(f"Test MAE (first‐step, dropping {len(df_merged) - len(df_eval)} rows with no pred): {mae:.4f}")

batch:   0%|          | 0/68 [00:00<?, ?it/s]

Test MAE (first‐step, dropping 570 rows with no pred): 58.8885


In [None]:
df_eval

In [14]:
import pandas as pd
import numpy as np

# Merge weights into evaluation DataFrame
df_eval = df_eval.merge(station_weights_df, on='station_name', how='left')

# Compute weighted absolute error
df_eval['abs_error'] = np.abs(df_eval['Electricity(kW)'] - df_eval['Predicted(kW)'])
df_eval['weighted_abs_error'] = df_eval['abs_error'] * df_eval['normalized_reverse_weight']

# Compute weighted actual value
df_eval['weighted_actual'] = df_eval['Electricity(kW)'] * df_eval['normalized_reverse_weight']

# Calculate WAPE
wape = df_eval['weighted_abs_error'].sum() / df_eval['weighted_actual'].sum()
print(f"WAPE (weighted): {wape:.4f} or {wape*100:.2f}%")

WAPE (weighted): 0.6406 or 64.06%


In [None]:
df_eval.to_csv("df_eval.csv",index=False)