In [1]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
from torch.utils.data import Dataset, DataLoader,RandomSampler,SubsetRandomSampler
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import random
import json
# import optuna
from torch.nn import functional
import datetime
import gc
import os
import glob
from tqdm import tqdm

In [2]:
all_data = np.load('D:/myfiles/project/bike_prediction/feature_data/tcn_data_3d.npy')
all_data.shape

(753, 3312, 8)

In [3]:
all_data[1:2,0:72,0:8]

array([[[  9.,  17.,   8.,  12.,   2.,  13.,   6.,   0.],
        [  9.,  19.,  10.,   9.,   2.,  10.,   6.,   1.],
        [ 10.,  21.,  11.,  13.,   2.,  13.,   7.,   2.],
        [ 10.,  22.,  12.,  11.,   1.,  12.,   6.,   3.],
        [ 10.,  26.,  16.,   8.,  -1.,   9.,   8.,   4.],
        [  2.,  27.,  25.,   0.,   0.,   7.,   3.,   5.],
        [ -2.,  27.,  29.,  -5.,   0.,   6.,   1.,   6.],
        [ -2.,  28.,  30.,  -6.,   0.,   0.,   1.,   7.],
        [ -4.,  26.,  30., -12.,  -1.,  -6.,  -1.,   8.],
        [-10.,  18.,  28., -14.,  -1., -10.,  -4.,   9.],
        [ -8.,  19.,  27., -14.,  -2., -10.,  -4.,  10.],
        [ -9.,  17.,  26., -14.,  -2., -10.,  -4.,  11.],
        [-10.,  15.,  25., -12.,  -2.,  -9.,  -3.,  12.],
        [-10.,  13.,  23., -10.,  -2.,  -6.,  -3.,  13.],
        [-11.,  11.,  22., -14.,  -2.,  -9.,  -4.,  14.],
        [-11.,  10.,  21., -12.,  -1.,  -8.,  -3.,  15.],
        [-11.,   6.,  17.,  -9.,   1.,  -5.,  -5.,  16.],
        [ -3.,

In [3]:
# 【站点数量，序列长度，特征数量】
class MyDataset(Dataset):
    def __init__(self, his_datas, his_label, output_size, feature_size, seq_num, time_of_day):
        self.his_datas = his_datas  #【N，1080，X】
        # self.sta_datas = sta_datas  #【N，26，Y】
        self.his_label = his_label  #【N，1080，1】
        self.output_size = output_size  # 输出长度24
        self.feature_size = feature_size  # 卷积塔时序特征数量
        # self.static_feature_size = static_feature_size  # 特征塔天粒度/静态特征数量
        self.seq_num = seq_num  # 窗口大小
        self.time_of_day = time_of_day  # 每天24小时
         
        self.site_num = his_datas.shape[0]  # 站点数量
        self.time_num = his_datas.shape[1] // time_of_day  - (seq_num + 3) # 单个站点的样本数量：26-15=11个样本
        self.sample_num = self.time_num * self.site_num  # 总样本数量：32*1080=3w
        # print(his_datas.shape)
        print('单个样本数量：', self.time_num)
        print('站点数量：', self.site_num)
        print('总样本数量：', self.sample_num)
        print("a", his_datas.shape, his_label.shape)
        
    def __getitem__(self, index): # 0-3w
        # 是第几个样本？
        cls_indx, time_indx = divmod(index, self.time_num)
        start_index = time_indx * self.time_of_day
        end_index = (time_indx + self.seq_num) * self.time_of_day
        # [站点,小时粒度序列,小时粒度特征]
        tmp_data = self.his_datas[cls_indx, start_index:end_index, 0:self.feature_size].astype(float)  # [0, 14*24, time_feature_size]
        sample_time_data = torch.tensor(tmp_data, dtype=torch.float32)
        # [站点,天粒度序列,天粒度特征]
        # static_data = self.sta_datas[cls_indx, static_index:static_index+1, 0:self.static_feature_size].astype(float)  # [0, 1, time_feature_size]
        # sample_static_data = torch.tensor(static_data, dtype=torch.float32)
        # [站点,序列,1]
        label_start = end_index
        label_end = label_start + self.output_size
        target_label = self.his_label[cls_indx, label_start:label_end, 0:1].astype(float)
        sample_labels = torch.tensor(target_label, dtype=torch.float32)
        
        return sample_time_data, sample_labels
    
    def __len__(self):
        return self.sample_num


In [4]:
def train_test_split(all_data):  # 56天
    tmp_data_info = np.array(all_data)
    # sta_data_info = np.array(sta_data)
    # 当前总时长为138天，4.15-8.30
    train_start_idx = 0
    train_end_idx = 76 * 24 
    val_start_idx = 76 * 24
    val_end_idx = 107 * 24 
    test_start_idx = 107 * 24
    test_end_idx = 138 * 24 
    # train_start_sta_idx = 0
    # train_end_sta_idx = 18
    # val_end_sta_idx = 22
    # test_end_sta_idx = 26
    
#     train_start_idx = 0
#     train_end_idx = 38 * 24  # 9
#     val_start_idx = (38 - 30) * 24  # 13使用14，14使用15
#     val_end_idx = 42 * 24  # 4
#     test_start_idx = (42 - 30) * 24
#     test_end_idx = 49 * 24  # 7
    
    train_data = tmp_data_info[:, train_start_idx:train_end_idx, :]  # 所有特征
    # train_data_sta = sta_data_info[:, train_start_sta_idx:train_end_sta_idx, :]
    train_label = tmp_data_info[:, train_start_idx:train_end_idx, 0:1]
    val_data = tmp_data_info[:, val_start_idx:val_end_idx, :]
    # val_data_sta = sta_data_info[:, train_end_sta_idx:val_end_sta_idx, :]    
    val_label = tmp_data_info[:, val_start_idx:val_end_idx, 0:1]
    test_data = tmp_data_info[:, test_start_idx:test_end_idx, :]
    # test_data_sta = sta_data_info[:, val_end_sta_idx:test_end_sta_idx, :]  
    test_label = tmp_data_info[:, test_start_idx:test_end_idx, 0:1]
    return train_data, train_label, val_data, val_label, test_data, test_label
    # return train_data, train_data_sta, train_label, val_data, val_data_sta, val_label, test_data, test_data_sta, test_label



def load_data(all_data, batch_size):
    train_data, train_label, val_data, val_label, test_data, test_label = train_test_split(all_data)
    
    # 创建数据集
    train_dataset = MyDataset(his_datas=train_data, his_label=train_label, 
                             output_size=24, feature_size=8, seq_num=1, time_of_day=24)
    
    # 创建训练样本索引
    n_train = len(train_dataset)
    indices = list(range(n_train))
    np.random.shuffle(indices)
    split_point = int(n_train * 0.4)
    train_indices = indices[:split_point]
    
    # 创建采样器
    train_sampler = SubsetRandomSampler(train_indices)
    
    # 创建数据加载器
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        sampler=train_sampler,
        pin_memory=True  # 加速GPU数据传输
    )
    
    # 验证和测试集保持完整
    val_dataset = MyDataset(his_datas=val_data, his_label=val_label, 
                           output_size=24, feature_size=8, seq_num=1, time_of_day=24)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    test_dataset = MyDataset(his_datas=test_data, his_label=test_label, 
                             output_size=24, feature_size=8, seq_num=1, time_of_day=24)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_dataloader, val_dataloader, test_dataloader


# def load_multitask_data(all_data, sta_data, batch_size, output_sizes={0: 24, 1: 24, 2: 24}):
#     # 自定义collate函数处理多目标数据
#     def multitask_collate(batch):
#         time_data = torch.stack([item[0] for item in batch])
#         static_data = torch.stack([item[1] for item in batch])
#         labels = {}
#         for target_idx in output_sizes.keys():
#             labels[target_idx] = torch.stack([item[2][target_idx] for item in batch])
#         return time_data, static_data, labels
#     train_data, train_data_sta, train_label, val_data, val_data_sta, val_label, test_data, test_data_sta, test_label = train_test_split(all_data, sta_data)
#     train_dataset = MyDataset(his_datas=train_data, sta_datas = train_data_sta, his_label=train_label, 
#                               output_sizes=output_sizes, time_feature_size=22, static_feature_size=7, seq_num=14, time_of_day=24)
#     n_samples = len(train_dataset)
#     indices = list(range(n_samples))
#     # 随机选择50%的样本
#     split = int(0.4 * n_samples)
#     np.random.shuffle(indices)
#     train_indices = indices[:split]  # 前50%作为本次训练样本
#     # 创建采样器
#     train_sampler = SubsetRandomSampler(train_indices)
#     train_dataloader = DataLoader(
#         train_dataset,
#         batch_size=batch_size,
#         sampler=train_sampler,
#         collate_fn=multitask_collate
#     )
#     # train_rand_sampler = RandomSampler(train_dataset, replacement=False, num_samples=int(len(train_dataset)*0.3))
#     # train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle=False, sampler=train_rand_sampler) 
#     # train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True) 
      
#     val_dataset = MyDataset(his_datas=val_data, sta_datas = val_data_sta, his_label=val_label,
#                             output_sizes=output_sizes, time_feature_size=22, static_feature_size=7, seq_num=14, time_of_day=24)
#     val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle=False, collate_fn=multitask_collate)

#     test_dataset = MyDataset(his_datas=test_data, sta_datas = test_data_sta, his_label=test_label,
#                              output_sizes=output_sizes, time_feature_size=22, static_feature_size=7, seq_num=14, time_of_day=24)
#     test_dataloader = DataLoader(test_dataset, batch_size = 4, shuffle=False, collate_fn=multitask_collate)

#     return train_dataloader , val_dataloader, test_dataloader


In [5]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

def huber_loss(y_pred, y_true):
    loss = torch.nn.SmoothL1Loss(reduction='mean',beta=5.0)(y_pred, y_true)
    return loss


def mse_loss(y_pred, y_true):
    loss = torch.nn.MSELoss(reduction='mean')(y_pred, y_true)
    return loss
    

def printbar():
    t = datetime.datetime.now()
    print('==========='*8 + str(t))


import os
def setup_seed(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] =str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.daterministic = True
    

class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # [N, 38, 14*24]->[N, 300, 14*24]
        # n_inputs=38, n_outputs=1
        # weight_norm(
        self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation)
        self.chomp1 = Chomp1d(padding)
        self.norm1 = nn.GroupNorm(1, n_outputs)  #加一层试试

        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        
        # [N, 300, 14*24]->[N, 300, 14*24]
        # weight_norm(
        self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.norm2 = nn.GroupNorm(1, n_outputs)
        self.dropout2 = nn.Dropout(dropout)
        

        self.net = nn.Sequential(self.conv1, self.chomp1, self.norm1, self.relu1, self.dropout1,
                                  self.conv2, self.chomp2, self.norm2, self.relu2, self.dropout2)

        # [N, 38, 14*24]->[N, 300, 14*24]
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None # 1x1 conv
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """
        x: n*emb_size*seq_len
        out: n*layer_outchannel* seq_len"""
        # [N, 38, 14*24]->[N, 1, 14*24]
        out = self.net(x)
        # [N, 38, 14*24]->[N, 1, 14*24]
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)  # [N, 1, 14*24]


class TemporalConvNet(nn.Module):
    # num_inputs=38, out_channels=[300, 200, 100, 50, 1]
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        # dilation_sizes = [1,4,16,24]
        for i in range(num_levels):
            """dilated conv"""
            dilation_size = 2 ** i   #认为此处不合理，待改                                                    
            # dilation_size = dilation_sizes[i]
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            # [N, 300, 14*24] + [N, 200, 14*24] + [N, 100, 14*24] + [N, 50, 14*24] + [N, 1, 14*24]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size, dropout=dropout)]

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)
 

class MultiTaskTCN(nn.Module):
    def __init__(self, input_size, input_len, output_sizes, num_channels,
                 kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):
        super(MultiTaskTCN, self).__init__()
        # [N, 38, 14*24]->[N, 3, 14*24]
        # input_size=39, num_channels=[300, 200, 100, 50, 1],output_size=1*24
        self.output_sizes = output_sizes
        self.time_tasks = list(output_sizes.keys())
        # 为每个时间粒度创建独立的预测头
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
        # self.decoder = nn.Linear(input_len, output_size)
        self.drop = nn.Dropout(emb_dropout)
        self.emb_dropout = emb_dropout
        # 为每个时间粒度创建独立的预测头
        self.prediction_heads = nn.ModuleDict()
        for scale in self.time_tasks:
            self.prediction_heads[str(scale)] = nn.Linear(
                input_len,
                output_sizes[scale]
            )
            self.prediction_heads[str(scale)].bias.data.fill_(0)
            self.prediction_heads[str(scale)].weight.data.normal_(0, 0.01)
        # self.init_weights()

    def init_weights(self):
#         self.decoder.bias.data.fill_(0)
#         self.decoder.weight.data.normal_(0, 0.01)
        for head in self.prediction_heads:
            head.bias.data.fill_(0)
            head.weight.data.normal_(0, 0.01)
            

    def forward(self, input):
        """Input ought to have dimension (N, C_in, L_in), 
        where L_in is the seq_len; 
        here the input is (N, L, C)"""
        # [N, 14*24, 38]->[N, 38, 14*24]
        y = input.transpose(1, 2)
        # [N, 38, 14*24]->[N, 3, 14*24]
        y = self.tcn(y)
        # [N, 3, 14*24]->拆分多目标->[N, 1, 1*24]->[N, 1*24, 1]
        # 为每个时间粒度生成预测
        predictions = {}
        for scale in self.time_tasks:
            scale_features = y[:,scale:scale+1,:]
            predictions[scale] = self.prediction_heads[str(scale)](scale_features).transpose(1, 2).contiguous()
        
        return predictions
#         y = self.decoder(y).transpose(1, 2)
#         return y.contiguous()

    
class TCN(nn.Module):
    def __init__(self, input_size, input_len, output_size, num_channels,
                 kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):
        super(TCN, self).__init__()
        # [N, 38, 14*24]->[N, 1, 14*24]
        # input_size=39, num_channels=[300, 200, 100, 50, 1],output_size=1*24
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
        self.decoder = nn.Linear(input_len, output_size)
        self.drop = nn.Dropout(emb_dropout)
        self.emb_dropout = emb_dropout
        self.init_weights()
        self.linear = nn.Linear(input_len, output_size)

    def init_weights(self):
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.normal_(0, 0.01)

    def forward(self, input):
        """Input ought to have dimension (N, C_in, L_in), 
        where L_in is the seq_len; 
        here the input is (N, L, C)"""
        # [N, 14*24, 38]->[N, 38, 14*24]
        y = input.transpose(1, 2)
        # [N, 38, 14*24]->[N, 1, 14*24]
        y = self.tcn(y)
        # [N, 1, 14*24]->[N, 1, 1*24]->[N, 1*24, 1]
        y = self.decoder(y).transpose(1, 2)
        # 使用linear的效果  [N, 14*24, 1]->[N, 1, 14*24]->[N, 1, 1*24]->[N, 1*24, 1]
        return y.contiguous()

In [6]:
class PeakHuberLoss(nn.Module):
    def __init__(self):
        super(PeakHuberLoss, self).__init__()
    def forward(self, y_pred, y_true, delta = 5):
        # y_pred: [B, 24, 1]; y_true: [B, 24, 1]
        # 标准化形状，确保可广播
        if y_pred.ndim == 2:
            y_pred = y_pred.unsqueeze(-1)
        if y_true.ndim == 2:
            y_true = y_true.unsqueeze(-1)
        error = y_true - y_pred
        peak_mask = (y_true >= 5)
        # 让空集合时保持为张量而不是 Python float
        if torch.any(peak_mask):
            peak_err = error[peak_mask]
            peak_loss = torch.where(torch.abs(peak_err) <= delta,
                                    0.5 * peak_err**2,
                                    delta * (torch.abs(peak_err) - 0.5 * delta)).mean()
        else:
            peak_loss = torch.zeros((), device=error.device)
        non_peak_mask = ~peak_mask
        if torch.any(non_peak_mask):
            non_peak_err = error[non_peak_mask]
            non_peak_loss = torch.abs(non_peak_err).mean()
        else:
            non_peak_loss = torch.zeros((), device=error.device)
        total_loss = peak_loss * 2 + non_peak_loss
        return total_loss  # 返回单个标量张量
    
class MultiTaskPHLoss(nn.Module):
    def __init__(self, loss_weights=None):
        super(MultiTaskPHLoss, self).__init__()
        self.peakhuberloss = PeakHuberLoss()
        self.loss_weights = loss_weights
    
    def forward(self, predictions, targets, delta = 5):
        total_loss = 0
        losses = {}
        for scale, pred in predictions.items():
            target = targets[scale]
            scale_loss = self.peakhuberloss(pred, target, delta = delta)
            weight = self.loss_weights[scale] if self.loss_weights else 1.0
            weighted_loss = weight * scale_loss
            losses[scale] = scale_loss.item()
            total_loss += weighted_loss
        return total_loss, losses

In [7]:
setup_seed(12345)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
output_sizes = 24

# device = 'cpu'
print('device:', device)
print(all_data.shape)
# print(static_all_data.shape)

# 加载数据
train_dataloader, val_dataloader, test_dataloader = load_data(all_data[:, :, :], 1024)


device: cuda
(753, 3312, 8)
单个样本数量： 72
站点数量： 753
总样本数量： 54216
a (753, 1824, 8) (753, 1824, 1)
单个样本数量： 27
站点数量： 753
总样本数量： 20331
a (753, 744, 8) (753, 744, 1)
单个样本数量： 27
站点数量： 753
总样本数量： 20331
a (753, 744, 8) (753, 744, 1)


In [9]:
model_save_path = 'pred_model/net_divvy_TCN_4.pth'

num_channels = [64, 128, 32, 1]  # TCN隐藏层维度

# 训练模型
lr = 0.001
# loss_weights = {10: 1.0, 16: 1.0, 21: 1.0}  # 更长期的预测给予更高权重    
es_cnt = 0
max_es_epoch = 10
min_val_loss = float('inf')
epoches = 50
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# tcn_model = HybridTCN(input_size=22, input_len=14*24, output_sizes=output_sizes, num_channels=num_channels, 
#                          static_dict=static_dict, static_hidden_dims=[32, 16],
#                          kernel_size=3, dropout=0.25, static_dropout=0.2, tied_weights=False).to(device)
tcn_model = TCN(input_size=8, input_len=1*24, output_size=24, num_channels=num_channels, 
                         kernel_size=3, dropout=0.25, emb_dropout=0.2, tied_weights=False).to(device)
criterion = PeakHuberLoss()
optimizer = torch.optim.AdamW(
    tcn_model.parameters(),
    lr=lr,                    # 学习率
    betas=(0.9, 0.999),         # 动量参数
    eps=1e-8,                   # 数值稳定性
    weight_decay=1e-2,          # 权重衰减
    amsgrad=False               # 是否使用 AMSGrad 变体
)

In [10]:
for e in range(epoches + 1):
    tcn_model.train()
    train_losses = []

    for time_data, label in train_dataloader:
        time_data = time_data.to(device)
        label_on_device = label.to(device)
        optimizer.zero_grad()
        forecast = tcn_model(time_data)  # [batch, 24, 1]
        loss = criterion(forecast, label_on_device)  # 已修改：不再解包
        if torch.isnan(loss):
            print("训练损失为NaN! 中止该 batch")
            break
        train_losses.append(loss.detach().item())
        loss.backward()
        optimizer.step()

    train_loss_avg = sum(train_losses) / len(train_losses) if train_losses else 0

    if e % 2 == 0:  # 每2个epoch验证一次
        tcn_model.eval()
        val_losses = []
        with torch.no_grad():
            for val_time_data, val_label in val_dataloader:
                val_time_data = val_time_data.to(device)
                val_label_on_device = val_label.to(device)
                val_forecast = tcn_model(val_time_data)
                val_loss = criterion(val_forecast, val_label_on_device)  # 已修改：不再解包
                val_losses.append(val_loss.item())
        val_loss_avg = sum(val_losses) / len(val_losses) if val_losses else 0
        print(f'Epoch={e}, Train Loss:{train_loss_avg:.6f}, Val Loss:{val_loss_avg:.6f}')
        if val_loss_avg < min_val_loss:
            min_val_loss = val_loss_avg
            es_cnt = 0
            torch.save(tcn_model.state_dict(), model_save_path)
        else:
            es_cnt += 1
            if es_cnt >= max_es_epoch:
                print('触发早停机制！')
                break

Epoch=0, Train Loss:81.721069, Val Loss:117.444611
Epoch=2, Train Loss:33.274699, Val Loss:71.997129
Epoch=4, Train Loss:29.381841, Val Loss:61.213430
Epoch=6, Train Loss:24.640171, Val Loss:51.102310
Epoch=8, Train Loss:23.153331, Val Loss:46.127585
Epoch=10, Train Loss:22.742082, Val Loss:43.024810
Epoch=12, Train Loss:21.247876, Val Loss:46.824858
Epoch=14, Train Loss:20.740135, Val Loss:43.581237
Epoch=16, Train Loss:23.489024, Val Loss:54.298407
Epoch=18, Train Loss:20.800681, Val Loss:41.735164
Epoch=20, Train Loss:19.965673, Val Loss:42.334273
Epoch=22, Train Loss:20.088468, Val Loss:48.071153
Epoch=24, Train Loss:20.469164, Val Loss:40.219119
Epoch=26, Train Loss:19.683221, Val Loss:41.774037
Epoch=28, Train Loss:19.448854, Val Loss:40.830687
Epoch=30, Train Loss:19.065644, Val Loss:45.415265
Epoch=32, Train Loss:19.423810, Val Loss:41.030278
Epoch=34, Train Loss:19.235192, Val Loss:42.497341
Epoch=36, Train Loss:19.440825, Val Loss:42.200446
Epoch=38, Train Loss:18.673383, Val

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_channels = [64, 128, 32, 1]  # TCN隐藏层维度
criterion = PeakHuberLoss()
# 加载模型，预测
tcn_save_model = TCN(input_size=8, input_len=1*24, output_size=24, num_channels=num_channels, 
                         kernel_size=3, dropout=0.25, emb_dropout=0.2, tied_weights=False).to(device)
# batch*length*size 输入， batch = 32个点位，len = 7天*24小时，size = 8个特征 
tcn_save_model.load_state_dict(torch.load('pred_model/net_divvy_TCN_4.pth'))

# 初始化存储
test_losses = []
true_values = []
pred_values = []

# 测试循环
tcn_save_model.eval()
with torch.no_grad():
    for test_time_data, test_labels in test_dataloader:
        test_time_data = test_time_data.to(device)
        test_labels = test_labels.to(device)
        
        # 标准化输入数据（如果需要）
        # test_data, mean, std = transform_series2(test_data)
        
        # 前向传播
        test_forecasts = tcn_save_model(test_time_data)
        
        # 反标准化预测结果（如果需要）
        # for target_idx, forecast in test_forecasts.items():
        #     test_forecasts[target_idx] = transform_series2_decode(forecast, mean, std)
        
        # 计算损失
        test_loss = criterion(test_forecasts, test_labels)
        test_losses.append(test_loss.item())
        # 存储真实值和预测值
        true_values.append(test_labels.cpu().numpy())
        pred_values.append(test_forecasts.cpu().numpy())

# 计算平均损失
test_loss_avg = sum(test_losses) / len(test_losses) if test_losses else 0
print(f'Test Loss: {test_loss_avg:.6f}')

Test Loss: 40.089904


In [13]:
# 评估测试集的 MSE / MAPE / WMAPE（仅统计真值>5的样本；早峰7-9、晚峰18-20、全天0-23）
import numpy as np

if not pred_values or not true_values:
    print("pred_values / true_values 为空，请先运行测试循环。")
else:
    y_pred = np.concatenate(pred_values, axis=0)  # [N, 24, 1]
    y_true = np.concatenate(true_values, axis=0)  # [N, 24, 1]
    
    # 去掉最后一个特征维度
    y_pred = y_pred.squeeze(-1)  # [N, 24]
    y_true = y_true.squeeze(-1)  # [N, 24]

    def compute_metrics_gt5(y_true_slice, y_pred_slice, gt_min=5):
        # 仅在真值>gt_min的样本上计算
        mask = y_true_slice > gt_min
        if not np.any(mask):
            return float('nan'), float('nan'), float('nan')
        yt = y_true_slice[mask]
        yp = y_pred_slice[mask]
        mse = float(np.mean((yp - yt) ** 2))
        # MAPE（分母为真值>5的子集）
        mape = float(np.mean(np.abs((yp - yt) / yt)))
        # WMAPE（分母为真值>5的子集和）
        denom = float(np.sum(np.abs(yt)))
        wmape = float(np.sum(np.abs(yp - yt)) / denom) if denom > 0 else float('nan')
        return mse, mape, wmape

    # 定义时段索引（含端点）
    morning_idx = np.array([7, 8, 9])
    evening_idx = np.array([18, 19, 20])
    all_idx = np.arange(24)

    # 早峰（仅真值>5）
    mse_morning, mape_morning, wmape_morning = compute_metrics_gt5(
        y_true[:, morning_idx].reshape(-1), y_pred[:, morning_idx].reshape(-1)
    )
    # 晚峰（仅真值>5）
    mse_evening, mape_evening, wmape_evening = compute_metrics_gt5(
        y_true[:, evening_idx].reshape(-1), y_pred[:, evening_idx].reshape(-1)
    )
    # 全天（仅真值>5）
    mse_all, mape_all, wmape_all = compute_metrics_gt5(
        y_true[:, all_idx].reshape(-1), y_pred[:, all_idx].reshape(-1)
    )

    print("=== Test Metrics (y_true > 5 only) ===")
    print(f"Morning 7-9  -> MSE: {mse_morning:.4f}, MAPE: {mape_morning:.4f}, WMAPE: {wmape_morning:.4f}")
    print(f"Evening 18-20-> MSE: {mse_evening:.4f}, MAPE: {mape_evening:.4f}, WMAPE: {wmape_evening:.4f}")
    print(f"All-day 0-23 -> MSE: {mse_all:.4f}, MAPE: {mape_all:.4f}, WMAPE: {wmape_all:.4f}")


=== Test Metrics (y_true > 5 only) ===
Morning 7-9  -> MSE: 200.3086, MAPE: 0.2619, WMAPE: 0.3377
Evening 18-20-> MSE: 105.4965, MAPE: 0.2685, WMAPE: 0.3152
All-day 0-23 -> MSE: 99.2215, MAPE: 0.2545, WMAPE: 0.2883


In [14]:
# 计算每个目标的MAPE（按天计算）
# 合并所有批次的预测和真实值
lth = len(true_values)
all_true = np.array(true_values)
all_true = np.reshape(all_true, (lth, 4, 24))
all_pred = np.array(pred_values)
all_pred = np.reshape(all_pred, (lth, 4, 24))
print(all_true.shape)

print(f"\n=== 目标 {target_idx} MAPE分析 ===")

# 计算每天的MAPE（只考虑真值>=5的点）
daily_mapes = []
morning_mapes = []
afternoon_mapes = []
evening_mapes = []
daily_wapes = []  # 新增：存储每天的WMAPE
morning_wapes = []  # 新增：存储每天早峰的WMAPE
afternoon_wapes = []  # 新增：存储每天午峰的WMAPE
evening_wapes = []  # 新增：存储每天晚峰的WMAPE

for i in range(all_pred.shape[1]):
    sub_day_pred = all_pred[:, i,:].round()
    sub_day_true = all_true[:, i,:]
    
    # 只考虑真值>=5的点
    where_res = np.where(sub_day_true>=5)
    sub_day_true1 = sub_day_true[where_res]
    sub_day_pred1 = sub_day_pred[where_res]
    
    # 计算MAPE
    mape = np.mean(np.abs((sub_day_pred1 - sub_day_true1) / sub_day_true1))
    daily_mapes.append(mape)
    
    # 计算WMAPE (新增)
    wape = np.sum(np.abs(sub_day_pred1 - sub_day_true1)) / np.sum(sub_day_true1)
    daily_wapes.append(wape)
    
    print(f"Day {i+1}: MAPE = {mape:.4f}, WAPE = {wape:.4f}")
    
    # 早峰时段 (9时)
    mor_day_pred = all_pred[:, i, 8:9].round()
    mor_day_true = all_true[:, i, 8:9]
    where_resmon = np.where(mor_day_true>=5)
    mor_day_true1 = mor_day_true[where_resmon]
    mor_day_pred1 = mor_day_pred[where_resmon]
    
    mor_mape = np.mean(np.abs((mor_day_pred1 - mor_day_true1) / mor_day_true1))
    morning_mapes.append(mor_mape)
    
    # 早峰WMAPE (新增)
    mor_wape = np.sum(np.abs(mor_day_pred1 - mor_day_true1)) / np.sum(mor_day_true1)
    morning_wmapes.append(mor_wape)
    
    print(f"Day {i+1} Morning MAPE = {mor_mape:.4f}, WAPE = {mor_wape:.4f}")
    
    # 午峰时段 (14时)
    aft_day_pred = all_pred[:, i, 13:14].round()
    aft_day_true = all_true[:, i, 13:14]
    where_resaft = np.where(aft_day_true>=5)
    aft_day_true1 = aft_day_true[where_resaft]
    aft_day_pred1 = aft_day_pred[where_resaft]
    
    aft_mape = np.mean(np.abs((aft_day_pred1 - aft_day_true1) / aft_day_true1))
    afternoon_mapes.append(aft_mape)
    
    # 午峰WMAPE (新增)
    aft_wape = np.sum(np.abs(aft_day_pred1 - aft_day_true1)) / np.sum(aft_day_true1)
    afternoon_wapes.append(aft_wape)
    
    print(f"Day {i+1} Afternoon MAPE = {aft_mape:.4f}, WAPE = {aft_wmape:.4f}")
    
    # 晚峰时段 (20时)
    eve_day_pred = all_pred[:, i, 19:20].round()
    eve_day_true = all_true[:, i, 19:20]
    where_reseve = np.where(eve_day_true>=5)
    eve_day_true1 = eve_day_true[where_reseve]
    eve_day_pred1 = eve_day_pred[where_reseve]
    
    eve_mape = np.mean(np.abs((eve_day_pred1 - eve_day_true1) / eve_day_true1))
    evening_mapes.append(eve_mape)
    
    # 晚峰WMAPE (新增)
    eve_wape = np.sum(np.abs(eve_day_pred1 - eve_day_true1)) / np.sum(eve_day_true1)
    evening_wapes.append(eve_wape)
    
    print(f"Day {i+1} Evening MAPE = {eve_mape:.4f}, WAPE = {eve_wape:.4f}")

# 计算平均MAPE和WMAPE
if daily_mapes:
    avg_mape = np.mean(daily_mapes)
    avg_wape = np.mean(daily_wapes)  # 新增
    print(f"目标 {target_idx} 平均MAPE: {avg_mape:.4f}, 平均WAPE: {avg_wmape:.4f}")
if morning_mapes:
    monavg_mape = np.mean(morning_mapes)
    monavg_wape = np.mean(morning_wapes)  # 新增
    print(f"目标 {target_idx} 早峰平均MAPE: {monavg_mape:.4f}, 平均WAPE: {monavg_wape:.4f}")
if afternoon_mapes:
    aftavg_mape = np.mean(afternoon_mapes)
    aftavg_wape = np.mean(afternoon_wapes)  # 新增
    print(f"目标 {target_idx} 午峰平均MAPE: {aftavg_mape:.4f}, 平均WAPE: {aftavg_wape:.4f}")
if evening_mapes:
    eveavg_mape = np.mean(evening_mapes)
    eveavg_wape = np.mean(evening_wapes)  # 新增
    print(f"目标 {target_idx} 晚峰平均MAPE: {eveavg_mape:.4f}, 平均WAPE: {eveavg_wape:.4f}")


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.