In [1]:
import torch
from torch.utils.data import Dataset, DataLoader,RandomSampler,SubsetRandomSampler
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import random
import json
# import optuna
from torch.nn import functional
import datetime
import gc
import os
import glob
from tqdm import tqdm

In [2]:
def load_and_merge_batches(output_dir, static_dict, expected_shape=None):
    """
    Load every saved batch found in ``output_dir`` and concatenate them along axis 0.

    Each batch consists of three files that share the same ``batch_*`` prefix:
    ``<prefix>_hourly_features.npy``, ``<prefix>_static_features.npy`` and
    ``<prefix>_guids.txt`` (one GUID per line; blank lines are ignored).
    Batches are processed in the lexicographic order of the hourly file names.
    The companion files are looked up by prefix rather than by list position,
    so a single missing file only drops its own batch instead of shifting the
    pairing of every later batch.

    A batch is skipped, with a printed warning, when a companion file is
    missing, when loading raises, or when the GUID count differs from the first
    dimension of either feature array.

    Args:
        output_dir: Directory containing the batch files.
        static_dict: Static-feature specification. Accepted for interface
            compatibility; the loader does not use it.
        expected_shape: Optional expected final shape. Accepted for interface
            compatibility; it is currently not checked.

    Returns:
        Tuple ``(hourly_merged_features, static_merged_features, all_site_guids)``.
        When no valid batch exists or concatenation fails, the two arrays are
        empty with shape ``(0, 0, 0)`` and the GUID list is empty.
    """
    hourly_suffix = '_hourly_features.npy'
    static_suffix = '_static_features.npy'
    guid_suffix = '_guids.txt'

    # 1. Discover the batch files.
    hourly_feature_files = sorted(glob.glob(os.path.join(output_dir, 'batch_*' + hourly_suffix)))
    static_feature_files = sorted(glob.glob(os.path.join(output_dir, 'batch_*' + static_suffix)))
    guid_files = sorted(glob.glob(os.path.join(output_dir, 'batch_*' + guid_suffix)))

    print(f"找到 {len(hourly_feature_files)} 个特征文件和 {len(guid_files)} 个GUID文件")

    if len(hourly_feature_files) != len(guid_files):
        print("警告: 特征文件和GUID文件数量不匹配")
    if len(hourly_feature_files) != len(static_feature_files):
        print("警告: 小时特征文件和静态特征文件数量不匹配")

    static_available = set(static_feature_files)
    guid_available = set(guid_files)

    # 2. Accumulators for the valid batches.
    hourly_all_features = []
    static_all_features = []
    all_site_guids = []

    # 3. Load the batches one by one, pairing files by their shared prefix.
    for i, hourly_feat_file in enumerate(tqdm(hourly_feature_files,
                                              total=len(hourly_feature_files),
                                              desc="加载批次")):
        prefix = hourly_feat_file[:-len(hourly_suffix)]
        static_feat_file = prefix + static_suffix
        guid_file = prefix + guid_suffix
        if static_feat_file not in static_available or guid_file not in guid_available:
            print(f"警告: 批次 {i} 缺少静态特征文件或GUID文件，已跳过")
            continue

        try:
            hourly_batch_features = np.load(hourly_feat_file)
            static_batch_features = np.load(static_feat_file)

            with open(guid_file, 'r') as f:
                batch_guids = [line.strip() for line in f if line.strip()]

            # Every array row must have exactly one GUID.
            if len(batch_guids) != hourly_batch_features.shape[0]:
                print(f"警告: 批次 {i} 的小时特征数量 {hourly_batch_features.shape[0]} 和GUID数量 {len(batch_guids)} 不匹配")
                continue
            if len(batch_guids) != static_batch_features.shape[0]:
                print(f"警告: 批次 {i} 的静态特征数量 {static_batch_features.shape[0]} 和GUID数量 {len(batch_guids)} 不匹配")
                continue

            hourly_all_features.append(hourly_batch_features)
            static_all_features.append(static_batch_features)
            all_site_guids.extend(batch_guids)

        except Exception as e:
            # Best effort by design: a broken batch is reported and skipped.
            print(f"加载批次 {i} 时出错: {e}")
            continue

    # 4. Merge.
    if not hourly_all_features:
        print("没有找到有效的数据批次")
        return np.empty((0, 0, 0)), np.empty((0, 0, 0)), []

    try:
        hourly_merged_features = np.concatenate(hourly_all_features, axis=0)
        static_merged_features = np.concatenate(static_all_features, axis=0)
        print(f"成功合并 {len(hourly_all_features)} 个批次")
        print(f"动态特征合并后形状: {hourly_merged_features.shape}")
        print(f"静态特征合并后形状: {static_merged_features.shape}")
        print(f"GUID数量: {len(all_site_guids)}")

        # Final sanity check of the merged row counts against the GUID list.
        if len(all_site_guids) != hourly_merged_features.shape[0]:
            print(f"警告: 最终小时特征数量 {hourly_merged_features.shape[0]} 和GUID数量 {len(all_site_guids)} 不匹配")
        if len(all_site_guids) != static_merged_features.shape[0]:
            print(f"警告: 最终静态特征数量 {static_merged_features.shape[0]} 和GUID数量 {len(all_site_guids)} 不匹配")

    except Exception as e:
        # Raised e.g. when batches disagree on their non-leading dimensions.
        print(f"合并批次时出错: {e}")
        return np.empty((0, 0, 0)), np.empty((0, 0, 0)), []

    return hourly_merged_features, static_merged_features, all_site_guids


In [3]:
# Force a full garbage-collection pass; the value shown below the cell is the
# number of unreachable objects the collector found.
gc.collect()

22

In [3]:
# Directory holding the batch_* files read by load_and_merge_batches.
output_dir = "ebik_dataset/traindata1"
# City/GUID mapping table; only its row count is used in this cell.
mapping_df = pd.read_csv("ebik_dataset/city_guid_mapping_3.csv")
max_city = len(mapping_df)
# Static-feature specification: name -> (feature type, encoding, cardinality).
# Key order matters: StaticFeatureProcessor reads column i of the static input
# for the i-th key of this dict.
# NOTE(review): presumably this order matches the column order of the saved
# static feature arrays — confirm against the code that wrote them.
static_dict = { "lag7d_order_cnt": ('continuous',None,None),
                "parking_capacity": ('continuous',None,None),
                "temperature_avg_val": ('continuous',None,None),
                "city_guid_encoded": ('categorical','embedding',max_city+1),
                "day_of_week": ('categorical','onehot',8),
                "workday_level": ('categorical','onehot',3),
                "cycle_weather_level": ('categorical','embedding',4)
              }
# Load and merge all batches (the run recorded below took about 15 minutes).
hourly_all_data, static_all_data, all_site_guids = load_and_merge_batches(output_dir, static_dict = static_dict, expected_shape=None)
hourly_all_data.shape

加载批次:   0%|          | 0/5 [00:00<?, ?it/s]

找到 5 个特征文件和 5 个GUID文件


加载批次: 100%|██████████| 5/5 [15:14<00:00, 182.85s/it]


成功合并 5 个批次
动态特征合并后形状: (113468, 1008, 22)
静态特征合并后形状: (113468, 26, 7)
GUID数量: 113468


(113468, 1008, 22)

In [8]:
# Inspect every static row, columns 0-6, of the site at index 100.
static_all_data[100:101,:,0:7]

array([[[1.000e+00, 1.280e+02, 2.540e+01, 1.012e+03, 4.000e+00,
         1.000e+00, 3.000e+00],
        [9.000e+00, 0.000e+00, 2.450e+01, 1.012e+03, 5.000e+00,
         1.000e+00, 3.000e+00],
        [1.100e+01, 1.280e+02, 2.477e+01, 1.012e+03, 6.000e+00,
         1.000e+00, 2.000e+00],
        [1.000e+01, 1.280e+02, 2.397e+01, 1.012e+03, 7.000e+00,
         2.000e+00, 3.000e+00],
        [1.000e+01, 0.000e+00, 2.344e+01, 1.012e+03, 1.000e+00,
         2.000e+00, 3.000e+00],
        [6.000e+00, 1.280e+02, 2.346e+01, 1.012e+03, 2.000e+00,
         1.000e+00, 3.000e+00],
        [1.000e+00, 1.280e+02, 2.342e+01, 1.012e+03, 3.000e+00,
         1.000e+00, 2.000e+00],
        [1.000e+01, 1.280e+02, 2.406e+01, 1.012e+03, 4.000e+00,
         1.000e+00, 2.000e+00],
        [6.000e+00, 0.000e+00, 2.406e+01, 1.012e+03, 5.000e+00,
         1.000e+00, 3.000e+00],
        [2.000e+00, 1.280e+02, 2.404e+01, 1.012e+03, 6.000e+00,
         1.000e+00, 3.000e+00],
        [5.000e+00, 1.280e+02, 2.391e+01

In [4]:
# Input arrays are indexed as [site, sequence position, feature].
class MyDataset(Dataset):
    """Sliding-window dataset built from per-site hourly series, daily static rows and labels.

    Sample ``index`` is decomposed as ``site, day = divmod(index, time_num)``.
    For that pair the dataset returns:

    * the hourly window ``[day * time_of_day, (day + seq_num) * time_of_day)``
      restricted to the first ``time_feature_size`` features,
    * the single static row ``day`` restricted to the first
      ``static_feature_size`` features,
    * one label tensor per key of ``output_sizes``; every label starts
      ``3 * time_of_day`` steps after the end of the hourly window and spans
      ``output_sizes[key]`` steps of label column ``key``.

    The number of windows per site is ``steps // time_of_day - (seq_num + 3)``,
    which leaves room for the input window, the 3-day gap and one further day.
    NOTE(review): a label longer than ``time_of_day`` steps would be cut short
    for the last windows — confirm that every output size is at most one day.
    NOTE(review): the original remark said that a forecast for 08-07 is made on
    08-06 with data only up to 08-04; in this code the label day lies 4 days
    after the last input day — confirm the intended gap.
    NOTE(review): static row ``day`` is assumed to be the row that belongs to
    the label of window ``day`` — confirm against how the static rows are cut.
    """

    def __init__(self, his_datas, sta_datas, his_label, output_sizes, time_feature_size, static_feature_size, seq_num, time_of_day):
        # Source arrays, indexed as [site, step, feature].
        self.his_datas, self.sta_datas, self.his_label = his_datas, sta_datas, his_label
        # Mapping: label column -> number of steps to predict.
        self.output_sizes = output_sizes
        # Number of leading hourly / static features handed to the model.
        self.time_feature_size = time_feature_size
        self.static_feature_size = static_feature_size
        # Input window length in days, and steps per day.
        self.seq_num = seq_num
        self.time_of_day = time_of_day

        n_sites, n_steps = his_datas.shape[0], his_datas.shape[1]
        self.site_num = n_sites
        # Windows per site: days available minus (input days + 3 gap days).
        self.time_num = n_steps // time_of_day - (seq_num + 3)
        self.sample_num = self.time_num * self.site_num

        print('单个样本数量：', self.time_num)
        print('站点数量：', self.site_num)
        print('总样本数量：', self.sample_num)
        print("a", his_datas.shape, his_label.shape)

    def __getitem__(self, index):
        """Return ``(hourly_window, static_row, labels)`` for flat sample ``index``."""
        site, day = divmod(index, self.time_num)
        steps_per_day = self.time_of_day
        window_from = day * steps_per_day
        window_to = (day + self.seq_num) * steps_per_day

        hourly_np = self.his_datas[site, window_from:window_to, 0:self.time_feature_size].astype(float)
        static_np = self.sta_datas[site, day:day + 1, 0:self.static_feature_size].astype(float)

        # Labels begin three days after the input window ends.
        label_from = window_to + steps_per_day * 3
        sample_labels = {
            target_idx: torch.tensor(
                self.his_label[site, label_from:label_from + horizon, target_idx:target_idx + 1].astype(float),
                dtype=torch.float32,
            )
            for target_idx, horizon in self.output_sizes.items()
        }

        sample_time_data = torch.tensor(hourly_np, dtype=torch.float32)
        sample_static_data = torch.tensor(static_np, dtype=torch.float32)
        return sample_time_data, sample_static_data, sample_labels

    def __len__(self):
        """Total number of samples: windows per site times number of sites."""
        return self.sample_num


In [5]:
def train_test_split(all_data, sta_data):
    """Cut the hourly and static arrays into fixed train / validation / test ranges.

    The hourly array is sliced along axis 1 in whole days of 24 steps:
    train = days [0, 35), validation = days [17, 38), test = days [21, 42).
    The static array is sliced along axis 1 by row:
    train = rows [0, 18), validation = rows [18, 22), test = rows [22, 26).
    Labels are the first three feature columns of the matching hourly slice.

    The inputs are converted with ``np.asarray``, so ndarray inputs are NOT
    copied: every returned array is a view of the caller's data. This avoids
    duplicating the full arrays in memory; callers must not modify the returned
    arrays in place unless they intend to change the source data.

    NOTE(review): the original remark on this function said "56 days", but the
    indices used here cover 42 days of hourly data and 26 static rows.
    NOTE(review): with the windowing used by MyDataset (14 input days plus a
    3-day gap) the last training label day (day 34) is also the first
    validation label day, and those two samples read different static rows
    (17 vs 18) — confirm whether this overlap and alignment are intended.

    Args:
        all_data: Array-like indexed as [site, hour, feature].
        sta_data: Array-like indexed as [site, day row, feature].

    Returns:
        Tuple of nine arrays: (train_data, train_data_sta, train_label,
        val_data, val_data_sta, val_label, test_data, test_data_sta, test_label).
    """
    hourly = np.asarray(all_data)
    daily = np.asarray(sta_data)

    hours_per_day = 24
    n_label_columns = 3

    # (first day, day after the last day) of each hourly slice.
    train_days, val_days, test_days = (0, 35), (17, 38), (21, 42)
    # (first row, row after the last row) of each static slice.
    train_rows, val_rows, test_rows = (0, 18), (18, 22), (22, 26)

    def cut(day_range, row_range):
        """Return (hourly features, static rows, labels) for one split."""
        start = day_range[0] * hours_per_day
        stop = day_range[1] * hours_per_day
        features = hourly[:, start:stop, :]
        static_rows = daily[:, row_range[0]:row_range[1], :]
        labels = hourly[:, start:stop, 0:n_label_columns]
        return features, static_rows, labels

    return cut(train_days, train_rows) + cut(val_days, val_rows) + cut(test_days, test_rows)

def load_multitask_data(all_data, sta_data, batch_size, output_sizes={0: 24, 1: 24, 2: 24},
                        train_fraction=0.4, test_batch_size=4,
                        time_feature_size=22, static_feature_size=7, seq_num=14, time_of_day=24):
    """Build the train / validation / test DataLoaders for the multi-task model.

    The arrays are cut by ``train_test_split`` and each part is wrapped in a
    ``MyDataset``. Training draws from a random subset of the training samples
    (``train_fraction`` of them, chosen once here with ``np.random.shuffle``)
    through a ``SubsetRandomSampler``; validation and test are read in order.

    Args:
        all_data: Hourly array passed to ``train_test_split``.
        sta_data: Static array passed to ``train_test_split``.
        batch_size: Batch size of the train and validation loaders.
        output_sizes: Mapping label column -> number of steps to predict.
            The default dict is shared between calls and is never modified here.
        train_fraction: Fraction of the training samples used, in (0, 1].
        test_batch_size: Batch size of the test loader.
        time_feature_size: Number of leading hourly features given to the model.
        static_feature_size: Number of leading static features given to the model.
        seq_num: Input window length in days.
        time_of_day: Steps per day.
            NOTE(review): the day ranges in ``train_test_split`` are fixed, so
            changing ``seq_num`` or ``time_of_day`` changes the number of
            windows per site without changing the static rows available —
            confirm the two stay consistent before using non-default values.

    Returns:
        Tuple ``(train_dataloader, val_dataloader, test_dataloader)``. Every
        batch is ``(time_data, static_data, labels)`` where ``labels`` is a
        dict keyed like ``output_sizes``.

    Raises:
        ValueError: If ``train_fraction`` is not in (0, 1].
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction}")

    def multitask_collate(batch):
        """Stack the per-sample tensors; labels are stacked per target key."""
        time_data = torch.stack([item[0] for item in batch])
        static_data = torch.stack([item[1] for item in batch])
        labels = {
            target_idx: torch.stack([item[2][target_idx] for item in batch])
            for target_idx in output_sizes.keys()
        }
        return time_data, static_data, labels

    def build_dataset(hourly, static, labels):
        """Wrap one split in a MyDataset with the shared window settings."""
        return MyDataset(his_datas=hourly, sta_datas=static, his_label=labels,
                         output_sizes=output_sizes, time_feature_size=time_feature_size,
                         static_feature_size=static_feature_size, seq_num=seq_num,
                         time_of_day=time_of_day)

    (train_data, train_data_sta, train_label,
     val_data, val_data_sta, val_label,
     test_data, test_data_sta, test_label) = train_test_split(all_data, sta_data)

    train_dataset = build_dataset(train_data, train_data_sta, train_label)
    n_samples = len(train_dataset)
    indices = list(range(n_samples))
    # Keep a random `train_fraction` of the training samples for this run.
    split = int(train_fraction * n_samples)
    np.random.shuffle(indices)
    train_indices = indices[:split]
    # The sampler reshuffles the chosen subset on every epoch.
    train_sampler = SubsetRandomSampler(train_indices)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        collate_fn=multitask_collate
    )

    val_dataset = build_dataset(val_data, val_data_sta, val_label)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=multitask_collate)

    test_dataset = build_dataset(test_data, test_data_sta, test_label)
    test_dataloader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, collate_fn=multitask_collate)

    return train_dataloader, val_dataloader, test_dataloader


In [6]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

def huber_loss(y_pred, y_true):
    """Mean smooth-L1 loss between ``y_pred`` and ``y_true`` with ``beta=5.0``."""
    return torch.nn.functional.smooth_l1_loss(y_pred, y_true, reduction='mean', beta=5.0)


def mse_loss(y_pred, y_true):
    """Mean squared error between ``y_pred`` and ``y_true``."""
    return torch.nn.functional.mse_loss(y_pred, y_true, reduction='mean')
    

def printbar():
    """Print a separator of 88 '=' characters followed by the current local time."""
    separator = '===========' * 8
    stamp = str(datetime.datetime.now())
    print(separator + stamp)


import os
def setup_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects hashing in interpreters started after this point (e.g.
    # child processes); the running interpreter's hash seed is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Disable the cuDNN auto-tuner and ask for deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    


    
    
    
class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` positions of the final dimension of a 3-D tensor.

    Used after a convolution that was padded on both sides, to remove the extra
    trailing outputs. A ``chomp_size`` of 0 leaves the input unchanged (a plain
    ``x[:, :, :-0]`` slice would return an empty tensor instead).
    """

    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` positions, as a contiguous tensor."""
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    """Residual block of two dilated 1-D convolutions.

    Each of the two stages is Conv1d -> Chomp1d -> GroupNorm (single group)
    -> ReLU -> Dropout. The block output is ``ReLU(stages(x) + residual)``,
    where the residual is ``x`` itself, or ``x`` passed through a 1x1
    convolution when the channel counts differ.

    Conv1d pads both ends by ``padding`` and Chomp1d then removes the last
    ``padding`` positions, so with ``stride=1`` and
    ``padding == (kernel_size - 1) * dilation`` the sequence length is kept.

    Args:
        n_inputs: Number of input channels.
        n_outputs: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        dilation: Convolution dilation.
        padding: Padding given to both convolutions and trimmed again by Chomp1d.
        dropout: Dropout probability used after each stage.
    """
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # Stage 1: n_inputs channels -> n_outputs channels.
        # (A weight_norm wrapper around the convolution was tried and removed.)
        self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation)
        self.chomp1 = Chomp1d(padding)
        self.norm1 = nn.GroupNorm(1, n_outputs)  # extra normalisation layer added as an experiment

        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        
        # Stage 2: n_outputs channels -> n_outputs channels.
        # (A weight_norm wrapper around the convolution was tried and removed.)
        self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.norm2 = nn.GroupNorm(1, n_outputs)
        self.dropout2 = nn.Dropout(dropout)
        

        # The same layer objects are also registered individually above, so
        # their parameters appear in state_dict under both "convN..." and "net...".
        self.net = nn.Sequential(self.conv1, self.chomp1, self.norm1, self.relu1, self.dropout1,
                                  self.conv2, self.chomp2, self.norm2, self.relu2, self.dropout2)

        # Residual path: project the input only when the channel counts differ.
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None # 1x1 conv
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Re-draw the convolution weights from N(0, 0.01); biases are left as created."""
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """
        Apply the two convolution stages and add the residual.

        x: tensor laid out as (batch, n_inputs, seq_len)
        returns: tensor laid out as (batch, n_outputs, seq_len'), where seq_len'
                 equals seq_len under the padding condition in the class docstring
        """
        # Main path through both stages.
        out = self.net(x)
        # Residual path, projected to n_outputs channels when needed.
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)  # same layout as `out`


class TemporalConvNet(nn.Module):
    """Sequential stack of TemporalBlocks, one per entry of ``num_channels``.

    Level ``i`` maps the previous level's channel count (``num_inputs`` for the
    first level) to ``num_channels[i]`` with dilation ``2 ** i``, stride 1 and
    padding ``(kernel_size - 1) * dilation``.

    Args:
        num_inputs: Channel count of the network input.
        num_channels: Output channel count of each level, in order.
        kernel_size: Kernel size shared by all levels.
        dropout: Dropout probability shared by all levels.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        # Channel widths at the boundaries between consecutive levels.
        widths = [num_inputs] + list(num_channels)
        blocks = []
        for level, (in_channels, out_channels) in enumerate(zip(widths[:-1], widths[1:])):
            # TODO (from the original author): the power-of-two dilation schedule
            # is considered questionable and is to be revisited; a fixed schedule
            # such as [1, 4, 16, 24] was considered.
            dilation_size = 2 ** level
            blocks.append(
                TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                              padding=(kernel_size - 1) * dilation_size, dropout=dropout)
            )

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every level in order."""
        return self.network(x)
 

class MultiTaskTCN(nn.Module):
    """Shared TCN encoder followed by one linear prediction head per task.

    The encoder output is laid out as (batch, channels, seq_len). Task key
    ``k`` reads output channel ``k`` of the encoder, so every key of
    ``output_sizes`` must be a valid channel index of the last TCN level.

    Args:
        input_size: Number of input features per time step.
        input_len: Sequence length; the input width of every head.
        output_sizes: Mapping task key -> number of steps to predict.
        num_channels: Channel count of each TCN level.
        kernel_size: TCN kernel size.
        dropout: TCN dropout probability.
        emb_dropout: Probability of ``self.drop``; ``forward`` does not apply it.
        tied_weights: Accepted for interface compatibility; not used.
    """

    def __init__(self, input_size, input_len, output_sizes, num_channels,
                 kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):
        super(MultiTaskTCN, self).__init__()
        self.output_sizes = output_sizes
        self.time_tasks = list(output_sizes.keys())
        # Encoder shared by all tasks.
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
        self.drop = nn.Dropout(emb_dropout)
        self.emb_dropout = emb_dropout
        # One independent head per task. Each head is initialised right after
        # it is created so that the order of random draws is unchanged.
        self.prediction_heads = nn.ModuleDict()
        for scale in self.time_tasks:
            head = nn.Linear(input_len, output_sizes[scale])
            self._reset_head(head)
            self.prediction_heads[str(scale)] = head

    @staticmethod
    def _reset_head(head):
        """Zero the bias of ``head`` and draw its weight from N(0, 0.01)."""
        head.bias.data.fill_(0)
        head.weight.data.normal_(0, 0.01)

    def init_weights(self):
        """Re-initialise every prediction head.

        Iterates over the head modules; iterating the ModuleDict itself would
        yield its string keys.
        """
        for head in self.prediction_heads.values():
            self._reset_head(head)

    def forward(self, input):
        """Predict every task from an input laid out as (batch, seq_len, features).

        Returns:
            Dict task key -> tensor laid out as (batch, output_sizes[key], 1).
        """
        # Conv1d expects (batch, channels, seq_len).
        y = input.transpose(1, 2)
        y = self.tcn(y)
        predictions = {}
        for scale in self.time_tasks:
            # Channel `scale` of the encoder output feeds the head of task `scale`.
            scale_features = y[:, scale:scale+1, :]
            predictions[scale] = self.prediction_heads[str(scale)](scale_features).transpose(1, 2).contiguous()

        return predictions


    
# class TCN(nn.Module):
#     def __init__(self, input_size, input_len, output_size, num_channels,
#                  kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):
#         super(TCN, self).__init__()
#         # [N, 38, 14*24]->[N, 1, 14*24]
#         # input_size=39, num_channels=[300, 200, 100, 50, 1],output_size=1*24
#         self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
#         self.decoder = nn.Linear(input_len, output_size)
#         self.drop = nn.Dropout(emb_dropout)
#         self.emb_dropout = emb_dropout
#         self.init_weights()
#         self.linear = nn.Linear(input_len, output_size)

#     def init_weights(self):
#         self.decoder.bias.data.fill_(0)
#         self.decoder.weight.data.normal_(0, 0.01)

#     def forward(self, input):
#         """Input ought to have dimension (N, C_in, L_in), 
#         where L_in is the seq_len; 
#         here the input is (N, L, C)"""
#         # [N, 14*24, 38]->[N, 38, 14*24]
#         y = input.transpose(1, 2)
#         # [N, 38, 14*24]->[N, 1, 14*24]
#         y = self.tcn(y)
#         # [N, 1, 14*24]->[N, 1, 1*24]->[N, 1*24, 1]
#         y = self.decoder(y).transpose(1, 2)
#         # 使用linear的效果  [N, 14*24, 1]->[N, 1, 14*24]->[N, 1, 1*24]->[N, 1*24, 1]
#         return y.contiguous()

In [7]:
class StaticFeatureProcessor(nn.Module):
    """Encode a row of static features and pass it through an MLP.

    ``static_dict`` maps a feature name to ``(feature_type, structure_type,
    dimension)``:

    * ``('continuous', None, None)``: the value is used as is;
    * ``('categorical', 'onehot', n)``: the value is one-hot encoded with ``n`` classes;
    * ``('categorical', 'embedding', n)``: the value indexes an embedding table
      with ``n + 10`` rows (10 spare rows) whose width is 32 when ``n > 128``
      and ``max(2, n // 4)`` otherwise.

    Column ``i`` of the input belongs to the ``i``-th key of ``static_dict`` in
    insertion order (the keys are NOT sorted). The encoded pieces are
    concatenated as [all one-hot | all embeddings | all continuous] and fed to
    the MLP (Linear -> BatchNorm1d -> ReLU -> Dropout per hidden layer).

    Args:
        static_dict: Feature specification as described above.
        dropout: Dropout probability of the MLP.
        hidden_dims: Width of each MLP hidden layer. The default list is shared
            between instances and is never modified.
    """

    def __init__(self, static_dict, dropout=0.2, hidden_dims=[64, 32]):
        super(StaticFeatureProcessor, self).__init__()

        self.static_dict = static_dict
        self.num_features = len(static_dict)
        self.dropout_rate = dropout

        # Column indices per feature kind.
        self.continuous_indices = []
        self.categorical_onehot_indices = []
        self.categorical_embedding_indices = []
        self.cat_feature_dims = []
        self.embedding_dims = {}

        # Snapshot of the key order as a plain list. A dict_keys view stored on
        # the module cannot be pickled or deep-copied.
        self.feature_names = list(static_dict.keys())

        # One embedding table per embedding-encoded feature.
        self.embeddings = nn.ModuleDict()

        for i, feat_name in enumerate(self.feature_names):
            feature_type, structure_type, dimension = static_dict[feat_name]

            if feature_type == 'continuous':
                self.continuous_indices.append(i)
            elif feature_type == 'categorical':
                if structure_type == 'onehot':
                    self.categorical_onehot_indices.append(i)
                    self.cat_feature_dims.append(dimension)
                elif structure_type == 'embedding':
                    self.categorical_embedding_indices.append(i)
                    if dimension > 128:
                        embedding_dim = 32
                    else:
                        embedding_dim = max(2, dimension // 4)  # at least 2
                    # 10 spare rows are reserved beyond the declared cardinality.
                    self.embeddings[feat_name] = nn.Embedding(dimension+10, embedding_dim)
                    self.embedding_dims[feat_name] = embedding_dim

        # Width of each encoded part.
        self.total_onehot_dim = sum(self.cat_feature_dims)
        self.total_embedding_dim = sum(self.embedding_dims.values())
        self.num_continuous = len(self.continuous_indices)

        # Batch normalisation of the continuous features is disabled.
        self.continuous_bn = None

        # MLP input width = one-hot width + embedding width + continuous count.
        mlp_input_dim = self.total_onehot_dim + self.total_embedding_dim + self.num_continuous

        mlp_layers = []
        input_dim = mlp_input_dim
        for hidden_dim in hidden_dims:
            mlp_layers.append(nn.Linear(input_dim, hidden_dim))
            mlp_layers.append(nn.BatchNorm1d(hidden_dim))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(self.dropout_rate))
            input_dim = hidden_dim

        self.mlp = nn.Sequential(*mlp_layers)
        self.output_dim = hidden_dims[-1] if hidden_dims else mlp_input_dim

    def safe_embedding_lookup(self, embedding_layer, indices, feature_name):
        """Look up ``indices``, mapping too-large indices to the last table row.

        A warning with the number of affected entries is printed when clamping
        happens. Negative indices are not handled here.
        """
        num_embeddings = embedding_layer.num_embeddings

        out_of_bounds_mask = indices >= num_embeddings
        if out_of_bounds_mask.any():
            print(f"警告: 特征 {feature_name} 发现 {out_of_bounds_mask.sum().item()} 个越界索引")
            print(f"最大索引: {indices.max().item()}, embedding大小: {num_embeddings}")

            # Redirect every out-of-range index to the last row.
            safe_indices = indices.clamp(max=num_embeddings - 1)

            mapped_indices = indices[out_of_bounds_mask]
            print(f"被映射的索引示例: {mapped_indices[:10].cpu().numpy()}")

            return embedding_layer(safe_indices)
        return embedding_layer(indices)

    def forward(self, static_input):
        """Encode ``static_input`` and return the MLP output.

        Args:
            static_input: Tensor laid out as (batch, num_features) or
                (batch, 1, num_features), columns ordered like the keys of
                ``static_dict``.

        Returns:
            Tensor laid out as (batch, output_dim).

        Raises:
            ValueError: If a 3-D input has a middle dimension other than 1, or
                if no feature is left to encode.
        """
        if static_input.dim() == 3:
            if static_input.size(1) == 1:
                static_input = static_input.squeeze(1)
            else:
                raise ValueError(f"Expected static_input shape (batch_size, num_features) or (batch_size, 1, num_features), but got {static_input.shape}")

        continuous_features = []
        onehot_features = []
        embedding_features = []

        # Route every column to its encoder, in the order of the feature names.
        for i, feat_name in enumerate(self.feature_names):
            feature = static_input[:, i]
            feature_type, structure_type, dimension = self.static_dict[feat_name]

            if feature_type == 'continuous':
                continuous_features.append(feature.unsqueeze(1))
            elif feature_type == 'categorical':
                if structure_type == 'onehot':
                    onehot_features.append((feature, dimension))
                elif structure_type == 'embedding':
                    embedding_features.append((feature, feat_name))

        # Continuous features.
        if continuous_features:
            num_features = torch.cat(continuous_features, dim=1)
            if self.continuous_bn is not None:
                num_features = self.continuous_bn(num_features)
        else:
            num_features = None

        # One-hot features; values must lie in [0, num_classes).
        one_hot_embeddings = [
            functional.one_hot(feature_tensor.long(), num_classes=num_classes).float()
            for feature_tensor, num_classes in onehot_features
        ]

        # Embedding features, with clamping of too-large indices.
        embedding_vectors = [
            self.safe_embedding_lookup(self.embeddings[feat_name], feature_tensor.long(), feat_name)
            for feature_tensor, feat_name in embedding_features
        ]

        # Concatenate as [one-hot | embeddings | continuous].
        all_features = []
        if one_hot_embeddings:
            all_features.append(torch.cat(one_hot_embeddings, dim=1))
        if embedding_vectors:
            all_features.append(torch.cat(embedding_vectors, dim=1))
        if num_features is not None:
            all_features.append(num_features)

        if not all_features:
            raise ValueError("No features available after processing")

        features = torch.cat(all_features, dim=1)
        output = self.mlp(features)

        return output




class HybridTCN(nn.Module):
    """TCN encoder for the time series fused with an MLP encoding of static features.

    For task key ``k`` the encoded static vector is concatenated in front of
    channel ``k`` of the TCN output, and the result goes through that task's
    head (Linear -> ReLU -> Dropout -> Linear).

    Args:
        input_size: Number of time-series features per step.
        input_len: Sequence length; together with the static output width it
            determines the head input width.
        output_sizes: Mapping task key -> number of steps to predict.
        num_channels: Channel count of each TCN level.
        static_dict: Static-feature specification for StaticFeatureProcessor.
        static_hidden_dims: Hidden widths of the static MLP.
        kernel_size: TCN kernel size.
        dropout: Dropout probability of the TCN and of the heads.
        static_dropout: Dropout probability of the static MLP and of ``self.drop``.
        tied_weights: Accepted for interface compatibility; not used.
    """

    def __init__(self, input_size, input_len, output_sizes, num_channels,
                 static_dict, static_hidden_dims=[16,8],
                 kernel_size=2, dropout=0.3, static_dropout=0.2, tied_weights=False):
        super(HybridTCN, self).__init__()

        self.output_sizes = output_sizes
        self.time_tasks = list(output_sizes.keys())

        # Encoders: temporal first, then static.
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)
        self.static_processor = StaticFeatureProcessor(
            static_dict, static_dropout, static_hidden_dims
        )

        # Every head consumes the static encoding plus one full-length channel.
        fused_width = input_len + self.static_processor.output_dim
        half_width = fused_width // 2

        heads = nn.ModuleDict()
        for task in self.time_tasks:
            heads[str(task)] = nn.Sequential(
                nn.Linear(fused_width, half_width),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(half_width, output_sizes[task])
            )
        self.prediction_heads = heads

        # Registered but not applied in forward.
        self.drop = nn.Dropout(static_dropout)

    def forward(self, time_series_input, static_input):
        """Predict every task.

        Args:
            time_series_input: Tensor laid out as (batch, seq_len, input_size).
            static_input: Static features in whatever layout
                ``self.static_processor`` accepts, columns ordered like the
                keys of ``static_dict``.

        Returns:
            Dict task key -> tensor laid out as (batch, output_sizes[key], 1).
        """
        # Conv1d expects (batch, channels, seq_len).
        encoded = self.tcn(time_series_input.transpose(1, 2))

        # (batch, static_width) -> (batch, 1, static_width) so it can be
        # concatenated with a single encoder channel along the last axis.
        static_vec = torch.unsqueeze(self.static_processor(static_input), 1)

        predictions = {}
        for task in self.time_tasks:
            head = self.prediction_heads[str(task)]
            channel = encoded[:, task:task+1, :]
            fused = torch.cat([static_vec, channel], dim=2)
            predictions[task] = head(fused).transpose(1, 2).contiguous()

        return predictions

In [8]:
class PeakHuberLoss(nn.Module):
    """Loss that treats "peak" targets (``y_true >= 5``) differently from the rest.

    * Peak elements: mean Huber loss with threshold ``delta``
      (``0.5 * e**2`` when ``|e| <= delta``, else ``delta * (|e| - 0.5 * delta)``).
    * Non-peak elements: mean absolute error.

    The result is ``2 * peak_loss + non_peak_loss``. A part with no elements
    contributes a zero tensor, so the result is always a tensor, also for an
    empty input (a bare Python float would break ``.item()`` and ``.backward()``
    in callers).
    """

    def __init__(self):
        super(PeakHuberLoss, self).__init__()

    def forward(self, y_pred, y_true, delta = 5):
        """Return the scalar loss tensor for ``y_pred`` against ``y_true``.

        Args:
            y_pred: Predictions, same shape as ``y_true``.
            y_true: Targets; elements ``>= 5`` are treated as peaks.
            delta: Huber threshold used for the peak elements.
        """
        error = y_true - y_pred
        peak_mask = (y_true >= 5)

        peak_abs = torch.abs(error[peak_mask])
        if peak_abs.numel() > 0:
            peak_loss = torch.where(peak_abs <= delta,
                                    0.5 * peak_abs ** 2,
                                    delta * (peak_abs - 0.5 * delta)).mean()
        else:
            # Sum over an empty selection: a zero tensor on the right device
            # and dtype that stays connected to the autograd graph.
            peak_loss = peak_abs.sum()

        non_peak_abs = torch.abs(error[~peak_mask])
        if non_peak_abs.numel() > 0:
            non_peak_loss = non_peak_abs.mean()
        else:
            non_peak_loss = non_peak_abs.sum()

        return peak_loss * 2 + non_peak_loss  # peak part is weighted double
    

class MultiTaskPHLoss(nn.Module):
    """Weighted sum of PeakHuberLoss over several tasks.

    Args:
        loss_weights: Optional mapping task key -> weight. When it is ``None``
            or empty every task has weight 1.0; otherwise it must contain every
            key present in the predictions.
    """

    def __init__(self, loss_weights=None):
        super(MultiTaskPHLoss, self).__init__()
        self.peakhuberloss = PeakHuberLoss()
        self.loss_weights = loss_weights

    def forward(self, predictions, targets, delta = 5):
        """Return ``(total_loss, losses)``.

        ``total_loss`` is the weighted sum over the keys of ``predictions``;
        ``losses`` maps each key to its unweighted loss as a Python float.
        """
        losses = {}
        total_loss = 0

        for scale in predictions:
            task_loss = self.peakhuberloss(predictions[scale], targets[scale], delta = delta)
            losses[scale] = task_loss.item()

            # Fall back to a unit weight when no weights were configured.
            if self.loss_weights:
                task_weight = self.loss_weights[scale]
            else:
                task_weight = 1.0
            total_loss += task_weight * task_loss

        return total_loss, losses

In [9]:
import gc
# Run a full garbage-collection pass; the value echoed by the notebook is the
# number of unreachable objects found. Presumably done to release memory
# before the dataloaders are built — confirm.
gc.collect()

44

In [9]:
# NOTE(review): seeding is commented out here — original line: setup_seed(12345)
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# One entry per prediction target; passed to load_multitask_data below.
output_sizes = {0: 24, 1: 24, 2: 24}

# device = 'cpu'
print('device:', device)
print(hourly_all_data.shape)
print(static_all_data.shape)

# Load the data: builds the train / validation / test dataloaders.
# The literal 1024 is presumably the batch size — verify against load_multitask_data.
train_dataloader, val_dataloader, test_dataloader = load_multitask_data(hourly_all_data[:, :, :], static_all_data[:,:,:], 1024, output_sizes)


device: cuda:0
(113468, 1008, 22)
(113468, 26, 7)
单个样本数量： 18
站点数量： 113468
总样本数量： 2042424
a (113468, 840, 22) (113468, 840, 3)
单个样本数量： 4
站点数量： 113468
总样本数量： 453872
a (113468, 504, 22) (113468, 504, 3)
单个样本数量： 4
站点数量： 113468
总样本数量： 453872
a (113468, 504, 22) (113468, 504, 3)


In [10]:
# Schema of the static (non time-series) features handed to the model.
# Each value is a 3-tuple: (feature kind, encoding, size).
#   - feature kind is 'continuous' or 'categorical';
#   - encoding is None for continuous features, otherwise 'embedding' or 'onehot';
#   - size looks like the number of categories for categorical features — TODO confirm.
# `max_city` must already be defined by an earlier cell.
static_dict = { "lag7d_order_cnt": ('continuous',None,None),
                "parking_capacity": ('continuous',None,None),
                "temperature_avg_val": ('continuous',None,None),
                "city_guid_encoded": ('categorical','embedding',max_city+1),
                "day_of_week": ('categorical','onehot',8),
                "workday_level": ('categorical','onehot',3),
                "cycle_weather_level": ('categorical','embedding',4)
              }
# Save the static-feature config to a JSON file
def save_static_dict(static_dict, file_path):
    """Write ``static_dict`` to ``file_path`` as indented UTF-8 JSON.

    Args:
        static_dict: mapping from feature name to its spec tuple.
        file_path: destination path; the file is overwritten.

    Every spec is stored as a list because JSON has no tuple type.
    """
    def _as_list(spec):
        # Specs without an encoding are written as exactly three fields.
        if spec[1] is None:
            return [spec[0], None, spec[2]]
        return list(spec)

    payload = {name: _as_list(spec) for name, spec in static_dict.items()}

    with open(file_path, 'w', encoding='utf-8') as out:
        json.dump(payload, out, indent=4, ensure_ascii=False)

# Persist the static-feature schema to a JSON file on disk.
save_static_dict(static_dict, 'ebik_dataset/static_dict_config_3.json')

In [23]:
# Show the static feature names in sorted order.
print(sorted(static_dict.keys()))

['city_guid_encoded', 'cycle_weather_level', 'day_of_week', 'lag7d_order_cnt', 'parking_capacity', 'temperature_avg_val', 'workday_level']


In [11]:
# Where the best checkpoint (lowest validation loss) is written.
model_save_path = 'ebik_model/net_ebik_splitower_7.pth'
output_sizes = {0: 24, 1: 24, 2: 24}  # one 24-step output per prediction target

num_channels = [64, 128, 32, 3]  # TCN hidden-layer widths

# Training hyper-parameters
lr = 0.001
# loss_weights = {10: 1.0, 16: 1.0, 21: 1.0}  # optional per-target loss weights
es_cnt = 0                    # validations in a row without improvement
max_es_epoch = 15             # early-stop after this many non-improving validations
min_val_loss = float('inf')
epoches = 70
# Prefer the second GPU as before, but fall back to the first GPU (then the CPU)
# so the notebook does not fail with "invalid device ordinal" on single-GPU hosts.
if torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
tcn_model = HybridTCN(input_size=22, input_len=14*24, output_sizes=output_sizes, num_channels=num_channels, 
                         static_dict=static_dict, static_hidden_dims=[32, 16],
                         kernel_size=3, dropout=0.25, static_dropout=0.2, tied_weights=False).to(device)
criterion = MultiTaskPHLoss()
optimizer = torch.optim.AdamW(
    tcn_model.parameters(),
    lr=lr,                    # learning rate
    betas=(0.9, 0.999),         # momentum coefficients
    eps=1e-8,                   # numerical-stability term
    weight_decay=1e-2,          # weight decay
    amsgrad=False               # whether to use the AMSGrad variant
)

In [12]:
# Make sure the checkpoint directory exists before training starts, so the
# first torch.save cannot fail after a full epoch of work.
checkpoint_dir = os.path.dirname(model_save_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

for e in range(epoches + 1):
    tcn_model.train()
    train_losses = []
    nan_detected = False

    # Iterate in the same order the dataset yields: (time series, static, labels)
    for time_data, static_data, all_labels in train_dataloader:
        time_data, static_data = time_data.to(device), static_data.to(device)

        # Move every target's labels to the device
        labels_on_device = {}
        for target_idx, label in all_labels.items():
            labels_on_device[target_idx] = label.to(device)

        optimizer.zero_grad()

        # Forward pass; the model returns a dict of predictions keyed by target
        forecast = tcn_model(time_data, static_data)

        loss, task_losses = criterion(forecast, labels_on_device)

        if torch.isnan(loss):
            print("训练损失为NaN!")
            nan_detected = True
            break

        train_losses.append(loss.detach().item())
        loss.backward()
        optimizer.step()

    # A NaN loss means the run has diverged: stop training altogether instead of
    # only leaving the batch loop and carrying on with validation / more epochs.
    if nan_detected:
        break

    # Average training loss for this epoch
    train_loss_avg = sum(train_losses) / len(train_losses) if train_losses else 0

    # Validation runs every second epoch
    if e % 2 == 0:
        tcn_model.eval()
        val_losses = []

        with torch.no_grad():
            for val_time_data, val_static_data, val_labels in val_dataloader:
                val_time_data, val_static_data = val_time_data.to(device), val_static_data.to(device)

                val_labels_on_device = {}
                for target_idx, label in val_labels.items():
                    val_labels_on_device[target_idx] = label.to(device)

                val_forecast = tcn_model(val_time_data, val_static_data)
                val_loss, val_task_losses = criterion(val_forecast, val_labels_on_device)
                val_losses.append(val_loss.item())

        val_loss_avg = sum(val_losses) / len(val_losses) if val_losses else 0

        print(f'Epoch={e}, Train Loss:{train_loss_avg:.6f}, Val Loss:{val_loss_avg:.6f}')

        # Early stopping: es_cnt counts validations (not epochs) without improvement
        if val_loss_avg < min_val_loss:
            min_val_loss = val_loss_avg
            es_cnt = 0
            # Keep the best model seen so far
            torch.save(tcn_model.state_dict(), model_save_path)
        else:
            es_cnt += 1
            if es_cnt >= max_es_epoch:
                print('触发早停机制！')
                break

Epoch=0, Train Loss:92.601941, Val Loss:76.477463
Epoch=2, Train Loss:83.546698, Val Loss:75.811064
Epoch=4, Train Loss:82.819341, Val Loss:75.489043
Epoch=6, Train Loss:81.908659, Val Loss:74.622880
Epoch=8, Train Loss:81.304820, Val Loss:75.485731
Epoch=10, Train Loss:80.718140, Val Loss:75.097236
Epoch=12, Train Loss:80.226729, Val Loss:75.081207
Epoch=14, Train Loss:79.919971, Val Loss:74.948944
Epoch=16, Train Loss:79.482328, Val Loss:74.746026
Epoch=18, Train Loss:79.086415, Val Loss:72.854394
Epoch=20, Train Loss:78.791865, Val Loss:73.771309
Epoch=22, Train Loss:78.518774, Val Loss:77.722579
Epoch=24, Train Loss:77.997797, Val Loss:73.622288
Epoch=26, Train Loss:77.874087, Val Loss:74.371407
Epoch=28, Train Loss:77.341410, Val Loss:75.251709
Epoch=30, Train Loss:77.155854, Val Loss:73.531235
Epoch=32, Train Loss:76.907828, Val Loss:73.941101
Epoch=34, Train Loss:76.590003, Val Loss:74.243076
Epoch=36, Train Loss:76.304098, Val Loss:74.158977
Epoch=38, Train Loss:75.966438, Val 

In [14]:
output_sizes = {0: 24, 1: 24, 2: 24}  # one 24-step output per prediction target
# Prefer the second GPU as before, but fall back to the first GPU (then the CPU)
# so the cell also runs on hosts that have no 'cuda:1'.
if torch.cuda.device_count() > 1:
    device = 'cuda:1'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
cat_feature_dims = [4,8]  # number of classes of the categorical variables
num_channels = [64, 128, 32, 3]  # TCN hidden-layer widths
criterion = MultiTaskPHLoss()
# Rebuild the network with the training-time hyper-parameters, then load the weights
tcn_save_model = HybridTCN(input_size=22, input_len=14*24, output_sizes=output_sizes, num_channels=num_channels, 
                         static_dict=static_dict, static_hidden_dims=[32, 16],
                         kernel_size=3, dropout=0.25, static_dropout=0.2, tied_weights=False).to(device)
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint written from another GPU index still loads.
tcn_save_model.load_state_dict(torch.load('ebik_model/net_ebik_splitower_7.pth', map_location=device))

# Accumulators
test_losses = []
test_task_losses_dict = {target_idx: [] for target_idx in output_sizes.keys()}
true_values = {target_idx: [] for target_idx in output_sizes.keys()}
pred_values = {target_idx: [] for target_idx in output_sizes.keys()}

# Test loop
tcn_save_model.eval()
with torch.no_grad():
    for test_time_data, test_static_data, test_labels in test_dataloader:
        test_time_data, test_static_data = test_time_data.to(device), test_static_data.to(device)
        
        # Move every target's labels to the device
        test_labels_on_device = {}
        for target_idx, label in test_labels.items():
            test_labels_on_device[target_idx] = label.to(device)
        
        # Input standardisation (disabled)
        # test_data, mean, std = transform_series2(test_data)
        
        # Forward pass
        test_forecasts = tcn_save_model(test_time_data, test_static_data)
        
        # Inverse standardisation of the predictions (disabled)
        # for target_idx, forecast in test_forecasts.items():
        #     test_forecasts[target_idx] = transform_series2_decode(forecast, mean, std)
        
        # Multi-target loss
        test_loss, test_task_losses = criterion(test_forecasts, test_labels_on_device)
        
        # Record the losses
        test_losses.append(test_loss.item())
        for target_idx, loss_val in test_task_losses.items():
            test_task_losses_dict[target_idx].append(loss_val)
        
        # Record ground truth and predictions, one array per batch
        for target_idx in output_sizes.keys():
            true_values[target_idx].append(test_labels[target_idx].cpu().numpy())
            pred_values[target_idx].append(test_forecasts[target_idx].cpu().numpy())

# Average loss over all test batches
test_loss_avg = sum(test_losses) / len(test_losses) if test_losses else 0
print(f'Test Loss: {test_loss_avg:.6f}')

# Average loss per target; guarded like the overall average so an empty test
# loader does not raise ZeroDivisionError.
for target_idx in output_sizes.keys():
    task_losses = test_task_losses_dict[target_idx]
    target_loss_avg = sum(task_losses) / len(task_losses) if task_losses else 0
    print(f'Target {target_idx} Loss: {target_loss_avg:.6f}')

Test Loss: 38.096264
Target 0 Loss: 12.169564
Target 1 Loss: 12.576924
Target 2 Loss: 13.349776


True

In [16]:
# Per-target MAPE / WAPE, computed day by day
def _mape_wape(pred, true, min_true=5):
    """Return ``(MAPE, WAPE)`` over the points whose true value is >= ``min_true``.

    Both metrics are NaN when no point passes the filter.
    """
    keep = true >= min_true
    kept_true = true[keep]
    kept_pred = pred[keep]
    if kept_true.size == 0:
        return float('nan'), float('nan')
    abs_err = np.abs(kept_pred - kept_true)
    return np.mean(abs_err / kept_true), np.sum(abs_err) / np.sum(kept_true)


for target_idx in output_sizes.keys():
    # Stack the per-batch arrays collected by the test loop.
    # Assumes every collected batch holds 4 x 24 values for this target — TODO confirm.
    lth = len(true_values[target_idx])
    all_true = np.array(true_values[target_idx])
    all_true = np.reshape(all_true, (lth, 4, 24))
    all_pred = np.array(pred_values[target_idx])
    all_pred = np.reshape(all_pred, (lth, 4, 24))
    print(all_true.shape)
    
    print(f"\n=== 目标 {target_idx} MAPE分析 ===")
    
    # One entry per day; only points with true value >= 5 are scored
    daily_mapes = []
    morning_mapes = []
    afternoon_mapes = []
    evening_mapes = []
    daily_wapes = []
    morning_wapes = []
    afternoon_wapes = []
    evening_wapes = []
    
    for i in range(all_pred.shape[1]):
        # Whole day; predictions are rounded before scoring
        mape, wape = _mape_wape(all_pred[:, i, :].round(), all_true[:, i, :])
        daily_mapes.append(mape)
        daily_wapes.append(wape)
        print(f"Day {i+1}: MAPE = {mape:.4f}, WAPE = {wape:.4f}")
        
        # Morning peak: hour index 8
        mor_mape, mor_wape = _mape_wape(all_pred[:, i, 8:9].round(), all_true[:, i, 8:9])
        morning_mapes.append(mor_mape)
        morning_wapes.append(mor_wape)
        print(f"Day {i+1} Morning MAPE = {mor_mape:.4f}, WAPE = {mor_wape:.4f}")
        
        # Afternoon peak: hour index 13
        aft_mape, aft_wape = _mape_wape(all_pred[:, i, 13:14].round(), all_true[:, i, 13:14])
        afternoon_mapes.append(aft_mape)
        afternoon_wapes.append(aft_wape)
        print(f"Day {i+1} Afternoon MAPE = {aft_mape:.4f}, WAPE = {aft_wape:.4f}")
        
        # Evening peak: hour index 19
        eve_mape, eve_wape = _mape_wape(all_pred[:, i, 19:20].round(), all_true[:, i, 19:20])
        evening_mapes.append(eve_mape)
        evening_wapes.append(eve_wape)
        print(f"Day {i+1} Evening MAPE = {eve_mape:.4f}, WAPE = {eve_wape:.4f}")
    
    # Averages over the days
    if daily_mapes:
        avg_mape = np.mean(daily_mapes)
        avg_wape = np.mean(daily_wapes)
        print(f"目标 {target_idx} 平均MAPE: {avg_mape:.4f}, 平均WAPE: {avg_wape:.4f}")
    if morning_mapes:
        monavg_mape = np.mean(morning_mapes)
        monavg_wape = np.mean(morning_wapes)
        print(f"目标 {target_idx} 早峰平均MAPE: {monavg_mape:.4f}, 平均WAPE: {monavg_wape:.4f}")
    if afternoon_mapes:
        aftavg_mape = np.mean(afternoon_mapes)
        aftavg_wape = np.mean(afternoon_wapes)
        print(f"目标 {target_idx} 午峰平均MAPE: {aftavg_mape:.4f}, 平均WAPE: {aftavg_wape:.4f}")
    if evening_mapes:
        eveavg_mape = np.mean(evening_mapes)
        eveavg_wape = np.mean(evening_wapes)
        print(f"目标 {target_idx} 晚峰平均MAPE: {eveavg_mape:.4f}, 平均WAPE: {eveavg_wape:.4f}")
 

(113468, 4, 24)

=== 目标 0 MAPE分析 ===
Day 1: MAPE = 0.3317, WMAPE = 0.3416
Day 1 Morning MAPE = 0.3666, WMAPE = 0.3798
Day 1 Afternoon MAPE = 0.3504, WMAPE = 0.3547
Day 1 Evening MAPE = 0.3238, WMAPE = 0.3475
Day 2: MAPE = 0.3269, WMAPE = 0.3441
Day 2 Morning MAPE = 0.3456, WMAPE = 0.3631
Day 2 Afternoon MAPE = 0.3520, WMAPE = 0.3587
Day 2 Evening MAPE = 0.3221, WMAPE = 0.3556
Day 3: MAPE = 0.3233, WMAPE = 0.3572
Day 3 Morning MAPE = 0.3309, WMAPE = 0.3620
Day 3 Afternoon MAPE = 0.3415, WMAPE = 0.3669
Day 3 Evening MAPE = 0.3192, WMAPE = 0.3686
Day 4: MAPE = 0.3109, WMAPE = 0.3461
Day 4 Morning MAPE = 0.3334, WMAPE = 0.3886
Day 4 Afternoon MAPE = 0.3308, WMAPE = 0.3549
Day 4 Evening MAPE = 0.3107, WMAPE = 0.3446
目标 0 平均MAPE: 0.3232, 平均WMAPE: 0.3472
目标 0 早峰平均MAPE: 0.3441, 平均WMAPE: 0.3734
目标 0 午峰平均MAPE: 0.3437, 平均WMAPE: 0.3588
目标 0 晚峰平均MAPE: 0.3189, 平均WMAPE: 0.3541
(113468, 4, 24)

=== 目标 1 MAPE分析 ===
Day 1: MAPE = 0.3242, WMAPE = 0.3422
Day 1 Morning MAPE = 0.3412, WMAPE = 0.3564
Day 1 A