In [1]:
import os
import random
import gc
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from tqdm.notebook import tqdm, trange
import torch
import torch.multiprocessing as mp
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from methods.model import *
from methods.logger import *
from methods.processing import *
from methods.train import *

# 读取数据

In [None]:
# Child processes are started with the 'spawn' method; force=True lets this cell be
# re-run after a start method has already been set.
mp.set_start_method('spawn', force=True)
main_device_name = 0
print('Read Factor.')

# Single place to change when the dataset directory moves.
DATA_DIR = '/home/datamake134/data/haris/dataset_1031'


def _read_dataset(file_name):
    '''Load one pickled object from DATA_DIR.'''
    # NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
    return pd.read_pickle(os.path.join(DATA_DIR, file_name))


def _as_int_dates(index):
    '''Convert a datetime-like index into integer YYYYMMDD keys (e.g. 20190102).'''
    return index.strftime('%Y%m%d').astype(int)


factor = _read_dataset('total_date.pkl')                    # date + stock code
grouped = _read_dataset('grouped_adj.pkl').fillna(0)        # features (missing values replaced by 0)
grouped_label = _read_dataset('grouped_label_adj.pkl')      # labels
grouped_liquidity = _read_dataset('grouped_liquidity.pkl')  # liquidity indicator
grouped_liquidity.index = _as_int_dates(grouped_liquidity.index)
correlation_df = _read_dataset('corr_byday_abs.pkl')        # auxiliary data for factor selection
correlation_df.index = _as_int_dates(correlation_df.index)
total_date_list = np.array(factor['date'].drop_duplicates().tolist())  # unique dates, in order of first appearance

Read Factor.


In [59]:
# Give stock code '301665' an all-zero liquidity column
# (presumably so the liquidity frame covers the same stock universe as the other datasets - TODO confirm).
grouped_liquidity['301665'] = 0.0
# Re-order the columns by ascending stock code.
sorted_columns = grouped_liquidity.columns.tolist()
sorted_columns.sort()
grouped_liquidity = grouped_liquidity.loc[:, sorted_columns]
grouped_liquidity

Unnamed: 0_level_0,000001,000002,000004,000005,000006,000007,000008,000009,000010,000011,...,688787,688788,688789,688793,688798,688799,688800,688819,688981,689009
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20190102,1097221.50,931650.531,0.0,4035.0,5128.2,84766.5,95227.2,138982.5,12675.0,11316.0,...,0.000,0.00,0.000,0.000,0.000,0.00,0.000,0.000,0.00,0.000
20190103,830523.78,152731.800,481.8,3698.4,3399.0,77714.1,10681.2,22470.0,22404.9,558.6,...,0.000,0.00,0.000,0.000,0.000,0.00,0.000,0.000,0.00,0.000
20190104,1048647.60,435401.100,2934.0,3123.9,20309.1,140454.0,97812.0,38304.0,16039.2,51670.8,...,0.000,0.00,0.000,0.000,0.000,0.00,0.000,0.000,0.00,0.000
20190107,7300591.20,6348042.900,26908.2,39281.4,371718.0,154982.1,284163.0,133830.3,10289.7,120059.1,...,0.000,0.00,0.000,0.000,0.000,0.00,0.000,0.000,0.00,0.000
20190108,894381.60,1057104.990,8721.0,16443.6,110791.2,235467.0,372771.3,7626.6,5406.0,37286.4,...,0.000,0.00,0.000,0.000,0.000,0.00,0.000,0.000,0.00,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20250411,1951382.40,4526979.600,150696.0,0.0,812251.5,91176.0,3748563.0,140407.8,34483.2,31717.8,...,605959.200,163388.40,388098.360,165901.161,237375.000,0.00,364901.196,87016.965,7611796.50,369090.480
20250414,3966164.46,2294429.400,77280.0,0.0,691156.2,33189.0,3373527.3,197584.5,122532.9,12117.6,...,228598.434,49500.00,118227.705,31457.400,2527870.092,64487.82,481720.140,59621.928,10000000.00,1992310.125
20250415,907779.60,738144.000,62551.5,0.0,260922.6,243307.8,2322676.8,107385.6,6966.0,28249.2,...,522727.884,25367.04,40576.800,35338.212,128131.080,16902.00,683343.000,34983.783,3377588.22,162534.444
20250416,2080525.50,704421.600,956130.0,0.0,367071.6,102942.0,1196876.1,78825.6,216453.6,14530.2,...,324038.412,50886.00,19920.600,66157.476,51848.595,16663.50,173830.110,0.000,6157788.00,167364.288


In [60]:
def _load_period_arrays(date_list, grouped, grouped_label, grouped_liquidity, pid_num, factor_num, desc):
    '''
    Build dense factor / label / liquidity arrays for the given dates.

    para date_list: dates to load, one array slice per date
    para grouped: factor data, looked up with .loc[date]
    para grouped_label: label data, looked up with .loc[date]
    para grouped_liquidity: liquidity data, looked up with .loc[date]
    para pid_num: number of stocks kept per date (first pid_num rows)
    para factor_num: number of factor columns
    para desc: progress-bar label

    return: (factors, labels, liquidity) with shapes
            (len(date_list), pid_num, factor_num), (len(date_list), pid_num, 5), (len(date_list), pid_num, 1)
    '''
    n_dates = len(date_list)
    factors = np.zeros((n_dates, pid_num, factor_num))
    labels = np.zeros((n_dates, pid_num, 5))
    liquidity = np.zeros((n_dates, pid_num, 1))
    for i in trange(n_dates, desc=desc):
        date = date_list[i]
        factors[i, :, :] = grouped.loc[date].iloc[:pid_num, :]
        labels[i, :, :] = grouped_label.loc[date].iloc[:pid_num, :]
        # Label column 0 (returns) is adjusted using label column 4; per the original note this
        # adjusts labels around the top 7%-10% of returns according to liquidity.
        labels[i, :, 0] = adjust_daily_returns(labels[i, :, 0], labels[i, :, 4])
        liquidity[i, :, :] = np.array(grouped_liquidity.loc[date])[:pid_num].reshape(-1, 1)
    return factors, labels, liquidity


def main(
    round_num, dt1, dt2, dt3, dt4, dt5,
    correlation_df, grouped, grouped_label, grouped_liquidity,
    total_date_list, main_folder_name, 
    pid_num=5273, factor_num=2790, corr_thres=0.9, seed_num=5, model_mode=False, multi_model=6
    ):
    '''
    Run one rolling round: build the data, train one process per KFold split, and save the
    weighted test predictions together with their (date, stock) keys.

    para round_num: round (period) index
    para dt1: start of the training set (inclusive)
    para dt2: start of the validation set (only forwarded to train_one_Fold)
    para dt3: end of the validation set (exclusive: dates with dt1 <= date < dt3 are used)
    para dt4: start of the test set (inclusive)
    para dt5: end of the test set (exclusive)

    dt1 ------train------ dt2 ------validation------ dt3/dt4 ------test------ dt5

    para correlation_df: auxiliary data for factor selection (forwarded to train_one_Fold)
    para grouped: factor data grouped by date
    para grouped_label: label data grouped by date
    para grouped_liquidity: liquidity data grouped by date
    para total_date_list: numpy array of all dates (integer YYYYMMDD)
    para main_folder_name: output folder name under /home/datamake134/data/haris/DL/
    para pid_num: number of stocks
    para factor_num: number of factors
    para corr_thres: correlation threshold for factor selection
    para seed_num: number of seed groups; also the number of KFold splits / worker processes.
                   Must equal the number of fixed ensemble weights (5).
    para model_mode: whether to continue training
    para multi_model: number of models (seeds drawn per group)

    raises ValueError: unsupported seed_num, or too few train/test dates in the requested ranges
    raises RuntimeError: a training process exited with a non-zero exit code
    '''
    # Fixed weights used to combine the per-fold test outputs (one weight per fold).
    fold_weights = [0.1, 0.15, 0.2, 0.25, 0.3]
    # Fail before the expensive training instead of hitting a shape error when combining outputs.
    if seed_num != len(fold_weights):
        raise ValueError(
            'seed_num must be %d because the ensemble uses %d fixed fold weights; got %r'
            % (len(fold_weights), len(fold_weights), seed_num)
        )

    # One list of `multi_model` seeds per fold, reproducible through random.seed(i).
    seed_list = []
    for i in range(seed_num):
        random.seed(i)
        seed_list.append(list(random.sample(range(100), multi_model)))
    total_train_num = len(seed_list)  # number of folds / worker processes (== seed_num)
    total_test_name = 'test_output_' + str(round_num) + '.pt'
    total_date_pid_name = 'test_date_pid_' + str(round_num) + '.pt'
    save_path = "/home/datamake134/data/haris/DL/" + main_folder_name
    
    # Train + validation dates: dt1 <= date < dt3.
    date_list_train = total_date_list[(total_date_list >= dt1) & (total_date_list < dt3)]
    # Drop 20240201-20240223 from the train/validation dates whenever any of those days fall in
    # the window (previously this was skipped when the window ended inside that range).
    # NOTE(review): the test dates are not filtered, although the original comment also mentions
    # the test period - confirm intent.
    in_excluded_range = (date_list_train >= 20240201) & (date_list_train <= 20240223)
    date_list_train = date_list_train[~in_excluded_range]
    # Test dates: dt4 <= date < dt5.
    date_list_test = total_date_list[(total_date_list >= dt4) & (total_date_list < dt5)]

    # Validate before allocating the large arrays.
    if len(date_list_train) < total_train_num:
        raise ValueError(
            'Only %d train/validation dates in [%s, %s); at least %d are needed for KFold.'
            % (len(date_list_train), dt1, dt3, total_train_num)
        )
    if len(date_list_test) == 0:
        raise ValueError('No test dates in [%s, %s).' % (dt4, dt5))

    total_ts_train_val1, total_label_train_val, total_group_train_val = _load_period_arrays(
        date_list_train, grouped, grouped_label, grouped_liquidity, pid_num, factor_num, 'train_val_data'
        )
    total_ts_test1, total_label_test, total_group_test = _load_period_arrays(
        date_list_test, grouped, grouped_label, grouped_liquidity, pid_num, factor_num, 'test_data'
        )
    
    # Min-max scaling of the liquidity data (global min/max of each array).
    def min_max_standard(column):
        lowest = column.min()
        span = column.max() - lowest
        if span == 0:
            # Constant input: avoid 0/0 and map everything to 0.
            return np.zeros_like(column)
        return (column - lowest) / span
    print('Min-max scaling.')
    total_group_train_val, total_group_test = min_max_standard(total_group_train_val), min_max_standard(total_group_test)
    
    # Factor standardisation: clip every factor column to its 0.5%-99.5% percentile range,
    # then z-score. The scaler is fitted on train/validation data and re-used for the test data;
    # the clipping percentiles are computed separately for each data set.
    print('Standard scaling.')
    scaler = StandardScaler()
    total_ts_train_val1 = np.apply_along_axis(
        lambda x: np.clip(x, np.percentile(x, 0.5), np.percentile(x, 99.5)), axis=0, arr=total_ts_train_val1.reshape(-1, factor_num)
        )
    total_ts_train_val1 = total_ts_train_val1.reshape(len(date_list_train), pid_num, factor_num)
    total_ts_train_val1 = np.nan_to_num(scaler.fit_transform(total_ts_train_val1.reshape(-1, factor_num)).reshape(len(date_list_train), pid_num, factor_num), nan=0)
    total_ts_test1 = np.apply_along_axis(
        lambda x: np.clip(x, np.percentile(x, 0.5), np.percentile(x, 99.5)), axis=0, arr=total_ts_test1.reshape(-1, factor_num)
        )
    total_ts_test1 = total_ts_test1.reshape(len(date_list_test), pid_num, factor_num)
    total_ts_test1 = np.nan_to_num(scaler.transform(total_ts_test1.reshape(-1, factor_num)).reshape(len(date_list_test), pid_num, factor_num), nan=0)
    
    # KFold over the date axis; every split is trained in its own process.
    print('KFold training.')
    kf = KFold(n_splits=total_train_num, shuffle=False)
    processes = []
    for train_num, index_tuple in enumerate(kf.split(total_ts_train_val1)):
        p = mp.Process(
            target=train_one_Fold, 
            args=(
                round_num, train_num, index_tuple, main_folder_name,
                total_ts_train_val1, total_label_train_val, total_group_train_val, date_list_train,
                total_ts_test1, total_label_test, total_group_test, date_list_test,
                correlation_df, seed_list, dt1, dt2, dt3, dt4, dt5,
                factor_num, corr_thres, save_path, model_mode, multi_model
                )
            )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    # A crashed fold would otherwise surface as a missing file, or silently re-use a stale
    # output file left by an earlier run.
    failed_folds = [train_num for train_num, p in enumerate(processes) if p.exitcode != 0]
    if failed_folds:
        raise RuntimeError(
            'Round %s: training process(es) for fold(s) %s exited with a non-zero exit code.'
            % (round_num, failed_folds)
        )
    
    torch.cuda.empty_cache()
    gc.collect()
    
    # Collect the per-fold test outputs written by the worker processes.
    print('Save test data.')
    total_test_output = []
    for train_num in range(total_train_num):
        test_name = 'test_output_ic' + str(round_num) + str(train_num) + '.pt'
        test_path = os.path.join(save_path, test_name)
        total_test_output.append(torch.load(test_path))
        
    total_test_path = os.path.join(save_path, total_test_name)
    total_date_pid_path = os.path.join(save_path, total_date_pid_name)
    
    # Weighted sum over the fold axis; the weight tensor is reshaped to broadcast over the
    # remaining dimensions and created on the same device as the loaded outputs.
    total_test_output = torch.stack(total_test_output)
    weight_tensor = torch.tensor(fold_weights, device=total_test_output.device).view(-1, *([1] * (total_test_output.dim() - 1)))
    total_test_output = (total_test_output * weight_tensor).sum(dim=0)
    torch.save(total_test_output, total_test_path)
    
    # (date, stock) key table in date-major order. The stock list is taken from the label
    # index on 20200102 and truncated to pid_num, matching the rows kept in the data arrays.
    # NOTE(review): assumes the stock order on 20200102 matches every test date - confirm.
    stocks = np.array(grouped_label.loc[20200102].index)[:pid_num]
    repeated_stocks = np.tile(stocks, len(date_list_test))
    repeated_dates = np.repeat(date_list_test, len(stocks))
    date_pid_test = np.column_stack((repeated_dates, repeated_stocks))
    torch.save(date_pid_test, total_date_pid_path)
    
    # Release the large arrays before the final garbage collection.
    del total_ts_train_val1
    del total_ts_test1
    del total_label_train_val
    del total_label_test
    del total_group_train_val
    del total_group_test
    
    torch.cuda.empty_cache()
    gc.collect()

# 训练和测试

```c
Round 1. Train: 2020/07/01 2022/07/01 Validation: 2022/07/01 2022/12/30 Test: 2023/01/01 2023/04/01
Round 2. Train: 2021/01/01 2023/01/01 Validation: 2023/01/01 2023/03/31 Test: 2023/04/01 2023/07/01
Round 3. Train: 2021/04/01 2023/04/01 Validation: 2023/04/01 2023/06/30 Test: 2023/07/01 2023/10/01
Round 4. Train: 2021/07/01 2023/07/01 Validation: 2023/07/01 2023/09/28 Test: 2023/10/01 2024/01/01
Round 5. Train: 2021/10/01 2023/10/01 Validation: 2023/10/01 2023/12/29 Test: 2024/01/01 2024/04/01
Round 6. Train: 2022/01/01 2024/01/01 Validation: 2024/01/01 2024/03/29 Test: 2024/04/01 2024/07/01
Round 7. Train: 2022/04/01 2024/04/01 Validation: 2024/04/01 2024/06/28 Test: 2024/07/01 2024/10/01
Round 8. Train: 2022/07/01 2024/07/01 Validation: 2024/07/01 2024/09/30 Test: 2024/10/01 2025/01/01
Round 9. Train: 2022/10/01 2024/10/01 Validation: 2024/10/01 2024/12/31 Test: 2025/01/01 2025/04/01
```

In [61]:
# NOTE(review): `main_folder_name` is not defined in any cell visible here; it must already
# exist in the kernel (or come from one of the star imports) before this cell runs.
folder_path = "/home/datamake134/data/haris/DL/" + main_folder_name
os.makedirs(folder_path, exist_ok=True)

# Round 1
print('Round 1.')
round_num = 1
dt1 = int(pd.to_datetime("2020-07-01").strftime('%Y%m%d'))  # start of the training set
dt2 = int(pd.to_datetime("2022-07-01").strftime('%Y%m%d'))  # start of the validation set
dt3 = int(pd.to_datetime("2022-12-30").strftime('%Y%m%d'))  # end of the validation set
dt4 = int(pd.to_datetime("2023-01-01").strftime('%Y%m%d'))  # start of the test set
dt5 = int(pd.to_datetime("2023-04-01").strftime('%Y%m%d'))  # end of the test set
main(
    round_num, dt1, dt2, dt3, dt4, dt5,
    correlation_df, grouped, grouped_label, grouped_liquidity,
    total_date_list, main_folder_name, corr_thres=0.9
    )
torch.cuda.empty_cache()
gc.collect()

# Load the round-1 predictions and their (date, stock) keys written by main().
test_output1 = torch.load(os.path.join(folder_path, "test_output_1.pt"))
test_output = test_output1.cpu()
date_pid1 = torch.load(os.path.join(folder_path, "test_date_pid_1.pt"), weights_only=False)
total_date_pid = date_pid1
total_date_pid_test = total_date_pid

# Pivot the flat predictions into a date x stock table; row i of the key table labels prediction i.
grading_factor = pd.DataFrame(index=np.unique(total_date_pid_test[:, 0]), columns=np.unique(total_date_pid_test[:, 1]))
test_output_list = test_output.tolist()
for i, (date_key, pid_key) in enumerate(total_date_pid_test):
    grading_factor.loc[date_key, pid_key] = test_output_list[i]
grading_factor.to_pickle(os.path.join(folder_path, "单次_KFold_2023.pkl"))
gc.collect()

Round 1.


train_val_data:   0%|          | 0/610 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 08:28:22 train.py INFO Period1, Train0, Train Period:20200701-20220701, Val Period:20220701-20221230, Test Period:20230101-20230401
2025/04/22 08:28:22 train.py INFO Train1 Shape: torch.Size([488, 5273, 2790]), Val1 Shape: torch.Size([121, 5273, 2790]), Test1 Shape: torch.Size([59, 5273, 2790])
2025/04/22 08:28:22 train.py INFO Start Training
2025/04/22 08:30:04 train.py INFO Epoch[1/200], Time:102.09sec, Train Loss: 0.930986, Val Loss: 0.9085807204246521,0.9199652075767517,0.9025828838348389,0.9091241955757141,0.9246126413345337,0.9029377698898315
2025/04/22 08:30:04 model.py INFO Validation loss decreased (inf --> 0.908581).  Saving model 0.0...
2025/04/22 08:30:04 model.py INFO Validation loss decreased (inf --> 0.919965).  Saving model 1.0...
2025/04/22 08:30:04 model.py INFO Validation loss decreased (inf --> 0.902583).  Saving model 2.0...
2025/04/22 08:30:04 model.py INFO Validation loss decreased (inf --> 0.909124).  Saving model 3.0...
2025/04/22 08:30:04 model.py I

Save test data.


0

In [62]:
# Rounds 2-9
# NOTE(review): `main_folder_name` is not defined in any cell visible here; it must already
# exist in the kernel (or come from one of the star imports) before this cell runs.
folder_path = "/home/datamake134/data/haris/DL/" + main_folder_name
total_date_list = np.array(factor['date'].drop_duplicates().tolist())
rolling_step = 3    # roll forward 3 months per round
window_size = 24    # training window, in months
val_size = 3        # validation window, in months
corr_thres = 0.9    # correlation threshold passed to main()
start_date = pd.to_datetime('2021-01-01')  # training start of round 2
for round_num in range(2, 10):
    print('Round %i.' % round_num)
    dt1 = start_date + relativedelta(months=rolling_step * (round_num - 2))             # start of the training set
    dt2 = dt1 + relativedelta(months=window_size)                                       # start of the validation set
    dt3 = dt2 + relativedelta(months=val_size)                                          # nominal end of the validation set
    dt4 = dt3                                                                           # start of the test set
    dt5 = dt3 + relativedelta(months=rolling_step)                                      # end of the test set
    # Replace the nominal validation end by the last available date strictly before it.
    dt3 = total_date_list[total_date_list < int(dt3.strftime('%Y%m%d'))][-1]
    dt1, dt2, dt3, dt4, dt5 = int(dt1.strftime('%Y%m%d')), int(dt2.strftime('%Y%m%d')), int(dt3), int(dt4.strftime('%Y%m%d')), int(dt5.strftime('%Y%m%d'))
    main(
        round_num, dt1, dt2, dt3, dt4, dt5,
        correlation_df, grouped, grouped_label, grouped_liquidity,
        total_date_list, main_folder_name, corr_thres=corr_thres, seed_num=5, model_mode=False
        )
    torch.cuda.empty_cache()
    gc.collect()

# Load the per-round predictions and concatenate them in round order.
round_test_outputs = []
for round_num in range(2, 10):
    test_output = torch.load(os.path.join(folder_path, "test_output_" + str(round_num) + ".pt"))
    round_test_outputs.append(test_output)
test_output = torch.cat(round_test_outputs)
test_output = test_output.cpu()

# Load the matching (date, stock) keys in the same round order.
date_pid_list = []
for round_num in range(2, 10):
    date_pid = torch.load(os.path.join(folder_path, "test_date_pid_" + str(round_num) + ".pt"), weights_only=False)
    date_pid_list.append(date_pid)
total_date_pid = np.concatenate(date_pid_list, axis=0)
total_date_pid_test = total_date_pid

# Pivot the flat predictions into a date x stock table; row i of the key table labels prediction i.
grading_factor = pd.DataFrame(index=np.unique(total_date_pid_test[:, 0]), columns=np.unique(total_date_pid_test[:, 1]))
test_output_list = test_output.tolist()
for i, (date_key, pid_key) in enumerate(total_date_pid_test):
    grading_factor.loc[date_key, pid_key] = test_output_list[i]

# Prepend the round-1 table saved by the previous cell and store the combined result.
grading_factor2023 = pd.read_pickle(os.path.join(folder_path, "单次_KFold_2023.pkl"))
grading_factor = pd.concat([grading_factor2023, grading_factor], axis=0)
grading_factor.to_feather(os.path.join(folder_path, "单次_KFold_0.fea"))

Round 2.


train_val_data:   0%|          | 0/543 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 09:45:59 train.py INFO Period2, Train0, Train Period:20210101-20230101, Val Period:20230101-20230331, Test Period:20230401-20230701
2025/04/22 09:45:59 train.py INFO Train1 Shape: torch.Size([434, 5273, 2790]), Val1 Shape: torch.Size([108, 5273, 2790]), Test1 Shape: torch.Size([59, 5273, 2790])
2025/04/22 09:45:59 train.py INFO Start Training
2025/04/22 09:47:28 train.py INFO Epoch[1/200], Time:88.76sec, Train Loss: 0.935392, Val Loss: 0.9096143245697021,0.9050412178039551,0.9064566493034363,0.9109578132629395,0.9055284261703491,0.9031881093978882
2025/04/22 09:47:28 model.py INFO Validation loss decreased (inf --> 0.909614).  Saving model 0.0...
2025/04/22 09:47:28 model.py INFO Validation loss decreased (inf --> 0.905041).  Saving model 1.0...
2025/04/22 09:47:28 model.py INFO Validation loss decreased (inf --> 0.906457).  Saving model 2.0...
2025/04/22 09:47:28 model.py INFO Validation loss decreased (inf --> 0.910958).  Saving model 3.0...
2025/04/22 09:47:28 model.py IN

Save test data.
Round 3.


train_val_data:   0%|          | 0/544 [00:00<?, ?it/s]

test_data:   0%|          | 0/64 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 10:48:59 train.py INFO Period3, Train0, Train Period:20210401-20230401, Val Period:20230401-20230630, Test Period:20230701-20231001
2025/04/22 10:48:59 train.py INFO Train1 Shape: torch.Size([435, 5273, 2790]), Val1 Shape: torch.Size([108, 5273, 2790]), Test1 Shape: torch.Size([64, 5273, 2790])
2025/04/22 10:48:59 train.py INFO Start Training
2025/04/22 10:51:09 train.py INFO Epoch[1/200], Time:130.44sec, Train Loss: 0.943349, Val Loss: 0.9092921614646912,0.9119365811347961,0.903836190700531,0.907352864742279,0.9094793796539307,0.9065970182418823
2025/04/22 10:51:09 model.py INFO Validation loss decreased (inf --> 0.909292).  Saving model 0.0...
2025/04/22 10:51:09 model.py INFO Validation loss decreased (inf --> 0.911937).  Saving model 1.0...
2025/04/22 10:51:09 model.py INFO Validation loss decreased (inf --> 0.903836).  Saving model 2.0...
2025/04/22 10:51:10 model.py INFO Validation loss decreased (inf --> 0.907353).  Saving model 3.0...
2025/04/22 10:51:10 model.py INF

Save test data.
Round 4.


train_val_data:   0%|          | 0/548 [00:00<?, ?it/s]

test_data:   0%|          | 0/60 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 11:59:53 train.py INFO Period4, Train0, Train Period:20210701-20230701, Val Period:20230701-20230928, Test Period:20231001-20240101
2025/04/22 11:59:53 train.py INFO Train1 Shape: torch.Size([438, 5273, 2790]), Val1 Shape: torch.Size([109, 5273, 2790]), Test1 Shape: torch.Size([60, 5273, 2790])
2025/04/22 11:59:53 train.py INFO Start Training
2025/04/22 12:02:09 train.py INFO Epoch[1/200], Time:136.01sec, Train Loss: 0.936661, Val Loss: 0.9550540447235107,0.9430182576179504,0.9366914629936218,0.9634310007095337,0.9446742534637451,0.9382824897766113
2025/04/22 12:02:09 model.py INFO Validation loss decreased (inf --> 0.955054).  Saving model 0.0...
2025/04/22 12:02:09 model.py INFO Validation loss decreased (inf --> 0.943018).  Saving model 1.0...
2025/04/22 12:02:09 model.py INFO Validation loss decreased (inf --> 0.936691).  Saving model 2.0...
2025/04/22 12:02:09 model.py INFO Validation loss decreased (inf --> 0.963431).  Saving model 3.0...
2025/04/22 12:02:09 model.py I

Save test data.
Round 5.


train_val_data:   0%|          | 0/544 [00:00<?, ?it/s]

test_data:   0%|          | 0/58 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 13:07:54 train.py INFO Period5, Train0, Train Period:20211001-20231001, Val Period:20231001-20231229, Test Period:20240101-20240401
2025/04/22 13:07:54 train.py INFO Train1 Shape: torch.Size([435, 5273, 2790]), Val1 Shape: torch.Size([108, 5273, 2790]), Test1 Shape: torch.Size([58, 5273, 2790])
2025/04/22 13:07:54 train.py INFO Start Training
2025/04/22 13:10:10 train.py INFO Epoch[1/200], Time:135.68sec, Train Loss: 0.941250, Val Loss: 0.9202383756637573,0.9362106323242188,0.9206165075302124,0.916897177696228,0.9399462342262268,0.9210155010223389
2025/04/22 13:10:10 model.py INFO Validation loss decreased (inf --> 0.920238).  Saving model 0.0...
2025/04/22 13:10:10 model.py INFO Validation loss decreased (inf --> 0.936211).  Saving model 1.0...
2025/04/22 13:10:10 model.py INFO Validation loss decreased (inf --> 0.920617).  Saving model 2.0...
2025/04/22 13:10:10 model.py INFO Validation loss decreased (inf --> 0.916897).  Saving model 3.0...
2025/04/22 13:10:10 model.py IN

Save test data.
Round 6.


train_val_data:   0%|          | 0/530 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 14:24:02 train.py INFO Period6, Train0, Train Period:20220101-20240101, Val Period:20240101-20240329, Test Period:20240401-20240701
2025/04/22 14:24:02 train.py INFO Train1 Shape: torch.Size([424, 5273, 2790]), Val1 Shape: torch.Size([105, 5273, 2790]), Test1 Shape: torch.Size([59, 5273, 2790])
2025/04/22 14:24:02 train.py INFO Start Training
2025/04/22 14:26:19 train.py INFO Epoch[1/200], Time:136.53sec, Train Loss: 0.943231, Val Loss: 0.9196034669876099,0.9121091961860657,0.9130905270576477,0.9220677018165588,0.9145130515098572,0.9156022667884827
2025/04/22 14:26:19 model.py INFO Validation loss decreased (inf --> 0.919603).  Saving model 0.0...
2025/04/22 14:26:19 model.py INFO Validation loss decreased (inf --> 0.912109).  Saving model 1.0...
2025/04/22 14:26:19 model.py INFO Validation loss decreased (inf --> 0.913091).  Saving model 2.0...
2025/04/22 14:26:19 model.py INFO Validation loss decreased (inf --> 0.922068).  Saving model 3.0...
2025/04/22 14:26:19 model.py I

Save test data.
Round 7.


train_val_data:   0%|          | 0/531 [00:00<?, ?it/s]

test_data:   0%|          | 0/64 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 15:36:35 train.py INFO Period7, Train0, Train Period:20220401-20240401, Val Period:20240401-20240628, Test Period:20240701-20241001
2025/04/22 15:36:35 train.py INFO Train1 Shape: torch.Size([424, 5273, 2790]), Val1 Shape: torch.Size([106, 5273, 2790]), Test1 Shape: torch.Size([64, 5273, 2790])
2025/04/22 15:36:35 train.py INFO Start Training
2025/04/22 15:38:47 train.py INFO Epoch[1/200], Time:131.54sec, Train Loss: 0.946690, Val Loss: 0.9352855086326599,0.9384829998016357,0.9318973422050476,0.9342840909957886,0.9395635724067688,0.9308005571365356
2025/04/22 15:38:47 model.py INFO Validation loss decreased (inf --> 0.935286).  Saving model 0.0...
2025/04/22 15:38:47 model.py INFO Validation loss decreased (inf --> 0.938483).  Saving model 1.0...
2025/04/22 15:38:47 model.py INFO Validation loss decreased (inf --> 0.931897).  Saving model 2.0...
2025/04/22 15:38:47 model.py INFO Validation loss decreased (inf --> 0.934284).  Saving model 3.0...
2025/04/22 15:38:47 model.py I

Save test data.
Round 8.


train_val_data:   0%|          | 0/536 [00:00<?, ?it/s]

test_data:   0%|          | 0/61 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 16:54:14 train.py INFO Period8, Train1, Train Period:20220701-20240701, Val Period:20240701-20240930, Test Period:20241001-20250101
2025/04/22 16:54:14 train.py INFO Train1 Shape: torch.Size([429, 5273, 2790]), Val1 Shape: torch.Size([106, 5273, 2790]), Test1 Shape: torch.Size([61, 5273, 2790])
2025/04/22 16:54:14 train.py INFO Start Training
2025/04/22 16:56:15 train.py INFO Epoch[1/200], Time:121.19sec, Train Loss: 0.945281, Val Loss: 0.9254987239837646,0.93743896484375,0.9443867206573486,0.9337573647499084,0.9388672113418579,0.9395045638084412
2025/04/22 16:56:15 model.py INFO Validation loss decreased (inf --> 0.925499).  Saving model 0.0...
2025/04/22 16:56:15 model.py INFO Validation loss decreased (inf --> 0.937439).  Saving model 1.0...
2025/04/22 16:56:15 model.py INFO Validation loss decreased (inf --> 0.944387).  Saving model 2.0...
2025/04/22 16:56:15 model.py INFO Validation loss decreased (inf --> 0.933757).  Saving model 3.0...
2025/04/22 16:56:15 model.py INF

Save test data.
Round 9.


train_val_data:   0%|          | 0/532 [00:00<?, ?it/s]

test_data:   0%|          | 0/57 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling.


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/22 17:59:15 train.py INFO Period9, Train0, Train Period:20221001-20241001, Val Period:20241001-20241231, Test Period:20250101-20250401
2025/04/22 17:59:15 train.py INFO Train1 Shape: torch.Size([425, 5273, 2790]), Val1 Shape: torch.Size([106, 5273, 2790]), Test1 Shape: torch.Size([57, 5273, 2790])
2025/04/22 17:59:15 train.py INFO Start Training
2025/04/22 18:00:52 train.py INFO Epoch[1/200], Time:97.18sec, Train Loss: 0.948683, Val Loss: 0.9223596453666687,0.9260984659194946,0.9250297546386719,0.9258387088775635,0.9252011775970459,0.9218665361404419
2025/04/22 18:00:52 model.py INFO Validation loss decreased (inf --> 0.922360).  Saving model 0.0...
2025/04/22 18:00:52 model.py INFO Validation loss decreased (inf --> 0.926098).  Saving model 1.0...
2025/04/22 18:00:52 model.py INFO Validation loss decreased (inf --> 0.925030).  Saving model 2.0...
2025/04/22 18:00:52 model.py INFO Validation loss decreased (inf --> 0.925839).  Saving model 3.0...
2025/04/22 18:00:52 model.py IN

Save test data.
