In [1]:
import os
import random
import gc
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from tqdm.notebook import tqdm, trange
import torch
import torch.multiprocessing as mp
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

from methods.model import *
from methods.logger import *
from methods.processing import *
from methods.train import *

# 读取数据

In [2]:
# Child training processes are started with 'spawn' (forced, so re-running the cell is fine).
mp.set_start_method('spawn', force=True)
main_device_name = 0
print('Read Factor.')

# All input pickles live under one dataset directory.
dataset_dir = '/home/datamake134/data/haris/dataset_0121/'


def _index_as_yyyymmdd(index):
    """Convert a datetime-like index to integer YYYYMMDD keys."""
    return index.strftime('%Y%m%d').astype(int)


factor = pd.read_pickle(dataset_dir + 'total_date.pkl')                  # date + stock code
grouped = pd.read_pickle(dataset_dir + 'grouped_adj.pkl').fillna(0)      # features, NaN replaced by 0
grouped_label = pd.read_pickle(dataset_dir + 'grouped_label_adj.pkl')    # labels

grouped_liquidity = pd.read_pickle(dataset_dir + 'grouped_liquidity.pkl')  # liquidity indicator
grouped_liquidity.index = _index_as_yyyymmdd(grouped_liquidity.index)

correlation_df = pd.read_pickle(dataset_dir + 'corr_byday_abs.pkl')      # auxiliary data for factor screening
correlation_df.index = _index_as_yyyymmdd(correlation_df.index)

# Unique dates, in order of first appearance in `factor`.
unique_dates = factor['date'].drop_duplicates()
total_date_list = np.array(unique_dates.tolist())

Read Factor.


In [3]:
# Move the 'Code' index level into a regular column. reset_index places it
# first, and main() treats column 0 of the feature frame as the stock code.
# The guard makes the cell idempotent: re-running it after 'Code' is already
# a column is a no-op instead of raising KeyError.
if 'Code' in grouped.index.names:
    grouped = grouped.reset_index(level='Code')
elif 'Code' not in grouped.columns:
    raise KeyError("'Code' is neither an index level nor a column of `grouped`")

In [4]:
# Preview the first five rows (head() already limits the output to 5 rows).
grouped.head()

Unnamed: 0_level_0,Code,0,1,2,3,4,5,6,7,8,...,2780,2781,2782,2783,2784,2785,2786,2787,2788,2789
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20200102,1,0.161906,0.715487,0.093825,2.500486,1.495386,3.687422,1.265125,2.488541,1.528644,...,0.0,0.0,0.0,0.0,-1.0,10500.0,70.0,1.0,0.100181,0.0
20200102,2,0.233131,0.554393,-0.052147,0.148151,0.797523,-4.361952,0.735354,0.153694,0.807209,...,0.0,0.0,0.0,0.0,-1.0,10564.0,67.0,1.0,0.100095,0.0
20200102,4,0.129032,0.902306,-0.089413,-3.294483,0.073779,-4.185775,0.243243,-3.187878,0.080566,...,0.0,0.0,0.0,0.0,-1.0,10579.0,61.0,0.0,0.100044,0.0
20200102,5,0.003936,0.962999,0.011201,-2.758275,1.244934,-4.228945,0.402985,-2.640067,1.323399,...,0.0,0.0,0.0,0.0,-1.0,10605.0,62.0,0.0,0.100649,0.0
20200102,6,0.059158,0.80841,-0.066815,0.230044,0.572983,-3.184859,0.47619,0.285741,0.60694,...,0.0,0.0,0.0,0.0,-1.0,10110.0,62.0,1.0,0.099624,0.0


In [8]:
def main(
    round_num, dt1, dt2, dt3, dt4, dt5,
    correlation_df, grouped, grouped_label, grouped_liquidity,
    total_date_list, main_folder_name,
    pid_num=5269, factor_num=2791, corr_thres=0.9, seed_num=5, model_mode=False, multi_model=6,
    fold_weights=None
    ):
    '''
    Run one rolling round: assemble the train/validation and test tensors,
    scale them, train one model group per KFold split in parallel child
    processes, then blend the per-fold test outputs and save them.

    dt1 ------ train ------ dt2 ------ validation ------ dt3/dt4 ------ test ------ dt5

    para round_num: round (period) index, used in output file names
    para dt1: start of the training set (int YYYYMMDD, inclusive)
    para dt2: start of the validation set; only forwarded to train_one_Fold
    para dt3: end of the validation set (exclusive here: dates < dt3 are used)
    para dt4: start of the test set (inclusive)
    para dt5: end of the test set (exclusive)
    para correlation_df: auxiliary data for factor screening; forwarded to train_one_Fold
    para grouped: factor data addressable by date via .loc; column 0 is the stock code
    para grouped_label: label data addressable by date via .loc (5 columns are read)
    para grouped_liquidity: liquidity data addressable by date via .loc
    para total_date_list: numpy array of all available dates (int YYYYMMDD)
    para main_folder_name: output folder name under /home/datamake134/data/haris/DL/
    para pid_num: number of stocks kept per date (first pid_num rows)
    para factor_num: number of feature columns, including the stock-code column
    para corr_thres: correlation threshold for factor screening; forwarded to train_one_Fold
    para seed_num: number of seed lists, which is also the number of KFold splits
                   and child processes (KFold requires at least 2)
    para model_mode: whether to continue training; forwarded to train_one_Fold
    para multi_model: number of seeds (models) per fold
    para fold_weights: optional sequence of seed_num blending weights for the
                       per-fold test outputs. Default: [0.1, 0.15, 0.2, 0.25, 0.3]
                       for 5 folds, otherwise a linear ramp from 0.1 to 0.3
                       normalised to sum to 1.

    Returns None. Writes 'test_output_<round>.pt' and 'test_date_pid_<round>.pt'
    into the output folder.
    Raises ValueError if fold_weights has the wrong length or a date range is empty.
    '''
    # One list of `multi_model` seeds per fold; seeding with the fold index keeps it reproducible.
    seed_list = []
    for i in range(seed_num):
        random.seed(i)
        seed_list.append(list(random.sample(range(100), multi_model)))
    total_train_num = len(seed_list)  # == seed_num: number of folds / child processes

    # Resolve the blending weights up front so a bad configuration fails
    # before hours of training rather than at the final weighted sum.
    if fold_weights is None:
        if total_train_num == 5:
            fold_weights = [0.1, 0.15, 0.2, 0.25, 0.3]
        else:
            ramp = np.linspace(0.1, 0.3, total_train_num)
            fold_weights = (ramp / ramp.sum()).tolist()
    else:
        fold_weights = [float(w) for w in fold_weights]
        if len(fold_weights) != total_train_num:
            raise ValueError(
                'fold_weights must have %d entries (one per fold), got %d'
                % (total_train_num, len(fold_weights))
            )

    total_test_name = 'test_output_' + str(round_num) + '.pt'
    total_date_pid_name = 'test_date_pid_' + str(round_num) + '.pt'
    save_path = "/home/datamake134/data/haris/DL/" + main_folder_name

    # Train + validation dates: dt1 <= date < dt3.
    # NOTE(review): the bound is exclusive, so dt3 itself (the last validation day
    # as computed by the callers) is not used - confirm this gap is intended.
    date_list_train = total_date_list[np.where((total_date_list >= dt1) & (total_date_list < dt3))[0]]
    # If 20240223 falls inside [dt1, dt3], drop 20240201-20240223 from the train/validation dates.
    if 20240223 >= dt1 and 20240223 <= dt3:
        date_list_train = np.array([date_train for date_train in date_list_train if date_train < 20240201 or date_train > 20240223])
    # Test dates: dt4 <= date < dt5.
    date_list_test = total_date_list[np.where((total_date_list >= dt4) & (total_date_list < dt5))[0]]
    if len(date_list_train) == 0 or len(date_list_test) == 0:
        raise ValueError(
            'Empty date range for round %s: %d train/validation dates, %d test dates'
            % (round_num, len(date_list_train), len(date_list_test))
        )

    def _stack_by_date(date_list, desc):
        # Build (dates, pid_num, factor_num) factors, (dates, pid_num, 5) labels
        # and (dates, pid_num, 1) liquidity arrays for the given dates.
        n_dates = len(date_list)
        ts = np.zeros((n_dates, pid_num, factor_num))
        label = np.zeros((n_dates, pid_num, 5))
        group = np.zeros((n_dates, pid_num, 1))
        for i in trange(n_dates, desc=desc):
            date = date_list[i]
            ts[i, :, :] = grouped.loc[date].iloc[:pid_num, :]
            label[i, :, :] = grouped_label.loc[date].iloc[:pid_num, :]
            # Label column 0 is rewritten from columns 0 and 4. Per the original note this
            # adjusts returns near the top 7%-10% by liquidity - see adjust_daily_returns.
            label[i, :, 0] = adjust_daily_returns(label[i, :, 0], label[i, :, 4])
            group[i, :, :] = np.array(grouped_liquidity.loc[date])[:pid_num].reshape(-1, 1)
        return ts, label, group

    total_ts_train_val1, total_label_train_val, total_group_train_val = _stack_by_date(date_list_train, 'train_val_data')
    total_ts_test1, total_label_test, total_group_test = _stack_by_date(date_list_test, 'test_data')

    # Min-max scale the liquidity data over the whole array.
    def min_max_standard(column):
        lowest = column.min()
        span = column.max() - lowest
        if span == 0:
            # Constant input: avoid 0/0 and map everything to 0.
            return np.zeros_like(column, dtype=float)
        return (column - lowest) / span
    print('Min-max scaling.')
    # NOTE(review): the test set is scaled with its own min/max, not the training ones.
    total_group_train_val, total_group_test = min_max_standard(total_group_train_val), min_max_standard(total_group_test)

    # Standardise every factor column except column 0 (the stock code).
    print('Standard scaling (skipping first column).')
    scaler = StandardScaler()

    def _scale_factors(panel, fit):
        # Winsorise each factor column to its [0.5%, 99.5%] percentiles, standardise,
        # replace NaN with 0, and re-attach the untouched stock-code column.
        # Intermediates are local so they are released before training starts.
        n_dates = panel.shape[0]
        codes = panel[:, :, 0].copy()
        factors = np.apply_along_axis(
            lambda x: np.clip(x, np.percentile(x, 0.5), np.percentile(x, 99.5)),
            axis=0,
            arr=panel[:, :, 1:].reshape(-1, factor_num - 1)
        )
        factors = scaler.fit_transform(factors) if fit else scaler.transform(factors)
        factors = np.nan_to_num(factors, nan=0)
        factors = factors.reshape(n_dates, pid_num, factor_num - 1)
        return np.concatenate([codes[:, :, np.newaxis], factors], axis=2)

    # The scaler is fitted on train/validation data and reused for the test data.
    # NOTE(review): the test data is still clipped with its own percentiles.
    total_ts_train_val1 = _scale_factors(total_ts_train_val1, fit=True)
    total_ts_test1 = _scale_factors(total_ts_test1, fit=False)

    # KFold over the date axis; each split is trained in its own process.
    print('KFold training.')
    kf = KFold(n_splits=total_train_num, shuffle=False)
    processes = []
    for train_num, index_tuple in enumerate(kf.split(total_ts_train_val1)):
        p = mp.Process(
            target=train_one_Fold,
            args=(
                round_num, train_num, index_tuple, main_folder_name,
                total_ts_train_val1, total_label_train_val, total_group_train_val, date_list_train,
                total_ts_test1, total_label_test, total_group_test, date_list_test,
                correlation_df, seed_list, dt1, dt2, dt3, dt4, dt5,
                factor_num, corr_thres, save_path, model_mode, multi_model
                )
            )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    # Surface failed children: otherwise a missing or stale per-fold file is only
    # noticed (or silently used) when the outputs are loaded below.
    failed_folds = [fold for fold, p in enumerate(processes) if p.exitcode != 0]
    if failed_folds:
        print('WARNING: fold process(es) %s exited with a non-zero code; their saved outputs may be missing or stale.' % failed_folds)

    torch.cuda.empty_cache()
    gc.collect()

    # Load the per-fold test outputs written by the child processes and blend them.
    print('Save test data.')
    fold_outputs = []
    for train_num in range(total_train_num):
        test_name = 'test_output_ic' + str(round_num) + str(train_num) + '.pt'
        test_path = os.path.join(save_path, test_name)
        fold_outputs.append(torch.load(test_path))

    total_test_path = os.path.join(save_path, total_test_name)
    total_date_pid_path = os.path.join(save_path, total_date_pid_name)

    stacked_outputs = torch.stack(fold_outputs)
    # Shape the weights as (folds, 1, ..., 1) so they broadcast over the stacked outputs.
    weight_tensor = torch.tensor(fold_weights, device=stacked_outputs.device).view(
        -1, *([1] * (stacked_outputs.dim() - 1))
    )
    total_test_output = (stacked_outputs * weight_tensor).sum(dim=0)
    torch.save(total_test_output, total_test_path)

    # (date, stock) key for every test row: dates repeated per stock, stocks tiled per date.
    # The stock list is read from the fixed date 20200102 and truncated to pid_num,
    # matching the truncation applied to the data tensors.
    # NOTE(review): assumes the same stock order on every date - confirm.
    stocks = np.array(grouped_label.loc[20200102].index)[:pid_num]
    repeated_stocks = np.tile(stocks, len(date_list_test))
    repeated_dates = np.repeat(date_list_test, len(stocks))
    date_pid_test = np.column_stack((repeated_dates, repeated_stocks))
    torch.save(date_pid_test, total_date_pid_path)

    # Release the large arrays explicitly before returning.
    del total_ts_train_val1, total_ts_test1
    del total_label_train_val, total_label_test
    del total_group_train_val, total_group_test
    del fold_outputs, stacked_outputs

    torch.cuda.empty_cache()
    gc.collect()

# 训练和测试

```c
Round 1. Train: 2020/07/01 2022/07/01 Validation: 2022/07/01 2022/12/30 Test: 2023/01/01 2023/04/01
Round 2. Train: 2021/01/01 2023/01/01 Validation: 2023/01/01 2023/03/31 Test: 2023/04/01 2023/07/01
Round 3. Train: 2021/04/01 2023/04/01 Validation: 2023/04/01 2023/06/30 Test: 2023/07/01 2023/10/01
Round 4. Train: 2021/07/01 2023/07/01 Validation: 2023/07/01 2023/09/28 Test: 2023/10/01 2024/01/01
Round 5. Train: 2021/10/01 2023/10/01 Validation: 2023/10/01 2023/12/29 Test: 2024/01/01 2024/04/01
Round 6. Train: 2022/01/01 2024/01/01 Validation: 2024/01/01 2024/03/29 Test: 2024/04/01 2024/07/01
Round 7. Train: 2022/04/01 2024/04/01 Validation: 2024/04/01 2024/06/28 Test: 2024/07/01 2024/10/01
Round 8. Train: 2022/07/01 2024/07/01 Validation: 2024/07/01 2024/09/30 Test: 2024/10/01 2025/01/01
Round 9. Train: 2022/10/01 2024/10/01 Validation: 2024/10/01 2024/12/31 Test: 2025/01/01 2025/04/01
```

In [9]:
# Output folder for this experiment.
# NOTE(review): `main_folder_name` is not defined in any cell shown here - it must
# already exist in the kernel (hidden state); define it in a config cell.
folder_path = "/home/datamake134/data/haris/DL/" + main_folder_name
os.makedirs(folder_path, exist_ok=True)

# Round 1
print('Round 1.')
round_num = 1
# train start, validation start, validation end, test start, test end
dt1, dt2, dt3, dt4, dt5 = (
    int(pd.to_datetime(day).strftime('%Y%m%d'))
    for day in ("2020-07-01", "2022-07-01", "2022-12-30", "2023-01-01", "2023-04-01")
)
main(
    round_num, dt1, dt2, dt3, dt4, dt5,
    correlation_df, grouped, grouped_label, grouped_liquidity,
    total_date_list, main_folder_name, corr_thres=0.9
    )
torch.cuda.empty_cache()
gc.collect()

# Reload the blended round-1 predictions and their (date, stock) keys.
test_output1 = torch.load(folder_path + "/test_output_1.pt")
test_output = torch.cat([test_output1]).cpu()
date_pid1 = torch.load(folder_path + "/test_date_pid_1.pt", weights_only=False)
total_date_pid = np.concatenate([date_pid1], axis=0)
total_date_pid_test = total_date_pid

# Date x stock table of predictions, filled one cell per (date, stock) key.
grading_factor = pd.DataFrame(
    index=np.unique(total_date_pid_test[:, 0]),
    columns=np.unique(total_date_pid_test[:, 1]),
)
test_output_list = test_output.tolist()
for i, (date_key, pid_key) in enumerate(total_date_pid_test):
    grading_factor.loc[date_key, pid_key] = test_output_list[i]
grading_factor.to_pickle(folder_path + "/单次_KFold_2023.pkl")
gc.collect()

Round 1.


train_val_data:   0%|          | 0/610 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 08:32:31 train.py INFO Period1, Train0, Train Period:20200701-20220701, Val Period:20220701-20221230, Test Period:20230101-20230401
2025/04/14 08:32:31 train.py INFO Train1 Shape: torch.Size([488, 5269, 2791]), Val1 Shape: torch.Size([121, 5269, 2791]), Test1 Shape: torch.Size([59, 5269, 2791])
2025/04/14 08:32:31 train.py INFO Start Training
2025/04/14 08:34:47 train.py INFO Epoch[1/200], Time:136.61sec, Train Loss: 0.903262, Val Loss: 0.8679094314575195,0.8718764781951904,0.8724111318588257,0.871613085269928,0.8710750937461853,0.8737856149673462
2025/04/14 08:34:47 model.py INFO Validation loss decreased (inf --> 0.867909).  Saving model 0.0...
2025/04/14 08:34:50 model.py INFO Validation loss decreased (inf --> 0.871876).  Saving model 1.0...
2025/04/14 08:34:53 model.py INFO Validation loss decreased (inf --> 0.872411).  Saving model 2.0...
2025/04/14 08:34:56 model.py INFO Validation loss decreased (inf --> 0.871613).  Saving model 3.0...
2025/04/14 08:34:59 model.py IN

Save test data.


0

In [10]:
# Rounds 2-9
total_date_list = np.array(factor['date'].drop_duplicates().tolist())
rolling_step = 3    # roll forward 3 months per round
window_size = 24    # training window, in months
val_size = 3        # validation window, in months
corr_thres = 0.9
for round_num in range(2, 10):
    print('Round %i.' % round_num)
    start_date = pd.to_datetime('2021-01-01')
    train_start = start_date + relativedelta(months=rolling_step * (round_num - 2))
    val_start = train_start + relativedelta(months=window_size)
    val_end = val_start + relativedelta(months=val_size)
    test_start = val_end
    test_end = val_end + relativedelta(months=rolling_step)
    # Validation ends on the last available date strictly before the test start.
    last_val_date = total_date_list[total_date_list < int(val_end.strftime('%Y%m%d'))][-1]
    dt1 = int(train_start.strftime('%Y%m%d'))   # training start
    dt2 = int(val_start.strftime('%Y%m%d'))     # validation start
    dt3 = int(last_val_date)                    # validation end
    dt4 = int(test_start.strftime('%Y%m%d'))    # test start
    dt5 = int(test_end.strftime('%Y%m%d'))      # test end
    main(
        round_num, dt1, dt2, dt3, dt4, dt5,
        correlation_df, grouped, grouped_label, grouped_liquidity,
        total_date_list, main_folder_name, corr_thres=corr_thres, seed_num=5, model_mode=False
        )
    torch.cuda.empty_cache()
    gc.collect()

# Reload every round's blended predictions and (date, stock) keys, in round order.
result_dir = "/home/datamake134/data/haris/DL/" + main_folder_name
test_output_list = []
date_pid_list = []
for round_num in range(2, 10):
    test_output = torch.load(result_dir + "/test_output_" + str(round_num) + ".pt")
    test_output_list.append(test_output)
    date_pid = torch.load(result_dir + "/test_date_pid_" + str(round_num) + ".pt", weights_only=False)
    date_pid_list.append(date_pid)
test_output = torch.cat(test_output_list).cpu()
total_date_pid = np.concatenate(date_pid_list, axis=0)
total_date_pid_test = total_date_pid

# Date x stock table of predictions, filled one cell per (date, stock) key.
grading_factor = pd.DataFrame(
    index=np.unique(total_date_pid_test[:, 0]),
    columns=np.unique(total_date_pid_test[:, 1]),
)
test_output_list = test_output.tolist()
for i, (date_key, pid_key) in enumerate(total_date_pid_test):
    grading_factor.loc[date_key, pid_key] = test_output_list[i]

# Put the round-1 table (saved by the previous cell) in front and write the combined result.
grading_factor2023 = pd.read_pickle(result_dir + "/单次_KFold_2023.pkl")
grading_factor = pd.concat([grading_factor2023, grading_factor], axis=0)
grading_factor.to_feather(result_dir + "/单次_KFold_0.fea")

Round 2.


train_val_data:   0%|          | 0/543 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 10:01:10 train.py INFO Period2, Train0, Train Period:20210101-20230101, Val Period:20230101-20230331, Test Period:20230401-20230701
2025/04/14 10:01:10 train.py INFO Train1 Shape: torch.Size([434, 5269, 2791]), Val1 Shape: torch.Size([108, 5269, 2791]), Test1 Shape: torch.Size([59, 5269, 2791])
2025/04/14 10:01:10 train.py INFO Start Training
2025/04/14 10:03:17 train.py INFO Epoch[1/200], Time:126.74sec, Train Loss: 0.905240, Val Loss: 0.8736862540245056,0.873616635799408,0.874582052230835,0.8712753057479858,0.8717119097709656,0.8708034753799438
2025/04/14 10:03:17 model.py INFO Validation loss decreased (inf --> 0.873686).  Saving model 0.0...
2025/04/14 10:03:21 model.py INFO Validation loss decreased (inf --> 0.873617).  Saving model 1.0...
2025/04/14 10:03:24 model.py INFO Validation loss decreased (inf --> 0.874582).  Saving model 2.0...
2025/04/14 10:03:27 model.py INFO Validation loss decreased (inf --> 0.871275).  Saving model 3.0...
2025/04/14 10:03:30 model.py INF

Save test data.
Round 3.


train_val_data:   0%|          | 0/544 [00:00<?, ?it/s]

test_data:   0%|          | 0/64 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 11:25:36 train.py INFO Period3, Train0, Train Period:20210401-20230401, Val Period:20230401-20230630, Test Period:20230701-20231001
2025/04/14 11:25:36 train.py INFO Train1 Shape: torch.Size([435, 5269, 2791]), Val1 Shape: torch.Size([108, 5269, 2791]), Test1 Shape: torch.Size([64, 5269, 2791])
2025/04/14 11:25:36 train.py INFO Start Training
2025/04/14 11:27:43 train.py INFO Epoch[1/200], Time:127.34sec, Train Loss: 0.916021, Val Loss: 0.8727573752403259,0.8755685687065125,0.8717026114463806,0.8744902014732361,0.874402642250061,0.8752333521842957
2025/04/14 11:27:43 model.py INFO Validation loss decreased (inf --> 0.872757).  Saving model 0.0...
2025/04/14 11:27:46 model.py INFO Validation loss decreased (inf --> 0.875569).  Saving model 1.0...
2025/04/14 11:27:49 model.py INFO Validation loss decreased (inf --> 0.871703).  Saving model 2.0...
2025/04/14 11:27:52 model.py INFO Validation loss decreased (inf --> 0.874490).  Saving model 3.0...
2025/04/14 11:27:55 model.py IN

Save test data.
Round 4.


train_val_data:   0%|          | 0/548 [00:00<?, ?it/s]

test_data:   0%|          | 0/60 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 12:49:45 train.py INFO Period4, Train0, Train Period:20210701-20230701, Val Period:20230701-20230928, Test Period:20231001-20240101
2025/04/14 12:49:45 train.py INFO Train1 Shape: torch.Size([438, 5269, 2791]), Val1 Shape: torch.Size([109, 5269, 2791]), Test1 Shape: torch.Size([60, 5269, 2791])
2025/04/14 12:49:45 train.py INFO Start Training
2025/04/14 12:51:57 train.py INFO Epoch[1/200], Time:132.02sec, Train Loss: 0.908437, Val Loss: 0.9244918823242188,0.9277007579803467,0.9287406802177429,0.9265681505203247,0.9309817552566528,0.9248376488685608
2025/04/14 12:51:57 model.py INFO Validation loss decreased (inf --> 0.924492).  Saving model 0.0...
2025/04/14 12:52:02 model.py INFO Validation loss decreased (inf --> 0.927701).  Saving model 1.0...
2025/04/14 12:52:07 model.py INFO Validation loss decreased (inf --> 0.928741).  Saving model 2.0...
2025/04/14 12:52:12 model.py INFO Validation loss decreased (inf --> 0.926568).  Saving model 3.0...
2025/04/14 12:52:15 model.py I

Save test data.
Round 5.


train_val_data:   0%|          | 0/544 [00:00<?, ?it/s]

test_data:   0%|          | 0/58 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 14:07:57 train.py INFO Period5, Train0, Train Period:20211001-20231001, Val Period:20231001-20231229, Test Period:20240101-20240401
2025/04/14 14:07:57 train.py INFO Train1 Shape: torch.Size([435, 5269, 2791]), Val1 Shape: torch.Size([108, 5269, 2791]), Test1 Shape: torch.Size([58, 5269, 2791])
2025/04/14 14:07:57 train.py INFO Start Training
2025/04/14 14:09:30 train.py INFO Epoch[1/200], Time:93.02sec, Train Loss: 0.911781, Val Loss: 0.8875344395637512,0.8902795314788818,0.8864848017692566,0.8895403146743774,0.8925573229789734,0.8871213793754578
2025/04/14 14:09:30 model.py INFO Validation loss decreased (inf --> 0.887534).  Saving model 0.0...
2025/04/14 14:09:35 model.py INFO Validation loss decreased (inf --> 0.890280).  Saving model 1.0...
2025/04/14 14:09:40 model.py INFO Validation loss decreased (inf --> 0.886485).  Saving model 2.0...
2025/04/14 14:09:45 model.py INFO Validation loss decreased (inf --> 0.889540).  Saving model 3.0...
2025/04/14 14:09:49 model.py IN

Save test data.
Round 6.


train_val_data:   0%|          | 0/530 [00:00<?, ?it/s]

test_data:   0%|          | 0/59 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 15:21:25 train.py INFO Period6, Train0, Train Period:20220101-20240101, Val Period:20240101-20240329, Test Period:20240401-20240701
2025/04/14 15:21:25 train.py INFO Train1 Shape: torch.Size([424, 5269, 2791]), Val1 Shape: torch.Size([105, 5269, 2791]), Test1 Shape: torch.Size([59, 5269, 2791])
2025/04/14 15:21:25 train.py INFO Start Training
2025/04/14 15:22:57 train.py INFO Epoch[1/200], Time:92.17sec, Train Loss: 0.917040, Val Loss: 0.8812097311019897,0.8780667185783386,0.87629234790802,0.8802863359451294,0.8834035992622375,0.8782601356506348
2025/04/14 15:22:57 model.py INFO Validation loss decreased (inf --> 0.881210).  Saving model 0.0...
2025/04/14 15:23:00 model.py INFO Validation loss decreased (inf --> 0.878067).  Saving model 1.0...
2025/04/14 15:23:03 model.py INFO Validation loss decreased (inf --> 0.876292).  Saving model 2.0...
2025/04/14 15:23:06 model.py INFO Validation loss decreased (inf --> 0.880286).  Saving model 3.0...
2025/04/14 15:23:09 model.py INFO

Save test data.
Round 7.


train_val_data:   0%|          | 0/531 [00:00<?, ?it/s]

test_data:   0%|          | 0/64 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 16:37:44 train.py INFO Period7, Train0, Train Period:20220401-20240401, Val Period:20240401-20240628, Test Period:20240701-20241001
2025/04/14 16:37:44 train.py INFO Train1 Shape: torch.Size([424, 5269, 2791]), Val1 Shape: torch.Size([106, 5269, 2791]), Test1 Shape: torch.Size([64, 5269, 2791])
2025/04/14 16:37:44 train.py INFO Start Training
2025/04/14 16:39:17 train.py INFO Epoch[1/200], Time:93.64sec, Train Loss: 0.918586, Val Loss: 0.8924223780632019,0.8959860801696777,0.8967133164405823,0.896324872970581,0.8944334983825684,0.8904873132705688
2025/04/14 16:39:17 model.py INFO Validation loss decreased (inf --> 0.892422).  Saving model 0.0...
2025/04/14 16:39:21 model.py INFO Validation loss decreased (inf --> 0.895986).  Saving model 1.0...
2025/04/14 16:39:25 model.py INFO Validation loss decreased (inf --> 0.896713).  Saving model 2.0...
2025/04/14 16:39:28 model.py INFO Validation loss decreased (inf --> 0.896325).  Saving model 3.0...
2025/04/14 16:39:32 model.py INF

Save test data.
Round 8.


train_val_data:   0%|          | 0/536 [00:00<?, ?it/s]

test_data:   0%|          | 0/61 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 17:54:29 train.py INFO Period8, Train0, Train Period:20220701-20240701, Val Period:20240701-20240930, Test Period:20241001-20250101
2025/04/14 17:54:29 train.py INFO Train1 Shape: torch.Size([428, 5269, 2791]), Val1 Shape: torch.Size([107, 5269, 2791]), Test1 Shape: torch.Size([61, 5269, 2791])
2025/04/14 17:54:29 train.py INFO Start Training
2025/04/14 17:56:04 train.py INFO Epoch[1/200], Time:94.85sec, Train Loss: 0.920279, Val Loss: 0.8908974528312683,0.8892650008201599,0.8906570672988892,0.888675332069397,0.8883320093154907,0.8923328518867493
2025/04/14 17:56:04 model.py INFO Validation loss decreased (inf --> 0.890897).  Saving model 0.0...
2025/04/14 17:56:07 model.py INFO Validation loss decreased (inf --> 0.889265).  Saving model 1.0...
2025/04/14 17:56:10 model.py INFO Validation loss decreased (inf --> 0.890657).  Saving model 2.0...
2025/04/14 17:56:12 model.py INFO Validation loss decreased (inf --> 0.888675).  Saving model 3.0...
2025/04/14 17:56:15 model.py INF

Save test data.
Round 9.


train_val_data:   0%|          | 0/532 [00:00<?, ?it/s]

test_data:   0%|          | 0/57 [00:00<?, ?it/s]

Min-max scaling.
Standard scaling (skipping first column).


  diff_b_a = subtract(b, a)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  diff_b_a = subtract(b, a)


KFold training.


2025/04/14 19:06:43 train.py INFO Period9, Train0, Train Period:20221001-20241001, Val Period:20241001-20241231, Test Period:20250101-20250401
2025/04/14 19:06:43 train.py INFO Train1 Shape: torch.Size([425, 5269, 2791]), Val1 Shape: torch.Size([106, 5269, 2791]), Test1 Shape: torch.Size([57, 5269, 2791])
2025/04/14 19:06:43 train.py INFO Start Training
2025/04/14 19:08:14 train.py INFO Epoch[1/200], Time:91.17sec, Train Loss: 0.923510, Val Loss: 0.8955049514770508,0.8877689838409424,0.8937031030654907,0.8892895579338074,0.8921661376953125,0.8913280367851257
2025/04/14 19:08:14 model.py INFO Validation loss decreased (inf --> 0.895505).  Saving model 0.0...
2025/04/14 19:08:19 model.py INFO Validation loss decreased (inf --> 0.887769).  Saving model 1.0...
2025/04/14 19:08:23 model.py INFO Validation loss decreased (inf --> 0.893703).  Saving model 2.0...
2025/04/14 19:08:28 model.py INFO Validation loss decreased (inf --> 0.889290).  Saving model 3.0...
2025/04/14 19:08:33 model.py IN

Save test data.
