In [1]:
# Parameters
EMO_INDEX = 1
MODEL_INDEX = 7
N = 20


In [2]:
import os
import ast
import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.dates as mdates
import statsmodels.api as sm # 用于统计检验
%matplotlib inline
plt.rcParams["font.sans-serif"]=["WenQuanYi Micro Hei"] #设置字体
plt.rcParams["axes.unicode_minus"]=False #该语句解决图像中的“-”负号的乱码问题

## 参数选择

In [3]:
# # Parameters
# MODEL_INDEX = 2
# EMO_INDEX = 0
# N = 20

In [4]:
ORI_DATA_PATH = '/data/public/fintechlab/zdh/Individual-Stock-Analysis/B_Temporal_Clustering/data'
ROOT_PATH = '/data/public/fintechlab/zdh/Individual-Stock-Analysis/B_Result_Analysis'
CLUSTER_PATH = f'{ROOT_PATH}/data/Comparison_of_Emotional_models'

MODEL = ['Emo-Dict/DLUT', 'Emo-Dict/Bian', 'Emo-Dict/Jiang',
         'Machine-learning/LR', 'Machine-learning/RF', 'Machine-learning/SVM', 
         'Deep-learning/BERT', 'Deep-learning/Ours'][MODEL_INDEX]
Emotion_Data_PATH = f'{ORI_DATA_PATH}/Emotion_Data/{MODEL}'   # 情绪数据路径
Financial_Data_PATH = f'{ORI_DATA_PATH}/Financial_Data' # 金融数据路径

In [5]:
EMO_MARKET = ['Shanghai_Composite_Index', 'CSI_300_Index', 'Chinext_Index'][EMO_INDEX]
EMO_INDEX_MAP = {
    'Shanghai_Composite_Index': '上证综合情绪值',
    'CSI_300_Index': '沪深300情绪值',
    'Chinext_Index': '创业板情绪值'
}
EMO_NAME = EMO_INDEX_MAP[EMO_MARKET]

## 数据预处理

In [6]:
## 读取聚类结果
cluster_data = pd.read_csv(f'{CLUSTER_PATH}/{EMO_MARKET}/{MODEL}.csv', dtype={"Stkcd": str})
cluster_data.columns = ['股票编号', '聚类标签', '公司名称']

# 尝试抽样直到满足每个簇至少出现3次
for _ in range(100):
    cluster_data = cluster_data.sample(n=N, random_state=np.random.randint(0, 10000))
    label_counts = cluster_data['聚类标签'].value_counts()
    
    if all(label_counts >= 3):
        break
else:
    raise ValueError(f"在 {RETRY_LIMIT} 次尝试中无法找到满足每个簇至少3个样本的抽样结果，请减少 N 或检查数据分布。")

cluster_data.head()

Unnamed: 0,股票编号,聚类标签,公司名称
31,2415,0,海康威视
181,688029,1,南微医学
70,600000,0,浦发银行
4,100,2,TCL科技
180,688027,1,国盾量子


In [7]:
## 读取股吧个股的数据
all_data = []
file_list = [f for f in os.listdir(Emotion_Data_PATH) if f.endswith('.csv')]

if MODEL != 'Deep-learning/Ours':
    for file in file_list:
        file_path = os.path.join(Emotion_Data_PATH, file)
        df = pd.read_csv(file_path)
        stock_code = os.path.splitext(file)[0] # 获取股票编号（文件名去掉扩展名）
        
        # 提取每一行的日期和情绪值
        for _, row in df.iterrows():
            new_row = {
                '股票编号': stock_code,
                '日期': row['日期'],
                '上证综合情绪值': row['上证综合情绪值'],
                '沪深300情绪值': row['沪深300情绪值'],
                '创业板情绪值': row['创业板情绪值']
            }
            all_data.append(new_row)
    guba_data = pd.DataFrame(all_data)
else:
    for file in tqdm(file_list, desc="读取文件"):
        file_path = os.path.join(Emotion_Data_PATH, file)
        try:
            df = pd.read_csv(file_path, usecols=['日期', '高维情绪变量'])  # 只读需要的列
        except Exception as e:
            print(f"读取失败 {file}: {e}")
            continue
    
        stock_code = os.path.splitext(file)[0]
    
        df['股票编号'] = stock_code
        all_data.append(df)
    guba_data = pd.concat(all_data, ignore_index=True)

guba_data

读取文件:   0%|          | 0/183 [00:00<?, ?it/s]

读取文件:   1%|          | 1/183 [00:00<01:05,  2.77it/s]

读取文件:   2%|▏         | 3/183 [00:00<00:39,  4.57it/s]

读取文件:   2%|▏         | 4/183 [00:01<01:19,  2.24it/s]

读取文件:   3%|▎         | 5/183 [00:01<00:59,  2.97it/s]

读取文件:   3%|▎         | 6/183 [00:01<00:49,  3.57it/s]

读取文件:   4%|▍         | 7/183 [00:02<01:24,  2.08it/s]

读取文件:   4%|▍         | 8/183 [00:02<01:04,  2.72it/s]

读取文件:   5%|▍         | 9/183 [00:03<01:04,  2.72it/s]

读取文件:   5%|▌         | 10/183 [00:03<01:08,  2.53it/s]

读取文件:   6%|▌         | 11/183 [00:03<00:54,  3.17it/s]

读取文件:   7%|▋         | 12/183 [00:04<00:47,  3.63it/s]

读取文件:   8%|▊         | 14/183 [00:04<00:29,  5.75it/s]

读取文件:   8%|▊         | 15/183 [00:04<00:37,  4.44it/s]

读取文件:   9%|▊         | 16/183 [00:07<02:58,  1.07s/it]

读取文件:   9%|▉         | 17/183 [00:08<02:14,  1.23it/s]

读取文件:  10%|▉         | 18/183 [00:09<02:40,  1.03it/s]

读取文件:  11%|█         | 20/183 [00:09<01:32,  1.76it/s]

读取文件:  11%|█▏        | 21/183 [00:09<01:14,  2.16it/s]

读取文件:  12%|█▏        | 22/183 [00:09<01:01,  2.64it/s]

读取文件:  13%|█▎        | 23/183 [00:10<01:00,  2.64it/s]

读取文件:  13%|█▎        | 24/183 [00:10<00:53,  2.98it/s]

读取文件:  14%|█▎        | 25/183 [00:10<00:53,  2.97it/s]

读取文件:  14%|█▍        | 26/183 [00:10<00:45,  3.45it/s]

读取文件:  15%|█▍        | 27/183 [00:11<00:53,  2.91it/s]

读取文件:  15%|█▌        | 28/183 [00:11<00:53,  2.88it/s]

读取文件:  16%|█▌        | 29/183 [00:12<00:50,  3.04it/s]

读取文件:  16%|█▋        | 30/183 [00:12<00:40,  3.78it/s]

读取文件:  17%|█▋        | 31/183 [00:13<01:24,  1.80it/s]

读取文件:  17%|█▋        | 32/183 [00:14<01:25,  1.77it/s]

读取文件:  19%|█▊        | 34/183 [00:14<00:57,  2.61it/s]

读取文件:  19%|█▉        | 35/183 [00:15<01:22,  1.80it/s]

读取文件:  20%|██        | 37/183 [00:15<00:53,  2.71it/s]

读取文件:  21%|██▏       | 39/183 [00:15<00:38,  3.76it/s]

读取文件:  22%|██▏       | 41/183 [00:15<00:27,  5.19it/s]

读取文件:  23%|██▎       | 43/183 [00:16<00:23,  5.84it/s]

读取文件:  24%|██▍       | 44/183 [00:16<00:25,  5.53it/s]

读取文件:  25%|██▍       | 45/183 [00:16<00:27,  4.94it/s]

读取文件:  25%|██▌       | 46/183 [00:17<00:37,  3.67it/s]

读取文件:  26%|██▌       | 47/183 [00:17<00:47,  2.87it/s]

读取文件:  26%|██▌       | 48/183 [00:18<00:45,  2.95it/s]

读取文件:  27%|██▋       | 49/183 [00:18<00:45,  2.92it/s]

读取文件:  28%|██▊       | 51/183 [00:19<00:42,  3.11it/s]

读取文件:  29%|██▉       | 53/183 [00:19<00:33,  3.85it/s]

读取文件:  30%|██▉       | 54/183 [00:19<00:34,  3.76it/s]

读取文件:  31%|███       | 56/183 [00:20<00:30,  4.19it/s]

读取文件:  31%|███       | 57/183 [00:21<00:57,  2.20it/s]

读取文件:  32%|███▏      | 58/183 [00:21<00:53,  2.35it/s]

读取文件:  33%|███▎      | 60/183 [00:21<00:35,  3.48it/s]

读取文件:  33%|███▎      | 61/183 [00:22<00:42,  2.84it/s]

读取文件:  34%|███▍      | 62/183 [00:22<00:39,  3.08it/s]

读取文件:  35%|███▍      | 64/183 [00:22<00:30,  3.89it/s]

读取文件:  36%|███▌      | 65/183 [00:23<00:30,  3.87it/s]

读取文件:  36%|███▌      | 66/183 [00:23<00:37,  3.10it/s]

读取文件:  37%|███▋      | 67/183 [00:24<00:48,  2.41it/s]

读取文件:  37%|███▋      | 68/183 [00:24<00:51,  2.24it/s]

读取文件:  38%|███▊      | 69/183 [00:25<00:46,  2.45it/s]

读取文件:  38%|███▊      | 70/183 [00:25<00:42,  2.63it/s]

读取文件:  39%|███▉      | 71/183 [00:25<00:35,  3.13it/s]

读取文件:  39%|███▉      | 72/183 [00:25<00:32,  3.42it/s]

读取文件:  40%|███▉      | 73/183 [00:26<00:54,  2.03it/s]

读取文件:  40%|████      | 74/183 [00:30<02:36,  1.43s/it]

读取文件:  41%|████      | 75/183 [00:30<01:53,  1.05s/it]

读取文件:  42%|████▏     | 76/183 [00:30<01:22,  1.29it/s]

读取文件:  43%|████▎     | 78/183 [00:31<00:51,  2.04it/s]

读取文件:  43%|████▎     | 79/183 [00:31<00:42,  2.44it/s]

读取文件:  44%|████▎     | 80/183 [00:31<00:45,  2.28it/s]

读取文件:  44%|████▍     | 81/183 [00:32<00:58,  1.75it/s]

读取文件:  45%|████▌     | 83/183 [00:33<00:45,  2.20it/s]

读取文件:  46%|████▌     | 84/183 [00:33<00:37,  2.66it/s]

读取文件:  46%|████▋     | 85/183 [00:33<00:33,  2.94it/s]

读取文件:  47%|████▋     | 86/183 [00:33<00:29,  3.34it/s]

读取文件:  48%|████▊     | 88/183 [00:34<00:30,  3.09it/s]

读取文件:  49%|████▊     | 89/183 [00:34<00:29,  3.21it/s]

读取文件:  49%|████▉     | 90/183 [00:35<00:28,  3.28it/s]

读取文件:  50%|████▉     | 91/183 [00:35<00:25,  3.68it/s]

读取文件:  50%|█████     | 92/183 [00:35<00:23,  3.84it/s]

读取文件:  51%|█████▏    | 94/183 [00:36<00:23,  3.81it/s]

读取文件:  52%|█████▏    | 95/183 [00:36<00:21,  4.15it/s]

读取文件:  52%|█████▏    | 96/183 [00:37<00:35,  2.48it/s]

读取文件:  53%|█████▎    | 97/183 [00:37<00:39,  2.17it/s]

读取文件:  54%|█████▎    | 98/183 [00:38<00:35,  2.38it/s]

读取文件:  54%|█████▍    | 99/183 [00:38<00:29,  2.89it/s]

读取文件:  55%|█████▌    | 101/183 [00:39<00:29,  2.76it/s]

读取文件:  56%|█████▋    | 103/183 [00:39<00:19,  4.09it/s]

读取文件:  57%|█████▋    | 104/183 [00:39<00:17,  4.61it/s]

读取文件:  57%|█████▋    | 105/183 [00:39<00:18,  4.22it/s]

读取文件:  58%|█████▊    | 107/183 [00:39<00:14,  5.41it/s]

读取文件:  60%|█████▉    | 109/183 [00:39<00:11,  6.59it/s]

读取文件:  60%|██████    | 110/183 [00:40<00:11,  6.52it/s]

读取文件:  61%|██████    | 111/183 [00:40<00:10,  6.72it/s]

读取文件:  61%|██████    | 112/183 [00:40<00:10,  6.96it/s]

读取文件:  62%|██████▏   | 113/183 [00:40<00:12,  5.64it/s]

读取文件:  63%|██████▎   | 115/183 [00:41<00:11,  5.67it/s]

读取文件:  64%|██████▍   | 117/183 [00:41<00:11,  5.66it/s]

读取文件:  64%|██████▍   | 118/183 [00:41<00:11,  5.62it/s]

读取文件:  66%|██████▌   | 120/183 [00:41<00:09,  6.76it/s]

读取文件:  66%|██████▌   | 121/183 [00:41<00:10,  5.96it/s]

读取文件:  67%|██████▋   | 122/183 [00:43<00:28,  2.18it/s]

读取文件:  67%|██████▋   | 123/183 [00:43<00:22,  2.64it/s]

读取文件:  68%|██████▊   | 124/183 [00:47<01:13,  1.24s/it]

读取文件:  68%|██████▊   | 125/183 [00:47<00:56,  1.03it/s]

读取文件:  69%|██████▉   | 127/183 [00:47<00:32,  1.75it/s]

读取文件:  70%|██████▉   | 128/183 [00:47<00:26,  2.08it/s]

读取文件:  70%|███████   | 129/183 [00:48<00:22,  2.35it/s]

读取文件:  72%|███████▏  | 131/183 [00:48<00:21,  2.41it/s]

读取文件:  72%|███████▏  | 132/183 [00:50<00:31,  1.62it/s]

读取文件:  73%|███████▎  | 133/183 [00:50<00:24,  2.03it/s]

读取文件:  74%|███████▍  | 135/183 [00:51<00:27,  1.73it/s]

读取文件:  74%|███████▍  | 136/183 [00:52<00:26,  1.78it/s]

读取文件:  75%|███████▍  | 137/183 [00:53<00:29,  1.54it/s]

读取文件:  76%|███████▌  | 139/183 [00:53<00:19,  2.31it/s]

读取文件:  77%|███████▋  | 140/183 [00:54<00:21,  2.02it/s]

读取文件:  77%|███████▋  | 141/183 [00:54<00:17,  2.45it/s]

读取文件:  78%|███████▊  | 142/183 [00:54<00:17,  2.34it/s]

读取文件:  78%|███████▊  | 143/183 [00:55<00:17,  2.23it/s]

读取文件:  79%|███████▊  | 144/183 [00:56<00:22,  1.72it/s]

读取文件:  80%|███████▉  | 146/183 [00:56<00:13,  2.68it/s]

读取文件:  80%|████████  | 147/183 [00:56<00:15,  2.30it/s]

读取文件:  81%|████████  | 148/183 [00:57<00:12,  2.80it/s]

读取文件:  81%|████████▏ | 149/183 [00:57<00:10,  3.27it/s]

读取文件:  83%|████████▎ | 151/183 [00:57<00:10,  3.16it/s]

读取文件:  84%|████████▎ | 153/183 [00:58<00:07,  4.27it/s]

读取文件:  84%|████████▍ | 154/183 [00:58<00:06,  4.67it/s]

读取文件:  85%|████████▌ | 156/183 [00:58<00:06,  3.89it/s]

读取文件:  86%|████████▌ | 157/183 [00:59<00:06,  3.74it/s]

读取文件:  86%|████████▋ | 158/183 [00:59<00:05,  4.25it/s]

读取文件:  87%|████████▋ | 159/183 [00:59<00:06,  3.87it/s]

读取文件:  87%|████████▋ | 160/183 [00:59<00:05,  3.88it/s]

读取文件:  88%|████████▊ | 161/183 [01:00<00:05,  4.03it/s]

读取文件:  89%|████████▉ | 163/183 [01:00<00:03,  6.04it/s]

读取文件:  90%|█████████ | 165/183 [01:00<00:02,  7.74it/s]

读取文件:  91%|█████████ | 166/183 [01:00<00:03,  4.77it/s]

读取文件:  91%|█████████▏| 167/183 [01:01<00:04,  4.00it/s]

读取文件:  92%|█████████▏| 168/183 [01:01<00:03,  4.50it/s]

读取文件:  93%|█████████▎| 170/183 [01:01<00:02,  5.78it/s]

读取文件:  94%|█████████▍| 172/183 [01:01<00:01,  7.75it/s]

读取文件:  95%|█████████▌| 174/183 [01:02<00:01,  5.75it/s]

读取文件:  96%|█████████▌| 175/183 [01:02<00:01,  5.69it/s]

读取文件:  97%|█████████▋| 177/183 [01:02<00:00,  6.73it/s]

读取文件:  98%|█████████▊| 179/183 [01:03<00:01,  3.41it/s]

读取文件:  99%|█████████▉| 181/183 [01:03<00:00,  4.60it/s]

读取文件:  99%|█████████▉| 182/183 [01:04<00:00,  4.47it/s]

读取文件: 100%|██████████| 183/183 [01:05<00:00,  2.14it/s]

读取文件: 100%|██████████| 183/183 [01:05<00:00,  2.79it/s]




Unnamed: 0,日期,高维情绪变量,股票编号
0,2024-12-27,"[-0.18730907142162323, -0.22363345324993134, 0...",601933
1,2024-11-18,"[-0.6446929574012756, 0.14099551737308502, 0.4...",601933
2,2024-11-17,"[-0.14928916096687317, -0.10993610322475433, 0...",601933
3,2024-11-17,"[-0.2892271876335144, 0.09153766185045242, 0.2...",601933
4,2024-11-17,"[-0.18511946499347687, 0.3309328258037567, 0.2...",601933
...,...,...,...
495326,2021-04-09,"[-0.14503593742847443, 0.25753334164619446, 0....",601919
495327,2021-04-09,"[-0.30359137058258057, 0.26559382677078247, 0....",601919
495328,2021-04-08,"[0.15271537005901337, -0.08832913637161255, 0....",601919
495329,2021-04-08,"[-0.004539322108030319, 0.3122449517250061, 0....",601919


In [8]:
## 读取股票回报率的数据
return_data = pd.read_csv(f'{Financial_Data_PATH}/日个股回报率.csv', dtype={'股票编号': str})
return_data

Unnamed: 0,股票编号,日期,交易量,收益率变化
0,000002,2021-06-01,60990961,-0.003745
1,000002,2021-06-02,85354506,0.006015
2,000002,2021-06-03,50594187,-0.003363
3,000002,2021-06-04,71422364,-0.012748
4,000002,2021-06-07,64745280,-0.014812
...,...,...,...,...
154877,688981,2024-11-20,58507495,-0.017071
154878,688981,2024-11-21,56197106,0.002358
154879,688981,2024-11-22,79240108,-0.050588
154880,688981,2024-11-25,76905909,-0.029402


In [9]:
## 进行左连接，以 guba_data 为主表
merged_data = pd.merge(guba_data, return_data[['股票编号', '日期', '交易量', '收益率变化']], 
                       on=['股票编号', '日期'], 
                       how='left')
merged_data = pd.merge(merged_data, cluster_data, on='股票编号', how='left')
merged_data = merged_data.dropna()

merged_data['日期'] = pd.to_datetime(merged_data['日期'])
merged_data = merged_data[(merged_data['日期'] > '2021-05-06') & (merged_data['日期'] < '2024-11-18')]
merged_data

Unnamed: 0,日期,高维情绪变量,股票编号,交易量,收益率变化,聚类标签,公司名称
20782,2024-11-15,"[-0.261523962020874, -0.16983817517757416, 0.2...",002475,77059914.0,-0.019950,2.0,立讯精密
20783,2024-11-15,"[-0.22364211082458496, -0.11699872463941574, 0...",002475,77059914.0,-0.019950,2.0,立讯精密
20784,2024-11-15,"[-0.3292462229728699, 0.08564773947000504, 0.4...",002475,77059914.0,-0.019950,2.0,立讯精密
20785,2024-11-14,"[-0.3399650454521179, 0.04659510776400566, 0.4...",002475,82870623.0,-0.035826,2.0,立讯精密
20786,2024-11-14,"[-0.26111602783203125, 0.4187474548816681, 0.3...",002475,82870623.0,-0.035826,2.0,立讯精密
...,...,...,...,...,...,...,...
482529,2022-08-02,"[-0.1272340565919876, 0.010020509362220764, 0....",600886,16932111.0,-0.023166,1.0,国投电力
482530,2022-07-22,"[-0.12217453867197037, 0.012194407172501087, 0...",600886,11472418.0,-0.002768,1.0,国投电力
482531,2022-07-20,"[-0.462770015001297, 0.1312245875597, 0.222410...",600886,10494013.0,-0.008094,1.0,国投电力
482532,2022-06-09,"[-0.3473847210407257, 0.007542190607637167, 0....",600886,19300191.0,0.001970,1.0,国投电力


In [10]:
# 对情绪值列进行 Min-Max 标准化
def min_max_normalization(df, cols):
    for col in cols:
        min_val = df[col].min()
        max_val = df[col].max()
        df[col] = 2 * (df[col] - min_val) / (max_val - min_val) - 1
    return df

# 对每个股票编号的数据进行标准化和按日期汇总
def process_data(df):
    if MODEL != 'Deep-learning/Ours':
        df_processed = pd.DataFrame()  # 用于存储结果
        for stock_code, stock_data in df.groupby('股票编号'):
            # 对每个股票编号内的数据进行标准化
            stock_data = min_max_normalization(stock_data, ['上证综合情绪值', '沪深300情绪值', '创业板情绪值'])
            
            # 按日期汇总数据，同时保留股票编号
            stock_summary = stock_data.groupby('日期').agg({
                '股票编号': 'first',  # 保留股票编号（在同一日期内它是相同的，使用 'first'）
                '上证综合情绪值': 'mean',  # 上证综合情绪值按日期取均值
                '沪深300情绪值': 'mean',  # 沪深300情绪值按日期取均值
                '创业板情绪值': 'mean',  # 创业板情绪值按日期取均值
                '交易量': 'mean',  # 交易量按日期求和
                '收益率变化': 'mean',  # 收益率变化按日期取均值
                '聚类标签': 'first', # 保留聚类标签（在同一日期内它是相同的，使用 'first'）
                '公司名称': 'first', # 保留公司名称（在同一日期内它是相同的，使用 'first'）
            }).reset_index(drop=False)
            
            # 合并每个股票的汇总数据
            df_processed = pd.concat([df_processed, stock_summary], ignore_index=True)
            df_processed = df_processed.sort_values(by=['股票编号', '日期'], ascending=[True, True])
        return df_processed
    else:
        df_processed = []    
        # 确保高维情绪变量是 list 类型（如果还没转过）
        if isinstance(df['高维情绪变量'].iloc[0], str):
            df['高维情绪变量'] = df['高维情绪变量'].progress_apply(ast.literal_eval)
        # 对每个股票编号处理，加 tqdm 进度条
        for stock_code, stock_data in tqdm(df.groupby('股票编号'), desc="Processing stocks"):
            for date, group in stock_data.groupby('日期'):
                emotion_matrix = np.array(group['高维情绪变量'].tolist())  # (样本数, 维度)

                avg_emotion = emotion_matrix.mean(axis=0).round(3).tolist()

                summary_row = {
                    '股票编号': stock_code,
                    '日期': date,
                    '高维情绪变量': avg_emotion,
                    '交易量': group['交易量'].mean(),
                    '收益率变化': group['收益率变化'].mean(),
                    '聚类标签': group['聚类标签'].iloc[0], # 保留聚类标签
                    '公司名称': group['公司名称'].iloc[0], # 保留公司名称
                }
                df_processed.append(summary_row)


    return pd.DataFrame(df_processed)

final_data = process_data(merged_data)
final_data

  0%|          | 0/43796 [00:00<?, ?it/s]

  0%|          | 61/43796 [00:00<01:12, 605.46it/s]

  0%|          | 122/43796 [00:00<01:13, 595.58it/s]

  0%|          | 182/43796 [00:00<01:13, 592.35it/s]

  1%|          | 242/43796 [00:00<01:13, 591.47it/s]

  1%|          | 302/43796 [00:00<01:13, 590.25it/s]

  1%|          | 362/43796 [00:00<01:13, 589.83it/s]

  1%|          | 421/43796 [00:00<01:28, 491.47it/s]

  1%|          | 479/43796 [00:00<01:23, 515.74it/s]

  1%|          | 538/43796 [00:00<01:20, 534.60it/s]

  1%|▏         | 597/43796 [00:01<01:18, 548.72it/s]

  1%|▏         | 655/43796 [00:01<01:17, 557.74it/s]

  2%|▏         | 714/43796 [00:01<01:16, 565.15it/s]

  2%|▏         | 773/43796 [00:01<01:15, 570.33it/s]

  2%|▏         | 831/43796 [00:01<01:29, 481.59it/s]

  2%|▏         | 890/43796 [00:01<01:24, 508.36it/s]

  2%|▏         | 948/43796 [00:01<01:21, 526.93it/s]

  2%|▏         | 1007/43796 [00:01<01:18, 542.99it/s]

  2%|▏         | 1066/43796 [00:01<01:17, 554.63it/s]

  3%|▎         | 1125/43796 [00:02<01:15, 562.63it/s]

  3%|▎         | 1184/43796 [00:02<01:14, 569.20it/s]

  3%|▎         | 1242/43796 [00:02<01:26, 493.84it/s]

  3%|▎         | 1301/43796 [00:02<01:22, 517.64it/s]

  3%|▎         | 1360/43796 [00:02<01:19, 535.68it/s]

  3%|▎         | 1419/43796 [00:02<01:17, 549.48it/s]

  3%|▎         | 1478/43796 [00:02<01:15, 559.00it/s]

  4%|▎         | 1537/43796 [00:02<01:14, 566.21it/s]

  4%|▎         | 1596/43796 [00:02<01:13, 570.87it/s]

  4%|▍         | 1654/43796 [00:03<01:28, 478.48it/s]

  4%|▍         | 1713/43796 [00:03<01:23, 505.17it/s]

  4%|▍         | 1771/43796 [00:03<01:20, 525.10it/s]

  4%|▍         | 1830/43796 [00:03<01:17, 541.08it/s]

  4%|▍         | 1889/43796 [00:03<01:15, 553.13it/s]

  4%|▍         | 1948/43796 [00:03<01:14, 561.76it/s]

  5%|▍         | 2007/43796 [00:03<01:13, 567.93it/s]

  5%|▍         | 2065/43796 [00:03<01:28, 472.36it/s]

  5%|▍         | 2124/43796 [00:03<01:23, 500.43it/s]

  5%|▍         | 2182/43796 [00:04<01:19, 521.57it/s]

  5%|▌         | 2241/43796 [00:04<01:17, 538.31it/s]

  5%|▌         | 2300/43796 [00:04<01:15, 550.72it/s]

  5%|▌         | 2359/43796 [00:04<01:14, 559.79it/s]

  6%|▌         | 2418/43796 [00:04<01:13, 566.47it/s]

  6%|▌         | 2476/43796 [00:04<01:25, 483.01it/s]

  6%|▌         | 2535/43796 [00:04<01:21, 509.07it/s]

  6%|▌         | 2594/43796 [00:04<01:17, 528.93it/s]

  6%|▌         | 2652/43796 [00:04<01:15, 543.13it/s]

  6%|▌         | 2711/43796 [00:05<01:14, 554.21it/s]

  6%|▋         | 2770/43796 [00:05<01:13, 561.90it/s]

  6%|▋         | 2829/43796 [00:05<01:12, 568.85it/s]

  7%|▋         | 2887/43796 [00:05<01:25, 480.55it/s]

  7%|▋         | 2946/43796 [00:05<01:20, 507.11it/s]

  7%|▋         | 3004/43796 [00:05<01:17, 525.01it/s]

  7%|▋         | 3061/43796 [00:05<01:15, 537.36it/s]

  7%|▋         | 3120/43796 [00:05<01:13, 550.61it/s]

  7%|▋         | 3179/43796 [00:05<01:12, 559.36it/s]

  7%|▋         | 3237/43796 [00:06<01:11, 565.33it/s]

  8%|▊         | 3295/43796 [00:06<01:30, 448.90it/s]

  8%|▊         | 3353/43796 [00:06<01:24, 481.39it/s]

  8%|▊         | 3411/43796 [00:06<01:19, 506.92it/s]

  8%|▊         | 3469/43796 [00:06<01:16, 526.72it/s]

  8%|▊         | 3527/43796 [00:06<01:14, 541.41it/s]

  8%|▊         | 3586/43796 [00:06<01:12, 553.00it/s]

  8%|▊         | 3645/43796 [00:06<01:11, 561.46it/s]

  8%|▊         | 3703/43796 [00:06<01:25, 468.57it/s]

  9%|▊         | 3762/43796 [00:07<01:20, 497.69it/s]

  9%|▊         | 3821/43796 [00:07<01:16, 520.40it/s]

  9%|▉         | 3880/43796 [00:07<01:14, 537.85it/s]

  9%|▉         | 3939/43796 [00:07<01:12, 550.67it/s]

  9%|▉         | 3998/43796 [00:07<01:11, 559.62it/s]

  9%|▉         | 4057/43796 [00:07<01:10, 565.98it/s]

  9%|▉         | 4115/43796 [00:07<01:24, 470.14it/s]

 10%|▉         | 4174/43796 [00:07<01:19, 498.62it/s]

 10%|▉         | 4233/43796 [00:07<01:15, 520.89it/s]

 10%|▉         | 4292/43796 [00:08<01:13, 537.68it/s]

 10%|▉         | 4351/43796 [00:08<01:11, 549.93it/s]

 10%|█         | 4410/43796 [00:08<01:10, 559.39it/s]

 10%|█         | 4469/43796 [00:08<01:09, 566.14it/s]

 10%|█         | 4527/43796 [00:08<01:26, 454.73it/s]

 10%|█         | 4585/43796 [00:08<01:20, 485.63it/s]

 11%|█         | 4643/43796 [00:08<01:16, 510.10it/s]

 11%|█         | 4702/43796 [00:08<01:13, 529.57it/s]

 11%|█         | 4760/43796 [00:08<01:11, 543.36it/s]

 11%|█         | 4819/43796 [00:09<01:10, 554.70it/s]

 11%|█         | 4878/43796 [00:09<01:09, 562.18it/s]

 11%|█▏        | 4936/43796 [00:09<01:24, 462.02it/s]

 11%|█▏        | 4993/43796 [00:09<01:19, 488.73it/s]

 12%|█▏        | 5051/43796 [00:09<01:15, 512.93it/s]

 12%|█▏        | 5110/43796 [00:09<01:12, 532.04it/s]

 12%|█▏        | 5168/43796 [00:09<01:10, 544.69it/s]

 12%|█▏        | 5227/43796 [00:09<01:09, 555.75it/s]

 12%|█▏        | 5286/43796 [00:09<01:08, 563.69it/s]

 12%|█▏        | 5344/43796 [00:10<01:25, 450.96it/s]

 12%|█▏        | 5403/43796 [00:10<01:19, 483.79it/s]

 12%|█▏        | 5462/43796 [00:10<01:15, 509.94it/s]

 13%|█▎        | 5521/43796 [00:10<01:12, 529.25it/s]

 13%|█▎        | 5580/43796 [00:10<01:10, 544.28it/s]

 13%|█▎        | 5639/43796 [00:10<01:08, 554.91it/s]

 13%|█▎        | 5698/43796 [00:10<01:07, 562.41it/s]

 13%|█▎        | 5756/43796 [00:10<01:25, 444.09it/s]

 13%|█▎        | 5814/43796 [00:11<01:19, 477.06it/s]

 13%|█▎        | 5872/43796 [00:11<01:15, 503.40it/s]

 14%|█▎        | 5930/43796 [00:11<01:12, 524.01it/s]

 14%|█▎        | 5987/43796 [00:11<01:10, 536.67it/s]

 14%|█▍        | 6045/43796 [00:11<01:08, 548.72it/s]

 14%|█▍        | 6103/43796 [00:11<01:07, 557.26it/s]

 14%|█▍        | 6160/43796 [00:11<01:23, 449.75it/s]

 14%|█▍        | 6219/43796 [00:11<01:17, 482.82it/s]

 14%|█▍        | 6278/43796 [00:11<01:13, 509.09it/s]

 14%|█▍        | 6337/43796 [00:12<01:10, 528.89it/s]

 15%|█▍        | 6396/43796 [00:12<01:08, 543.84it/s]

 15%|█▍        | 6455/43796 [00:12<01:07, 555.08it/s]

 15%|█▍        | 6514/43796 [00:12<01:06, 562.97it/s]

 15%|█▌        | 6572/43796 [00:12<01:22, 451.81it/s]

 15%|█▌        | 6631/43796 [00:12<01:16, 484.50it/s]

 15%|█▌        | 6690/43796 [00:12<01:12, 510.93it/s]

 15%|█▌        | 6749/43796 [00:12<01:09, 530.86it/s]

 16%|█▌        | 6808/43796 [00:12<01:07, 545.06it/s]

 16%|█▌        | 6867/43796 [00:13<01:06, 556.13it/s]

 16%|█▌        | 6926/43796 [00:13<01:05, 563.81it/s]

 16%|█▌        | 6984/43796 [00:13<01:04, 567.30it/s]

 16%|█▌        | 7042/43796 [00:13<01:23, 437.68it/s]

 16%|█▌        | 7101/43796 [00:13<01:17, 473.07it/s]

 16%|█▋        | 7160/43796 [00:13<01:13, 501.56it/s]

 16%|█▋        | 7218/43796 [00:13<01:10, 521.86it/s]

 17%|█▋        | 7277/43796 [00:13<01:07, 538.49it/s]

 17%|█▋        | 7336/43796 [00:13<01:06, 551.65it/s]

 17%|█▋        | 7395/43796 [00:14<01:04, 561.18it/s]

 17%|█▋        | 7453/43796 [00:14<01:21, 446.57it/s]

 17%|█▋        | 7512/43796 [00:14<01:15, 480.23it/s]

 17%|█▋        | 7571/43796 [00:14<01:11, 506.59it/s]

 17%|█▋        | 7630/43796 [00:14<01:08, 527.56it/s]

 18%|█▊        | 7689/43796 [00:14<01:06, 542.44it/s]

 18%|█▊        | 7748/43796 [00:14<01:05, 554.24it/s]

 18%|█▊        | 7807/43796 [00:14<01:03, 562.62it/s]

 18%|█▊        | 7865/43796 [00:15<01:20, 444.88it/s]

 18%|█▊        | 7923/43796 [00:15<01:15, 476.96it/s]

 18%|█▊        | 7982/43796 [00:15<01:10, 504.70it/s]

 18%|█▊        | 8041/43796 [00:15<01:08, 525.79it/s]

 18%|█▊        | 8100/43796 [00:15<01:05, 541.31it/s]

 19%|█▊        | 8159/43796 [00:15<01:04, 552.65it/s]

 19%|█▉        | 8218/43796 [00:15<01:03, 560.69it/s]

 19%|█▉        | 8276/43796 [00:15<01:22, 428.55it/s]

 19%|█▉        | 8334/43796 [00:15<01:16, 464.51it/s]

 19%|█▉        | 8392/43796 [00:16<01:11, 493.47it/s]

 19%|█▉        | 8450/43796 [00:16<01:08, 515.92it/s]

 19%|█▉        | 8508/43796 [00:16<01:06, 532.95it/s]

 20%|█▉        | 8566/43796 [00:16<01:04, 546.05it/s]

 20%|█▉        | 8624/43796 [00:16<01:03, 555.35it/s]

 20%|█▉        | 8681/43796 [00:16<01:20, 435.07it/s]

 20%|█▉        | 8740/43796 [00:16<01:14, 470.94it/s]

 20%|██        | 8799/43796 [00:16<01:10, 499.77it/s]

 20%|██        | 8858/43796 [00:16<01:06, 521.87it/s]

 20%|██        | 8916/43796 [00:17<01:05, 535.67it/s]

 20%|██        | 8975/43796 [00:17<01:03, 548.97it/s]

 21%|██        | 9034/43796 [00:17<01:02, 558.56it/s]

 21%|██        | 9093/43796 [00:17<01:01, 565.28it/s]

 21%|██        | 9151/43796 [00:17<01:19, 435.86it/s]

 21%|██        | 9210/43796 [00:17<01:13, 471.88it/s]

 21%|██        | 9268/43796 [00:17<01:09, 498.83it/s]

 21%|██▏       | 9327/43796 [00:17<01:06, 521.43it/s]

 21%|██▏       | 9386/43796 [00:17<01:03, 538.20it/s]

 22%|██▏       | 9445/43796 [00:18<01:02, 550.67it/s]

 22%|██▏       | 9504/43796 [00:18<01:01, 559.37it/s]

 22%|██▏       | 9561/43796 [00:18<01:21, 421.56it/s]

 22%|██▏       | 9619/43796 [00:18<01:14, 458.91it/s]

 22%|██▏       | 9677/43796 [00:18<01:09, 489.10it/s]

 22%|██▏       | 9735/43796 [00:18<01:06, 513.17it/s]

 22%|██▏       | 9793/43796 [00:18<01:04, 531.21it/s]

 22%|██▏       | 9851/43796 [00:18<01:02, 544.72it/s]

 23%|██▎       | 9908/43796 [00:18<01:01, 551.88it/s]

 23%|██▎       | 9965/43796 [00:19<01:19, 426.54it/s]

 23%|██▎       | 10024/43796 [00:19<01:12, 464.65it/s]

 23%|██▎       | 10083/43796 [00:19<01:08, 495.13it/s]

 23%|██▎       | 10142/43796 [00:19<01:04, 519.01it/s]

 23%|██▎       | 10201/43796 [00:19<01:02, 537.07it/s]

 23%|██▎       | 10260/43796 [00:19<01:00, 550.43it/s]

 24%|██▎       | 10319/43796 [00:19<00:59, 559.99it/s]

 24%|██▎       | 10377/43796 [00:20<01:17, 429.84it/s]

 24%|██▍       | 10436/43796 [00:20<01:11, 466.91it/s]

 24%|██▍       | 10495/43796 [00:20<01:07, 496.81it/s]

 24%|██▍       | 10554/43796 [00:20<01:03, 520.10it/s]

 24%|██▍       | 10613/43796 [00:20<01:01, 537.43it/s]

 24%|██▍       | 10672/43796 [00:20<01:00, 550.04it/s]

 25%|██▍       | 10731/43796 [00:20<00:59, 559.03it/s]

 25%|██▍       | 10789/43796 [00:20<01:19, 413.85it/s]

 25%|██▍       | 10847/43796 [00:20<01:12, 452.39it/s]

 25%|██▍       | 10906/43796 [00:21<01:07, 485.33it/s]

 25%|██▌       | 10965/43796 [00:21<01:04, 511.01it/s]

 25%|██▌       | 11024/43796 [00:21<01:01, 530.52it/s]

 25%|██▌       | 11083/43796 [00:21<01:00, 544.82it/s]

 25%|██▌       | 11141/43796 [00:21<00:58, 554.66it/s]

 26%|██▌       | 11199/43796 [00:21<01:17, 423.10it/s]

 26%|██▌       | 11257/43796 [00:21<01:10, 460.15it/s]

 26%|██▌       | 11316/43796 [00:21<01:06, 491.60it/s]

 26%|██▌       | 11375/43796 [00:21<01:02, 516.19it/s]

 26%|██▌       | 11434/43796 [00:22<01:00, 534.22it/s]

 26%|██▌       | 11493/43796 [00:22<00:58, 548.23it/s]

 26%|██▋       | 11552/43796 [00:22<00:57, 558.06it/s]

 27%|██▋       | 11611/43796 [00:22<00:56, 565.48it/s]

 27%|██▋       | 11669/43796 [00:22<01:15, 425.55it/s]

 27%|██▋       | 11728/43796 [00:22<01:09, 463.35it/s]

 27%|██▋       | 11786/43796 [00:22<01:05, 491.35it/s]

 27%|██▋       | 11845/43796 [00:22<01:01, 515.43it/s]

 27%|██▋       | 11903/43796 [00:22<00:59, 532.88it/s]

 27%|██▋       | 11962/43796 [00:23<00:58, 546.88it/s]

 27%|██▋       | 12021/43796 [00:23<00:57, 556.71it/s]

 28%|██▊       | 12078/43796 [00:23<01:18, 406.43it/s]

 28%|██▊       | 12137/43796 [00:23<01:10, 446.95it/s]

 28%|██▊       | 12196/43796 [00:23<01:05, 480.37it/s]

 28%|██▊       | 12255/43796 [00:23<01:02, 507.27it/s]

 28%|██▊       | 12314/43796 [00:23<00:59, 527.80it/s]

 28%|██▊       | 12373/43796 [00:23<00:57, 543.24it/s]

 28%|██▊       | 12432/43796 [00:24<00:56, 554.98it/s]

 29%|██▊       | 12489/43796 [00:24<01:15, 416.36it/s]

 29%|██▊       | 12548/43796 [00:24<01:08, 455.46it/s]

 29%|██▉       | 12606/43796 [00:24<01:04, 486.30it/s]

 29%|██▉       | 12665/43796 [00:24<01:00, 511.79it/s]

 29%|██▉       | 12723/43796 [00:24<00:58, 529.33it/s]

 29%|██▉       | 12782/43796 [00:24<00:56, 544.71it/s]

 29%|██▉       | 12841/43796 [00:24<00:55, 555.81it/s]

 29%|██▉       | 12900/43796 [00:24<00:54, 563.69it/s]

 30%|██▉       | 12958/43796 [00:25<01:15, 410.77it/s]

 30%|██▉       | 13016/43796 [00:25<01:08, 449.83it/s]

 30%|██▉       | 13075/43796 [00:25<01:03, 482.66it/s]

 30%|██▉       | 13133/43796 [00:25<01:00, 507.60it/s]

 30%|███       | 13192/43796 [00:25<00:57, 528.30it/s]

 30%|███       | 13250/43796 [00:25<00:56, 542.07it/s]

 30%|███       | 13309/43796 [00:25<00:55, 553.89it/s]

 31%|███       | 13366/43796 [00:26<01:16, 396.76it/s]

 31%|███       | 13425/43796 [00:26<01:09, 438.89it/s]

 31%|███       | 13484/43796 [00:26<01:03, 474.09it/s]

 31%|███       | 13543/43796 [00:26<01:00, 502.09it/s]

 31%|███       | 13602/43796 [00:26<00:57, 524.10it/s]

 31%|███       | 13660/43796 [00:26<00:55, 538.37it/s]

 31%|███▏      | 13719/43796 [00:26<00:54, 551.34it/s]

 31%|███▏      | 13778/43796 [00:26<00:53, 560.63it/s]

 32%|███▏      | 13836/43796 [00:26<01:12, 411.97it/s]

 32%|███▏      | 13895/43796 [00:27<01:06, 452.16it/s]

 32%|███▏      | 13954/43796 [00:27<01:01, 484.68it/s]

 32%|███▏      | 14013/43796 [00:27<00:58, 510.58it/s]

 32%|███▏      | 14072/43796 [00:27<00:55, 530.82it/s]

 32%|███▏      | 14131/43796 [00:27<00:54, 545.82it/s]

 32%|███▏      | 14190/43796 [00:27<00:53, 556.99it/s]

 33%|███▎      | 14248/43796 [00:27<01:14, 398.36it/s]

 33%|███▎      | 14307/43796 [00:27<01:06, 440.25it/s]

 33%|███▎      | 14365/43796 [00:28<01:02, 474.17it/s]

 33%|███▎      | 14424/43796 [00:28<00:58, 501.84it/s]

 33%|███▎      | 14482/43796 [00:28<00:56, 522.38it/s]

 33%|███▎      | 14540/43796 [00:28<00:54, 537.96it/s]

 33%|███▎      | 14597/43796 [00:28<00:53, 546.68it/s]

 33%|███▎      | 14654/43796 [00:28<01:14, 388.85it/s]

 34%|███▎      | 14712/43796 [00:28<01:07, 431.38it/s]

 34%|███▎      | 14770/43796 [00:28<01:02, 466.83it/s]

 34%|███▍      | 14828/43796 [00:28<00:58, 495.84it/s]

 34%|███▍      | 14887/43796 [00:29<00:55, 518.79it/s]

 34%|███▍      | 14946/43796 [00:29<00:53, 536.01it/s]

 34%|███▍      | 15004/43796 [00:29<00:52, 548.25it/s]

 34%|███▍      | 15063/43796 [00:29<00:51, 557.98it/s]

 35%|███▍      | 15121/43796 [00:29<01:10, 404.22it/s]

 35%|███▍      | 15179/43796 [00:29<01:04, 444.18it/s]

 35%|███▍      | 15238/43796 [00:29<00:59, 478.73it/s]

 35%|███▍      | 15297/43796 [00:29<00:56, 506.30it/s]

 35%|███▌      | 15356/43796 [00:30<00:53, 527.36it/s]

 35%|███▌      | 15415/43796 [00:30<00:52, 543.22it/s]

 35%|███▌      | 15474/43796 [00:30<00:51, 554.76it/s]

 35%|███▌      | 15532/43796 [00:30<01:12, 388.56it/s]

 36%|███▌      | 15591/43796 [00:30<01:05, 431.64it/s]

 36%|███▌      | 15650/43796 [00:30<01:00, 467.78it/s]

 36%|███▌      | 15709/43796 [00:30<00:56, 497.19it/s]

 36%|███▌      | 15768/43796 [00:30<00:53, 520.17it/s]

 36%|███▌      | 15827/43796 [00:30<00:52, 537.07it/s]

 36%|███▋      | 15886/43796 [00:31<00:50, 549.85it/s]

 36%|███▋      | 15945/43796 [00:31<00:49, 559.17it/s]

 37%|███▋      | 16003/43796 [00:31<01:11, 389.65it/s]

 37%|███▋      | 16062/43796 [00:31<01:04, 432.86it/s]

 37%|███▋      | 16121/43796 [00:31<00:58, 469.10it/s]

 37%|███▋      | 16180/43796 [00:31<00:55, 498.49it/s]

 37%|███▋      | 16239/43796 [00:31<00:52, 521.36it/s]

 37%|███▋      | 16298/43796 [00:31<00:51, 538.44it/s]

 37%|███▋      | 16357/43796 [00:32<00:49, 551.07it/s]

 37%|███▋      | 16414/43796 [00:32<01:09, 395.24it/s]

 38%|███▊      | 16473/43796 [00:32<01:02, 437.90it/s]

 38%|███▊      | 16532/43796 [00:32<00:57, 473.02it/s]

 38%|███▊      | 16591/43796 [00:32<00:54, 501.68it/s]

 38%|███▊      | 16650/43796 [00:32<00:51, 523.67it/s]

 38%|███▊      | 16709/43796 [00:32<00:50, 540.10it/s]

 38%|███▊      | 16768/43796 [00:32<00:48, 552.28it/s]

 38%|███▊      | 16825/43796 [00:33<01:10, 382.13it/s]

 39%|███▊      | 16884/43796 [00:33<01:03, 426.13it/s]

 39%|███▊      | 16942/43796 [00:33<00:58, 462.47it/s]

 39%|███▉      | 17000/43796 [00:33<00:54, 490.85it/s]

 39%|███▉      | 17059/43796 [00:33<00:51, 514.89it/s]

 39%|███▉      | 17117/43796 [00:33<00:50, 532.68it/s]

 39%|███▉      | 17175/43796 [00:33<00:48, 545.58it/s]

 39%|███▉      | 17234/43796 [00:33<00:47, 556.35it/s]

 39%|███▉      | 17292/43796 [00:34<01:08, 389.34it/s]

 40%|███▉      | 17351/43796 [00:34<01:01, 433.15it/s]

 40%|███▉      | 17410/43796 [00:34<00:56, 469.72it/s]

 40%|███▉      | 17469/43796 [00:34<00:52, 499.03it/s]

 40%|████      | 17528/43796 [00:34<00:50, 522.30it/s]

 40%|████      | 17587/43796 [00:34<00:48, 539.78it/s]

 40%|████      | 17646/43796 [00:34<00:47, 552.67it/s]

 40%|████      | 17704/43796 [00:34<01:08, 383.55it/s]

 41%|████      | 17763/43796 [00:35<01:00, 427.87it/s]

 41%|████      | 17822/43796 [00:35<00:55, 465.43it/s]

 41%|████      | 17881/43796 [00:35<00:52, 495.55it/s]

 41%|████      | 17940/43796 [00:35<00:49, 518.80it/s]

 41%|████      | 17999/43796 [00:35<00:48, 536.50it/s]

 41%|████      | 18058/43796 [00:35<00:46, 549.76it/s]

 41%|████▏     | 18117/43796 [00:35<00:45, 559.64it/s]

 41%|████▏     | 18175/43796 [00:35<01:07, 377.67it/s]

 42%|████▏     | 18233/43796 [00:36<01:00, 420.59it/s]

 42%|████▏     | 18292/43796 [00:36<00:55, 458.89it/s]

 42%|████▏     | 18350/43796 [00:36<00:52, 488.53it/s]

 42%|████▏     | 18409/43796 [00:36<00:49, 513.41it/s]

 42%|████▏     | 18468/43796 [00:36<00:47, 532.83it/s]

 42%|████▏     | 18527/43796 [00:36<00:46, 546.75it/s]

 42%|████▏     | 18584/43796 [00:36<01:05, 383.88it/s]

 43%|████▎     | 18642/43796 [00:36<00:58, 427.05it/s]

 43%|████▎     | 18701/43796 [00:37<00:54, 464.29it/s]

 43%|████▎     | 18760/43796 [00:37<00:50, 494.79it/s]

 43%|████▎     | 18819/43796 [00:37<00:48, 518.14it/s]

 43%|████▎     | 18878/43796 [00:37<00:46, 536.24it/s]

 43%|████▎     | 18936/43796 [00:37<00:45, 547.90it/s]

 43%|████▎     | 18995/43796 [00:37<00:44, 558.12it/s]

 44%|████▎     | 19053/43796 [00:37<01:03, 386.93it/s]

 44%|████▎     | 19111/43796 [00:37<00:57, 428.96it/s]

 44%|████▍     | 19170/43796 [00:37<00:52, 466.02it/s]

 44%|████▍     | 19229/43796 [00:38<00:49, 495.96it/s]

 44%|████▍     | 19288/43796 [00:38<00:47, 519.39it/s]

 44%|████▍     | 19347/43796 [00:38<00:45, 536.51it/s]

 44%|████▍     | 19406/43796 [00:38<00:44, 549.62it/s]

 44%|████▍     | 19463/43796 [00:38<01:05, 369.98it/s]

 45%|████▍     | 19522/43796 [00:38<00:58, 415.84it/s]

 45%|████▍     | 19581/43796 [00:38<00:53, 455.00it/s]

 45%|████▍     | 19639/43796 [00:38<00:49, 486.00it/s]

 45%|████▍     | 19698/43796 [00:39<00:47, 511.69it/s]

 45%|████▌     | 19757/43796 [00:39<00:45, 530.84it/s]

 45%|████▌     | 19816/43796 [00:39<00:43, 545.39it/s]

 45%|████▌     | 19875/43796 [00:39<00:42, 556.42it/s]

 46%|████▌     | 19933/43796 [00:39<01:02, 382.88it/s]

 46%|████▌     | 19992/43796 [00:39<00:55, 427.44it/s]

 46%|████▌     | 20050/43796 [00:39<00:51, 462.45it/s]

 46%|████▌     | 20109/43796 [00:39<00:48, 493.44it/s]

 46%|████▌     | 20168/43796 [00:40<00:45, 517.15it/s]

 46%|████▌     | 20227/43796 [00:40<00:44, 535.27it/s]

 46%|████▋     | 20286/43796 [00:40<00:42, 549.24it/s]

 46%|████▋     | 20343/43796 [00:40<01:04, 364.60it/s]

 47%|████▋     | 20402/43796 [00:40<00:56, 410.91it/s]

 47%|████▋     | 20461/43796 [00:40<00:51, 450.74it/s]

 47%|████▋     | 20520/43796 [00:40<00:48, 483.30it/s]

 47%|████▋     | 20579/43796 [00:40<00:45, 509.02it/s]

 47%|████▋     | 20638/43796 [00:41<00:43, 528.56it/s]

 47%|████▋     | 20697/43796 [00:41<00:42, 543.61it/s]

 47%|████▋     | 20756/43796 [00:41<00:41, 554.36it/s]

 48%|████▊     | 20814/43796 [00:41<01:02, 365.48it/s]

 48%|████▊     | 20873/43796 [00:41<00:55, 411.94it/s]

 48%|████▊     | 20931/43796 [00:41<00:50, 449.28it/s]

 48%|████▊     | 20990/43796 [00:41<00:47, 483.12it/s]

 48%|████▊     | 21049/43796 [00:41<00:44, 510.13it/s]

 48%|████▊     | 21108/43796 [00:42<00:42, 530.58it/s]

 48%|████▊     | 21167/43796 [00:42<00:41, 545.92it/s]

 48%|████▊     | 21226/43796 [00:42<00:40, 557.54it/s]

 49%|████▊     | 21284/43796 [00:42<00:59, 377.20it/s]

 49%|████▊     | 21343/43796 [00:42<00:53, 422.19it/s]

 49%|████▉     | 21402/43796 [00:42<00:48, 460.58it/s]

 49%|████▉     | 21461/43796 [00:42<00:45, 492.02it/s]

 49%|████▉     | 21520/43796 [00:42<00:43, 516.20it/s]

 49%|████▉     | 21579/43796 [00:43<00:41, 534.47it/s]

 49%|████▉     | 21638/43796 [00:43<00:40, 548.44it/s]

 50%|████▉     | 21695/43796 [00:43<01:01, 359.33it/s]

 50%|████▉     | 21754/43796 [00:43<00:54, 406.13it/s]

 50%|████▉     | 21812/43796 [00:43<00:49, 444.75it/s]

 50%|████▉     | 21871/43796 [00:43<00:45, 479.15it/s]

 50%|█████     | 21930/43796 [00:43<00:43, 506.33it/s]

 50%|█████     | 21989/43796 [00:43<00:41, 527.48it/s]

 50%|█████     | 22048/43796 [00:44<00:40, 543.42it/s]

 50%|█████     | 22107/43796 [00:44<00:39, 555.66it/s]

 51%|█████     | 22165/43796 [00:44<00:59, 365.04it/s]

 51%|█████     | 22212/43796 [00:44<01:00, 354.42it/s]

 51%|█████     | 22255/43796 [00:44<01:03, 338.03it/s]

 51%|█████     | 22301/43796 [00:44<00:59, 363.30it/s]

 51%|█████     | 22342/43796 [00:44<01:03, 340.17it/s]

 51%|█████     | 22379/43796 [00:45<01:03, 337.80it/s]

 51%|█████     | 22416/43796 [00:45<01:02, 344.16it/s]

 51%|█████▏    | 22452/43796 [00:45<01:05, 326.67it/s]

 51%|█████▏    | 22490/43796 [00:45<01:02, 338.47it/s]

 51%|█████▏    | 22525/43796 [00:45<01:06, 320.26it/s]

 52%|█████▏    | 22558/43796 [00:45<01:07, 314.93it/s]

 52%|█████▏    | 22590/43796 [00:45<01:42, 206.86it/s]

 52%|█████▏    | 22649/43796 [00:46<01:14, 283.19it/s]

 52%|█████▏    | 22708/43796 [00:46<01:00, 350.53it/s]

 52%|█████▏    | 22766/43796 [00:46<00:51, 405.80it/s]

 52%|█████▏    | 22825/43796 [00:46<00:46, 451.12it/s]

 52%|█████▏    | 22884/43796 [00:46<00:42, 486.40it/s]

 52%|█████▏    | 22943/43796 [00:46<00:40, 512.81it/s]

 53%|█████▎    | 23002/43796 [00:46<00:39, 532.94it/s]

 53%|█████▎    | 23058/43796 [00:46<01:00, 343.83it/s]

 53%|█████▎    | 23117/43796 [00:47<00:52, 393.82it/s]

 53%|█████▎    | 23176/43796 [00:47<00:47, 437.21it/s]

 53%|█████▎    | 23235/43796 [00:47<00:43, 473.21it/s]

 53%|█████▎    | 23294/43796 [00:47<00:40, 502.13it/s]

 53%|█████▎    | 23351/43796 [00:47<00:39, 519.38it/s]

 53%|█████▎    | 23410/43796 [00:47<00:37, 537.29it/s]

 54%|█████▎    | 23467/43796 [00:47<00:56, 359.21it/s]

 54%|█████▎    | 23526/43796 [00:47<00:49, 406.67it/s]

 54%|█████▍    | 23585/43796 [00:48<00:45, 447.62it/s]

 54%|█████▍    | 23644/43796 [00:48<00:41, 481.30it/s]

 54%|█████▍    | 23703/43796 [00:48<00:39, 507.61it/s]

 54%|█████▍    | 23762/43796 [00:48<00:37, 528.16it/s]

 54%|█████▍    | 23821/43796 [00:48<00:36, 543.72it/s]

 55%|█████▍    | 23880/43796 [00:48<00:35, 554.92it/s]

 55%|█████▍    | 23938/43796 [00:48<00:55, 355.96it/s]

 55%|█████▍    | 23996/43796 [00:48<00:49, 401.88it/s]

 55%|█████▍    | 24055/43796 [00:49<00:44, 443.14it/s]

 55%|█████▌    | 24114/43796 [00:49<00:41, 477.39it/s]

 55%|█████▌    | 24172/43796 [00:49<00:38, 503.42it/s]

 55%|█████▌    | 24229/43796 [00:49<00:37, 519.27it/s]

 55%|█████▌    | 24287/43796 [00:49<00:36, 534.94it/s]

 56%|█████▌    | 24346/43796 [00:49<00:35, 548.07it/s]

 56%|█████▌    | 24403/43796 [00:49<00:55, 348.90it/s]

 56%|█████▌    | 24462/43796 [00:49<00:48, 397.20it/s]

 56%|█████▌    | 24521/43796 [00:50<00:43, 439.60it/s]

 56%|█████▌    | 24580/43796 [00:50<00:40, 474.61it/s]

 56%|█████▋    | 24639/43796 [00:50<00:38, 502.29it/s]

 56%|█████▋    | 24697/43796 [00:50<00:36, 523.06it/s]

 57%|█████▋    | 24756/43796 [00:50<00:35, 539.18it/s]

 57%|█████▋    | 24813/43796 [00:50<00:53, 356.55it/s]

 57%|█████▋    | 24872/43796 [00:50<00:46, 403.85it/s]

 57%|█████▋    | 24930/43796 [00:50<00:42, 443.92it/s]

 57%|█████▋    | 24989/43796 [00:51<00:39, 478.61it/s]

 57%|█████▋    | 25048/43796 [00:51<00:37, 506.10it/s]

 57%|█████▋    | 25105/43796 [00:51<00:35, 521.47it/s]

 57%|█████▋    | 25163/43796 [00:51<00:34, 537.37it/s]

 58%|█████▊    | 25221/43796 [00:51<00:33, 549.04it/s]

 58%|█████▊    | 25278/43796 [00:51<00:53, 345.10it/s]

 58%|█████▊    | 25336/43796 [00:51<00:46, 392.85it/s]

 58%|█████▊    | 25394/43796 [00:51<00:42, 434.77it/s]

 58%|█████▊    | 25452/43796 [00:52<00:39, 469.91it/s]

 58%|█████▊    | 25510/43796 [00:52<00:36, 497.98it/s]

 58%|█████▊    | 25568/43796 [00:52<00:35, 519.87it/s]

 59%|█████▊    | 25626/43796 [00:52<00:33, 536.33it/s]

 59%|█████▊    | 25685/43796 [00:52<00:32, 549.00it/s]

 59%|█████▉    | 25742/43796 [00:52<00:51, 353.94it/s]

 59%|█████▉    | 25801/43796 [00:52<00:44, 401.94it/s]

 59%|█████▉    | 25859/43796 [00:52<00:40, 442.58it/s]

 59%|█████▉    | 25918/43796 [00:53<00:37, 477.08it/s]

 59%|█████▉    | 25975/43796 [00:53<00:35, 499.80it/s]

 59%|█████▉    | 26034/43796 [00:53<00:34, 522.27it/s]

 60%|█████▉    | 26093/43796 [00:53<00:32, 538.71it/s]

 60%|█████▉    | 26151/43796 [00:53<00:32, 550.16it/s]

 60%|█████▉    | 26208/43796 [00:53<00:49, 352.30it/s]

 60%|█████▉    | 26267/43796 [00:53<00:43, 400.07it/s]

 60%|██████    | 26326/43796 [00:53<00:39, 441.78it/s]

 60%|██████    | 26385/43796 [00:54<00:36, 476.38it/s]

 60%|██████    | 26444/43796 [00:54<00:34, 504.02it/s]

 61%|██████    | 26503/43796 [00:54<00:32, 525.14it/s]

 61%|██████    | 26561/43796 [00:54<00:31, 540.30it/s]

 61%|██████    | 26618/43796 [00:54<00:49, 346.77it/s]

 61%|██████    | 26676/43796 [00:54<00:43, 393.84it/s]

 61%|██████    | 26734/43796 [00:54<00:39, 435.24it/s]

 61%|██████    | 26792/43796 [00:54<00:36, 470.12it/s]

 61%|██████▏   | 26849/43796 [00:55<00:34, 494.04it/s]

 61%|██████▏   | 26907/43796 [00:55<00:32, 516.85it/s]

 62%|██████▏   | 26965/43796 [00:55<00:31, 534.31it/s]

 62%|██████▏   | 27024/43796 [00:55<00:30, 547.75it/s]

 62%|██████▏   | 27081/43796 [00:55<00:47, 348.64it/s]

 62%|██████▏   | 27139/43796 [00:55<00:42, 396.00it/s]

 62%|██████▏   | 27197/43796 [00:55<00:37, 437.35it/s]

 62%|██████▏   | 27255/43796 [00:55<00:35, 471.96it/s]

 62%|██████▏   | 27314/43796 [00:56<00:32, 500.42it/s]

 63%|██████▎   | 27373/43796 [00:56<00:31, 522.41it/s]

 63%|██████▎   | 27431/43796 [00:56<00:30, 538.11it/s]

 63%|██████▎   | 27489/43796 [00:56<00:29, 549.82it/s]

 63%|██████▎   | 27546/43796 [00:56<00:47, 339.88it/s]

 63%|██████▎   | 27604/43796 [00:56<00:41, 387.92it/s]

 63%|██████▎   | 27661/43796 [00:56<00:37, 427.13it/s]

 63%|██████▎   | 27720/43796 [00:57<00:34, 464.53it/s]

 63%|██████▎   | 27779/43796 [00:57<00:32, 494.69it/s]

 64%|██████▎   | 27838/43796 [00:57<00:30, 518.04it/s]

 64%|██████▎   | 27897/43796 [00:57<00:29, 535.76it/s]

 64%|██████▍   | 27955/43796 [00:57<00:28, 547.79it/s]

 64%|██████▍   | 28012/43796 [00:57<00:47, 335.81it/s]

 64%|██████▍   | 28071/43796 [00:57<00:40, 385.17it/s]

 64%|██████▍   | 28129/43796 [00:57<00:36, 428.08it/s]

 64%|██████▍   | 28188/43796 [00:58<00:33, 465.18it/s]

 64%|██████▍   | 28247/43796 [00:58<00:31, 494.93it/s]

 65%|██████▍   | 28305/43796 [00:58<00:29, 517.49it/s]

 65%|██████▍   | 28364/43796 [00:58<00:28, 535.64it/s]

 65%|██████▍   | 28423/43796 [00:58<00:28, 548.38it/s]

 65%|██████▌   | 28481/43796 [00:58<00:46, 331.43it/s]

 65%|██████▌   | 28537/43796 [00:58<00:40, 375.92it/s]

 65%|██████▌   | 28595/43796 [00:58<00:36, 420.08it/s]

 65%|██████▌   | 28653/43796 [00:59<00:33, 457.56it/s]

 66%|██████▌   | 28711/43796 [00:59<00:30, 487.92it/s]

 66%|██████▌   | 28770/43796 [00:59<00:29, 512.62it/s]

 66%|██████▌   | 28829/43796 [00:59<00:28, 531.38it/s]

 66%|██████▌   | 28887/43796 [00:59<00:27, 544.97it/s]

 66%|██████▌   | 28944/43796 [00:59<00:44, 331.71it/s]

 66%|██████▌   | 29003/43796 [00:59<00:38, 381.54it/s]

 66%|██████▋   | 29062/43796 [01:00<00:34, 426.05it/s]

 66%|██████▋   | 29121/43796 [01:00<00:31, 463.69it/s]

 67%|██████▋   | 29180/43796 [01:00<00:29, 494.00it/s]

 67%|██████▋   | 29239/43796 [01:00<00:28, 517.28it/s]

 67%|██████▋   | 29298/43796 [01:00<00:27, 534.90it/s]

 67%|██████▋   | 29356/43796 [01:00<00:26, 547.30it/s]

 67%|██████▋   | 29413/43796 [01:00<00:42, 338.18it/s]

 67%|██████▋   | 29472/43796 [01:00<00:36, 387.19it/s]

 67%|██████▋   | 29531/43796 [01:01<00:33, 430.62it/s]

 68%|██████▊   | 29590/43796 [01:01<00:30, 467.10it/s]

 68%|██████▊   | 29649/43796 [01:01<00:28, 496.66it/s]

 68%|██████▊   | 29708/43796 [01:01<00:27, 520.03it/s]

 68%|██████▊   | 29766/43796 [01:01<00:26, 536.38it/s]

 68%|██████▊   | 29823/43796 [01:01<00:41, 335.94it/s]

 68%|██████▊   | 29882/43796 [01:01<00:36, 385.21it/s]

 68%|██████▊   | 29941/43796 [01:01<00:32, 428.86it/s]

 68%|██████▊   | 29999/43796 [01:02<00:29, 464.93it/s]

 69%|██████▊   | 30058/43796 [01:02<00:27, 494.81it/s]

 69%|██████▉   | 30117/43796 [01:02<00:26, 518.35it/s]

 69%|██████▉   | 30176/43796 [01:02<00:25, 535.65it/s]

 69%|██████▉   | 30235/43796 [01:02<00:24, 548.78it/s]

 69%|██████▉   | 30293/43796 [01:02<00:40, 334.18it/s]

 69%|██████▉   | 30351/43796 [01:02<00:35, 382.41it/s]

 69%|██████▉   | 30410/43796 [01:03<00:31, 426.38it/s]

 70%|██████▉   | 30469/43796 [01:03<00:28, 463.66it/s]

 70%|██████▉   | 30528/43796 [01:03<00:26, 493.99it/s]

 70%|██████▉   | 30587/43796 [01:03<00:25, 517.18it/s]

 70%|██████▉   | 30645/43796 [01:03<00:24, 534.07it/s]

 70%|███████   | 30704/43796 [01:03<00:23, 547.84it/s]

 70%|███████   | 30762/43796 [01:03<00:38, 336.28it/s]

 70%|███████   | 30821/43796 [01:03<00:33, 385.22it/s]

 71%|███████   | 30880/43796 [01:04<00:30, 428.87it/s]

 71%|███████   | 30939/43796 [01:04<00:27, 466.06it/s]

 71%|███████   | 30998/43796 [01:04<00:25, 495.87it/s]

 71%|███████   | 31057/43796 [01:04<00:24, 519.08it/s]

 71%|███████   | 31116/43796 [01:04<00:23, 536.22it/s]

 71%|███████   | 31173/43796 [01:04<00:23, 544.43it/s]

 71%|███████▏  | 31230/43796 [01:04<00:37, 333.31it/s]

 71%|███████▏  | 31289/43796 [01:04<00:32, 383.04it/s]

 72%|███████▏  | 31348/43796 [01:05<00:29, 427.30it/s]

 72%|███████▏  | 31407/43796 [01:05<00:26, 464.69it/s]

 72%|███████▏  | 31466/43796 [01:05<00:24, 494.77it/s]

 72%|███████▏  | 31524/43796 [01:05<00:23, 516.86it/s]

 72%|███████▏  | 31583/43796 [01:05<00:22, 535.22it/s]

 72%|███████▏  | 31642/43796 [01:05<00:22, 548.53it/s]

 72%|███████▏  | 31700/43796 [01:05<00:36, 334.03it/s]

 73%|███████▎  | 31758/43796 [01:06<00:31, 382.35it/s]

 73%|███████▎  | 31817/43796 [01:06<00:28, 426.55it/s]

 73%|███████▎  | 31876/43796 [01:06<00:25, 464.02it/s]

 73%|███████▎  | 31935/43796 [01:06<00:23, 494.54it/s]

 73%|███████▎  | 31992/43796 [01:06<00:23, 513.14it/s]

 73%|███████▎  | 32050/43796 [01:06<00:22, 530.21it/s]

 73%|███████▎  | 32109/43796 [01:06<00:21, 545.46it/s]

 73%|███████▎  | 32166/43796 [01:06<00:36, 321.82it/s]

 74%|███████▎  | 32225/43796 [01:07<00:31, 372.42it/s]

 74%|███████▎  | 32283/43796 [01:07<00:27, 416.90it/s]

 74%|███████▍  | 32342/43796 [01:07<00:25, 455.87it/s]

 74%|███████▍  | 32401/43796 [01:07<00:23, 488.01it/s]

 74%|███████▍  | 32460/43796 [01:07<00:22, 512.90it/s]

 74%|███████▍  | 32519/43796 [01:07<00:21, 531.92it/s]

 74%|███████▍  | 32578/43796 [01:07<00:20, 546.17it/s]

 75%|███████▍  | 32636/43796 [01:08<00:34, 322.05it/s]

 75%|███████▍  | 32694/43796 [01:08<00:29, 371.14it/s]

 75%|███████▍  | 32752/43796 [01:08<00:26, 415.70it/s]

 75%|███████▍  | 32811/43796 [01:08<00:24, 454.72it/s]

 75%|███████▌  | 32870/43796 [01:08<00:22, 486.76it/s]

 75%|███████▌  | 32929/43796 [01:08<00:21, 512.23it/s]

 75%|███████▌  | 32988/43796 [01:08<00:20, 531.50it/s]

 75%|███████▌  | 33047/43796 [01:08<00:19, 545.59it/s]

 76%|███████▌  | 33105/43796 [01:09<00:33, 323.14it/s]

 76%|███████▌  | 33163/43796 [01:09<00:28, 372.01it/s]

 76%|███████▌  | 33222/43796 [01:09<00:25, 417.35it/s]

 76%|███████▌  | 33280/43796 [01:09<00:23, 455.15it/s]

 76%|███████▌  | 33339/43796 [01:09<00:21, 486.89it/s]

 76%|███████▋  | 33396/43796 [01:09<00:20, 506.81it/s]

 76%|███████▋  | 33454/43796 [01:09<00:19, 525.78it/s]

 77%|███████▋  | 33512/43796 [01:09<00:19, 539.48it/s]

 77%|███████▋  | 33569/43796 [01:10<00:33, 308.16it/s]

 77%|███████▋  | 33628/43796 [01:10<00:28, 359.49it/s]

 77%|███████▋  | 33687/43796 [01:10<00:24, 406.77it/s]

 77%|███████▋  | 33746/43796 [01:10<00:22, 447.18it/s]

 77%|███████▋  | 33805/43796 [01:10<00:20, 480.53it/s]

 77%|███████▋  | 33864/43796 [01:10<00:19, 507.11it/s]

 77%|███████▋  | 33923/43796 [01:10<00:18, 527.47it/s]

 78%|███████▊  | 33982/43796 [01:10<00:18, 542.47it/s]

 78%|███████▊  | 34039/43796 [01:11<00:30, 318.90it/s]

 78%|███████▊  | 34097/43796 [01:11<00:26, 368.46it/s]

 78%|███████▊  | 34156/43796 [01:11<00:23, 414.36it/s]

 78%|███████▊  | 34215/43796 [01:11<00:21, 453.66it/s]

 78%|███████▊  | 34274/43796 [01:11<00:19, 485.78it/s]

 78%|███████▊  | 34333/43796 [01:11<00:18, 511.22it/s]

 79%|███████▊  | 34392/43796 [01:11<00:17, 530.93it/s]

 79%|███████▊  | 34451/43796 [01:11<00:17, 545.91it/s]

 79%|███████▉  | 34509/43796 [01:12<00:28, 321.37it/s]

 79%|███████▉  | 34567/43796 [01:12<00:24, 370.55it/s]

 79%|███████▉  | 34626/43796 [01:12<00:22, 415.99it/s]

 79%|███████▉  | 34685/43796 [01:12<00:20, 454.97it/s]

 79%|███████▉  | 34744/43796 [01:12<00:18, 487.13it/s]

 79%|███████▉  | 34803/43796 [01:12<00:17, 512.26it/s]

 80%|███████▉  | 34861/43796 [01:12<00:16, 530.61it/s]

 80%|███████▉  | 34920/43796 [01:13<00:16, 544.87it/s]

 80%|███████▉  | 34978/43796 [01:13<00:27, 321.12it/s]

 80%|███████▉  | 35036/43796 [01:13<00:23, 370.19it/s]

 80%|████████  | 35095/43796 [01:13<00:20, 415.96it/s]

 80%|████████  | 35153/43796 [01:13<00:19, 453.49it/s]

 80%|████████  | 35212/43796 [01:13<00:17, 485.97it/s]

 81%|████████  | 35271/43796 [01:13<00:16, 511.36it/s]

 81%|████████  | 35330/43796 [01:14<00:15, 530.81it/s]

 81%|████████  | 35389/43796 [01:14<00:15, 545.19it/s]

 81%|████████  | 35447/43796 [01:14<00:26, 320.28it/s]

 81%|████████  | 35506/43796 [01:14<00:22, 370.51it/s]

 81%|████████  | 35565/43796 [01:14<00:19, 415.94it/s]

 81%|████████▏ | 35624/43796 [01:14<00:17, 455.02it/s]

 81%|████████▏ | 35683/43796 [01:14<00:16, 487.08it/s]

 82%|████████▏ | 35741/43796 [01:14<00:15, 510.69it/s]

 82%|████████▏ | 35799/43796 [01:15<00:15, 529.51it/s]

 82%|████████▏ | 35857/43796 [01:15<00:14, 543.37it/s]

 82%|████████▏ | 35914/43796 [01:15<00:25, 311.36it/s]

 82%|████████▏ | 35973/43796 [01:15<00:21, 362.40it/s]

 82%|████████▏ | 36032/43796 [01:15<00:18, 408.95it/s]

 82%|████████▏ | 36091/43796 [01:15<00:17, 449.23it/s]

 83%|████████▎ | 36150/43796 [01:15<00:15, 482.42it/s]

 83%|████████▎ | 36209/43796 [01:16<00:14, 508.72it/s]

 83%|████████▎ | 36267/43796 [01:16<00:14, 527.59it/s]

 83%|████████▎ | 36325/43796 [01:16<00:13, 541.22it/s]

 83%|████████▎ | 36382/43796 [01:16<00:34, 217.50it/s]

 83%|████████▎ | 36425/43796 [01:17<00:34, 216.50it/s]

 83%|████████▎ | 36462/43796 [01:17<00:32, 226.38it/s]

 83%|████████▎ | 36517/43796 [01:17<00:26, 279.92it/s]

 83%|████████▎ | 36557/43796 [01:17<00:28, 257.11it/s]

 84%|████████▎ | 36592/43796 [01:17<00:28, 251.23it/s]

 84%|████████▎ | 36623/43796 [01:17<00:27, 257.58it/s]

 84%|████████▎ | 36654/43796 [01:17<00:28, 252.09it/s]

 84%|████████▍ | 36683/43796 [01:18<00:28, 247.18it/s]

 84%|████████▍ | 36724/43796 [01:18<00:24, 284.12it/s]

 84%|████████▍ | 36762/43796 [01:18<00:22, 306.14it/s]

 84%|████████▍ | 36795/43796 [01:18<00:26, 267.83it/s]

 84%|████████▍ | 36825/43796 [01:18<00:25, 274.79it/s]

 84%|████████▍ | 36855/43796 [01:19<00:53, 129.29it/s]

 84%|████████▍ | 36878/43796 [01:19<00:49, 139.56it/s]

 84%|████████▍ | 36907/43796 [01:19<00:42, 164.00it/s]

 84%|████████▍ | 36955/43796 [01:19<00:30, 223.50it/s]

 84%|████████▍ | 36986/43796 [01:19<00:31, 217.81it/s]

 85%|████████▍ | 37016/43796 [01:19<00:28, 233.84it/s]

 85%|████████▍ | 37047/43796 [01:19<00:26, 251.42it/s]

 85%|████████▍ | 37080/43796 [01:19<00:24, 270.26it/s]

 85%|████████▍ | 37124/43796 [01:19<00:21, 313.91it/s]

 85%|████████▍ | 37177/43796 [01:20<00:17, 372.72it/s]

 85%|████████▌ | 37232/43796 [01:20<00:15, 421.41it/s]

 85%|████████▌ | 37277/43796 [01:20<00:15, 420.29it/s]

 85%|████████▌ | 37321/43796 [01:20<00:30, 215.76it/s]

 85%|████████▌ | 37377/43796 [01:20<00:23, 273.84it/s]

 85%|████████▌ | 37422/43796 [01:20<00:20, 307.42it/s]

 86%|████████▌ | 37465/43796 [01:20<00:18, 333.69it/s]

 86%|████████▌ | 37507/43796 [01:21<00:19, 315.37it/s]

 86%|████████▌ | 37545/43796 [01:21<00:22, 283.32it/s]

 86%|████████▌ | 37578/43796 [01:21<00:23, 268.88it/s]

 86%|████████▌ | 37609/43796 [01:21<00:22, 277.13it/s]

 86%|████████▌ | 37640/43796 [01:21<00:23, 257.95it/s]

 86%|████████▌ | 37685/43796 [01:21<00:20, 303.05it/s]

 86%|████████▌ | 37718/43796 [01:22<00:24, 244.57it/s]

 86%|████████▌ | 37749/43796 [01:22<00:23, 257.54it/s]

 86%|████████▋ | 37778/43796 [01:22<00:43, 138.07it/s]

 86%|████████▋ | 37802/43796 [01:22<00:39, 153.18it/s]

 86%|████████▋ | 37835/43796 [01:22<00:32, 184.21it/s]

 87%|████████▋ | 37891/43796 [01:22<00:22, 259.28it/s]

 87%|████████▋ | 37938/43796 [01:23<00:19, 306.24it/s]

 87%|████████▋ | 37992/43796 [01:23<00:16, 362.27it/s]

 87%|████████▋ | 38043/43796 [01:23<00:14, 398.49it/s]

 87%|████████▋ | 38092/43796 [01:23<00:13, 421.82it/s]

 87%|████████▋ | 38149/43796 [01:23<00:12, 460.69it/s]

 87%|████████▋ | 38199/43796 [01:23<00:11, 466.90it/s]

 87%|████████▋ | 38253/43796 [01:23<00:11, 487.41it/s]

 87%|████████▋ | 38304/43796 [01:24<00:21, 253.43it/s]

 88%|████████▊ | 38361/43796 [01:24<00:17, 308.67it/s]

 88%|████████▊ | 38416/43796 [01:24<00:15, 356.81it/s]

 88%|████████▊ | 38471/43796 [01:24<00:13, 399.42it/s]

 88%|████████▊ | 38528/43796 [01:24<00:11, 439.57it/s]

 88%|████████▊ | 38585/43796 [01:24<00:11, 472.72it/s]

 88%|████████▊ | 38642/43796 [01:24<00:10, 498.17it/s]

 88%|████████▊ | 38699/43796 [01:24<00:09, 517.51it/s]

 88%|████████▊ | 38755/43796 [01:25<00:18, 278.69it/s]

 89%|████████▊ | 38811/43796 [01:25<00:15, 327.91it/s]

 89%|████████▊ | 38867/43796 [01:25<00:13, 374.07it/s]

 89%|████████▉ | 38923/43796 [01:25<00:11, 414.44it/s]

 89%|████████▉ | 38980/43796 [01:25<00:10, 450.22it/s]

 89%|████████▉ | 39036/43796 [01:25<00:09, 476.25it/s]

 89%|████████▉ | 39091/43796 [01:25<00:09, 495.76it/s]

 89%|████████▉ | 39147/43796 [01:25<00:09, 511.83it/s]

 90%|████████▉ | 39202/43796 [01:25<00:08, 522.26it/s]

 90%|████████▉ | 39257/43796 [01:26<00:16, 273.01it/s]

 90%|████████▉ | 39312/43796 [01:26<00:13, 321.22it/s]

 90%|████████▉ | 39359/43796 [01:26<00:12, 349.58it/s]

 90%|████████▉ | 39413/43796 [01:26<00:11, 391.44it/s]

 90%|█████████ | 39462/43796 [01:26<00:10, 406.43it/s]

 90%|█████████ | 39513/43796 [01:26<00:09, 432.37it/s]

 90%|█████████ | 39568/43796 [01:27<00:09, 462.05it/s]

 90%|█████████ | 39619/43796 [01:27<00:09, 460.26it/s]

 91%|█████████ | 39674/43796 [01:27<00:08, 484.15it/s]

 91%|█████████ | 39725/43796 [01:27<00:17, 235.86it/s]

 91%|█████████ | 39782/43796 [01:27<00:13, 289.00it/s]

 91%|█████████ | 39827/43796 [01:27<00:12, 318.30it/s]

 91%|█████████ | 39876/43796 [01:28<00:11, 353.60it/s]

 91%|█████████ | 39931/43796 [01:28<00:09, 398.62it/s]

 91%|█████████▏| 39980/43796 [01:28<00:09, 406.50it/s]

 91%|█████████▏| 40032/43796 [01:28<00:08, 433.94it/s]

 92%|█████████▏| 40088/43796 [01:28<00:07, 465.89it/s]

 92%|█████████▏| 40139/43796 [01:28<00:08, 455.85it/s]

 92%|█████████▏| 40188/43796 [01:29<00:15, 227.38it/s]

 92%|█████████▏| 40241/43796 [01:29<00:12, 274.95it/s]

 92%|█████████▏| 40283/43796 [01:29<00:11, 299.51it/s]

 92%|█████████▏| 40336/43796 [01:29<00:09, 347.03it/s]

 92%|█████████▏| 40391/43796 [01:29<00:08, 392.23it/s]

 92%|█████████▏| 40439/43796 [01:29<00:08, 400.61it/s]

 92%|█████████▏| 40488/43796 [01:29<00:07, 422.92it/s]

 93%|█████████▎| 40544/43796 [01:29<00:07, 457.77it/s]

 93%|█████████▎| 40594/43796 [01:29<00:06, 467.43it/s]

 93%|█████████▎| 40644/43796 [01:29<00:06, 453.59it/s]

 93%|█████████▎| 40692/43796 [01:30<00:13, 222.24it/s]

 93%|█████████▎| 40744/43796 [01:30<00:11, 269.49it/s]

 93%|█████████▎| 40785/43796 [01:30<00:10, 292.03it/s]

 93%|█████████▎| 40836/43796 [01:30<00:08, 337.00it/s]

 93%|█████████▎| 40893/43796 [01:30<00:07, 389.52it/s]

 93%|█████████▎| 40945/43796 [01:30<00:06, 420.25it/s]

 94%|█████████▎| 40994/43796 [01:31<00:06, 412.12it/s]

 94%|█████████▎| 41046/43796 [01:31<00:06, 438.70it/s]

 94%|█████████▍| 41103/43796 [01:31<00:05, 471.91it/s]

 94%|█████████▍| 41154/43796 [01:31<00:11, 220.54it/s]

 94%|█████████▍| 41192/43796 [01:31<00:10, 244.26it/s]

 94%|█████████▍| 41244/43796 [01:32<00:08, 292.89it/s]

 94%|█████████▍| 41302/43796 [01:32<00:07, 350.26it/s]

 94%|█████████▍| 41353/43796 [01:32<00:06, 385.86it/s]

 95%|█████████▍| 41401/43796 [01:32<00:06, 391.08it/s]

 95%|█████████▍| 41449/43796 [01:32<00:05, 412.96it/s]

 95%|█████████▍| 41504/43796 [01:32<00:05, 448.86it/s]

 95%|█████████▍| 41557/43796 [01:32<00:04, 469.90it/s]

 95%|█████████▌| 41607/43796 [01:32<00:04, 450.45it/s]

 95%|█████████▌| 41655/43796 [01:33<00:08, 243.52it/s]

 95%|█████████▌| 41701/43796 [01:33<00:07, 280.40it/s]

 95%|█████████▌| 41742/43796 [01:33<00:06, 304.79it/s]

 95%|█████████▌| 41794/43796 [01:33<00:05, 350.45it/s]

 96%|█████████▌| 41837/43796 [01:33<00:05, 366.33it/s]

 96%|█████████▌| 41881/43796 [01:33<00:04, 383.14it/s]

 96%|█████████▌| 41933/43796 [01:33<00:04, 418.42it/s]

 96%|█████████▌| 41990/43796 [01:33<00:03, 458.52it/s]

 96%|█████████▌| 42048/43796 [01:34<00:03, 492.32it/s]

 96%|█████████▌| 42106/43796 [01:34<00:03, 517.18it/s]

 96%|█████████▋| 42160/43796 [01:34<00:06, 271.64it/s]

 96%|█████████▋| 42218/43796 [01:34<00:04, 325.79it/s]

 97%|█████████▋| 42276/43796 [01:34<00:04, 376.62it/s]

 97%|█████████▋| 42334/43796 [01:34<00:03, 421.55it/s]

 97%|█████████▋| 42392/43796 [01:34<00:03, 459.14it/s]

 97%|█████████▋| 42450/43796 [01:35<00:02, 489.89it/s]

 97%|█████████▋| 42508/43796 [01:35<00:02, 513.44it/s]

 97%|█████████▋| 42566/43796 [01:35<00:02, 530.96it/s]

 97%|█████████▋| 42623/43796 [01:35<00:04, 291.64it/s]

 97%|█████████▋| 42681/43796 [01:35<00:03, 342.78it/s]

 98%|█████████▊| 42739/43796 [01:35<00:02, 390.21it/s]

 98%|█████████▊| 42797/43796 [01:35<00:02, 432.65it/s]

 98%|█████████▊| 42855/43796 [01:36<00:02, 468.28it/s]

 98%|█████████▊| 42913/43796 [01:36<00:01, 496.54it/s]

 98%|█████████▊| 42971/43796 [01:36<00:01, 518.75it/s]

 98%|█████████▊| 43029/43796 [01:36<00:01, 534.64it/s]

 98%|█████████▊| 43086/43796 [01:36<00:02, 292.19it/s]

 99%|█████████▊| 43144/43796 [01:36<00:01, 343.34it/s]

 99%|█████████▊| 43202/43796 [01:36<00:01, 391.07it/s]

 99%|█████████▉| 43260/43796 [01:37<00:01, 432.98it/s]

 99%|█████████▉| 43319/43796 [01:37<00:01, 469.36it/s]

 99%|█████████▉| 43377/43796 [01:37<00:00, 497.72it/s]

 99%|█████████▉| 43435/43796 [01:37<00:00, 519.59it/s]

 99%|█████████▉| 43492/43796 [01:37<00:00, 533.38it/s]

 99%|█████████▉| 43550/43796 [01:37<00:00, 290.51it/s]

100%|█████████▉| 43608/43796 [01:37<00:00, 341.63it/s]

100%|█████████▉| 43666/43796 [01:38<00:00, 389.51it/s]

100%|█████████▉| 43724/43796 [01:38<00:00, 431.95it/s]

100%|█████████▉| 43782/43796 [01:38<00:00, 467.55it/s]

100%|██████████| 43796/43796 [01:38<00:00, 445.60it/s]




Processing stocks:   0%|          | 0/20 [00:00<?, ?it/s]

Processing stocks:   5%|▌         | 1/20 [00:00<00:04,  4.31it/s]

Processing stocks:  10%|█         | 2/20 [00:00<00:03,  4.84it/s]

Processing stocks:  15%|█▌        | 3/20 [00:00<00:03,  4.93it/s]

Processing stocks:  20%|██        | 4/20 [00:00<00:03,  4.87it/s]

Processing stocks:  30%|███       | 6/20 [00:01<00:02,  5.69it/s]

Processing stocks:  40%|████      | 8/20 [00:01<00:01,  6.65it/s]

Processing stocks:  45%|████▌     | 9/20 [00:01<00:01,  6.70it/s]

Processing stocks:  50%|█████     | 10/20 [00:01<00:02,  4.36it/s]

Processing stocks:  65%|██████▌   | 13/20 [00:02<00:01,  3.82it/s]

Processing stocks:  70%|███████   | 14/20 [00:02<00:01,  4.27it/s]

Processing stocks:  80%|████████  | 16/20 [00:03<00:00,  5.54it/s]

Processing stocks:  85%|████████▌ | 17/20 [00:03<00:00,  5.34it/s]

Processing stocks:  90%|█████████ | 18/20 [00:03<00:00,  5.73it/s]

Processing stocks:  95%|█████████▌| 19/20 [00:03<00:00,  6.36it/s]

Processing stocks: 100%|██████████| 20/20 [00:03<00:00,  5.52it/s]




Unnamed: 0,股票编号,日期,高维情绪变量,交易量,收益率变化,聚类标签,公司名称
0,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",324450523.0,0.009901,2.0,TCL科技
1,000100,2021-06-02,"[-0.187, 0.203, 0.317, 0.287, -0.054, 0.203, -...",276452006.0,-0.018382,2.0,TCL科技
2,000100,2021-06-03,"[-0.073, 0.151, 0.331, 0.325, -0.119, 0.07, -0...",243472574.0,-0.012484,2.0,TCL科技
3,000100,2021-06-04,"[-0.218, 0.139, 0.407, 0.302, -0.096, 0.187, -...",398071334.0,-0.026549,2.0,TCL科技
4,000100,2021-06-07,"[-0.178, 0.052, 0.349, 0.223, -0.061, 0.26, -0...",259087535.0,0.019481,2.0,TCL科技
...,...,...,...,...,...,...,...
8611,688029,2024-09-06,"[-0.37, -0.125, 0.612, 0.289, -0.026, 0.278, -...",733391.0,-0.016180,1.0,南微医学
8612,688029,2024-09-18,"[-0.21, 0.012, 0.33, 0.263, 0.172, 0.214, -0.4...",1847995.0,-0.026627,1.0,南微医学
8613,688029,2024-10-08,"[0.041, 0.107, 0.281, 0.313, 0.048, 0.185, -0....",9632104.0,0.103133,1.0,南微医学
8614,688029,2024-10-29,"[-0.032, 0.17, 0.237, 0.299, 0.065, 0.191, -0....",2357027.0,-0.033338,1.0,南微医学


In [11]:
# 统计检验 : 按照聚类标签分组，并计算均值和方差
df = final_data.copy()
if MODEL != 'Deep-learning/Ours':
    statistical_result = df.groupby('聚类标签').agg(
        情绪值均值=(EMO_NAME, 'mean'),
        情绪值方差=(EMO_NAME, 'var'),
        收益率变化均值=('收益率变化', 'mean'),
        收益率变化方差=('收益率变化', 'var')
    ).reset_index()
else:
    statistical_result = None
# 输出结果
statistical_result

## 情绪值与股价二维展示图

In [12]:
## 绘制不同簇情绪值随时间变化图
if MODEL != 'Deep-learning/Ours':
    # 按 '聚类标签' 和 '日期' 分组，计算平均情绪值
    df_grouped = final_data.groupby(['聚类标签', '日期'])[['上证综合情绪值', '沪深300情绪值', '创业板情绪值']].mean().reset_index()
    
    # 为每个聚类标签绘制折线
    i = 0
    plt.figure(figsize=(12, 12))
    for cluster in df_grouped['聚类标签'].unique():
        if i == 0:
            x = 'red'
        elif i == 1:
            x = 'blue'
        elif i == 2:
            x = 'green'
        df_cluster = df_grouped[df_grouped['聚类标签'] == cluster]
        plt.plot(df_cluster['日期'], df_cluster[EMO_NAME], label=f'{EMO_NAME} - 聚类 {cluster}', color=x)
        i += 1
        
    plt.xlabel('日期')
    plt.ylabel('平均情绪值')
    plt.title('情绪值趋势图')
    
    
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=3))  # 每三个月显示
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 格式化为年-月
    
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    
    # plt.savefig(f'个股分析/{market_address}/图片/情绪值趋势图', dpi=300)
    plt.show()


In [13]:
## 绘制不同簇收益率随时间变化图
if MODEL != 'Deep-learning/Ours':
    # 按 '聚类标签' 和 '日期' 分组，计算平均收益率
    return_grouped = final_data.groupby(['聚类标签', '日期'])['收益率变化'].mean().reset_index()
    
    # 为每个聚类标签绘制折线
    i = 0
    plt.figure(figsize=(12, 12))
    for cluster in df_grouped['聚类标签'].unique():
        if i == 0:
            x = 'red'
        elif i == 1:
            x = 'blue'
        elif i == 2:
            x = 'green'
        df_cluster = return_grouped[return_grouped['聚类标签'] == cluster]
        plt.plot(df_cluster['日期'], df_cluster['收益率变化'], label=f'收益率变化 - 聚类 {cluster}', color=x)
        i += 1
    
    plt.xlabel('日期')
    plt.ylabel('平均收益率')
    plt.title('收益率趋势图')
    
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=3))  # 每三个月显示
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 格式化为年-月
    
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    
    # 显示图表
    # plt.savefig(f'个股分析/{market_address}/图片/收益率趋势图', dpi=300)
    plt.show()

## 三维展示图，分别展示簇是0，1，2的代表图

In [14]:
### 聚类0示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 0]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类0_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


In [15]:
### 聚类1示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 1]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类1_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


In [16]:
### 聚类2示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 2]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类2_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


## 进行聚类间实证分析

#### 读取个股金融数据

In [17]:
# 读取原始数据
microfinancial_data = pd.read_csv(f'{Financial_Data_PATH}/个股微观金融数据.csv', encoding='utf-8')

# 选择重要的列，日期和股票代码一定要包含
columns_to_select = [
    '日期_Date',  # 日期
    '股票代码_Stkcd',  # 股票代码
    '日振幅(%)_Dampltd',  # 日振幅
    '日收益率_Dret',  # 日收益率
    '市盈率_PE',  # 市盈率
    '成交量_Trdvol',  # 成交量
    '流通股日换手率(%)_DTrdTurnR',
    '总市值加权平均日资本收益_Daretmc',
]

# 提取相关列
microfinancial_data = microfinancial_data[columns_to_select]
microfinancial_data['股票代码_Stkcd'] = microfinancial_data['股票代码_Stkcd'].apply(lambda x: str(x).zfill(6))

# microfinancial_data = microfinancial_data[microfinancial_data['日期_Date'] == '2022-03-01'] #用于检验某一天数据
microfinancial_data

Unnamed: 0,日期_Date,股票代码_Stkcd,日振幅(%)_Dampltd,日收益率_Dret,市盈率_PE,成交量_Trdvol,流通股日换手率(%)_DTrdTurnR,总市值加权平均日资本收益_Daretmc
0,2023-02-01,000100,1.6667,0.0071,276.47,147751077.0,1.0357,0.0145
1,2023-02-02,000100,2.1277,-0.0095,273.86,188929965.0,1.3244,-0.0005
2,2023-02-03,000100,2.1480,-0.0048,272.55,200087013.0,1.4026,-0.0042
3,2023-02-06,000100,1.6787,-0.0072,270.59,134158688.0,0.9404,-0.0084
4,2023-02-07,000100,1.4493,0.0024,271.24,87200030.0,0.6113,0.0036
...,...,...,...,...,...,...,...,...
162617,2024-11-14,601566,3.0340,-0.0255,16.94,3223500.0,0.5610,-0.0146
162618,2024-11-15,601566,2.8643,0.0025,16.98,2824500.0,0.4915,-0.0112
162619,2024-11-18,601566,3.8509,-0.0025,16.94,3729993.0,0.6491,0.0009
162620,2024-11-19,601566,2.3661,0.0137,17.17,3367065.0,0.5859,0.0031


#### 加载之前的情绪聚类数据

In [18]:
final_data

Unnamed: 0,股票编号,日期,高维情绪变量,交易量,收益率变化,聚类标签,公司名称
0,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",324450523.0,0.009901,2.0,TCL科技
1,000100,2021-06-02,"[-0.187, 0.203, 0.317, 0.287, -0.054, 0.203, -...",276452006.0,-0.018382,2.0,TCL科技
2,000100,2021-06-03,"[-0.073, 0.151, 0.331, 0.325, -0.119, 0.07, -0...",243472574.0,-0.012484,2.0,TCL科技
3,000100,2021-06-04,"[-0.218, 0.139, 0.407, 0.302, -0.096, 0.187, -...",398071334.0,-0.026549,2.0,TCL科技
4,000100,2021-06-07,"[-0.178, 0.052, 0.349, 0.223, -0.061, 0.26, -0...",259087535.0,0.019481,2.0,TCL科技
...,...,...,...,...,...,...,...
8611,688029,2024-09-06,"[-0.37, -0.125, 0.612, 0.289, -0.026, 0.278, -...",733391.0,-0.016180,1.0,南微医学
8612,688029,2024-09-18,"[-0.21, 0.012, 0.33, 0.263, 0.172, 0.214, -0.4...",1847995.0,-0.026627,1.0,南微医学
8613,688029,2024-10-08,"[0.041, 0.107, 0.281, 0.313, 0.048, 0.185, -0....",9632104.0,0.103133,1.0,南微医学
8614,688029,2024-10-29,"[-0.032, 0.17, 0.237, 0.299, 0.065, 0.191, -0....",2357027.0,-0.033338,1.0,南微医学


#### 读取市场换手率与市盈率并进行插值

In [19]:
turnover_PE_rate = pd.read_csv(f'{Financial_Data_PATH}/市场整体换手率与市盈率.csv', encoding='utf-8')
turnover_PE_rate['统计日期'] = pd.to_datetime(turnover_PE_rate['统计日期'])

# 按照 '统计日期' 排序
turnover_PE_rate = turnover_PE_rate.sort_values(by='统计日期')

# 对 '平均市盈率' 和 '换手率(总股本)' 进行线性插值
turnover_PE_rate[['平均市盈率', '换手率(总股本)']] = turnover_PE_rate[['平均市盈率', '换手率(总股本)']].interpolate(method='linear', axis=0)

turnover_PE_rate = turnover_PE_rate.drop(columns=['统计口径编码'])
turnover_PE_rate.columns = ['统计日期', '市场平均市盈率', '市场换手率(总股本)']
turnover_PE_rate

Unnamed: 0,统计日期,市场平均市盈率,市场换手率(总股本)
0,2021-01-04,16.918000,1.140600
12,2021-01-04,25.986305,1.237217
11,2021-01-04,35.054611,1.333834
10,2021-01-04,44.122916,1.430451
8,2021-01-04,53.191221,1.527068
...,...,...,...
12364,2024-12-06,39.185061,2.176139
12363,2024-12-06,14.390000,1.292500
12374,2024-12-06,14.390000,1.292500
12368,2024-12-06,14.390000,1.292500


#### 数据拼接

In [20]:
# 将日期列转换为 datetime 格式
final_data['日期'] = pd.to_datetime(final_data['日期'])
microfinancial_data['日期_Date'] = pd.to_datetime(microfinancial_data['日期_Date'], errors='coerce')

# 使用 merge 进行内连接
result = pd.merge(
    final_data, 
    microfinancial_data, 
    left_on=['日期', '股票编号'],  # 左表的 key
    right_on=['日期_Date', '股票代码_Stkcd'], # 右表的 key
    how='inner'                   # 内连接
)

result = pd.merge(
    result, 
    turnover_PE_rate, 
    left_on=['日期'],  # 左表的 key
    right_on=['统计日期'], # 右表的 key
    how='inner'                   # 内连接
)

# 删除重复的列
result = result.drop(columns=['日期_Date', '股票代码_Stkcd', '交易量', '日收益率_Dret', '统计日期'])

# 修改列的位置，更加美观可视
columns_to_move = ['聚类标签', '公司名称']
result = pd.concat([result.drop(columns=columns_to_move), result[columns_to_move]], axis=1)

# 将成交量除以10000，并以万计数
result['成交量(百万)_Trdvol'] = result['成交量_Trdvol'] / 1_000_000
result = result.drop(columns=['成交量_Trdvol'])

cols_to_check = ['收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
                 '成交量(百万)_Trdvol', '流通股日换手率(%)_DTrdTurnR']

# 删除含有空值的行
result = result.dropna(subset=cols_to_check)
result

Unnamed: 0,股票编号,日期,高维情绪变量,收益率变化,日振幅(%)_Dampltd,市盈率_PE,流通股日换手率(%)_DTrdTurnR,总市值加权平均日资本收益_Daretmc,市场平均市盈率,市场换手率(总股本),聚类标签,公司名称,成交量(百万)_Trdvol
0,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",0.009901,2.8465,17.93,2.4002,0.0039,24.523860,1.030223,2.0,TCL科技,324.450523
1,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",0.009901,2.8465,17.93,2.4002,0.0039,31.761720,1.132746,2.0,TCL科技,324.450523
2,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",0.009901,2.8465,17.93,2.4002,0.0039,38.999580,1.235269,2.0,TCL科技,324.450523
3,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",0.009901,2.8465,17.93,2.4002,0.0039,46.237440,1.337792,2.0,TCL科技,324.450523
4,000100,2021-06-01,"[-0.214, -0.003, 0.359, 0.245, -0.063, 0.255, ...",0.009901,2.8465,17.93,2.4002,0.0039,53.475301,1.440316,2.0,TCL科技,324.450523
...,...,...,...,...,...,...,...,...,...,...,...,...,...
107609,603185,2024-11-15,"[-0.157, 0.048, 0.218, 0.195, -0.075, 0.089, -...",-0.071590,7.2043,-424.25,2.9536,-0.0112,65.934994,2.786785,0.0,弘元绿能,20.055621
107610,603185,2024-11-15,"[-0.157, 0.048, 0.218, 0.195, -0.075, 0.089, -...",-0.071590,7.2043,-424.25,2.9536,-0.0112,48.663329,2.336790,0.0,弘元绿能,20.055621
107611,603185,2024-11-15,"[-0.157, 0.048, 0.218, 0.195, -0.075, 0.089, -...",-0.071590,7.2043,-424.25,2.9536,-0.0112,31.391665,1.886795,0.0,弘元绿能,20.055621
107612,603185,2024-11-15,"[-0.157, 0.048, 0.218, 0.195, -0.075, 0.089, -...",-0.071590,7.2043,-424.25,2.9536,-0.0112,14.120000,1.436800,0.0,弘元绿能,20.055621


In [21]:
# 查看匹配的公司数
unique_stock_count = result['股票编号'].nunique()
print(unique_stock_count)

18


#### 回归检验

In [22]:
## 单因子：总市值加权平均日资本收益回归检验
df = result.copy()

# 自变量是总市值加权平均日资本收益
X = df[['总市值加权平均日资本收益_Daretmc']]

# 要拟合的因变量列表
y_vars = [
    '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
    '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
]


# 添加常数项（截距项）
X_with_const = sm.add_constant(X)

# 创建一个空的列表存储回归结果
regression_results = []

# 对每个因变量进行回归分析
for y_var in y_vars:
    y = df[y_var]  # 当前的因变量

    # 对股票编号分组进行回归
    grouped = df.groupby('股票编号')
    
    r_squared_list = []
    
    for stock_code, group in grouped:
        X_group = group[['总市值加权平均日资本收益_Daretmc']]
        X_with_const_group = sm.add_constant(X_group)
        y_group = group[y_var]
        
        try:
            # 拟合回归模型
            model = sm.OLS(y_group, X_with_const_group)
            results = model.fit()

            # 提取回归R方
            r_squared_list.append(results.rsquared)
        except Exception as e:
            # 如果回归失败（如R方计算为-inf），跳过
            print(f"回归失败：股票编号 {stock_code}, 错误信息: {e}")
            r_squared_list.append(float('nan'))  # 将其R方设为NaN
    
    # 计算R方的平均值，忽略NaN值
    avg_r_squared = pd.Series(r_squared_list).mean()
    
    # 对于每个因变量，存储回归系数等信息
    model = sm.OLS(y, X_with_const)
    results = model.fit()

    regression_results.append({
        'Variable': y_var,
        'S': round(results.bse['总市值加权平均日资本收益_Daretmc'], 3),  # 标准误差
        'Econ': round(results.params['总市值加权平均日资本收益_Daretmc'], 3),  # 回归系数
        'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
        'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
        'Econ_t_value': round(results.tvalues['总市值加权平均日资本收益_Daretmc'], 3),  # Econ的t统计量，保留三位小数
        'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
    })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/单因子：总市值加权平均日资本收益.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.006,1.074,0.0,25.763,193.249,0.712
1,日振幅(%)_Dampltd,0.492,11.781,3.344,0.531,23.961,541.6
2,市盈率_PE,27.146,61.642,27.236,0.005,2.271,79.898
3,成交量(百万)_Trdvol,25.935,501.556,56.188,0.346,19.339,172.527
4,流通股日换手率(%)_DTrdTurnR,0.337,7.142,1.173,0.415,21.185,277.151


In [23]:
## 单因子：情绪回归检验
if MODEL != 'Deep-learning/Ours':
    df = result.copy()
    
    # 自变量是情绪值
    X = df[[EMO_NAME]]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
        
        for stock_code, group in grouped:
            X_group = group[[EMO_NAME]]
            X_with_const_group = sm.add_constant(X_group)
            y_group = group[y_var]
            
            # 拟合回归模型
            model = sm.OLS(y_group, X_with_const_group)
            results = model.fit()
    
            # 提取回归R方
            r_squared_list.append(results.rsquared)
        
        # 计算R方的平均值
        avg_r_squared = sum(r_squared_list) / len(r_squared_list)
        
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result.copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared_global': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    


# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/单因子：{EMO_NAME}.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared_global,Econ_t_value,Constant_t_value
0,收益率变化,0.01,0.003,-0.069,17.831,0.296,-1.444
1,日振幅(%)_Dampltd,0.705,2.454,15.771,24.172,3.482,4.477
2,市盈率_PE,40.759,36.865,158.807,16.318,0.904,0.779
3,成交量(百万)_Trdvol,37.325,95.907,6.761,23.381,2.57,0.036
4,流通股日换手率(%)_DTrdTurnR,0.462,-0.004,17.017,30.656,-0.008,7.371


In [24]:
## 总体双因子回归检验
if MODEL != 'Deep-learning/Ours':
    df = result.copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
   
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })

else:
    df = result.copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')

        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：总体分析.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.008,0.015,-0.046,36.48,1.796,-1.084
1,日振幅(%)_Dampltd,0.703,2.579,16.011,24.502,3.668,4.555
2,市盈率_PE,40.757,38.292,161.535,16.333,0.94,0.793
3,成交量(百万)_Trdvol,37.283,100.774,16.063,23.561,2.703,0.086
4,流通股日换手率(%)_DTrdTurnR,0.461,0.068,17.153,30.884,0.147,7.443


In [25]:
## 聚类0的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 0].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
        
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })

else:
    df = result[result['聚类标签'] == 0].copy()

    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    
# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类0检验.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.013,0.06,0.006,40.433,4.623,0.088
1,日振幅(%)_Dampltd,1.08,6.127,26.548,35.656,5.675,4.863
2,市盈率_PE,27.692,57.122,440.831,44.984,2.063,3.148
3,成交量(百万)_Trdvol,24.485,119.131,-189.297,48.913,4.865,-1.529
4,流通股日换手率(%)_DTrdTurnR,0.652,1.192,3.987,45.802,1.827,1.209


In [26]:
## 聚类1的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 1].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')

        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result[result['聚类标签'] == 1].copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    print('(样本数, 维度):', emo_array.shape)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')    

        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类1检验.csv')
regression_df


(样本数, 维度): (12116, 768)


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.028,0.375,0.675,85.69,13.591,2.872
1,日振幅(%)_Dampltd,2.225,12.132,72.597,83.799,5.452,3.827
2,市盈率_PE,7.353,-127.614,-72.529,93.395,-17.356,-1.157
3,成交量(百万)_Trdvol,20.635,127.019,-20.469,86.213,6.155,-0.116
4,流通股日换手率(%)_DTrdTurnR,2.036,11.52,41.8,86.747,5.658,2.408


In [27]:
## 聚类2的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 2].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
    
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result[result['聚类标签'] == 2].copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    print('(样本数, 维度):', emo_array.shape)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        r_squared_list = []
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类2检验.csv')
regression_df


(样本数, 维度): (45851, 768)


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.012,-0.074,-0.292,51.034,-6.094,-3.782
1,日振幅(%)_Dampltd,0.989,-0.723,29.254,37.673,-0.731,4.649
2,市盈率_PE,87.733,201.525,-634.164,34.174,2.297,-1.136
3,成交量(百万)_Trdvol,82.502,48.728,1268.64,33.87,0.591,2.417
4,流通股日换手率(%)_DTrdTurnR,0.533,-0.946,19.727,40.319,-1.774,5.817
