In [1]:
# Parameters
EMO_INDEX = 2
MODEL_INDEX = 7
N = 20


In [2]:
import os
import ast
import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.dates as mdates
import statsmodels.api as sm # 用于统计检验
%matplotlib inline
plt.rcParams["font.sans-serif"]=["WenQuanYi Micro Hei"] #设置字体
plt.rcParams["axes.unicode_minus"]=False #该语句解决图像中的“-”负号的乱码问题

## 参数选择

In [3]:
# # Parameters
# MODEL_INDEX = 2
# EMO_INDEX = 0
# N = 20

In [4]:
ORI_DATA_PATH = '/data/public/fintechlab/zdh/Individual-Stock-Analysis/B_Temporal_Clustering/data'
ROOT_PATH = '/data/public/fintechlab/zdh/Individual-Stock-Analysis/B_Result_Analysis'
CLUSTER_PATH = f'{ROOT_PATH}/data/Comparison_of_Emotional_models'

MODEL = ['Emo-Dict/DLUT', 'Emo-Dict/Bian', 'Emo-Dict/Jiang',
         'Machine-learning/LR', 'Machine-learning/RF', 'Machine-learning/SVM', 
         'Deep-learning/BERT', 'Deep-learning/Ours'][MODEL_INDEX]
Emotion_Data_PATH = f'{ORI_DATA_PATH}/Emotion_Data/{MODEL}'   # 情绪数据路径
Financial_Data_PATH = f'{ORI_DATA_PATH}/Financial_Data' # 金融数据路径

In [5]:
EMO_MARKET = ['Shanghai_Composite_Index', 'CSI_300_Index', 'Chinext_Index'][EMO_INDEX]
EMO_INDEX_MAP = {
    'Shanghai_Composite_Index': '上证综合情绪值',
    'CSI_300_Index': '沪深300情绪值',
    'Chinext_Index': '创业板情绪值'
}
EMO_NAME = EMO_INDEX_MAP[EMO_MARKET]

## 数据预处理

In [6]:
## 读取聚类结果
cluster_data = pd.read_csv(f'{CLUSTER_PATH}/{EMO_MARKET}/{MODEL}.csv', dtype={"Stkcd": str})
cluster_data.columns = ['股票编号', '聚类标签', '公司名称']

# 尝试抽样直到满足每个簇至少出现3次
for _ in range(100):
    cluster_data = cluster_data.sample(n=N, random_state=np.random.randint(0, 10000))
    label_counts = cluster_data['聚类标签'].value_counts()
    
    if all(label_counts >= 3):
        break
else:
    raise ValueError(f"在 {RETRY_LIMIT} 次尝试中无法找到满足每个簇至少3个样本的抽样结果，请减少 N 或检查数据分布。")

cluster_data.head()

Unnamed: 0,股票编号,聚类标签,公司名称
182,688981,2,中芯国际
116,600690,0,海尔智家
87,600166,2,福田汽车
122,600763,0,通策医疗
89,600198,0,大唐电信


In [7]:
## 读取股吧个股的数据
all_data = []
file_list = [f for f in os.listdir(Emotion_Data_PATH) if f.endswith('.csv')]

if MODEL != 'Deep-learning/Ours':
    for file in file_list:
        file_path = os.path.join(Emotion_Data_PATH, file)
        df = pd.read_csv(file_path)
        stock_code = os.path.splitext(file)[0] # 获取股票编号（文件名去掉扩展名）
        
        # 提取每一行的日期和情绪值
        for _, row in df.iterrows():
            new_row = {
                '股票编号': stock_code,
                '日期': row['日期'],
                '上证综合情绪值': row['上证综合情绪值'],
                '沪深300情绪值': row['沪深300情绪值'],
                '创业板情绪值': row['创业板情绪值']
            }
            all_data.append(new_row)
    guba_data = pd.DataFrame(all_data)
else:
    for file in tqdm(file_list, desc="读取文件"):
        file_path = os.path.join(Emotion_Data_PATH, file)
        try:
            df = pd.read_csv(file_path, usecols=['日期', '高维情绪变量'])  # 只读需要的列
        except Exception as e:
            print(f"读取失败 {file}: {e}")
            continue
    
        stock_code = os.path.splitext(file)[0]
    
        df['股票编号'] = stock_code
        all_data.append(df)
    guba_data = pd.concat(all_data, ignore_index=True)

guba_data

读取文件:   0%|          | 0/183 [00:00<?, ?it/s]

读取文件:   1%|          | 1/183 [00:00<01:04,  2.82it/s]

读取文件:   2%|▏         | 3/183 [00:00<00:38,  4.69it/s]

读取文件:   2%|▏         | 4/183 [00:01<01:18,  2.27it/s]

读取文件:   3%|▎         | 5/183 [00:01<00:59,  3.01it/s]

读取文件:   3%|▎         | 6/183 [00:01<00:48,  3.62it/s]

读取文件:   4%|▍         | 7/183 [00:02<01:23,  2.10it/s]

读取文件:   4%|▍         | 8/183 [00:02<01:03,  2.74it/s]

读取文件:   5%|▍         | 9/183 [00:03<01:03,  2.74it/s]

读取文件:   5%|▌         | 10/183 [00:03<01:07,  2.55it/s]

读取文件:   6%|▌         | 11/183 [00:03<00:53,  3.19it/s]

读取文件:   7%|▋         | 12/183 [00:03<00:46,  3.66it/s]

读取文件:   8%|▊         | 14/183 [00:04<00:29,  5.80it/s]

读取文件:   8%|▊         | 15/183 [00:04<00:37,  4.48it/s]

读取文件:   9%|▊         | 16/183 [00:07<02:57,  1.07s/it]

读取文件:   9%|▉         | 17/183 [00:08<02:13,  1.24it/s]

读取文件:  10%|▉         | 18/183 [00:09<02:40,  1.03it/s]

读取文件:  11%|█         | 20/183 [00:09<01:32,  1.77it/s]

读取文件:  11%|█▏        | 21/183 [00:09<01:14,  2.17it/s]

读取文件:  12%|█▏        | 22/183 [00:09<01:00,  2.65it/s]

读取文件:  13%|█▎        | 23/183 [00:10<01:00,  2.66it/s]

读取文件:  13%|█▎        | 24/183 [00:10<00:53,  3.00it/s]

读取文件:  14%|█▎        | 25/183 [00:10<00:52,  2.98it/s]

读取文件:  14%|█▍        | 26/183 [00:10<00:45,  3.46it/s]

读取文件:  15%|█▍        | 27/183 [00:11<00:53,  2.91it/s]

读取文件:  15%|█▌        | 28/183 [00:11<00:53,  2.88it/s]

读取文件:  16%|█▌        | 29/183 [00:12<00:50,  3.05it/s]

读取文件:  16%|█▋        | 30/183 [00:12<00:40,  3.79it/s]

读取文件:  17%|█▋        | 31/183 [00:13<01:23,  1.82it/s]

读取文件:  17%|█▋        | 32/183 [00:13<01:25,  1.77it/s]

读取文件:  19%|█▊        | 34/183 [00:14<00:52,  2.82it/s]

读取文件:  19%|█▉        | 35/183 [00:14<01:06,  2.22it/s]

读取文件:  20%|██        | 37/183 [00:15<00:42,  3.41it/s]

读取文件:  21%|██▏       | 39/183 [00:15<00:29,  4.82it/s]

读取文件:  22%|██▏       | 41/183 [00:15<00:21,  6.50it/s]

读取文件:  23%|██▎       | 43/183 [00:15<00:20,  6.86it/s]

读取文件:  25%|██▍       | 45/183 [00:16<00:24,  5.63it/s]

读取文件:  25%|██▌       | 46/183 [00:16<00:32,  4.23it/s]

读取文件:  26%|██▌       | 47/183 [00:17<00:40,  3.39it/s]

读取文件:  26%|██▌       | 48/183 [00:17<00:38,  3.49it/s]

读取文件:  27%|██▋       | 49/183 [00:17<00:40,  3.31it/s]

读取文件:  28%|██▊       | 51/183 [00:18<00:39,  3.35it/s]

读取文件:  29%|██▉       | 53/183 [00:18<00:32,  4.03it/s]

读取文件:  30%|██▉       | 54/183 [00:18<00:33,  3.90it/s]

读取文件:  31%|███       | 56/183 [00:19<00:29,  4.31it/s]

读取文件:  31%|███       | 57/183 [00:20<00:56,  2.25it/s]

读取文件:  32%|███▏      | 58/183 [00:20<00:52,  2.39it/s]

读取文件:  33%|███▎      | 60/183 [00:20<00:35,  3.51it/s]

读取文件:  33%|███▎      | 61/183 [00:21<00:42,  2.87it/s]

读取文件:  34%|███▍      | 62/183 [00:21<00:38,  3.11it/s]

读取文件:  35%|███▍      | 64/183 [00:22<00:30,  3.93it/s]

读取文件:  36%|███▌      | 65/183 [00:22<00:30,  3.90it/s]

读取文件:  36%|███▌      | 66/183 [00:22<00:37,  3.12it/s]

读取文件:  37%|███▋      | 67/183 [00:23<00:47,  2.42it/s]

读取文件:  37%|███▋      | 68/183 [00:24<00:51,  2.25it/s]

读取文件:  38%|███▊      | 69/183 [00:24<00:45,  2.48it/s]

读取文件:  38%|███▊      | 70/183 [00:24<00:42,  2.66it/s]

读取文件:  39%|███▉      | 71/183 [00:24<00:35,  3.15it/s]

读取文件:  39%|███▉      | 72/183 [00:25<00:32,  3.44it/s]

读取文件:  40%|███▉      | 73/183 [00:26<00:53,  2.04it/s]

读取文件:  40%|████      | 74/183 [00:29<02:34,  1.42s/it]

读取文件:  41%|████      | 75/183 [00:29<01:52,  1.04s/it]

读取文件:  42%|████▏     | 76/183 [00:29<01:22,  1.30it/s]

读取文件:  43%|████▎     | 78/183 [00:30<00:51,  2.05it/s]

读取文件:  43%|████▎     | 79/183 [00:30<00:42,  2.45it/s]

读取文件:  44%|████▎     | 80/183 [00:30<00:44,  2.30it/s]

读取文件:  44%|████▍     | 81/183 [00:31<00:57,  1.77it/s]

读取文件:  45%|████▌     | 83/183 [00:32<00:45,  2.21it/s]

读取文件:  46%|████▌     | 84/183 [00:32<00:36,  2.68it/s]

读取文件:  46%|████▋     | 85/183 [00:32<00:33,  2.95it/s]

读取文件:  47%|████▋     | 86/183 [00:33<00:28,  3.36it/s]

读取文件:  48%|████▊     | 88/183 [00:33<00:30,  3.13it/s]

读取文件:  49%|████▊     | 89/183 [00:33<00:29,  3.23it/s]

读取文件:  49%|████▉     | 90/183 [00:34<00:28,  3.30it/s]

读取文件:  50%|████▉     | 91/183 [00:34<00:24,  3.70it/s]

读取文件:  50%|█████     | 92/183 [00:34<00:23,  3.85it/s]

读取文件:  51%|█████▏    | 94/183 [00:35<00:23,  3.82it/s]

读取文件:  52%|█████▏    | 95/183 [00:35<00:21,  4.18it/s]

读取文件:  52%|█████▏    | 96/183 [00:36<00:34,  2.50it/s]

读取文件:  53%|█████▎    | 97/183 [00:36<00:39,  2.19it/s]

读取文件:  54%|█████▎    | 98/183 [00:37<00:35,  2.39it/s]

读取文件:  54%|█████▍    | 99/183 [00:37<00:28,  2.90it/s]

读取文件:  55%|█████▌    | 101/183 [00:38<00:29,  2.77it/s]

读取文件:  56%|█████▋    | 103/183 [00:38<00:19,  4.12it/s]

读取文件:  57%|█████▋    | 104/183 [00:38<00:16,  4.65it/s]

读取文件:  57%|█████▋    | 105/183 [00:38<00:18,  4.26it/s]

读取文件:  58%|█████▊    | 107/183 [00:38<00:13,  5.46it/s]

读取文件:  60%|█████▉    | 109/183 [00:39<00:11,  6.66it/s]

读取文件:  60%|██████    | 110/183 [00:39<00:11,  6.55it/s]

读取文件:  61%|██████    | 111/183 [00:39<00:10,  6.75it/s]

读取文件:  61%|██████    | 112/183 [00:39<00:10,  6.97it/s]

读取文件:  62%|██████▏   | 113/183 [00:39<00:12,  5.65it/s]

读取文件:  63%|██████▎   | 115/183 [00:40<00:11,  5.68it/s]

读取文件:  64%|██████▍   | 117/183 [00:40<00:11,  5.70it/s]

读取文件:  64%|██████▍   | 118/183 [00:40<00:11,  5.67it/s]

读取文件:  66%|██████▌   | 120/183 [00:40<00:09,  6.83it/s]

读取文件:  66%|██████▌   | 121/183 [00:41<00:10,  6.01it/s]

读取文件:  67%|██████▋   | 122/183 [00:42<00:27,  2.19it/s]

读取文件:  67%|██████▋   | 123/183 [00:42<00:22,  2.66it/s]

读取文件:  68%|██████▊   | 124/183 [00:46<01:12,  1.23s/it]

读取文件:  68%|██████▊   | 125/183 [00:46<00:56,  1.03it/s]

读取文件:  69%|██████▉   | 127/183 [00:46<00:31,  1.75it/s]

读取文件:  70%|██████▉   | 128/183 [00:46<00:26,  2.09it/s]

读取文件:  70%|███████   | 129/183 [00:47<00:22,  2.36it/s]

读取文件:  72%|███████▏  | 131/183 [00:47<00:21,  2.43it/s]

读取文件:  72%|███████▏  | 132/183 [00:49<00:31,  1.63it/s]

读取文件:  73%|███████▎  | 133/183 [00:49<00:24,  2.04it/s]

读取文件:  74%|███████▍  | 135/183 [00:50<00:27,  1.75it/s]

读取文件:  74%|███████▍  | 136/183 [00:51<00:26,  1.79it/s]

读取文件:  75%|███████▍  | 137/183 [00:52<00:29,  1.54it/s]

读取文件:  76%|███████▌  | 139/183 [00:52<00:18,  2.32it/s]

读取文件:  77%|███████▋  | 140/183 [00:53<00:21,  2.04it/s]

读取文件:  77%|███████▋  | 141/183 [00:53<00:17,  2.46it/s]

读取文件:  78%|███████▊  | 142/183 [00:53<00:17,  2.35it/s]

读取文件:  78%|███████▊  | 143/183 [00:54<00:17,  2.24it/s]

读取文件:  79%|███████▊  | 144/183 [00:55<00:22,  1.73it/s]

读取文件:  80%|███████▉  | 146/183 [00:55<00:13,  2.70it/s]

读取文件:  80%|████████  | 147/183 [00:55<00:15,  2.31it/s]

读取文件:  81%|████████  | 148/183 [00:56<00:12,  2.82it/s]

读取文件:  81%|████████▏ | 149/183 [00:56<00:10,  3.29it/s]

读取文件:  83%|████████▎ | 151/183 [00:56<00:10,  3.17it/s]

读取文件:  84%|████████▎ | 153/183 [00:57<00:07,  4.28it/s]

读取文件:  84%|████████▍ | 154/183 [00:57<00:06,  4.70it/s]

读取文件:  85%|████████▌ | 156/183 [00:57<00:06,  3.91it/s]

读取文件:  86%|████████▌ | 157/183 [00:58<00:06,  3.76it/s]

读取文件:  86%|████████▋ | 158/183 [00:58<00:05,  4.27it/s]

读取文件:  87%|████████▋ | 159/183 [00:58<00:06,  3.89it/s]

读取文件:  87%|████████▋ | 160/183 [00:58<00:05,  3.88it/s]

读取文件:  88%|████████▊ | 161/183 [00:59<00:05,  4.04it/s]

读取文件:  89%|████████▉ | 163/183 [00:59<00:03,  6.06it/s]

读取文件:  90%|█████████ | 165/183 [00:59<00:02,  7.78it/s]

读取文件:  91%|█████████▏| 167/183 [01:00<00:03,  4.19it/s]

读取文件:  92%|█████████▏| 168/183 [01:00<00:03,  4.59it/s]

读取文件:  93%|█████████▎| 170/183 [01:00<00:02,  5.68it/s]

读取文件:  94%|█████████▍| 172/183 [01:00<00:01,  7.44it/s]

读取文件:  95%|█████████▌| 174/183 [01:01<00:01,  5.72it/s]

读取文件:  96%|█████████▌| 175/183 [01:01<00:01,  5.67it/s]

读取文件:  97%|█████████▋| 177/183 [01:01<00:00,  6.69it/s]

读取文件:  98%|█████████▊| 179/183 [01:02<00:01,  3.45it/s]

读取文件:  99%|█████████▉| 181/183 [01:02<00:00,  4.64it/s]

读取文件:  99%|█████████▉| 182/183 [01:03<00:00,  4.52it/s]

读取文件: 100%|██████████| 183/183 [01:04<00:00,  2.17it/s]

读取文件: 100%|██████████| 183/183 [01:04<00:00,  2.84it/s]




Unnamed: 0,日期,高维情绪变量,股票编号
0,2024-12-27,"[-0.18730907142162323, -0.22363345324993134, 0...",601933
1,2024-11-18,"[-0.6446929574012756, 0.14099551737308502, 0.4...",601933
2,2024-11-17,"[-0.14928916096687317, -0.10993610322475433, 0...",601933
3,2024-11-17,"[-0.2892271876335144, 0.09153766185045242, 0.2...",601933
4,2024-11-17,"[-0.18511946499347687, 0.3309328258037567, 0.2...",601933
...,...,...,...
495326,2021-04-09,"[-0.14503593742847443, 0.25753334164619446, 0....",601919
495327,2021-04-09,"[-0.30359137058258057, 0.26559382677078247, 0....",601919
495328,2021-04-08,"[0.15271537005901337, -0.08832913637161255, 0....",601919
495329,2021-04-08,"[-0.004539322108030319, 0.3122449517250061, 0....",601919


In [8]:
## 读取股票回报率的数据
return_data = pd.read_csv(f'{Financial_Data_PATH}/日个股回报率.csv', dtype={'股票编号': str})
return_data

Unnamed: 0,股票编号,日期,交易量,收益率变化
0,000002,2021-06-01,60990961,-0.003745
1,000002,2021-06-02,85354506,0.006015
2,000002,2021-06-03,50594187,-0.003363
3,000002,2021-06-04,71422364,-0.012748
4,000002,2021-06-07,64745280,-0.014812
...,...,...,...,...
154877,688981,2024-11-20,58507495,-0.017071
154878,688981,2024-11-21,56197106,0.002358
154879,688981,2024-11-22,79240108,-0.050588
154880,688981,2024-11-25,76905909,-0.029402


In [9]:
## 进行左连接，以 guba_data 为主表
merged_data = pd.merge(guba_data, return_data[['股票编号', '日期', '交易量', '收益率变化']], 
                       on=['股票编号', '日期'], 
                       how='left')
merged_data = pd.merge(merged_data, cluster_data, on='股票编号', how='left')
merged_data = merged_data.dropna()

merged_data['日期'] = pd.to_datetime(merged_data['日期'])
merged_data = merged_data[(merged_data['日期'] > '2021-05-06') & (merged_data['日期'] < '2024-11-18')]
merged_data

Unnamed: 0,日期,高维情绪变量,股票编号,交易量,收益率变化,聚类标签,公司名称
11194,2024-11-15,"[-0.21272552013397217, 0.11189557611942291, 0....",600685,15188430.0,-0.034014,1.0,中船防务
11195,2024-11-15,"[-0.05771368741989136, 0.05143539980053902, 0....",600685,15188430.0,-0.034014,1.0,中船防务
11196,2024-11-14,"[-0.29904043674468994, -0.10444869101047516, 0...",600685,19288530.0,-0.028634,1.0,中船防务
11197,2024-11-14,"[-0.5945582985877991, 0.06150205433368683, 0.1...",600685,19288530.0,-0.028634,1.0,中船防务
11198,2024-11-13,"[-0.23249807953834534, 0.10749123245477676, 0....",600685,28835039.0,0.035741,1.0,中船防务
...,...,...,...,...,...,...,...
481589,2021-06-03,"[-0.11042080074548721, 0.00951363891363144, 0....",002230,38880163.0,-0.025195,2.0,科大讯飞
481590,2021-06-03,"[-0.03181225433945656, 0.00998214352875948, 0....",002230,38880163.0,-0.025195,2.0,科大讯飞
481591,2021-06-03,"[-0.20233358442783356, 0.05385609343647957, 0....",002230,38880163.0,-0.025195,2.0,科大讯飞
481592,2021-06-03,"[-0.3143976032733917, 0.08426006883382797, 0.5...",002230,38880163.0,-0.025195,2.0,科大讯飞


In [10]:
# 对情绪值列进行 Min-Max 标准化
def min_max_normalization(df, cols):
    for col in cols:
        min_val = df[col].min()
        max_val = df[col].max()
        df[col] = 2 * (df[col] - min_val) / (max_val - min_val) - 1
    return df

# 对每个股票编号的数据进行标准化和按日期汇总
def process_data(df):
    if MODEL != 'Deep-learning/Ours':
        df_processed = pd.DataFrame()  # 用于存储结果
        for stock_code, stock_data in df.groupby('股票编号'):
            # 对每个股票编号内的数据进行标准化
            stock_data = min_max_normalization(stock_data, ['上证综合情绪值', '沪深300情绪值', '创业板情绪值'])
            
            # 按日期汇总数据，同时保留股票编号
            stock_summary = stock_data.groupby('日期').agg({
                '股票编号': 'first',  # 保留股票编号（在同一日期内它是相同的，使用 'first'）
                '上证综合情绪值': 'mean',  # 上证综合情绪值按日期取均值
                '沪深300情绪值': 'mean',  # 沪深300情绪值按日期取均值
                '创业板情绪值': 'mean',  # 创业板情绪值按日期取均值
                '交易量': 'mean',  # 交易量按日期求和
                '收益率变化': 'mean',  # 收益率变化按日期取均值
                '聚类标签': 'first', # 保留聚类标签（在同一日期内它是相同的，使用 'first'）
                '公司名称': 'first', # 保留公司名称（在同一日期内它是相同的，使用 'first'）
            }).reset_index(drop=False)
            
            # 合并每个股票的汇总数据
            df_processed = pd.concat([df_processed, stock_summary], ignore_index=True)
            df_processed = df_processed.sort_values(by=['股票编号', '日期'], ascending=[True, True])
        return df_processed
    else:
        df_processed = []    
        # 确保高维情绪变量是 list 类型（如果还没转过）
        if isinstance(df['高维情绪变量'].iloc[0], str):
            df['高维情绪变量'] = df['高维情绪变量'].progress_apply(ast.literal_eval)
        # 对每个股票编号处理，加 tqdm 进度条
        for stock_code, stock_data in tqdm(df.groupby('股票编号'), desc="Processing stocks"):
            for date, group in stock_data.groupby('日期'):
                emotion_matrix = np.array(group['高维情绪变量'].tolist())  # (样本数, 维度)

                avg_emotion = emotion_matrix.mean(axis=0).round(3).tolist()

                summary_row = {
                    '股票编号': stock_code,
                    '日期': date,
                    '高维情绪变量': avg_emotion,
                    '交易量': group['交易量'].mean(),
                    '收益率变化': group['收益率变化'].mean(),
                    '聚类标签': group['聚类标签'].iloc[0], # 保留聚类标签
                    '公司名称': group['公司名称'].iloc[0], # 保留公司名称
                }
                df_processed.append(summary_row)


    return pd.DataFrame(df_processed)

final_data = process_data(merged_data)
final_data

  0%|          | 0/37031 [00:00<?, ?it/s]

  0%|          | 14/37031 [00:00<04:24, 139.74it/s]

  0%|          | 73/37031 [00:00<01:31, 403.66it/s]

  0%|          | 132/37031 [00:00<01:16, 484.79it/s]

  1%|          | 191/37031 [00:00<01:10, 523.54it/s]

  1%|          | 250/37031 [00:00<01:07, 544.67it/s]

  1%|          | 309/37031 [00:00<01:05, 557.14it/s]

  1%|          | 368/37031 [00:00<01:04, 565.08it/s]

  1%|          | 425/37031 [00:00<01:17, 474.64it/s]

  1%|▏         | 483/37031 [00:00<01:12, 503.27it/s]

  1%|▏         | 542/37031 [00:01<01:09, 525.55it/s]

  2%|▏         | 601/37031 [00:01<01:07, 541.52it/s]

  2%|▏         | 660/37031 [00:01<01:05, 552.82it/s]

  2%|▏         | 719/37031 [00:01<01:04, 561.22it/s]

  2%|▏         | 777/37031 [00:01<01:03, 566.53it/s]

  2%|▏         | 835/37031 [00:01<01:13, 495.74it/s]

  2%|▏         | 893/37031 [00:01<01:09, 518.20it/s]

  3%|▎         | 952/37031 [00:01<01:07, 535.90it/s]

  3%|▎         | 1011/37031 [00:01<01:05, 549.25it/s]

  3%|▎         | 1070/37031 [00:02<01:04, 559.48it/s]

  3%|▎         | 1129/37031 [00:02<01:03, 566.56it/s]

  3%|▎         | 1188/37031 [00:02<01:02, 571.55it/s]

  3%|▎         | 1246/37031 [00:02<01:12, 494.90it/s]

  4%|▎         | 1305/37031 [00:02<01:08, 518.34it/s]

  4%|▎         | 1364/37031 [00:02<01:06, 535.99it/s]

  4%|▍         | 1423/37031 [00:02<01:04, 549.11it/s]

  4%|▍         | 1482/37031 [00:02<01:03, 559.14it/s]

  4%|▍         | 1540/37031 [00:02<01:02, 564.59it/s]

  4%|▍         | 1599/37031 [00:02<01:02, 569.35it/s]

  4%|▍         | 1657/37031 [00:03<01:15, 468.96it/s]

  5%|▍         | 1715/37031 [00:03<01:11, 497.25it/s]

  5%|▍         | 1774/37031 [00:03<01:07, 519.72it/s]

  5%|▍         | 1832/37031 [00:03<01:05, 536.14it/s]

  5%|▌         | 1890/37031 [00:03<01:04, 548.31it/s]

  5%|▌         | 1949/37031 [00:03<01:02, 557.65it/s]

  5%|▌         | 2007/37031 [00:03<01:02, 564.02it/s]

  6%|▌         | 2065/37031 [00:03<01:14, 470.39it/s]

  6%|▌         | 2123/37031 [00:04<01:10, 497.21it/s]

  6%|▌         | 2182/37031 [00:04<01:06, 520.28it/s]

  6%|▌         | 2241/37031 [00:04<01:04, 537.86it/s]

  6%|▌         | 2299/37031 [00:04<01:03, 548.71it/s]

  6%|▋         | 2358/37031 [00:04<01:02, 558.14it/s]

  7%|▋         | 2417/37031 [00:04<01:01, 564.99it/s]

  7%|▋         | 2475/37031 [00:04<01:11, 483.01it/s]

  7%|▋         | 2534/37031 [00:04<01:07, 508.83it/s]

  7%|▋         | 2593/37031 [00:04<01:05, 528.98it/s]

  7%|▋         | 2652/37031 [00:05<01:03, 543.95it/s]

  7%|▋         | 2711/37031 [00:05<01:01, 554.60it/s]

  7%|▋         | 2770/37031 [00:05<01:00, 562.42it/s]

  8%|▊         | 2829/37031 [00:05<01:00, 568.05it/s]

  8%|▊         | 2887/37031 [00:05<01:13, 466.53it/s]

  8%|▊         | 2945/37031 [00:05<01:08, 494.62it/s]

  8%|▊         | 3003/37031 [00:05<01:05, 517.21it/s]

  8%|▊         | 3061/37031 [00:05<01:03, 534.41it/s]

  8%|▊         | 3119/37031 [00:05<01:01, 547.21it/s]

  9%|▊         | 3177/37031 [00:05<01:00, 556.32it/s]

  9%|▊         | 3235/37031 [00:06<01:00, 562.10it/s]

  9%|▉         | 3292/37031 [00:06<01:13, 460.05it/s]

  9%|▉         | 3350/37031 [00:06<01:08, 490.42it/s]

  9%|▉         | 3409/37031 [00:06<01:05, 514.67it/s]

  9%|▉         | 3468/37031 [00:06<01:02, 533.59it/s]

 10%|▉         | 3527/37031 [00:06<01:01, 547.64it/s]

 10%|▉         | 3586/37031 [00:06<01:00, 557.38it/s]

 10%|▉         | 3645/37031 [00:06<00:59, 564.12it/s]

 10%|▉         | 3703/37031 [00:07<01:10, 472.52it/s]

 10%|█         | 3762/37031 [00:07<01:06, 500.95it/s]

 10%|█         | 3821/37031 [00:07<01:03, 523.11it/s]

 10%|█         | 3880/37031 [00:07<01:01, 539.75it/s]

 11%|█         | 3939/37031 [00:07<00:59, 551.96it/s]

 11%|█         | 3998/37031 [00:07<00:58, 560.40it/s]

 11%|█         | 4057/37031 [00:07<00:58, 566.53it/s]

 11%|█         | 4115/37031 [00:07<01:09, 470.78it/s]

 11%|█▏        | 4173/37031 [00:07<01:05, 498.53it/s]

 11%|█▏        | 4231/37031 [00:08<01:03, 519.08it/s]

 12%|█▏        | 4289/37031 [00:08<01:01, 534.21it/s]

 12%|█▏        | 4347/37031 [00:08<00:59, 545.17it/s]

 12%|█▏        | 4405/37031 [00:08<00:58, 553.57it/s]

 12%|█▏        | 4463/37031 [00:08<00:58, 561.10it/s]

 12%|█▏        | 4520/37031 [00:08<01:13, 439.72it/s]

 12%|█▏        | 4578/37031 [00:08<01:08, 474.06it/s]

 13%|█▎        | 4637/37031 [00:08<01:04, 502.11it/s]

 13%|█▎        | 4696/37031 [00:08<01:01, 523.74it/s]

 13%|█▎        | 4755/37031 [00:09<00:59, 540.31it/s]

 13%|█▎        | 4814/37031 [00:09<00:58, 552.23it/s]

 13%|█▎        | 4873/37031 [00:09<00:57, 561.25it/s]

 13%|█▎        | 4931/37031 [00:09<01:09, 463.54it/s]

 13%|█▎        | 4989/37031 [00:09<01:05, 492.90it/s]

 14%|█▎        | 5048/37031 [00:09<01:01, 516.46it/s]

 14%|█▍        | 5107/37031 [00:09<00:59, 534.55it/s]

 14%|█▍        | 5166/37031 [00:09<00:58, 548.00it/s]

 14%|█▍        | 5225/37031 [00:09<00:56, 558.08it/s]

 14%|█▍        | 5284/37031 [00:10<00:56, 565.51it/s]

 14%|█▍        | 5342/37031 [00:10<01:08, 463.16it/s]

 15%|█▍        | 5401/37031 [00:10<01:04, 493.44it/s]

 15%|█▍        | 5459/37031 [00:10<01:01, 515.94it/s]

 15%|█▍        | 5517/37031 [00:10<00:59, 533.17it/s]

 15%|█▌        | 5575/37031 [00:10<00:57, 545.93it/s]

 15%|█▌        | 5633/37031 [00:10<00:56, 555.23it/s]

 15%|█▌        | 5691/37031 [00:10<00:55, 562.00it/s]

 16%|█▌        | 5750/37031 [00:10<00:55, 567.55it/s]

 16%|█▌        | 5808/37031 [00:11<01:09, 447.75it/s]

 16%|█▌        | 5866/37031 [00:11<01:04, 480.11it/s]

 16%|█▌        | 5924/37031 [00:11<01:01, 505.52it/s]

 16%|█▌        | 5982/37031 [00:11<00:59, 525.13it/s]

 16%|█▋        | 6040/37031 [00:11<00:57, 540.08it/s]

 16%|█▋        | 6098/37031 [00:11<00:56, 550.82it/s]

 17%|█▋        | 6156/37031 [00:11<00:55, 559.25it/s]

 17%|█▋        | 6213/37031 [00:11<01:08, 453.03it/s]

 17%|█▋        | 6270/37031 [00:11<01:03, 482.25it/s]

 17%|█▋        | 6328/37031 [00:12<01:00, 506.33it/s]

 17%|█▋        | 6386/37031 [00:12<00:58, 526.06it/s]

 17%|█▋        | 6444/37031 [00:12<00:56, 541.00it/s]

 18%|█▊        | 6502/37031 [00:12<00:55, 550.73it/s]

 18%|█▊        | 6561/37031 [00:12<00:54, 559.75it/s]

 18%|█▊        | 6618/37031 [00:12<01:08, 441.72it/s]

 18%|█▊        | 6677/37031 [00:12<01:03, 476.36it/s]

 18%|█▊        | 6736/37031 [00:12<01:00, 503.84it/s]

 18%|█▊        | 6794/37031 [00:12<00:57, 524.19it/s]

 19%|█▊        | 6852/37031 [00:13<00:55, 539.54it/s]

 19%|█▊        | 6910/37031 [00:13<00:54, 550.53it/s]

 19%|█▉        | 6968/37031 [00:13<00:53, 558.23it/s]

 19%|█▉        | 7025/37031 [00:13<01:08, 434.88it/s]

 19%|█▉        | 7083/37031 [00:13<01:03, 469.70it/s]

 19%|█▉        | 7141/37031 [00:13<01:00, 497.69it/s]

 19%|█▉        | 7199/37031 [00:13<00:57, 519.55it/s]

 20%|█▉        | 7257/37031 [00:13<00:55, 534.03it/s]

 20%|█▉        | 7315/37031 [00:13<00:54, 546.49it/s]

 20%|█▉        | 7373/37031 [00:14<00:53, 555.81it/s]

 20%|██        | 7430/37031 [00:14<01:06, 443.61it/s]

 20%|██        | 7489/37031 [00:14<01:01, 477.98it/s]

 20%|██        | 7547/37031 [00:14<00:58, 504.46it/s]

 21%|██        | 7605/37031 [00:14<00:56, 524.94it/s]

 21%|██        | 7664/37031 [00:14<00:54, 540.59it/s]

 21%|██        | 7723/37031 [00:14<00:53, 552.11it/s]

 21%|██        | 7781/37031 [00:14<00:52, 560.07it/s]

 21%|██        | 7840/37031 [00:14<00:51, 566.14it/s]

 21%|██▏       | 7898/37031 [00:15<01:05, 447.44it/s]

 21%|██▏       | 7957/37031 [00:15<01:00, 480.87it/s]

 22%|██▏       | 8015/37031 [00:15<00:57, 506.65it/s]

 22%|██▏       | 8074/37031 [00:15<00:54, 526.93it/s]

 22%|██▏       | 8132/37031 [00:15<00:53, 541.52it/s]

 22%|██▏       | 8190/37031 [00:15<00:52, 551.80it/s]

 22%|██▏       | 8248/37031 [00:15<00:51, 557.24it/s]

 22%|██▏       | 8305/37031 [00:15<01:07, 427.74it/s]

 23%|██▎       | 8363/37031 [00:16<01:01, 463.22it/s]

 23%|██▎       | 8421/37031 [00:16<00:58, 492.79it/s]

 23%|██▎       | 8479/37031 [00:16<00:55, 514.85it/s]

 23%|██▎       | 8537/37031 [00:16<00:53, 532.09it/s]

 23%|██▎       | 8595/37031 [00:16<00:52, 545.58it/s]

 23%|██▎       | 8653/37031 [00:16<00:51, 554.88it/s]

 24%|██▎       | 8710/37031 [00:16<01:05, 434.94it/s]

 24%|██▎       | 8768/37031 [00:16<01:00, 470.35it/s]

 24%|██▍       | 8827/37031 [00:16<00:56, 499.11it/s]

 24%|██▍       | 8886/37031 [00:17<00:53, 521.41it/s]

 24%|██▍       | 8945/37031 [00:17<00:52, 538.47it/s]

 24%|██▍       | 9004/37031 [00:17<00:50, 551.01it/s]

 24%|██▍       | 9063/37031 [00:17<00:49, 560.07it/s]

 25%|██▍       | 9121/37031 [00:17<01:05, 428.06it/s]

 25%|██▍       | 9179/37031 [00:17<01:00, 463.32it/s]

 25%|██▍       | 9238/37031 [00:17<00:56, 493.78it/s]

 25%|██▌       | 9297/37031 [00:17<00:53, 517.59it/s]

 25%|██▌       | 9356/37031 [00:18<00:51, 534.96it/s]

 25%|██▌       | 9414/37031 [00:18<00:50, 547.32it/s]

 26%|██▌       | 9472/37031 [00:18<00:49, 556.40it/s]

 26%|██▌       | 9530/37031 [00:18<01:06, 415.37it/s]

 26%|██▌       | 9588/37031 [00:18<01:00, 453.25it/s]

 26%|██▌       | 9646/37031 [00:18<00:56, 484.51it/s]

 26%|██▌       | 9704/37031 [00:18<00:53, 509.39it/s]

 26%|██▋       | 9762/37031 [00:18<00:51, 528.16it/s]

 27%|██▋       | 9819/37031 [00:18<00:50, 539.87it/s]

 27%|██▋       | 9877/37031 [00:19<00:49, 550.67it/s]

 27%|██▋       | 9935/37031 [00:19<00:48, 558.83it/s]

 27%|██▋       | 9992/37031 [00:19<01:02, 429.25it/s]

 27%|██▋       | 10051/37031 [00:19<00:57, 466.30it/s]

 27%|██▋       | 10109/37031 [00:19<00:54, 493.54it/s]

 27%|██▋       | 10167/37031 [00:19<00:52, 516.56it/s]

 28%|██▊       | 10226/37031 [00:19<00:50, 534.60it/s]

 28%|██▊       | 10285/37031 [00:19<00:48, 548.02it/s]

 28%|██▊       | 10344/37031 [00:19<00:47, 557.55it/s]

 28%|██▊       | 10401/37031 [00:20<01:02, 428.23it/s]

 28%|██▊       | 10460/37031 [00:20<00:57, 465.28it/s]

 28%|██▊       | 10518/37031 [00:20<00:53, 493.27it/s]

 29%|██▊       | 10577/37031 [00:20<00:51, 516.79it/s]

 29%|██▊       | 10635/37031 [00:20<00:49, 534.07it/s]

 29%|██▉       | 10693/37031 [00:20<00:48, 547.01it/s]

 29%|██▉       | 10751/37031 [00:20<00:47, 556.10it/s]

 29%|██▉       | 10808/37031 [00:20<01:03, 411.52it/s]

 29%|██▉       | 10866/37031 [00:21<00:58, 450.63it/s]

 29%|██▉       | 10924/37031 [00:21<00:54, 482.21it/s]

 30%|██▉       | 10982/37031 [00:21<00:51, 507.12it/s]

 30%|██▉       | 11040/37031 [00:21<00:49, 526.81it/s]

 30%|██▉       | 11097/37031 [00:21<00:48, 538.20it/s]

 30%|███       | 11155/37031 [00:21<00:47, 548.45it/s]

 30%|███       | 11213/37031 [00:21<00:46, 557.38it/s]

 30%|███       | 11270/37031 [00:21<01:01, 421.36it/s]

 31%|███       | 11328/37031 [00:22<00:55, 459.23it/s]

 31%|███       | 11386/37031 [00:22<00:52, 489.68it/s]

 31%|███       | 11444/37031 [00:22<00:49, 513.49it/s]

 31%|███       | 11502/37031 [00:22<00:48, 531.77it/s]

 31%|███       | 11561/37031 [00:22<00:46, 545.71it/s]

 31%|███▏      | 11620/37031 [00:22<00:45, 556.07it/s]

 32%|███▏      | 11677/37031 [00:22<01:00, 421.02it/s]

 32%|███▏      | 11736/37031 [00:22<00:55, 459.55it/s]

 32%|███▏      | 11795/37031 [00:22<00:51, 490.62it/s]

 32%|███▏      | 11854/37031 [00:23<00:48, 514.88it/s]

 32%|███▏      | 11912/37031 [00:23<00:47, 532.33it/s]

 32%|███▏      | 11970/37031 [00:23<00:45, 545.64it/s]

 32%|███▏      | 12028/37031 [00:23<00:45, 554.81it/s]

 33%|███▎      | 12085/37031 [00:23<01:01, 403.13it/s]

 33%|███▎      | 12143/37031 [00:23<00:56, 443.55it/s]

 33%|███▎      | 12201/37031 [00:23<00:52, 476.85it/s]

 33%|███▎      | 12259/37031 [00:23<00:49, 503.75it/s]

 33%|███▎      | 12317/37031 [00:23<00:47, 523.93it/s]

 33%|███▎      | 12375/37031 [00:24<00:45, 537.27it/s]

 34%|███▎      | 12433/37031 [00:24<00:44, 549.05it/s]

 34%|███▎      | 12490/37031 [00:24<00:59, 411.93it/s]

 34%|███▍      | 12549/37031 [00:24<00:54, 452.11it/s]

 34%|███▍      | 12608/37031 [00:24<00:50, 484.72it/s]

 34%|███▍      | 12667/37031 [00:24<00:47, 510.80it/s]

 34%|███▍      | 12726/37031 [00:24<00:45, 530.49it/s]

 35%|███▍      | 12785/37031 [00:24<00:44, 544.84it/s]

 35%|███▍      | 12844/37031 [00:25<00:43, 555.14it/s]

 35%|███▍      | 12903/37031 [00:25<00:42, 562.49it/s]

 35%|███▌      | 12961/37031 [00:25<00:57, 417.09it/s]

 35%|███▌      | 13019/37031 [00:25<00:52, 454.97it/s]

 35%|███▌      | 13078/37031 [00:25<00:49, 486.73it/s]

 35%|███▌      | 13136/37031 [00:25<00:46, 510.64it/s]

 36%|███▌      | 13194/37031 [00:25<00:45, 529.23it/s]

 36%|███▌      | 13252/37031 [00:25<00:43, 543.11it/s]

 36%|███▌      | 13310/37031 [00:25<00:42, 552.80it/s]

 36%|███▌      | 13367/37031 [00:26<00:59, 397.43it/s]

 36%|███▋      | 13425/37031 [00:26<00:53, 438.65it/s]

 36%|███▋      | 13483/37031 [00:26<00:49, 473.03it/s]

 37%|███▋      | 13541/37031 [00:26<00:46, 500.39it/s]

 37%|███▋      | 13599/37031 [00:26<00:44, 521.43it/s]

 37%|███▋      | 13657/37031 [00:26<00:43, 537.34it/s]

 37%|███▋      | 13715/37031 [00:26<00:42, 549.21it/s]

 37%|███▋      | 13773/37031 [00:26<00:41, 557.60it/s]

 37%|███▋      | 13830/37031 [00:27<01:02, 372.42it/s]

 37%|███▋      | 13877/37031 [00:27<01:07, 344.91it/s]

 38%|███▊      | 13924/37031 [00:27<01:02, 371.27it/s]

 38%|███▊      | 13967/37031 [00:27<01:06, 344.60it/s]

 38%|███▊      | 14006/37031 [00:27<01:07, 339.83it/s]

 38%|███▊      | 14043/37031 [00:27<01:06, 345.78it/s]

 38%|███▊      | 14080/37031 [00:27<01:10, 327.18it/s]

 38%|███▊      | 14118/37031 [00:28<01:07, 338.21it/s]

 38%|███▊      | 14154/37031 [00:28<01:11, 321.52it/s]

 38%|███▊      | 14188/37031 [00:28<01:11, 319.29it/s]

 38%|███▊      | 14221/37031 [00:28<01:42, 222.91it/s]

 39%|███▊      | 14280/37031 [00:28<01:16, 298.56it/s]

 39%|███▊      | 14338/37031 [00:28<01:02, 362.90it/s]

 39%|███▉      | 14396/37031 [00:28<00:54, 416.25it/s]

 39%|███▉      | 14454/37031 [00:28<00:49, 458.38it/s]

 39%|███▉      | 14512/37031 [00:29<00:45, 491.16it/s]

 39%|███▉      | 14570/37031 [00:29<00:43, 513.81it/s]

 40%|███▉      | 14628/37031 [00:29<00:42, 532.21it/s]

 40%|███▉      | 14684/37031 [00:29<00:59, 375.86it/s]

 40%|███▉      | 14742/37031 [00:29<00:52, 420.85it/s]

 40%|███▉      | 14800/37031 [00:29<00:48, 458.95it/s]

 40%|████      | 14858/37031 [00:29<00:45, 489.64it/s]

 40%|████      | 14916/37031 [00:29<00:43, 513.49it/s]

 40%|████      | 14974/37031 [00:29<00:41, 531.79it/s]

 41%|████      | 15033/37031 [00:30<00:40, 546.06it/s]

 41%|████      | 15090/37031 [00:30<00:56, 389.43it/s]

 41%|████      | 15149/37031 [00:30<00:50, 432.90it/s]

 41%|████      | 15208/37031 [00:30<00:46, 469.10it/s]

 41%|████      | 15267/37031 [00:30<00:43, 498.20it/s]

 41%|████▏     | 15326/37031 [00:30<00:41, 520.85it/s]

 42%|████▏     | 15385/37031 [00:30<00:40, 537.50it/s]

 42%|████▏     | 15444/37031 [00:30<00:39, 549.71it/s]

 42%|████▏     | 15502/37031 [00:31<00:38, 556.72it/s]

 42%|████▏     | 15559/37031 [00:31<00:54, 396.33it/s]

 42%|████▏     | 15617/37031 [00:31<00:48, 437.42it/s]

 42%|████▏     | 15675/37031 [00:31<00:45, 471.69it/s]

 42%|████▏     | 15733/37031 [00:31<00:42, 498.75it/s]

 43%|████▎     | 15791/37031 [00:31<00:40, 520.27it/s]

 43%|████▎     | 15849/37031 [00:31<00:39, 536.37it/s]

 43%|████▎     | 15907/37031 [00:31<00:38, 548.30it/s]

 43%|████▎     | 15964/37031 [00:32<00:54, 384.32it/s]

 43%|████▎     | 16022/37031 [00:32<00:49, 427.41it/s]

 43%|████▎     | 16080/37031 [00:32<00:45, 462.84it/s]

 44%|████▎     | 16139/37031 [00:32<00:42, 493.21it/s]

 44%|████▎     | 16198/37031 [00:32<00:40, 517.07it/s]

 44%|████▍     | 16257/37031 [00:32<00:38, 534.80it/s]

 44%|████▍     | 16316/37031 [00:32<00:37, 548.23it/s]

 44%|████▍     | 16375/37031 [00:32<00:37, 558.15it/s]

 44%|████▍     | 16433/37031 [00:33<00:51, 398.75it/s]

 45%|████▍     | 16492/37031 [00:33<00:46, 440.32it/s]

 45%|████▍     | 16550/37031 [00:33<00:43, 473.51it/s]

 45%|████▍     | 16608/37031 [00:33<00:40, 500.80it/s]

 45%|████▌     | 16666/37031 [00:33<00:39, 521.93it/s]

 45%|████▌     | 16724/37031 [00:33<00:37, 537.42it/s]

 45%|████▌     | 16782/37031 [00:33<00:36, 549.18it/s]

 45%|████▌     | 16839/37031 [00:33<00:52, 383.88it/s]

 46%|████▌     | 16897/37031 [00:34<00:47, 426.82it/s]

 46%|████▌     | 16955/37031 [00:34<00:43, 463.11it/s]

 46%|████▌     | 17013/37031 [00:34<00:40, 492.03it/s]

 46%|████▌     | 17071/37031 [00:34<00:38, 514.70it/s]

 46%|████▋     | 17129/37031 [00:34<00:37, 532.27it/s]

 46%|████▋     | 17187/37031 [00:34<00:36, 545.21it/s]

 47%|████▋     | 17245/37031 [00:34<00:35, 554.37it/s]

 47%|████▋     | 17302/37031 [00:34<00:52, 379.27it/s]

 47%|████▋     | 17360/37031 [00:35<00:46, 423.00it/s]

 47%|████▋     | 17418/37031 [00:35<00:42, 460.15it/s]

 47%|████▋     | 17476/37031 [00:35<00:39, 489.78it/s]

 47%|████▋     | 17534/37031 [00:35<00:38, 513.06it/s]

 48%|████▊     | 17592/37031 [00:35<00:36, 530.84it/s]

 48%|████▊     | 17650/37031 [00:35<00:35, 544.07it/s]

 48%|████▊     | 17707/37031 [00:35<00:50, 383.78it/s]

 48%|████▊     | 17765/37031 [00:35<00:45, 426.91it/s]

 48%|████▊     | 17823/37031 [00:35<00:41, 463.01it/s]

 48%|████▊     | 17881/37031 [00:36<00:38, 491.43it/s]

 48%|████▊     | 17939/37031 [00:36<00:37, 514.55it/s]

 49%|████▊     | 17997/37031 [00:36<00:35, 532.32it/s]

 49%|████▉     | 18055/37031 [00:36<00:34, 543.31it/s]

 49%|████▉     | 18113/37031 [00:36<00:34, 553.37it/s]

 49%|████▉     | 18170/37031 [00:36<00:48, 385.07it/s]

 49%|████▉     | 18228/37031 [00:36<00:43, 428.09it/s]

 49%|████▉     | 18286/37031 [00:36<00:40, 464.62it/s]

 50%|████▉     | 18344/37031 [00:37<00:37, 493.84it/s]

 50%|████▉     | 18402/37031 [00:37<00:36, 516.23it/s]

 50%|████▉     | 18460/37031 [00:37<00:34, 533.02it/s]

 50%|█████     | 18518/37031 [00:37<00:33, 545.76it/s]

 50%|█████     | 18575/37031 [00:37<00:48, 380.06it/s]

 50%|█████     | 18633/37031 [00:37<00:43, 423.81it/s]

 50%|█████     | 18691/37031 [00:37<00:39, 460.73it/s]

 51%|█████     | 18749/37031 [00:37<00:37, 490.73it/s]

 51%|█████     | 18807/37031 [00:37<00:35, 514.24it/s]

 51%|█████     | 18865/37031 [00:38<00:34, 532.08it/s]

 51%|█████     | 18923/37031 [00:38<00:33, 544.97it/s]

 51%|█████▏    | 18981/37031 [00:38<00:32, 554.31it/s]

 51%|█████▏    | 19038/37031 [00:38<00:47, 380.59it/s]

 52%|█████▏    | 19096/37031 [00:38<00:42, 423.25it/s]

 52%|█████▏    | 19154/37031 [00:38<00:38, 460.47it/s]

 52%|█████▏    | 19212/37031 [00:38<00:36, 490.77it/s]

 52%|█████▏    | 19270/37031 [00:38<00:34, 514.44it/s]

 52%|█████▏    | 19328/37031 [00:39<00:33, 532.19it/s]

 52%|█████▏    | 19386/37031 [00:39<00:32, 545.26it/s]

 53%|█████▎    | 19443/37031 [00:39<00:46, 375.62it/s]

 53%|█████▎    | 19501/37031 [00:39<00:41, 420.05it/s]

 53%|█████▎    | 19559/37031 [00:39<00:38, 457.58it/s]

 53%|█████▎    | 19617/37031 [00:39<00:35, 488.12it/s]

 53%|█████▎    | 19675/37031 [00:39<00:33, 512.16it/s]

 53%|█████▎    | 19733/37031 [00:39<00:32, 530.34it/s]

 53%|█████▎    | 19791/37031 [00:40<00:31, 544.22it/s]

 54%|█████▎    | 19849/37031 [00:40<00:31, 552.93it/s]

 54%|█████▍    | 19906/37031 [00:40<00:46, 369.15it/s]

 54%|█████▍    | 19964/37031 [00:40<00:41, 413.28it/s]

 54%|█████▍    | 20022/37031 [00:40<00:37, 452.06it/s]

 54%|█████▍    | 20081/37031 [00:40<00:34, 484.72it/s]

 54%|█████▍    | 20139/37031 [00:40<00:33, 509.70it/s]

 55%|█████▍    | 20197/37031 [00:40<00:31, 528.88it/s]

 55%|█████▍    | 20255/37031 [00:40<00:30, 543.18it/s]

 55%|█████▍    | 20313/37031 [00:41<00:30, 553.21it/s]

 55%|█████▌    | 20370/37031 [00:41<00:46, 360.11it/s]

 55%|█████▌    | 20428/37031 [00:41<00:40, 406.21it/s]

 55%|█████▌    | 20486/37031 [00:41<00:37, 445.84it/s]

 55%|█████▌    | 20544/37031 [00:41<00:34, 478.61it/s]

 56%|█████▌    | 20602/37031 [00:41<00:32, 504.80it/s]

 56%|█████▌    | 20660/37031 [00:41<00:31, 524.38it/s]

 56%|█████▌    | 20718/37031 [00:41<00:30, 539.03it/s]

 56%|█████▌    | 20775/37031 [00:42<00:45, 356.30it/s]

 56%|█████▋    | 20833/37031 [00:42<00:40, 402.70it/s]

 56%|█████▋    | 20890/37031 [00:42<00:36, 440.95it/s]

 57%|█████▋    | 20948/37031 [00:42<00:33, 474.87it/s]

 57%|█████▋    | 21006/37031 [00:42<00:31, 501.79it/s]

 57%|█████▋    | 21064/37031 [00:42<00:30, 522.59it/s]

 57%|█████▋    | 21122/37031 [00:42<00:29, 538.22it/s]

 57%|█████▋    | 21180/37031 [00:42<00:28, 549.89it/s]

 57%|█████▋    | 21237/37031 [00:43<00:43, 364.39it/s]

 58%|█████▊    | 21295/37031 [00:43<00:38, 409.98it/s]

 58%|█████▊    | 21353/37031 [00:43<00:34, 449.34it/s]

 58%|█████▊    | 21411/37031 [00:43<00:32, 481.43it/s]

 58%|█████▊    | 21469/37031 [00:43<00:30, 507.10it/s]

 58%|█████▊    | 21527/37031 [00:43<00:29, 526.60it/s]

 58%|█████▊    | 21585/37031 [00:43<00:28, 541.22it/s]

 58%|█████▊    | 21643/37031 [00:43<00:27, 551.63it/s]

 59%|█████▊    | 21700/37031 [00:44<00:41, 369.23it/s]

 59%|█████▉    | 21757/37031 [00:44<00:37, 411.03it/s]

 59%|█████▉    | 21815/37031 [00:44<00:33, 450.24it/s]

 59%|█████▉    | 21873/37031 [00:44<00:31, 482.61it/s]

 59%|█████▉    | 21931/37031 [00:44<00:29, 507.85it/s]

 59%|█████▉    | 21989/37031 [00:44<00:28, 527.18it/s]

 60%|█████▉    | 22047/37031 [00:44<00:27, 541.71it/s]

 60%|█████▉    | 22104/37031 [00:45<00:40, 364.77it/s]

 60%|█████▉    | 22162/37031 [00:45<00:36, 410.54it/s]

 60%|██████    | 22220/37031 [00:45<00:32, 449.61it/s]

 60%|██████    | 22278/37031 [00:45<00:30, 481.72it/s]

 60%|██████    | 22336/37031 [00:45<00:28, 507.56it/s]

 60%|██████    | 22394/37031 [00:45<00:27, 526.64it/s]

 61%|██████    | 22452/37031 [00:45<00:26, 541.07it/s]

 61%|██████    | 22510/37031 [00:45<00:26, 551.81it/s]

 61%|██████    | 22567/37031 [00:46<00:39, 366.15it/s]

 61%|██████    | 22625/37031 [00:46<00:35, 410.83it/s]

 61%|██████▏   | 22684/37031 [00:46<00:31, 450.99it/s]

 61%|██████▏   | 22743/37031 [00:46<00:29, 483.82it/s]

 62%|██████▏   | 22802/37031 [00:46<00:27, 509.87it/s]

 62%|██████▏   | 22861/37031 [00:46<00:26, 529.90it/s]

 62%|██████▏   | 22920/37031 [00:46<00:25, 545.21it/s]

 62%|██████▏   | 22979/37031 [00:46<00:25, 556.50it/s]

 62%|██████▏   | 23037/37031 [00:47<00:38, 367.13it/s]

 62%|██████▏   | 23096/37031 [00:47<00:33, 413.25it/s]

 63%|██████▎   | 23155/37031 [00:47<00:30, 452.80it/s]

 63%|██████▎   | 23213/37031 [00:47<00:28, 484.31it/s]

 63%|██████▎   | 23272/37031 [00:47<00:26, 510.04it/s]

 63%|██████▎   | 23331/37031 [00:47<00:25, 529.68it/s]

 63%|██████▎   | 23390/37031 [00:47<00:25, 545.02it/s]

 63%|██████▎   | 23447/37031 [00:47<00:37, 362.86it/s]

 63%|██████▎   | 23505/37031 [00:48<00:33, 408.24it/s]

 64%|██████▎   | 23563/37031 [00:48<00:30, 446.78it/s]

 64%|██████▍   | 23622/37031 [00:48<00:27, 480.73it/s]

 64%|██████▍   | 23680/37031 [00:48<00:26, 506.15it/s]

 64%|██████▍   | 23739/37031 [00:48<00:25, 526.59it/s]

 64%|██████▍   | 23798/37031 [00:48<00:24, 541.92it/s]

 64%|██████▍   | 23857/37031 [00:48<00:23, 553.80it/s]

 65%|██████▍   | 23915/37031 [00:48<00:36, 364.20it/s]

 65%|██████▍   | 23973/37031 [00:49<00:31, 409.63it/s]

 65%|██████▍   | 24031/37031 [00:49<00:28, 448.83it/s]

 65%|██████▌   | 24090/37031 [00:49<00:26, 481.91it/s]

 65%|██████▌   | 24149/37031 [00:49<00:25, 507.95it/s]

 65%|██████▌   | 24208/37031 [00:49<00:24, 528.01it/s]

 66%|██████▌   | 24266/37031 [00:49<00:23, 542.18it/s]

 66%|██████▌   | 24324/37031 [00:49<00:36, 349.06it/s]

 66%|██████▌   | 24382/37031 [00:49<00:32, 393.40it/s]

 66%|██████▌   | 24440/37031 [00:50<00:28, 434.99it/s]

 66%|██████▌   | 24498/37031 [00:50<00:26, 470.08it/s]

 66%|██████▋   | 24556/37031 [00:50<00:25, 497.74it/s]

 66%|██████▋   | 24614/37031 [00:50<00:23, 519.66it/s]

 67%|██████▋   | 24672/37031 [00:50<00:23, 536.24it/s]

 67%|██████▋   | 24731/37031 [00:50<00:22, 548.88it/s]

 67%|██████▋   | 24788/37031 [00:50<00:35, 348.40it/s]

 67%|██████▋   | 24846/37031 [00:51<00:30, 395.57it/s]

 67%|██████▋   | 24904/37031 [00:51<00:27, 437.17it/s]

 67%|██████▋   | 24962/37031 [00:51<00:25, 471.93it/s]

 68%|██████▊   | 25020/37031 [00:51<00:24, 499.55it/s]

 68%|██████▊   | 25078/37031 [00:51<00:22, 521.20it/s]

 68%|██████▊   | 25136/37031 [00:51<00:22, 537.21it/s]

 68%|██████▊   | 25195/37031 [00:51<00:21, 549.77it/s]

 68%|██████▊   | 25252/37031 [00:51<00:33, 354.60it/s]

 68%|██████▊   | 25311/37031 [00:52<00:29, 402.42it/s]

 69%|██████▊   | 25369/37031 [00:52<00:26, 442.40it/s]

 69%|██████▊   | 25428/37031 [00:52<00:24, 476.96it/s]

 69%|██████▉   | 25486/37031 [00:52<00:22, 502.71it/s]

 69%|██████▉   | 25545/37031 [00:52<00:21, 524.34it/s]

 69%|██████▉   | 25604/37031 [00:52<00:21, 540.73it/s]

 69%|██████▉   | 25663/37031 [00:52<00:20, 552.33it/s]

 69%|██████▉   | 25721/37031 [00:52<00:31, 353.69it/s]

 70%|██████▉   | 25780/37031 [00:53<00:28, 401.21it/s]

 70%|██████▉   | 25839/37031 [00:53<00:25, 442.44it/s]

 70%|██████▉   | 25898/37031 [00:53<00:23, 477.10it/s]

 70%|███████   | 25957/37031 [00:53<00:21, 504.72it/s]

 70%|███████   | 26016/37031 [00:53<00:20, 525.96it/s]

 70%|███████   | 26075/37031 [00:53<00:20, 541.98it/s]

 71%|███████   | 26134/37031 [00:53<00:19, 553.07it/s]

 71%|███████   | 26192/37031 [00:53<00:31, 345.48it/s]

 71%|███████   | 26251/37031 [00:54<00:27, 393.73it/s]

 71%|███████   | 26310/37031 [00:54<00:24, 436.35it/s]

 71%|███████   | 26369/37031 [00:54<00:22, 471.98it/s]

 71%|███████▏  | 26428/37031 [00:54<00:21, 500.80it/s]

 72%|███████▏  | 26487/37031 [00:54<00:20, 522.74it/s]

 72%|███████▏  | 26546/37031 [00:54<00:19, 539.68it/s]

 72%|███████▏  | 26603/37031 [00:54<00:30, 340.27it/s]

 72%|███████▏  | 26662/37031 [00:54<00:26, 388.88it/s]

 72%|███████▏  | 26721/37031 [00:55<00:23, 431.98it/s]

 72%|███████▏  | 26780/37031 [00:55<00:21, 468.09it/s]

 72%|███████▏  | 26839/37031 [00:55<00:20, 497.21it/s]

 73%|███████▎  | 26898/37031 [00:55<00:19, 519.65it/s]

 73%|███████▎  | 26957/37031 [00:55<00:18, 536.81it/s]

 73%|███████▎  | 27015/37031 [00:55<00:18, 548.58it/s]

 73%|███████▎  | 27072/37031 [00:55<00:29, 340.43it/s]

 73%|███████▎  | 27131/37031 [00:55<00:25, 389.19it/s]

 73%|███████▎  | 27189/37031 [00:56<00:22, 430.82it/s]

 74%|███████▎  | 27248/37031 [00:56<00:20, 467.78it/s]

 74%|███████▎  | 27307/37031 [00:56<00:19, 496.91it/s]

 74%|███████▍  | 27366/37031 [00:56<00:18, 519.55it/s]

 74%|███████▍  | 27425/37031 [00:56<00:17, 536.96it/s]

 74%|███████▍  | 27484/37031 [00:56<00:17, 549.90it/s]

 74%|███████▍  | 27542/37031 [00:56<00:27, 343.53it/s]

 75%|███████▍  | 27600/37031 [00:57<00:24, 390.79it/s]

 75%|███████▍  | 27659/37031 [00:57<00:21, 433.72it/s]

 75%|███████▍  | 27718/37031 [00:57<00:19, 469.68it/s]

 75%|███████▌  | 27777/37031 [00:57<00:18, 498.67it/s]

 75%|███████▌  | 27836/37031 [00:57<00:17, 521.21it/s]

 75%|███████▌  | 27895/37031 [00:57<00:16, 538.28it/s]

 75%|███████▌  | 27952/37031 [00:57<00:16, 545.31it/s]

 76%|███████▌  | 28009/37031 [00:57<00:26, 341.85it/s]

 76%|███████▌  | 28068/37031 [00:58<00:22, 390.69it/s]

 76%|███████▌  | 28126/37031 [00:58<00:20, 432.92it/s]

 76%|███████▌  | 28185/37031 [00:58<00:18, 469.62it/s]

 76%|███████▋  | 28244/37031 [00:58<00:17, 498.70it/s]

 76%|███████▋  | 28303/37031 [00:58<00:16, 521.51it/s]

 77%|███████▋  | 28362/37031 [00:58<00:16, 538.68it/s]

 77%|███████▋  | 28421/37031 [00:58<00:15, 551.59it/s]

 77%|███████▋  | 28479/37031 [00:59<00:33, 252.46it/s]

 77%|███████▋  | 28523/37031 [00:59<00:33, 254.58it/s]

 77%|███████▋  | 28580/37031 [00:59<00:27, 307.27it/s]

 77%|███████▋  | 28630/37031 [00:59<00:24, 343.27it/s]

 77%|███████▋  | 28677/37031 [00:59<00:22, 369.89it/s]

 78%|███████▊  | 28723/37031 [00:59<00:21, 382.74it/s]

 78%|███████▊  | 28774/37031 [00:59<00:19, 413.06it/s]

 78%|███████▊  | 28832/37031 [00:59<00:18, 454.87it/s]

 78%|███████▊  | 28882/37031 [01:00<00:32, 250.94it/s]

 78%|███████▊  | 28932/37031 [01:00<00:27, 293.35it/s]

 78%|███████▊  | 28974/37031 [01:00<00:25, 316.65it/s]

 78%|███████▊  | 29016/37031 [01:00<00:24, 326.43it/s]

 78%|███████▊  | 29066/37031 [01:00<00:21, 365.60it/s]

 79%|███████▊  | 29109/37031 [01:00<00:24, 327.55it/s]

 79%|███████▊  | 29147/37031 [01:01<00:24, 320.69it/s]

 79%|███████▉  | 29187/37031 [01:01<00:23, 338.48it/s]

 79%|███████▉  | 29229/37031 [01:01<00:21, 358.56it/s]

 79%|███████▉  | 29269/37031 [01:01<00:21, 369.42it/s]

 79%|███████▉  | 29312/37031 [01:01<00:20, 385.03it/s]

 79%|███████▉  | 29352/37031 [01:01<00:37, 202.62it/s]

 79%|███████▉  | 29407/37031 [01:02<00:29, 262.30it/s]

 80%|███████▉  | 29448/37031 [01:02<00:26, 289.70it/s]

 80%|███████▉  | 29497/37031 [01:02<00:22, 332.95it/s]

 80%|███████▉  | 29552/37031 [01:02<00:19, 383.32it/s]

 80%|███████▉  | 29605/37031 [01:02<00:17, 420.15it/s]

 80%|████████  | 29653/37031 [01:02<00:17, 415.05it/s]

 80%|████████  | 29704/37031 [01:02<00:16, 438.47it/s]

 80%|████████  | 29760/37031 [01:02<00:15, 471.59it/s]

 81%|████████  | 29810/37031 [01:03<00:28, 249.37it/s]

 81%|████████  | 29857/37031 [01:03<00:25, 286.82it/s]

 81%|████████  | 29912/37031 [01:03<00:21, 337.96it/s]

 81%|████████  | 29963/37031 [01:03<00:18, 373.60it/s]

 81%|████████  | 30010/37031 [01:03<00:17, 390.59it/s]

 81%|████████  | 30056/37031 [01:03<00:20, 338.68it/s]

 81%|████████▏ | 30096/37031 [01:03<00:22, 305.65it/s]

 81%|████████▏ | 30131/37031 [01:04<00:24, 287.09it/s]

 81%|████████▏ | 30163/37031 [01:04<00:23, 289.26it/s]

 82%|████████▏ | 30195/37031 [01:04<00:26, 255.88it/s]

 82%|████████▏ | 30223/37031 [01:04<00:27, 248.68it/s]

 82%|████████▏ | 30250/37031 [01:04<00:30, 223.72it/s]

 82%|████████▏ | 30274/37031 [01:05<00:49, 136.12it/s]

 82%|████████▏ | 30295/37031 [01:05<00:45, 147.51it/s]

 82%|████████▏ | 30324/37031 [01:05<00:38, 173.89it/s]

 82%|████████▏ | 30352/37031 [01:05<00:34, 195.23it/s]

 82%|████████▏ | 30396/37031 [01:05<00:26, 251.05it/s]

 82%|████████▏ | 30451/37031 [01:05<00:20, 323.88it/s]

 82%|████████▏ | 30498/37031 [01:05<00:18, 361.10it/s]

 83%|████████▎ | 30553/37031 [01:05<00:15, 411.82it/s]

 83%|████████▎ | 30602/37031 [01:05<00:14, 432.86it/s]

 83%|████████▎ | 30653/37031 [01:05<00:14, 453.32it/s]

 83%|████████▎ | 30710/37031 [01:06<00:23, 274.80it/s]

 83%|████████▎ | 30759/37031 [01:06<00:19, 314.92it/s]

 83%|████████▎ | 30814/37031 [01:06<00:17, 363.94it/s]

 83%|████████▎ | 30871/37031 [01:06<00:15, 410.12it/s]

 84%|████████▎ | 30928/37031 [01:06<00:13, 449.46it/s]

 84%|████████▎ | 30984/37031 [01:06<00:12, 478.36it/s]

 84%|████████▍ | 31040/37031 [01:06<00:11, 499.57it/s]

 84%|████████▍ | 31097/37031 [01:07<00:11, 517.16it/s]

 84%|████████▍ | 31153/37031 [01:07<00:11, 528.04it/s]

 84%|████████▍ | 31208/37031 [01:07<00:19, 297.32it/s]

 84%|████████▍ | 31265/37031 [01:07<00:16, 347.92it/s]

 85%|████████▍ | 31322/37031 [01:07<00:14, 394.31it/s]

 85%|████████▍ | 31379/37031 [01:07<00:13, 433.58it/s]

 85%|████████▍ | 31435/37031 [01:07<00:12, 464.46it/s]

 85%|████████▌ | 31491/37031 [01:08<00:11, 487.68it/s]

 85%|████████▌ | 31547/37031 [01:08<00:10, 505.77it/s]

 85%|████████▌ | 31603/37031 [01:08<00:10, 519.92it/s]

 85%|████████▌ | 31658/37031 [01:08<00:19, 270.15it/s]

 86%|████████▌ | 31701/37031 [01:08<00:19, 271.84it/s]

 86%|████████▌ | 31749/37031 [01:08<00:17, 308.56it/s]

 86%|████████▌ | 31790/37031 [01:09<00:17, 300.21it/s]

 86%|████████▌ | 31834/37031 [01:09<00:15, 328.52it/s]

 86%|████████▌ | 31873/37031 [01:09<00:16, 310.59it/s]

 86%|████████▌ | 31916/37031 [01:09<00:15, 335.74it/s]

 86%|████████▋ | 31954/37031 [01:09<00:16, 309.85it/s]

 86%|████████▋ | 31997/37031 [01:09<00:14, 338.28it/s]

 87%|████████▋ | 32034/37031 [01:09<00:16, 306.83it/s]

 87%|████████▋ | 32079/37031 [01:09<00:14, 338.93it/s]

 87%|████████▋ | 32116/37031 [01:10<00:28, 172.61it/s]

 87%|████████▋ | 32170/37031 [01:10<00:21, 228.63it/s]

 87%|████████▋ | 32212/37031 [01:10<00:18, 262.78it/s]

 87%|████████▋ | 32265/37031 [01:10<00:15, 317.36it/s]

 87%|████████▋ | 32318/37031 [01:10<00:12, 364.82it/s]

 87%|████████▋ | 32364/37031 [01:10<00:12, 381.23it/s]

 88%|████████▊ | 32418/37031 [01:11<00:10, 420.49it/s]

 88%|████████▊ | 32471/37031 [01:11<00:10, 448.41it/s]

 88%|████████▊ | 32520/37031 [01:11<00:10, 443.26it/s]

 88%|████████▊ | 32568/37031 [01:11<00:18, 240.44it/s]

 88%|████████▊ | 32622/37031 [01:11<00:15, 291.13it/s]

 88%|████████▊ | 32664/37031 [01:11<00:13, 314.89it/s]

 88%|████████▊ | 32715/37031 [01:11<00:12, 357.40it/s]

 88%|████████▊ | 32770/37031 [01:12<00:10, 402.07it/s]

 89%|████████▊ | 32818/37031 [01:12<00:10, 421.05it/s]

 89%|████████▉ | 32866/37031 [01:12<00:09, 425.66it/s]

 89%|████████▉ | 32920/37031 [01:12<00:09, 454.81it/s]

 89%|████████▉ | 32972/37031 [01:12<00:08, 472.04it/s]

 89%|████████▉ | 33022/37031 [01:12<00:18, 220.87it/s]

 89%|████████▉ | 33075/37031 [01:13<00:14, 269.37it/s]

 89%|████████▉ | 33128/37031 [01:13<00:12, 316.77it/s]

 90%|████████▉ | 33174/37031 [01:13<00:11, 336.42it/s]

 90%|████████▉ | 33223/37031 [01:13<00:10, 369.92it/s]

 90%|████████▉ | 33279/37031 [01:13<00:09, 414.17it/s]

 90%|█████████ | 33331/37031 [01:13<00:08, 440.53it/s]

 90%|█████████ | 33381/37031 [01:13<00:08, 426.74it/s]

 90%|█████████ | 33432/37031 [01:13<00:08, 447.68it/s]

 90%|█████████ | 33488/37031 [01:14<00:15, 233.69it/s]

 91%|█████████ | 33540/37031 [01:14<00:12, 279.01it/s]

 91%|█████████ | 33582/37031 [01:14<00:11, 304.47it/s]

 91%|█████████ | 33627/37031 [01:14<00:10, 333.67it/s]

 91%|█████████ | 33681/37031 [01:14<00:08, 379.71it/s]

 91%|█████████ | 33736/37031 [01:14<00:07, 421.50it/s]

 91%|█████████ | 33786/37031 [01:14<00:07, 441.48it/s]

 91%|█████████▏| 33835/37031 [01:15<00:07, 423.41it/s]

 92%|█████████▏| 33887/37031 [01:15<00:07, 447.20it/s]

 92%|█████████▏| 33943/37031 [01:15<00:06, 478.07it/s]

 92%|█████████▏| 33993/37031 [01:15<00:13, 230.87it/s]

 92%|█████████▏| 34044/37031 [01:15<00:10, 275.66it/s]

 92%|█████████▏| 34092/37031 [01:15<00:09, 313.70it/s]

 92%|█████████▏| 34136/37031 [01:16<00:08, 333.65it/s]

 92%|█████████▏| 34187/37031 [01:16<00:07, 372.83it/s]

 92%|█████████▏| 34232/37031 [01:16<00:07, 386.21it/s]

 93%|█████████▎| 34277/37031 [01:16<00:06, 396.56it/s]

 93%|█████████▎| 34329/37031 [01:16<00:06, 427.76it/s]

 93%|█████████▎| 34386/37031 [01:16<00:05, 465.40it/s]

 93%|█████████▎| 34436/37031 [01:16<00:09, 275.22it/s]

 93%|█████████▎| 34494/37031 [01:17<00:07, 331.94it/s]

 93%|█████████▎| 34552/37031 [01:17<00:06, 383.37it/s]

 93%|█████████▎| 34610/37031 [01:17<00:05, 427.96it/s]

 94%|█████████▎| 34668/37031 [01:17<00:05, 464.53it/s]

 94%|█████████▍| 34726/37031 [01:17<00:04, 493.64it/s]

 94%|█████████▍| 34784/37031 [01:17<00:04, 515.74it/s]

 94%|█████████▍| 34842/37031 [01:17<00:04, 532.00it/s]

 94%|█████████▍| 34898/37031 [01:17<00:06, 305.03it/s]

 94%|█████████▍| 34956/37031 [01:18<00:05, 355.31it/s]

 95%|█████████▍| 35014/37031 [01:18<00:05, 401.45it/s]

 95%|█████████▍| 35072/37031 [01:18<00:04, 441.66it/s]

 95%|█████████▍| 35130/37031 [01:18<00:04, 474.88it/s]

 95%|█████████▌| 35187/37031 [01:18<00:03, 499.19it/s]

 95%|█████████▌| 35245/37031 [01:18<00:03, 519.76it/s]

 95%|█████████▌| 35302/37031 [01:18<00:03, 532.64it/s]

 95%|█████████▌| 35359/37031 [01:18<00:03, 542.50it/s]

 96%|█████████▌| 35416/37031 [01:19<00:05, 297.01it/s]

 96%|█████████▌| 35473/37031 [01:19<00:04, 346.63it/s]

 96%|█████████▌| 35531/37031 [01:19<00:03, 393.65it/s]

 96%|█████████▌| 35589/37031 [01:19<00:03, 434.89it/s]

 96%|█████████▋| 35647/37031 [01:19<00:02, 468.50it/s]

 96%|█████████▋| 35705/37031 [01:19<00:02, 496.42it/s]

 97%|█████████▋| 35763/37031 [01:19<00:02, 517.84it/s]

 97%|█████████▋| 35821/37031 [01:19<00:02, 533.38it/s]

 97%|█████████▋| 35878/37031 [01:20<00:03, 305.77it/s]

 97%|█████████▋| 35935/37031 [01:20<00:03, 354.38it/s]

 97%|█████████▋| 35993/37031 [01:20<00:02, 400.89it/s]

 97%|█████████▋| 36051/37031 [01:20<00:02, 440.90it/s]

 98%|█████████▊| 36109/37031 [01:20<00:01, 473.95it/s]

 98%|█████████▊| 36167/37031 [01:20<00:01, 500.16it/s]

 98%|█████████▊| 36225/37031 [01:20<00:01, 520.28it/s]

 98%|█████████▊| 36283/37031 [01:20<00:01, 535.24it/s]

 98%|█████████▊| 36340/37031 [01:21<00:02, 307.65it/s]

 98%|█████████▊| 36398/37031 [01:21<00:01, 357.76it/s]

 98%|█████████▊| 36456/37031 [01:21<00:01, 403.32it/s]

 99%|█████████▊| 36514/37031 [01:21<00:01, 443.07it/s]

 99%|█████████▉| 36572/37031 [01:21<00:00, 475.98it/s]

 99%|█████████▉| 36630/37031 [01:21<00:00, 501.91it/s]

 99%|█████████▉| 36688/37031 [01:21<00:00, 521.45it/s]

 99%|█████████▉| 36746/37031 [01:22<00:00, 535.93it/s]

 99%|█████████▉| 36803/37031 [01:22<00:00, 305.76it/s]

100%|█████████▉| 36861/37031 [01:22<00:00, 355.61it/s]

100%|█████████▉| 36919/37031 [01:22<00:00, 401.30it/s]

100%|█████████▉| 36977/37031 [01:22<00:00, 440.98it/s]

100%|██████████| 37031/37031 [01:22<00:00, 447.05it/s]




Processing stocks:   0%|          | 0/20 [00:00<?, ?it/s]

Processing stocks:   5%|▌         | 1/20 [00:00<00:05,  3.47it/s]

Processing stocks:  10%|█         | 2/20 [00:00<00:03,  5.54it/s]

Processing stocks:  15%|█▌        | 3/20 [00:00<00:02,  6.47it/s]

Processing stocks:  20%|██        | 4/20 [00:00<00:03,  4.21it/s]

Processing stocks:  30%|███       | 6/20 [00:01<00:02,  6.48it/s]

Processing stocks:  35%|███▌      | 7/20 [00:01<00:02,  5.23it/s]

Processing stocks:  40%|████      | 8/20 [00:01<00:02,  5.36it/s]

Processing stocks:  45%|████▌     | 9/20 [00:01<00:01,  5.75it/s]

Processing stocks:  50%|█████     | 10/20 [00:01<00:01,  6.54it/s]

Processing stocks:  55%|█████▌    | 11/20 [00:02<00:02,  3.80it/s]

Processing stocks:  60%|██████    | 12/20 [00:02<00:01,  4.39it/s]

Processing stocks:  70%|███████   | 14/20 [00:02<00:00,  6.19it/s]

Processing stocks:  80%|████████  | 16/20 [00:02<00:00,  6.99it/s]

Processing stocks:  85%|████████▌ | 17/20 [00:02<00:00,  6.96it/s]

Processing stocks:  90%|█████████ | 18/20 [00:03<00:00,  5.65it/s]

Processing stocks:  95%|█████████▌| 19/20 [00:03<00:00,  5.96it/s]

Processing stocks: 100%|██████████| 20/20 [00:03<00:00,  5.19it/s]

Processing stocks: 100%|██████████| 20/20 [00:03<00:00,  5.48it/s]




Unnamed: 0,股票编号,日期,高维情绪变量,交易量,收益率变化,聚类标签,公司名称
0,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",60990961.0,-0.003745,2.0,万科A
1,000002,2021-06-08,"[-0.276, -0.035, 0.247, 0.326, 0.018, 0.244, -...",44676494.0,0.004626,2.0,万科A
2,000002,2021-06-10,"[-0.083, 0.072, 0.273, 0.26, -0.087, 0.276, -0...",53800776.0,-0.010035,2.0,万科A
3,000002,2021-06-11,"[-0.114, 0.163, 0.381, 0.325, -0.091, 0.088, -...",75853738.0,-0.014035,2.0,万科A
4,000002,2021-06-15,"[-0.169, 0.147, 0.442, 0.245, -0.009, 0.368, -...",89915501.0,-0.020957,2.0,万科A
...,...,...,...,...,...,...,...
9679,688981,2024-11-11,"[-0.055, 0.026, 0.277, 0.147, -0.029, 0.276, -...",138069018.0,0.046660,2.0,中芯国际
9680,688981,2024-11-12,"[-0.134, -0.011, 0.308, 0.208, 0.049, 0.263, -...",108866759.0,-0.036864,2.0,中芯国际
9681,688981,2024-11-13,"[-0.228, 0.028, 0.358, 0.294, -0.137, 0.178, -...",80759477.0,-0.019484,2.0,中芯国际
9682,688981,2024-11-14,"[-0.18, 0.054, 0.392, 0.292, -0.048, 0.215, -0...",76194102.0,-0.022897,2.0,中芯国际


In [11]:
# 统计检验 : 按照聚类标签分组，并计算均值和方差
df = final_data.copy()
if MODEL != 'Deep-learning/Ours':
    statistical_result = df.groupby('聚类标签').agg(
        情绪值均值=(EMO_NAME, 'mean'),
        情绪值方差=(EMO_NAME, 'var'),
        收益率变化均值=('收益率变化', 'mean'),
        收益率变化方差=('收益率变化', 'var')
    ).reset_index()
else:
    statistical_result = None
# 输出结果
statistical_result

## 情绪值与股价二维展示图

In [12]:
## 绘制不同簇情绪值随时间变化图
if MODEL != 'Deep-learning/Ours':
    # 按 '聚类标签' 和 '日期' 分组，计算平均情绪值
    df_grouped = final_data.groupby(['聚类标签', '日期'])[['上证综合情绪值', '沪深300情绪值', '创业板情绪值']].mean().reset_index()
    
    # 为每个聚类标签绘制折线
    i = 0
    plt.figure(figsize=(12, 12))
    for cluster in df_grouped['聚类标签'].unique():
        if i == 0:
            x = 'red'
        elif i == 1:
            x = 'blue'
        elif i == 2:
            x = 'green'
        df_cluster = df_grouped[df_grouped['聚类标签'] == cluster]
        plt.plot(df_cluster['日期'], df_cluster[EMO_NAME], label=f'{EMO_NAME} - 聚类 {cluster}', color=x)
        i += 1
        
    plt.xlabel('日期')
    plt.ylabel('平均情绪值')
    plt.title('情绪值趋势图')
    
    
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=3))  # 每三个月显示
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 格式化为年-月
    
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    
    # plt.savefig(f'个股分析/{market_address}/图片/情绪值趋势图', dpi=300)
    plt.show()


In [13]:
## 绘制不同簇收益率随时间变化图
if MODEL != 'Deep-learning/Ours':
    # 按 '聚类标签' 和 '日期' 分组，计算平均收益率
    return_grouped = final_data.groupby(['聚类标签', '日期'])['收益率变化'].mean().reset_index()
    
    # 为每个聚类标签绘制折线
    i = 0
    plt.figure(figsize=(12, 12))
    for cluster in df_grouped['聚类标签'].unique():
        if i == 0:
            x = 'red'
        elif i == 1:
            x = 'blue'
        elif i == 2:
            x = 'green'
        df_cluster = return_grouped[return_grouped['聚类标签'] == cluster]
        plt.plot(df_cluster['日期'], df_cluster['收益率变化'], label=f'收益率变化 - 聚类 {cluster}', color=x)
        i += 1
    
    plt.xlabel('日期')
    plt.ylabel('平均收益率')
    plt.title('收益率趋势图')
    
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=3))  # 每三个月显示
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 格式化为年-月
    
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    
    # 显示图表
    # plt.savefig(f'个股分析/{market_address}/图片/收益率趋势图', dpi=300)
    plt.show()

## 三维展示图，分别展示簇是0，1，2的代表图

In [14]:
### 聚类0示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 0]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类0_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


In [15]:
### 聚类1示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 1]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类1_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


In [16]:
### 聚类2示例 ###
if MODEL != 'Deep-learning/Ours':
    # 复制数据集，确保不会修改原始数据
    merged_data_cleaned = final_data.copy()
    merged_data_cleaned['日期'] = pd.to_datetime(merged_data_cleaned['日期'])
    
    # 选择聚类标签为 0 的数据，找到数据最多的股票编号
    cluster_0_data = merged_data_cleaned[merged_data_cleaned['聚类标签'] == 2]
    top_stock_code = cluster_0_data['股票编号'].value_counts().idxmax()
    top_stock_data = cluster_0_data[cluster_0_data['股票编号'] == top_stock_code]
    
    
    # 创建 3D 图
    fig = plt.figure(figsize=(50, 50))
    ax = fig.add_subplot(111, projection='3d')
    
    # 提取绘图数据
    x = top_stock_data[EMO_NAME]  # 情绪值作为 x 轴
    y = mdates.date2num(top_stock_data['日期'])  # 时间转换为数值类型
    z = top_stock_data['收益率变化']  # 收益率变化作为 z 轴
    
    # 绘制三维折线图
    ax.plot(x, y, z, label=f"股票 {top_stock_code}", marker='o', linestyle='-', alpha=0.7)
    
    # 设置轴标签
    ax.set_xlabel(EMO_NAME)
    ax.set_ylabel('时间')
    ax.set_zlabel('收益率变化')
    ax.set_title(f'股票 {top_stock_code}: {EMO_NAME} 与收益率变化的三维关系')
    
    # 设置 y 轴时间刻度（每半年一个刻度）
    ax.yaxis.set_major_locator(mdates.MonthLocator(bymonthday=15, interval=6))  # 每半年一个刻度
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # 按年月显示
    
    # 旋转日期标签防止重叠
    fig.autofmt_xdate()
    
    # 调整布局并保存图像
    plt.legend()
    # plt.savefig(f'个股分析/{market_address}/图片/聚类2_{top_stock_code}.png', dpi=300, bbox_inches='tight')
    plt.show()


## 进行聚类间实证分析

#### 读取个股金融数据

In [17]:
# 读取原始数据
microfinancial_data = pd.read_csv(f'{Financial_Data_PATH}/个股微观金融数据.csv', encoding='utf-8')

# 选择重要的列，日期和股票代码一定要包含
columns_to_select = [
    '日期_Date',  # 日期
    '股票代码_Stkcd',  # 股票代码
    '日振幅(%)_Dampltd',  # 日振幅
    '日收益率_Dret',  # 日收益率
    '市盈率_PE',  # 市盈率
    '成交量_Trdvol',  # 成交量
    '流通股日换手率(%)_DTrdTurnR',
    '总市值加权平均日资本收益_Daretmc',
]

# 提取相关列
microfinancial_data = microfinancial_data[columns_to_select]
microfinancial_data['股票代码_Stkcd'] = microfinancial_data['股票代码_Stkcd'].apply(lambda x: str(x).zfill(6))

# microfinancial_data = microfinancial_data[microfinancial_data['日期_Date'] == '2022-03-01'] #用于检验某一天数据
microfinancial_data

Unnamed: 0,日期_Date,股票代码_Stkcd,日振幅(%)_Dampltd,日收益率_Dret,市盈率_PE,成交量_Trdvol,流通股日换手率(%)_DTrdTurnR,总市值加权平均日资本收益_Daretmc
0,2023-02-01,000100,1.6667,0.0071,276.47,147751077.0,1.0357,0.0145
1,2023-02-02,000100,2.1277,-0.0095,273.86,188929965.0,1.3244,-0.0005
2,2023-02-03,000100,2.1480,-0.0048,272.55,200087013.0,1.4026,-0.0042
3,2023-02-06,000100,1.6787,-0.0072,270.59,134158688.0,0.9404,-0.0084
4,2023-02-07,000100,1.4493,0.0024,271.24,87200030.0,0.6113,0.0036
...,...,...,...,...,...,...,...,...
162617,2024-11-14,601566,3.0340,-0.0255,16.94,3223500.0,0.5610,-0.0146
162618,2024-11-15,601566,2.8643,0.0025,16.98,2824500.0,0.4915,-0.0112
162619,2024-11-18,601566,3.8509,-0.0025,16.94,3729993.0,0.6491,0.0009
162620,2024-11-19,601566,2.3661,0.0137,17.17,3367065.0,0.5859,0.0031


#### 加载之前的情绪聚类数据

In [18]:
final_data

Unnamed: 0,股票编号,日期,高维情绪变量,交易量,收益率变化,聚类标签,公司名称
0,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",60990961.0,-0.003745,2.0,万科A
1,000002,2021-06-08,"[-0.276, -0.035, 0.247, 0.326, 0.018, 0.244, -...",44676494.0,0.004626,2.0,万科A
2,000002,2021-06-10,"[-0.083, 0.072, 0.273, 0.26, -0.087, 0.276, -0...",53800776.0,-0.010035,2.0,万科A
3,000002,2021-06-11,"[-0.114, 0.163, 0.381, 0.325, -0.091, 0.088, -...",75853738.0,-0.014035,2.0,万科A
4,000002,2021-06-15,"[-0.169, 0.147, 0.442, 0.245, -0.009, 0.368, -...",89915501.0,-0.020957,2.0,万科A
...,...,...,...,...,...,...,...
9679,688981,2024-11-11,"[-0.055, 0.026, 0.277, 0.147, -0.029, 0.276, -...",138069018.0,0.046660,2.0,中芯国际
9680,688981,2024-11-12,"[-0.134, -0.011, 0.308, 0.208, 0.049, 0.263, -...",108866759.0,-0.036864,2.0,中芯国际
9681,688981,2024-11-13,"[-0.228, 0.028, 0.358, 0.294, -0.137, 0.178, -...",80759477.0,-0.019484,2.0,中芯国际
9682,688981,2024-11-14,"[-0.18, 0.054, 0.392, 0.292, -0.048, 0.215, -0...",76194102.0,-0.022897,2.0,中芯国际


#### 读取市场换手率与市盈率并进行插值

In [19]:
turnover_PE_rate = pd.read_csv(f'{Financial_Data_PATH}/市场整体换手率与市盈率.csv', encoding='utf-8')
turnover_PE_rate['统计日期'] = pd.to_datetime(turnover_PE_rate['统计日期'])

# 按照 '统计日期' 排序
turnover_PE_rate = turnover_PE_rate.sort_values(by='统计日期')

# 对 '平均市盈率' 和 '换手率(总股本)' 进行线性插值
turnover_PE_rate[['平均市盈率', '换手率(总股本)']] = turnover_PE_rate[['平均市盈率', '换手率(总股本)']].interpolate(method='linear', axis=0)

turnover_PE_rate = turnover_PE_rate.drop(columns=['统计口径编码'])
turnover_PE_rate.columns = ['统计日期', '市场平均市盈率', '市场换手率(总股本)']
turnover_PE_rate

Unnamed: 0,统计日期,市场平均市盈率,市场换手率(总股本)
0,2021-01-04,16.918000,1.140600
12,2021-01-04,25.986305,1.237217
11,2021-01-04,35.054611,1.333834
10,2021-01-04,44.122916,1.430451
8,2021-01-04,53.191221,1.527068
...,...,...,...
12364,2024-12-06,39.185061,2.176139
12363,2024-12-06,14.390000,1.292500
12374,2024-12-06,14.390000,1.292500
12368,2024-12-06,14.390000,1.292500


#### 数据拼接

In [20]:
# 将日期列转换为 datetime 格式
final_data['日期'] = pd.to_datetime(final_data['日期'])
microfinancial_data['日期_Date'] = pd.to_datetime(microfinancial_data['日期_Date'], errors='coerce')

# 使用 merge 进行内连接
result = pd.merge(
    final_data, 
    microfinancial_data, 
    left_on=['日期', '股票编号'],  # 左表的 key
    right_on=['日期_Date', '股票代码_Stkcd'], # 右表的 key
    how='inner'                   # 内连接
)

result = pd.merge(
    result, 
    turnover_PE_rate, 
    left_on=['日期'],  # 左表的 key
    right_on=['统计日期'], # 右表的 key
    how='inner'                   # 内连接
)

# 删除重复的列
result = result.drop(columns=['日期_Date', '股票代码_Stkcd', '交易量', '日收益率_Dret', '统计日期'])

# 修改列的位置，更加美观可视
columns_to_move = ['聚类标签', '公司名称']
result = pd.concat([result.drop(columns=columns_to_move), result[columns_to_move]], axis=1)

# 将成交量除以10000，并以万计数
result['成交量(百万)_Trdvol'] = result['成交量_Trdvol'] / 1_000_000
result = result.drop(columns=['成交量_Trdvol'])

cols_to_check = ['收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
                 '成交量(百万)_Trdvol', '流通股日换手率(%)_DTrdTurnR']

# 删除含有空值的行
result = result.dropna(subset=cols_to_check)
result

Unnamed: 0,股票编号,日期,高维情绪变量,收益率变化,日振幅(%)_Dampltd,市盈率_PE,流通股日换手率(%)_DTrdTurnR,总市值加权平均日资本收益_Daretmc,市场平均市盈率,市场换手率(总股本),聚类标签,公司名称,成交量(百万)_Trdvol
0,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",-0.003745,1.8727,7.44,0.6272,0.0039,24.523860,1.030223,2.0,万科A,60.990961
1,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",-0.003745,1.8727,7.44,0.6272,0.0039,31.761720,1.132746,2.0,万科A,60.990961
2,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",-0.003745,1.8727,7.44,0.6272,0.0039,38.999580,1.235269,2.0,万科A,60.990961
3,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",-0.003745,1.8727,7.44,0.6272,0.0039,46.237440,1.337792,2.0,万科A,60.990961
4,000002,2021-06-01,"[-0.178, -0.141, 0.283, 0.184, -0.002, 0.299, ...",-0.003745,1.8727,7.44,0.6272,0.0039,53.475301,1.440316,2.0,万科A,60.990961
...,...,...,...,...,...,...,...,...,...,...,...,...,...
116592,603288,2024-11-11,"[0.17, 0.06, 0.068, 0.234, 0.072, 0.144, -0.65...",-0.028694,3.8532,45.78,0.4017,-0.0010,70.557753,3.703170,0.0,海天味业,22.335171
116593,603288,2024-11-11,"[0.17, 0.06, 0.068, 0.234, 0.072, 0.144, -0.65...",-0.028694,3.8532,45.78,0.4017,-0.0010,51.935168,3.099546,0.0,海天味业,22.335171
116594,603288,2024-11-11,"[0.17, 0.06, 0.068, 0.234, 0.072, 0.144, -0.65...",-0.028694,3.8532,45.78,0.4017,-0.0010,33.312584,2.495923,0.0,海天味业,22.335171
116595,603288,2024-11-11,"[0.17, 0.06, 0.068, 0.234, 0.072, 0.144, -0.65...",-0.028694,3.8532,45.78,0.4017,-0.0010,14.690000,1.892300,0.0,海天味业,22.335171


In [21]:
# 查看匹配的公司数
unique_stock_count = result['股票编号'].nunique()
print(unique_stock_count)

19


#### 回归检验

In [22]:
## 单因子：总市值加权平均日资本收益回归检验
df = result.copy()

# 自变量是总市值加权平均日资本收益
X = df[['总市值加权平均日资本收益_Daretmc']]

# 要拟合的因变量列表
y_vars = [
    '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
    '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
]


# 添加常数项（截距项）
X_with_const = sm.add_constant(X)

# 创建一个空的列表存储回归结果
regression_results = []

# 对每个因变量进行回归分析
for y_var in y_vars:
    y = df[y_var]  # 当前的因变量

    # 对股票编号分组进行回归
    grouped = df.groupby('股票编号')
    
    r_squared_list = []
    
    for stock_code, group in grouped:
        X_group = group[['总市值加权平均日资本收益_Daretmc']]
        X_with_const_group = sm.add_constant(X_group)
        y_group = group[y_var]
        
        try:
            # 拟合回归模型
            model = sm.OLS(y_group, X_with_const_group)
            results = model.fit()

            # 提取回归R方
            r_squared_list.append(results.rsquared)
        except Exception as e:
            # 如果回归失败（如R方计算为-inf），跳过
            print(f"回归失败：股票编号 {stock_code}, 错误信息: {e}")
            r_squared_list.append(float('nan'))  # 将其R方设为NaN
    
    # 计算R方的平均值，忽略NaN值
    avg_r_squared = pd.Series(r_squared_list).mean()
    
    # 对于每个因变量，存储回归系数等信息
    model = sm.OLS(y, X_with_const)
    results = model.fit()

    regression_results.append({
        'Variable': y_var,
        'S': round(results.bse['总市值加权平均日资本收益_Daretmc'], 3),  # 标准误差
        'Econ': round(results.params['总市值加权平均日资本收益_Daretmc'], 3),  # 回归系数
        'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
        'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
        'Econ_t_value': round(results.tvalues['总市值加权平均日资本收益_Daretmc'], 3),  # Econ的t统计量，保留三位小数
        'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
    })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/单因子：总市值加权平均日资本收益.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.006,1.091,0.001,22.112,181.938,11.641
1,日振幅(%)_Dampltd,0.533,8.629,3.438,0.224,16.184,549.441
2,市盈率_PE,57.149,-123.203,59.781,0.004,-2.156,89.131
3,成交量(百万)_Trdvol,21.662,508.151,64.412,0.47,23.458,253.365
4,流通股日换手率(%)_DTrdTurnR,0.451,11.012,1.498,0.508,24.404,282.825


In [23]:
## 单因子：情绪回归检验
if MODEL != 'Deep-learning/Ours':
    df = result.copy()
    
    # 自变量是情绪值
    X = df[[EMO_NAME]]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
        
        for stock_code, group in grouped:
            X_group = group[[EMO_NAME]]
            X_with_const_group = sm.add_constant(X_group)
            y_group = group[y_var]
            
            # 拟合回归模型
            model = sm.OLS(y_group, X_with_const_group)
            results = model.fit()
    
            # 提取回归R方
            r_squared_list.append(results.rsquared)
        
        # 计算R方的平均值
        avg_r_squared = sum(r_squared_list) / len(r_squared_list)
        
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result.copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared_global': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    


# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/单因子：{EMO_NAME}.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared_global,Econ_t_value,Constant_t_value
0,收益率变化,0.01,-0.023,-0.103,14.715,-2.4,-2.582
1,日振幅(%)_Dampltd,0.731,-1.68,17.066,20.17,-2.296,5.626
2,市盈率_PE,79.483,-232.155,-188.685,17.77,-2.921,-0.572
3,成交量(百万)_Trdvol,27.579,-13.941,103.63,31.414,-0.505,0.906
4,流通股日换手率(%)_DTrdTurnR,0.611,-2.051,9.494,22.321,-3.354,3.744


In [24]:
## 总体双因子回归检验
if MODEL != 'Deep-learning/Ours':
    df = result.copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
   
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })

else:
    df = result.copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')

        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：总体分析.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.009,-0.017,-0.058,31.799,-1.995,-1.617
1,日振幅(%)_Dampltd,0.731,-1.635,17.405,20.325,-2.238,5.743
2,市盈率_PE,79.48,-233.3,-197.443,17.779,-2.935,-0.599
3,成交量(百万)_Trdvol,27.503,-11.113,125.258,31.795,-0.404,1.098
4,流通股日换手率(%)_DTrdTurnR,0.61,-1.989,9.969,22.743,-3.262,3.942


In [25]:
## 聚类0的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 0].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
        
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })

else:
    df = result[result['聚类标签'] == 0].copy()

    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })
    
# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类0检验.csv')
regression_df


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.013,-0.009,0.11,37.491,-0.679,1.842
1,日振幅(%)_Dampltd,1.027,-3.619,46.938,31.702,-3.523,9.743
2,市盈率_PE,115.966,-22.233,-2476.121,26.097,-0.192,-4.553
3,成交量(百万)_Trdvol,44.024,-27.448,-234.74,48.501,-0.623,-1.137
4,流通股日换手率(%)_DTrdTurnR,0.914,-2.984,15.228,29.131,-3.264,3.552


In [26]:
## 聚类1的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 1].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')

        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result[result['聚类标签'] == 1].copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    print('(样本数, 维度):', emo_array.shape)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')    

        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类1检验.csv')
regression_df


(样本数, 维度): (17927, 768)


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.025,-0.015,-0.816,69.209,-0.59,-7.005
1,日振幅(%)_Dampltd,2.286,-3.541,-35.772,61.673,-1.549,-3.348
2,市盈率_PE,345.789,-122.119,-9474.82,64.791,-0.353,-5.863
3,成交量(百万)_Trdvol,16.636,-27.822,-785.271,56.516,-1.672,-10.1
4,流通股日换手率(%)_DTrdTurnR,1.351,2.7,-22.601,63.469,1.998,-3.579


In [27]:
## 聚类2的回归检验
if MODEL != 'Deep-learning/Ours':
    df = result[result['聚类标签'] == 2].copy()
    
    # 自变量是情绪值和总市值加权平均日资本收益
    X = df[[EMO_NAME, '总市值加权平均日资本收益_Daretmc']]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    # 创建一个空的列表存储回归结果
    regression_results = []
    
    # 对每个因变量进行回归分析
    for y_var in y_vars:
        y = df[y_var]  # 当前的因变量
    
        # 对股票编号分组进行回归
        grouped = df.groupby('股票编号')
    
        # 对于每个因变量，存储回归系数等信息
        model = sm.OLS(y, X_with_const)
        results = model.fit()
    
        regression_results.append({
            'Variable': y_var,
            'S': round(results.bse[EMO_NAME], 3),  # 标准误差，保留三位小数
            'Econ': round(results.params[EMO_NAME], 3),  # 回归系数，保留三位小数
            'Constant': round(results.params['const'], 3),  # 截距项，保留三位小数
            'R_squared': round(results.rsquared * 100, 3), # 总体的回归R方，保留三位小数
            'Econ_t_value': round(results.tvalues[EMO_NAME], 3),  # Econ的t统计量，保留三位小数
            'Constant_t_value': round(results.tvalues['const'], 3)  # Constant的t统计量，保留三位小数
        })
else:
    df = result[result['聚类标签'] == 2].copy()
    
    # 拆分高维情绪变量为多个列（多变量）
    emo_array = np.array(df['高维情绪变量'].tolist())  # shape: (样本数, 维度)
    print('(样本数, 维度):', emo_array.shape)
    emo_df = pd.DataFrame(emo_array, columns=[f'emotion_{i}' for i in range(emo_array.shape[1])])
    
    # 合并回df
    df = pd.concat([df.reset_index(drop=True), emo_df], axis=1)
    
    # 构造多维自变量X
    EMO_COLUMNS = [f'emotion_{i}' for i in range(emo_array.shape[1])]
    X = df[['总市值加权平均日资本收益_Daretmc'] + EMO_COLUMNS]
    
    # 要拟合的因变量列表
    y_vars = [
        '收益率变化', '日振幅(%)_Dampltd', '市盈率_PE', 
        '成交量(百万)_Trdvol','流通股日换手率(%)_DTrdTurnR'
    ]
    
    # 添加常数项（截距项）
    X_with_const = sm.add_constant(X)
    
    regression_results = []

    for y_var in y_vars:
        y = df[y_var]
        
        # 每个股票编号做回归，统计 R²
        grouped = df.groupby('股票编号')
        r_squared_list = []
        
        # 全部数据回归
        model = sm.OLS(y, X_with_const)
        results = model.fit()

        # 提取部分指标（示例提取第一个变量的参数）
        econ_param = results.params[EMO_COLUMNS[0]]
        econ_bse = results.bse[EMO_COLUMNS[0]]
        econ_t = results.tvalues[EMO_COLUMNS[0]]
        
        regression_results.append({
            'Variable': y_var,
            'S': round(econ_bse, 3),
            'Econ': round(econ_param, 3),
            'Constant': round(results.params['const'], 3),
            'R_squared': round(results.rsquared * 100, 3),
            'Econ_t_value': round(econ_t, 3),
            'Constant_t_value': round(results.tvalues['const'], 3)
        })

# 转换为 DataFrame
regression_df = pd.DataFrame(regression_results)
# regression_df.to_csv(f'个股分析/{market_address}/双因子回归结果/双因子：聚类2检验.csv')
regression_df


(样本数, 维度): (45617, 768)


Unnamed: 0,Variable,S,Econ,Constant,R_squared,Econ_t_value,Constant_t_value
0,收益率变化,0.014,0.041,-0.401,38.983,2.976,-5.883
1,日振幅(%)_Dampltd,1.133,-3.36,11.003,31.369,-2.966,1.983
2,市盈率_PE,56.653,-273.151,-485.97,41.341,-4.821,-1.752
3,成交量(百万)_Trdvol,43.377,-293.721,-725.347,35.597,-6.771,-3.415
4,流通股日换手率(%)_DTrdTurnR,1.025,-4.997,-0.397,36.59,-4.877,-0.079
