In [1]:
# -*- coding: utf-8 -*-
'''
1、取数仓的1分钟数据，接入本地qlib（本demo只提供样例转换代码）
2、qlib计算出的股池回写到数仓中（由后台搜集器同步到股池）
'''
import time
import qlib
import os
import numpy as np
import pandas as pd
from dw import DW
from datetime import datetime
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

dw = DW()
# 获取转债基础信息:债券代码   债券简称           申购日期    申购代码  申购上限    正股代码  正股简称     正股价     转股价   债现价  转股溢价率   原股东配售-股权登记日  原股东配售-每股配售额     发行规模         中签号发布日       中签率          上市时间  信用评级
def get_redis_cb_info():    
    # 获取所有可转债代码
    df_info = dw.redis_get_base('cb_info')
    return df_info

# 获取历史日线数据
def get_redis_day01(df_info):
    for index, row in df_info.iterrows():
        # 转化为tr_code
        tr_code =  row['债券代码']+'.SH' if row['债券代码'][:2] == '11' else row['债券代码']+'.SZ'
        df_day01 = dw.redis_get_day01(tr_code)
        if df_day01 is not None:
            df_day01.to_csv(f'C:/Users/tantra/Desktop/数据/日线数据/{tr_code}.csv')
            # # 打印前5行数据
            print(tr_code)
            print(df_day01)


# 获取历史1分钟数据
def get_api_min01(df_info: pd.DataFrame):
    s_time = time.strftime('%Y-%m-%d', time.localtime(time.time()-86400*20)) # 20天  
    e_time = time.strftime('%Y-%m-%d', time.localtime(time.time())) # 当天
    for inde, row in df_info.iterrows():
        # 转化为tr_code
        tr_code =  row['债券代码']+'.SH' if row['债券代码'][:2] == '11' else row['债券代码']+'.SZ'
        # 获取1分钟数据        
        df_min01_his = dw.api_get_history_min01(tr_code=tr_code, start_date=s_time, end_date=e_time)        
        if not df_min01_his.empty and df_min01_his is not None:
            df_min01_his.to_csv(f'C:/Users/tantra/Desktop/数据/分钟数据/{tr_code}.csv')
            # # 打印前5行数据
            print(tr_code)
            print(df_min01_his.head())


# 获取当日的1分钟数据
def get_redis_min01(df_info: pd.DataFrame):    
    for index, row in df_info.iterrows():
        # 转化为tr_code
        tr_code =  row['债券代码']+'.SH' if row['债券代码'][:2] == '11' else row['债券代码']+'.SZ'        
        # 获取1分钟数据        
        df_min = dw.redis_get_min01(tr_code)
        if df_min is not None:
            df_min.to_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{tr_code}.csv')
            # 打印前5行数据，此处需要和历史数据进行合并处理
            print(tr_code)
            print(df_min.head())

def save_to_redis(df_sp: pd.DataFrame, sp_name: str):
    # 回写股池到redis中
    r = dw.get_redis()
    r.set(f"sp:{sp_name}", df_sp.to_json(orient='records', force_ascii=False))

In [2]:
#转为dict格式
def factor_dict(factor):
    factor_dict = {}
    for day in list(factor.index):
        factor_dict[day] = factor.loc[day].sort_values()
    return factor_dict

#dict格式因子缩尾处理
def winsorize(factor,low = 0.01, up = 0.99):
    for date in factor.keys():
        s = factor[date].copy()
        lower = s.quantile(low)
        upper = s.quantile(up)
        factor[date] = s.clip(lower, upper)
    return factor

#series格式因子缩尾处理
def series_winsorize(factor,low = 0.01, up = 0.99):
    lower = factor.quantile(low)
    upper = factor.quantile(up)
    factor = factor.clip(lower, upper)
    return factor

#dict格式因子标准化
def factor_std(factor):
    for date in factor.keys():
        factor[date] = (factor[date] - factor[date].mean()) / factor[date].std()
    return factor

#series格式因子标准化
def series_factor_std(factor):
    factor = (factor - factor.mean()) / factor.std()
    return factor

#series格式因子对另一组因子中性化
def net_factor(factor1, factor_list):
    factor = {}
    factor_x = pd.DataFrame()
    for i in factor_list: 
        factor_x = pd.concat([factor_x, i], axis=1) 
    y = factor1.dropna()
    x = factor_x.dropna()
    x, y = x.align(y, join='inner', axis=0)
    X = sm.add_constant(x)
    model = sm.OLS(y, X).fit()
    factor = model.resid.sort_values()
    return factor

In [12]:
# # 获取转债基础信息，含实时转股溢价率等信息 
df_info = get_redis_cb_info()
# # # 获取历史日线数据
# get_redis_day01(df_info)
# # # 获取历史1分钟数据
# get_api_min01(df_info)
# # # 获取当日的1分钟数据
get_redis_min01(df_info)
# 进行因子逻辑运算。。。。

118057.SH
                  time   open   high    low  close  volume     amount
0  2025-07-16 09:30:00  130.0  130.0  130.0  130.0   26382  3429660.0
1  2025-07-16 09:31:00  130.0  130.0  130.0  130.0       0        0.0
2  2025-07-16 09:32:00  130.0  130.0  130.0  130.0       0        0.0
3  2025-07-16 09:33:00  130.0  130.0  130.0  130.0       0        0.0
4  2025-07-16 09:34:00  130.0  130.0  130.0  130.0       0        0.0
113695.SH
                  time     open     high      low    close  volume     amount
0  2025-07-16 09:30:00  143.510  143.510  143.510  143.510     433    62139.8
1  2025-07-16 09:31:00  143.510  144.200  143.330  143.837    2429  3492235.0
2  2025-07-16 09:32:00  143.794  143.794  143.579  143.710     583   837840.0
3  2025-07-16 09:33:00  143.698  143.866  143.588  143.863     877  1260804.0
4  2025-07-16 09:34:00  144.280  145.480  144.202  145.199    4163  6028811.0
111022.SH
                  time     open     high      low    close  volume     amount
0  2

In [147]:
local_file_path = 'C:/Users/tantra/Desktop/数据/Fund_allValueDev.csv'
s3_object_key = 'Fund_allValueDev.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

local_file_path = 'C:/Users/tantra/Desktop/数据/Fund_StrbPremiumRate.csv'
s3_object_key = 'Fund_StrbPremiumRate.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

local_file_path = 'C:/Users/tantra/Desktop/数据/CBStyleMark.csv'
s3_object_key = 'CBStyleMark.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

local_file_path = 'C:/Users/tantra/Desktop/数据/Fund_NewBnd.csv'
s3_object_key = 'Fund_NewBnd.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

local_file_path = 'C:/Users/tantra/Desktop/数据/Fund_FlagST.csv'
s3_object_key = 'Fund_FlagST.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

local_file_path = 'C:/Users/tantra/Desktop/数据/CBredeem.csv'
s3_object_key = 'CBredeem.csv'
dw.s3_download_file('tantra.factor', s3_object_key, local_file_path)

True

In [13]:
df_info.to_csv('C:/Users/tantra/Desktop/数据2/information.csv')

data_close_min = pd.DataFrame()
data_high_min = pd.DataFrame()
data_low_min = pd.DataFrame()
data_open_min = pd.DataFrame()
data_volume_min = pd.DataFrame()
data_amount_min = pd.DataFrame()
data_close_min.index = pd.to_datetime(data_close_min.index)
data_high_min.index = pd.to_datetime(data_high_min.index)
data_low_min.index = pd.to_datetime(data_low_min.index)
data_open_min.index = pd.to_datetime(data_open_min.index)
data_volume_min.index = pd.to_datetime(data_volume_min.index)
data_amount_min.index = pd.to_datetime(data_amount_min.index)
for csvname in os.listdir('C:/Users/tantra/Desktop/数据2/实时分钟数据'):
    data_stock_close = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['close']
    data_stock_high = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['high']
    data_stock_low = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['low']
    data_stock_open = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['open']
    data_stock_volume = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['volume']
    data_stock_amount = pd.read_csv(f'C:/Users/tantra/Desktop/数据2/实时分钟数据/{csvname}', parse_dates=['time'], index_col='time')['amount']
    data_stock_close.name = csvname[:9]
    data_stock_high.name = csvname[:9]
    data_stock_low.name = csvname[:9]
    data_stock_open.name = csvname[:9]
    data_stock_volume.name = csvname[:9]
    data_stock_amount.name = csvname[:9]
    data_close_min = pd.concat([data_close_min, data_stock_close], axis=1)
    data_high_min = pd.concat([data_high_min, data_stock_high], axis=1)
    data_low_min = pd.concat([data_low_min, data_stock_low], axis=1)
    data_open_min = pd.concat([data_open_min, data_stock_open], axis=1)
    data_volume_min = pd.concat([data_volume_min, data_stock_volume], axis=1)
    data_amount_min = pd.concat([data_amount_min, data_stock_amount], axis=1)

information = pd.read_csv('C:/Users/tantra/Desktop/数据/information.csv', index_col='债券代码')
information.index = [str(code) + '.SH' if str(code).startswith('11') else str(code) + '.SZ' for code in information.index]
stock_list = information.index.tolist()

# 获取今天的日期date
date = data_close_min.index.strftime('%Y-%m-%d').unique()[-1]

data_close_min = data_close_min.loc[date]
data_high_min = data_high_min.loc[date]
data_low_min = data_low_min.loc[date]
data_open_min = data_open_min.loc[date]
data_volume_min = data_volume_min.loc[date]
data_amount_min = data_amount_min.loc[date]

bond_list = data_volume_min.loc[:, ~(data_volume_min.fillna(0) == 0).all()].columns.tolist()
data_close_min = data_close_min.reindex(columns=bond_list)
data_high_min = data_high_min.reindex(columns=bond_list)
data_low_min = data_low_min.reindex(columns=bond_list)
data_open_min = data_open_min.reindex(columns=bond_list)
data_volume_min = data_volume_min.reindex(columns=bond_list)
data_amount_min = data_amount_min.reindex(columns=bond_list)

In [None]:
# 获取今天的日期date
date = data_close_min.index.strftime('%Y-%m-%d').unique()[-1]

# factor1
data_compare_True = data_close_min > data_close_min.shift(1)
data_compare_False = data_close_min <= data_close_min.shift(1)
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    factor1_1h = data_volume_min.loc[begain_time:end_time][data_compare_True.loc[begain_time:end_time]].dropna(how='all', axis=1).sum() / data_volume_min.loc[begain_time:end_time][data_compare_False.loc[begain_time:end_time]].dropna(how='all', axis=1).sum()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=31)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    factor1_2h = data_volume_min.loc[begain_time:end_time][data_compare_True.loc[begain_time:end_time]].dropna(how='all', axis=1).sum() / data_volume_min.loc[begain_time:end_time][data_compare_False.loc[begain_time:end_time]].dropna(how='all', axis=1).sum()
if datetime.now().hour >= 14:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    factor1_1h = data_volume_min.loc[begain_time:end_time][data_compare_True.loc[begain_time:end_time]].dropna(how='all', axis=1).sum() / data_volume_min.loc[begain_time:end_time][data_compare_False.loc[begain_time:end_time]].dropna(how='all', axis=1).sum()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    factor1_2h = data_volume_min.loc[begain_time:end_time][data_compare_True.loc[begain_time:end_time]].dropna(how='all', axis=1).sum() / data_volume_min.loc[begain_time:end_time][data_compare_False.loc[begain_time:end_time]].dropna(how='all', axis=1).sum()
factor1_1h = factor1_1h.replace([np.inf, -np.inf], np.nan)
factor1_2h = factor1_2h.replace([np.inf, -np.inf], np.nan)

# factor2
if datetime.now().hour < 13:  
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    R1 = data / data.shift(1) - 1
    R2 = data / data.shift(2) - 1
    factor2_1h = (R1 + R2).mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    R1 = data / data.shift(1) - 1
    R2 = data / data.shift(2) - 1
    factor2_2h = (R1 + R2).mean()
if datetime.now().hour >= 14:
    # 30min
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14, minutes=20)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    R1 = data / data.shift(1) - 1
    R2 = data / data.shift(2) - 1
    factor2_30min = (R1 + R2).mean()
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    R1 = data / data.shift(1) - 1
    R2 = data / data.shift(2) - 1
    factor2_1h = (R1 + R2).mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    R1 = data / data.shift(1) - 1
    R2 = data / data.shift(2) - 1
    factor2_2h = (R1 + R2).mean()

# factor3
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    factor3_1h = return_std_std.corrwith(data_amount_min.loc[begain_time:end_time])
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    factor3_2h = return_std_std.corrwith(data_amount_min.loc[begain_time:end_time])
if datetime.now().hour >= 14:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    factor3_1h = return_std_std.corrwith(data_amount_min.loc[begain_time:end_time])
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    factor3_2h = return_std_std.corrwith(data_amount_min.loc[begain_time:end_time])
    
# factor4
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    data_abs = np.abs(data - data.shift(1))
    SU = data_abs[data_compare_True.loc[begain_time:end_time]].sum()
    SD = data_abs[data_compare_False.loc[begain_time:end_time]].sum()
    factor4_1h = (SU - SD) / (SU + SD)
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=31)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    data_abs = np.abs(data - data.shift(1))
    SU = data_abs[data_compare_True.loc[begain_time:end_time]].sum()
    SD = data_abs[data_compare_False.loc[begain_time:end_time]].sum()
    factor4_2h = (SU - SD) / (SU + SD)
if datetime.now().hour >= 14:
    # 30min
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14, minutes=20)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    data_abs = np.abs(data - data.shift(1))
    SU = data_abs[data_compare_True.loc[begain_time:end_time]].sum()
    SD = data_abs[data_compare_False.loc[begain_time:end_time]].sum()
    factor4_30min = (SU - SD) / (SU + SD)
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    data_abs = np.abs(data - data.shift(1))
    SU = data_abs[data_compare_True.loc[begain_time:end_time]].sum()
    SD = data_abs[data_compare_False.loc[begain_time:end_time]].sum()
    factor4_1h = (SU - SD) / (SU + SD)
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    data_abs = np.abs(data - data.shift(1))
    SU = data_abs[data_compare_True.loc[begain_time:end_time]].sum()
    SD = data_abs[data_compare_False.loc[begain_time:end_time]].sum()
    factor4_2h = (SU - SD) / (SU + SD)

# factor5
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    min_index = data.index
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    return_std_std_mean = return_std_std.mean()
    factor5_1h = data_amount_min.loc[min_index][return_std_std > return_std_std_mean].mean() / data_amount_min.loc[min_index].mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    min_index = data.index
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    return_std_std_mean = return_std_std.mean()
    factor5_2h = data_amount_min.loc[min_index][return_std_std > return_std_std_mean].mean() / data_amount_min.loc[min_index].mean()
if datetime.now().hour >= 14:
    # 1h    
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    min_index = data.index
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    return_std_std_mean = return_std_std.mean()
    factor5_1h = data_amount_min.loc[min_index][return_std_std > return_std_std_mean].mean() / data_amount_min.loc[min_index].mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    min_index = data.index
    return_min = data / data.shift(1) - 1
    return_std = return_min.rolling(5).std()
    return_std_std = return_std.rolling(5).std()
    return_std_std_mean = return_std_std.mean()
    factor5_2h = data_amount_min.loc[min_index][return_std_std > return_std_std_mean].mean() / data_amount_min.loc[min_index].mean()

# factor6
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    close_max = data.rolling(2).max()
    factor6_1h = ((data - data.shift(1)) / close_max).mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    data = data_close_min.loc[begain_time:end_time]
    close_max = data.rolling(2).max()
    factor6_2h = ((data - data.shift(1)) / close_max).mean()
if datetime.now().hour >= 14:
    # 30min
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14, minutes=20)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    close_max = data.rolling(2).max()
    factor6_30min = ((data - data.shift(1)) / close_max).mean()
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    close_max = data.rolling(2).max()
    factor6_1h = ((data - data.shift(1)) / close_max).mean()
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    data = data_close_min.loc[begain_time:end_time]
    close_max = data.rolling(2).max()
    factor6_2h = ((data - data.shift(1)) / close_max).mean()

# factor7
data_compare_DTM = data_open_min > data_open_min.shift(1)
data_compare_DBM = data_open_min >= data_open_min.shift(1)

HO = data_high_min - data_open_min
OO = data_open_min - data_open_min.shift(1)
OL = data_open_min - data_low_min
DTM = np.maximum(HO,OO)[data_compare_DTM]
DBM = np.maximum(OL,OO)[data_compare_DBM]
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    STM = DTM.loc[begain_time:end_time].sum()
    SBM = DBM.loc[begain_time:end_time].sum()
    factor7_1h = (STM - SBM) / np.maximum(STM,SBM)
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=31)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    STM = DTM.loc[begain_time:end_time].sum()
    SBM = DBM.loc[begain_time:end_time].sum()
    factor7_2h = (STM - SBM) / np.maximum(STM,SBM)
if datetime.now().hour >= 14:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    STM = DTM.loc[begain_time:end_time].sum()
    SBM = DBM.loc[begain_time:end_time].sum()
    factor7_1h = (STM - SBM) / np.maximum(STM,SBM)
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    STM = DTM.loc[begain_time:end_time].sum()
    SBM = DBM.loc[begain_time:end_time].sum()
    factor7_2h = (STM - SBM) / np.maximum(STM,SBM)

# factor9
if datetime.now().hour < 13:
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=10, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    high_low = data_high_min.loc[begain_time:end_time] - data_low_min.loc[begain_time:end_time]
    close_high = data_close_min.loc[begain_time:end_time].shift(1) - data_high_min.loc[begain_time:end_time]
    abs_close_low = np.abs(data_close_min.loc[begain_time:end_time].shift(1) - data_low_min.loc[begain_time:end_time])
    atr = np.maximum(np.maximum(high_low, close_high), abs_close_low)
    ATR = atr.iloc[1:-1].mean()
    factor9_1h = ATR
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=9, minutes=30)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=11, minutes=30)
    high_low = data_high_min.loc[begain_time:end_time] - data_low_min.loc[begain_time:end_time]
    close_high = data_close_min.loc[begain_time:end_time].shift(1) - data_high_min.loc[begain_time:end_time]
    abs_close_low = np.abs(data_close_min.loc[begain_time:end_time].shift(1) - data_low_min.loc[begain_time:end_time])
    atr = np.maximum(np.maximum(high_low, close_high), abs_close_low)
    ATR = atr.iloc[1:-1].mean()
    factor9_2h = ATR
if datetime.now().hour >= 14:
    # 30min
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14, minutes=20)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    high_low = data_high_min.loc[begain_time:end_time] - data_low_min.loc[begain_time:end_time]
    close_high = data_close_min.loc[begain_time:end_time].shift(1) - data_high_min.loc[begain_time:end_time]
    abs_close_low = np.abs(data_close_min.loc[begain_time:end_time].shift(1) - data_low_min.loc[begain_time:end_time])
    atr = np.maximum(np.maximum(high_low, close_high), abs_close_low)
    ATR = atr.iloc[1:-1].mean()
    factor9_30min = ATR
    # 1h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=14)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    high_low = data_high_min.loc[begain_time:end_time] - data_low_min.loc[begain_time:end_time]
    close_high = data_close_min.loc[begain_time:end_time].shift(1) - data_high_min.loc[begain_time:end_time]
    abs_close_low = np.abs(data_close_min.loc[begain_time:end_time].shift(1) - data_low_min.loc[begain_time:end_time])
    atr = np.maximum(np.maximum(high_low, close_high), abs_close_low)
    ATR = atr.iloc[1:-1].mean()
    factor9_1h = ATR
    # 2h
    begain_time = pd.to_datetime(date) + pd.Timedelta(hours=13)
    end_time = pd.to_datetime(date) + pd.Timedelta(hours=15)
    high_low = data_high_min.loc[begain_time:end_time] - data_low_min.loc[begain_time:end_time]
    close_high = data_close_min.loc[begain_time:end_time].shift(1) - data_high_min.loc[begain_time:end_time]
    abs_close_low = np.abs(data_close_min.loc[begain_time:end_time].shift(1) - data_low_min.loc[begain_time:end_time])
    atr = np.maximum(np.maximum(high_low, close_high), abs_close_low)
    ATR = atr.iloc[1:-1].mean()
    factor9_2h = ATR

# 合成因子
if datetime.now().hour <= 13:
    # 缩尾和标准化
    factor1_1h_std = series_factor_std(series_winsorize(factor1_1h))
    factor2_1h_std = series_factor_std(series_winsorize(factor2_1h))
    factor3_1h_std = series_factor_std(series_winsorize(factor3_1h))
    factor4_1h_std = series_factor_std(series_winsorize(factor4_1h))
    factor5_1h_std = series_factor_std(series_winsorize(factor5_1h))
    factor6_1h_std = series_factor_std(series_winsorize(factor6_1h))
    factor7_1h_std = series_factor_std(series_winsorize(factor7_1h))
    factor9_1h_std = series_factor_std(series_winsorize(factor9_1h))

    factor1_2h_std = series_factor_std(series_winsorize(factor1_2h))
    factor2_2h_std = series_factor_std(series_winsorize(factor2_2h))
    factor3_2h_std = series_factor_std(series_winsorize(factor3_2h))
    factor4_2h_std = series_factor_std(series_winsorize(factor4_2h))
    factor5_2h_std = series_factor_std(series_winsorize(factor5_2h))
    factor6_2h_std = series_factor_std(series_winsorize(factor6_2h))
    factor7_2h_std = series_factor_std(series_winsorize(factor7_2h))
    factor9_2h_std = series_factor_std(series_winsorize(factor9_2h))

    # 高平因子
    #yjl_std = series_factor_std(series_winsorize(information['转股溢价率'].dropna()))
    factor9_net = net_factor(factor9_2h_std,[factor3_2h_std])
    factor5_net = net_factor(factor5_2h_std,[factor3_2h_std,factor9_net])
    factor1_net = net_factor(factor1_2h_std,[factor3_2h_std,factor9_net,factor5_net])
    factor4_net = net_factor(factor2_2h_std,[factor3_2h_std,factor9_net,factor5_net,factor1_net])
    combined_factor_GP = (factor3_2h_std - factor1_net + factor4_net + factor5_net).reindex(stock_list)

    # 中平因子
    factor5_net = net_factor(factor5_2h_std,[factor3_2h_std])
    factor6_net = net_factor(factor6_2h_std,[factor3_2h_std,factor5_net])
    factor4_net = net_factor(factor4_2h_std,[factor3_2h_std,factor5_net,factor6_net])
    factor2_net = net_factor(factor2_2h_std,[factor3_2h_std,factor5_net,factor6_net,factor4_net])
    combined_factor_ZP = (factor3_2h_std - factor5_net + factor6_net + factor4_net + factor2_net).reindex(stock_list)

    # 低平因子
    factor4_net = net_factor(factor4_2h_std,[factor2_2h_std])
    factor6_net = net_factor(factor6_2h_std,[factor2_2h_std,factor4_net])
    factor3_net = net_factor(factor3_2h_std,[factor2_2h_std,factor4_net,factor6_net])
    combined_factor_DP = (factor2_2h_std + factor4_net - factor6_net + factor3_net).reindex(stock_list)

if datetime.now().hour >= 14:
    # 缩尾和标准化
    factor2_30min_std = series_factor_std(series_winsorize(factor2_30min))
    factor4_30min_std = series_factor_std(series_winsorize(factor4_30min))
    factor6_30min_std = series_factor_std(series_winsorize(factor6_30min))
    factor9_30min_std = series_factor_std(series_winsorize(factor9_30min))

    factor1_1h_std = series_factor_std(series_winsorize(factor1_1h))
    factor2_1h_std = series_factor_std(series_winsorize(factor2_1h))
    factor3_1h_std = series_factor_std(series_winsorize(factor3_1h))
    factor4_1h_std = series_factor_std(series_winsorize(factor4_1h))
    factor5_1h_std = series_factor_std(series_winsorize(factor5_1h))
    factor6_1h_std = series_factor_std(series_winsorize(factor6_1h))
    factor7_1h_std = series_factor_std(series_winsorize(factor7_1h))
    factor9_1h_std = series_factor_std(series_winsorize(factor9_1h))

    factor1_2h_std = series_factor_std(series_winsorize(factor1_2h))
    factor2_2h_std = series_factor_std(series_winsorize(factor2_2h))
    factor3_2h_std = series_factor_std(series_winsorize(factor3_2h))
    factor4_2h_std = series_factor_std(series_winsorize(factor4_2h))
    factor5_2h_std = series_factor_std(series_winsorize(factor5_2h))
    factor6_2h_std = series_factor_std(series_winsorize(factor6_2h))
    factor7_2h_std = series_factor_std(series_winsorize(factor7_2h))
    factor9_2h_std = series_factor_std(series_winsorize(factor9_2h))

    # 高平因子
    factor9_net = net_factor(factor9_30min_std,[factor2_30min_std])
    combined_factor_GP = (factor2_30min_std + factor9_net).reindex(stock_list)

    # 中平因子
    combined_factor_ZP = (factor2_1h_std + factor6_1h_std).reindex(stock_list)

    # 低平因子
    combined_factor_DP = (factor2_30min_std + factor4_30min_std + factor6_30min_std).reindex(stock_list)

In [15]:
stock_gp = pd.read_csv('C:/Users/tantra/Desktop/数据/CBStyleMark.csv', parse_dates=['date']).pivot(index='date', columns='SYMBOL9', values='GP').iloc[-1].dropna().index.tolist()
stock_zp = pd.read_csv('C:/Users/tantra/Desktop/数据/CBStyleMark.csv', parse_dates=['date']).pivot(index='date', columns='SYMBOL9', values='ZP').iloc[-1].dropna().index.tolist()
stock_dp = pd.read_csv('C:/Users/tantra/Desktop/数据/CBStyleMark.csv', parse_dates=['date']).pivot(index='date', columns='SYMBOL9', values='DP').iloc[-1].dropna().index.tolist()
factor_gp = combined_factor_GP.loc[stock_gp].sort_values()
factor_zp = combined_factor_ZP.loc[stock_zp].sort_values()
factor_dp = combined_factor_DP.loc[stock_dp].sort_values()
if datetime.now().hour <= 13:
    trade_list_gp = factor_gp.index[:int(len(factor_gp)/3)].tolist()
    trade_list_zp = factor_zp.index[:int(len(factor_zp)/4)].tolist()
    trade_list_dp = factor_dp.index[:int(len(factor_dp)/3)].tolist()
if datetime.now().hour >= 14:
    trade_list_gp = factor_gp.index[:int(len(factor_gp)/3)].tolist()
    trade_list_zp = factor_zp.index[:int(len(factor_zp)/4)].tolist()
    trade_list_dp = factor_dp.index[:int(len(factor_dp)/3)].tolist()

# # # df_sp: [tr_code, mark]，其中mark为权重，百分制(mark和为100)
# # # sp_name: 股池名称，以[SP_]开头，后面接因子名称，股池名称，版本号等等
new = pd.read_csv('C:/Users/tantra/Desktop/数据/Fund_NewBnd.csv').pivot(index='TRADINGDATE', columns='SYMBOL9', values='FACTORVALUE').iloc[-1].dropna()
new = new[new == 1].index.tolist()
ST = pd.read_csv('C:/Users/tantra/Desktop/数据/Fund_FlagST.csv').pivot(index='TRADINGDATE', columns='SYMBOL9', values='FACTORVALUE').iloc[-1].dropna()
ST = ST[ST == 1].index.tolist()
QS = pd.read_csv('C:/Users/tantra/Desktop/数据/CBredeem.csv').pivot(index='TRADINGDATE', columns='SYMBOL9', values='FACTORVALUE').iloc[-1].dropna()
QS = QS[QS == 1].index.tolist()

# df1 = dw.api_get_sp(sp_name='SP_预警_基础')
# df2 = dw.api_get_sp(sp_name='SP_预警_标准D')
# SP1 = []
# SP2 = []
# if df1 is not None:
#     SP1 = df1['tr_code'].tolist()
# if df2 is not None:
#     SP2 = df2['tr_code'].tolist()

trade_list_gp = [x for x in trade_list_gp if x not in new and x not in ST and x not in QS]
trade_list_zp = [x for x in trade_list_zp if x not in new and x not in ST and x not in QS]
trade_list_dp = [x for x in trade_list_dp if x not in new and x not in ST and x not in QS]
mark_gp = [round(100/len(trade_list_gp),4)] * len(trade_list_gp)
mark_zp = [round(100/len(trade_list_zp),4)] * len(trade_list_zp)
mark_dp = [round(100/len(trade_list_dp),4)] * len(trade_list_dp)

df_sp = pd.DataFrame({'tr_code': trade_list_gp, 'mark': mark_gp})
save_to_redis(df_sp=df_sp, sp_name='SP_RDS_CF03h2_GP_G3') # SP_FACTORNAME_GP为股池名称
df_sp = pd.DataFrame({'tr_code': trade_list_zp, 'mark': mark_zp})
save_to_redis(df_sp=df_sp, sp_name='SP_RDS_CF03h2_ZP_G4') # SP_FACTORNAME_GP为股池名称
df_sp = pd.DataFrame({'tr_code': trade_list_dp, 'mark': mark_dp})
save_to_redis(df_sp=df_sp, sp_name='SP_RDS_CF03h2_DP_G3') # SP_FACTORNAME_GP为股池名称