In [1]:
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook

from typing import List
import collections



In [2]:
def industry_DF_to_01(DF, pos=-6):
    """One-hot encode ``DF`` and shorten every resulting column label.

    Parameters
    ----------
    DF : pandas Series or DataFrame
        Categorical data handed straight to ``pd.get_dummies``.
    pos : int, default -6
        Slice start applied to each dummy column label (``label[pos:]``);
        with the default only the last six characters are kept, which drops
        any ``"<column>_"`` prefix that ``get_dummies`` adds.

    Returns
    -------
    pandas.DataFrame
        The indicator frame with the shortened column labels.
    """
    dummies = pd.get_dummies(DF)
    dummies.columns = [label[pos:] for label in dummies.columns]
    return dummies

In [3]:
# Load the pickled market data used by ind_res4, which reads it as
# other_data[date] and then selects the industry column and the
# 'index_pool' column from that per-date table.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
with open('other_data_0415.pkl', 'rb') as f:
    other_data = pickle.load(f)

In [4]:
# Takes ind_limit as ind_ceil: the relaxation ratio applied on top of the
# benchmark-proportional industry allocation.
def ind_res4(d1: str, df_temp: pd.core.frame.DataFrame, pool_N: set, retain: float, hold_num: int,
             ind_ceil: np.float64, ind_sig: str, func_pos: int, max_res_retain: float,
             ind_limit_days_dict: dict = None) -> tuple:
    """Swap stocks in ``pool_N`` until no industry holds more names than its quota.

    For every industry the quota is
    ``ceil(benchmark_weight * hold_num * (1 + ind_ceil))`` where
    ``benchmark_weight`` is the number of rows of ``other_data[d1]`` with
    ``index_pool == 500`` in that industry, divided by 500.  While an industry
    is over quota, one of its holdings is removed from the pool and a
    non-held stock from an industry that is still strictly below quota is
    added instead.

    Parameters
    ----------
    d1 : str
        Key into the module-level ``other_data`` mapping (a date string in the
        visible caller).
    df_temp : pandas object
        Candidate table indexed by stock code; its row order drives both
        scans.  The visible caller passes a Series.
        NOTE(review): the scans look like they expect ascending score order
        (worst first, best last) -- confirm against the caller.
    pool_N : set
        Currently selected stock codes.  Mutated in place and also returned.
    retain, max_res_retain : float
        Only used to bound the replacement search: at most
        ``hold_num * (1 + retain) * max_res_retain`` rows counted from the end
        of the candidate table are inspected.
    hold_num : int
        Target number of holdings used to scale the quotas.
    ind_ceil : float
        Relative slack added to each industry's benchmark-proportional quota.
    ind_sig : str
        Name of the industry column in ``other_data[d1]``.
    func_pos : int
        Forwarded to ``industry_DF_to_01`` as ``pos``.
    ind_limit_days_dict : dict, optional
        ``{industry: {date: flag}}``.  When the flag for ``d1`` is not ``1``
        the industry's excess is forced to 0, so it is neither trimmed nor
        used as a source of replacements.  ``None`` enables every industry.

    Returns
    -------
    tuple
        ``(excess_by_industry, pool_N)``.  ``excess_by_industry`` maps each
        industry to ``held - quota`` (values ``<= 0`` mean within quota).  The
        function returns as soon as all values are ``<= 0`` or as soon as a
        pass fails to fix at least one industry.
    """
    day_data = other_data[d1]
    all_inds = set(day_data[ind_sig])

    if ind_limit_days_dict is None:
        ind_limit_days_dict = {ind: {d1: 1} for ind in all_inds}

    # Attach one 0/1 indicator column per industry to the candidate table.
    df_temp_ind = pd.merge(df_temp, industry_DF_to_01(day_data[ind_sig], pos=func_pos),
                           how='left', left_index=True, right_index=True)

    # Benchmark industry weights.  The industry column is taken from ind_sig so
    # that the quota keys always match the industries looked up below; a
    # Counter is kept on purpose because unknown industries must read as 0.
    zz500_dic = collections.Counter(day_data.loc[day_data['index_pool'] == 500, ind_sig])
    for key in list(zz500_dic):
        zz500_dic[key] /= 500  # assumes the benchmark has 500 constituents

    def _quota(ind):
        # Maximum number of holdings allowed for one industry.
        return np.ceil(zz500_dic[ind] * hold_num * (1 + ind_ceil))

    while True:  # re-check the industry constraint after every adjustment pass
        # Industry indicators of the stocks currently held.
        ind_df_N = industry_DF_to_01(day_data.loc[list(pool_N), [ind_sig]], pos=func_pos)

        # Excess per industry: number held minus quota (positive = over quota).
        del_dic = {}
        for ind in ind_df_N.columns:
            del_dic[ind] = ind_df_N[ind].sum() - _quota(ind)
            if ind_limit_days_dict[ind][d1] != 1:
                del_dic[ind] = 0

        # Industries with no holding at all sit a full quota below the limit.
        for ind in all_inds ^ set(ind_df_N.columns):
            del_dic[ind] = -_quota(ind)
            if ind_limit_days_dict[ind][d1] != 1:
                del_dic[ind] = 0

        # Constraint satisfied: nothing is over quota.
        if all(val <= 0 for val in del_dic.values()):
            return del_dic, pool_N

        # Per-industry flag set when the adjustment could not be completed.
        dict_warning = {ind: False for ind in all_inds}

        for ind in del_dic.keys():
            while del_dic[ind] > 0:

                # Greedy removal scan: walk the candidate table looking for a
                # held stock that belongs to the over-quota industry.
                # NOTE(review): with this start value the first row inspected is
                # position 1 and position 0 is inspected last -- confirm whether
                # the scan was meant to start at position 0.
                ptr1 = df_temp.shape[0] - 1
                ptr2_N = 1
                while (df_temp_ind.loc[df_temp_ind.index[-ptr1], ind] != 1) or \
                        (df_temp_ind.index[-ptr1] not in pool_N):
                    if ptr1 < 0:
                        # Whole table scanned without finding a removable stock.
                        dict_warning[ind] = True
                        break
                    ptr1 -= 1

                if dict_warning[ind]:
                    break

                # Replacement scan from the end of the table: skip stocks that
                # are already held or whose industry has no room left.
                while del_dic.get(day_data.loc[df_temp_ind.index[-ptr2_N], ind_sig], -1) >= 0 or \
                        df_temp_ind.index[-ptr2_N] in pool_N:
                    # Only search inside the bounded tail of the table.
                    if ptr2_N >= min(df_temp_ind.shape[0] - 1, hold_num * (1 + retain) * max_res_retain):
                        dict_warning[ind] = True
                        break
                    ptr2_N += 1

                if dict_warning[ind]:
                    break

                # Swap the two stocks and keep the excess table in sync so a
                # single pass over the industries is enough.
                removed = df_temp_ind.index[-ptr1]
                added = df_temp_ind.index[-ptr2_N]
                pool_N.remove(removed)
                del_dic[ind] -= 1
                pool_N.add(added)
                del_dic[day_data.loc[added, ind_sig]] += 1

        # At least one industry could not be repaired: give up with the
        # current state instead of looping forever.
        if any(dict_warning.values()):
            return del_dic, pool_N

In [5]:
# Load the pickled stock scores; the next cell reads df_y[0] and uses its
# last column as the ranking score.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
with open('stock_score.pkl', 'rb') as fff:
    df_y = pickle.load(fff)

In [6]:
# Score column used for ranking: the last column of the first score table.
latest_scores = df_y[0].iloc[:, -1]

# Initial pool: the 200 highest-scoring stocks.
ranked_scores = latest_scores.sort_values(ascending=False)
pool = set(ranked_scores.iloc[:200].index)

# Apply the industry quota adjustment; `pool` is modified in place by the call.
result = ind_res4(
    d1="2021-04-15",
    df_temp=latest_scores,
    pool_N=pool,
    retain=0.5,
    hold_num=200,
    ind_ceil=0.2,
    ind_sig='industry',
    func_pos=-6,
    max_res_retain=2,
    ind_limit_days_dict=None,
)



In [8]:
# First element returned by ind_res4: per-industry (held - quota) after the
# adjustment; values <= 0 mean the industry is within its quota.
result[0]


{'801010': 0.0,
 '801020': 0.0,
 '801030': 0.0,
 '801040': -6.0,
 '801050': 0.0,
 '801080': 0.0,
 '801110': 0.0,
 '801120': 0.0,
 '801130': 0.0,
 '801140': 0.0,
 '801150': 0.0,
 '801160': -1.0,
 '801170': -8.0,
 '801180': -11.0,
 '801200': -1.0,
 '801210': 0.0,
 '801230': -1.0,
 '801710': -2.0,
 '801720': 0.0,
 '801730': 0.0,
 '801740': -6.0,
 '801750': 0.0,
 '801760': 0.0,
 '801770': -4.0,
 '801780': -3.0,
 '801880': 0.0,
 '801890': 0.0,
 '801790': -10.0}

In [9]:
# Second element returned by ind_res4: the adjusted set of selected stock codes.
result[1]

{'000069.XSHE',
 '000501.XSHE',
 '000623.XSHE',
 '000655.XSHE',
 '000657.XSHE',
 '000722.XSHE',
 '000829.XSHE',
 '000875.XSHE',
 '000906.XSHE',
 '000930.XSHE',
 '000966.XSHE',
 '000989.XSHE',
 '002177.XSHE',
 '002179.XSHE',
 '002234.XSHE',
 '002292.XSHE',
 '002327.XSHE',
 '002466.XSHE',
 '002493.XSHE',
 '002607.XSHE',
 '002614.XSHE',
 '002648.XSHE',
 '002714.XSHE',
 '002746.XSHE',
 '002791.XSHE',
 '002891.XSHE',
 '002967.XSHE',
 '300015.XSHE',
 '300041.XSHE',
 '300143.XSHE',
 '300224.XSHE',
 '300229.XSHE',
 '300314.XSHE',
 '300381.XSHE',
 '300395.XSHE',
 '300475.XSHE',
 '300582.XSHE',
 '300627.XSHE',
 '300632.XSHE',
 '300677.XSHE',
 '300687.XSHE',
 '300715.XSHE',
 '300726.XSHE',
 '300740.XSHE',
 '300770.XSHE',
 '300771.XSHE',
 '300806.XSHE',
 '600031.XSHG',
 '600039.XSHG',
 '600071.XSHG',
 '600079.XSHG',
 '600088.XSHG',
 '600136.XSHG',
 '600171.XSHG',
 '600172.XSHG',
 '600197.XSHG',
 '600218.XSHG',
 '600276.XSHG',
 '600284.XSHG',
 '600292.XSHG',
 '600295.XSHG',
 '600315.XSHG',
 '600346