In [2]:
from init import *

In [1]:
from rqfactor.extension import UserDefinedLeafFactor,CombinedCrossSectionalFactor

import numpy as np
import pandas as pd

class FAMA(object):
    
    def __init__(self):
        
        #自定义股票收益率因子
        def Stocks_Return(order_book_ids, start_date, end_date):
            return get_price_change_rate(order_book_ids, start_date, end_date).reindex(columns = order_book_ids)
        
        self.stocks_return = UserDefinedLeafFactor('stocks_return', Stocks_Return) 
        self.market_cap = Factor('market_cap_3')
        self.bm = 1/Factor('pb_ratio_lf')
        self.roe=Factor('net_profit_parent_company_ttm_0')/Factor('equity_parent_company_mrq_0')
        self.asset=(Factor('total_assets_mrq_0')-Factor('total_assets_mrq_4'))/ABS(Factor('total_assets_mrq_4'))
    
    def get_fama_three_factors(self):
        
        def fama_three_factors(stocks_return, market_cap, bm):
            
            bs = market_cap.apply(lambda x: pd.qcut(x, 2, labels=['S','B']), axis=1)
            hml = bm.apply(lambda x: pd.qcut(x, [0, 0.3, 0.7, 1], labels=['L','M','H']), axis=1)
            #smb计算时长：0:01:07.841932
            smb = np.where((bs=='S') & (hml=='L'), 'SL', 
                           np.where((bs=='S') & (hml=='M'), 'SM',
                                    np.where((bs=='S') & (hml=='H'), 'SH',
                                             np.where((bs=='B') & (hml=='L'), 'BL',
                                                      np.where((bs=='B') & (hml=='M'), 'BM',
                                                               np.where((bs=='B') & (hml=='H'), 'BH', np.nan))))))
            
            stocks_return_weighted = stocks_return*market_cap
            sl = np.nansum(np.where(smb == 'SL', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)
            sm = np.nansum(np.where(smb == 'SM', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)
            sh = np.nansum(np.where(smb == 'SH', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)
            bl = np.nansum(np.where(smb == 'BL', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)
            bm = np.nansum(np.where(smb == 'BM', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)
            bh = np.nansum(np.where(smb == 'BH', stocks_return_weighted, np.nan), axis=1)/np.nansum(market_cap, axis=1)

            fama = pd.DataFrame(data = {'SMB':(sl + sm + sh)/3 - (bl + bm + bh)/3, 'HML':(sh + bh)/2 - (sl + bl)/2},
                                index = stocks_return_weighted.index)
            
            rm = get_price_change_rate('000985.XSHG', stocks_return_weighted.index[0], stocks_return_weighted.index[-1])
            rf = get_yield_curve(stocks_return_weighted.index[0], stocks_return_weighted.index[-1], tenor='0S', market='cn') 
            rb = pd.concat([rm, rf], axis=1)
            rb.columns=['Rm', 'Rf']
            rb['Rb'] = rb['Rm'] - ((rb['Rf'] + 1)** (1/252) - 1)
            
            fama = pd.concat([fama, rb['Rb']], axis=1)
            
            return fama.dropna(how='all')
        
        def FAMA_Three_Factors(*f):
            return CombinedCrossSectionalFactor(fama_three_factors, *f)
        
        ff3 = FAMA_Three_Factors(self.stocks_return, self.market_cap, self.bm)
        return ff3

In [3]:
# 宏观指标因子化
def micro_index_factor(df):
    """
    :param df:宏观因子 -> df
    :param name:宏观因子名称(df.columns) -> str
    :return micro_index:宏观因子值 -> unstack
    """
    name = df.columns.tolist()
    start_date = df.index[0].strftime('%F')
    end_date = df.index[-1].strftime('%F')
    try:
        fama_data = pd.read_pickle(f'fama_data_{start_date}_{end_date}.pkl')
        rp = pd.read_pickle(f'rp_{start_date}_{end_date}.pkl').sort_index()
    except:
        # Fama模型构建
        f = FAMA().get_fama_three_factors()
        fama_data = execute_factor(f,stock_list,start_date,end_date)
        fama_data.to_pickle(f'fama_data_{start_date}_{end_date}.pkl')
        # 标的收益构建
        rf = (get_yield_curve(get_previous_trading_date(start_date,19), end_date, tenor='5Y', market='cn') + 1) ** (1/252) - 1
        ret = execute_factor(PCT_CHANGE(Factor('close'),1),stock_list,start_date,end_date)
        rp = ret.sub(rf.loc[ret.index,'5Y'].to_list(),axis=0).sort_index()
        rp.to_pickle(f'rp_{start_date}_{end_date}.pkl')

    daily_x = pd.concat([df,fama_data],axis = 1).ffill().dropna()
    daily_x = daily_x.drop_duplicates(subset=name)

    rp = rp.rolling(20,1).mean()
    rp = rp.loc[list(set(rp.index) & set(daily_x.index))].sort_index()
    
    num = daily_x.shape[0]
    ts_df = pd.DataFrame()
    for i in tqdm(range(12,num)):
        end = daily_x.iloc[i].name
        start = daily_x.iloc[i-12].name
        rp_temp = rp.loc[start:end]
        daily_x_temp = daily_x.loc[start:end].dropna()
        x_index = set(daily_x_temp.index)
        cs_dict = {}
        for s in stock_list:
            try:
                rp_temp_stock = rp_temp[s].dropna()
                y_index = set(rp_temp_stock.index)
                inter_index = sorted(list(x_index & y_index))
                x = np.array(daily_x_temp.loc[inter_index])
                y = np.transpose([np.array(rp_temp_stock.loc[inter_index])])
                epu_value = np.dot(np.dot(np.linalg.inv(np.dot(x.T,x)),x.T),y)[0][0]
            except:
                epu_value = np.nan
            cs_dict[s] = epu_value
        cs_df = pd.DataFrame([cs_dict],index = [end])
        ts_df = pd.concat([ts_df,cs_df],axis = 0)

    micro_index = ts_df.reindex(pd.to_datetime(get_trading_dates(start_date,end_date))).ffill().dropna(how = 'all')
    micro_index = micro_index.mask(~index_fix)
    
    return micro_index

### 数据获取

In [5]:
dataset = pd.read_excel('宏观指标.xlsx',index_col=[0],parse_dates=[0]).iloc[2:-1]
dataset.index = pd.to_datetime(dataset.index )
dataset.columns = ['GDP','CPI','SF','M2','PMI']
dataset = dataset.sort_index()

In [9]:
micro_dict = {}
for i in dataset.columns.tolist():
    micro_dict[i] = dataset[[i]].ffill().loc['2016-01-01':]

In [12]:
micro_index_dict = {}
for k,v in micro_dict.items():
    print(k,'loading ... ')
    micro_index_dict[k] = micro_index_factor(v).mask(~index_fix)

GDP loading ... 


100%|██████████| 19/19 [00:31<00:00,  1.64s/it]


CPI loading ... 


100%|██████████| 54/54 [01:36<00:00,  1.79s/it]


SF loading ... 


100%|██████████| 78/78 [02:40<00:00,  2.05s/it]


M2 loading ... 


100%|██████████| 78/78 [02:06<00:00,  1.63s/it]


PMI loading ... 


100%|██████████| 25/25 [00:41<00:00,  1.67s/it]


### 因子检验

In [15]:
ic_summary = pd.DataFrame()
for k,v in micro_index_dict.items():
    v = data_clean(v).abs()  # 这里沿用EPU因子的处理abs
    df,ic_summary_temp = Quick_Factor_Return_N_IC(v,20,k)
    ic_summary = pd.concat([ic_summary,ic_summary_temp],axis = 0)

中信行业数据已获取
动态行业数据已构建


100%|██████████| 908/908 [00:08<00:00, 105.73it/s]


{'name': 'GDP', 'IC mean': -0.022, 'IC std': 0.0802, 'IR': -0.2739, 'IR_ly': -0.4194, 'IC>0': 0.4178, 'ABS_IC>2%': 0.8187, 't_stat': -8.1621}
中信行业数据已获取
动态行业数据已构建


100%|██████████| 1542/1542 [00:19<00:00, 80.74it/s]


{'name': 'CPI', 'IC mean': -0.029, 'IC std': 0.0805, 'IR': -0.3604, 'IR_ly': -0.644, 'IC>0': 0.3811, 'ABS_IC>2%': 0.8364, 't_stat': -14.0612}


100%|██████████| 1542/1542 [00:22<00:00, 68.22it/s]


{'name': 'SF', 'IC mean': -0.0284, 'IC std': 0.0737, 'IR': -0.3855, 'IR_ly': -0.3641, 'IC>0': 0.3252, 'ABS_IC>2%': 0.8081, 't_stat': -15.0409}


100%|██████████| 1542/1542 [00:25<00:00, 60.88it/s]


{'name': 'M2', 'IC mean': -0.0318, 'IC std': 0.0726, 'IR': -0.4375, 'IR_ly': -0.362, 'IC>0': 0.3068, 'ABS_IC>2%': 0.8292, 't_stat': -17.0665}
中信行业数据已获取
动态行业数据已构建


100%|██████████| 1519/1519 [00:21<00:00, 69.21it/s]


{'name': 'PMI', 'IC mean': -0.0247, 'IC std': 0.075, 'IR': -0.3298, 'IR_ly': -0.3967, 'IC>0': 0.3889, 'ABS_IC>2%': 0.7999, 't_stat': -12.7682}


In [16]:
ic_summary

Unnamed: 0,name,IC mean,IC std,IR,IR_ly,IC>0,ABS_IC>2%,t_stat
0,GDP,-0.022,0.0802,-0.2739,-0.4194,0.4178,0.8187,-8.1621
0,CPI,-0.029,0.0805,-0.3604,-0.644,0.3811,0.8364,-14.0612
0,SF,-0.0284,0.0737,-0.3855,-0.3641,0.3252,0.8081,-15.0409
0,M2,-0.0318,0.0726,-0.4375,-0.362,0.3068,0.8292,-17.0665
0,PMI,-0.0247,0.075,-0.3298,-0.3967,0.3889,0.7999,-12.7682
