In [2]:
import pandas as pd
import numpy as np
from dqt import standardize, normalize, daily_neutralize, pearson
from sqlalchemy import create_engine

# Load the market data and keep only rows dated strictly after 2011-01-01.
market_df = pd.read_parquet('/home/factors/market.parquet').query("date > '2011-01-01'")

# Derived per-row columns, appended to market_df in place.
# NOTE(review): 'adj_vwap' is computed from `close`, which makes it identical to
# 'adj_close' -- presumably a VWAP column was intended; confirm against the schema.
for derived_expr in (
    'adj_close = adj_factor * close',
    'turnover_rate = turnover / 10000 / trad_a_share / close',
    'adj_vwap = adj_factor * close',
):
    market_df.eval(derived_expr, inplace=True)

# Every matrix below is date (rows) x symbol (columns); dropna=False keeps
# columns whose entries are all NaN.
panel_kwargs = dict(index='date', columns='symbol', dropna=False)

# No aggfunc given -> pivot_table's default aggregation is used for the price matrices.
close_arr = market_df.pivot_table(values='adj_close', **panel_kwargs)
ready_arr = market_df.pivot_table(values='trade_status', aggfunc='first', **panel_kwargs)
# ind_arr = pd.pivot_table(data=market_df, index='date', columns='symbol', values='ind_lv1', aggfunc='first', dropna=False)
# Row t holds the one-step change from row t to row t+1 (pct_change shifted back by one row).
ret_arr = close_arr.pct_change(periods=1, fill_method='ffill').shift(-1)
turnover_arr = market_df.pivot_table(values='turnover_rate', aggfunc='first', **panel_kwargs)
vwap_arr = market_df.pivot_table(values='adj_vwap', **panel_kwargs)

In [3]:
# Rebuild the turnover matrix from market_df before rescaling, so that re-running
# this cell always yields the same values instead of dividing an already-scaled
# matrix by 1000 again.
# NOTE(review): the meaning of the 1000 divisor is not visible here -- confirm the units.
turnover_arr = pd.pivot_table(
    data=market_df,
    index='date',
    columns='symbol',
    values='turnover_rate',
    aggfunc='first',
    dropna=False,
) / 1000
turnover_arr

symbol,000001.SZ,000002.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,000011.SZ,...,603987.SH,603988.SH,603989.SH,603990.SH,603991.SH,603993.SH,603996.SH,603997.SH,603998.SH,603999.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-04,0.115250,0.193572,0.053326,0.121758,0.222674,0.085879,0.061832,0.159680,0.000000,0.322595,...,,,,,,,,,,
2011-01-05,0.076790,0.130444,0.053176,0.081143,0.128395,0.071221,0.051529,0.200127,0.000000,0.184413,...,,,,,,,,,,
2011-01-06,0.066635,0.081989,0.049631,0.071096,0.142557,0.081108,0.069182,0.210619,0.000000,0.151274,...,,,,,,,,,,
2011-01-07,0.247905,0.152286,0.056858,0.134991,0.418648,0.065952,0.083705,0.240202,0.000000,1.395688,...,,,,,,,,,,
2011-01-10,0.117721,0.137080,0.035822,0.070343,0.241983,0.000000,0.080223,0.207200,0.000000,1.082444,...,,,,,,,,,,
2011-01-11,0.066684,0.138443,0.037465,0.067443,0.910379,0.110844,0.077122,0.148835,0.000000,1.433888,...,,,,,,,,,,
2011-01-12,0.088614,0.141009,0.027066,0.090020,0.603177,0.203321,0.063067,0.138589,0.000000,0.976435,...,,,,,,,,,,
2011-01-13,0.062998,0.061309,0.062329,0.035635,0.454610,0.101682,0.084954,0.075897,0.000000,0.561725,...,,,,,,,,,,
2011-01-14,0.067025,0.090876,0.055246,0.038972,0.299427,0.150386,0.082333,0.138353,0.000000,0.487762,...,,,,,,,,,,
2011-01-17,0.087060,0.126906,0.084576,0.083297,0.368030,0.056426,0.075808,0.228780,0.000000,0.464402,...,,,,,,,,,,


In [82]:
from tqdm import tqdm
from numba import jit,njit
from numpy.lib.stride_tricks import as_strided as strided
from scipy.ndimage.interpolation import shift

# Rolling-window length, counted in rows of the date-indexed matrices; passed to
# get_onesym_cgo below as the window size.
N = 24

@njit
def rolling_window(a, window):
    """Return every length-`window` slice along the last axis of `a` as a strided view.

    For a 1-D input of length L the result has shape (L - window + 1, window)
    and row i equals a[i:i + window]. Leading axes of `a` are preserved.

    Parameters
    ----------
    a : ndarray
        Input array; windows are taken along its last axis.
    window : int
        Number of consecutive elements per window.

    Returns
    -------
    ndarray
        View built with as_strided; no data is copied, so the rows overlap in
        memory and alias `a`. Treat the result as read-only.

    Raises
    ------
    ValueError
        If `window` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")
    # Clamp at zero: an input shorter than `window` yields zero windows instead
    # of handing as_strided a negative dimension.
    n_windows = max(a.shape[-1] - window + 1, 0)
    shape = a.shape[:-1] + (n_windows, window)
    # Stepping to the next window and to the next element inside a window both
    # advance by one element along the last axis.
    strides = a.strides + (a.strides[-1],)
    return strided(a, shape=shape, strides=strides)


@njit
def get_turnover_weight(turnover):
    """Turn a 1-D window of turnover values into normalised weights.

    The raw weight at position i is turnover[i] multiplied by the product of
    all turnover values at positions before i. Raw weights are then divided by
    their NaN-ignoring sum.

    NOTE(review): the usual capital-gains-overhang reference price discounts by
    prod(1 - turnover) over the *later* days, whereas this multiplies by the raw
    turnover of *earlier* positions. Numerical behaviour is left unchanged here
    -- confirm the intended formula.

    Parameters
    ----------
    turnover : 1-D ndarray of float
        Turnover values for one window; may contain NaN.

    Returns
    -------
    1-D ndarray of float
        Weights with the same length as `turnover`. A NaN in the input makes
        that position's weight and every later one NaN. All entries are NaN
        when the NaN-ignoring sum of raw weights is zero.
    """
    # lagged[i] is turnover[i-1] (1.0 at i == 0), so cumprod(lagged)[i] is the
    # product of every turnover value before position i -- one linear pass
    # instead of recomputing a prefix product per element.
    lagged = np.ones(turnover.shape)
    lagged[1:] = turnover[:-1]
    raw_weight = turnover * np.cumprod(lagged)

    # The raw weights may contain NaN, hence nansum for the normaliser.
    total = np.nansum(raw_weight)
    if total == 0:
        return np.full(raw_weight.shape, np.nan)
    return raw_weight / total

@njit
def get_onedim_cgo(vwap,turnover_weight,close):
    """Relative gap between close[0] and the weighted reference price of one window.

    The reference price is the NaN-ignoring sum of vwap * turnover_weight. The
    result is (close[0] - reference) / reference, or NaN when the reference
    price is exactly zero.

    NOTE(review): only the first element of `close` is used. The caller in this
    file passes rows produced by rolling_window, where index 0 is the earliest
    row of the window -- confirm that this, not the latest close, is intended.
    """
    reference_price = np.nansum(turnover_weight * vwap)
    if reference_price != 0:
        return (close[0] - reference_price) / reference_price
    # A zero reference price would divide by zero; report it as missing.
    return np.nan

@jit
def get_onesym_cgo(vwap,turnover,close,ret,N):
    """Compute the rolling CGO series for a single symbol.

    Each input is a 1-D array over the same rows. For every window of N
    consecutive rows, the value is written at the window's last row
    (index i + N - 1) when the window passes all of these checks:

    * at most N // 3 NaN values in the window's `ret`,
    * at most N // 3 NaN values in the window's `turnover`,
    * more than N // 3 non-zero entries in the window's `ret`.

    Parameters
    ----------
    vwap, turnover, close, ret : 1-D ndarray of float
        Per-row series for one symbol, all of the same length.
    N : int
        Window length in rows.

    Returns
    -------
    1-D ndarray of float
        Same length as `vwap`. The first N - 1 positions, and every position
        whose window fails a check, are NaN. If the series is shorter than N
        the whole result is NaN.
    """
    leng = vwap.shape[0]
    cgo = np.full(leng, np.nan)
    if leng < N:
        # Not even one full window: nothing can be computed.
        return cgo

    # Separate names for the 2-D window views keep every variable bound to a
    # single array rank instead of rebinding the 1-D parameters.
    vwap_win = rolling_window(vwap, N)
    turnover_win = rolling_window(turnover, N)
    close_win = rolling_window(close, N)
    ret_win = rolling_window(ret, N)

    max_missing = N // 3
    for i in range(vwap_win.shape[0]):
        ret_i = ret_win[i]
        turnover_i = turnover_win[i]
        # The non-zero count is taken on this window only; counting over the
        # whole windowed array would make the check independent of i.
        # NOTE(review): count_nonzero treats NaN as non-zero -- confirm whether
        # missing returns should count towards this threshold.
        if (np.isnan(ret_i).sum() <= max_missing
                and np.isnan(turnover_i).sum() <= max_missing
                and np.count_nonzero(ret_i) > max_missing):
            turnover_weight = get_turnover_weight(turnover_i)
            cgo[i + N - 1] = get_onedim_cgo(vwap_win[i], turnover_weight, close_win[i])
    return cgo


# Raw ndarray views of the date x symbol matrices, in the order get_onesym_cgo expects.
vwap, turnover, close, ret = (
    frame.values for frame in (vwap_arr, turnover_arr, close_arr, ret_arr)
)

# Output matrix with the same shape as the inputs; every column is overwritten below.
cgos = np.zeros(vwap.shape)

# One pass per symbol column, with a progress bar.
n_symbols = vwap_arr.shape[1]
for col in tqdm(range(n_symbols)):
    cgos[:, col] = get_onesym_cgo(
        vwap[:, col], turnover[:, col], close[:, col], ret[:, col], N
    )



  0%|          | 0/3573 [00:00<?, ?it/s][A
Exception in thread Thread-29:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 148, in run
    for instance in self.tqdm_cls._instances:
  File "/opt/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

100%|██████████| 3573/3573 [17:17<00:00,  3.44it/s]


In [83]:
# Label the raw CGO matrix with the same date index and symbol columns as vwap_arr for display.
pd.DataFrame(data=cgos, columns=vwap_arr.columns, index=vwap_arr.index)

symbol,000001.SZ,000002.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,000011.SZ,...,603987.SH,603988.SH,603989.SH,603990.SH,603991.SH,603993.SH,603996.SH,603997.SH,603998.SH,603999.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-04,,,,,,,,,,,...,,,,,,,,,,
2011-01-05,,,,,,,,,,,...,,,,,,,,,,
2011-01-06,,,,,,,,,,,...,,,,,,,,,,
2011-01-07,,,,,,,,,,,...,,,,,,,,,,
2011-01-10,,,,,,,,,,,...,,,,,,,,,,
2011-01-11,,,,,,,,,,,...,,,,,,,,,,
2011-01-12,,,,,,,,,,,...,,,,,,,,,,
2011-01-13,,,,,,,,,,,...,,,,,,,,,,
2011-01-14,,,,,,,,,,,...,,,,,,,,,,
2011-01-17,,,,,,,,,,,...,,,,,,,,,,
