In [12]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

In [13]:
# 股票的日行情数据
path = '/Users/ryan/Documents/GitHub/GDHF-Quant-Investment/data'
file_name = 'eod_price.gz'
input_file = os.path.join(path, file_name)
df_stock = pd.read_csv(input_file)
df_stock.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,S_DQ_PRECLOSE,S_DQ_OPEN,S_DQ_HIGH,S_DQ_LOW,S_DQ_CLOSE,S_DQ_VOLUME,S_DQ_PCTCHANGE,S_DQ_AVGPRICE
0,000001.SZ,20170906,11.64,11.59,11.88,11.48,11.7,791621.55,0.5155,11.6914
1,000002.SZ,20170906,23.76,23.79,24.27,23.5,23.8,361719.63,0.1684,23.94
2,000063.SZ,20170906,24.36,24.28,24.99,24.19,24.95,708125.33,2.422,24.7464


In [14]:
# 获取股票的换手率数据
file_name = 'value_factor.gz'
input_file = os.path.join(path, file_name)
df_turn = pd.read_csv(input_file)
df_turn.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,S_VAL_PE,S_VAL_PB_NEW,S_VAL_PS,S_DQ_TURN,S_DQ_MV,NET_ASSETS_TODAY
0,000725.SZ,20170101,61.4432,1.3033,2.0677,,6872158.0,77142700000.0
1,600346.SH,20170101,,4.4023,27.998,,556266.4,5346761000.0
2,600010.SH,20170101,,1.9186,4.0373,,4392025.0,47349140000.0


In [15]:
# 合并数据 得到计算情绪因子所需要的字段
data = pd.merge(df_stock[['S_INFO_WINDCODE','TRADE_DT','S_DQ_VOLUME','S_DQ_AVGPRICE']],\
     df_turn[['S_INFO_WINDCODE','TRADE_DT','S_DQ_TURN']], on=['S_INFO_WINDCODE','TRADE_DT'], how='left')

data.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,S_DQ_VOLUME,S_DQ_AVGPRICE,S_DQ_TURN
0,000001.SZ,20170906,791621.55,11.6914,0.4679
1,000002.SZ,20170906,361719.63,23.94,0.3726
2,000063.SZ,20170906,708125.33,24.7464,2.0649


In [16]:
data.set_index('TRADE_DT', inplace=True)
data.sort_index(inplace=True)

In [17]:
# VOL20 20日平均换手率
vol20 = data[['S_INFO_WINDCODE', 'S_DQ_TURN']].groupby('S_INFO_WINDCODE').apply(lambda x: x.rolling(20).mean())
vol20 = pd.DataFrame(vol20)
vol20.columns = ['VOL20']
vol20.reset_index(inplace=True)
vol20.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,VOL20
0,000001.SZ,20170103,
1,000001.SZ,20170104,
2,000001.SZ,20170105,


In [18]:
# VSTD20 20日成交量标准差
vstd20 = data[['S_INFO_WINDCODE', 'S_DQ_VOLUME']].groupby('S_INFO_WINDCODE').apply(lambda x:x.rolling(20).std())
vstd20 = pd.DataFrame(vstd20)
vstd20.columns = ['VSTD20']
vstd20.reset_index(inplace=True)
vstd20.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,VSTD20
0,000001.SZ,20170103,
1,000001.SZ,20170104,
2,000001.SZ,20170105,


In [19]:
# TVMA20 20日成交金额的移动平均值
tvma20 = data[['S_INFO_WINDCODE', 'S_DQ_AVGPRICE']]
tvma20['TVMA20'] = data['S_DQ_AVGPRICE'].ewm(span=20, min_periods=0, adjust=False, ignore_na=False).mean()
tvma20.reset_index(inplace=True)
tvma20.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tvma20['TVMA20'] = data['S_DQ_AVGPRICE'].ewm(span=20, min_periods=0, adjust=False, ignore_na=False).mean()


Unnamed: 0,TRADE_DT,S_INFO_WINDCODE,S_DQ_AVGPRICE,TVMA20
0,20170103,002812.SZ,63.9377,63.9377
1,20170103,600183.SH,10.999,58.895919
2,20170103,600188.SH,10.8668,54.321717


In [20]:
# WVAD 威廉变异离散量
df_stock.set_index('TRADE_DT', inplace=True)
df_stock.sort_index(inplace=True)

wvad = df_stock.groupby('S_INFO_WINDCODE').apply(lambda x: (x['S_DQ_CLOSE']-x['S_DQ_OPEN'])/(x['S_DQ_HIGH']-x['S_DQ_LOW'])*x['S_DQ_VOLUME'])
wvad = pd.DataFrame(wvad)
wvad.columns = ['data']
wvad['WVAD'] = wvad['data'].rolling(6).apply(lambda x: x.sum())
wvad.reset_index(inplace=True)
wvad.head(3)

Unnamed: 0,S_INFO_WINDCODE,TRADE_DT,data,WVAD
0,000001.SZ,20170103,255466.938889,
1,000001.SZ,20170104,112332.3825,
2,000001.SZ,20170105,0.0,


In [21]:
# 合并成最终的因子数据集
df1 = pd.merge(vol20, vstd20, on=['TRADE_DT', 'S_INFO_WINDCODE'], how='left')
df2 = pd.merge(tvma20[['TRADE_DT','S_INFO_WINDCODE','TVMA20']], wvad[['TRADE_DT','S_INFO_WINDCODE','WVAD']], on=['TRADE_DT', 'S_INFO_WINDCODE'], how='left')

df = pd.merge(df1, df2, on=['TRADE_DT', 'S_INFO_WINDCODE'], how='left')
# df.to_csv(os.path.join(path, 'trade_factor.gz'), compression='gzip', index=False)