In [1]:
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
%matplotlib inline

In [6]:
# Directory containing one Excel workbook per stock, named "<code>.xlsx".
# Forward slashes are used on purpose: in a normal (non-raw) string literal the
# sequence "\U" starts a unicode escape, so "C:\Users\..." fails to compile on
# Python 3. Windows accepts "/" as a separator, and the per-stock path is built
# with "/" as well.
DATA_DIR = "C:/Users/jgtzsx01/Documents/workspace/data/stocks"
# Workbook that provides the "code" column used to enumerate the stocks
# (presumably the index constituent list -- confirm against the data source).
INDEX_FILE = "C:/Users/jgtzsx01/Documents/workspace/data/index-component/000300.xlsx"

In [7]:
# Read the workbook at INDEX_FILE into a DataFrame using pandas' default options.
# NOTE: the name `df` is rebound later by the per-stock loop, so after that loop
# runs it no longer refers to this frame.
df = pd.read_excel(INDEX_FILE)

In [9]:
# Collect the values of the "code" column as a plain Python list; each entry is
# later passed to get_volume_momentum to locate that stock's workbook.
codes = df["code"].tolist()

# 计算codes里的股票的成交量和动量

In [12]:
def rank_percentile(array):
    """
    Return the percentile rank of the last element of ``array`` within ``array``.

    The values are ranked with ``Series.rank(pct=True)`` (pandas' default tie
    handling), and the fractional rank of the final element is returned.
    """
    percentile_ranks = pd.Series(array).rank(pct=True)
    return percentile_ranks.iloc[-1]

In [13]:
def classify(x):
    """
    Map a value onto one of five integer buckets, from -2 to 2.

    Buckets are closed on the right: ``x <= 0.2`` -> -2, ``(0.2, 0.4]`` -> -1,
    ``(0.4, 0.6]`` -> 0, ``(0.6, 0.8]`` -> 1 and ``x > 0.8`` -> 2.
    A value that satisfies none of the comparisons (e.g. NaN) yields ``None``.
    """
    # (inclusive upper bound, bucket label), checked in ascending order.
    upper_bounds = ((0.2, -2), (0.4, -1), (0.6, 0), (0.8, 1))
    for bound, label in upper_bounds:
        if x <= bound:
            return label
    if x > 0.8:
        return 2
    # Reached only when every comparison above is False, as happens for NaN.
    return None

In [22]:
def get_volume_momentum(code, periods=20, long_window=20, short_window=5, volume_window=60):
    """
    Build the volume-weighted momentum factor for one stock.

    The workbook ``<DATA_DIR>/<code>.xlsx`` is read with its first column as the
    index (converted to datetimes); it must provide ``close`` and ``volume``
    columns.

    Parameters
    ----------
    code : identifier used to build the workbook file name.
    periods : horizon of the ``close`` percentage change stored in ``return``,
        and the number of rows by which momentum and volume are lagged.
    long_window : window of the long rolling mean of ``close``; also the lag of
        the ``close`` value used to normalise the momentum.
    short_window : window of the short rolling mean of ``close``.
    volume_window : rolling window over which the lagged volume's percentile
        rank is computed.

    Returns
    -------
    DataFrame with the columns ``mom-vol``, ``return``, ``volume_percent`` and
    ``momentum``, on the workbook's datetime index.
    """
    prices = pd.read_excel("%s/%s.xlsx"%(DATA_DIR, code), index_col=0)
    prices.index = pd.to_datetime(prices.index)
    close = prices["close"]

    prices["return"] = close.pct_change(periods=periods)

    # Momentum = short mean minus long mean, scaled by the close `long_window` rows back.
    short_mean = close.rolling(window=short_window).mean()
    long_mean = close.rolling(window=long_window).mean()
    scaled_momentum = (short_mean - long_mean) / close.shift(long_window)

    # Lag momentum and volume by `periods` rows so they line up with the
    # `periods`-row return that ends on the same row.
    prices["momentum"] = scaled_momentum.shift(periods)
    lagged_volume = prices["volume"].shift(periods)
    prices["volume"] = lagged_volume

    # Percentile of each lagged volume within its trailing window, then bucketed.
    prices["volume_percent"] = lagged_volume.rolling(window=volume_window).apply(rank_percentile)
    prices["volume_val"] = prices["volume_percent"].map(classify)

    # Volume-momentum factor: volume bucket times momentum.
    prices["mom-vol"] = prices["volume_val"] * prices["momentum"]

    return prices[["mom-vol", "return", "volume_percent", "momentum"]]

In [31]:
# Print, for every stock code, the correlation between the factor and the return.
# `df` is kept as the loop variable name because a later cell reads it.
for code in codes:
    df = get_volume_momentum(code)
    factor_return_corr = df["mom-vol"].corr(df["return"])
    print("%s, corr=%.2f"%(code, factor_return_corr))

000001.SZ, corr=-0.09
000002.SZ, corr=-0.31
000008.SZ, corr=0.07
000009.SZ, corr=-0.10
000027.SZ, corr=-0.26
000039.SZ, corr=-0.52
000060.SZ, corr=-0.21
000061.SZ, corr=-0.12
000063.SZ, corr=-0.26
000069.SZ, corr=-0.54
000100.SZ, corr=-0.33
000156.SZ, corr=0.25
000157.SZ, corr=-0.32
000166.SZ, corr=-0.05
000333.SZ, corr=-0.10
000338.SZ, corr=-0.34
000402.SZ, corr=-0.36
000413.SZ, corr=0.18
000415.SZ, corr=-0.48
000423.SZ, corr=0.06
000425.SZ, corr=0.10
000503.SZ, corr=-0.03
000538.SZ, corr=0.02
000540.SZ, corr=-0.12
000555.SZ, corr=0.15
000559.SZ, corr=0.29
000568.SZ, corr=0.09
000623.SZ, corr=-0.36
000625.SZ, corr=-0.17
000627.SZ, corr=-0.22
000630.SZ, corr=-0.10
000651.SZ, corr=-0.20
000671.SZ, corr=-0.20
000686.SZ, corr=-0.07
000709.SZ, corr=-0.28
000712.SZ, corr=-0.13
000718.SZ, corr=-0.07
000725.SZ, corr=-0.01
000728.SZ, corr=-0.38
000738.SZ, corr=-0.19
000750.SZ, corr=0.06
000768.SZ, corr=-0.26
000776.SZ, corr=-0.44
000778.SZ, corr=0.27
000783.SZ, corr=-0.27
000792.SZ, corr=0.00


In [28]:
# Correlation between the "mom-vol" factor and "return" for whichever frame `df`
# currently holds. NOTE(review): `df` is rebound by the per-stock loop, so the
# value shown depends on which cell assigned `df` last -- confirm execution order.
df["mom-vol"].corr(df["return"])

-0.25502272667378606