In [63]:
import numpy as np
import pandas as pd
import scipy
import math
import statsmodels
import matplotlib.pyplot as plt
import datetime
import seaborn as sns
from WindPy import w
import calendar
%matplotlib inline

In [6]:
w.start()

Welcome to use Wind Quant API for Python (WindPy)!
You can use w.menu to help yourself to create commands(WSD,WSS,WST,WSI,WSQ,...)!

COPYRIGHT (C) 2016 WIND HONGHUI INFORMATION & TECHKNOLEWDGE CO., LTD. ALL RIGHTS RESERVED.
IN NO CIRCUMSTANCE SHALL WIND BE RESPONSIBLE FOR ANY DAMAGES OR LOSSES CAUSED BY USING WIND QUANT API FOR Python.


.ErrorCode=0
.Data=[OK!]

In [13]:
def WindData2DF(raw_data):
    """Convert a Wind API result object into a pandas DataFrame.

    Parameters
    ----------
    raw_data : object
        Must expose ``Data`` (a sequence of value sequences) and ``Fields``
        (a sequence of field names), paired position by position.

    Returns
    -------
    pandas.DataFrame
        One column per field; each column name is the field name converted
        with ``str()`` and lower-cased.
    """
    # zip() pairs each value sequence with its field name; surplus entries on
    # either side are silently dropped.
    columns = {
        str(field_name).lower(): values
        for values, field_name in zip(raw_data.Data, raw_data.Fields)
    }
    return pd.DataFrame(columns)

# 1. 获取A股全部股票

In [14]:
codes = w.wset("SectorConstituent",u"date=20170212;sector=全部A股") # fetch the code and name info of all A-share stocks

In [15]:
codes

.ErrorCode=0
.Codes=[1,2,3,4,5,6,7,8,9,10,...]
.Fields=[date,wind_code,sec_name]
.Times=[20170213]
.Data=[[2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,2017-02-12 00:00:00.005000,...],[000001.SZ,000002.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,000011.SZ,...],[u'\u5e73\u5b89\u94f6\u884c',u'\u4e07\u79d1A',u'\u56fd\u519c\u79d1\u6280',u'\u4e16\u7eaa\u661f\u6e90',u'\u6df1\u632f\u4e1aA',u'\u5168\u65b0\u597d',u'\u795e\u5dde\u9ad8\u94c1',u'\u4e2d\u56fd\u5b9d\u5b89',u'\u7f8e\u4e3d\u751f\u6001',u'\u6df1\u7269\u4e1aA',...]]

In [16]:
codes_df = WindData2DF(codes)

In [18]:
codes_df.head()

Unnamed: 0,date,sec_name,wind_code
0,2017-02-12 00:00:00.005,平安银行,000001.SZ
1,2017-02-12 00:00:00.005,万科A,000002.SZ
2,2017-02-12 00:00:00.005,国农科技,000004.SZ
3,2017-02-12 00:00:00.005,世纪星源,000005.SZ
4,2017-02-12 00:00:00.005,深振业A,000006.SZ


# 2. 选择候选因子

1. 估值：账面市值比（B/M）、盈利收益率（EPS）、动态市盈（PEG）
2. 成长性：ROE、ROA、主营毛利率（GP/R）、净利率（P/R）
3. 资本结构：资产负债率（L/A）、固定资产比例（FAP）、流通市值（CMV）

In [31]:
# Wind field names requested for every stock / date in the calls below.
factors = ["bps", # book value (net assets) per share
           "mkt_cap", # total market cap ("total market cap 2" in the original note) — NOTE(review): displayed output shows column mkt_cap_ard; confirm which field is intended
           "eps_ttm", # earnings per share (TTM)
           "est_peg", # forecast PEG
           "roe", # return on equity (ROE)
           "roa", # return on total assets (ROA)
           "grossmargin", # gross profit
           "yoyprofit", # net profit (year-over-year growth rate)
           "debttoassets", # debt-to-assets ratio
           "faturn", # fixed-asset turnover
           "mkt_cap_float", # free-float market cap
          ]

In [53]:
date = "20150930"

In [54]:
def get_fundamental(symbols, factors, date):
    """Query ``w.wss`` for the given factors as of one date.

    Parameters
    ----------
    symbols : passed through unchanged as the first ``w.wss`` argument
        (callers in this notebook pass a list of Wind codes).
    factors : passed through unchanged as the second ``w.wss`` argument
        (a field name or a list of field names).
    date : str
        Date string whose first four characters are the year, e.g.
        ``"20151231"``. It is used for the report year, the report date and
        the trade date.

    Returns
    -------
    The result of ``WindData2DF`` applied to the ``w.wss`` response.
    """
    report_year = date[:4]
    # currencyType is deliberately left blank, exactly as in the raw option string.
    options = "rptYear=%s;rptDate=%s;tradeDate=%s;currencyType=" % (report_year, date, date)
    response = w.wss(symbols, factors, options)
    return WindData2DF(response)

In [86]:
get_fundamental(["000002.SZ"], "mkt_cap", "20151231")

Unnamed: 0,mkt_cap
0,269990900000.0


In [75]:
def get_fundamental_all(symbols, factors, startdate="20150101", enddate="20151231"):
    """Collect month-end fundamentals between two dates into one DataFrame.

    Starting from the month containing ``startdate``, ``get_fundamental`` is
    called once per month using the last calendar day of that month. A month
    is included whenever the loop cursor is on or before ``enddate`` at the
    start of an iteration, so the month containing ``enddate`` is fetched at
    its month-end even if that day lies after ``enddate``.

    Parameters
    ----------
    symbols, factors : forwarded unchanged to ``get_fundamental``.
    startdate, enddate : str
        Dates in ``"%Y%m%d"`` format.

    Returns
    -------
    pandas.DataFrame
        All monthly snapshots stacked vertically, with a ``date`` column
        holding the month-end datetime of each row and the same values used
        as the index. Empty (with only a ``date`` column) when ``startdate``
        is after ``enddate``.
    """
    current_date = datetime.datetime.strptime(startdate, "%Y%m%d")
    end_date = datetime.datetime.strptime(enddate, "%Y%m%d")
    monthly_frames = []
    while current_date <= end_date:
        # monthrange() returns (weekday of the 1st, days in month); the day
        # count is also the last calendar day of the month.
        (_, last_day) = calendar.monthrange(current_date.year, current_date.month)
        current_date = datetime.datetime(current_date.year, current_date.month, last_day)
        date_str = current_date.strftime("%Y%m%d")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(date_str)
        one_day_df = get_fundamental(symbols, factors, date_str)
        # Stamp the date on every row of this snapshot so that queries for
        # several symbols (several rows per month) stay aligned with their date.
        monthly_frames.append(one_day_df.assign(date=current_date))
        # Step onto the first day of the next month.
        current_date = current_date + datetime.timedelta(1)
    if monthly_frames:
        # Concatenate once instead of growing a frame inside the loop.
        df = pd.concat(monthly_frames)
    else:
        df = pd.DataFrame({"date": []})
    df.index = df["date"]
    return df

In [81]:
df = get_fundamental_all(["000002.SZ"], factors)

20150131
20150228
20150331
20150430
20150531
20150630
20150731
20150831
20150930
20151031
20151130
20151231


In [82]:
df.head()

Unnamed: 0_level_0,bps,debttoassets,eps_ttm,est_peg,faturn,grossmargin,mkt_cap_ard,mkt_cap_float,roa,roe,yoyprofit,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-01-31,,,1.396342,0.176885,,,144852800000.0,127609100000.0,,,,2015-01-31
2015-02-28,,,1.39626,0.177408,,,140776000000.0,124066300000.0,,,,2015-02-28
2015-03-31,8.05,77.9098,1.425489,0.268189,3.2169,3024228000.0,152650800000.0,134478200000.0,0.1756,0.7346,-44.576124,2015-03-31
2015-04-30,,,1.345888,0.400308,,,161266500000.0,142074800000.0,,,,2015-04-30
2015-05-31,,,1.345888,0.404624,,,158505000000.0,139658300000.0,,,,2015-05-31


In [79]:
# Drop every row containing a NaN. In the output shown above, months such as
# 2015-01/02/04/05 have NaN in the report-based columns (bps, roe, roa, ...), so
# only rows with a complete set of values remain. Mutates df in place.
df.dropna(inplace=True)

In [80]:
df

Unnamed: 0_level_0,bps,debttoassets,eps_ttm,est_peg,faturn,grossmargin,mkt_cap_ard,mkt_cap_float,roa,roe,yoyprofit,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-03-31,8.05,77.9098,1.425489,0.268189,3.2169,3024228000.0,152650800000.0,134478200000.0,0.1756,0.7346,-44.576124,2015-03-31
2015-06-30,7.9,78.0608,1.345694,0.473126,16.5462,15305980000.0,160406000000.0,141324700000.0,1.299,5.5248,23.614141,2015-06-30
2015-09-30,8.05,78.7795,1.428418,0.486936,25.0755,23129050000.0,140652900000.0,123922200000.0,1.7677,7.7412,25.425737,2015-09-30
2015-12-31,9.08,77.7015,1.460725,0.990342,54.125,57398500000.0,269964300000.0,237866500000.0,4.6351,19.2403,34.540017,2015-12-31
