-
Notifications
You must be signed in to change notification settings - Fork 555
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
chopchopjames
committed
Feb 15, 2016
1 parent
2924869
commit f3e3d36
Showing
14 changed files
with
2,868 additions
and
45 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
简版code包含4个py文件,data、constant、pyalg2,pyalgo_test | ||
1.下载数据:data.py | ||
调用tushare财经数据包接口,详细内容请读文档:http://pythonhosted.org/tushare/index.html#id2 | ||
调用constant.py文件,存储部分下载时间的参数 | ||
方法: | ||
1.sava_data():(需运行) | ||
下载全部tushare数据至d:/data/目录,格式为0004.csv | ||
code.csv为全部代码 | ||
code_inuse.csv为过滤数据项较全的代码,可忽略 | ||
2.refresh_data(): | ||
每次下载以往数据设定了某一天,若需更新至当日,调用此方法 | ||
3.plt_macd() | ||
算出macd并作图的示例 | ||
4.change_type_to_yahoo():(需运行) | ||
下载完成后需调用此方法转换为pyalgotrade可识别的类型,存储于d:/data2/,格式为0019.csv | ||
此处使用的为inuse数据,可以更改为code.csv | ||
5.get_beta(): | ||
算beta示例 | ||
2.进行测试:pyalg_2.py | ||
调用pyalgotrade方法进行回测,详细内容请读文档:http://gbeced.github.io/pyalgotrade/docs/v0.17/html/tutorial.html | ||
调用pyalgo_test.py文件 | ||
调用pyalg_util.py文件 | ||
方法: | ||
1.提供两个测试方法: turtle_test():和vwap(plot):,底部有调用 | ||
2.turtle_test 提供三种数据加载方式:csv,dataFrame,sql(未完成直接方式,暂由dataFrame为桥) | ||
dataFrame方式调用同目录util文件夹下的dataFrameBarfeed.py 和dataFramefeed.py | ||
sql方式数据来自data.sql_py | ||
3.回测主体pyalgo_test.py, | ||
主体位于onbar()方法,可使用self.__position和self.marketOrder(element, 100)两种方式,效果一样。 | ||
注意onbar()是一条条更新,故__init__()中的数据也是随着onbar的滚动而增加。 | ||
如highlow.Low()最后一参数为存储数据个数,[-1]为当前运行结果,[-2]为上一次,用以调节窗口 | ||
方法: | ||
1.SMACrossOver(): | ||
示例方法 | ||
2.VWAPMomentum(): | ||
两只股票组合示例 | ||
3.turtle(): | ||
海龟交易法示例 | ||
4.最新版本已上传:pyalg_util.py,添加运行时数据信息,格式为dic格式,包含retur、sharpratio、tradeInfo等 | ||
调用方法见pyalg_2.py | ||
调用pyalgo_test.py文件 | ||
需在pyalgo_test.py中添加addInfo信息,具体内容有注释 | ||
****注意:此方法只为监测数据并返回array,json等格式自己作图用。pyalgotrade本身已带作图方法及基础的信息。 | ||
若不需要可删除调用部分:pyalg_util.py,pyalgo_test.py中的addInfo 方法,调用部分、getDateTimeSeries方法部分。 | ||
5.目前支持从tushare中获取数据并存入数据库中:data_sql.py,数据库为postgres(已经支持pandas_dataFrame为桥进行pyalgotrade回测, | ||
代码见pyalg_2,直接读取功能开发中) | ||
调用constant.py,数据库连接等设置在此处,其他数据库也一样 | ||
方法: | ||
支持对h_data、hist_data、realtime_quotes等的get、set方法,其中set为获取数据并存入数据库中,get为获取数据库数据 | ||
详见方法内注释 | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Sat Aug 01 17:28:29 2015 | ||
@author: lenovo | ||
""" | ||
from datetime import datetime | ||
import pandas as pd | ||
""" | ||
时间常数 | ||
""" | ||
_START_ = '1994-01-01'; | ||
_MIDDLE_ = '2015-11-27'; | ||
_TODAY_ = datetime.now().strftime('%Y-%m-%d'); | ||
_RATE_FREE_ = 0.05 | ||
|
||
_start_range = pd.date_range(start=_START_,periods=7) | ||
_end_range = pd.date_range(end=_MIDDLE_,periods=7) | ||
|
||
|
||
""" | ||
数据库常数 | ||
""" | ||
_PATH_CODE_ = 'd:/data/code.csv'; | ||
_ENGINE_ = 'postgresql://postgres:root@localhost:5432/tushare' | ||
|
||
#数据库参数信息及基础语句,pgres——test用 | ||
_DATABASE_ = 'tushare' | ||
_USER_ = 'postgres' | ||
_PASSWORD_ = 'root' | ||
_HOST_ = '127.0.0.1' | ||
|
||
_LOG_FILENAME_ = 'logging.conf' #日志配置文件名 | ||
_LOG_CONTENT_NAME_ = 'pg_log' #日志语句提示信息 | ||
|
||
__SQL1_ = '''CREATE TABLE ts_his( | ||
date INTEGER, | ||
sv_productname VARCHAR(32) | ||
);''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Tue Jul 28 11:04:32 2015 | ||
@author: lenovo | ||
""" | ||
from itertools import izip | ||
#import sys | ||
import constant as ct | ||
import pylab as plt | ||
import pandas as pd | ||
import tushare as ts | ||
import numpy as np | ||
import time,os | ||
from pandas import DataFrame | ||
#reload(sys) | ||
#sys.setdefaultencoding('utf-8') | ||
#code为全部,code_inuse为起止日期完备的数据 | ||
def save_data():
    """Download ~3-year daily history for every stock via tushare.

    Writes one gbk-encoded CSV per code under d:/data/ plus:
      - code.csv:       the full (de-duplicated) code list
      - code_inuse.csv: codes whose history touches both anchor windows

    Individual download/save failures are skipped (best effort).
    """
    dat = ts.get_industry_classified()
    dat = dat.drop_duplicates('code')
    dat.to_csv('d:/data/code.csv', encoding='gbk')
    inuse = []

    for i, code in enumerate(dat['code'].values, start=1):
        print(i, code)
        try:
            # get_hist_data defaults to ~3 years of daily bars; code must be str.
            _data_ = ts.get_hist_data(code, end=ct._MIDDLE_)
            if _data_ is not None:
                _data_.to_csv('d:/data/%s.csv' % code, encoding='gbk')
                # Keep only codes whose history reaches both the start and end windows.
                if _data_.index[0] in ct._start_range and _data_.index[-1] in ct._end_range:
                    inuse.append(code)
        except IOError:
            pass  # best effort: skip codes that fail to download/save

    # list (not set) so the column label order is deterministic
    _df_inuse = DataFrame(inuse, columns=['code'])
    _df_inuse.to_csv('d:/data/code_inuse.csv', encoding='gbk')
|
||
# Refresh local CSVs from the network; codes in dat are ints, so cast to str.
def refresh_data(_start_='2015-08-01', _end_=ct._TODAY_):
    """Append daily bars from _start_ to _end_ to each code's local CSV.

    Codes that remain complete over the refreshed range are written to
    d:/data/code_new_inuse.csv. Individual failures are skipped.

    NOTE(review): _end_ defaults to ct._TODAY_ evaluated at import time,
    so it goes stale in a long-running process — confirm intended.
    """
    dat = pd.read_csv('d:/data/code.csv', index_col=0, encoding='gbk')
    inuse = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    new_inuse = []

    for i, code in enumerate(dat['code'].values, start=1):
        print(i, code)
        try:
            # start is inclusive; tushare expects the code as a string
            _data_ = ts.get_hist_data(str(code), start=_start_, end=_end_)
            filename = 'd:/data/%s.csv' % code
            if _data_ is not None and _data_.size != 0:
                if os.path.exists(filename):
                    # append without repeating the header row
                    _data_.to_csv(filename, mode='a', header=None, encoding='gbk')
                else:
                    _data_.to_csv(filename, encoding='gbk')
                # Still "in use" only if the fresh data reaches both range edges.
                if (code in inuse['code'].values
                        and _data_.index[0] in pd.date_range(start=_start_, periods=7)
                        and _data_.index[-1] in pd.date_range(end=_end_, periods=7)):
                    new_inuse.append(code)
        except IOError:
            pass  # best effort: skip codes that fail

    _df_inuse = DataFrame(new_inuse, columns=['code'])
    _df_inuse.to_csv('d:/data/code_new_inuse.csv', encoding='gbk')
|
||
|
||
def read_data():
    """Load every locally saved CSV into a dict keyed by stock code."""
    dat = pd.read_csv('d:/data/code.csv', index_col=0, encoding='gbk')
    dic = {}

    for i, code in enumerate(dat['code'].values, start=1):
        print(i, code)
        try:
            # parse_dates converts the index to datetimes up front
            df = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
            if df is not None:
                dic[code] = df
        except IOError:
            pass  # skip codes with no local file
    return dic
|
||
# Only codes with complete head/tail history.
def get_universe():
    """Return the array of usable stock codes.

    Prefers the locally cached code_inuse.csv; falls back to downloading the
    industry-classified list from tushare when the cache is unreadable.
    """
    try:
        dat = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    except (IOError, OSError):  # narrowed from bare Exception: only a missing/unreadable file triggers the fallback
        dat = ts.get_industry_classified()
        dat = dat.drop_duplicates('code')  # drop duplicate codes
    return dat['code'].values
|
||
def get_data(code):
    """Return the locally saved DataFrame for *code*, or None if unavailable."""
    try:
        # parse_dates converts the index to datetimes up front
        dat = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
    except (IOError, OSError):  # narrowed from bare Exception: missing file -> None
        dat = None
    return dat
def get_macd(df):
    """Compute EMA_12, EMA_26, DIFF, MACD (9-period DEA) and BAR from df['close'].

    Returns a DataFrame aligned with *df*'s index. Both EMAs are seeded with
    the first close; smoothing weights are the standard 2/(N+1).

    Fixes vs. original: positional access via a float array instead of
    df['close'][i] (which fails on a DatetimeIndex in modern pandas and risks
    Py2 integer division), plus an empty-input guard.
    """
    _columns_ = ['EMA_12', 'EMA_26', 'DIFF', 'MACD', 'BAR']
    if len(df) == 0:
        return DataFrame(np.zeros((0, 5)), index=df.index, columns=_columns_)

    close = np.asarray(df['close'], dtype=float)
    a = np.zeros((len(df), 5))
    # Seed row -1 so the i = 0 iteration reads the first close as the prior EMA;
    # row -1 is only read at i = 0 and is overwritten last, so the trick is safe.
    a[-1][0] = close[0]  # EMA_12 seed
    a[-1][1] = close[0]  # EMA_26 seed

    for i in range(len(df)):
        a[i][0] = a[i - 1][0] * 11.0 / 13 + close[i] * 2.0 / 13   # EMA_12
        a[i][1] = a[i - 1][1] * 25.0 / 27 + close[i] * 2.0 / 27   # EMA_26
        a[i][2] = a[i][0] - a[i][1]                               # DIFF (fast - slow)
        a[i][3] = a[i - 1][3] * 8.0 / 10 + a[i][2] * 2.0 / 10     # MACD / DEA, 9-period EMA of DIFF
        a[i][4] = 2 * (a[i][2] - a[i][3])                         # BAR histogram
    return DataFrame(a, index=df.index, columns=_columns_)
|
||
# df: the raw price frame; da: the MACD frame produced by get_macd.
def plt_macd(df, da):
    """Plot the open price together with the MACD component series."""
    series = [df['open'], da['EMA_12'], da['EMA_26'], da['DIFF'], da['MACD'], da['BAR']]
    opts = [
        {"color": "green", "linewidth": 1.0, "linestyle": "-", "label": "open"},
        {"color": "blue", "linestyle": "-", "label": "EMA_12"},
        {"color": "yellow", "linestyle": "-", "label": "EMA_26"},
        {"color": "black", "linestyle": "-", "label": "DIFF"},
        {"color": "red", "linestyle": "-", "label": "MACD"},
        {"color": "orange", "linestyle": "-", "label": "BAR"},
    ]
    # builtin zip replaces the Py2-only itertools.izip (lazy either way here)
    for s, opt in zip(series, opts):
        s.plot(**opt)
    plt.grid()
    plt.legend(loc=0)
    plt.show()
|
||
|
||
#save_data() | ||
#refresh_data() | ||
#df = pd.read_csv('d:/data/600848.csv',index_col=0,parse_dates=[0],encoding='gbk') | ||
#da = get_macd(df) | ||
#plt_macd(df,da) | ||
#_data_ = pd.read_csv('d:/data/600848.csv',index_col=0,encoding='gbk') | ||
#dic = read_data() | ||
#_data_ = ts.get_hist_data('900901',start=ct._START_,end=ct._MIDDLE_) | ||
#print _data_ | ||
|
||
|
||
def temp2():
    """Rebuild code_inuse.csv by re-checking the already-downloaded local CSVs."""
    dat = pd.read_csv('d:/data/code.csv', index_col=0, encoding='gbk')
    inuse = []
    for i, code in enumerate(dat['code'].values, start=1):
        print(i, code)
        try:
            _data_ = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
            if _data_ is not None:
                # keep only codes whose history touches both anchor windows
                if _data_.index[0] in ct._start_range and _data_.index[-1] in ct._end_range:
                    inuse.append(code)
        except IOError:
            pass  # skip codes with no readable local file
    # list (not set) so the column label order is deterministic
    _df_inuse = DataFrame(inuse, columns=['code'])
    _df_inuse.to_csv('d:/data/code_inuse.csv', encoding='gbk')
def temp():
    """Rebuild code_new_inuse.csv by re-checking each in-use code's local data."""
    dat = pd.read_csv('d:/data/code.csv', index_col=0, encoding='gbk')
    inuse = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    new_inuse = []

    for code in dat['code'].values:
        try:
            _data_ = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
            # still "in use" only if the history reaches both the start window
            # and a 7-day window ending today
            if (code in inuse['code'].values
                    and _data_.index[0] in pd.date_range(start=ct._START_, periods=7)
                    and _data_.index[-1] in pd.date_range(end=ct._TODAY_, periods=7)):
                new_inuse.append(code)
        except IOError:
            pass  # skip codes with no readable local file

    _df_inuse = DataFrame(new_inuse, columns=['code'])
    _df_inuse.to_csv('d:/data/code_new_inuse.csv', encoding='gbk')
|
||
#temp2() | ||
# Rename the index and columns to Yahoo-style names so pyalgotrade's feed can
# read the files; the "adjusted close" slot is filled with price_change.
def change_type_to_yahoo():
    """Convert the in-use CSVs under d:/data/ to Yahoo format in d:/data2/."""
    inuse = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    inuse.to_csv('d:/data2/code_inuse.csv', encoding='gbk')
    # NOTE(review): 'price_change' is the daily price delta, not a true
    # adjusted close — it only fills the mandatory 'Adj Close' column; confirm
    # downstream code does not rely on it being a real adjusted price.
    re_columns = {'high': 'High', 'low': 'Low', 'open': 'Open', 'close': 'Close',
                  'volume': 'Volume', 'price_change': 'Adj Close'}
    for i, code in enumerate(inuse['code'].values, start=1):
        print(i, code)
        _data_ = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
        _data_ = _data_.rename(columns=re_columns)
        _data_.index.name = 'Date'
        _data_.to_csv('d:/data2/%s.csv' % code,
                      columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],
                      date_format="%Y-%m-%d", encoding='gbk')
|
||
def get_beta(values1, values2):
    """Return the no-intercept OLS slope regressing *values1* on *values2*.

    Pure-numpy replacement for statsmodels' ``sm.OLS(values1, values2)``:
    ``sm`` was never imported anywhere in this file, so the original raised
    NameError. lstsq minimizes ||values1 - b * values2|| and its single
    coefficient equals OLS params[0] for a one-regressor, no-constant model.
    """
    x = np.asarray(values2, dtype=float).reshape(-1, 1)  # single-regressor design matrix
    y = np.asarray(values1, dtype=float)
    slope, _residuals, _rank, _sv = np.linalg.lstsq(x, y, rcond=None)
    return slope[0]


# demo: beta of value1 against value2
value1 = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
value2 = [1.75, 2.45, 3.81, 4.80, 7.00, 8.60]
print(get_beta(value1, value2))
|
||
# Screen for volume spikes with high turnover in a falling market; small-cap
# and similar filters may be added later. Looks at the most recent `scope` days.
def bigVolume(scope=15, v_times=5, t_percent=20):
    """Return codes showing a recent volume spike with high turnover.

    A bar qualifies when volume > v_times * previous-day volume, turnover >
    t_percent, and close < 22; a code is kept if any bar within the last
    *scope* days qualifies. The first few hits also get their close plotted.
    Unreadable files are skipped.
    """
    inuse = pd.read_csv('d:/data/code_inuse.csv', index_col=0, parse_dates=[0], encoding='gbk')
    rs_list = []
    hits = 0
    for code in inuse['code'].values:
        try:
            _data_ = pd.read_csv('d:/data/%s.csv' % code, index_col=0, parse_dates=[0], encoding='gbk')
            spike = (_data_['volume'] / _data_['volume'].shift(1) > v_times) & (_data_['turnover'] > t_percent)
            spike = spike & (_data_['close'] < 22)  # cheap-price filter
            if spike[-scope:].any():
                hits += 1
                if hits < 5:
                    _data_['close'].plot()
                rs_list.append(code)
                print(hits, code)
        except IOError:
            pass  # skip codes with no readable local file
    return rs_list  # fix: original built rs_list but never returned it
# Script entry point. Guarded so that importing this module no longer kicks
# off a full-disk scan as an import side effect; running the file as a script
# behaves exactly as before.
if __name__ == '__main__':
    # refresh_data()
    # change_type_to_yahoo()
    bigVolume()
    # _data_ = pd.read_csv('d:/data/600848.csv', index_col=0, parse_dates=[0], encoding='gbk')
    # _data_.plot()
Oops, something went wrong.