Skip to content

Commit

Permalink
dataframeFeed
Browse files Browse the repository at this point in the history
  • Loading branch information
chopchopjames committed Feb 15, 2016
1 parent 2924869 commit f3e3d36
Show file tree
Hide file tree
Showing 14 changed files with 2,868 additions and 45 deletions.
708 changes: 708 additions & 0 deletions dataframefeed/000004.csv

Large diffs are not rendered by default.

790 changes: 790 additions & 0 deletions dataframefeed/200019.csv

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions dataframefeed/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
简版code包含4个py文件,data、constant、pyalg2,pyalgo_test
1.下载数据:data.py
调用tushare财经数据包接口,详细内容请读文档:http://pythonhosted.org/tushare/index.html#id2
调用constant.py文件,存储部分下载时间的参数
方法:
1.save_data():(需运行)
下载全部tushare数据至d:/data/目录,格式为0004.csv
code.csv为全部代码
code_inuse.csv为过滤数据项较全的代码,可忽略
2.refresh_data():
每次下载以往数据设定了某一天,若需更新至当日,调用此方法
3.plt_macd()
算出macd并作图的示例
4.change_type_to_yahoo():(需运行)
下载完成后需调用此方法转换为pyalgotrade可识别的类型,存储于d:/data2/,格式为0019.csv
此处使用的为inuse数据,可以更改为code.csv
5.get_beta():
算beta示例
2.进行测试:pyalg_2.py
调用pyalgotrade方法进行回测,详细内容请读文档:http://gbeced.github.io/pyalgotrade/docs/v0.17/html/tutorial.html
调用pyalgo_test.py文件
调用pyalg_util.py文件
方法:
1.提供两个测试方法: turtle_test():和vwap(plot):,底部有调用
2.turtle_test 提供三种数据加载方式:csv,dataFrame,sql(未完成直接方式,暂由dataFrame为桥)
dataFrame方式调用同目录util文件夹下的dataFrameBarfeed.py 和dataFramefeed.py
sql方式数据来自data.sql_py
3.回测主体pyalgo_test.py,
主体位于onbar()方法,可使用self.__position和self.marketOrder(element, 100)两种方式,效果一样。
注意onbar()是一条条更新,故__init__()中的数据也是随着onbar的滚动而增加。
如highlow.Low()最后一参数为存储数据个数,[-1]为当前运行结果,[-2]为上一次,用以调节窗口
方法:
1.SMACrossOver():
示例方法
2.VWAPMomentum():
两只股票组合示例
3.turtle():
海龟交易法示例
4.最新版本已上传:pyalg_util.py,添加运行时数据信息,格式为dic格式,包含return、sharpratio、tradeInfo等
调用方法见pyalg_2.py
调用pyalgo_test.py文件
需在pyalgo_test.py中添加addInfo信息,具体内容有注释
****注意:此方法只为监测数据并返回array,json等格式自己作图用。pyalgotrade本身已带作图方法及基础的信息。
若不需要可删除调用部分:pyalg_util.py,pyalgo_test.py中的addInfo 方法,调用部分、getDateTimeSeries方法部分。
5.目前支持同tushare中获取数据并存入数据库中:data_sql.py,数据库为postgress(已经支持pandas_dataFrame为桥进行pyalgotrade回测,
代码见pyalg_2,直接读取功能开发中)
调用constant.py,数据库连接等设置在此处,其他数据库也一样
方法:
支持对h_data、hist_data、realtime_quotes等的get、set方法,其中set为获取数据并存入数据库中,get为获取数据库数据
详见方法内注释


39 changes: 39 additions & 0 deletions dataframefeed/constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 01 17:28:29 2015

Shared constants for the download/backtest scripts: date anchors used to
decide whether a cached series is "complete", local data paths, and the
PostgreSQL connection settings.

@author: lenovo
"""
from datetime import datetime
import pandas as pd

# --- date constants ---
_START_ = '1994-01-01'
_MIDDLE_ = '2015-11-27'
_TODAY_ = datetime.now().strftime('%Y-%m-%d')
_RATE_FREE_ = 0.05  # risk-free rate used by performance metrics

# 7-day windows anchored at the start/end dates; a series whose first/last
# index falls inside these windows is treated as covering the full range.
_start_range = pd.date_range(start=_START_, periods=7)
_end_range = pd.date_range(end=_MIDDLE_, periods=7)

# --- database constants ---
_PATH_CODE_ = 'd:/data/code.csv'
_ENGINE_ = 'postgresql://postgres:root@localhost:5432/tushare'

# Connection parameters and a bootstrap statement for the pgres test helper.
_DATABASE_ = 'tushare'
_USER_ = 'postgres'
_PASSWORD_ = 'root'
_HOST_ = '127.0.0.1'

_LOG_FILENAME_ = 'logging.conf'     # logging configuration file name
_LOG_CONTENT_NAME_ = 'pg_log'       # logger name used in log statements

__SQL1_ = '''CREATE TABLE ts_his(
date INTEGER,
sv_productname VARCHAR(32)
);'''
225 changes: 225 additions & 0 deletions dataframefeed/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 28 11:04:32 2015
@author: lenovo
"""
from itertools import izip
#import sys
import constant as ct
import pylab as plt
import pandas as pd
import tushare as ts
import numpy as np
import time,os
from pandas import DataFrame
#reload(sys)
#sys.setdefaultencoding('utf-8')
#code为全部,code_inuse为起止日期完备的数据
def save_data():
dat = ts.get_industry_classified()
dat = dat.drop_duplicates('code')
dat.to_csv('d:/data/code.csv',encoding='gbk')
inuse = []

i = 0
for code in dat['code'].values:
i+= 1
print i,code
try:
_data_ = ts.get_hist_data(code,end=ct._MIDDLE_) #默认取3年,code为str,start无效的,start 和end若当天有数据则全都取
if _data_ is not None:
_data_.to_csv('d:/data/%s.csv'%code,encoding='gbk')
if _data_.index[0] in ct._start_range and _data_.index[-1] in ct._end_range: #筛选一次代码,使用头尾都包含的代码
inuse.append(code)
except IOError:
pass #不行的话还是continue
#print len(inuse)
_df_inuse = DataFrame(inuse,columns={'code'})
_df_inuse.to_csv('d:/data/code_inuse.csv',encoding='gbk')

#Refresh local data from the network. tushare requires `code` as str, but
#the values read back from code.csv are ints — hence str(code) below.
def refresh_data(_start_ ='2015-08-01',_end_ = ct._TODAY_):
    """Append newly downloaded rows to each local d:/data/<code>.csv and
    write d:/data/code_new_inuse.csv with the codes whose refreshed data
    spans [_start_, _end_]. Per-code failures are skipped (best effort)."""
    dat = pd.read_csv('d:/data/code.csv',index_col=0,encoding='gbk')
    inuse = pd.read_csv('d:/data/code_inuse.csv',index_col=0,parse_dates=[0],encoding='gbk')
    new_inuse = []

    i=0
    for code in dat['code'].values:
        i+= 1
        print i,code
        try:
            _data_ = ts.get_hist_data(str(code),start=_start_,end=_end_) #tushare defaults to ~3 years; the start date is inclusive
            filename = 'd:/data/%s.csv'%code
            if _data_ is not None and _data_.size != 0:
                if os.path.exists(filename):
                    # append to the existing file without repeating the header
                    _data_.to_csv(filename, mode='a', header=None,encoding='gbk')
                else:
                    _data_.to_csv(filename,encoding='gbk')
                # keep codes already in use whose new data reaches both the
                # start window and the end window (7-day tolerance each)
                if code in inuse['code'].values and _data_.index[0] in pd.date_range(start=_start_,periods=7) and _data_.index[-1] in pd.date_range(end=_end_,periods=7):
                    new_inuse.append(code)
        except IOError:
            pass #best effort: skip codes that fail to download/write
    #print len(inuse)
    _df_inuse = DataFrame(new_inuse,columns={'code'})
    _df_inuse.to_csv('d:/data/code_new_inuse.csv',encoding='gbk')


def read_data():
dat = pd.read_csv('d:/data/code.csv',index_col=0,encoding='gbk')
dic = {}

i = 0
for code in dat['code'].values:
i+= 1
print i,code
try:
df = pd.read_csv('d:/data/%s.csv'%code,index_col=0,parse_dates=[0],encoding='gbk') #parse_dates直接转换数据类型,不用再重新狗再累
if df is not None:
dic[code] = df
except IOError:
pass #不行的话还是continue
return dic

#仅适用数据头尾完备的code
def get_universe():
    """Return the array of codes whose local history is complete.

    Prefers the cached d:/data/code_inuse.csv; if it cannot be read, falls
    back to a fresh (deduplicated) industry listing from tushare.
    """
    try:
        dat = pd.read_csv('d:/data/code_inuse.csv', index_col=0,
                          parse_dates=[0], encoding='gbk')
    except Exception:
        # cache missing/unreadable: fetch the listing from the network
        dat = ts.get_industry_classified()
        dat = dat.drop_duplicates('code')
    return dat['code'].values

#
def get_data(code):
    """Load the locally cached daily history for *code*.

    Returns a DataFrame with a DatetimeIndex (parse_dates on column 0),
    or None when the file is missing or unreadable.
    """
    path = 'd:/data/%s.csv' % code
    try:
        return pd.read_csv(path, index_col=0, parse_dates=[0], encoding='gbk')
    except Exception:
        return None
def get_macd(df):
    """Compute MACD columns for df['close'].

    Returns a DataFrame (same index as *df*) with columns EMA_12, EMA_26,
    DIFF (= EMA_12 - EMA_26), MACD (EMA-9 of DIFF) and BAR (2*(DIFF-MACD)).
    Both EMAs are seeded with the first close; the DIFF EMA is seeded with
    0, reproducing the original recurrence exactly.
    """
    labels = ['EMA_12', 'EMA_26', 'DIFF', 'MACD', 'BAR']
    closes = df['close']
    rows = np.zeros((len(df), 5))
    ema12 = float(closes[0])
    ema26 = float(closes[0])
    signal = 0.0
    for i in range(len(df)):
        ema12 = ema12 * 11 / 13 + closes[i] * 2 / 13
        ema26 = ema26 * 25 / 27 + closes[i] * 2 / 27
        diff = ema12 - ema26
        signal = signal * 8 / 10 + diff * 2 / 10
        rows[i] = (ema12, ema26, diff, signal, 2 * (diff - signal))
    return DataFrame(rows, index=df.index, columns=labels)

#df: the raw price frame; da: the MACD frame produced by get_macd
def plt_macd(df, da):
    """Plot the open price together with the five MACD series on one chart."""
    series = [df['open'], da['EMA_12'], da['EMA_26'],
              da['DIFF'], da['MACD'], da['BAR']]
    opts = [{"color": "green", "linewidth": 1.0, "linestyle": "-", "label": "open"},
            {"color": "blue", "linestyle": "-", "label": "EMA_12"},
            {"color": "yellow", "linestyle": "-", "label": "EMA_26"},
            {"color": "black", "linestyle": "-", "label": "DIFF"},
            {"color": "red", "linestyle": "-", "label": "MACD"},
            {"color": "orange", "linestyle": "-", "label": "BAR"}]
    for curve, opt in zip(series, opts):
        curve.plot(**opt)
    plt.grid()
    plt.legend(loc=0)
    plt.show()


#save_data()
#refresh_data()
#df = pd.read_csv('d:/data/600848.csv',index_col=0,parse_dates=[0],encoding='gbk')
#da = get_macd(df)
#plt_macd(df,da)
#_data_ = pd.read_csv('d:/data/600848.csv',index_col=0,encoding='gbk')
#dic = read_data()
#_data_ = ts.get_hist_data('900901',start=ct._START_,end=ct._MIDDLE_)
#print _data_


def temp2():
    """Rebuild d:/data/code_inuse.csv from the already-downloaded CSVs (no
    network): a code is kept when its cached history starts inside
    ct._start_range and ends inside ct._end_range."""
    dat = pd.read_csv('d:/data/code.csv',index_col=0,encoding='gbk')
    inuse = []
    i = 0
    for code in dat['code'].values:
        i+= 1
        print i,code
        try:
            _data_ = pd.read_csv('d:/data/%s.csv'%code,index_col=0,parse_dates=[0],encoding='gbk') #parse_dates gives a DatetimeIndex directly
            if _data_ is not None:
                # keep only codes whose data reaches both endpoints
                if _data_.index[0] in ct._start_range and _data_.index[-1] in ct._end_range:
                    inuse.append(code)
        except IOError:
            pass #best effort: skip unreadable files
    #print len(inuse)
    _df_inuse = DataFrame(inuse,columns={'code'})
    _df_inuse.to_csv('d:/data/code_inuse.csv',encoding='gbk')
def temp():
    """Rebuild d:/data/code_new_inuse.csv from local CSVs only (no network):
    a code is kept when it is already listed in code_inuse.csv and its
    cached history spans from near ct._START_ to near ct._TODAY_."""
    dat = pd.read_csv('d:/data/code.csv',index_col=0,encoding='gbk')
    inuse = pd.read_csv('d:/data/code_inuse.csv',index_col=0,parse_dates=[0],encoding='gbk')
    new_inuse = []

    i=0
    for code in dat['code'].values:
        i+= 1
        #print i,code
        try:
            _data_ = pd.read_csv('d:/data/%s.csv'%code,index_col=0,parse_dates=[0],encoding='gbk') #parse_dates gives a DatetimeIndex directly
            # keep codes already in use whose data reaches both the start
            # window and the end window (7-day tolerance each)
            if code in inuse['code'].values and _data_.index[0] in pd.date_range(start=ct._START_,periods=7) and _data_.index[-1] in pd.date_range(end=ct._TODAY_,periods=7):
                new_inuse.append(code)

        except IOError:
            pass #best effort: skip unreadable files
    #print len(inuse)
    _df_inuse = DataFrame(new_inuse,columns={'code'})
    _df_inuse.to_csv('d:/data/code_new_inuse.csv',encoding='gbk')

#temp2()
#Rename the index and columns to Yahoo-style names so pyalgotrade's CSV
#feed can read the files; writes converted copies under d:/data2/.
#NOTE(review): 'price_change' is mapped to 'Adj Close' — tushare's
#price_change is a daily delta, not an adjusted close; the README suggests
#this column is only a placeholder. Confirm before relying on it.
def change_type_to_yahoo():
    inuse = pd.read_csv('d:/data/code_inuse.csv',index_col=0,parse_dates=[0],encoding='gbk')
    inuse.to_csv('d:/data2/code_inuse.csv',encoding='gbk')
    re_columns ={'high':'High','low':'Low','open':'Open','close':'Close','volume':'Volume','price_change':'Adj Close'}
    i=0
    for code in inuse['code'].values:
        i+= 1
        print i,code
        _data_ = pd.read_csv('d:/data/%s.csv'%code,index_col=0,parse_dates=[0],encoding='gbk')
        _data_=_data_.rename(columns=re_columns)
        # the converted file's index column is presumably expected to be
        # named 'Date' by the downstream feed — verify against the reader
        _data_.index.name = 'Date'
        _data_.to_csv('d:/data2/%s.csv'%code,columns=['Open','High','Low','Close','Volume','Adj Close'],date_format="%Y-%m-%d",encoding='gbk')

def get_beta(values1, values2):
    """Return the no-intercept OLS slope of values1 regressed on values2.

    Equivalent to ``sm.OLS(values1, values2).fit().params[0]`` for 1-D
    inputs (closed form: beta = x.y / x.x), but implemented with numpy
    because ``sm`` (statsmodels) was never imported here — the original
    raised NameError on every call.
    http://statsmodels.sourceforge.net/stable/regression.html
    """
    y = np.asarray(values1, dtype=float)
    x = np.asarray(values2, dtype=float)
    return np.dot(x, y) / np.dot(x, x)

value1 = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
value2 = [1.75, 2.45, 3.81, 4.80, 7.00, 8.60]
# print(...) with a single argument is valid in both Python 2 and 3
print(get_beta(value1, value2))

#Pick stocks showing a one-day volume spike plus high turnover during a
#downtrend; small-cap filters etc. may be added later.
#scope=15: examine the most recent 15 days; v_times: today's volume as a
#multiple of yesterday's; t_percent: turnover threshold (percent).
def bigVolume(scope=15,v_times=5,t_percent=20):
    inuse = pd.read_csv('d:/data/code_inuse.csv',index_col=0,parse_dates=[0],encoding='gbk')
    rs_list = []
    i=0
    for code in inuse['code'].values:
        try:
            _data_ = pd.read_csv('d:/data/%s.csv'%code,index_col=0,parse_dates=[0],encoding='gbk')
            # per-day mask: volume spike AND turnover above threshold
            dd = (_data_['volume']/_data_['volume'].shift(1)>v_times) & (_data_['turnover']>t_percent)
            # NOTE(review): hard-coded close-price cap of 22 — confirm intent
            dd = dd & (_data_['close']<22)
            if dd[-scope:].any(): #any qualifying day within the recent window
                i+=1
                if i<5:
                    _data_['close'].plot() #plot only the first few matches
                rs_list.append(code)
                print i,code
        except IOError:
            pass #best effort: skip unreadable files
#refresh_data()
#change_type_to_yahoo()
bigVolume()
#_data_ = pd.read_csv('d:/data/600848.csv',index_col=0,parse_dates=[0],encoding='gbk') #默认取3年,code为str,start无效的,start 和end若当天有数据则全都取
#_data_.plot()
Loading

0 comments on commit f3e3d36

Please sign in to comment.