In [1]:
# Preload the database with the scrapy crawlers listed below.
# Start rabbitmq-server, mongodb, celery and django manager.py.
# Manual run:
#!scrapy crawl twseid --loglevel=DEBUG --logfile=./log/test_twseid.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehisstock --loglevel=DEBUG --logfile=./log/test_twsehisstock.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehistrader --loglevel=DEBUG --logfile=./log/test_twsehistrader.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehiscredit --loglevel=DEBUG --logfile=./log/test_twsehiscredit.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1

In [2]:
#import scrapy_giant + zipline + profilo
# ref: http://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter1_Introduction/Chapter1.ipynb

import pytz
import matplotlib.pyplot as plt
import numpy as np
import talib
import traceback
import pandas as pd

from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import *
from zipline.finance.trading import SimulationParameters

# Import exponential moving average from talib wrapper
# ref: http://mrjbq7.github.io/ta-lib/doc_index.html

from datetime import datetime, timedelta, date
from collections import deque

from bin.mongodb_driver import *
from bin.start import *
from handler.tasks import collect_hisframe
from handler.hisdb_handler import TwseHisDBHandler, OtcHisDBHandler
from handler.iddb_handler import TwseIdDBHandler, OtcIdDBHandler

from algorithm.report import Report
from algorithm.register import AlgRegister


In [3]:
debug = False


def pick_time_windows(use_recent):
    """Return the (short start, short end, long start, long end) query bounds.

    use_recent -- when true, both windows end at the current UTC time and
                  reach back 10 days (short) and 60 days (long); otherwise the
                  fixed 2015-04 ~ 2015-08 calendar dates are returned.

    The previous inline version misspelled the two start names in the debug
    branch (`sht_startime`, `lng_sarttime`), so `sht_starttime` and
    `lng_starttime` were never bound when debug was switched on.
    """
    if use_recent:
        # Sample the clock once so all four bounds are mutually consistent.
        now = datetime.utcnow()
        return now - timedelta(days=10), now, now - timedelta(days=60), now
    # 2015-04 ~ 2015-08
    return date(2015, 8, 1), date(2015, 8, 30), date(2015, 4, 1), date(2015, 8, 30)


sht_starttime, sht_endtime, lng_starttime, lng_endtime = pick_time_windows(debug)

opt, stockid = 'twse', '2317'

In [4]:
# Open the TWSE id handler and show the first three stock ids it yields.
kwargs = dict(debug=debug, limit=2, opt='twse')

idhandler = TwseIdDBHandler(**kwargs)
stockids = idhandler.stock.get_ids()
print(list(stockids)[:3])

[u'1101', u'1102', u'1103']


In [5]:
# Collect the 'stock' target for stockid over the long window and
# print the last four rows when the returned panel is not empty.
kwargs = dict(
    opt=opt,
    targets=['stock'],
    starttime=lng_starttime,
    endtime=lng_endtime,
    stockids=[stockid],
    traderids=[],
    base='stock',
    callback=None,
    limit=1,
    debug=debug,
)
st_panel, dbhandler = collect_hisframe(**kwargs)
if not st_panel.empty:
    print(st_panel[stockid].tail(4))

                           close  high   low  open  price  volume
2015-08-25 00:00:00+00:00   85.0  85.0  81.0  81.0   85.0   60830
2015-08-26 00:00:00+00:00   83.5  84.7  83.1  84.0   83.5   58576
2015-08-27 00:00:00+00:00   84.6  85.3  84.0  84.0   84.6   68806
2015-08-28 00:00:00+00:00   88.6  89.2  86.2  86.9   88.6   65098


In [6]:
# Collect the 'trader' target for stockid over the short window (stock-based)
# and print the last four rows when the returned panel is not empty.
kwargs = dict(
    opt=opt,
    targets=['trader'],
    starttime=sht_starttime,
    endtime=sht_endtime,
    stockids=[stockid],
    traderids=[],
    base='stock',
    constraint=lambda x: x.value["ebuyratio"] > 10 or x.value["totalkeepbuy"] >= 1,
    order=lambda x: [-x.value["totalvolume"], -x.value["totalbuyratio"]],
    callback=None,
    limit=10,
    debug=debug,
)
trb_panel, dbhandler = collect_hisframe(**kwargs)
if not trb_panel.empty:
    print(trb_panel[stockid].tail(4))

                           top0_avgbuyprice  top0_avgsellprice  top0_buyratio  \
2015-08-25 00:00:00+00:00                84                 84           1.62   
2015-08-26 00:00:00+00:00                83                 83           2.36   
2015-08-27 00:00:00+00:00                84                 84           3.42   
2015-08-28 00:00:00+00:00                87                 87           6.19   

                           top0_buyvolume  top0_keepbuy  top0_keepsell  \
2015-08-25 00:00:00+00:00             986             1              1   
2015-08-26 00:00:00+00:00            1383             1              1   
2015-08-27 00:00:00+00:00            2354             1              1   
2015-08-28 00:00:00+00:00            4028             1              1   

                           top0_sellratio  top0_sellvolume  top1_avgbuyprice  \
2015-08-25 00:00:00+00:00            3.15             1915                82   
2015-08-26 00:00:00+00:00           32.70            19157     

In [7]:
# Get the best buy/sell trader names based on stockid.
# Each entry is (alias, physical trader id, utf-8 encoded trader name).
bst_traders = []
if dbhandler:
    # Map the "top0".."top9" attributes to physical trader ids.
    tops = list(dbhandler.trader.get_alias([stockid], 'trader', ["top%d" %i for i in range(10)]))

    # Keep only the ids known to the id handler; the alias keeps its original rank.
    bst_traders = [
        ("top{0}".format(i), top, "{0}".format(idhandler.trader.get_name(top).encode('utf-8')))
        for i, top in enumerate(tops)
        if idhandler.trader.has_id(top)
    ]
    print(bst_traders[:3])

[('top0', u'1520', '\xe7\x91\x9e\xe5\xa3\xab\xe4\xbf\xa1\xe8\xb2\xb8'), ('top1', u'1470', '\xe5\x8f\xb0\xe7\x81\xa3\xe6\x91\xa9\xe6\xa0\xb9\xe5\xa3\xab\xe4\xb8\xb9\xe5\x88\xa9'), ('top2', u'1650', '\xe6\x96\xb0\xe5\x8a\xa0\xe5\x9d\xa1\xe5\x95\x86\xe7\x91\x9e\xe9\x8a\x80')]


In [8]:
# Physical id stored in the first bst_traders entry.
traderid = bst_traders[0][1]

# Find the cross map table between the trader and its related stocks
# (trader-based query over the short window).
kwargs = dict(
    opt=opt,
    targets=['trader'],
    starttime=sht_starttime,
    endtime=sht_endtime,
    stockids=[stockid, '2330'],
    traderids=[traderid],
    base='trader',
    constraint=lambda x: x.value["ebuyratio"] > 10 or x.value["totalkeepbuy"] >= 1,
    order=lambda x: [-x.value["totalvolume"], -x.value["totalbuyratio"]],
    callback=None,
    limit=10,
    debug=debug,
)
stb_panel, dbhandler = collect_hisframe(**kwargs)
if not stb_panel.empty:
    items = stb_panel.items
    print(stb_panel)
    # Rows ordered by top0_buyratio descending, then top0_sellratio ascending.
    print(
        stb_panel[stockid]
        .sort_values(by=['top0_buyratio','top0_sellratio'], ascending=[False,True])
        .head(4)
    )

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 20 (major_axis) x 16 (minor_axis)
Items axis: 2330 to 2317
Major_axis axis: 2015-08-03 00:00:00+00:00 to 2015-08-28 00:00:00+00:00
Minor_axis axis: top0_avgbuyprice to top1_sellvolume
                           top0_avgbuyprice  top0_avgsellprice  top0_buyratio  \
2015-08-03 00:00:00+00:00                 0                  0              0   
2015-08-04 00:00:00+00:00                 0                  0              0   
2015-08-05 00:00:00+00:00                 0                  0              0   
2015-08-06 00:00:00+00:00                 0                  0              0   

                           top0_buyvolume  top0_keepbuy  top0_keepsell  \
2015-08-03 00:00:00+00:00               0             0              0   
2015-08-04 00:00:00+00:00               0             0              0   
2015-08-05 00:00:00+00:00               0             0              0   
2015-08-06 00:00:00+00:00               0             0 

In [9]:
# Get the best buy/sell stock names based on traderid.
# Each entry is (alias, physical stock id, utf-8 encoded stock name).
bst_stocks = []
if dbhandler:
    # Map the "top0".."top9" attributes to physical stock ids.
    tops = list(dbhandler.trader.get_alias([traderid], 'stock', ["top%d" %i for i in range(10)]))

    # Keep only the ids known to the id handler; the alias keeps its original rank.
    bst_stocks = [
        ("top{0}".format(i), top, "{0}".format(idhandler.stock.get_name(top).encode('utf-8')))
        for i, top in enumerate(tops)
        if idhandler.stock.has_id(top)
    ]
    print(bst_stocks[:3])

[('top0', u'2330', '\xe5\x8f\xb0\xe7\xa9\x8d\xe9\x9b\xbb'), ('top1', u'2317', '\xe9\xb4\xbb\xe6\xb5\xb7')]


In [10]:
# Collect the 'credit' target for stockid over the long window and
# print the last four rows when the returned panel is not empty.
kwargs = dict(
    opt=opt,
    targets=['credit'],
    starttime=lng_starttime,
    endtime=lng_endtime,
    stockids=[stockid],
    base='stock',
    constraint=lambda x: x.value["efinanceremain"] > 0 or x.value["ebearfinaratio"] > 0,
    order=lambda x: [-x.value["ebearfinaratio"], -x.value["totalfinanceremain"]],
    limit=1,
    debug=debug,
)
crt_panel, dbhandler = collect_hisframe(**kwargs)
if not crt_panel.empty:
    print(crt_panel[stockid].tail(4))

                           bearfinaratio  bearishbuyvolume  bearishremain  \
2015-08-25 00:00:00+00:00           0.95               211           0.02   
2015-08-26 00:00:00+00:00           0.22               683           0.01   
2015-08-27 00:00:00+00:00           0.20                77           0.01   
2015-08-28 00:00:00+00:00           0.01               177           0.00   

                           bearishsellvolume  bearishtrend  financebuyvolume  \
2015-08-25 00:00:00+00:00                214          0.00              3782   
2015-08-26 00:00:00+00:00                  0         -0.76              4191   
2015-08-27 00:00:00+00:00                 51         -0.12              3796   
2015-08-28 00:00:00+00:00                  0         -0.96              3094   

                           financeremain  financesellvolume  financetrend  
2015-08-25 00:00:00+00:00           2.50               5441         -0.02  
2015-08-26 00:00:00+00:00           2.56               1818  

In [11]:
# Collect the 'future' target for stockid over the long window and
# print the last four rows when the returned panel is not empty.
kwargs = dict(
    opt=opt,
    targets=['future'],
    starttime=lng_starttime,
    endtime=lng_endtime,
    stockids=[stockid],
    base='stock',
    constraint=lambda x: x.value["edfcdiff"] > 0 or x.value["totalvolume"] > 0,
    order=lambda x: [-x.value["edfcdiff"], -x.value["totalvolume"]],
    limit=1,
    debug=debug,
)
fut_panel, dbhandler = collect_hisframe(**kwargs)
if not fut_panel.empty:
    print(fut_panel[stockid].tail(4))

                           dfcdiff  dfhdiff  dfldiff  dfodiff  fbestbuy  \
2015-08-25 00:00:00+00:00      0.0     -0.1      0.2     -0.9      84.9   
2015-08-26 00:00:00+00:00     -0.2     -0.2      0.2     -0.7      83.6   
2015-08-27 00:00:00+00:00      0.1      0.0      0.0     -0.3      84.5   
2015-08-28 00:00:00+00:00      0.4      0.0      0.6      1.3      88.3   

                           fbestsell  fclose  fhigh  flow  fopen  fprice  \
2015-08-25 00:00:00+00:00       85.0    85.0   85.1  80.8   81.9    85.0   
2015-08-26 00:00:00+00:00       83.8    83.7   84.9  82.9   84.7    83.7   
2015-08-27 00:00:00+00:00       84.6    84.5   85.3  84.0   84.3    84.5   
2015-08-28 00:00:00+00:00       88.4    88.2   89.2  85.6   85.6    88.2   

                           fsetprice  funtrdcount  fvolume  
2015-08-25 00:00:00+00:00       85.0         5543     3521  
2015-08-26 00:00:00+00:00       83.7         5690     1849  
2015-08-27 00:00:00+00:00       84.6         5608     2075  

In [12]:
# Find the cross correlation weight distance between each raw feature.
# Step 1: merge the stock, trader and credit frames of stockid side by side
# (column-wise) and show the last four rows that have no missing values.
mdf = pd.concat(
    objs=[st_panel[stockid], trb_panel[stockid], crt_panel[stockid]],
    axis=1
)
print(mdf.dropna().tail(4))

                           close  high   low  open  price  volume  \
2015-08-25 00:00:00+00:00   85.0  85.0  81.0  81.0   85.0   60830   
2015-08-26 00:00:00+00:00   83.5  84.7  83.1  84.0   83.5   58576   
2015-08-27 00:00:00+00:00   84.6  85.3  84.0  84.0   84.6   68806   
2015-08-28 00:00:00+00:00   88.6  89.2  86.2  86.9   88.6   65098   

                           top0_avgbuyprice  top0_avgsellprice  top0_buyratio  \
2015-08-25 00:00:00+00:00                84                 84           1.62   
2015-08-26 00:00:00+00:00                83                 83           2.36   
2015-08-27 00:00:00+00:00                84                 84           3.42   
2015-08-28 00:00:00+00:00                87                 87           6.19   

                           top0_buyvolume      ...       top9_sellvolume  \
2015-08-25 00:00:00+00:00             986      ...                   473   
2015-08-26 00:00:00+00:00            1383      ...                   456   
2015-08-27 00:00:00+