In [1]:
# Preload the database with the scrapy crawlers listed below.
# Start rabbitmq-server, mongodb, celery and django manager.py.
# Manual run (uncomment the crawler you need):
#!scrapy crawl twseid --loglevel=DEBUG --logfile=./log/test_twseid.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehisstock --loglevel=DEBUG --logfile=./log/test_twsehisstock.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehistrader --loglevel=DEBUG --logfile=./log/test_twsehistrader.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1
#!scrapy crawl twsehiscredit --loglevel=DEBUG --logfile=./log/test_twsehiscredit.log -s LOG_ENABLED=1 -s GIANT_LIMIT=1 -s GIANT_DEBUG=1

In [2]:
#import scrapy_giant + zipline + profilo
# ref: http://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter1_Introduction/Chapter1.ipynb

import pytz
import matplotlib.pyplot as plt
import numpy as np
import talib
import traceback
import pandas as pd

from zipline.algorithm import TradingAlgorithm
from zipline.utils.factory import *
from zipline.finance.trading import SimulationParameters

# Import exponential moving average from talib wrapper
# ref: http://mrjbq7.github.io/ta-lib/doc_index.html

from datetime import datetime, timedelta
from collections import deque

from bin.mongodb_driver import *
from bin.start import *
from handler.tasks import collect_hisframe
from handler.hisdb_handler import TwseHisDBHandler, OtcHisDBHandler
from handler.iddb_handler import TwseIdDBHandler, OtcIdDBHandler

from algorithm.report import Report
from algorithm.register import AlgRegister

In [3]:
# Options forwarded to the TWSE id handler; their exact meaning is defined
# by TwseIdDBHandler.
kwargs = dict(debug=True, limit=2, opt='twse')

# List every stock id the handler reports.
idhandler = TwseIdDBHandler(**kwargs)
stockids = idhandler.stock.get_ids()
print(list(stockids))

['2317', '1314', '2330']


In [4]:
# Query window: from 10 days ago up to now (naive UTC timestamps).
starttime = datetime.utcnow() - timedelta(days=10)
endtime = datetime.utcnow()

# Market option and the single stock id used by the following cells.
opt = 'twse'
stockid = '2317'

# Fetch the raw 'stock' history for that stock id.
kwargs = dict(
    opt=opt,
    targets=['stock'],
    starttime=starttime,
    endtime=endtime,
    stockids=[stockid],
    traderids=[],
    base='stock',
    callback=None,
    limit=1,
    debug=True
)
st_panel, dbhandler = collect_hisframe(**kwargs)

# Show the last four rows, but only when something was returned.
if not st_panel.empty:
    print(st_panel[stockid].tail(4))

                           close  high   low  open  price   volume
2016-09-06 00:00:00+00:00   79.8  79.9  78.6  78.8   79.8  51022.0
2016-09-07 00:00:00+00:00   79.5  80.8  79.5  80.4   79.5  59399.0
2016-09-08 00:00:00+00:00   78.5  79.7  78.5  79.6   78.5  50257.0
2016-09-09 00:00:00+00:00   77.7  77.9  77.5  77.8   77.7  36760.0


In [5]:
# Fetch the raw 'trader' history for the same stock and time window.
kwargs = dict(
    opt=opt,
    targets=['trader'],
    starttime=starttime,
    endtime=endtime,
    stockids=[stockid],
    traderids=[],
    base='stock',
    # constraint: ebuyratio above 10, or totalkeepbuy of at least 1
    constraint=lambda x: x.value["ebuyratio"] > 10 or x.value["totalkeepbuy"] >= 1,
    # order: key made of the negated totalvolume, then the negated totalbuyratio
    order=lambda x: [-x.value["totalvolume"], -x.value["totalbuyratio"]],
    callback=None,
    limit=10,
    debug=True
)
trb_panel, dbhandler = collect_hisframe(**kwargs)

# Show the buy ratio of the two first-ranked traders over the last four rows.
if not trb_panel.empty:
    print(trb_panel[stockid].tail(4).loc[:, ['top0_buyratio', 'top1_buyratio']])

# Resolve the "topN" aliases to trader ids and keep (alias, id, utf-8 name)
# for every id the id handler knows about.
bst_traders = []
if dbhandler:
    aliases = ["top{0}".format(rank) for rank in range(10)]
    tops = list(dbhandler.trader.get_alias([stockid], 'trader', aliases))
    bst_traders = [
        ("top{0}".format(rank), top, "{0}".format(idhandler.trader.get_name(top).encode('utf-8')))
        for rank, top in enumerate(tops)
        if idhandler.trader.has_id(top)
    ]
    print(bst_traders)

                           top0_buyratio  top1_buyratio
2016-09-08 00:00:00+00:00           0.55           0.03
2016-09-09 00:00:00+00:00           0.22           0.00
[('top0', u'8440', '\xe6\x91\xa9\xe6\xa0\xb9\xe5\xa4\xa7\xe9\x80\x9a'), ('top1', u'1380', '\xe6\x9d\xb1\xe6\x96\xb9\xe5\x8c\xaf\xe7\x90\x86'), ('top2', u'1440', '\xe7\xbe\x8e\xe6\x9e\x97')]


In [6]:
# Use the first-ranked trader collected in bst_traders. That list stays empty
# when no handler was returned or no alias resolved to a known trader, so fail
# with an explicit message instead of a bare "list index out of range".
if not bst_traders:
    raise IndexError("bst_traders is empty: no top trader available for stock %s" % stockid)
traderid = bst_traders[0][1]

# Find the cross map table between trader and stock: same window, but the
# query is now based on the trader instead of the stock.
kwargs = {
    'opt': opt,
    'targets': ['trader'],
    'starttime': starttime,
    'endtime': endtime,
    'stockids': [],
    'traderids': [traderid],
    'base': 'trader',
    # constraint: ebuyratio above 10, or totalkeepbuy of at least 1
    'constraint': lambda x: x.value["ebuyratio"] > 10 or x.value["totalkeepbuy"] >= 1,
    # order: key made of the negated totalvolume, then the negated totalbuyratio
    'order': lambda x: [-x.value["totalvolume"], -x.value["totalbuyratio"]],
    'callback': None,
    'limit': 10,
    'debug': True
}
stb_panel, dbhandler = collect_hisframe(**kwargs)

# NOTE(review): the query is trader-based, yet the panel is still indexed by
# stockid here -- confirm this is the intended key.
if not stb_panel.empty:
    sell_columns = ['top0_sellratio', 'top1_sellratio', 'top2_sellratio']
    print(stb_panel[stockid].tail(4).loc[:, sell_columns])

# Resolve the "topN" aliases to stock ids and keep (alias, id, utf-8 name)
# for every id the id handler knows about.
bst_stocks = []
if dbhandler:
    aliases = ["top%d" % rank for rank in range(10)]
    tops = list(dbhandler.trader.get_alias([traderid], 'stock', aliases))
    bst_stocks = [
        ("top{0}".format(rank), top, "{0}".format(idhandler.stock.get_name(top).encode('utf-8')))
        for rank, top in enumerate(tops)
        if idhandler.stock.has_id(top)
    ]
    print(bst_stocks)

                           top0_sellratio  top1_sellratio  top2_sellratio
2016-09-08 00:00:00+00:00           13.84             NaN             NaN
2016-09-09 00:00:00+00:00            7.39             NaN             NaN
[('top0', u'2317', '\xe9\xb4\xbb\xe6\xb5\xb7')]


In [7]:
# Fetch the raw 'credit' history for the stock over the last 5 days.
kwargs = dict(
    opt=opt,
    targets=['credit'],
    starttime=datetime.utcnow() - timedelta(days=5),
    endtime=datetime.utcnow(),
    stockids=[stockid],
    base='stock',
    # constraint: efinanceremain above 0, or ebearfinaratio above 0
    constraint=lambda x: x.value["efinanceremain"] > 0 or x.value["ebearfinaratio"] > 0,
    # order: key made of the negated ebearfinaratio, then the negated totalfinanceremain
    order=lambda x: [-x.value["ebearfinaratio"], -x.value["totalfinanceremain"]],
    limit=2,
    debug=True
)
crt_panel, dbhandler = collect_hisframe(**kwargs)

# Show every credit column for the last four rows, when anything came back.
if not crt_panel.empty:
    print(crt_panel[stockid].tail(4))

                           bearfinaratio  bearishbuyvolume  bearishremain  \
2016-09-09 00:00:00+00:00            5.5             363.0           0.11   

                           bearishsellvolume  bearishtrend  financebuyvolume  \
2016-09-09 00:00:00+00:00              195.0         -0.04            2300.0   

                           financeremain  financesellvolume  financetrend  
2016-09-09 00:00:00+00:00           1.92             2020.0           0.0  


In [8]:
# Merge the raw stock, trader and credit frames of the stock side by side
# (column-wise) into one frame; this cell only builds the merged frame that
# the cross-correlation between raw features is meant to work on.
stock_frame = st_panel[stockid]
trader_frame = trb_panel[stockid]
credit_frame = crt_panel[stockid]
mdf = pd.concat([stock_frame, trader_frame, credit_frame], axis=1)
print(mdf)

                           close  high   low  open  price    volume  \
2016-08-31 00:00:00+00:00   88.0  88.9  88.0  88.7   88.0   68836.0   
2016-09-01 00:00:00+00:00   87.4  87.8  86.8  87.2   87.4  103889.0   
2016-09-02 00:00:00+00:00   77.2  77.9  76.3  76.6   77.2  158664.0   
2016-09-05 00:00:00+00:00   78.2  78.4  77.6  77.9   78.2   44625.0   
2016-09-06 00:00:00+00:00   79.8  79.9  78.6  78.8   79.8   51022.0   
2016-09-07 00:00:00+00:00   79.5  80.8  79.5  80.4   79.5   59399.0   
2016-09-08 00:00:00+00:00   78.5  79.7  78.5  79.6   78.5   50257.0   
2016-09-09 00:00:00+00:00   77.7  77.9  77.5  77.8   77.7   36760.0   

                           top0_avgbuyprice  top0_avgsellprice  top0_buyratio  \
2016-08-31 00:00:00+00:00               NaN                NaN            NaN   
2016-09-01 00:00:00+00:00               NaN                NaN            NaN   
2016-09-02 00:00:00+00:00               NaN                NaN            NaN   
2016-09-05 00:00:00+00:00           

In [9]:
# find