In [1]:
import datetime as dt
import pandas as pd
import util as ut
import random
from QLearner import QLearner
import indicators
import numpy as np

In [2]:
learner = QLearner(num_states=1000, \
        num_actions = 3, \
        alpha = 0.2, \
        gamma = 0.9, \
        rar = 0.98, \
        radr = 0.999, \
        dyna = 0, \
        verbose = False)

In [6]:
def addEvidence(symbol = "JPM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,12,31), \
        sv = 100000):

    dates = pd.date_range(sd, ed)
    prices = ut.get_data([symbol], dates)[[symbol]]
    prices['Cash'] = 1.0
    high = ut.get_data([symbol], dates, colname = 'High')[[symbol]]
    low = ut.get_data([symbol], dates, colname = 'Low')[[symbol]]
    orig_close = ut.get_data([symbol], dates, colname = 'Close')[[symbol]]
    adj_high = high * prices[[symbol]] / orig_close
    adj_low = low * prices[[symbol]] / orig_close

    orders = pd.DataFrame().reindex_like(prices)
    orders = orders.rename(index=str, columns={'Cash': 'Order', symbol:'Shares'})
    orders['Shares'] = 0
    orders['Order'] = 'CASH'
    orders.index.name = 'Date'
    orders.index = pd.to_datetime(orders.index, format="%Y/%m/%d")

    positions = pd.DataFrame().reindex_like(prices)
    positions.fillna(0, inplace=True)
    positions.iloc[0, -1] = sv
    action = learner.querysetstate(0)

    sma_range = indicators.sma(prices[[symbol]], 10).iloc[:, 0]
    sma_bins = pd.cut(sma_range, 10, labels=False)
    bb_range = indicators.bb(prices[[symbol]])
    bb_range['value'] = bb_range.High - bb_range.Low
    bb_range.fillna(method='bfill', inplace=True)
    bb_range = bb_range['value']
    bb_bins = pd.cut(bb_range, 10, labels=False)
    comparisons = [adj_high[symbol] - adj_low[symbol], abs(adj_low[symbol] - prices[symbol].shift(1)), abs(adj_high[symbol] - prices[symbol].shift(1))]
    tr = pd.concat(comparisons, axis=1).max(axis=1)
    tr.fillna(method='bfill', inplace=True)
    atr_range = tr.rolling(14, min_periods=1).mean()
    atr_bins = pd.cut(atr_range, 10, labels=False)
    states = sma_bins * 100 + bb_bins * 10 + atr_bins
    #states = atr_bins * 100 + bb_bins * 10 + sma_bins

    pre_shares = 0
    normalized_close = prices[symbol]/prices.iloc[0,0]
    daily_return = normalized_close - normalized_close.shift(1)
    daily_return.fillna(method='bfill', inplace=True)
    pre_qtable = np.ones(())
    pre_qtable.itemset(0, 1)
    cur_qtable = learner.q
    print(pre_qtable)
    while not np.array_equal(pre_qtable, cur_qtable): # check if converges
        pre_cash = sv
        pre_holdings = 0
        for date, row in orders.iterrows():
            cur_state = states[date]
            reward = daily_return[date] * pre_holdings
            action = learner.query(cur_state, reward)
            orders.loc[date, 'Order'] = action
            if action == 0:
                orders.loc[date, 'Shares'] = -1000 - pre_shares
                positions.loc[date, symbol] = -1000
            if action == 1:
                orders.loc[date, 'Shares'] = 0 - pre_shares
                positions.loc[date, symbol] = 0
            if action == 2:
                orders.loc[date, 'Shares'] = 1000 - pre_shares
                positions.loc[date, symbol] = 1000

            positions.loc[date, 'Cash'] = pre_cash - orders.loc[date, 'Shares'] * prices.loc[date, symbol]
            pre_cash = positions.loc[date, 'Cash']
            pre_holdings = positions.loc[date, symbol]
        pre_qtable = cur_qtable
        cur_qtable = learner.q

In [7]:
addEvidence(symbol = "JPM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,12,31), \
        sv = 100000)

[[ 1.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 ..., 
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


In [None]:
learner.q

In [9]:
def testPolicy(symbol = "JPM", \
    sd=dt.datetime(2010,1,1), \
    ed=dt.datetime(2011,12,31), \
    sv = 100000):
    dates = pd.date_range(sd, ed)
    prices = ut.get_data([symbol], dates)[[symbol]]
    high = ut.get_data([symbol], dates, colname = 'High')[[symbol]]
    low = ut.get_data([symbol], dates, colname = 'Low')[[symbol]]
    orig_close = ut.get_data([symbol], dates, colname = 'Close')[[symbol]]
    adj_high = high * prices[[symbol]] / orig_close
    adj_low = low * prices[[symbol]] / orig_close

    sma_range = indicators.sma(prices[[symbol]], 10).iloc[:, 0]
    sma_bins = pd.qcut(sma_range, 10, labels=False)
    bb_range = indicators.bb(prices[[symbol]])
    bb_range['value'] = bb_range.High - bb_range.Low
    bb_range.fillna(method='bfill', inplace=True)
    bb_range=bb_range['value']
    bb_bins = pd.cut(bb_range, 10, labels=False)
    comparisons = [adj_high[symbol]-adj_low[symbol], abs(adj_low[symbol] - prices[symbol].shift(1)), abs(adj_high[symbol] - prices[symbol].shift(1))]
    tr = pd.concat(comparisons, axis=1).max(axis=1)
    tr.fillna(method='bfill', inplace=True)
    atr_range = tr.rolling(14, min_periods=1).mean()
    atr_bins = pd.qcut(atr_range, 10, labels=False)
    states = sma_bins * 100 + bb_bins * 10 + atr_bins
    trades = pd.DataFrame().reindex_like(prices)  # only portfolio symbols
    trades[symbol] = 0
    pre_position = 0
    for date, row in trades.iterrows():
        cur_state = states[date] # compute current state
        action = learner.querysetstate(cur_state)
        if action == 0:
            trades.loc[date, symbol] = -1000 - pre_position
            pre_position = -1000
        elif action == 1:
            trades.loc[date, symbol] = 0 - pre_position
            pre_position = 0
        elif action == 2:
            trades.loc[date, symbol] = 1000 - pre_position
            pre_position = 1000
    return trades

In [10]:
order_book = testPolicy(symbol = "JPM", \
        sd=dt.datetime(2010,1,1), \
        ed=dt.datetime(2011,12,31), \
        sv = 100000)

TypeError: testPolicy() missing 1 required positional argument: 'self'

In [None]:
order_book

In [None]:
sd=dt.datetime(2008,1,1)
yesterday = sd - dt.timedelta(1)
ed=dt.datetime(2009,12,31)
dates = pd.date_range(sd, ed)
prices = ut.get_data(['JPM'], dates)[['JPM']]
normalized_close = prices/prices.iloc[0, :]
sma_range = indicators.sma(prices[['JPM']], 10)
sma = sma_range.loc[ed, 'JPM']
sma_range = indicators.sma(prices[['JPM']], 10).iloc[:, 0]
states = set(pd.qcut(sma_range, 100))
states = list(states)
for i in range(len((states))):
        if sma in states[i]:
            print(i)

In [None]:
q = np.zeros((1000, 3))

In [None]:
q.itemset(4, 1)

In [None]:
q