In [135]:
# %load trade.py
import numpy as np
import pandas as pd
import KNNLearner as knn
import LinRegLearner as ll
import BagLearner as bl
import datetime as dt
import matplotlib.pyplot as plt
from sklearn import linear_model
from util import get_data, plot_data

# Load prices for the in-sample (training) window.
# NOTE(review): 'SPY' is not listed in `symbols`, yet later cells read
# prices_all['SPY']; presumably util.get_data adds that column itself -- confirm.
start_date = dt.datetime(2007,12,31)
end_date = dt.datetime(2009,12,31)
symbols = ['IBM','SINE_FAST','SINE_SLOW','GOOG','AAPL','XOM']
# Every calendar day between the two endpoints (pd.date_range default frequency).
dates = pd.date_range(start_date, end_date)
prices_all = get_data(symbols, dates)

# Symbol under study. Despite its name, `pibm` holds whichever column `tag` selects.
tag = 'IBM'
pibm = prices_all[tag]

# construct features X
def get_feature(pibm):
    """Build the feature frame for a price series.

    All statistics use a 20-row rolling window with ``min_periods=0``.

    Features (one row per price row, after trimming):
      * ``x0``  -- (price - rolling mean) / (4 * rolling std)
      * ``vtl`` -- 8 * rolling std / rolling mean
      * ``x5``  -- price[t] / price[t-5] - 1 (5-row trailing return)

    The first 5 and last 5 rows of the input are dropped, so the result has
    ``len(pibm) - 10`` rows indexed by ``pibm.index[5:-5]``.

    Parameters
    ----------
    pibm : pandas.Series
        Price series.

    Returns
    -------
    (X, bbvals) : tuple
        ``X`` is the feature DataFrame; ``bbvals`` is the ``x0`` column as a
        standalone Series with the same index.
    """
    window = pibm.rolling(window=20, min_periods=0)
    # Evaluate each rolling statistic once instead of on every use.
    mean = window.mean()
    std = window.std()

    # Row 0 is skipped: a single observation has no sample standard deviation.
    bbvals = (pibm.iloc[1:] - mean.iloc[1:]) / (4 * std.iloc[1:])
    vtl = std.iloc[1:] / mean.iloc[1:] * 8
    # Plain ndarray of length len(pibm) - 5; entry k is the return ending at row k + 5.
    mmtn5 = pibm.values[5:] / pibm.values[:-5] - 1

    # [4:-5] on the row-1-based series == rows 5 .. n-6 of the input, which lines
    # up with mmtn5[:-5] (returns ending at rows 5 .. n-6).
    bb_trimmed = bbvals.iloc[4:-5]
    X = pd.DataFrame({'x0': bb_trimmed, 'vtl': vtl.iloc[4:-5], 'x5': mmtn5[:-5]})
    return X, bb_trimmed

# construct Y
def get_Y(pibm):
    """Return the 5-row forward return used as the learning target.

    Entry ``k`` of the result is ``price[k + 10] / price[k + 5] - 1``, so the
    output is a NumPy array with ``len(pibm) - 10`` elements.
    """
    raw = pibm.values
    later = raw[10:]
    earlier = raw[5:-5]
    return later / earlier - 1

def trade(pfl, lot=100, horizon=5):
    """Simulate a long/short strategy driven by the predicted price.

    For each row ``idx`` (except the last ``horizon`` rows) the prediction at
    ``idx`` is compared with the prediction ``horizon`` rows later:

      * rising prediction and position <= 0  -> buy up to ``+lot`` shares
      * falling prediction and position >= 0 -> sell down to ``-lot`` shares

    Trades execute at ``price[idx]``; the new cash and share values are
    carried forward to every later row.

    Parameters
    ----------
    pfl : pandas.DataFrame
        Must contain the columns 'price', 'pred', 'shares' and 'cash'.
        The 'shares' and 'cash' columns are overwritten in place with the
        post-trade values (stored as floats).
    lot : number, optional
        Absolute position size after a trade (default 100).
    horizon : int, optional
        Look-ahead distance, in rows, between the compared predictions
        (default 5).

    Returns
    -------
    pandas.Series
        Portfolio value per row: ``price * shares + cash``.
    """
    price = pfl['price'].values
    pred = pfl['pred'].values
    # Private float copies: plain ndarray slice assignment replaces the removed
    # `.ix` accessor and the chained-indexing writes, which pandas does not
    # guarantee to propagate back to the frame.
    cash = pfl['cash'].values.astype(float)
    shares = pfl['shares'].values.astype(float)

    for idx in range(len(pfl) - horizon):
        ahead = pred[idx + horizon]
        if pred[idx] < ahead and shares[idx] <= 0:
            # Buy (lot - current position) shares to end up long `lot`.
            cash[idx:] = cash[idx] - price[idx] * (lot - shares[idx])
            shares[idx:] = lot
        elif pred[idx] > ahead and shares[idx] >= 0:
            # Sell (lot + current position) shares to end up short `lot`.
            cash[idx:] = cash[idx] + price[idx] * (lot + shares[idx])
            shares[idx:] = -lot

    # Write the results back so callers can inspect the traded portfolio.
    pfl['cash'] = cash
    pfl['shares'] = shares
    pv = pfl['price'] * pfl['shares'] + pfl['cash']
    return pv
    
def train(X, Y):
    """Fit a KNN learner row by row and collect its one-step predictions.

    The first five predictions are copied straight from ``Y``. For every
    later row ``i`` the learner is given rows ``[:i]`` as evidence and then
    queried on row ``i``; the first element of the query result is stored.

    Returns ``(predictions, learner)`` where ``learner`` is the KNNLearner
    after its final ``addEvidence`` call.
    """
    learner = knn.KNNLearner()
    features = X.values
    n_rows = X.shape[0]

    predictions = np.zeros(Y.size)
    predictions[:5] = Y[:5]

    for step in range(5, n_rows):
        learner.addEvidence(features[:step], Y[:step])
        predictions[step] = learner.query(features[step])[0]

    return predictions, learner

In [136]:
#----------------------In-sample test-----------------------------#
# Features and forward-return target for the training window.
X, bbvs = get_feature(pibm)
Y = get_Y(pibm)

# train() returns the per-row predictions and the fitted learner; `kl` is
# reused by the out-of-sample cell.
Ypred, kl = train(X, Y)
# convert predicted Y back to price, in-sample backtest
ppred = pibm.values[5:-5]*(Ypred + 1)

pdiff = pd.DataFrame(index = pibm.index[10:], data = {'price':pibm.values[10:], 'pred':ppred})
plot_data(pdiff)

ppred = pd.Series(index = pibm.index[10:], data = ppred)# convert numpy array to pandas.Series

# initial portfolio: no shares, 10000 in cash on every row
pfl = pd.DataFrame({'price':pibm[10:], 'pred':ppred, 'bbvs':bbvs.values, 'shares':np.zeros(ppred.size), 'cash':np.ones(ppred.size)*10000})

# trading
pv = trade(pfl)
pspy = prices_all['SPY'][pfl.index]
# Normalise both curves by their first row. `.iloc[0]` is the positional
# replacement for `.ix[0]`, which was removed from pandas.
pfl_vs_spy = pd.DataFrame(index = pfl.index, data = {'my_portval':pv/pv.iloc[0], 'SPY':pspy/pspy.iloc[0]})
plot_data(pfl_vs_spy, title = "My_Portfolio vs SPY", ylabel = "Accumulative Return")

In [132]:
# Display the in-sample portfolio frame after trade() has run on it.
pfl

Unnamed: 0,bbvs,cash,pred,price,shares
2008-01-15,0.331426,-743.353645,107.433536,107.433536,100.0
2008-01-16,0.341958,-743.353645,108.064184,108.064184,100.0
2008-01-17,0.348527,-743.353645,108.614257,108.614257,100.0
2008-01-18,0.352046,-743.353645,109.078259,109.078259,100.0
2008-01-22,0.353056,-743.353645,109.451554,109.451554,100.0
2008-01-23,0.351879,-743.353645,110.990434,109.730413,100.0
2008-01-24,0.348696,-743.353645,110.814606,109.912048,100.0
2008-01-25,0.343596,-743.353645,111.378680,109.994645,100.0
2008-01-28,0.336595,-743.353645,111.404969,109.977380,100.0
2008-01-29,0.327654,21228.730998,111.786227,109.860423,-100.0


In [133]:
# Overlay the predicted returns (Ypred) on the training target (Y).
plt.plot(Ypred, label = 'pred')
plt.plot(Y, label = 'train')
plt.legend()
plt.show()
# Clear the current figure so later pyplot calls start from an empty one.
plt.clf()

In [134]:
# Bollinger-band view of the in-sample price and the predicted price.
indates = dates[1:]  # not used again in this cell
sma = pibm.rolling(window = 20, min_periods=0)
# Bands at rolling mean +/- 2 rolling standard deviations.
bbup = sma.mean() + 2*sma.std() 
bblow = sma.mean() - 2*sma.std() 
bbands = pd.DataFrame({'price':pibm[10:], 'sma':sma.mean()[10:], 'ub':bbup[10:], 'lb':bblow[10:],'ppred':ppred})
plot_data(bbands)
# Position of the actual price inside the band: (price - mean) / (4 * std).
bbvals = (pibm[1:] - sma.mean()[1:])/(4*sma.std()[1:])
plot_data(bbvals)
# Same ratio for the predicted price.
# NOTE(review): the numerator is sliced [10:] but the denominator [1:]. pandas
# aligns Series on index labels, so the result presumably spans index[1:] with
# NaN in the first nine rows. Later cells compare positions in `pbbvals`
# against positions in `bbvals`, so confirm before changing either slice.
pbbvals = (ppred - sma.mean()[10:])/(4*sma.std()[1:])
plot_data(pbbvals)

In [93]:
# Rolling windows of 80/40/10 rows over the actual price (sma*) and the
# predicted price (psma*). Only psma10 and psma40 are used in this cell.
sma80 = pibm.rolling(window = 80, min_periods=0)
sma40 = pibm.rolling(window = 40, min_periods=0)
sma10 = pibm.rolling(window = 10, min_periods=0)
psma80 = ppred.rolling(window = 80, min_periods=0)
psma40 = ppred.rolling(window = 40, min_periods=0)
psma10 = ppred.rolling(window = 10, min_periods=0)

# Plot the 10- and 40-row means of the predicted price against the actual price.
signals = pd.DataFrame({'psma10':psma10.mean()[1:],'psma40':psma40.mean()[1:],'price':pibm[1:]})
plot_data(signals)

In [196]:
# Number of rows where the predicted-price band ratio exceeds 0.5.
np.where(pbbvals > 0.5)[0].size 

23

In [197]:
# Number of rows where the predicted-price band ratio falls below -0.5.
np.where(pbbvals < -0.5)[0].size 

26

In [175]:
# Count row positions where exactly one of the predicted (pbbvals) and actual
# (bbvals) band ratios is above 0.5. The comparison is by position, not by date.
# NOTE(review): this assumes both series start at the same row -- confirm.
len(set(np.where(pbbvals > 0.5)[0].tolist()).symmetric_difference(np.where(bbvals > 0.5)[0].tolist()))

52

In [145]:
# Mean ratio of rolling std to rolling mean, using the rolling window `sma`
# created in an earlier cell.
vtl = sma.std()/sma.mean()
vtl.mean()

0.02921260308272721

In [137]:
#------------------------Out-Sample test---------------------------# 
# Evaluation window.
# NOTE: this cell rebinds the notebook globals `symbols` and `dates` that the
# in-sample cells created; re-running earlier cells afterwards will see the
# new values.
tsd = dt.datetime(2009,12,31)
ted = dt.datetime(2011,12,31)
symbols = [tag]
dates = pd.date_range(tsd, ted)
tprices = get_data(symbols, dates)
tpibm = tprices[tag]

tX, _ = get_feature(tpibm)
# compare to the true price, using the learner `kl` fitted in the in-sample cell
tYpred = kl.query(tX.values)
# Turn the predicted return back into a predicted price.
tppred = tpibm.values[5:-5]*(tYpred + 1)
tppred = pd.Series(index = tpibm.index[10:], data = tppred)# convert numpy array to pandas.Series
tpdiff = pd.DataFrame(index = tpibm.index[10:], data = {'price':tpibm.values[10:], 'pred':tppred})
plot_data(tpdiff)

# initial portfolio: no shares, 10000 in cash on every row
tpfl = pd.DataFrame({'price':tpibm[10:], 'pred':tppred, 'shares':np.zeros(tppred.size), 'cash':np.ones(tppred.size)*10000})

tpv = trade(tpfl)

tpspy = tprices['SPY'][tpfl.index]
# Normalise both curves by their first row. `.iloc[0]` is the positional
# replacement for `.ix[0]`, which was removed from pandas.
tpfl_vs_tspy = pd.DataFrame(index = tpfl.index, data = {'my_portval':tpv/tpv.iloc[0], 'SPY':tpspy/tpspy.iloc[0]})
plot_data(tpfl_vs_tspy, title = "My_Portfolio vs SPY", ylabel = "Accumulative Return")


# For report

In [95]:
# Rolling windows of 80/40/10 rows over the out-of-sample actual price (tsma*)
# and predicted price (tpsma*). Only tpsma40 is used in this cell.
tsma80 = tpibm.rolling(window = 80, min_periods=0)
tsma40 = tpibm.rolling(window = 40, min_periods=0)
tsma10 = tpibm.rolling(window = 10, min_periods=0)
tpsma80 = tppred.rolling(window = 80, min_periods=0)
tpsma40 = tppred.rolling(window = 40, min_periods=0)
tpsma10 = tppred.rolling(window = 10, min_periods=0)

# Plot the 40-row mean of the predicted price against the actual price.
tsignals = pd.DataFrame({'tpsma40':tpsma40.mean()[1:],'price':tpibm[1:]})
plot_data(tsignals)

In [109]:
# Exponentially weighted means (spans 10/20/40) of four series:
#   ibmema*   -- in-sample actual price      (pibm)
#   pibmema*  -- in-sample predicted price   (ppred)
#   tibmema*  -- out-of-sample actual price  (tpibm)
#   tpibmema* -- out-of-sample predicted price (tppred)
ibmema10 = pibm.ewm(span = 10, min_periods = 0).mean() 
ibmema20 = pibm.ewm(span = 20, min_periods = 0).mean()
ibmema40 = pibm.ewm(span = 40, min_periods = 0).mean()
pibmema10 = ppred.ewm(span = 10, min_periods = 0).mean() 
pibmema20 = ppred.ewm(span = 20, min_periods = 0).mean()
pibmema40 = ppred.ewm(span = 40, min_periods = 0).mean()
tibmema10 = tpibm.ewm(span = 10, min_periods = 0).mean() 
tibmema20 = tpibm.ewm(span = 20, min_periods = 0).mean()
tibmema40 = tpibm.ewm(span = 40, min_periods = 0).mean()
tpibmema10 = tppred.ewm(span = 10, min_periods = 0).mean() 
tpibmema20 = tppred.ewm(span = 20, min_periods = 0).mean()
tpibmema40 = tppred.ewm(span = 40, min_periods = 0).mean()

# Actual price with its own 10- and 40-span EMAs, in-sample then out-of-sample.
emas = pd.DataFrame({'ema10':ibmema10, 'ema40':ibmema40, 'price':pibm})
plot_data(emas)
temas = pd.DataFrame({'ema10':tibmema10, 'ema40':tibmema40, 'price':tpibm})
plot_data(temas)

In [128]:
# In-sample: EMAs (spans 40 and 20) of the predicted price against the actual price.
emas_cmp = pd.DataFrame({'pema40':pibmema40, 'pema20':pibmema20,'price':pibm})
plot_data(emas_cmp)

In [129]:
# Out-of-sample: EMAs (spans 40 and 20) of the predicted price against the
# actual price. The 'pema*' columns take the predicted-price EMAs (tpibmema*),
# mirroring the in-sample comparison; the actual-price EMAs (tibmema*) are
# plotted separately under the 'ema*' labels.
emas_cmp = pd.DataFrame({'pema40':tpibmema40, 'pema20':tpibmema20,'price':tpibm})
plot_data(emas_cmp)

In [115]:
# Display the span-40 EMA of the in-sample actual price.
ibmema40 

2007-12-31     99.270000
2008-01-02     97.665875
2008-01-03     97.201639
2008-01-04     96.036192
2008-01-07     95.119806
2008-01-08     94.084875
2008-01-09     93.456484
2008-01-10     93.205509
2008-01-11     92.733695
2008-01-14     92.956357
2008-01-15     93.021338
2008-01-16     93.054704
2008-01-17     93.033815
2008-01-18     93.220434
2008-01-22     93.196360
2008-01-23     93.572226
2008-01-24     93.964726
2008-01-25     94.131184
2008-01-28     94.312426
2008-01-29     94.553750
2008-01-30     94.738795
2008-01-31     95.003547
2008-02-01     95.372303
2008-02-04     95.633885
2008-02-05     95.689675
2008-02-06     95.676958
2008-02-07     95.588925
2008-02-08     95.563746
2008-02-11     95.650171
2008-02-12     95.810891
                 ...    
2009-11-18    117.155922
2009-11-19    117.370755
2009-11-20    117.547791
2009-11-23    117.774240
2009-11-24    117.976960
2009-11-25    118.139547
2009-11-27    118.220545
2009-11-30    118.327836
2009-12-01    118.504039


In [119]:
# First ten actual prices followed by the predicted price series, whose index
# starts at pibm.index[10].
pd.concat((pibm[:10], ppred))

2007-12-31     99.270000
2008-01-02     96.140000
2008-01-03     96.340000
2008-01-04     92.870000
2008-01-07     91.880000
2008-01-08     89.620000
2008-01-09     90.280000
2008-01-10     91.760000
2008-01-11     89.700000
2008-01-14     94.530000
2008-01-15     93.520000
2008-01-16     93.330000
2008-01-17     92.850000
2008-01-18     94.960000
2008-01-22     92.960000
2008-01-23     95.200562
2008-01-24     94.486872
2008-01-25     95.241625
2008-01-28     97.405974
2008-01-29     94.217354
2008-01-30     98.757950
2008-01-31     97.995119
2008-02-01     96.399321
2008-02-04     98.161623
2008-02-05     99.210337
2008-02-06     98.593525
2008-02-07     99.955258
2008-02-08    100.196683
2008-02-11     98.702471
2008-02-12     96.043718
                 ...    
2009-11-18    124.247457
2009-11-19    122.640911
2009-11-20    124.093711
2009-11-23    127.137373
2009-11-24    128.394981
2009-11-25    122.789452
2009-11-27    122.206368
2009-11-30    121.643390
2009-12-01    122.839717


In [120]:
# Number of rows in the in-sample price series.
pibm.size

506