In [113]:
# %load trade.py
import numpy as np
import pandas as pd
import KNNLearner as knn
import LinRegLearner as ll
import BagLearner as bl
import datetime as dt
import matplotlib.pyplot as plt
from sklearn import linear_model
from util import get_data, plot_data

# Load the in-sample price history for every symbol of interest.
start_date = dt.datetime(2007, 12, 31)
end_date = dt.datetime(2009, 12, 31)
indates = pd.date_range(start=start_date, end=end_date)
symbols = ['IBM', 'SINE_FAST', 'SINE_SLOW', 'GOOG', 'AAPL']
prices_all = get_data(symbols, indates)

# IBM is the series the model below is trained on.
pibm = prices_all['IBM']

# construct features X
def get_feature(pibm):
  """Build the momentum feature table for a price series.

  Column 'x<lag>' (lag = 1..5) holds price[t]/price[t-lag] - 1. Row k
  describes day t = k + 5, and the last five days are dropped, so a series
  of n prices yields n - 10 rows (none when n <= 10).

  pibm: pandas Series of prices (only `.values` is read).
  Returns a DataFrame with columns 'x1' .. 'x5'.
  """
  prices = pibm.values
  columns = {}
  for lag in range(1, 6):
    momentum = prices[lag:]/prices[:-lag] - 1
    # Skip the first (5 - lag) entries so that every lag starts on day 5,
    # then trim the trailing five days.
    aligned = momentum[5 - lag:]
    columns['x%d' % lag] = aligned[:-5]
  return pd.DataFrame(columns)

# construct Y
def get_Y(pibm):
  """Return the 5-day forward return used as the regression target.

  Element k is price[k+10]/price[k+5] - 1, so a series of n prices yields
  n - 10 values (an empty array when n <= 10).

  pibm: pandas Series of prices (only `.values` is read).
  """
  prices = pibm.values
  future = prices[10:]   # price five days after the base day
  base = prices[5:-5]    # base day, starting at day 5
  return future/base - 1

def trade(pfl):
  """Run a +/-100-share long/short strategy driven by the 'pred' column.

  For every row except the last, the prediction on that row is compared
  with the prediction on the next row:
    * prediction rises and the position is <= 0  -> buy up to +100 shares
    * prediction falls and the position is >= 0  -> sell down to -100 shares
  Orders are filled at that row's 'price'; the new cash and share count are
  carried forward to every later row.

  pfl: DataFrame with columns 'price', 'pred', 'shares' and 'cash'.
       'shares' and 'cash' are updated in place. Rows are addressed by
       position, so any index type works.
  Returns a Series of portfolio values, price*shares + cash, per row.
  """
  # Positional column lookups let us write with a single .iloc call instead
  # of chained indexing (pfl['cash'].ix[idx:] = ...), which is not
  # guaranteed to write back into the frame.
  price_col = pfl.columns.get_loc('price')
  pred_col = pfl.columns.get_loc('pred')
  shares_col = pfl.columns.get_loc('shares')
  cash_col = pfl.columns.get_loc('cash')

  for idx in range(pfl.shape[0]-1):
    price = pfl.iloc[idx, price_col]
    shares = pfl.iloc[idx, shares_col]
    cash = pfl.iloc[idx, cash_col]
    pred_now = pfl.iloc[idx, pred_col]
    pred_next = pfl.iloc[idx+1, pred_col]

    if pred_now < pred_next and shares <= 0:
      # Go long: buy enough to move from the current position to +100.
      pfl.iloc[idx:, cash_col] = cash - price*(100 - shares)
      pfl.iloc[idx:, shares_col] = 100
    elif pred_now > pred_next and shares >= 0:
      # Go short: sell enough to move from the current position to -100.
      pfl.iloc[idx:, cash_col] = cash + price*(100 + shares)
      pfl.iloc[idx:, shares_col] = -100

  pv = pfl['price']*pfl['shares'] + pfl['cash']
  return pv


#----------------------In-sample test-----------------------------#
X = get_feature(pibm)
Y = get_Y(pibm)

# train
clf = linear_model.LinearRegression()
clf.fit(X.values, Y)

# Predicted 5-day forward return for every feature row (in-sample).
Ypred = clf.predict(X.values)

# convert predicted Y back to price, in-sample backtest.
# Row k of X / Y is observed on day k+5 and targets day k+10, i.e.
# Y[k] = p[k+10]/p[k+5] - 1, so the predicted price for day k+10 is
# p[k+5]*(1 + Ypred[k]). The base must therefore be pibm.values[5:-5];
# scaling pibm.values[10:] instead would fold the realised target price
# into the "prediction" (look-ahead).
ppred = pibm.values[5:-5]*(Ypred + 1)
pdiff = pd.DataFrame(index = pibm.index[10:], data = {'price':pibm.values[10:], 'pred':ppred})
plot_data(pdiff)

ppred = pd.Series(index = pibm.index[10:], data = ppred)# convert numpy array to pandas.Series

# initial portfolio: no position, 10000 in cash on every row
pfl = pd.DataFrame({'price':pibm[10:], 'pred':ppred, 'shares':np.zeros(ppred.size), 'cash':np.ones(ppred.size)*10000})

# trading
pv = trade(pfl)
# NOTE(review): 'SPY' is not listed in `symbols`; presumably util.get_data
# adds it to the returned frame - confirm against util.py.
pspy = prices_all['SPY'][pfl.index]
# Normalise both series to their first value so they are comparable.
pfl_vs_spy = pd.DataFrame(index = pfl.index, data = {'my_portval':pv/pv.iloc[0], 'SPY':pspy/pspy.iloc[0]})
plot_data(pfl_vs_spy, title = "My_Portfolio vs SPY", ylabel = "Accumulative Return")


#------------------------Out-Sample test---------------------------#
# NOTE(review): tsd equals the in-sample end_date, so that one date is
# requested in both windows.
tsd = dt.datetime(2009,12,31)
ted = dt.datetime(2011,12,31)
symbols = ['IBM']
outdates = pd.date_range(tsd, ted)
tprices = get_data(symbols, outdates)
tpibm = tprices['IBM']

tX = get_feature(tpibm)
# compare to the true price; same day k+5 base as in the in-sample block,
# using the model fitted above (no re-training).
tppred = tpibm.values[5:-5]*(clf.predict(tX.values) + 1)
tpdiff = pd.DataFrame(index = tpibm.index[10:], data = {'price':tpibm.values[10:], 'pred':tppred})
plot_data(tpdiff)

tpfl = pd.DataFrame({'price':tpibm[10:], 'pred':tppred, 'shares':np.zeros(tppred.size), 'cash':np.ones(tppred.size)*10000})

tpv = trade(tpfl)

tpspy = tprices['SPY'][tpfl.index]
tpfl_vs_tspy = pd.DataFrame(index = tpfl.index, data = {'my_portval':tpv/tpv.iloc[0], 'SPY':tpspy/tpspy.iloc[0]})
plot_data(tpfl_vs_tspy, title = "My_Portfolio vs SPY", ylabel = "Accumulative Return")





In [125]:
# Actual vs. predicted in-sample target, one point per feature row.
plt.clf()
plt.plot(Y, label='Y (actual)')
plt.plot(Ypred, label='Ypred (predicted)')
plt.xlabel('sample index')
plt.ylabel('5-day forward return')
plt.title('In-sample: actual vs. predicted return')
plt.legend()
plt.show()
plt.clf()

In [124]:
# Scatter of the second feature column (x2) against the target.
plt.plot(X.values[:,1],Y,'*')
plt.xlabel('x2')
plt.ylabel('Y')
plt.title('Target vs. feature x2')
plt.show()
plt.clf()

In [115]:
# NOTE(review): `spectrum` is neither defined nor imported in the visible
# part of this notebook - it presumably comes from a cell that is no longer
# present; confirm before relying on Restart & Run All.
freq, spec = spectrum(Y,range(Y.size))
# Call form prints the same text on Python 2 and also parses on Python 3.
print(freq)
plt.plot(spec)
plt.show()
plt.clf()

[ 0.          0.00201613  0.00201613  0.00403226  0.00403226  0.00604839
  0.00604839  0.00806452  0.00806452  0.01008065  0.01008065  0.01209677
  0.01209677  0.0141129   0.0141129   0.01612903  0.01612903  0.01814516
  0.01814516  0.02016129  0.02016129  0.02217742  0.02217742  0.02419355
  0.02419355  0.02620968  0.02620968  0.02822581  0.02822581  0.03024194
  0.03024194  0.03225806  0.03225806  0.03427419  0.03427419  0.03629032
  0.03629032  0.03830645  0.03830645  0.04032258  0.04032258  0.04233871
  0.04233871  0.04435484  0.04435484  0.04637097  0.04637097  0.0483871
  0.0483871   0.05040323  0.05040323  0.05241935  0.05241935  0.05443548
  0.05443548  0.05645161  0.05645161  0.05846774  0.05846774  0.06048387
  0.06048387  0.0625      0.0625      0.06451613  0.06451613  0.06653226
  0.06653226  0.06854839  0.06854839  0.07056452  0.07056452  0.07258065
  0.07258065  0.07459677  0.07459677  0.0766129   0.0766129   0.07862903
  0.07862903  0.08064516  0.08064516  0.08266129  0.

In [110]:
# np.where(cond, a, b) picks from `a` where cond holds and from `b` elsewhere:
# here, values of ff where 3 < f < 7, and the scalar 1 everywhere else.
f = np.arange(0, 10)
ff = np.arange(10, 20)
np.where((f > 3) & (f < 7), ff, 1)

array([ 1,  1,  1,  1, 14, 15, 16,  1,  1,  1])

### Reinforcement Learning
Regression Learning: historical observations -> predicted future value
+ noisy
+ no confidence level
+ no trading decision
+ no number of shares

Reinforcement Learning: environment, state, action, reward (automaton)
+ tells you directly what to do