Import dependencies and configure settings.

In [1]:
import os
import csv
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import scipy.stats as stats
import statsmodels.api as sm

import hawkes
import markDistributions as md
import scipy.optimize as op

from pylab import rcParams
from __future__ import division

%matplotlib inline
np.set_printoptions(precision=10)

Load GOOG order message data with 10 price levels into dict, taking only order events between 2pm and 3pm:

In [2]:
dat = 'GOOG_2012-06-21_34200000_57600000_message_10'
temp = ([],[],[],[],[],[])
with open('../../data/'+dat+'.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        temp[0].append(float(row[0]))
        temp[1].append(int(row[1]))
        temp[2].append(int(row[2]))
        temp[3].append(int(row[3]))
        temp[4].append(int(row[4]))
        temp[5].append(int(row[5]))

orders = {'Time':None, 'Type':None, 'Order ID':None,
        'Volume':None, 'Price':None, 'Direction':None}
orders['Time'] = np.asarray(temp[0])
orders['Type'] = np.asarray(temp[1])
orders['Order ID'] = np.asarray(temp[2])
orders['Volume'] = np.asarray(temp[3])
orders['Price'] = np.asarray(temp[4])
orders['Direction'] = np.asarray(temp[5])
del temp

In [3]:
orders['Time'] = orders['Time'][(orders['Time']>50400) & (orders['Time']<54000)]
orders['Type'] = orders['Type'][(orders['Time']>50400) & (orders['Time']<54000)]
orders['Order ID'] = orders['Order ID'][(orders['Time']>50400) & (orders['Time']<54000)]
orders['Volume'] = orders['Volume'][(orders['Time']>50400) & (orders['Time']<54000)]
orders['Price'] = orders['Price'][(orders['Time']>50400) & (orders['Time']<54000)]
orders['Direction'] = orders['Direction'][(orders['Time']>50400) & (orders['Time']<54000)]

In [4]:
ttemp = len(orders['Type'])
print("{0} order messages total:".format(len(orders['Type'])))
temp = len(orders['Type'][orders['Type']==1])
print("- {0} LO submissions ({1:.2f}%)".format(temp,100*temp/ttemp))
temp = len(orders['Type'][orders['Type']==2])
print("- {0} partial LO cancellations ({1:.2f}%)".format(temp,100*temp/ttemp))
temp = len(orders['Type'][orders['Type']==3])
print("- {0} LO cancellations ({1:.2f}%)".format(temp,100*temp/ttemp))
temp = len(orders['Type'][orders['Type']==4])
print("- {0} visible LO execution ({1:.2f}%)".format(temp,100*temp/ttemp))
temp = len(orders['Type'][orders['Type']==5])
print("- {0} hidden LO execution ({1:.2f}%)".format(temp,100*temp/ttemp))
temp = len(orders['Type'][orders['Type']==7])
print("- {0} trading halts ({1:.2f}%)".format(temp,100*temp/ttemp))
del ttemp
del temp

19998 order messages total:
- 9676 LO submissions (48.38%)
- 4 partial LO cancellations (0.02%)
- 9145 LO cancellations (45.73%)
- 644 visible LO execution (3.22%)
- 529 hidden LO execution (2.65%)
- 0 trading halts (0.00%)


Extract data into usable form.

In [4]:
LTC_mask = (orders['Type']==1) | (orders['Type']==3) | (orders['Type']==4)
t = orders['Time'][LTC_mask]
x = orders['Volume'][LTC_mask]

j = orders['Type'][LTC_mask]
j[(j==1) & (orders['Direction'][LTC_mask]==-1)] = 0 # limit ask orders
j[(j==1) & (orders['Direction'][LTC_mask]==1)] = 1 # limit bid orders
j[(j==3) & (orders['Direction'][LTC_mask]==-1)] = 2 # limit ask orders cancellation
j[(j==3) & (orders['Direction'][LTC_mask]==1)] = 3 # limit bid orders cancellation
j[(j==4) & (orders['Direction'][LTC_mask]==1)] = 4 # market ask orders
j[(j==4) & (orders['Direction'][LTC_mask]==-1)] = 5 # market bid orders

Initialise a 6-D Hawkes point process and set the mark distributions.

In [5]:
d = 6
h = hawkes.Hawkes(d=d)

Fit the Pareto mark distribution to the volumes of each component. This is done separately in "mark_distribution_GOOG_10.ipynb".

In [6]:
markDistParam = [[470.4065983066, 6.2452733954],
                [1859.1482390663, 24.303771531],
                [448.0381138347, 5.9565764744],
                [767.8397123258, 10.5256303422],
                [566.2681280976, 8.7936728955],
                [3335.1005300322, 53.1459950274]]
h.setMarkDistParam(markDistParam)

Fit model to data.

In [7]:
result = h.MLE(t,j,x,method='L-BFGS-B')

The fitted parameters and stability check:

In [17]:
arr = result["x"]

np.set_printoptions(suppress=True)
np.set_printoptions(precision=5)

print("""eta: {0}\n""".format(arr[:6]))
print("""Q: \n{0}\n""".format(np.reshape(arr[6:42],(6,6))))
print("""alpha: {0}\n""".format(arr[42:48]))
print("""alph: {0}\n""".format(arr[48:54]))
print("""beta: {0}\n""".format(arr[54:60]))

print("""Spr(Q) of estimated model: {0}""".format(h.ibParam.getSpectralRadius()))

eta: [ 0.26558  0.24677  0.24886  0.23342  0.02999  0.0276 ]

Q: 
[[ 0.18211  0.02212  0.48243  0.17668  0.67913  0.38455]
 [ 0.00537  0.17683  0.10281  0.47556  0.3432   0.71057]
 [ 0.436    0.11057  0.16461  0.00264  0.21806  0.46511]
 [ 0.10361  0.40197  0.00999  0.18594  0.45128  0.21576]
 [ 0.00563  0.00576  0.00138  0.00328  0.66088  0.00373]
 [ 0.00793  0.00473  0.00596  0.00001  0.00944  0.67765]]

alpha: [ 10.  10.  10.  10.  10.  10.]

alph: [ 73.03494   0.        1.96604   0.00069   0.86737  25.89727]

beta: [ 5.82846  0.00796  7.11414  0.02453  3.52462  0.02047]

Spr(Q) of estimated model: 0.848014699814


In [10]:
print arr
print len(arr)

[  0.2655821625   0.2467706365   0.2488595796   0.2334196466   0.0299931322
   0.0275962919   0.182111157    0.0221222508   0.4824290013   0.1766814967
   0.6791265038   0.3845468916   0.005365127    0.1768301375   0.102805278
   0.4755566209   0.3431998706   0.7105693583   0.4359995561   0.1105672259
   0.1646056628   0.0026448732   0.2180595295   0.4651086535   0.1036088955
   0.4019705048   0.0099944545   0.185940748    0.4512819143   0.2157649044
   0.0056324858   0.0057599176   0.0013837344   0.0032767781   0.6608806473
   0.0037258582   0.0079340917   0.0047258357   0.0059642156   0.00001
   0.0094382205   0.6776540789  10.            10.            10.            10.
  10.            10.            73.0349372448   0.             1.9660371753
   0.0006869116   0.8673730157  25.8972730011   5.8284634569   0.0079625467
   7.1141382424   0.0245273974   3.5246200308   0.0204672999]
60
