In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.dates import DateFormatter

plt.rcParams['axes.axisbelow'] = True

START_TIME = 9.5 * 60 * 60     #  9:30:00.000 in s after midnight
END_TIME =    16 * 60 * 60        # 16:00:00.000 in s after midnight

#### Date Source

- 1. Go to https://lobsterdata.com/info/DataSamples.php and download the 10 level data for amazon and microsoft
- 2. Save in a folder named 'data' where this file is saved.

#### Message File Information

Dimension: (NumberEvents x 6)

Structure (each row):
- Time stamp (sec after midnight with decimal precision of at least milliseconds and up to nanoseconds depending on the period)
- Event type
- Order ID
- Size (# of shares)
- Price
- Direction

Event types:
- 1   Submission new limit order
- 2   Cancellation (partial)
- 3   Deletion (total order)
- 4   Execution of a visible limit order
- 5   Execution of a hidden limit order
- 7   Trading Halt (Detailed 
                            information below)

Direction:
- -1  Sell limit order
- 1  Buy limit order

> NOTE: Execution of a sell (buy) limit order corresponds to a buyer-(seller-) initiated trade, i.e. a BUY (SELL) trade.

In [12]:
amzn = pd.read_csv(
    "data/AMZN_2012-06-21_34200000_57600000_message_10.csv",
    names = ['Time', 'Event', 'OrderID', 'Size', 'Price', 'TradeDirection']
)
amzn['row_index'] = amzn.index.values
amzn = amzn[amzn['Time'] >= START_TIME]
amzn = amzn[amzn['Time'] <= END_TIME]
amzn.set_index(pd.to_datetime(amzn.Time, unit = "s"), inplace = True)

msft = pd.read_csv(
    "data/MSFT_2012-06-21_34200000_57600000_message_10.csv",
    names = ['Time', 'Event', 'OrderID', 'Size', 'Price', 'TradeDirection']
)
msft['row_index'] = msft.index.values
msft = msft[msft['Time'] >= START_TIME]
msft = msft[msft['Time'] <= END_TIME]
msft.set_index(pd.to_datetime(msft.Time, unit = "s"), inplace = True)

In [13]:
amzn.head()

Unnamed: 0_level_0,Time,Event,OrderID,Size,Price,TradeDirection,row_index
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 09:30:00.017459617,34200.01746,5,0,1,2238200,-1,0
1970-01-01 09:30:00.189607670,34200.189608,1,11885113,21,2238100,1,1
1970-01-01 09:30:00.189607670,34200.189608,1,3911376,20,2239600,-1,2
1970-01-01 09:30:00.189607670,34200.189608,1,11534792,100,2237500,1,3
1970-01-01 09:30:00.189607670,34200.189608,1,1365373,13,2240000,-1,4


In [14]:
msft.head()

Unnamed: 0_level_0,Time,Event,OrderID,Size,Price,TradeDirection,row_index
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-01 09:30:00.013994120,34200.013994,3,16085616,100,310400,-1,0
1970-01-01 09:30:00.013994120,34200.013994,1,16116348,100,310500,-1,1
1970-01-01 09:30:00.015247805,34200.015248,1,16116658,100,310400,-1,2
1970-01-01 09:30:00.015442111,34200.015442,1,16116704,100,310500,-1,3
1970-01-01 09:30:00.015789147,34200.015789,1,16116752,100,310600,-1,4


In [15]:
print(amzn.shape)
print(msft.shape)

(269748, 7)
(668765, 7)


In [16]:
# Precision
print(amzn.index.dtype)

datetime64[ns]


In [17]:
# Total trading volumn
msft.Size.sum()

347108132

#### Distribution of Events

In [19]:
#   - '1'   Submission new limit order
#   - '2'   Cancellation (partial)
#   - '3'   Deletion (total order)
#   - '4'   Execution of a visible limit order
#   - '5'   Execution of a hidden limit order

def summary(msg_book):
    summary=msg_book.groupby('Type')['Size'].agg(['count','sum'])
    summary=summary/summary.sum()
    summary['description']=summary.index.map({1:'New LO',2:'Cancel',3:'Deletion',4:'Execution visible',5:'Execution hidden'})
    return summary

summary(msft)

KeyError: 'Type'