In [None]:
import asyncio
from copra.websocket import Channel, Client
import matplotlib.pyplot as plt
from collections import OrderedDict
from time import sleep
from dateutil import parser
import copy
import datetime

class Level2(Client):
    K=10
    Bids = {} # Prices and volumes of bids
    Asks = {} # Prices and volumes of asks
    reference_price = None
    first_bid = None # Best Bid
    first_ask = None # Best Ask
    
    shortened_book = OrderedDict([(i,0) for i in range(-K,K+1) if i != 0])
    
    starting_ref_price = None
    starting_time = None
    starting_book = None
    updates = []    
    
    # Displays Order Book
    def display_order_book(self):
        bids = self.Bids.items()
        bids = [b for b in bids if (self.reference_price - b[0]) < self.K]
        bid_prices = [b[0] for b in bids]
        bid_volumes = [b[1] for b in bids]

        asks = self.Asks.items()
        asks = [a for a in asks if (a[0] - self.reference_price) < self.K]
        ask_prices = [a[0] for a in asks]
        ask_volumes = [a[1] for a in asks]
        
        b1 = plt.bar(bid_prices, bid_volumes, color='r')
        b2 = plt.bar(ask_prices, ask_volumes, color='g')
        v = plt.axvline(x=self.reference_price, color='b')
        plt.xticks([x for x in range(min(bid_prices), max(ask_prices)+1)])
        
        plt.title('ETC-USD Limit Order Book: Depth = {}'.format(self.K))
        plt.legend(['Reference Price = {}'.format(self.reference_price), 'Bids', 'Asks'])
        plt.xlabel('Price (USD Cents)')
        plt.ylabel('Volume')
        plt.show()
    
    # Updates the reference price of order book
    def update_order_book(self):
        sorted_bids = list(reversed(sorted(self.Bids.items())))
        sorted_asks = list(sorted(self.Asks.items()))        
        best_bid = sorted_bids[0][0]
        best_ask = sorted_asks[0][0]
        if ((best_bid + best_ask) % 2) != 0:
            self.reference_price = round((best_bid+best_ask)/2, 1)
        else:
            middle = (best_bid+best_ask)/2
            if self.reference_price > middle:
                self.reference_price = round((best_bid+best_ask)/2 + 0.5,1)
            else:
                self.reference_price = round((best_bid+best_ask)/2 - 0.5,1)

            
        self.first_bid = int(round(self.reference_price - 0.5))
        self.first_ask = int(round(self.reference_price + 0.5))
        
        for i in range(-self.K,0):
            self.shortened_book[i] = self.Bids.get(self.first_bid + i + 1,0)
        for i in range(1,self.K+1):
            self.shortened_book[i] = self.Asks.get(self.first_ask + i - 1,0)

    
    # Prints LOB
    def print_order_book(self):
        print("Reference Price: {}".format(self.reference_price))
        print("---------------------------------------")
        
        
        print("First {} Bids: ".format(self.K))
        for price in range(self.first_bid - self.K + 1, self.first_bid + 1):
            print("{}, {}".format(price, self.Bids.get(price, 0)))
        print("---------------------------------------")
        
        print("First {} Asks: ".format(self.K))
        for price in range(self.first_ask, self.first_ask + self.K):
            print("{}, {}".format(price, self.Asks.get(price, 0)))
        print("---------------------------------------")
        print("\n")

     
    # Receives message from API websocket
    def on_message(self, message):
        # Get snapshot of LOB from API and build internal representation
        if message['type'] == 'snapshot':
            for price, amount in message['bids']:
                self.Bids[int(round((float(price)*100)))] = float(amount)
            for price, amount in message['asks']:
                self.Asks[int(round((float(price)*100)))] = float(amount)
            self.update_order_book()
            self.starting_ref_price = self.reference_price
            self.starting_book = copy.copy(self.shortened_book)
            self.starting_time = datetime.datetime.now()
            print('Starting time: {}'.format(self.starting_time))
            self.display_order_book()
            self.print_order_book()
            
        # Update order book when new event occurs
        if message['type'] == 'l2update' and 'time' in message:
            significant_order = False
            for (side, price, amount) in message['changes']:
                print("Update: {}".format((side, price, amount)))
                print("Time: {}".format(message['time']))
                print("---------------------------------------")
                
                # Find i from the price. Keep track of event if
                # abs(i) <= K
                price = int(round((float(price)*100)))  
                i = price - self.reference_price
                if i < 0:
                    i = int(round(i - 0.5))
                else:
                    i = int(round(i + 0.5))
                if abs(i) <= self.K:
                    if i < 0:
                        change = float(amount) - self.Bids.get(price,0)
                    else:
                        change = float(amount) - self.Asks.get(price,0)
                    significant_order = True
                    
                    self.updates.append({
                        'reference_price': self.reference_price,
                        'LOB': copy.copy(self.shortened_book),
                        'K': i,
                        'change': change,
                        'time': parser.parse(message['time'])
                    })
                    
                # Update volume in order book    
                if side == "buy":
                    if amount == "0":
                        del self.Bids[price]
                    else:
                        self.Bids[price] = float(amount)
                elif side == "sell":
                    if amount == "0":
                        del self.Asks[price]
                    else:
                        self.Asks[price] = float(amount)
                        
            # Update reference price if needed   
            self.update_order_book()
            

loop = asyncio.get_event_loop()
channel = Channel('level2', 'ETC-USD')
ws = Level2(loop, channel)

async def my_task(seconds):
    global loop
    print('Collecting data for {} seconds'.format(seconds))
    await asyncio.sleep(seconds)
    await ws.close()
    return "Finished Task"


try:
    task_obj = loop.create_task(my_task(seconds=28800))
    loop.run_until_complete(task_obj)
finally:
    loop.close()
    print("Finished collecting data")

In [2]:
updates = ws.updates
updates

[{'K': 3,
  'LOB': OrderedDict([(-10, 10.0369),
               (-9, 1087.1369),
               (-8, 196.73194066),
               (-7, 984.95999735),
               (-6, 842.49047359),
               (-5, 8151.16545683),
               (-4, 1624.83277893),
               (-3, 246.95),
               (-2, 856.83407542),
               (-1, 479.79381546),
               (1, 242.40248448),
               (2, 1934.87370843),
               (3, 266.5369),
               (4, 3918.9539),
               (5, 110.1369),
               (6, 10.2619),
               (7, 30.0369),
               (8, 11.1369),
               (9, 13.19217),
               (10, 10.3619)]),
  'change': 58.5,
  'reference_price': 438.5,
  'time': datetime.datetime(2018, 12, 22, 8, 16, 1, 11000, tzinfo=tzutc())},
 {'K': 3,
  'LOB': OrderedDict([(-10, 10.0369),
               (-9, 1087.1369),
               (-8, 196.73194066),
               (-7, 984.95999735),
               (-6, 842.49047359),
               (-5, 8151.16

In [5]:
import itertools
from operator import itemgetter
grouped_by_time = OrderedDict([(k, list(v)) for k, v in itertools.groupby(updates, key=lambda x:x['time'])])
# Contains dictionary of time, reference price, order book, list of changes
cleaned_updates = []
for t, us in grouped_by_time.items():
    if len(us) == 1:
        u = us[0]
        cleaned_updates.append({ \
            'time': t, \
            'reference_price': u['reference_price'], \
            'LOB': copy.copy(u['LOB']), \
            'events': [(u['K'],u['change'])]                      
        })
    else:
        new_update = {'time': t}
        us = sorted(us, key=itemgetter('K'))
        grouped_by_k = OrderedDict((k, list(v)) for k, v in itertools.groupby(us, key=lambda x:x['K']))
        reference_k = 0
        for k in grouped_by_k:
            if abs(k) > abs(reference_k):
                reference_k = k
        new_update['reference_price'] = grouped_by_k[reference_k][0]['reference_price']
        new_update['LOB'] = copy.copy(grouped_by_k[reference_k][0]['LOB'])
        events = []
        for k in grouped_by_k:
            combined_change = 0
            for u in grouped_by_k[k]:
                combined_change = combined_change + u['change']
            events.append((k,combined_change))
        new_update['events'] = changes
        cleaned_updates.append(new_update)
        
cleaned_updates

[{'LOB': OrderedDict([(-10, 10.0369),
               (-9, 1087.1369),
               (-8, 196.73194066),
               (-7, 984.95999735),
               (-6, 842.49047359),
               (-5, 8151.16545683),
               (-4, 1624.83277893),
               (-3, 246.95),
               (-2, 856.83407542),
               (-1, 479.79381546),
               (1, 242.40248448),
               (2, 1934.87370843),
               (3, 266.5369),
               (4, 3918.9539),
               (5, 110.1369),
               (6, 10.2619),
               (7, 30.0369),
               (8, 11.1369),
               (9, 13.19217),
               (10, 10.3619)]),
  'events': [(3, 58.5)],
  'reference_price': 438.5,
  'time': datetime.datetime(2018, 12, 22, 8, 16, 1, 11000, tzinfo=tzutc())},
 {'LOB': OrderedDict([(-10, 10.0369),
               (-9, 1087.1369),
               (-8, 196.73194066),
               (-7, 984.95999735),
               (-6, 842.49047359),
               (-5, 8151.16545683),
    

In [6]:
# Get Average Event Sizes
import numpy as np
K = ws.K
event_sizes = OrderedDict([(i,[]) for i in range(-K,K+1) if i != 0])
for update in cleaned_updates:
    for (k,amount) in update['events']:
        event_sizes[k].append(abs(amount))
AESs = OrderedDict([(i,0) for i in range(-k,k+1) if i != 0])
for (k,event_sizes) in event_sizes.items():
    AESs[k] = np.mean(event_sizes)
    
AESs

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


OrderedDict([(-10, nan),
             (-9, nan),
             (-8, nan),
             (-7, 500.00000000000011),
             (-6, 499.99999999999994),
             (-5, 800.0),
             (-4, 323.58635161705882),
             (-3, 882.08249999999998),
             (-2, 65.5),
             (-1, 105.5),
             (1, 201.94459459459458),
             (2, 456.93688446608684),
             (3, 130.38966454740742),
             (4, nan),
             (5, nan),
             (6, nan),
             (7, nan),
             (8, nan),
             (9, nan),
             (10, nan)])

In [15]:
from dateutil.tz import tzutc
import math

def transform_book(book):
    res = OrderedDict([(k,0) for k in range(-K,K+1) if k != 0])
    for k,actual_size in book.items():
        if math.isnan(AESs[k]):
            size = 1
        else:
            size = int(actual_size / AESs[k])
        res[k] = size
    return res

# Get times between events
maximum_sizes = OrderedDict([(k,0) for k in range(-K,K+1) if k != 0])
for update in cleaned_updates:
    transformed_book = transform_book(update['LOB'])
    for k,size in transformed_book.items():
        if size > maximum_sizes[k]:
            maximum_sizes[k] = size
            
time_between_events_pos = OrderedDict([(k,OrderedDict()) for k in range(-K,K+1) if k != 0])
time_between_events_neg = OrderedDict([(k,OrderedDict()) for k in range(-K,K+1) if k != 0])

for k in range(-K,K+1):
    if k != 0:
        for i in range(maximum_sizes[k] + 1):
            time_between_events_pos[k][i] = []
        for i in range(1,maximum_sizes[k] + 1):
            time_between_events_neg[k][i] = []           
                   
t = ws.starting_time
last_change_time_at_k = OrderedDict([(k,reference_change_time) for k in range(-K,K+1) if k != 0])
old_reference_price = ws.starting_ref_price

for update in cleaned_updates:
    reference_price = update['reference_price']
    transformed_book = transform_book(update['LOB'])
    t = update['time']
    if reference_price != old_reference_price:
        last_change_time_at_k = OrderedDict([(k,reference_change_time) for k in range(-K,K+1) if k != 0])
        reference_change_time = t
        old_reference_price = reference_price
    for (k,change) in update['events']:
        time_between_change = (t-last_change_time_at_k[k]).total_seconds()
        size = transformed_book[k]
        if change > 0:
            time_between_events_pos[k][size].append(time_between_change)
            last_change_time_at_k[k]= t
        else:
            if size > 0:
                time_between_events_neg[k][size].append(time_between_change)
                last_change_time_at_k[k] = t

for k,v in time_between_events_pos.items():
    print(k,v)

-10 OrderedDict([(0, []), (1, [])])
-9 OrderedDict([(0, []), (1, [])])
-8 OrderedDict([(0, []), (1, [])])
-7 OrderedDict([(0, []), (1, [82673.003, 2.589, 1.567, 3.256]), (2, [])])
-6 OrderedDict([(0, [0.302, 0.378, 0.48, 0.023, 0.024, 0.034, 0.813, 3.716]), (1, [])])
-5 OrderedDict([(0, []), (1, []), (2, []), (3, [0.037, 0.033, 0.446, 0.033]), (4, []), (5, []), (6, []), (7, []), (8, []), (9, [3.672, 2.092]), (10, [])])
-4 OrderedDict([(0, []), (1, []), (2, []), (3, [0.033, 0.02, 0.029, 0.02]), (4, []), (5, [18.55, 2.588])])
-3 OrderedDict([(0, [82692.731, 10.124]), (1, []), (2, []), (3, []), (4, []), (5, [18.224, 7.316, 9.264]), (6, [])])
-2 OrderedDict([(0, []), (1, []), (2, []), (3, []), (4, []), (5, []), (6, []), (7, []), (8, []), (9, []), (10, []), (11, []), (12, []), (13, [82724.545]), (14, [])])
-1 OrderedDict([(0, []), (1, []), (2, []), (3, []), (4, [41.101]), (5, []), (6, [])])
1 OrderedDict([(0, [0.003, 0.158, 0.64, 7.233, 0.016, 3.396, 0.462]), (1, [2.653, 1.229, 1.302, 4.775