<a href="https://colab.research.google.com/github/aravindskumar98/limitordermarket/blob/testing/dataPrep/FullRun_SnapShot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
# -*- coding: utf-8 -*-
"""
Created on Sun Feb  6 03:02:37 2022

@author: aravi
"""

import pandas as pd
import numpy as np
import pickle 

class env():
    """Load an order file and build one ``Orderbook`` per trading day.

    Attributes:
        data_array: list of 2-D numpy arrays, one per run of consecutive rows
            sharing the same value in column 1 (the date column, per the
            ``id,date,time,vd,vo,lp,isbuy`` layout noted on ``Orderbook``).
        intervalSize: snapshot step passed to every ``Orderbook``; it is the
            constructor argument multiplied by 60 and treated as seconds.
        orderbook_daily: the ``Orderbook`` built for each entry of
            ``data_array``, in the same order.
    """

    def __init__(self, filepath, intervalSize):
        """Read ``filepath`` and build the per-day order books.

        Args:
            filepath: anything ``np.loadtxt`` accepts (path, file object, or
                an iterable of text lines).
            intervalSize: snapshot step; multiplied by 60 before use, so it is
                presumably given in minutes.
        """
        self.data_array = None
        self.load_data(filepath)  ## Creates a list of numpy arrays, one per date
        self.orderbook_daily = []
        self.intervalSize = intervalSize * 60  ## in seconds
        for i, data in enumerate(self.data_array):
            print("Day : ", i+1)
            print('---------')
            self.orderbook_daily.append(Orderbook(data, self.intervalSize))

    def load_data(self, filepath):
        """Parse the order file and split it into per-date chunks.

        The result is stored in ``self.data_array``. A new chunk starts at
        every row whose date (column 1) differs from the previous row's date.
        """
        # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
        # returns a 1-D array and the column slice below raises IndexError.
        # NOTE(review): float32 cannot represent every integer above 2**24,
        # so large numeric date/id values may be rounded -- confirm that the
        # date encoding in the file survives this dtype.
        df = np.loadtxt(filepath, dtype=np.float32, ndmin=2)
        day_starts = np.where(np.diff(df[:, 1]))[0] + 1
        self.data_array = np.split(df, day_starts)

## class for creating snapshots
class Orderbook:
    """One day's orders together with the snapshots taken at fixed intervals.

    Rows of ``data`` follow the layout ``id,date,time,vd,vo,lp,isbuy``; the
    time of the first and last row bound the range that is stepped through.
    """

    def __init__(self, data, intervalSize):
        """Keep the day's rows and immediately build every snapshot.

        Args:
            data: indexable sequence of rows for a single day.
            intervalSize: step between consecutive snapshots, in the same
                units as the time column.
        """
        ## step size between snapshots --> can be changed for finer control
        self.intervalSize = intervalSize
        ## the rows themselves (a numpy array when built by the env class)
        self.data = data
        ## time column (index 2) of the first and last row
        self.tstart = data[0][2]
        self.tend = data[-1][2]

        self.snaps = []
        self.generate_snaps()

    def generate_snaps(self):
        """Append a snapshot for every interval that ends before ``tend``.

        Each snapshot is printed via ``describe()`` as soon as it is built.
        """
        window_start = self.tstart
        interval_no = 0
        while window_start + self.intervalSize < self.tend:
            interval_no += 1
            print("Interval Number : ", interval_no)
            current = snapshot(self.data, window_start, self.intervalSize)
            current.describe()
            print("..")
            self.snaps.append(current)
            window_start += self.intervalSize

class snapshot:
    """Order-book state after replaying all orders up to the end of an interval.

    Every row whose time (column 2) is earlier than ``startTime + intervalSize``
    is replayed in row order; rows before ``startTime`` are included as well,
    so the state is cumulative. Rows use the layout
    ``id,date,time,vd,vo,lp,isbuy``: column 5 is used as the price, column 4
    as the quantity and the last column equals 1 for buy orders.

    Attributes:
        buy: resting buy orders as ``[price, quantity]``, sorted ascending so
            the highest bid is last.
        sell: resting sell orders as ``[price, quantity]``, sorted descending
            so the lowest ask is last.
        trade_list: executed trades as ``(buy price, sell price, quantity)``.
        buySize, sellSize, tradeSize: counts of accepted buy rows, accepted
            sell rows and executed trades.
        BASpread: lowest ask minus highest bid; stays 0 if either side is empty.
        volatility: standard deviation of the prices in ``buy``, ``sell`` and
            ``trade_list``; 0 for any of them that is empty.
    """

    def __init__(self, orderbook, startTime, intervalSize):
        """Store the inputs and build the snapshot right away."""
        self.data = orderbook
        self.startTime = startTime
        self.intervalSize = intervalSize
        ## initialising the descriptors of the state
        self.sell, self.buy = [], []
        self.trade_list = []
        self.sellSize, self.buySize, self.tradeSize = 0, 0, 0
        self.BASpread, self.volatility = 0, {"buy": 0, "sell": 0, "trade": 0}

        self.generateSnap()

    ## This function resets the state and sets it back to the initial empty condition
    def resetState(self):
        """Empty both book sides, the trade list and the counters."""
        self.sell, self.buy = [], []
        self.trade_list = []
        self.sellSize, self.buySize, self.tradeSize = 0, 0, 0
        print("..")

    def sortHeap(self):
        """Sort both sides so the best price of each side is the last element."""
        ## heap will be implemented in later updates
        if self.buy:
            self.buy.sort()
        if self.sell:
            self.sell.sort(reverse=True)

    def trade_act(self):
        """Match the best bid against the best ask while they cross.

        Each match trades the smaller of the two quantities, records it in
        ``trade_list`` and removes any order whose quantity reaches zero.
        """
        while self.sell and self.buy and self.sell[-1][0] <= self.buy[-1][0]:
            tradeVal = min(self.buy[-1][1], self.sell[-1][1])
            self.trade_list.append((self.buy[-1][0], self.sell[-1][0], tradeVal))
            self.buy[-1][1] -= tradeVal
            self.sell[-1][1] -= tradeVal
            if self.buy[-1][1] == 0:
                self.buy.pop()
            if self.sell[-1][1] == 0:
                self.sell.pop()
            self.tradeSize += 1

    def generateSnap(self):
        """Replay every row inside the cut-off and compute the derived state."""
        ## reset leftovers from the last state
        self.resetState()

        cutoff = self.startTime + self.intervalSize
        ## Iterate through all transactions that happened before the cut-off
        for row in self.data:
            if row[2] >= cutoff:
                continue
            if row[5] == 0:
                continue  ### Assuming that garbage values can be removed
            if row[-1] == 1:
                self.buy.append([row[5], row[4]])
                self.buySize += 1
            else:
                self.sell.append([row[5], row[4]])
                self.sellSize += 1

            ## re-sort both sides and execute any trades the new order enables
            self.sortHeap()
            self.trade_act()

        self.calculate_state()

    @staticmethod
    def _price_std(entries):
        """Return the standard deviation of the first field of each entry.

        Returns 0 for an empty list: an empty list converts to a 1-D array,
        on which the two-index column slice raises IndexError.
        """
        if not entries:
            return 0
        return np.std(np.array(entries)[:, 0])

    def find_volatility(self):
        """Fill ``volatility`` with the price spread of trades and both sides."""
        self.volatility["trade"] = self._price_std(self.trade_list)
        self.volatility["buy"] = self._price_std(self.buy)
        self.volatility["sell"] = self._price_std(self.sell)

    def calculate_state(self):
        """Compute the bid-ask spread (when both sides exist) and volatility."""
        if self.buy and self.sell:
            self.BASpread = self.sell[-1][0] - self.buy[-1][0]
        self.find_volatility()

    ##function to display stuff
    def describe(self, depth=5):
        """Print the best ``depth`` levels of each side and the summary stats.

        Sell levels are printed ending with the lowest ask, buy levels
        starting with the highest bid, so the best prices sit next to the
        divider. Sides with fewer than ``depth`` orders are shown in full.
        """
        print("SELL SIDE")
        # A non-positive depth shows nothing; a bare [-0:] slice would
        # otherwise select the whole list.
        best_sell = self.sell[-depth:] if depth > 0 else []
        best_buy = self.buy[-depth:] if depth > 0 else []
        for level in best_sell:
            print(level)

        print("<---------->")
        for level in reversed(best_buy):
            print(level)
        print("BUY SIDE")

        for key in self.volatility:
            print(f"Volatility : {key} = {self.volatility[key]}")

        print(f"Current bid ask spread = {self.BASpread}")
            

# Build the environment from the full order file; the second argument (30) is
# multiplied by 60 inside env.__init__ and used as the snapshot step in seconds.
Env = env("/content/drive/MyDrive/limitorderbook/numpydatabook_full_2.txt",30) 

Streaming output truncated to the last 5000 lines.
SELL SIDE
[2210.0, 5.0]
[2210.0, 5.0]
[2210.0, 5.0]
[2210.0, 5.0]
[2210.0, 2.0]
<---------->
[2197.0, 1036.0]
[2193.1, 108.0]
[2193.0, 253.0]
[2193.0, 200.0]
[2193.0, 200.0]
BUY SIDE
Volatility : buy = 88.70521545410156
Volatility : sell = 86.54524230957031
Volatility : trade = 28.741233825683594
Current bid ask spread = 13.0
..
Interval Number :  2
..
SELL SIDE
[2229.75, 15.0]
[2229.0, 10.0]
[2228.9, 10.0]
[2228.85, 15.0]
[2228.0, 2.0]
<---------->
[2226.1, 62.0]
[2226.1, 47.0]
[2226.0, 50.0]
[2226.0, 1.0]
[2225.8, 24.0]
BUY SIDE
Volatility : buy = 89.95796203613281
Volatility : sell = 75.1060791015625
Volatility : trade = 27.680173873901367
Current bid ask spread = 1.89990234375
..
Interval Number :  3
..
SELL SIDE
[2214.7, 5.0]
[2214.5, 1.0]
[2212.25, 25.0]
[2212.25, 10.0]
[2211.5, 73.0]
<---------->
[2211.1, 18.0]
[2210.55, 15.0]
[2210.55, 15.0]
[2210.5, 7.0]
[2210.5, 4.0]
BUY SIDE
Volatility : buy = 84.35692596435547

In [43]:
# Number of per-day order books that were built (one per date chunk in the file).
len(Env.orderbook_daily)

63

In [48]:
# Print the snapshot at index 2 of the order book at index 3 (zero-based).
Env.orderbook_daily[3].snaps[2].describe()

SELL SIDE
[3208.95, 30.0]
[3208.95, 2.0]
[3208.75, 196.0]
[3208.75, 5.0]
[3208.0, 26.0]
<---------->
[3206.0, 170.0]
[3206.0, 170.0]
[3205.05, 53.0]
[3205.05, 20.0]
[3205.05, 13.0]
BUY SIDE
Volatility : buy = 80.6750259399414
Volatility : sell = 47.76594924926758
Volatility : trade = 18.230669021606445
Current bid ask spread = 2.0


In [45]:
# Persist the fully built environment. The with-block closes the file, which
# flushes the pickle to disk before any later cell reads it back.
with open('/content/drive/MyDrive/limitorderbook/snapshots_total.obj', 'wb') as file_pi:
    pickle.dump(Env, file_pi)

In [46]:
# Reload the pickled environment; the with-block closes the handle afterwards.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load files that were written by this notebook or another trusted source.
with open('/content/drive/MyDrive/limitorderbook/snapshots_total.obj', 'rb') as filehandler:
    Env2 = pickle.load(filehandler)

In [49]:
# Print the same snapshot from the unpickled copy to compare with the original.
Env2.orderbook_daily[3].snaps[2].describe()

SELL SIDE
[3208.95, 30.0]
[3208.95, 2.0]
[3208.75, 196.0]
[3208.75, 5.0]
[3208.0, 26.0]
<---------->
[3206.0, 170.0]
[3206.0, 170.0]
[3205.05, 53.0]
[3205.05, 20.0]
[3205.05, 13.0]
BUY SIDE
Volatility : buy = 80.6750259399414
Volatility : sell = 47.76594924926758
Volatility : trade = 18.230669021606445
Current bid ask spread = 2.0
