### **High Frequency Trading Strategies Design using ML and DL**

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 13 22:40:00 2022

@author: Bradley

HFT: Machine Learning Techniques on Full Orderbook Tick Data
"""

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from datetime import datetime
import csv

# Matplotlib defaults for every figure in this notebook.
plt.rcParams['font.family'] = ['sans-serif']
# Put SimHei first in the sans-serif list (presumably so CJK labels render - requires the font to be installed).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (8,5) # set the default figure size up front

%config InlineBackend.figure_format = 'svg'

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')  # silence all warnings for the rest of the session

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

from IPython.display import display
# pandas text-repr options: keep wide frames on one line and align
# East Asian / ambiguous-width characters correctly within a 180-column display.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
pd.set_option('display.width', 180)

#### **Data Pre-processing**

**Goal: Transform the full order-book data (`CN_Futures_2014.01.02.csv`) into organized tick data (`order_book_3_2014_1_2.csv`).**

**我们分析的对象是SGX China A50 Futures在2014年1月2日的高频订单簿数据。目标是根据其订单簿数据挖掘因子并实现使用机器学习算法滚动预测买卖时点。**

In [7]:

def order_book_tranform(year,month,day,path,best_price_number,series):
    ## read file
    def read_file(year,month,day,path,series):
        """Load one day's raw CSV and keep the rows of a single series.

        The file name is ``<path><year>.<month>.<day>.csv`` where a month or
        day whose string form is exactly one character long is left-padded
        with ``'0'``. ``path`` is used as a plain prefix; no separator is added.

        Parameters
        ----------
        year, month, day : converted with ``str`` to build the file name.
        path : prefix prepended verbatim to the file name.
        series : value compared against the ``Series`` column.

        Returns
        -------
        pandas.DataFrame
            Rows whose ``Series`` equals ``series``, re-indexed from 0.
        """
        def two_chars(value):
            # Pad only when the text is exactly one character long.
            text = str(value)
            return '0' + text if len(text) == 1 else text

        file_name = '.'.join([str(path) + str(year), two_chars(month), two_chars(day), 'csv'])
        raw = pd.read_csv(file_name)
        selected = raw[raw.Series == series]
        return selected.reset_index(drop = True)

    def insert(order_book_data,data_to_insert,ob_position):
        """Return a new book with ``data_to_insert`` placed at row ``ob_position``.

        Rows before ``ob_position`` stay in front, the remaining rows are
        pushed behind the inserted data, and the result is re-indexed from 0.
        The input objects are not modified.
        """
        pieces = [
            order_book_data[0:ob_position],   # rows ahead of the insertion point
            data_to_insert,
            order_book_data[ob_position:],    # rows pushed down by the insertion
        ]
        merged = pd.concat(pieces)
        return merged.reset_index(drop = True)

    def draw_out(order_book_data,ob_position):
        """Return a new book without the row at position ``ob_position``.

        The rows before and after that position are concatenated and the
        result is re-indexed from 0. The input object is not modified.
        """
        kept = [
            order_book_data[0:ob_position],       # everything before the removed row
            order_book_data[ob_position + 1:],    # everything after it
        ]
        remaining = pd.concat(kept)
        return remaining.reset_index(drop = True)

    def order_book_to_csv(order_book_bid,order_book_ask,data,i):
        """Append the current aggregated book snapshot to the output CSV.

        ``QuantityDifference`` is summed per ``Price`` on each side, price
        levels whose sum is zero are dropped, and the bid and ask levels are
        placed side by side. A ``TimeStamp`` row followed by at most
        ``best_price_number`` level rows is appended to
        ``order_book_<best_price_number>_<year>_<month>_<day>.csv``; those four
        names are read from the enclosing scope.

        Parameters
        ----------
        order_book_bid, order_book_ask : DataFrames holding at least the
            ``Price`` and ``QuantityDifference`` columns.
        data : DataFrame with a ``TimeStamp`` column.
        i : int
            Position of the current row in ``data``; the snapshot is stamped
            with the timestamp of the row just before it (``i-1``).

        Returns
        -------
        tuple
            ``(order_book_bid_sum, order_book_ask_sum)``, the per-price sums
            with zero-quantity levels removed.
        """
        level_cols = ['Price','QuantityDifference']

        # Bid levels keep the order in which prices appear (sort=False);
        # ask levels are sorted ascending by the groupby default.
        order_book_bid_sum = order_book_bid[level_cols].groupby(by = ['Price'],as_index = False,sort = False).sum()
        order_book_ask_sum = order_book_ask[level_cols].groupby(by = ['Price'],as_index = False).sum()
        order_book_bid_sum = order_book_bid_sum[order_book_bid_sum.QuantityDifference != 0.0].reset_index(drop = True)
        order_book_ask_sum = order_book_ask_sum[order_book_ask_sum.QuantityDifference != 0.0].reset_index(drop = True)
        order_book_bid_ask = pd.concat([order_book_bid_sum[level_cols],order_book_ask_sum[level_cols]],axis = 1)

        # Number of level rows to emit; clamped so a non-positive depth writes nothing.
        depth = max(0, min(len(order_book_bid_ask), best_price_number))
        file_name = 'order_book_'+str(best_price_number)+'_'+str(year)+'_'+str(month)+'_'+str(day)+'.csv'

        # newline='' lets the csv module control line endings itself; without it
        # every row is followed by a blank line on platforms that translate '\n'.
        with open(file_name,'a',newline = '') as f:
            writer = csv.writer(f)
            writer.writerow(["TimeStamp",data.TimeStamp[i-1:i].iloc[0]])
            writer.writerows(order_book_bid_ask.iloc[:depth].values.tolist())
        return order_book_bid_sum,order_book_ask_sum

    data = read_file(year,month,day,path,series)
    
    with open('order_book_' + str(best_price_number) + '_' + str(year) + '_' + str(month) + '_' + str(day) + '.csv', 'wb') as csvfile:
        f = csv.writer(csvfile) 

    data[['QuantityDifference']] = data[['QuantityDifference']].astype(float)
    data['QuantityDifference_'] = data['QuantityDifference']
    data_ask = data[(data.BidOrAsk == 'A')].reset_index(drop=True)
    data_bid = data[(data.BidOrAsk == 'B')].reset_index(drop=True)
    order_book_bid = []
    order_book_ask = []
    x1 = data[(data.BidOrAsk == 'A')].TimeStamp.unique()
    x2 = data[(data.BidOrAsk == 'B')].TimeStamp.unique()
    temp_ask = 0
    temp_bid = 0
    
    def first_order_create(index_,data):
        """Build the initial per-order book from all rows up to one timestamp.

        The ``index_``-th unique ``TimeStamp`` of ``data`` is selected and every
        row up to and including the last row whose ``TimeStamp`` contains that
        text is used. Rows are split into bids (``BidOrAsk == 'B'``, sorted by
        price descending) and asks (``'A'``, sorted ascending); rows with a
        zero ``QuantityDifference`` are dropped.

        Parameters
        ----------
        index_ : int
            Position in ``data.TimeStamp.unique()`` of the timestamp to use.
        data : DataFrame with the columns ``TimeStamp``, ``BidOrAsk``,
            ``Price``, ``OrderNumber``, ``QuantityDifference`` and
            ``QuantityDifference_``.

        Returns
        -------
        tuple
            ``(order_book_bid, order_book_ask, order_book_bid_ask, timestamp,
            y, index_find)``: the per-order bid and ask books (indexed from 0),
            the side-by-side per-price sums without zero-quantity levels, the
            selected timestamp, the slice of ``data`` that was used, and the
            index of its last row.
        """
        timestamp = data.TimeStamp.unique()[index_]
        print('timestamp = %s'%(timestamp))

        # Literal substring match: the timestamp text contains '.', which would
        # act as a wildcard if the pattern were interpreted as a regex.
        index_find = data[data['TimeStamp'].str.contains(timestamp,regex = False)].index[-1]
        y = data[:index_find + 1]

        order_cols = ["Price","OrderNumber","QuantityDifference","QuantityDifference_"]
        level_cols = ['Price','QuantityDifference']
        bid_orders = y[(y.BidOrAsk == 'B')][order_cols].sort_values(by=['Price'], ascending = [False])
        ask_orders = y[(y.BidOrAsk == 'A')][order_cols].sort_values(by=['Price'], ascending = [True])
        order_book_bid = bid_orders[bid_orders.QuantityDifference != 0].reset_index(drop = True)
        order_book_ask = ask_orders[ask_orders.QuantityDifference != 0].reset_index(drop = True)

        # Bid levels keep the (descending) row order; ask levels are sorted ascending.
        order_book_bid_sum = order_book_bid[level_cols].groupby(by = ['Price'],as_index = False,sort = False).sum()
        order_book_ask_sum = order_book_ask[level_cols].groupby(by = ['Price'],as_index = False).sum()

        bid_zero_levels = order_book_bid_sum[order_book_bid_sum.QuantityDifference == 0.0]
        ask_zero_levels = order_book_ask_sum[order_book_ask_sum.QuantityDifference == 0.0]
        has_bid_zero = len(bid_zero_levels) != 0
        has_ask_zero = len(ask_zero_levels) != 0

        if has_bid_zero and has_ask_zero:
            print('Exist Bid Ask Order Book Price = Zero')
        elif has_bid_zero:
            print('Exist Bid Order Book Price = Zero')
        elif has_ask_zero:
            print('Exist Ask Order Book Price = Zero')

        # NOTE(review): only the first zero-sum price level per side is removed
        # from the per-order book, as before; confirm whether all should go.
        if has_bid_zero:
            # .iloc[0] is positional; a label lookup ([0]) raises KeyError
            # unless the zero-sum level happens to carry index label 0.
            price_bid_zero = bid_zero_levels['Price'].iloc[0]
            # Re-index so callers can keep treating index labels as positions.
            order_book_bid = order_book_bid[order_book_bid.Price != price_bid_zero].reset_index(drop = True)
        if has_ask_zero:
            price_ask_zero = ask_zero_levels['Price'].iloc[0]
            order_book_ask = order_book_ask[order_book_ask.Price != price_ask_zero].reset_index(drop = True)

        order_book_bid_sum = order_book_bid_sum[order_book_bid_sum.QuantityDifference != 0].reset_index(drop = True)
        order_book_ask_sum = order_book_ask_sum[order_book_ask_sum.QuantityDifference != 0].reset_index(drop = True)
        order_book_bid_ask = pd.concat([order_book_bid_sum[level_cols],order_book_ask_sum[level_cols]],axis = 1)

        return order_book_bid, order_book_ask, order_book_bid_ask, timestamp, y, index_find
    
    def with_first_order_book(best_price_number,year,month,day,timestamp,order_book_bid_ask,index_):
        """Append one initial-book snapshot to the output CSV.

        Writes to ``order_book_<best_price_number>_<year>_<month>_<day>.csv``
        in append mode: the column header row (only when ``index_ == 0``), a
        ``TimeStamp`` row, then at most ``best_price_number`` rows of
        ``order_book_bid_ask``.

        Parameters
        ----------
        best_price_number : int
            Maximum number of level rows to write; also part of the file name.
        year, month, day : converted with ``str`` for the file name.
        timestamp : value written next to the ``"TimeStamp"`` label.
        order_book_bid_ask : DataFrame whose rows are written as-is.
        index_ : int
            Attempt counter; ``0`` triggers the header row.
        """
        file_name = 'order_book_'+str(best_price_number)+'_'+str(year)+'_'+str(month)+'_'+str(day)+'.csv'
        # Number of level rows to emit; clamped so a non-positive depth writes nothing.
        depth = max(0, min(len(order_book_bid_ask), best_price_number))

        # newline='' lets the csv module control line endings itself; without it
        # every row is followed by a blank line on platforms that translate '\n'.
        with open(file_name,'a',newline = '') as f:
            writer = csv.writer(f)
            if index_ == 0:
                writer.writerow(["Bid","Bid_Quantity","Ask","Ask_Quantity"])
            writer.writerow(["TimeStamp",timestamp])
            writer.writerows(order_book_bid_ask.iloc[:depth].values.tolist())
    
    # 建立初始委託簿
    first_order_book_data_lenth = 0
    order_book_bid_time = 0
    order_book_ask_time = 0
    
    for time in range(0,1000,1):
        index_ = time
        order_book_bid, order_book_ask, order_book_bid_ask,\
        timestamp, y, index_find = first_order_create(index_, data)  

        if len(order_book_bid) != 0 and len(order_book_ask) != 0:
            with_first_order_book(best_price_number,year,month,day,timestamp,order_book_bid_ask,index_)  
            break
        elif len(order_book_bid) == 0 and len(order_book_ask) != 0: 
            with_first_order_book(best_price_number,year,month,day,timestamp,order_book_bid_ask,index_)
            temp_ask +=1
        elif len(order_book_bid) != 0 and len(order_book_ask) == 0:
            with_first_order_book(best_price_number,year,month,day,timestamp,order_book_bid_ask,index_)
            temp_bid +=1
            
    print('-------------------------------------------')
    print('index_find = %s'%(index_find))
    
    # 这里作为举例，只取了100条，可以取全部length的data，生成的结果就是"order_book_3_2014_1_2.csv"文件
    for i in range(index_find + 1,100,1):#len(data), 1):
        print('---------------------------------')
        print(data[['Price','QuantityDifference','BidOrAsk','TimeStamp']][i:i+1])
        print(i ,temp_bid, temp_ask)
        print(data.TimeStamp[i], x2[temp_bid], x1[temp_ask])
        time_second = int(data[i:i+1].TimeStamp.iloc[0][18]) + int(data[i:i+1].TimeStamp.iloc[0][17])*10 +\
                      int(data[i:i+1].TimeStamp.iloc[0][15])*60 + int(data[i:i+1].TimeStamp.iloc[0][14])*600 +\
                      int(data[i:i+1].TimeStamp.iloc[0][12])*3600 + int(data[i:i+1].TimeStamp.iloc[0][11])*36000

        if time_second > 57600:
            break
        if time_second == 32400 and time_second >= 57300:
            order_book_bid = order_book_bid.sort(['Price'],ascending = [False]).reset_index(drop = True)
            order_book_ask = order_book_ask.sort(['Price'],ascending = [True]).reset_index(drop = True)
            pass 
        
        if data.BidOrAsk[i] == 'A':
            data_ask_Quantity = data.BestQuantity[i]
            if int(data[['QuantityDifference']][i:i+1].values) > 0 :
                if order_book_bid.Price[0] >= data[i:i+1].Price.iloc[0] and time_second < 32400:
                    for k in range(0,len(order_book_bid)):
                        diff = order_book_bid.QuantityDifference_[k] - data[i:i+1].QuantityDifference_.iloc[0] 
                        if order_book_bid.Price[k] >= data[i:i+1].Price.iloc[0] and diff >= 0:
                            order_book_bid.QuantityDifference_[k] = diff
                            data[i:i+1].QuantityDifference_.iloc[0] = 0
                            break
                        elif order_book_bid.Price[k] >= data[i:i+1].Price.iloc[0] and diff < 0: 
                            order_book_bid.QuantityDifference_[k] = 0
                            data[i:i+1].QuantityDifference_.iloc[0] = -diff
                            pass
                        else:
                            break
                if data.TimeStamp[i] == x1[temp_ask]:
                    
                    position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                    order_book_ask = insert(order_book_ask,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)
                    if time_second > 32400 and time_second < 57300: 
                        if position_ == 0 and len(order_book_ask) > 1:   
                            if order_book_ask[position_ + 1:position_ + 1 + 1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:  
                                print('Some error1(Ask & Q>0 & timestamp not change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:
                                pass
                        elif 0 < position_< (len(order_book_ask)-1):
                            if order_book_ask[position_ + 1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_ask[position_ - 1:position_ - 1 + 1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:
                                print('Some error1(Ask & Q>0 & timestamp not change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:
                                pass
                        elif position_ == len(order_book_ask)-1:    
                            if order_book_ask[position_ - 1:position_ - 1 + 1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:        
                                print('Some error1(Ask & Q>0 & timestamp not change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:
                                pass
                        elif position_ == 0 and len(order_book_ask) == 1:
                            pass
                    else:
                        pass
                elif data.TimeStamp[i] != x1[temp_ask]:
                    if temp_ask == 0:
                        temp_ask = temp_ask + 1
                        best_price = data[i:(i+1)]['BestPrice']
                        position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                        order_book_ask = insert(order_book_ask,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)                  

                        if time_second > 32400 and time_second < 57300:
                            if position_ == 0 and len(order_book_ask) > 1:
                                if order_book_ask[position_+1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:
                                    print('Some error2(Ask & Q>0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                    break
                                else:
                                    pass
                            elif 0 < position_< len(order_book_ask)-1:
                                if order_book_ask[position_+ 1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_ask[position_-1:position_-1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]: 
                                    print('Some error2(Ask & Q>0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))    
                                    break
                                else:
                                    pass
                            elif position_ == len(order_book_ask)-1: 
                                if order_book_ask[position_-1:position_-1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]: 
                                    print('Some error2(Ask & Q>0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))     
                                    break
                                else:
                                    pass
                            elif position_ == 0 and len(order_book_ask) == 1:
                                pass
                        else:
                            pass
                    else:
                        order_book_bid_sum,order_book_ask_sum = order_book_to_csv(order_book_bid,order_book_ask,data,i)
                        if time_second > 32400 and time_second < 57300:
                            if round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) > 0.03 or\
                            round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) < 0:
                                if data[i-1:i].BidOrAsk.iloc[0] == 'A':
                                    if order_book_ask_sum[0:1].values.tolist()[0][1] == data[i-1:i].BestQuantity.iloc[0]:
                                        pass
                                    else:
                                        # print 'Best ask quantity is false'
                                        pass
                                        #break 
                                else:
                                    j = i - 1
                                    while j >= 1:
                                        if data[j-1:j].BidOrAsk.iloc[0] == 'A':
                                            if order_book_ask_sum[0:1].values.tolist()[0][1] == data[j-1:j].BestQuantity.iloc[0]:
                                                break
                                        else:
                                            j = j - 1
                                            pass
                            else:
                                pass
                        else:
                            pass
                        position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                        temp_ask = temp_ask + 1
                       
                        order_book_ask = insert(order_book_ask,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)
                        if time_second > 32400 and time_second < 57300:
                            if position_ == 0:
                                 
                                if order_book_ask[position_+1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]: 
                                    print('Some error3(Ask & Q>0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                    break
                                else:
                                    pass
                            elif 0 < position_< len(order_book_ask)-1:
                                if order_book_ask[position_+1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]: 
                                    print('Some error3(Ask & Q>0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                    break
                                else:
                                    pass
                            elif position_ == len(order_book_ask)-1: 
                                if order_book_ask[position_-1:position_-1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]: 
                                    print('Some error3(Ask & Q>0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))      
                                    break
                                else:
                                    pass
                            elif position_ == 0 and len(order_book_ask[0]) == 1:
                                pass
                        else:
                            pass
            elif int(data[['QuantityDifference']][i:i+1].values) < 0:    
                if data.TimeStamp[i] == x1[temp_ask]: 
                    order_number_ =  data['OrderNumber'][i:i + 1].iloc[0]
                    position_ = order_book_ask[order_book_ask.OrderNumber == order_number_].index[0]                
                    price_ = data[i:i+1]['Price'].iloc[0]
                    if time_second > 32400 and time_second < 57300:
                        if position_ == 0 and len(order_book_ask) > 1:
                            if order_book_ask[position_+1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:    
                                print('Some error4(Ask & Q<0 & timestamp not change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:                  
                                pass
                        elif 0 < position_< len(order_book_ask)-1:
                            if order_book_ask[position_+1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:                             
                                print('Some error4(Ask & Q<0 & timestamp not change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:  
                                pass 
                        elif position_ == len(order_book_ask)-1:
                            if position_ > 0 and order_book_ask[position_-1:position_-1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]: 
                                print('Some error4(Ask & Q<0 & timestamp not change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))  
                                break
                            elif position_ == 0:
                                pass                        
                            else:                     
                                pass
                        elif position_ == 0 and len(order_book_ask) == 1:    
                            pass
                    else:
                        pass
                    if order_book_ask[(order_book_ask.OrderNumber == order_number_)&(order_book_ask.Price == price_)]['QuantityDifference'].iloc[0] == abs(data[i:i+1]['QuantityDifference'].iloc[0]):
                        order_book_ask = order_book_ask.drop(order_book_ask.index[[position_]]).reset_index(drop = True)                
                    else:
                        order_book_ask['QuantityDifference'][order_book_ask.OrderNumber == order_number_] = order_book_ask['QuantityDifference'][order_book_ask.OrderNumber == order_number_] + data[i:i+1]['QuantityDifference'].iloc[0]

                elif data.TimeStamp[i] != x1[temp_ask]:
                    
                    order_book_bid_sum,order_book_ask_sum = order_book_to_csv(order_book_bid,order_book_ask,data,i)
                    if time_second > 32400 and time_second < 57300:
                        if round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) > 0.03 or\
                        round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) < 0:
                            if data[i-1:i].BidOrAsk.iloc[0] == 'A':
                                if order_book_ask_sum[0:1].values.tolist()[0][1] == data[i-1:i].BestQuantity.iloc[0]:
                                    pass
                                else:
                                    print('Best ask quantity is false')
                                    #break
                                    pass
                            else:
                                j = i - 1
                                while j >= 1:
                                    if data[j-1:j].BidOrAsk.iloc[0] == 'A':
                                        if order_book_ask_sum[0:1].values.tolist()[0][1] == data[j-1:j].BestQuantity.iloc[0]:
                                            break
                                    else:
                                        j = j - 1
                                        pass
                    else:
                        pass
                    order_number_ =  data['OrderNumber'][i : i + 1].iloc[0]
                    position_ = order_book_ask[order_book_ask.OrderNumber == order_number_].index[0]
                    price_ = data[i:i+1]['Price'].iloc[0]
                    temp_ask = temp_ask + 1
                    if time_second > 32400 and time_second < 57300:
                        if position_ == 0 and len(order_book_ask) > 1:
                            if order_book_ask[position_ + 1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:
                                print('Some error5(Ask & Q<0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))   
                                break
                            else:
                                pass
                        elif 0 < position_< len(order_book_ask)-1:    
                            if order_book_ask[position_ + 1:position_+1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:                             
                                print('Some error5(Ask & Q<0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))     
                                break
                            else:
                                pass
                        elif position_ == len(order_book_ask)-1:
                            if position_ > 0 and order_book_ask[position_-1:position_-1 + 1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]: 
                                print('Some error5(Ask & Q<0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))     
                                break
                            elif position_ == 0:
                                pass                       
                            else:                       
                                pass
                        elif position_ == 0 and len(order_book_ask) == 1:
                            pass      
                    else:
                        pass
                    if order_book_ask[(order_book_ask.OrderNumber == order_number_)&(order_book_ask.Price == price_)]['QuantityDifference'].iloc[0] == abs(data[i:i+1]['QuantityDifference'].iloc[0]):                    
                        order_book_ask = order_book_ask.drop(order_book_ask.index[[position_]]).reset_index(drop = True)
                    else:
                        order_book_ask['QuantityDifference'][order_book_ask.OrderNumber == order_number_] = order_book_ask['QuantityDifference'][order_book_ask.OrderNumber == order_number_] + data[i:i+1]['QuantityDifference'].iloc[0]

        elif data.BidOrAsk[i] == 'B':
            data_bid_Quantity = data.BestQuantity[i]
            if int(data[['QuantityDifference']][i:i+1].values) > 0: 
               
                if order_book_ask.Price[0] <= data[i:i+1].Price.iloc[0] and time_second < 32400:
                    for k in range(0,len(order_book_ask)):
                        diff = order_book_ask.QuantityDifference_[k] - data[i:i+1].QuantityDifference_.iloc[0]
                        if order_book_ask.Price[k] <= data[i:i+1].Price.iloc[0] and diff >= 0:
                            order_book_ask.QuantityDifference_[k] = diff
                            data[i:i+1].QuantityDifference_.iloc[0] = 0
                            break
                        elif order_book_ask.Price[k] <= data[i:i+1].Price.iloc[0] and diff < 0:
                            order_book_ask.QuantityDifference_[k] = 0
                            data[i:i+1].QuantityDifference_.iloc[0] = - diff
                            pass
                        else:
                            break
                if data.TimeStamp[i] == x2[temp_bid]:
                    position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                    order_book_bid = insert(order_book_bid,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)                
                    if time_second > 32400 and time_second < 57300:
                        if position_ == 0 and len(order_book_bid) > 1:              
                            if order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                                            
                                print('Some error1(Bid & Q>0 & timestamp not change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:                    
                                pass 
                        elif 0 < position_< len(order_book_bid)-1: 
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                             
                                print('Some error1(Bid & Q>0 & timestamp not change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break    
                            else:   
                                pass   
                        elif position_ == len(order_book_bid)-1 and len(order_book_bid) > 1:   
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:                        
                                print('Some error1(Bid & Q>0 & timestamp not change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:                
                                pass
                        elif position_ == 0 and len(order_book_bid[temp_bid]) == 1:
                            pass 
                    else:
                        pass
                elif data.TimeStamp[i] != x2[temp_bid]:
                    if temp_bid == 0:
                        best_price = data[i:(i+1)]['BestPrice']             
                        position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                        temp_bid = temp_bid + 1                 
                        order_book_bid = insert(order_book_bid,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)                                     
                        if time_second > 32400 and time_second < 57300:
                            if position_ == 0  and len(order_book_bid) > 1:
                                if order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0] or order_book_bid['Price'][0:1].iloc[0] != data['BestPrice'][i]:                                
                                    print('Some error2(Bid & Q>0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0]) + data['OrderNumber'][i:i+1].iloc[0])
                                    break
                                else:                   
                                    pass      
                            elif 0 < position_< len(order_book_bid)-1:    
                                if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                                
                                    print('Some error2(Bid & Q>0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                    break
                                else: 
                                    pass
                            elif position_ == len(order_book_bid)-1:
                                if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid['Price'][0:1].iloc[0] != data['BestPrice'][i]:                                
                                    print('Some error2(Bid & Q>0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))                                        
                                    pass
                                else:
                                    pass
                            elif position_ == 0 and len(order_book_bid) == 1: 
                                    pass
                        else:
                            pass
                    else:
                        if time_second > 32400 and time_second < 57300:
                            if round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) > 0.03 or\
                            round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) < 0:
                                order_book_bid_sum,order_book_ask_sum = order_book_to_csv(order_book_bid,order_book_ask,data,i)                    
                                if data[i-1:i].BidOrAsk.iloc[0] == 'B':
                                    if order_book_bid_sum[0:1].values.tolist()[0][1] == data[i-1:i].BestQuantity.iloc[0]:
                                        pass
                                    else:
                                        print('Best bid quantity is false')
                                        #break
                                        pass
                                else:
                                    j = i - 1
                                    while j >= 1:
                                        if data[j-1:j].BidOrAsk.iloc[0] == 'B':
                                            if order_book_bid_sum[0:1].values.tolist()[0][1] == data[j-1:j].BestQuantity.iloc[0]:
                                                break
                                            else:
                                                print('Best bid quantity is false')
                                                #break
                                                pass
                                        else:
                                            j = j - 1
                                            pass
                        else:
                            pass
                        position_ = int(data[['OrderBookPosition']][i:i+1].iloc[0]) - 1 
                        temp_bid = temp_bid + 1 
                        order_book_bid = insert(order_book_bid,data[['Price','OrderNumber','QuantityDifference','QuantityDifference_']][i:i+1],position_)                       
                        if time_second > 32400 and time_second < 57300:
                            if position_ == 0 and len(order_book_bid) > 1:
                                if order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0] or order_book_bid['Price'][0:1].iloc[0] != data['BestPrice'][i]:                                
                                    print('Some error3(Bid & Q>0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))     
                                    break
                                else:                    
                                    pass
                            elif 0 < position_< len(order_book_bid)-1:    
                                if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                                
                                    print('Some error3(Bid & Q>0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                    break
                                else: 
                                    pass
                            elif position_ == len(order_book_bid)-1:   
                                if order_book_bid[position_-1:position_-1 + 1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid['Price'][0:1].iloc[0] != data['BestPrice'][i]:                                
                                    print('Some error3(Bid & Q>0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))      
                                    break
                                else:                      
                                    pass
                            elif position_ == 0 and len(order_book_bid[0]) == 1:
                                pass
                        else:
                            pass
            elif int(data[['QuantityDifference']][i:i+1].values) < 0:    
                if data.TimeStamp[i] == x2[temp_bid]: 
                    order_number_ =  data['OrderNumber'][i : i + 1].iloc[0]
                    position_ = order_book_bid[order_book_bid.OrderNumber == order_number_].index[0]                
                    price_ = data[i:i+1]['Price'].iloc[0]
                    if time_second > 32400 and time_second < 57300:
                        if position_ == 0 and len(order_book_bid) > 1:    
                            if order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                            
                                print('Some error4(Bid & Q<0 & timestamp not change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:                     
                                pass               
                        elif 0 < position_< len(order_book_bid)-1:      
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                            
                                print('Some error4(Bid & Q<0 & timestamp not change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else: 
                                pass
                        elif position_ == len(order_book_bid)-1:    
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:                            
                                print('Some error4(Bid & Q<0 & timestamp not change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))    
                                break
                            elif position_ == 0:
                                pass
                            else:                        
                                pass
                        elif position_ == 0 and len(order_book_bid) == 1:
                            pass
                    else:
                        pass
                    if order_book_bid[(order_book_bid.OrderNumber == order_number_)&(order_book_bid.Price == price_)]['QuantityDifference'].iloc[0] == abs(data[i:i+1]['QuantityDifference'].iloc[0]):                    
                        order_book_bid = order_book_bid.drop(order_book_bid.index[[position_]]).reset_index(drop = True)                
                    else:
                        order_book_bid['QuantityDifference'][order_book_bid.OrderNumber == order_number_] = order_book_bid['QuantityDifference'][order_book_bid.OrderNumber == order_number_] + data[i:i+1]['QuantityDifference'].iloc[0]                            
                elif data.TimeStamp[i] != x2[temp_bid]:
                    if time_second > 32400 and time_second < 57300:
                        if round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) > 0.03 or\
                        round(float(data[i:i+1].TimeStamp.iloc[0][18:29]) - float(data[i-1:i].TimeStamp.iloc[0][18:28]),4) < 0:
                            order_book_bid_sum,order_book_ask_sum = order_book_to_csv(order_book_bid,order_book_ask,data,i)
                            if data[i-1:i].BidOrAsk.iloc[0] == 'B':
                                if order_book_bid_sum[0:1].values.tolist()[0][1] == data[i-1:i].BestQuantity.iloc[0]:
                                    pass
                                else:
                                    print('Best bid quantity is false')
                                    #break
                                    pass
                            else:
                                j = i - 1
                                while j >= 1:
                                    if data[j-1:j].BidOrAsk.iloc[0] == 'B':
                                        if order_book_bid_sum[0:1].values.tolist()[0][1] == data[j-1:j].BestQuantity.iloc[0]:
                                            break
                                        else:
                                            print('Best bid quantity is false')
                                            #break
                                            pass
                                    else:
                                        j = j - 1
                                        pass
                    else:
                        pass
                    order_number_ =  data['OrderNumber'][i:i+1].iloc[0]
                    position_ = order_book_bid[order_book_bid.OrderNumber == order_number_].index[0]
                    price_ = data[i:i+1]['Price'].iloc[0]
                    temp_bid = temp_bid + 1
                    if time_second > 32400 and time_second < 57300:
                        if position_ == 0 and len(order_book_bid) > 1:   
                            if order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                            
                                print('Some error5(Bid & Q<0 & timestamp change & 1),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:                   
                                pass   
                        elif 0 < position_< len(order_book_bid)-1:    
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0] or order_book_bid[position_+1:position_+1+1]["Price"].iloc[0] > data['Price'][i:i+1].iloc[0]:                            
                                print('Some error5(Bid & Q<0 & timestamp change & 2),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            else:  
                                pass
                        elif position_ == len(order_book_bid)-1:    
                            if order_book_bid[position_-1:position_-1+1]["Price"].iloc[0] < data['Price'][i:i+1].iloc[0]:                            
                                print('Some error5(Bid & Q<0 & timestamp change & 3),position = %d,index = %d,price = %d,OrderNumber = %s'%(position_,i,data['Price'][i:i+1].iloc[0],data['OrderNumber'][i:i+1].iloc[0]))
                                break
                            elif position_ == 0:
                                pass
                            else:                      
                                pass    
                        elif position_ == 0 and len(order_book_bid) == 1:
                            pass  
                    else:
                        pass
                    if order_book_bid[(order_book_bid.OrderNumber == order_number_)&(order_book_bid.Price == price_)]['QuantityDifference'].iloc[0] == abs(data[i:i+1]['QuantityDifference'].iloc[0]):
                        order_book_bid = order_book_bid.drop(order_book_bid.index[[position_]]).reset_index(drop = True)                
                    else:
                        order_book_bid['QuantityDifference'][order_book_bid.OrderNumber == order_number_] = order_book_bid['QuantityDifference'][order_book_bid.OrderNumber == order_number_] + data[i:i+1]['QuantityDifference'].iloc[0]
    return data, order_book_bid, order_book_ask, order_book_bid_sum, order_book_ask_sum

In [8]:
import time

if __name__ == '__main__':
    # Path prefix of the raw full-order-book CSVs. order_book_tranform's
    # read_file helper appends '<year>.<MM>.<DD>.csv' to this prefix.
    path = './data/CN_Futures_'
    year = 2014
    # Number of bid/ask price levels to emit in each generated tick.
    best_price_number = 3
    
    # Future delivery months -- front-month contract code : series
    # 2014 : CNF14: Jan, CNG14: Feb, CNH14: Mar, CNJ14: Apr, CNK14: May, CNM14: Jun,
    #        CNN14: Jul, CNQ14: Aug, CNU14: Sep, CNV14: Oct, CNX14: Nov, CNZ14: Dec
    # 2015 : CNF15: Jan, CNG15: Feb, CNH15: Mar, CNJ15: Apr, CNK15: May, CNM15: Jun,
    #        CNN15: Jul, CNQ15: Aug, CNU15: Sep, CNV15: Oct, CNX15: Nov, CNZ15: Dec
        
    series = 'CNF14' 
    month = 1
    # Days of `month` to process; each entry triggers one full transform run.
    day_ = [2]
    for i in day_:
        print(i)
        # perf_counter() is monotonic, so the elapsed time cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        # The returned frames are kept as notebook globals so the following
        # cells can display them; with several days only the last run survives.
        data, order_book_bid, order_book_ask, order_book_bid_sum, order_book_ask_sum = order_book_tranform(year,month,i,path,best_price_number,series)
        end = time.perf_counter()
        print("Total time = %f"%(end - start))   

2
timestamp = 2014-01-02D04:19:51.857166800
-------------------------------------------
index_find = 16
---------------------------------
    Price  QuantityDifference BidOrAsk                      TimeStamp
17      0               210.0        A  2014-01-02D08:45:00.130582600
17 0 0
2014-01-02D08:45:00.130582600 2014-01-02D04:19:51.857166800 2014-01-02D04:19:51.857166800
---------------------------------
     Price  QuantityDifference BidOrAsk                      TimeStamp
18  700500                -2.0        B  2014-01-02D08:45:00.131582600
18 0 1
2014-01-02D08:45:00.131582600 2014-01-02D04:19:51.857166800 2014-01-02D08:45:00.130582600
---------------------------------
     Price  QuantityDifference BidOrAsk                      TimeStamp
19  682000                 2.0        B  2014-01-02D08:45:00.131582600
19 1 1
2014-01-02D08:45:00.131582600 2014-01-02D08:45:00.131582600 2014-01-02D08:45:00.130582600
---------------------------------
     Price  QuantityDifference BidOrAsk      

In [9]:
# Display the `data` frame returned by order_book_tranform for the processed day.
data

Unnamed: 0,Series,SequenceNumber,TimeStamp,OrderNumber,OrderBookPosition,Price,QuantityDifference,Trade,BidOrAsk,BestPrice,BestQuantity,QuantityDifference_
0,CNF14,1,2014-01-02D04:19:51.857166800,642F9A8039E51EE5,1,757000,3.0,,A,715500,2,3.0
1,CNF14,2,2014-01-02D04:19:51.857166800,490D725B88E56DE5,1,755000,10.0,,A,715500,2,10.0
2,CNF14,3,2014-01-02D04:19:51.857166800,E1C9F25394A679A6,1,719500,1.0,,A,715500,2,1.0
3,CNF14,4,2014-01-02D04:19:51.857166800,ECA71A88142AF92B,4,768500,1.0,,A,715500,2,1.0
4,CNF14,5,2014-01-02D04:19:51.857166800,E325782FEB07CE67,1,700500,2.0,,B,700500,2,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
97475,CNF14,97476,2014-01-03D02:20:00.060617300,E3B7552341112551,13,770000,-6.0,,A,708500,2,-6.0
97476,CNF14,97477,2014-01-03D02:20:00.060617300,473C5923A5D088F0,13,770000,-2.0,,A,708500,2,-2.0
97477,CNF14,97478,2014-01-03D02:20:00.060617300,633F552361CA460A,13,770000,-6.0,,A,708500,2,-6.0
97478,CNF14,97479,2014-01-03D02:20:00.060617300,47BC592368224B42,13,770000,-6.0,,A,708500,2,-6.0


In [10]:
# Display the bid-side book returned by order_book_tranform: one row per order
# with columns Price, OrderNumber, QuantityDifference, QuantityDifference_.
order_book_bid

Unnamed: 0,Price,OrderNumber,QuantityDifference,QuantityDifference_
0,709500,C7C3FA1363164816,210.0,0.0
1,707000,43E3747FD245B565,1.0,1.0
2,702000,C36B74779D8980A9,1.0,1.0
3,701000,EF3D516B3FA423E4,1.0,1.0
4,700500,E325782FEB07CE67,2.0,2.0
5,699000,4959765357F03CF0,2.0,2.0
6,697500,43A57467BFB59F55,1.0,1.0
7,695500,E70A765B175EFC1F,1.0,1.0
8,690500,B3F5144E612645E6,1.0,1.0
9,690000,E27C9AD83BDC20DC,2.0,2.0


In [11]:
# Display the ask-side book returned by order_book_tranform (the rendered
# output shows the same column layout as the bid-side book).
order_book_ask

Unnamed: 0,Price,OrderNumber,QuantityDifference,QuantityDifference_
0,709500,47C3FA130EB2F3B3,210.0,0.0
1,712000,6F35516BC930AD70,2.0,2.0
2,712000,C363747FE0C9C3E9,2.0,2.0
3,715500,C365FC27C2C8A628,2.0,2.0
4,717500,CBADF86F77EA578A,1.0,1.0
5,719500,E1C9F25394A679A6,1.0,1.0
6,720000,EFB5D16396077A47,2.0,2.0
7,725000,6BF7787F751753F7,4.0,4.0
8,725000,6FBD516B9E438283,1.0,1.0
9,731500,E14C560FA31A855A,1.0,1.0


In [15]:
# Display the ask-side summary produced inside order_book_tranform by
# order_book_to_csv. NOTE(review): the output suggests quantities are summed
# per price level -- confirm against order_book_to_csv.
order_book_ask_sum

Unnamed: 0,Price,QuantityDifference
0,709500,210.0
1,712000,4.0
2,715500,2.0
3,717500,1.0
4,719500,1.0
5,720000,2.0
6,725000,5.0
7,731500,1.0
8,736000,2.0
9,738000,4.0


In [13]:
# Display the bid-side summary produced inside order_book_tranform by
# order_book_to_csv. NOTE(review): the output suggests quantities are summed
# per price level -- confirm against order_book_to_csv.
order_book_bid_sum

Unnamed: 0,Price,QuantityDifference
0,709500,210.0
1,707000,1.0
2,702000,1.0
3,701000,1.0
4,700500,2.0
5,699000,2.0
6,697500,1.0
7,695500,1.0
8,690500,1.0
9,690000,77.0
