## Libraries and Settings

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import importlib
from pprint import pprint

In [None]:
import clean_and_transform as cat
import feature_engineering as fe

# Re-import the two local helper modules so that edits made to their source
# files are picked up without restarting the kernel.
importlib.reload(cat)
importlib.reload(fe)

In [None]:
# Alternative backend for interactive figures (uncomment to use it instead of inline):
# %matplotlib notebook
%matplotlib inline
# Default size (12 x 8) and resolution (200 dpi) for every figure drawn below.
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'figure.dpi': 200,
})

## Load

In [None]:
# Read the level-one and level-two tick data files (paths are relative to the
# working directory), in that order.
L1, L2 = (
    pd.read_csv(csv_path)
    for csv_path in ("ABC_Level_One_Tick_Data.csv", "ABC_Level_Two_Tick_Data.csv")
)

## Format and clean

In [None]:
# Discard every row that has at least one missing value, in both tables.
L1 = L1.dropna()
L2 = L2.dropna()

In [None]:
# Convert the timestamp column of each table to datetimes.
# The two files use different layouts, hence the different format strings:
#   L1 / Time_Hour   -> year-month-day with seconds, followed by the literal text "+00:00"
#   L2 / Time_Minute -> day/month/year, minutes only
# NOTE(review): presumably the helper treats these as strptime-style formats and
# returns the updated dataframe - confirm in clean_and_transform.
L1 = cat.format_columns_as_date(L1, 'Time_Hour', "%Y-%m-%d %H:%M:%S+00:00")
L2 = cat.format_columns_as_date(L2, 'Time_Minute', "%d/%m/%Y %H:%M")

In [None]:
# Column-name mapping for the mid-price calculation: one dictionary per book
# level (1 to 10) naming the bid and ask price inputs and the Lx_MidPrice
# column that will hold the result.
list_of_column_name_dictionaries = [
    {
        "bidpricecol": "L{}_BidPrice".format(level),
        "askpricecol": "L{}_AskPrice".format(level),
        "midpricecol": "L{}_MidPrice".format(level),
    }
    for level in range(1, 11)
]

# Compute the mid-price columns for all ten levels using the column-name
# mapping built above; the helper's result replaces L2.
L2 = fe.calculate_mid_prices(L2, list_of_column_name_dictionaries)

In [None]:
# Column-name mapping for the cost calculation: one dictionary per book level
# (1 to 10) naming the bid price and mid price inputs and the Lx_Cost column
# that will hold the result.
list_of_column_name_dictionaries = [
    {
        "bidpricecol": "L{}_BidPrice".format(level),
        "midpricecol": "L{}_MidPrice".format(level),
        "costcol": "L{}_Cost".format(level),
    }
    for level in range(1, 11)
]

# Compute the Lx_Cost columns for all ten levels; the helper's result replaces L2.
# NOTE(review): presumably each cost is derived from the bid and mid price
# columns named in the mapping - confirm in feature_engineering.
L2 = fe.calculate_costs(L2, list_of_column_name_dictionaries)

In [None]:
# Column-name mapping used to deal with situations where the bid price is
# greater than the ask price (no trades will go ahead there in this aggregated
# data): one dictionary per book level (1 to 10) naming the price and size
# columns on both sides.
list_of_column_name_dictionaries = [
    {
        "bidpricecol": "L{}_BidPrice".format(level),
        "askpricecol": "L{}_AskPrice".format(level),
        "bidsizecol": "L{}_BidSize".format(level),
        "asksizecol": "L{}_AskSize".format(level),
    }
    for level in range(1, 11)
]

# Apply the bid > ask handling to every level; the helper's result replaces L2.
# NOTE(review): the helper's name suggests it zeroes the bid/ask sizes on the
# affected rows rather than dropping those rows - confirm in clean_and_transform.
L2 = cat.zero_size_when_bid_gt_ask(L2, list_of_column_name_dictionaries)

In [None]:
# Index L2 by its timestamp so rows can be subset by time quickly; the
# Time_Minute column itself is kept (drop=False).
L2 = L2.set_index(["Time_Minute"], drop=False)

In [None]:
# Keep only the rows whose time of day falls within trading hours,
# 09:00 to 16:00 (pandas includes both end points by default).
L2 = L2.between_time(start_time="09:00", end_time="16:00")

In [None]:
# Calculate, for each day, the total bid size across all levels.
# The helper's result replaces L2 (presumably L2 plus the new daily figure - confirm).
L2 = fe.calculate_total_bid_size_by_day(L2)

In [None]:
# Calculate, for each day, the total bid size in level 1 only
# (the second argument selects the level). The helper's result replaces L2.
L2 = fe.calculate_level_n_total_bid_size_by_day(L2, 1)

In [None]:
# Calculate, for each day, the value variance in level 1
# (the second argument selects the level). The helper's result replaces L2.
# NOTE: according to the comment in the following cell, the Time_Minute index is
# lost during these per-day steps, which is why it is set again afterwards.
L2 = fe.calculate_level_n_value_variance_by_day(L2, 1)

In [None]:
# Restore the timestamp index, which the per-day calculations above drop; the
# Time_Minute column itself is kept (drop=False).
L2 = L2.set_index(["Time_Minute"], drop=False)

## Create Order Data for Model Training

In [None]:
# Create a list of fake order dictionaries. The keyword settings are gathered
# in one mapping so the limits are easy to find and tweak.
# NOTE(review): no random seed is set in this notebook, so the generated orders
# presumably differ from run to run - confirm in feature_engineering.
fake_order_settings = {
    "num_orders": 20000,
    "min_quantity": 1e4,
    "max_quantity": 1e8,
    "min_horizon": 1,
    "max_horizon": 60,
    "min_datetime": "2018-02-05 09:00:00",
    "max_datetime": "2018-06-12 15:56:00",
}
fake_orders = fe.create_fake_orders(L2, **fake_order_settings)

In [None]:
# Fulfil every fake order against the L2 data. The result is a dataframe of
# the outcomes, which is written to CSV further down.
fulfilled_order_data = fe.fulfil_list_of_orders(L2, fake_orders)

In [None]:
# Save the fulfilled-order results as a comma-separated file in the working
# directory, leaving out the dataframe index.
fulfilled_order_data.to_csv(
    "./quantity_horizon_cost_data.csv",
    sep=",",
    index=False,
)

## MISC CODE

In [None]:
# Scratch check: pretty-print the first ten fake orders.
pprint(fake_orders[:10])

In [None]:
# Scratch check: fulfil a single order (index 5 of the fake orders) with the
# helper's debug printing switched on.
fulfilment_dict = fe.fulfil_order(L2, fake_orders[5], debug_printing=True)

In [None]:
# Scratch check: pretty-print the result of the single-order fulfilment.
pprint(fulfilment_dict)

In [None]:
# Plot the level-1 bid (blue), mid (red) and ask (green) prices over one day.
# The one-day slice is built here so the cell runs on its own: it keeps the
# rows belonging to the earliest calendar day in L2's datetime index.
L2_oneDay = L2.loc[L2.index.normalize() == L2.index.min().normalize()]

plt.scatter(L2_oneDay["Time_Minute"], L2_oneDay["L1_BidPrice"], marker=".", color="blue")
plt.scatter(L2_oneDay["Time_Minute"], L2_oneDay["L1_MidPrice"], marker=".", color="red")
plt.scatter(L2_oneDay["Time_Minute"], L2_oneDay["L1_AskPrice"], marker=".", color="green")
plt.show()

In [None]:
# Plot the level-1 cost over one day.
# The one-day slice is built here so the cell runs on its own: it keeps the
# rows belonging to the earliest calendar day in L2's datetime index.
L2_oneDay = L2.loc[L2.index.normalize() == L2.index.min().normalize()]

plt.scatter(L2_oneDay["Time_Minute"], L2_oneDay["L1_Cost"], marker=".", color="blue")