# Calibration of the market models


In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import os
import glob
import gzip
import warnings
#warnings.filterwarnings('ignore')

# Plot defaults for the whole notebook: apply the 'ggplot' style sheet first,
# then override figure size and font size on top of it.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

### Import HFT data

In [2]:
def load_data(file_path):
    """Read one gzipped CSV file and return it as an enriched DataFrame.

    The file is opened in text mode through ``gzip`` and parsed by pandas.
    Two columns are appended, in this order:

    * ``datetime``  -- the ``ets`` column parsed with the format
      ``%Y%m%d:%H:%M:%S.%f``.
    * ``mid_price`` -- the arithmetic mean of the ``bp0`` and ``ap0`` columns
      (presumably the best bid / best ask prices -- confirm against the
      data specification).

    Parameters
    ----------
    file_path : str
        Path to the ``.csv.gz`` file.

    Returns
    -------
    pandas.DataFrame
        The parsed rows with the two extra columns.
    """
    with gzip.open(file_path, mode='rt') as handle:
        frame = pd.read_csv(handle)

    timestamps = pd.to_datetime(frame['ets'], format='%Y%m%d:%H:%M:%S.%f')
    bid_plus_ask = frame['bp0'] + frame['ap0']

    # Keyword order in assign() fixes the order of the appended columns.
    return frame.assign(datetime=timestamps, mid_price=bid_plus_ask / 2)

# Collect the per-day gzipped CSV files of both stocks, sorted by path
bnpp_files = sorted(glob.glob('../../HFT/Data/BNPP/BNPP_*.csv.gz'))
sg_files = sorted(glob.glob('../../HFT/Data/SG/SG_*.csv.gz'))

print("BNPP files:", list(map(os.path.basename, bnpp_files)))
print("SG files:", list(map(os.path.basename, sg_files)))

# Load every available day for each stock into a dict keyed by the date
# string embedded in the file name (used for the initial analysis)
bnpp_data_dict = {}
sg_data_dict = {}
for files, data_dict in ((bnpp_files, bnpp_data_dict), (sg_files, sg_data_dict)):
    for file in files:
        # e.g. 'BNPP_20170117.csv.gz' -> '20170117'
        name_after_prefix = os.path.basename(file).split('_')[1]
        date_str = name_after_prefix.split('.')[0]
        data_dict[date_str] = load_data(file)

# Pick one example day per stock and report its dimensions
bnpp_data = bnpp_data_dict['20170117']
sg_data = sg_data_dict['20170117']
print("\nBNPP data shape:", bnpp_data.shape)
print("SG data shape:", sg_data.shape)

# Show the first few rows of each example dataset
for label, frame in (("\nBNPP data sample:", bnpp_data), ("\nSG data sample:", sg_data)):
    print(label)
    display(frame.head())

BNPP files: ['BNPP_20170117.csv.gz', 'BNPP_20170118.csv.gz', 'BNPP_20170119.csv.gz', 'BNPP_20170120.csv.gz', 'BNPP_20170123.csv.gz', 'BNPP_20170124.csv.gz', 'BNPP_20170125.csv.gz', 'BNPP_20170126.csv.gz', 'BNPP_20170127.csv.gz', 'BNPP_20170130.csv.gz', 'BNPP_20170131.csv.gz', 'BNPP_20170201.csv.gz']
SG files: ['SG_20170117.csv.gz', 'SG_20170118.csv.gz', 'SG_20170119.csv.gz', 'SG_20170120.csv.gz', 'SG_20170123.csv.gz', 'SG_20170124.csv.gz', 'SG_20170125.csv.gz', 'SG_20170126.csv.gz', 'SG_20170127.csv.gz', 'SG_20170130.csv.gz', 'SG_20170131.csv.gz', 'SG_20170201.csv.gz']

BNPP data shape: (572162, 12)
SG data shape: (841149, 12)

BNPP data sample:


Unnamed: 0.1,Unnamed: 0,ets,etype,eprice,eqty,eside,bp0,bq0,ap0,aq0,datetime,mid_price
0,0,20170117:09:00:46.877289,A,61000,2336,B,61000,2336,61030,200,2017-01-17 09:00:46.877289,61015.0
1,1,20170117:09:01:00.727609,A,61150,16,S,61010,149,61080,108,2017-01-17 09:01:00.727609,61045.0
2,2,20170117:09:01:00.748616,C,61190,151,S,61010,149,61080,108,2017-01-17 09:01:00.748616,61045.0
3,3,20170117:09:01:00.748622,C,61180,302,S,61010,149,61080,108,2017-01-17 09:01:00.748622,61045.0
4,4,20170117:09:01:00.774298,A,61240,302,S,61010,149,61080,108,2017-01-17 09:01:00.774298,61045.0



SG data sample:


Unnamed: 0.1,Unnamed: 0,ets,etype,eprice,eqty,eside,bp0,bq0,ap0,aq0,datetime,mid_price
0,0,20170117:09:01:00.270164,A,45610,1400,B,46010,1066,46085,1445,2017-01-17 09:01:00.270164,46047.5
1,1,20170117:09:01:00.312121,A,46485,700,S,46010,1066,46085,1445,2017-01-17 09:01:00.312121,46047.5
2,2,20170117:09:01:00.358162,A,46000,124,B,46010,1066,46085,1445,2017-01-17 09:01:00.358162,46047.5
3,3,20170117:09:01:00.359972,A,45950,182,B,46010,1066,46085,1445,2017-01-17 09:01:00.359972,46047.5
4,4,20170117:09:01:00.360001,A,45970,22,B,46010,1066,46085,1445,2017-01-17 09:01:00.360001,46047.5
