In [1]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from datetime import date,timedelta


import os
os.add_dll_directory('C:\Aorda\PSG\lib')
import psgpython as psg 
from psg_loader import load_psg


### Script to iteratively Fit HMM model 
- Flexibility to fit via PSG constrained optimization or Hmmlearn Viterbi algorithm
- Fits and returns model params across dataset

In [10]:

def remove_duplicates(series):
    """ Remove identical consecutive observations
    """
    cleaned_series=series[np.insert(np.diff(series).astype(bool), 0, True)]
    dropped_els=len(series)-len(cleaned_series)
    
    return cleaned_series


def prep_features(dt):
    """ Prep features by accessing grouped feature csv 
    """
    grouped_features=pd.read_csv(f'data/agg_features/grouped_features_{dt}.csv')
    bidsize=remove_duplicates(grouped_features['Bid_Size'].values)
    offersize=remove_duplicates(grouped_features['Offer_Size'].values)
    bookimbalance=remove_duplicates(grouped_features['OB_IB'].values)
    spread=remove_duplicates(grouped_features['spread'].values)

    feature_dict=dict(zip(['Bid_Size','Offer_Size','OB_IB','spread'],[bidsize,offersize,bookimbalance,spread]))

    return feature_dict

def extract_params(param_dict,method):
    """ Extract Params from optimized model 
    """
    if method==1:
        param_df=pd.DataFrame.from_dict(param_dict,orient='index',columns=['p1','p2','a11','a12', 'a21', 'a22','mu1','si1','mu2','si2'])

        # standardizing mu1 < mu2 as arbitrarily labeled, swapping mus and sigmas if needed 
        for x,row in param_df.iterrows():
            if row.loc['mu1']<row.loc['mu2']:
                continue
            mu1,mu2,sigma1,sigma2=row.loc['mu1'],row.loc['mu2'],row.loc['si1'],row.loc['si2']
            
            row.loc['mu1'],row.loc['mu2'],row.loc['si1'],row.loc['si2']=mu2,mu1,sigma2,sigma1

        param_df=param_df.drop(columns=['p1','p2'])
        return param_df

    elif method==2:
        param_df=pd.DataFrame.from_dict(param_dict,orient='index')
        means= pd.DataFrame(param_df['Mean'].to_list(),columns=['mu1','mu2'])
        covar=pd.DataFrame(param_df['Sigma'].to_list(), columns = ['sigma1', 'sigma2'])
        transit=pd.DataFrame(param_df['Transition'].to_list(), columns = ['a11', 'a12','a21','a22'])
        dates=pd.DataFrame(param_df.index,columns=['date'])
        
        new_param_df=pd.DataFrame(pd.concat([means,covar,transit,dates],axis=1))
        new_param_df.index=new_param_df['date'].values
        new_param_df=new_param_df.drop(columns=['date'])

       # standardizing mu1 < mu2 as arbitrarily labeled, swapping mus and sigmas if needed 
        for x,row in new_param_df.iterrows():
            if row.loc['mu1']<row.loc['mu2']:
                continue
            mu1,mu2,sigma1,sigma2=row.loc['mu1'],row.loc['mu2'],row.loc['sigma1'],row.loc['sigma2']
            
            row.loc['mu1'],row.loc['mu2'],row.loc['sigma1'],row.loc['sigma2']=mu2,mu1,sigma2,sigma1

        new_param_df=new_param_df[['a11','a12','a21','a22','mu1','sigma1','mu2','sigma2']]
        return new_param_df


def fit_hmm(method):
    """ Fit HMM model with PSG and HMMLearn 
    """
    start=date(2020,1,1)
    days=[start+timedelta(days=i) for i in range(0,30)]

    spread_params={}
    bidsize_params={}
    offersize_params={}
    bookimbalance_params={}
    
    # psg training
    if method==1:
        for dt in days:
            try:
                dt_features=prep_features(dt)
            except:
                continue
            
            # formatted as numpy float 
            np.savetxt(r'psg_text_hmm/vector_bidsize.txt', dt_features['Bid_Size'])
            np.savetxt(r'psg_text_hmm/vector_offersize.txt', dt_features['Offer_Size'])
            np.savetxt(r'psg_text_hmm/vector_bookimbalance.txt', dt_features['OB_IB'])
            np.savetxt(r'psg_text_hmm/vector_spread.txt', dt_features['spread'])


            psg_spread_prob = psg.psg_importfromtext('./psg_text_hmm/problem_hmm_normal_spread.txt')
            psg_spread_prob['problem_statement'] = '\n'.join(psg_spread_prob['problem_statement'])
            spread_solution=psg.psg_solver(psg_spread_prob)
            params=list(spread_solution.values())[4][1]
            spread_params[dt]=params

            psg_bidsize_prob = psg.psg_importfromtext('./psg_text_hmm/problem_hmm_normal_bidsize.txt')
            psg_bidsize_prob['problem_statement'] = '\n'.join(psg_bidsize_prob['problem_statement'])
            bidsize_solution=psg.psg_solver(psg_bidsize_prob)
            params=list(bidsize_solution.values())[4][1]
            bidsize_params[dt]=params

            psg_offersize_prob = psg.psg_importfromtext('./psg_text_hmm/problem_hmm_normal_offersize.txt')
            psg_offersize_prob['problem_statement'] = '\n'.join(psg_offersize_prob['problem_statement'])
            offersize_solution=psg.psg_solver(psg_offersize_prob)
            params=list(offersize_solution.values())[4][1]
            offersize_params[dt]=params

            psg_bookimbalance_prob = psg.psg_importfromtext('./psg_text_hmm/problem_hmm_normal_bookimbalance.txt')
            psg_bookimbalance_prob['problem_statement'] = '\n'.join(psg_bookimbalance_prob['problem_statement'])
            bookimbalance_solution=psg.psg_solver(psg_bookimbalance_prob)
            params=list(bookimbalance_solution.values())[4][1]
            bookimbalance_params[dt]=params
            
    elif method==2:
        for dt in days:
            
            try:
                dt_features=prep_features(dt)
            except:
                continue
            print(f"Fitting HMM usign HMM-Learn for {dt}")
            
            
            spread=dt_features['spread'].reshape(-1, 1)
            spread_model=GaussianHMM(n_components=2,algorithm='viterbi',covariance_type="spherical",min_covar=1e-4, n_iter=1000,tol=1e-8)
            fitted_spread_model=spread_model.fit(spread)
            spread_mu=fitted_spread_model.means_.flatten()
            spread_covar=fitted_spread_model.covars_.flatten()
            spread_transit=fitted_spread_model.transmat_.flatten()
            spread_params[dt]={"Mean":spread_mu, "Sigma":spread_covar,"Transition":spread_transit}

            bidsize=dt_features['Bid_Size'].reshape(-1, 1)
            bidsize_model=GaussianHMM(n_components=2,algorithm='viterbi',covariance_type="spherical",min_covar=1e-4, n_iter=1000,tol=1e-8)
            fitted_bidsize_model=bidsize_model.fit(bidsize)
            bidsize_mu=fitted_bidsize_model.means_.flatten()
            bidsize_covar=fitted_bidsize_model.covars_.flatten()
            bidsize_transit=fitted_bidsize_model.transmat_.flatten()
            bidsize_params[dt]={"Mean":bidsize_mu, "Sigma":bidsize_covar,"Transition":bidsize_transit}

            offersize=dt_features['Offer_Size'].reshape(-1, 1)
            offersize_model=GaussianHMM(n_components=2,algorithm='viterbi',covariance_type="spherical",min_covar=1e-4, n_iter=1000,tol=1e-8)
            fitted_offersize_model=offersize_model.fit(offersize)
            offersize_mu=fitted_offersize_model.means_.flatten()
            offersize_covar=fitted_offersize_model.covars_.flatten()
            offersize_transit=fitted_offersize_model.transmat_.flatten()
            offersize_params[dt]={"Mean":offersize_mu, "Sigma":offersize_covar,"Transition":offersize_transit}

            bookimbalance=dt_features['OB_IB'].reshape(-1, 1)
            bookimbalance_model=GaussianHMM(n_components=2,algorithm='viterbi',covariance_type="spherical",min_covar=1e-4, n_iter=1000,tol=1e-8)
            fitted_bookimbalance_model=bookimbalance_model.fit(bookimbalance)
            bookimbalance_mu=fitted_bookimbalance_model.means_.flatten()
            bookimbalance_covar=fitted_bookimbalance_model.covars_.flatten()
            bookimbalance_transit=fitted_bookimbalance_model.transmat_.flatten()
            bookimbalance_params[dt]={"Mean":bookimbalance_mu, "Sigma":bookimbalance_covar,"Transition":bookimbalance_transit}


    else:
        print("Not a valid method")
        return

    features_labels=["spread","bidsize","offersize","bookimbalance"]
    spread_df=extract_params(spread_params,method)
    bidsize_df=extract_params(bidsize_params,method)
    offersize_df=extract_params(offersize_params,method)
    bookimbalance_df=extract_params(bookimbalance_params,method)

    dict_df=dict(zip(features_labels,[spread_df,bidsize_df,offersize_df,bookimbalance_df]))
    return dict_df

### PSG


In [11]:
psg_df=fit_hmm(method=1)

OK. Problem Imported

Running solver
Reading problem formulation
Asking for data information
Getting data
    100.0% of scenarios is processed
100% of vector_spread was read
Start optimization
Ext.iteration=0  Objective=0.740725099987E+00  Residual=0.000000000000E+00
Ext.iteration=10  Objective=0.740725099987E+00  Residual=0.000000000000E+00
Optimization is stopped
Solution is optimal
Calculating resulting outputs. Writing solution.
Objective: objective = 32086.1760096 [-4.512213776820E+16]
Solver has normally finished. Solution was saved.
Problem: problem_hmm_normal, solution_status = optimal
Timing: data_loading_time = 0.09, preprocessing_time = 18.47, solving_time = 1.44
Variables: optimal_point = point_problem_hmm_normal
Objective: objective = 32086.1760096 [-4.512213776820E+16]
Constraint: sum_of_probabilities_for_states = vector_sum_of_probabilities_for_states
Function: hmm_normal(2,vector_spread) =  3.208617600959E+04
OK. Solver Finished

OK. Problem Imported

Running solver
Rea

In [34]:
for feature,df in psg_df.items():
    print(f"Feature {feature} df saved")
    df.to_csv(f'data/results/psg_{feature}.csv')
    psg_df[feature]=df.rename(columns={'si1':'sigma1','si2':'sigma2'})

Feature spread df saved
Feature bidsize df saved
Feature offersize df saved
Feature bookimbalance df saved


### HMM Model

In [21]:
hmm_df=fit_hmm(method=2)

Fitting HMM usign HMM-Learn for 2020-01-02
Fitting HMM usign HMM-Learn for 2020-01-03
Fitting HMM usign HMM-Learn for 2020-01-06
Fitting HMM usign HMM-Learn for 2020-01-07
Fitting HMM usign HMM-Learn for 2020-01-08
Fitting HMM usign HMM-Learn for 2020-01-09
Fitting HMM usign HMM-Learn for 2020-01-10
Fitting HMM usign HMM-Learn for 2020-01-13
Fitting HMM usign HMM-Learn for 2020-01-14
Fitting HMM usign HMM-Learn for 2020-01-15
Fitting HMM usign HMM-Learn for 2020-01-16
Fitting HMM usign HMM-Learn for 2020-01-17
Fitting HMM usign HMM-Learn for 2020-01-21
Fitting HMM usign HMM-Learn for 2020-01-22
Fitting HMM usign HMM-Learn for 2020-01-23
Fitting HMM usign HMM-Learn for 2020-01-24
Fitting HMM usign HMM-Learn for 2020-01-27
Fitting HMM usign HMM-Learn for 2020-01-28
Fitting HMM usign HMM-Learn for 2020-01-29
Fitting HMM usign HMM-Learn for 2020-01-30


In [22]:
for feature,df in hmm_df.items():
    print(f"Feature {feature} df saved")
    df.to_csv(f'data/results/hmm_{feature}.csv')

Feature spread df saved
Feature bidsize df saved
Feature offersize df saved
Feature bookimbalance df saved


### Parameter Estimates for HMM on Spread

In [35]:
psg_df['spread'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.806272,0.193728,0.133529,0.866471,0.060623,0.020881,0.181856,0.122592
std,0.105667,0.105667,0.098822,0.098822,0.067114,0.034069,0.133457,0.063987
min,0.56927,0.046254,0.028994,0.660056,0.034876,0.009084,0.106166,0.076417
25%,0.729465,0.103503,0.068379,0.825096,0.03881,0.010491,0.12053,0.088471
50%,0.797302,0.202698,0.094486,0.905514,0.043875,0.011771,0.141912,0.100189
75%,0.896497,0.270535,0.174904,0.931621,0.05029,0.015209,0.17549,0.108457
max,0.953746,0.43073,0.339944,0.971006,0.342463,0.164497,0.709377,0.286053


In [36]:
hmm_df['spread'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.885729,0.114271,0.212052,0.787948,0.060675,0.001542,0.18223,0.018964
std,0.076464,0.076464,0.109721,0.109721,0.067108,0.006011,0.13338,0.022522
min,0.722159,0.028971,0.054669,0.568398,0.034913,8.5e-05,0.106534,0.005854
25%,0.856195,0.06721,0.103314,0.713064,0.038854,0.000112,0.120932,0.007856
50%,0.906327,0.093673,0.232466,0.767534,0.043936,0.000141,0.14239,0.010063
75%,0.93279,0.143805,0.286936,0.896686,0.050359,0.000234,0.175794,0.011787
max,0.971029,0.277841,0.431602,0.945331,0.34248,0.027074,0.709358,0.082092


In [41]:
spread_err=psg_df['spread'].describe()-hmm_df['spread'].describe()
spread_err

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,-0.079457,0.079457,-0.078524,0.078524,-5.2e-05,0.019339,-0.000374,0.103628
std,0.029202,0.029202,-0.010899,-0.010899,5e-06,0.028057,7.7e-05,0.041465
min,-0.152889,0.017283,-0.025674,0.091658,-3.7e-05,0.009,-0.000368,0.070563
25%,-0.12673,0.036293,-0.034935,0.112033,-4.4e-05,0.010379,-0.000402,0.080614
50%,-0.109024,0.109024,-0.13798,0.13798,-6.1e-05,0.011629,-0.000478,0.090126
75%,-0.036293,0.12673,-0.112033,0.034935,-6.9e-05,0.014974,-0.000303,0.09667
max,-0.017283,0.152889,-0.091658,0.025674,-1.8e-05,0.137423,1.9e-05,0.20396


In [60]:
pd.DataFrame(100*spread_err.loc['mean']/psg_df['spread'].describe().loc['mean']).T

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
mean,-9.854877,41.014793,-58.806522,9.062456,-0.08515,92.613461,-0.205678,84.530947


### Parameter Estimates for HMM on Bidsize

In [25]:
psg_df['bidsize'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,si1,mu2,si2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.775628,0.224372,0.057393,0.942607,1.070006,0.219555,1.466907,0.370816
std,0.215686,0.215686,0.041205,0.041205,0.098283,0.023631,0.206305,0.169462
min,0.1439,0.010338,0.001707,0.864498,0.930758,0.177427,1.21304,0.149726
25%,0.643866,0.037887,0.019853,0.910883,0.960312,0.20053,1.27958,0.258228
50%,0.8398,0.1602,0.062197,0.937803,1.072182,0.221013,1.4243,0.311933
75%,0.962113,0.356134,0.089117,0.980147,1.149388,0.235914,1.56967,0.441264
max,0.989662,0.8561,0.135502,0.998293,1.20665,0.270283,1.964026,0.801932


In [26]:
hmm_df['bidsize'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.920191,0.079809,0.113107,0.886893,1.022551,0.047892,1.374949,0.11969
std,0.089367,0.089367,0.130762,0.130762,0.091619,0.018063,0.20862,0.14545
min,0.588843,0.002972,0.010337,0.489804,0.93078,0.031489,1.202485,0.029626
25%,0.903108,0.023483,0.026223,0.893503,0.946616,0.03851,1.242078,0.061052
50%,0.937814,0.062186,0.079808,0.920192,0.981811,0.041331,1.27898,0.073684
75%,0.976517,0.096892,0.106497,0.973777,1.069392,0.051964,1.408916,0.099618
max,0.997028,0.411157,0.510196,0.989663,1.206646,0.113643,1.964034,0.643217


In [42]:
bidsize_err=psg_df['bidsize'].describe()-hmm_df['bidsize'].describe()
bidsize_err

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,-0.144563,0.144563,-0.055714,0.055714,0.047455,0.171663,0.091957,0.251126
std,0.126319,0.126319,-0.089557,-0.089557,0.006664,0.005568,-0.002316,0.024012
min,-0.444943,0.007366,-0.00863,0.374694,-2.2e-05,0.145938,0.010555,0.1201
25%,-0.259242,0.014404,-0.006371,0.01738,0.013696,0.162021,0.037502,0.197176
50%,-0.098014,0.098014,-0.017611,0.017611,0.090371,0.179682,0.14532,0.238249
75%,-0.014404,0.259242,-0.01738,0.006371,0.079996,0.18395,0.160754,0.341646
max,-0.007366,0.444943,-0.374694,0.00863,4e-06,0.15664,-8e-06,0.158716


In [59]:
pd.DataFrame(100*bidsize_err.loc['mean']/psg_df['bidsize'].describe().loc['mean']).T

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
mean,-18.638133,64.430033,-97.075327,5.910633,4.435003,78.1867,6.268789,67.722447


### Parameter Estimates for HMM on OfferSize

In [27]:
psg_df['offersize'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,si1,mu2,si2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.799846,0.200154,0.104828,0.895172,1.10159,0.215371,1.435147,0.35669
std,0.266062,0.266062,0.21827,0.21827,0.107237,0.060179,0.288689,0.195105
min,0.0,0.00576,0.000442,0.0,0.862478,1e-06,1.18328,0.113905
25%,0.731759,0.043505,0.006716,0.874183,1.018361,0.202605,1.25897,0.269635
50%,0.939493,0.060507,0.028757,0.971243,1.089679,0.218045,1.299874,0.277071
75%,0.956495,0.268241,0.125817,0.993284,1.192509,0.241782,1.526534,0.351048
max,0.99424,1.0,1.0,0.999558,1.243101,0.293308,2.141238,0.866924


In [28]:
hmm_df['offersize'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.6505255,0.349475,0.365502,0.6344976,1.075612,0.033295,1.262011,0.099223
std,0.3884013,0.388401,0.393324,0.3933239,0.084543,0.027891,0.041947,0.110859
min,7.887506e-95,0.010218,0.009802,4.004058e-151,0.862487,3e-06,1.182793,0.027779
25%,0.4305699,0.050448,0.047887,0.2892175,1.02052,4e-06,1.244576,0.072063
50%,0.8668956,0.133104,0.122369,0.8776311,1.098612,0.039326,1.251964,0.075506
75%,0.9495518,0.56943,0.710783,0.9521134,1.098612,0.051208,1.29928,0.085959
max,0.9897821,1.0,1.0,0.9901982,1.239977,0.075778,1.339824,0.563636


In [43]:
offersize_err=psg_df['offersize'].describe()-hmm_df['offersize'].describe()
offersize_err

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,0.1493208,-0.1493208,-0.2606748,0.2606748,0.025979,0.182076,0.173136,0.257467
std,-0.1223393,-0.1223393,-0.175054,-0.175054,0.022694,0.032288,0.246743,0.084246
min,-7.887506e-95,-0.004458136,-0.009359762,-4.004058e-151,-9e-06,-2e-06,0.000487,0.086125
25%,0.3011891,-0.006943297,-0.04117016,0.5849653,-0.002159,0.202601,0.014394,0.197572
50%,0.07259756,-0.07259756,-0.09361216,0.09361216,-0.008933,0.178719,0.047911,0.201564
75%,0.006943297,-0.3011891,-0.5849653,0.04117016,0.093896,0.190574,0.227254,0.265089
max,0.004458136,-8.104628e-15,-1.053602e-13,0.009359762,0.003123,0.21753,0.801414,0.303288


In [58]:
pd.DataFrame(100*offersize_err.loc['mean']/psg_df['offersize'].describe().loc['mean']).T

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
mean,18.668686,-74.603058,-248.670116,29.120065,2.35828,84.540556,12.063981,72.182228


### Parameter Estimes for HMM on BookImbalance

In [29]:
psg_df['bookimbalance'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,si1,mu2,si2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.859427,0.140573,0.104604,0.895396,0.65676,0.2092,1.03111,0.318886
std,0.084247,0.084247,0.066329,0.066329,0.066811,0.021918,0.12975,0.110057
min,0.639234,0.02358,0.030673,0.761885,0.503831,0.169518,0.893228,0.248889
25%,0.83837,0.097462,0.043665,0.849729,0.613157,0.195624,0.975975,0.284053
50%,0.881739,0.118261,0.087388,0.912612,0.666259,0.208292,1.008497,0.297682
75%,0.902538,0.16163,0.150271,0.956335,0.709919,0.221199,1.030171,0.30865
max,0.97642,0.360766,0.238115,0.969327,0.757812,0.245684,1.522145,0.78056


In [30]:
hmm_df['bookimbalance'].describe()

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.842937,0.157063,0.158365,0.841635,0.668789,0.047312,1.023137,0.114361
std,0.170552,0.170552,0.181151,0.181151,0.073893,0.016688,0.137635,0.117042
min,0.178968,0.031642,0.023576,0.16524,0.503867,0.028746,0.822539,0.061964
25%,0.82868,0.054544,0.074421,0.864808,0.627313,0.038272,0.966517,0.080691
50%,0.869033,0.130967,0.115979,0.884021,0.68111,0.043393,1.008515,0.08985
75%,0.945456,0.17132,0.135192,0.925579,0.713346,0.054976,1.030171,0.095753
max,0.968358,0.821032,0.83476,0.976424,0.822439,0.106498,1.522328,0.609443


In [45]:
bookimbalance_err=psg_df['bookimbalance'].describe()-hmm_df['bookimbalance'].describe()
bookimbalance_err

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,0.016491,-0.016491,-0.053761,0.053761,-0.012028,0.161888,0.007972553,0.204524
std,-0.086305,-0.086305,-0.114823,-0.114823,-0.007082,0.00523,-0.007884963,-0.006985
min,0.460266,-0.008062,0.007097,0.596645,-3.6e-05,0.140772,0.07068898,0.186925
25%,0.00969,0.042918,-0.030755,-0.015078,-0.014157,0.157352,0.009458404,0.203362
50%,0.012706,-0.012706,-0.02859,0.02859,-0.01485,0.164899,-1.807641e-05,0.207832
75%,-0.042918,-0.00969,0.015078,0.030755,-0.003427,0.166223,-8.009914e-07,0.212896
max,0.008062,-0.460266,-0.596645,-0.007097,-0.064627,0.139185,-0.0001829136,0.171116


In [57]:
pd.DataFrame(100*bookimbalance_err.loc['mean']/psg_df['bookimbalance'].describe().loc['mean']).T

Unnamed: 0,a11,a12,a21,a22,mu1,sigma1,mu2,sigma2
mean,1.918781,-11.730973,-51.394934,6.004156,-1.831481,77.384149,0.773201,64.137179
