In [27]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from datetime import date,timedelta


import os
os.add_dll_directory('C:\Aorda\PSG\lib')
import psgpython as psg 
from psg_loader import load_psg


### Script to iteratively Fit HMM model 
- Method parameter allows one to fit with PSG or HMMLearn
- Utilizes all potential features

In [30]:
def remove_duplicates(series):
    
    cleaned_series=series[np.insert(np.diff(series).astype(bool), 0, True)]
    dropped_els=len(series)-len(cleaned_series)
    
    return cleaned_series


def prep_features(dt):
    grouped_features=pd.read_csv(f'data/agg_features/grouped_features_{dt}.csv')
    bidsize=remove_duplicates(grouped_features['Bid_Size'].values)
    offersize=remove_duplicates(grouped_features['Offer_Size'].values)
    bookimbalance=remove_duplicates(grouped_features['OB_IB'].values)
    spread=remove_duplicates(grouped_features['spread'].values)

    feature_dict=dict(zip(['Bid_Size','Offer_Size','OB_IB','spread'],[bidsize,offersize,bookimbalance,spread]))

    return feature_dict

def extract_params(param_dict,method):
    if method==1:
        param_df=pd.DataFrame.from_dict(param_dict,orient='index')

        return param_df

    elif method==2:
        param_df=pd.DataFrame.from_dict(param_dict,orient='index')

        return param_df

def fit_hmm(method):
    start=date(2020,1,1)
    days=[start+timedelta(days=i) for i in range(0,30)]

    spread_params={}
    
    # psg training
    if method==1:
        for dt in days:
            try:
                dt_features=prep_features(dt)
            except:
                continue
            
            # formatted as numpy float 
            np.savetxt(r'psg_text_hmm/vector_bidsize.txt', dt_features['Bid_Size'])
            np.savetxt(r'psg_text_hmm/vector_offersize.txt', dt_features['Offer_Size'])
            np.savetxt(r'psg_text_hmm/vector_bookimbalance.txt', dt_features['OB_IB'])
            np.savetxt(r'psg_text_hmm/vector_spread.txt', dt_features['spread'])


            psg_spread_prob = psg.psg_importfromtext('./psg_text_hmm/problem_hmm_normal_spread.txt')
            psg_spread_prob['problem_statement'] = '\n'.join(psg_spread_prob['problem_statement'])
            spread_solution=psg.psg_solver(psg_spread_prob)
            params=list(spread_solution.values())[4][1]
            spread_params[dt]=params

            


    elif method==2:
        for dt in days:
            
            try:
                dt_features=prep_features(dt)
            except:
                continue
            print(f"Fitting HMM usign HMM-Learn for {dt}")
            spread=dt_features['spread'].reshape(-1, 1)
            spread_model=GaussianHMM(n_components=2,algorithm='viterbi',covariance_type="spherical",min_covar=1e-4, n_iter=1000,tol=1e-8)
            fitted_spread_model=spread_model.fit(spread)
            spread_mu=fitted_spread_model.means_.flatten()
            spread_covar=fitted_spread_model.covars_.flatten()
            spread_params[dt]={"Mean":spread_mu, "Covar":spread_covar}


    else:
        print("Not a valid method")
        return

    spread_df=extract_params(spread_params,method)
    return spread_df

### PSG


In [None]:
p1,p2,a11,a12,a21,a22,mu1,si1,mu2,si2=

### HMM Model

In [31]:
spread_df=fit_hmm(method=2)

Fitting HMM usign HMM-Learn for 2020-01-02
Fitting HMM usign HMM-Learn for 2020-01-03
Fitting HMM usign HMM-Learn for 2020-01-06
Fitting HMM usign HMM-Learn for 2020-01-07
Fitting HMM usign HMM-Learn for 2020-01-08
Fitting HMM usign HMM-Learn for 2020-01-09
Fitting HMM usign HMM-Learn for 2020-01-10
Fitting HMM usign HMM-Learn for 2020-01-13
Fitting HMM usign HMM-Learn for 2020-01-14
Fitting HMM usign HMM-Learn for 2020-01-15
Fitting HMM usign HMM-Learn for 2020-01-16
Fitting HMM usign HMM-Learn for 2020-01-17
Fitting HMM usign HMM-Learn for 2020-01-21
Fitting HMM usign HMM-Learn for 2020-01-22
Fitting HMM usign HMM-Learn for 2020-01-23
Fitting HMM usign HMM-Learn for 2020-01-24
Fitting HMM usign HMM-Learn for 2020-01-27
Fitting HMM usign HMM-Learn for 2020-01-28
Fitting HMM usign HMM-Learn for 2020-01-29
Fitting HMM usign HMM-Learn for 2020-01-30


In [32]:
spread_df

Unnamed: 0,Mean,Covar
2020-01-02,"[0.03611565744436117, 0.10653405402521314]","[0.00011519867835537364, 0.008167491545107373]"
2020-01-03,"[0.03820631240944112, 0.13901387208393493]","[0.00011200217509046332, 0.010155770965391126]"
2020-01-06,"[0.03696658408396042, 0.12127290092379646]","[8.930844188485957e-05, 0.009513719095693382]"
2020-01-07,"[0.11990922028972147, 0.03491305798613509]","[0.008083011690052034, 9.565596440753123e-05]"
2020-01-08,"[0.04384862110674828, 0.15047417800603308]","[0.00018868383050721745, 0.011655690410702527]"
2020-01-09,"[0.039069735322202835, 0.11500743593003339]","[8.460333017025526e-05, 0.005853561425508938]"
2020-01-10,"[0.04048036835964801, 0.11608240881865185]","[0.00010453673693660872, 0.005972174454210285]"
2020-01-13,"[0.14604495621219427, 0.047738036700425396]","[0.010029079631572563, 0.0002752838358726027]"
2020-01-14,"[0.049089861737984866, 0.2064669311581797]","[0.000209609255689347, 0.05759951811987854]"
2020-01-15,"[0.05395966506127353, 0.23887464099706165]","[0.0003274220462946563, 0.08209249656839274]"


### Plot of Values across Time


### Stationary Distribution