## Startup

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hmmlearn import hmm

import os
import pickle

In [None]:
random_state=42
np.random.seed(random_state)

In [None]:
from scripts.params import get_params

params = get_params()

## Data Retrieval

In [None]:
dataroute=os.path.join("..",  "data")
dumproute=os.path.join("..",  "dump")
resultsroute=os.path.join("..",  "results")

In [None]:
name=f'finaldf_train_{params["tablename"]}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    df=pickle.load(handle)


In [None]:
df.head()

Unnamed: 0,^MERV_rets,^MERV_log_rets,^MERV_gk_vol,GGAL.BA_rets,GGAL.BA_log_rets,GGAL.BA_gk_vol,GGAL_rets,GGAL_log_rets,GGAL_gk_vol,YPFD.BA_rets,...,BBAR.BA_gk_vol,BBAR_rets,BBAR_log_rets,BBAR_gk_vol,USD_rets,USD_log_rets,USD_gk_vol,USD_^MERV_rets,USD_^MERV_log_rets,USD_^MERV_gk_vol
2013-01-03,0.007552,0.007524,0.000129,0.010616,0.01056,0.000677,-0.012748,-0.01283,0.001228,-0.006863,...,0.000169,-0.005725,-0.005742,0.00096,0.00883,0.008791,1.4e-05,0.001247,0.001246,0.000129
2013-01-04,0.007092,0.007067,0.000158,-0.006303,-0.006323,0.000208,-0.010043,-0.010094,0.000554,0.004936,...,0.000406,-0.019194,-0.01938,0.000635,0.018043,0.017882,0.000133,-0.005727,-0.005743,0.000158
2013-01-07,-0.001035,-0.001035,2.2e-05,0.002114,0.002112,6.3e-05,-0.014493,-0.014599,0.000517,0.010805,...,0.000492,0.015655,0.015534,0.000511,-0.002488,-0.002492,4.8e-05,-0.009769,-0.009817,2.2e-05
2013-01-08,0.008285,0.008251,8.2e-05,-0.008439,-0.008474,0.000153,-0.016176,-0.016309,0.001085,0.049563,...,0.000438,-0.015414,-0.015534,0.000642,0.015356,0.015239,6.4e-05,-0.001117,-0.001118,8.2e-05
2013-01-09,0.017826,0.017669,0.000273,0.0,0.0,0.0,0.011958,0.011887,0.005238,0.0,...,0.0,-0.003914,-0.003922,0.000147,-0.008671,-0.008709,0.001065,0.017245,0.017098,0.000273


In [None]:
tickerlist=params["tickerlist"]

## HMM Training

In [None]:
range_states=range(1,16)
emptydf=pd.DataFrame(columns=["AIC", "BIC"], index=range_states)
emptydf.fillna(np.inf, inplace=True)
results_dict_df={stock:emptydf for stock in tickerlist}

In [None]:
aic_best_model={stock:None for stock in tickerlist}
bic_best_model={stock:None for stock in tickerlist}

In [None]:
for stock in tickerlist:
    columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
    insample_data = df[columns]

    param_dict={
        "covariance_type" : "diag", 
        "n_iter" : 500,
        "random_state" : random_state
        #no voy a usar startprob_prior por devlog 20-06-23
        }

    for nstate in range_states:
        model = hmm.GaussianHMM(n_components= nstate, **param_dict, verbose=False)
        results = model.fit(insample_data)

        convergence=results.monitor_.converged
        # esta es la condición de si el modelo convergió

        all_states_found=np.isclose(a=(model.transmat_.sum(axis=1)), b=1).all()
        # esta es la condición de que todos los estados (nstates) hayan sido observados
        # si no, alguna fila en la matriz de transición del modelo son 0.
        # el errormsg es "Some rows of transmat_ have zero sum because no transition from the state was ever observed".

        startprob_check = (model.startprob_.sum()==1)
        # esta es la condición de que los estados al inicializar estén definidos
        
        good_model = convergence and all_states_found and startprob_check

        if good_model:
            try:
                results_dict_df[stock].loc[nstate, "AIC"]=model.aic(insample_data)
                results_dict_df[stock].loc[nstate, "BIC"]=model.bic(insample_data)
            except ValueError:
                pass
        else: 
            print(">"*10,f"{stock} {nstate} did not converge")
            results_dict_df[stock].loc[nstate, "BIC"]=np.inf
            results_dict_df[stock].loc[nstate, "BIC"]=np.inf

Model is not converging.  Current: 18854.600714436052 is not greater than 18855.73073827267. Delta is -1.1300238366166013
Model is not converging.  Current: 18150.45060648472 is not greater than 18151.012747677036. Delta is -0.5621411923166306
Model is not converging.  Current: 18859.18589914981 is not greater than 18864.433488621307. Delta is -5.247589471498941


>>>>>>>>>> ^MERV 2 did not converge


Model is not converging.  Current: 18906.152865429125 is not greater than 18906.40679425361. Delta is -0.2539288244843192
Model is not converging.  Current: 18184.342352220236 is not greater than 18184.37392065687. Delta is -0.03156843663600739
Model is not converging.  Current: 18893.543841853665 is not greater than 18897.57606814377. Delta is -4.032226290106337
Model is not converging.  Current: 18904.941147439862 is not greater than 18908.745436699235. Delta is -3.8042892593730357
Model is not converging.  Current: 18871.28534979241 is not greater than 18876.603915613836. Delta is -5.318565821424272
Model is not converging.  Current: 18198.450041297587 is not greater than 18198.454556353892. Delta is -0.004515056305535836
  self.means_ = ((means_weight * means_prior + stats['obs'])
Model is not converging.  Current: 18900.05218193964 is not greater than 18903.42363037149. Delta is -3.3714484318479663


>>>>>>>>>> ^MERV 15 did not converge


Model is not converging.  Current: 17746.98884698709 is not greater than 17749.887170814392. Delta is -2.898323827303102
Model is not converging.  Current: 17767.738850783353 is not greater than 17768.03631132645. Delta is -0.2974605430972588
Model is not converging.  Current: 17747.379395552358 is not greater than 17748.469048154537. Delta is -1.0896526021788304
Model is not converging.  Current: 17148.048118381404 is not greater than 17149.571333818105. Delta is -1.5232154367004114
Model is not converging.  Current: 17741.233619404436 is not greater than 17743.302334055254. Delta is -2.068714650817128
Model is not converging.  Current: 17726.79941652626 is not greater than 17726.83641027017. Delta is -0.03699374390998855
Model is not converging.  Current: 17753.78543565406 is not greater than 17754.58585606698. Delta is -0.8004204129210848
Model is not converging.  Current: 17176.803443578803 is not greater than 17176.81466860932. Delta is -0.011225030517380219
Model is not convergin

>>>>>>>>>> GGAL 3 did not converge


Model is not converging.  Current: 16507.512649946817 is not greater than 16509.147236096032. Delta is -1.6345861492154654
Model is not converging.  Current: 17388.679891260814 is not greater than 17388.696415755858. Delta is -0.016524495043995557
Model is not converging.  Current: 17372.470554094958 is not greater than 17378.61695158039. Delta is -6.146397485430498
Model is not converging.  Current: 17383.37408295959 is not greater than 17383.74102771204. Delta is -0.3669447524480347


>>>>>>>>>> GGAL 11 did not converge


Model is not converging.  Current: 16840.248608945196 is not greater than 16841.30997025269. Delta is -1.0613613074929162
Model is not converging.  Current: 17420.882265013443 is not greater than 17421.053269931308. Delta is -0.17100491786550265
Model is not converging.  Current: 16885.38918328365 is not greater than 16885.43809708445. Delta is -0.04891380080152885
Model is not converging.  Current: 17800.584346802458 is not greater than 17802.604045122414. Delta is -2.0196983199566603
Model is not converging.  Current: 17223.111725584044 is not greater than 17224.07818962253. Delta is -0.9664640384871745
Model is not converging.  Current: 17834.527199608863 is not greater than 17834.550895870856. Delta is -0.023696261992881773


>>>>>>>>>> YPFD.BA 3 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> YPFD.BA 5 did not converge


Model is not converging.  Current: 17805.326376384393 is not greater than 17811.65090952547. Delta is -6.324533141076245
Model is not converging.  Current: 17780.7814843408 is not greater than 17785.093116217548. Delta is -4.311631876746105


>>>>>>>>>> YPFD.BA 10 did not converge


Model is not converging.  Current: 17804.350816088987 is not greater than 17807.893629645903. Delta is -3.5428135569163715
Model is not converging.  Current: 17793.361317217285 is not greater than 17802.090970974237. Delta is -8.729653756952757
Model is not converging.  Current: 17192.358243430037 is not greater than 17194.1726161825. Delta is -1.8143727524620772
Model is not converging.  Current: 17789.43640103317 is not greater than 17792.647506621965. Delta is -3.211105588794453
Model is not converging.  Current: 17775.391633511168 is not greater than 17784.3730893977. Delta is -8.981455886532785
Model is not converging.  Current: 17647.14483585503 is not greater than 17648.976865823697. Delta is -1.8320299686674844
Model is not converging.  Current: 17103.254564681993 is not greater than 17103.695503924708. Delta is -0.4409392427151033
Model is not converging.  Current: 17736.51869181244 is not greater than 17740.285656396263. Delta is -3.766964583825029
Model is not converging.  C

>>>>>>>>>> YPF 7 did not converge


Model is not converging.  Current: 17627.224972260443 is not greater than 17629.227544209116. Delta is -2.002571948672994


>>>>>>>>>> YPF 9 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> YPF 10 did not converge


Model is not converging.  Current: 17676.79013846039 is not greater than 17678.175785588377. Delta is -1.3856471279868856
Model is not converging.  Current: 17120.74316823258 is not greater than 17129.292441437596. Delta is -8.549273205015197
Model is not converging.  Current: 17662.297112076227 is not greater than 17677.352141222298. Delta is -15.05502914607132
Model is not converging.  Current: 16662.54021481955 is not greater than 16662.987619628624. Delta is -0.4474048090742144
Model is not converging.  Current: 17064.541553559175 is not greater than 17069.63348150047. Delta is -5.09192794129558


>>>>>>>>>> EDN.BA 7 did not converge


Model is not converging.  Current: 17093.401918080188 is not greater than 17093.59395313588. Delta is -0.19203505569385015
Model is not converging.  Current: 17075.78044081237 is not greater than 17086.25070063738. Delta is -10.470259825011453


>>>>>>>>>> EDN.BA 11 did not converge


Model is not converging.  Current: 17124.77453401155 is not greater than 17124.788104878386. Delta is -0.013570866834925255
Model is not converging.  Current: 17074.773813532625 is not greater than 17085.356721549357. Delta is -10.58290801673138
Model is not converging.  Current: 16565.770178800347 is not greater than 16567.146166650873. Delta is -1.375987850526144
Model is not converging.  Current: 16569.60817819587 is not greater than 16571.53116494629. Delta is -1.9229867504182039
Model is not converging.  Current: 16556.64546814234 is not greater than 16557.24678264772. Delta is -0.6013145053802873
Model is not converging.  Current: 16578.533843740595 is not greater than 16578.546828146762. Delta is -0.012984406166651752
Model is not converging.  Current: 16123.578011228332 is not greater than 16123.882451482108. Delta is -0.30444025377619255


>>>>>>>>>> EDN 7 did not converge


Model is not converging.  Current: 16406.97428225551 is not greater than 16407.571426235856. Delta is -0.5971439803470275
  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state

>>>>>>>>>> EDN 10 did not converge


Model is not converging.  Current: 16486.934543091622 is not greater than 16500.71266134754. Delta is -13.778118255919253
Model is not converging.  Current: 16140.691603971374 is not greater than 16141.127539039635. Delta is -0.4359350682607328
  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum becaus

>>>>>>>>>> EDN 13 did not converge


Model is not converging.  Current: 16099.890961242789 is not greater than 16099.898317233263. Delta is -0.00735599047402502
Model is not converging.  Current: 16823.944155880148 is not greater than 16826.531123983852. Delta is -2.586968103703839
Model is not converging.  Current: 17444.741605353378 is not greater than 17449.044915339153. Delta is -4.303309985774831


>>>>>>>>>> BMA.BA 3 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> BMA.BA 5 did not converge
>>>>>>>>>> BMA.BA 6 did not converge
>>>>>>>>>> BMA.BA 7 did not converge


Model is not converging.  Current: 17389.125362292994 is not greater than 17392.33676174676. Delta is -3.2113994537648978
Model is not converging.  Current: 17374.970343390716 is not greater than 17393.454433085917. Delta is -18.484089695200964
Model is not converging.  Current: 17353.107724572594 is not greater than 17362.69517607548. Delta is -9.587451502884505


>>>>>>>>>> BMA.BA 12 did not converge


Model is not converging.  Current: 17378.042282758754 is not greater than 17378.093300669898. Delta is -0.05101791114429943
Model is not converging.  Current: 17362.41109296726 is not greater than 17375.104196873668. Delta is -12.693103906407487
Model is not converging.  Current: 17422.074699419252 is not greater than 17422.971729823734. Delta is -0.8970304044814839
Model is not converging.  Current: 17301.880072640575 is not greater than 17304.297173053474. Delta is -2.417100412898435
Model is not converging.  Current: 17365.257578606164 is not greater than 17370.854155502573. Delta is -5.596576896408806
Model is not converging.  Current: 16836.870609729274 is not greater than 16837.902067447147. Delta is -1.0314577178723994


>>>>>>>>>> BMA 2 did not converge


Model is not converging.  Current: 17353.243482581554 is not greater than 17354.21111516992. Delta is -0.9676325883665413


>>>>>>>>>> BMA 6 did not converge


Model is not converging.  Current: 17365.25950396896 is not greater than 17372.50716235344. Delta is -7.247658384480019


>>>>>>>>>> BMA 7 did not converge


Model is not converging.  Current: 17314.72342540452 is not greater than 17332.028038164568. Delta is -17.30461276004644
Model is not converging.  Current: 16809.56452517124 is not greater than 16812.268326401743. Delta is -2.7038012305019947


>>>>>>>>>> BMA 12 did not converge


Model is not converging.  Current: 16827.358926085064 is not greater than 16828.712243179958. Delta is -1.3533170948940096


>>>>>>>>>> BMA 13 did not converge


Model is not converging.  Current: 16224.868100927211 is not greater than 16225.750419174956. Delta is -0.8823182477444789
Model is not converging.  Current: 17322.557273851602 is not greater than 17332.312465865427. Delta is -9.755192013824853


>>>>>>>>>> BMA 15 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> BBAR.BA 5 did not converge


Model is not converging.  Current: 19864.92931540424 is not greater than 19864.961429654006. Delta is -0.03211424976689159


>>>>>>>>>> BBAR.BA 7 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> BBAR.BA 12 did not converge


  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows

>>>>>>>>>> BBAR.BA 13 did not converge


Model is not converging.  Current: 19681.50121160597 is not greater than 19691.923595212855. Delta is -10.422383606884978
Model is not converging.  Current: 18657.780668670785 is not greater than 18659.7293264589. Delta is -1.9486577881143603
Model is not converging.  Current: 16980.476950533706 is not greater than 16980.912996492345. Delta is -0.4360459586387151
Model is not converging.  Current: 17171.29087869195 is not greater than 17171.495405420286. Delta is -0.20452672833562247
Model is not converging.  Current: 17129.43925706523 is not greater than 17131.721490589865. Delta is -2.2822335246346483
Model is not converging.  Current: 16636.330064094564 is not greater than 16636.986003868267. Delta is -0.6559397737037216
Model is not converging.  Current: 17095.557460314736 is not greater than 17103.462959912966. Delta is -7.905499598229653


>>>>>>>>>> BBAR 4 did not converge


Model is not converging.  Current: 16293.223092897138 is not greater than 16297.273067091071. Delta is -4.04997419393294


>>>>>>>>>> BBAR 7 did not converge


Model is not converging.  Current: 17104.214221790317 is not greater than 17108.370578226477. Delta is -4.156356436160422
Model is not converging.  Current: 17093.564168934616 is not greater than 17093.57013615478. Delta is -0.005967220164166065
Model is not converging.  Current: 17070.867095785892 is not greater than 17078.0082196774. Delta is -7.14112389150614
  self.means_ = ((means_weight * means_prior + stats['obs'])
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of tra

>>>>>>>>>> BBAR 13 did not converge


Model is not converging.  Current: 17105.41849493449 is not greater than 17107.6124740924. Delta is -2.19397915790978
Model is not converging.  Current: 17043.366773032645 is not greater than 17061.395448137006. Delta is -18.028675104360445


In [None]:
for stock in tickerlist:
    columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
    insample_data = df[columns]
    
    best_aic_nstate=results_dict_df[stock]["AIC"].astype(float).idxmin()
    best_bic_nstate=results_dict_df[stock]["BIC"].astype(float).idxmin()
    print(f"For stock {stock}, best AIC: {best_aic_nstate} best BIC: {best_bic_nstate}")

    aic_best_model[stock]=hmm.GaussianHMM(n_components = best_aic_nstate, **param_dict).fit(insample_data)
    bic_best_model[stock]=hmm.GaussianHMM(n_components = best_bic_nstate, **param_dict).fit(insample_data)

Model is not converging.  Current: 18859.18589914981 is not greater than 18864.433488621307. Delta is -5.247589471498941
Model is not converging.  Current: 18150.45060648472 is not greater than 18151.012747677036. Delta is -0.5621411923166306


Model is not converging.  Current: 17767.738850783353 is not greater than 17768.03631132645. Delta is -0.2974605430972588


For stock ^MERV, best AIC: 4 best BIC: 3
For stock GGAL.BA, best AIC: 4 best BIC: 3


Model is not converging.  Current: 17746.98884698709 is not greater than 17749.887170814392. Delta is -2.898323827303102
Model is not converging.  Current: 17419.9513089545 is not greater than 17421.032240704913. Delta is -1.0809317504135834
Model is not converging.  Current: 17834.527199608863 is not greater than 17834.550895870856. Delta is -0.023696261992881773
Model is not converging.  Current: 17223.111725584055 is not greater than 17224.078189622516. Delta is -0.9664640384617087


For stock GGAL, best AIC: 4 best BIC: 3
For stock YPFD.BA, best AIC: 4 best BIC: 3
For stock YPF, best AIC: 4 best BIC: 3


Model is not converging.  Current: 17736.51869181244 is not greater than 17740.285656396263. Delta is -3.766964583825029
Model is not converging.  Current: 17103.254564681993 is not greater than 17103.695503924708. Delta is -0.4409392427151033


For stock EDN.BA, best AIC: 4 best BIC: 3


Model is not converging.  Current: 16569.60817819587 is not greater than 16571.53116494629. Delta is -1.9229867504182039


For stock EDN, best AIC: 4 best BIC: 3


Model is not converging.  Current: 16565.770178800347 is not greater than 16567.146166650873. Delta is -1.375987850526144
Model is not converging.  Current: 17444.741605353378 is not greater than 17449.044915339153. Delta is -4.303309985774831
Model is not converging.  Current: 16823.944155880148 is not greater than 16826.531123983852. Delta is -2.586968103703839
Model is not converging.  Current: 17365.257578606623 is not greater than 17370.854155502944. Delta is -5.596576896321494


For stock BMA.BA, best AIC: 4 best BIC: 3
For stock BMA, best AIC: 4 best BIC: 3
For stock BBAR.BA, best AIC: 4 best BIC: 3


Model is not converging.  Current: 17129.43925706523 is not greater than 17131.721490589865. Delta is -2.2822335246346483


For stock BBAR, best AIC: 4 best BIC: 3


Model is not converging.  Current: 17171.290878685777 is not greater than 17171.495405426213. Delta is -0.20452674043553998


# Generating out of sample data

In [None]:
name=f'finaldf_test_{params["tablename"]}.pickle'
filename=os.path.join(dataroute, name)
with open(filename, 'rb') as handle:
    df_test=pickle.load(handle)

In [1]:
model

NameError: name 'model' is not defined

In [None]:
def generate_HMM_samples_residuals_2(model, insample_data, oos_data):
    """_summary_

    Args:
        model (_type_): _description_
        insample_data (_type_): _description_
        oos_data (_type_): _description_
    """
    # pseudocodigo
    # agarra el mejor modelo (esto con una cantidad optima de params ya esta)
    # fittear t-j con t-j-252d
    # Darle un año de datos hasta t-j para que me prediga la secuencia (probabilidad) de estados.
        # Le pido que me prediga las probabilidades de cada estado durante el periodo t-j, t-j-252: 
        # esto me da una matriz de (252 x n estados)
        # esto entiendo es https://hmmlearn.readthedocs.io/en/latest/api.html#hmmlearn.hmm.GaussianHMM.predict_proba
    # Tomo la ultima fila de la matriz
    # Multiplico esa por el vector de medias estimadas: este punto es mi forecast. 
        # esto es model.means (!)    
    
    split_date = oos_data.index[0]
    dates_to_forecast = len(oos_data.index)

    oos_data = pd.concat([insample_data, oos_data])
    del insample_data

    # vamos a implementar recursive window forecasting
    
    index = oos_data.index
    end_loc = np.where(index >= split_date)[0].min()
    # esto es un int del iloc
    # preciso usar ints de iloc porque el timedelta se me va a romper con el fin de semana
    rolling_window = 252

    forecasts = {}

    nstate=model
    model = hmm.GaussianHMM(n_components = nstate, **param_dict, verbose=False)

    for i in len(oos_data):
        

In [56]:
def generate_HMM_samples_residuals(model, insample_data, oos_data):
    """
    
    """
    # como el modelo es memoryless, sólo necesito 1 día de observación para saber en qué estado estoy
    # por lo tanto, en vez de complicarme con dos datasets, puedo agregarle el ultimo día de insample_data al ppio de oos_data
    # pseudocodigo
    oos_data=pd.concat([insample_data[-1:], oos_data])
    del insample_data

    samples=pd.DataFrame(columns=oos_data.columns)
    residuals=pd.DataFrame(columns=oos_data.columns)

    # for i=0
    for i in range(1,
                   len(oos_data.index)):
        prev_obs=oos_data[i-1:i]

        todays_obs = oos_data[i:i+1]
        todays_date = todays_obs.index

        state=model.decode(prev_obs)[1][-1]
        # decode()[0] is the log probability, decode()[1] is the sequence of states, [-1] is the last state
        # since we have added the last datum of insample_data to oos_data, then the 
            # TODO: revisar que tenga sentido decodear solo el ultimo día.
            # La alternativa es agregar diez días de insample al principio y usar un decode con diez dias, 
            # me quedo con el ultimo valor del array que maximiza la log-likelihood de la secuencia entera
            # pero como es memoryless, not sure if it makesense
        
        sample = model.sample(n_samples=1, random_state=random_state, currstate=state)[0]
        # sample()[0] is the array with observations of the sampled variables, sample()[1] is the value of the currstate
        sample = pd.DataFrame(data=sample, columns=oos_data.columns, index=todays_date)

        samples=pd.concat([samples, sample])   
        # sampling given state t-1
        # observar realización en t+i
        residual = todays_obs-sample
        
        residuals=pd.concat([residuals, residual])
    
    return samples, residuals

In [57]:
aic_best_residuals={stock:None for stock in tickerlist}
bic_best_residuals={stock:None for stock in tickerlist}

In [60]:
for stock in tickerlist:
    columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
    insample_data = df[columns]
    oos_data=df_test[columns]

    samples, aic_best_residuals[stock] = generate_HMM_samples_residuals(
        aic_best_model[stock], 
        insample_data=insample_data, 
        oos_data=oos_data)

    samples, bic_best_residuals[stock] = generate_HMM_samples_residuals(
        bic_best_model[stock], 
        insample_data=insample_data, 
        oos_data=oos_data)

# Guardado de datos

In [62]:
with open(os.path.join(resultsroute, f"""HMM_univ_{params["tablename"]}_aic_bestmodels.pickle"""), "wb") as output_file:
    pickle.dump(aic_best_model, output_file)

with open(os.path.join(resultsroute, f"""HMM_univ_{params["tablename"]}_bic_bestmodels.pickle"""), "wb") as output_file:
    pickle.dump(bic_best_model, output_file)

In [63]:
with open(os.path.join(resultsroute, f"""HMM_univ_{params["tablename"]}_aic_residuals.pickle"""), "wb") as output_file:
    pickle.dump(aic_best_residuals, output_file)

with open(os.path.join(resultsroute, f"""HMM_univ_{params["tablename"]}_bic_residuals.pickle"""), "wb") as output_file:
    pickle.dump(bic_best_residuals, output_file)

# Graficando

In [72]:
def plot_close_rets_vol(model, data, key, IC):
    prediction= model.predict(data)
    states=set(prediction)

    fig=plt.figure(figsize = (20, 20))
    plt.tight_layout()
    plt.title(f"{key} Log returns and intraday Vol\n{model.n_components} states / best by {IC}")

    for subplot, var in zip(range(1,3), data.columns):    
        plt.subplot(2,1,subplot)
        for i in set(prediction):
            state = (prediction == i)
            x = data.index[state]
            y = data[var].iloc[state]
            plt.plot(x, y, '.')
        plt.legend(states, fontsize=16)
        
        plt.grid(True)
        plt.xlabel("datetime", fontsize=16)
        plt.ylabel(var, fontsize=16)
            
    plt.savefig(os.path.join(resultsroute, "graphs", 
                             f"HMM", 
                             f"{key}_model_{IC}.png"))

In [None]:
for dictionary, IC in zip([aic_best_model, bic_best_model], ["AIC", "BIC"]):
    for key, model in dictionary.items():
        columns = [f'{stock}_log_rets', f'{stock}_gk_vol']
        insample_data = df[columns]
        oos_data=df_test[columns]
        train_end=insample_data.index.max()
        data=pd.concat([insample_data, oos_data])

        plot_close_rets_vol(model, data, key, IC)

## HMM Selection

Selecting the Number of States in Hidden Markov Models: Pragmatic Solutions Illustrated Using Animal Movement
https://sci-hub.st/10.1007/s13253-017-0283-8