In [1]:
import os
import sys
import warnings
from glob import glob

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from statsmodels import api as sm
from statsmodels.formula import api as smf

In [27]:
# Make the project-local ``summary_parser`` package importable.  The membership
# check keeps repeated runs of this cell from stacking duplicate entries onto
# ``sys.path``.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
_summary_parser_dir = '/home/jovyan/work/summary_parser'
if _summary_parser_dir not in sys.path:
    sys.path.append(_summary_parser_dir)
from summary_parser import summary_parser

In [2]:
# Notebook-wide settings: silence every warning and let pandas render up to
# 100 rows and 100 columns before truncating a displayed frame.
# NOTE(review): the blanket "ignore" also hides warnings raised by the model fits.
warnings.simplefilter("ignore")
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 100)

In [13]:
# Collect every entry in the data directory and derive one label per entry from
# its base name with the extension stripped.
# ``glob`` yields paths in arbitrary, filesystem-dependent order, so the list is
# sorted to keep the file/label pairing and the row order of every downstream
# table reproducible across machines and runs.
# NOTE(review): absolute, machine-specific path; "*" also matches non-data entries.
files = sorted(glob("/home/jovyan/work/data/*"))
ar_coefs = [os.path.splitext(os.path.basename(file))[0] for file in files]

In [15]:
# Read each file as CSV (first column becomes the index) and key the resulting
# frame by the label derived from its file name.
dfs = {
    label: pd.read_csv(path, index_col=0)
    for label, path in zip(ar_coefs, files)
}

In [67]:
# Turn each frame's index into an ordinary ``linear`` column so it can be used
# as the regressor in the ``test~linear`` formula.
# Frames that already have a ``linear`` column are skipped: the previous
# in-place version was not idempotent -- running the cell a second time inserted
# a fresh ``index`` column and renamed it as well, leaving duplicate ``linear``
# columns in every frame.
for ar_coef, df in dfs.items():
    if "linear" in df.columns:
        continue
    # Rebinding an existing key does not change the dict's size, so it is safe
    # to do while iterating over ``items()``.
    dfs[ar_coef] = df.reset_index().rename(columns={"index": "linear"})

In [76]:
# Fit ``test ~ linear`` once per dataset and keep two result objects for it:
# one with the default covariance and one with a HAC covariance estimate.
models, models_HAC = {}, {}
for label, frame in dfs.items():
    n_obs = len(frame)
    # HAC lag truncation: 4 * (n / 100) ** (1/3), truncated to an integer.
    # NOTE(review): the commonly quoted Newey-West rule of thumb uses the
    # exponent 2/9 rather than 1/3 -- confirm the 1/3 here is intended.
    hac_maxlags = int((n_obs / 100) ** (1 / 3) * 4)
    ols_spec = smf.ols(formula="test~linear", data=frame)
    models[label] = ols_spec.fit()
    models_HAC[label] = ols_spec.fit(
        cov_type="HAC", cov_kwds={"maxlags": hac_maxlags}
    )

In [77]:
def _interval_rows(model):
    """Return the "0.025" and "0.975" columns of a fit's parsed summary, transposed.

    The columns are taken from the second table (``summary_dfs[1]``) that
    ``summary_parser`` extracts from ``model.summary()``; after the transpose
    each of the two columns becomes one row.
    NOTE(review): presumably these are the 95% confidence bounds per
    coefficient -- ``summary_parser`` is project-local, confirm there.
    """
    return summary_parser(model.summary()).summary_dfs[1][["0.025", "0.975"]].T


conf_intervals = {ar_coef: _interval_rows(model) for ar_coef, model in models.items()}
# Label the HAC rows with ``models_HAC``'s own keys.  The previous version zipped
# ``models.keys()`` against ``models_HAC.values()``, which silently mislabels (or
# drops) results whenever the two dicts differ in order or content.
conf_intervals_HAC = {
    ar_coef: _interval_rows(model) for ar_coef, model in models_HAC.items()
}

In [78]:
# Prefix every row label with its dataset label, so each per-dataset frame gets
# a two-level (dataset, row) index.  The frames are modified in place; the
# default-covariance dict is processed first, then the HAC dict.
for _frames in (conf_intervals, conf_intervals_HAC):
    for ar_coef, _frame in _frames.items():
        _frame.index = pd.MultiIndex.from_tuples(
            [(ar_coef, row_label) for row_label in _frame.index]
        )

In [79]:
# Stack the per-dataset frames into a single table for each covariance type.
# Both names are rebound here from a dict of frames to one DataFrame, so the
# dict-building cell has to be re-run before this cell can be run again.
_plain_frames = list(conf_intervals.values())
_hac_frames = list(conf_intervals_HAC.values())
conf_intervals = pd.concat(_plain_frames)
conf_intervals_HAC = pd.concat(_hac_frames)

In [95]:
def _stderr_rows(model):
    """Return the "stderr" column of a fit's parsed summary as a one-row frame.

    The column is taken from the second table (``summary_dfs[1]``) that
    ``summary_parser`` extracts from ``model.summary()`` and then transposed.
    """
    return summary_parser(model.summary()).summary_dfs[1][["stderr"]].T


# NOTE: the ``conf_intervals*`` names are kept because later cells display them,
# but from here on they hold the ``stderr`` rows, not interval bounds.
conf_intervals = {ar_coef: _stderr_rows(model) for ar_coef, model in models.items()}
# Label the HAC rows with ``models_HAC``'s own keys.  The previous version zipped
# ``models.keys()`` against ``models_HAC.values()``, which silently mislabels (or
# drops) results whenever the two dicts differ in order or content.
conf_intervals_HAC = {
    ar_coef: _stderr_rows(model) for ar_coef, model in models_HAC.items()
}

In [96]:
# Give each per-dataset frame a two-level (dataset, row) index by pairing the
# dataset label with every existing row label.  Frames are updated in place,
# default-covariance results first and HAC results second.
for _by_dataset in (conf_intervals, conf_intervals_HAC):
    for ar_coef, _table in _by_dataset.items():
        _labelled_rows = [(ar_coef, row_label) for row_label in _table.index]
        _table.index = pd.MultiIndex.from_tuples(_labelled_rows)

In [97]:
# Combine the per-dataset frames into one table per covariance type.  Each name
# changes from a dict of frames to a single DataFrame, so the dict-building
# cell must be re-run before this one is executed again.
_plain_tables = list(conf_intervals.values())
_hac_tables = list(conf_intervals_HAC.values())
conf_intervals = pd.concat(_plain_tables)
conf_intervals_HAC = pd.concat(_hac_tables)

In [98]:
# Show the combined table for the default-covariance fits.  Despite its name,
# the frame holds the ``stderr`` rows selected a few cells earlier.
conf_intervals

Unnamed: 0,Unnamed: 1,Intercept,linear
test_ar1_-0.9,stderr,0.473,0.014
test_ar1_0.9,stderr,0.407,0.012
test_ar1_0,stderr,0.258,0.008


In [99]:
# Show the combined table for the HAC-covariance fits.  Despite its name, the
# frame holds the ``stderr`` rows selected a few cells earlier.
conf_intervals_HAC

Unnamed: 0,Unnamed: 1,Intercept,linear
test_ar1_-0.9,stderr,0.924,0.021
test_ar1_0.9,stderr,0.212,0.006
test_ar1_0,stderr,0.216,0.007
