**Imports**

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from scipy import optimize
from IPython.display import display
from itertools import combinations
from tqdm.notebook import tqdm
import numdifftools as nd
import statsmodels.api as sm
from statsmodels.formula.api import ols, logit
from os import path
import python_scripts.utils.model_utils as mut

# Fitting models for comparison

In [3]:
def main(model_data_path, nruns, init_dict, init_tau_range, save_path=''):
    """Fit one softmax choice model per subject for every non-empty variable subset.

    The data are read from ``model_data_path`` (indexed by ``sid``), restricted to
    rows with ``trial <= 250`` and ``nam > 0``, and grouped by ``sid``. For each
    non-empty combination of the keys of ``init_dict`` a
    ``mut.SoftmaxChoiceModel`` is fitted to every subject, and one row per
    subject (sid, group, nam, variable set, AIC, parameters) is collected.

    Parameters
    ----------
    model_data_path : str
        Path to a CSV file containing at least the columns ``sid``, ``trial``,
        ``nam`` and ``group``.
    nruns : int
        Forwarded to ``model.n_best_stop`` as ``n_stop``.
    init_dict : dict
        Maps each candidate variable name to its initialisation spec. The keys
        define the variables that are combined; the values are passed through
        unchanged to ``mut.SoftmaxChoiceModel``.
    init_tau_range : object
        Initialisation spec stored under the key ``'tau'`` for every subset.
    save_path : str, optional
        CSV file that receives the results. The file is (over)written with a
        header for the first subset and appended to for every later subset.
        If empty, nothing is written.

    Returns
    -------
    None
        Results are only persisted through ``save_path``.
    """
    # Function-scope import: the notebook only imports `os.path` at the top.
    import os

    df = pd.read_csv(model_data_path, index_col='sid')
    df = df.loc[df.trial.le(250), :]
    df = df.loc[df.nam.gt(0), :]

    # Set up model comparison (get parameter combinations)
    np.random.seed(1)
    var_set = list(init_dict.keys())
    subsets = [
        subset
        for nb_vars in range(1, len(var_set) + 1)
        for subset in combinations(var_set, nb_vars)
    ]

    if save_path:
        # Create the output directory up front so a missing folder does not
        # surface only after the first (long) batch of fits has finished.
        os.makedirs(path.dirname(path.abspath(save_path)), exist_ok=True)
        print('Each model subset\'s results are appended to {}'.format(path.abspath(save_path)))
    else:
        print('No save_path given: model fits will be computed but not saved')

    # Collect model data
    first = True
    for subset in tqdm(subsets, desc='Progress'):
        comp_data = []
        model_form = ','.join(subset)
        init_dict_subset = {k: init_dict[k] for k in subset}
        init_dict_subset['tau'] = init_tau_range
        for i, sdf in tqdm(df.groupby('sid'), desc='Variable set = ({})'.format(model_form), leave=False):
            model = mut.SoftmaxChoiceModel(
                objective=mut.neg_log_likelihood,
                data=sdf,
                init_dict=init_dict_subset.copy(),
            )
            model.transform_inp_data(mut.normalize)

            # Re-run the optimisation until it yields a non-NaN AIC.
            # NOTE(review): there is no retry cap, so a subject whose AIC is
            # always NaN would loop forever -- confirm this cannot happen.
            model_aic = np.nan
            while np.isnan(model_aic):
                model.n_best_stop(n_stop=nruns, max_iter=200, show_progress=False)
                model_aic = model.get_aic()

            # Subject-level labels are taken from the subject's first row; read
            # once, after fitting, instead of on every retry.
            group, nam = sdf.iloc[0].loc[['group', 'nam']]
            # Store the AIC that passed the NaN check rather than recomputing it.
            comp_data.append([i, group, nam, model_form, model_aic, model.get_param_csv()])
        if save_path:
            pd.DataFrame(
                comp_data,
                columns=['sid', 'group', 'nam', 'vars', 'aic', 'params']
            ).to_csv(save_path, mode='w' if first else 'a', header=first, index=False)
        first = False

    
# Run the model comparison over the three candidate variables; every variable
# shares the same initialisation spec, so the dict is built in one expression.
main(
    model_data_path='data/model_data.csv',
    nruns=50,
    init_dict={name: [[-1, 1], True] for name in ('rpc', 'rlp', 'abst')},
    init_tau_range=[[0, 100], True],
    save_path='data/model_results/param_fits_raw.csv',
)

Each model subset's results are appended to /Users/alexten/Projects/Humans-monitor-LP/data/model_results/param_fits_raw.csv


HBox(children=(HTML(value='Progress'), FloatProgress(value=0.0, max=7.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rpc)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rlp)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (abst)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rpc,rlp)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rpc,abst)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rlp,abst)'), FloatProgress(value=0.0, max=365.0), HTML(value='')))

HBox(children=(HTML(value='Variable set = (rpc,rlp,abst)'), FloatProgress(value=0.0, max=365.0), HTML(value=''…


