In [4]:
#####
#####  SET UP ENVIRONMENT
#####

# Import requisite packages
import pandas as pd
import hddm
import pickle
from patsy import dmatrix
from kabuki.utils import concat_models
import pathlib

# Name this model. The same string is the prefix of the model pickles read from
# ./Models/ (<modelName>_0 ... <modelName>_4) and of the CSV written to ./PPC/.
modelName = 'm04_va_visit4'  # Change this!

# Create the PPC output directory if it does not exist yet; exist_ok=True makes
# this a no-op when the directory is already there. Only ./PPC/ is created here.
pathlib.Path('./PPC/').mkdir(parents=True, exist_ok=True)

In [5]:

import pymc as pm
import numpy as np
import pymc.progressbar as pbar

def _parents_to_random_posterior_sample(bottom_node, pos=None):
    """Walks through parents and sets them to pos sample."""
    for i, parent in enumerate(bottom_node.extended_parents):
        if not isinstance(parent, pm.Node): # Skip non-stochastic nodes
            continue

        if pos is None:
            # Set to random posterior position
            pos = np.random.randint(0, len(parent.trace()))

        assert len(parent.trace()) >= pos, "pos larger than posterior sample size"
        parent.value = parent.trace()[pos]

def _post_pred_generate(bottom_node, samples=500, data=None, append_data=True):
    """Generate posterior predictive data from a single observed node."""
    datasets = []
    ##############################
    # Sample and generate stats
    for sample in range(samples):
        _parents_to_random_posterior_sample(bottom_node)
        # Generate data from bottom node
        sampled_data = bottom_node.random()
        sampled_data.reset_index(inplace=True)
        if append_data and data is not None:
            sampled_data = sampled_data.join(data.reset_index(), lsuffix='_sampled')
        datasets.append(sampled_data)
    return datasets

def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Generate posterior predictive data for every observed node of ``model``.

    Parameters
    ----------
    model : object exposing ``data``, ``iter_observeds()``, ``get_observeds()``
        and ``get_data_nodes(index)``
        The fitted model to simulate from.
    groupby : column name or list of column names, optional
        When given, ``model.data`` is split with ``groupby`` and one set of
        datasets is generated per group. When ``None``, one set is generated
        per observed node, using the rows that node's value is indexed by.
    samples : int
        Number of datasets to draw per node/group.
    append_data : bool
        Passed to ``_post_pred_generate``; when true the observed rows are
        joined onto each simulated dataset.
    progress_bar : bool
        Show a pymc progress bar; otherwise print a single "Sampling..." line.

    Returns
    -------
    pandas.DataFrame
        All datasets concatenated, with ``node`` and ``sample`` as the two
        outermost index levels.

    Raises
    ------
    ValueError
        If no node produced any data (nothing to concatenate).
    """
    if groupby is None:
        # DataFrame.ix was removed in pandas 1.0; .loc is the label-based replacement.
        iter_data = ((name, model.data.loc[obs['node'].value.index])
                     for name, obs in model.iter_observeds())
        n_iter = len(model.get_observeds())
    else:
        grouped = model.data.groupby(groupby)
        iter_data = grouped
        # One iteration per group, so size the progress bar by the group count.
        n_iter = grouped.ngroups

    # Progress bar
    if progress_bar:
        bar = pbar.progress_bar(n_iter)
    else:
        bar = None
        print("Sampling...")

    results = {}
    for step, (name, data) in enumerate(iter_data, start=1):
        # Resolve the observed node that codes for exactly these rows.
        node = model.get_data_nodes(data.index)

        if bar is not None:
            bar.update(step)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data, append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))

    if not results:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError("No posterior predictive data generated: no observed node could be sampled.")

    return pd.concat(results, names=['node'])



In [6]:

#####
#####  LOAD MODELS AND TRACES FROM PREVIOUSLY ESTIMATED REGRESSIONS
#####

# Load previously estimated models (./Models/<modelName>_0 ... <modelName>_4).
# Each file is opened in a `with` block so its handle is closed as soon as the
# model has been unpickled.
# NOTE: pickle.load can execute arbitrary code from the file; only load model
# files that you created yourself.
models = []
for model_idx in range(5):
    with open('./Models/' + modelName + '_' + str(model_idx), 'rb') as model_file:
        models.append(pickle.load(model_file))

# Keep the individual names bound in case other cells refer to them.
m0, m1, m2, m3, m4 = models

# Combine traces
m = concat_models(models)

In [7]:
# m.nodes_db
# ppc_data = hddm.utils.post_pred_gen(m)

In [8]:
# Generate predicted results from previously estimated models.
# Call the post_pred_gen defined earlier in this notebook (its
# _post_pred_generate resets the index of each simulated dataset before joining
# the observed data) instead of hddm.utils.post_pred_gen, which this notebook
# recorded as raising NotImplementedError for this model.
# NOTE(review): if "Supply a grouping ..." is still raised, pass groupby=<column(s)>
# so that each group maps to exactly one observed node -- confirm against the data.
ppc_data = post_pred_gen(m, samples=1000, append_data=True)
ppc_data.to_csv('./PPC/'+modelName+'_simData.csv')

NotImplementedError: Supply a grouping so that at most 1 observed node codes for each group.