In [1]:
import pymc3 as pm
import pandas as pd
from numpy.ma import masked_values


In [2]:
# Load the bundled test-score dataset; missing entries are replaced with the
# sentinel value -999.
data_path = pm.get_data('test_scores.csv')
test_scores = pd.read_csv(data_path).fillna(-999)

# Columns of interest, in unpacking order: test score, gender, number of
# siblings, previous disability, age, mother with HS education or better,
# hearing loss identified by 3 months of age.
selected_columns = ['score', 'male', 'siblings',
                    'prev_disab', 'age_test',
                    'mother_hs', 'early_ident']

# Cast to float and transpose so that each row of the array is one variable.
column_arrays = test_scores[selected_columns].astype(float).values.T
score, male, siblings, disability, age, mother_hs, early_ident = column_arrays


In [None]:
with pm.Model() as model:
    # Impute missing values.  Entries equal to the -999 sentinel are masked
    # before being passed as `observed`; PyMC3 treats masked entries as
    # missing data to be imputed rather than as real observations.
    sib_mean = pm.Exponential('sib_mean', 1.)
    # The sentinel must be masked here as well: an unmasked -999 would be
    # evaluated as an observed count, which lies outside the Poisson support.
    siblings_imp = pm.Poisson(
        'siblings_imp', sib_mean, observed=masked_values(siblings, value=-999))
    p_disab = pm.Beta('p_disab', 1., 1.)
    disability_imp = pm.Bernoulli(
        'disability_imp', p_disab, observed=masked_values(disability, value=-999))

In [4]:
# Mask every disability entry that equals the -999 missing-data sentinel.
masked_array = masked_values(disability, -999)

numpy.ma.core.MaskedArray