In [1]:
import numpy as np

Create some sample data:

In [2]:
sample_size = 1000

# Seed NumPy's global RNG so that "Restart Kernel -> Run All" regenerates the
# same synthetic data (and therefore the same estimates) every time.
np.random.seed(0)

# True condition of every patient: cold = 0, flu = 1, allergies = 2
obs_cond = np.random.choice([0, 1, 2], size=sample_size, p=[.7, .1, .2])

# For each condition, draw every symptom for *all* patients (0 = absent,
# 1 = present); the draw matching each patient's actual condition is picked
# out later.

# Symptoms given a cold
runny_nose_cold = np.random.choice([0, 1], size=sample_size, p=[.1, .9])
headache_cold = np.random.choice([0, 1], size=sample_size, p=[.5, .5])
fever_cold = np.random.choice([0, 1], size=sample_size, p=[.99, .01])

# Symptoms given the flu
runny_nose_flu = np.random.choice([0, 1], size=sample_size, p=[.5, .5])
headache_flu = np.random.choice([0, 1], size=sample_size, p=[.5, .5])
fever_flu = np.random.choice([0, 1], size=sample_size, p=[.5, .5])

# Symptoms given allergies
runny_nose_al = np.random.choice([0, 1], size=sample_size, p=[.1, .9])
headache_al = np.random.choice([0, 1], size=sample_size, p=[.99, .01])
fever_al = np.random.choice([0, 1], size=sample_size, p=[.99, .01])

# Shape (3, sample_size): row index = condition code, column index = patient
runny_nose = np.stack([runny_nose_cold, runny_nose_flu, runny_nose_al])
headache = np.stack([headache_cold, headache_flu, headache_al])
fever = np.stack([fever_cold, fever_flu, fever_al])

In [3]:
# For every patient (column), keep the symptom value from the row that
# corresponds to that patient's actual condition. Integer-array indexing pairs
# obs_cond[i] with column i, exactly like looping over enumerate(obs_cond).
obs_runny_nose = runny_nose[obs_cond, np.arange(obs_cond.size)]
obs_headache = headache[obs_cond, np.arange(obs_cond.size)]
obs_fever = fever[obs_cond, np.arange(obs_cond.size)]

Hide some of the sample data:

In [4]:
# Independently mask each variable wherever a fresh uniform draw exceeds 0.9.
# The arrays are processed in the order cond, runny nose, headache, fever, so
# the random draws are consumed in the same sequence as four separate calls.
msk_cond, msk_runny_nose, msk_headache, msk_fever = (
    np.ma.masked_where(np.random.rand(sample_size) > .9, observed)
    for observed in (obs_cond, obs_runny_nose, obs_headache, obs_fever)
)

Code the probabilistic model in pymc3:

In [5]:
from pymc3 import Model, Categorical, Bernoulli, Normal, Dirichlet, Uniform, find_MAP
#from pymc3.math import sigmoid

In [6]:
with Model() as med_model:
    # Flat Dirichlet prior over the probabilities of the three conditions.
    prior_conds = Dirichlet('prior_probs', a=np.array([1., 1., 1.]))
    # The condition is passed as a masked array; pymc3 treats masked entries
    # of `observed` as missing values.
    cond = Categorical('cond', p=prior_conds, observed=msk_cond)

    # W[s, c] is the probability of symptom s (0 = runny nose, 1 = headache,
    # 2 = fever) given condition c, each with a Uniform(0, 1) prior.
    W = Uniform('W', lower=0., upper=1., shape=[3, 3])

    # The Python bindings carry an `_rv` suffix so they do not overwrite the
    # NumPy arrays `runny_nose`, `headache` and `fever` built during data
    # generation (which would break re-running the earlier cells). The pymc3
    # variable names are unchanged, so the keys of the fitted result are too.
    runny_nose_rv = Bernoulli('runny_nose', W[0, cond], observed=msk_runny_nose)
    headache_rv = Bernoulli('headache', W[1, cond], observed=msk_headache)
    fever_rv = Bernoulli('fever', W[2, cond], observed=msk_fever)

Fit the model (using a MAP estimate - really this is EM, but ok...):

In [7]:
# Maximum a posteriori point estimate for med_model; the model is supplied
# through the context manager rather than the `model=` keyword.
with med_model:
    map_estimate = find_MAP()

Optimization terminated successfully.
         Current function value: 1991.051968
         Iterations: 27
         Function evaluations: 33
         Gradient evaluations: 33


Compare the estimate to the generating parameters...

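The prior probabilities should be 0.7, 0.1, 0.2. Seen as a stick-breaking process, that corresponds to two break fractions: 0.7 and 0.1/(1 - 0.7) ≈ 0.3333 (leaving 0.2/0.3 ≈ 0.6666 of the remaining stick for the third condition). Note that pymc3's stick-breaking transform appears to also subtract an "equal share" offset, $\operatorname{logit}(1/(K-k))$ for the $k$-th of $K$ categories, so a plain sigmoid of the transformed values (as computed below) is expected to give about $14/17 \approx 0.82$ for the first entry rather than 0.7, and 0.3333 for the second. The estimates are close to those values: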

In [8]:
 # Logistic (sigmoid) function applied to the MAP values stored under the
 # stick-breaking-transformed key of the Dirichlet prior.
 # NOTE(review): if the transform includes an additional offset, these are not
 # the raw stick fractions -- confirm against the pymc3 version in use.
 1. / (1. + np.exp(-map_estimate['prior_probs_stickbreaking_']))

array([ 0.83979179,  0.31768953])

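The conditional probability matrix $W$, with $W_{sc} = P(\text{symptom } s \mid \text{condition } c)$ (rows: runny nose, headache, fever; columns: cold, flu, allergies), should be very close to
$$\begin{array}{ccc}
.9&.5&.9\\
.5&.5&.01\\
.01&.5&.01
\end{array}$$
and we nearly get that: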

In [9]:
# Logistic (sigmoid) function applied to the MAP values stored under the
# interval-transformed key of W, mapping them back into (0, 1).
print(1. / (1. + np.exp(-map_estimate['W_interval_'])))

[[ 0.80055021  0.43820225  0.83157895]
 [ 0.40990371  0.43820224  0.01052632]
 [ 0.02475928  0.34831461  0.01052632]]
