# Probabilistic Microsimulation

Playing around with ideas about probabilistic microsimulation

In [50]:
# Initialise

import pymc3 as pm

from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
import random
import math

## Define the population

The population is defined by a series of lists, one list for each characteristic of the individuals.

In [51]:
N = 100 # The number of individuals we're going to create
a = np.arange(N) # An array of indices (0, 1, ..., N-1)

# All distributions etc. need to be defined within the context of a pymc model
model = pm.Model()

# One list per characteristic; entry i of each list is the random variable for individual i.

# Age is represented using a Poisson distribution with the lambda parameter representing the expected age.
# Lambda is randomly drawn from a normal distribution with mean 30 (mean pop age = 30) and sd 20.
# (This list must exist before the loop below appends to it.)
pop_age = []

# Income is also Poisson; its lambda is drawn in the same way (normal with mean 30, sd 20).
pop_inc = []

# Assume sex is either male or female, so represent with a Bernoulli distribution
pop_sex = []

with model:
    for i in a:
        # Draw each individual's expected value, then create their distributions.
        # Each variable name must be unique within the model, hence the index suffix.
        age = abs(random.gauss(mu=30, sigma=20)) # (make absolute so no one has a negative age)
        inc = abs(random.gauss(mu=30, sigma=20)) # (make absolute so no one has a negative income)
        pop_age.append( pm.Poisson(name="age"+str(i), mu=age) )
        pop_inc.append( pm.Poisson(name="inc"+str(i), mu=inc) )
        pop_sex.append( pm.Bernoulli(name="sex"+str(i), p=0.5) )

INFO (theano.gof.compilelock): Refreshing lock /Users/nick/.theano/compiledir_Darwin-18.2.0-x86_64-i386-64bit-i386-3.7.1-64/lock_dir/lock
INFO (theano.gof.compilelock): Refreshing lock /Users/nick/.theano/compiledir_Darwin-18.2.0-x86_64-i386-64bit-i386-3.7.1-64/lock_dir/lock


The population is defined by a series of probability distributions, one for each of the characteristics of the individuals.

In [101]:
# Size of the synthetic population, and an index array (0, 1, ..., N-1) over its members
N = 100
a = np.arange(N)

# Every distribution has to be created inside a pymc model context, so build the
# model and enter its context in one step.
with pm.Model() as model:
    # Passing shape=N gives a vector of N distributions, i.e. one per individual.

    # Age: each person's age is Poisson, and the lambda (expected age) of that
    # Poisson is itself Poisson-distributed. Assume a mean age of 30 for now;
    # the aim is to find good values for these parameters later.
    pop_age_param = pm.Poisson(name="age_param", mu=30, shape=N)
    pop_age = pm.Poisson(name="age", mu=pop_age_param, shape=N)

    # Income: same two-level Poisson structure, with an assumed expected income of 25k.
    pop_inc_param = pm.Poisson(name="inc_param", mu=25, shape=N)
    pop_inc = pm.Poisson(name="inc", mu=pop_inc_param, shape=N)

    # Sex: assumed to be either male or female, so a Bernoulli with p = 0.5.
    pop_sex = pm.Bernoulli(name="sex", p=0.5, shape=N)

## Interrogate the (prior) population

Let's have a look at the population priors, before observing any real data.

In [102]:
# Print the initial value ('test_value') of every individual's age, income and sex.
# Each line is the label followed by the list of per-individual values as strings.
with model:
    for label, rv in (("Ages: ", pop_age), ("Incomes: ", pop_inc), ("Sexes: ", pop_sex)):
        test_values = [ str(rv[i].tag.test_value) for i in a ]
        print(label + str(test_values))

Ages: ['30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30', '30']
Incomes: ['25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25', '25',

Now let's observe some 'real' data (an age, income, sex table) ...

x