In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm 
import pandas as pd

# Load the trait measurements. The rest of the notebook relies on a 'FAMILY'
# grouping column and on trait columns whose names contain 'area'.
data = pd.read_csv('data/area_full.csv')

# Distinct family labels, in order of first appearance. The labels are
# extracted once; `family_key` gets its own copy so the two names still refer
# to independent arrays.
family_names = data['FAMILY'].unique()
family_key = family_names.copy()
n_families = len(family_names)

# Number of traits, read from the data instead of being hard-coded so it
# always agrees with the columns selected by filter(regex='area').
num_traits = data.filter(regex='area').shape[1]



In [2]:
# Scratch values: the third family label and the overall per-trait means.
family = family_key[2]
area_columns = data.filter(regex = 'area')
family_data = np.array(area_columns).mean(axis = 0)

# Total trait covariance minus the covariance of the per-family trait means;
# the difference is the value displayed by this cell.
total_cov = area_columns.cov()
family_mean_cov = data.groupby('FAMILY').mean().filter(regex = 'area').cov()
total_cov - family_mean_cov

Unnamed: 0,area1,area2,area3,area4,area5,area6,area7
area1,0.006748,0.007186,0.006133,0.015449,0.01102,0.00155,0.008112
area2,0.007186,0.180478,0.045377,0.096293,0.093743,0.017461,0.051372
area3,0.006133,0.045377,0.101442,0.128771,0.082016,0.015696,0.04188
area4,0.015449,0.096293,0.128771,0.318022,0.182661,0.032203,0.102638
area5,0.01102,0.093743,0.082016,0.182661,0.196226,0.027516,0.075415
area6,0.00155,0.017461,0.015696,0.032203,0.027516,0.009765,0.013296
area7,0.008112,0.051372,0.04188,0.102638,0.075415,0.013296,0.075347


In [3]:
%%writefile mixed_model.py

import matplotlib.pyplot as plt
import numpy as np
import pymc as pm 
import pandas as pd

# Load the trait measurements. The model below relies on a 'FAMILY' grouping
# column and on trait columns whose names contain 'area'.
data = pd.read_csv('data/area_full.csv')

# Distinct family labels, in order of first appearance. The labels are
# extracted once; `family_key` gets its own copy so the two names still refer
# to independent arrays.
family_names = data['FAMILY'].unique()
family_key = family_names.copy()
n_families = len(family_names)

# Number of traits, read from the data instead of being hard-coded so the
# prior dimensions always agree with the columns selected by
# filter(regex='area').
num_traits = data.filter(regex='area').shape[1]

# Trait measurements (columns whose name contains 'area') and the two
# empirical covariance matrices used as starting values below. Each is
# computed once and converted to a plain ndarray, so the stochastics start
# from NumPy values rather than pandas DataFrames.
trait_frame = data.filter(regex = 'area')
family_mean_cov = np.array(data.groupby('FAMILY').mean().filter(regex = 'area').cov())
total_cov = np.array(trait_frame.cov())

# Overall mean prior, started at the observed per-trait means
mu = pm.MvNormalCov("mu",
                    value=np.array(trait_frame).mean(axis = 0),
                    mu=np.zeros(num_traits),
                    C=np.eye(num_traits)*100.)

# Family means prior
# NOTE(review): mu_f is a single vector shared by every family and only enters
# the model through the sum mu + mu_f, so the two look not separately
# identifiable -- confirm this is intended before interpreting either one.
mu_f = pm.MvNormalCov("mu_f",
                      value=np.zeros(num_traits),
                      mu=np.zeros(num_traits),
                      C=np.eye(num_traits)*100.)

# G matrix priors, covariance matrix of family means
# (started at the empirical covariance of the per-family trait means)
G = pm.WishartCov('G',
                  value=family_mean_cov,
                  n=num_traits+1,
                  C=np.eye(num_traits)*100.)

# R matrix prior, residual within-family covariance
# (started at total covariance minus the covariance of the family means)
R =  pm.WishartCov('R',
                   value=total_cov - family_mean_cov,
                   n=num_traits+1,
                   C=np.eye(num_traits)*100.)

# One latent mean vector and one observed-data node per family, both keyed by
# the family label converted to a string.
betas = {}
lik_families = {}

for family in family_key:

    family_label = str(family)

    # Rows of the trait table that belong to this family
    family_data = np.array(trait_frame[data['FAMILY'] == family])

    # Family-level mean, started at the observed mean of the family's rows
    betas[family_label] = pm.MvNormalCov('betas_{}'.format(family_label),
                                         value = family_data.mean(axis = 0),
                                         mu = mu + mu_f,
                                         C = G)

    # Observed rows of the family, centred on the family mean with
    # covariance R
    lik_families[family_label] = pm.MvNormalCov('data_{}'.format(family_label),
                                                mu = betas[family_label],
                                                C = R,
                                                value = family_data,
                                                observed = True)

Overwriting mixed_model.py


In [4]:
import mixed_model

In [5]:
# Wrap the imported mixed_model module in a PyMC MCMC object; `M` is sampled
# and summarised in the cells below.
M = pm.MCMC(mixed_model)

In [6]:
# Render the model's dependency graph with Graphviz 'dot' as a PNG named
# 'graph_mine'; the returned graph object is shown as the cell output.
pm.graph.graph(
    M,
    name='graph_mine',
    path='',
    format='png',
    prog='dot',
)

<pydot.Dot at 0x7fe280c5a490>

In [9]:
# Seed NumPy's global random generator so repeated runs of this cell start
# from the same random state.
# NOTE(review): this assumes the PyMC samplers draw through numpy.random --
# confirm for the installed PyMC version.
np.random.seed(12345)

# 2000 iterations, discarding the first 1000 as burn-in and keeping every
# 10th draw after that; step methods are tuned every 100 iterations.
M.sample(iter=2000, burn=1000, thin=10, tune_interval=100)

 [-----------------100%-----------------] 2000 of 2000 complete in 266.1 sec

In [11]:
# Summarise the trace once and reuse the result, instead of recomputing the
# full summary for each term of the sum.
posterior_stats = M.stats()

# Posterior median of G plus posterior median of R, for comparison with the
# total covariance of the raw traits.
posterior_stats['G']['quantiles'][50] + posterior_stats['R']['quantiles'][50]

array([[ 0.0125907 ,  0.01518758,  0.01416214,  0.03506518,  0.02588474,
         0.00402081,  0.01932937],
       [ 0.01518758,  0.33376542,  0.10527816,  0.17847253,  0.18340059,
         0.03002586,  0.09987002],
       [ 0.01416214,  0.10527816,  0.19578793,  0.25839871,  0.17935266,
         0.02663308,  0.09487345],
       [ 0.03506518,  0.17847253,  0.25839871,  0.64096127,  0.38626796,
         0.05711341,  0.22546231],
       [ 0.02588474,  0.18340059,  0.17935266,  0.38626796,  0.3890506 ,
         0.04820086,  0.17678824],
       [ 0.00402081,  0.03002586,  0.02663308,  0.05711341,  0.04820086,
         0.01623454,  0.02727924],
       [ 0.01932937,  0.09987002,  0.09487345,  0.22546231,  0.17678824,
         0.02727924,  0.15219761]])

In [12]:
# Covariance of the raw trait columns (names containing 'area'), shown for
# comparison with the summed posterior medians of G and R.
observed_traits = data.filter(regex = 'area')
observed_traits.cov()

Unnamed: 0,area1,area2,area3,area4,area5,area6,area7
area1,0.01233,0.014401,0.013364,0.03624,0.026189,0.004233,0.019496
area2,0.014401,0.32785,0.100443,0.185271,0.181952,0.030065,0.098529
area3,0.013364,0.100443,0.183614,0.252834,0.171213,0.026805,0.088257
area4,0.03624,0.185271,0.252834,0.637447,0.379336,0.058951,0.216603
area5,0.026189,0.181952,0.171213,0.379336,0.368628,0.047701,0.164382
area6,0.004233,0.030065,0.026805,0.058951,0.047701,0.015773,0.026595
area7,0.019496,0.098529,0.088257,0.216603,0.164382,0.026595,0.142579
