In [1]:
import sys
import time

import numpy as np
import pandas as pd
import xarray

# Put ../code/ at the front of the module search path before the project-local
# imports below (presumably where lme_forecast_general and utils live -- confirm).
sys.path.insert(0,'../code/')
from lme_forecast_general import LME
import utils

#### Read data

In [2]:
# Load the ratio table, order rows by location -> age group -> sex -> year,
# then discard the first column by position.
# NOTE(review): the dropped column is presumably a leftover index written by an
# earlier to_csv call -- confirm against the file before relying on this.
data = (
    pd.read_csv('./data/mp_ratio.csv')
    .sort_values(by=['location_id', 'age_group_id', 'sex_id', 'year_id'])
    .iloc[:, 1:]
)

In [3]:
# Preview the first rows of the sorted table to check the columns and row order.
data.head()

Unnamed: 0,age_group_id,location_id,sex_id,year_id,scenario,value,haq,intercept,lme_fit
0,13,6,1,1990,0,-2.891245,43.258941,1,-3.332449
1,13,6,1,1991,0,-2.897362,44.078531,1,-3.33434
2,13,6,1,1992,0,-2.875354,44.913657,1,-3.336268
3,13,6,1,1993,0,-2.870614,45.764612,1,-3.338232
4,13,6,1,1994,0,-2.881733,46.631696,1,-3.340233


In [4]:
# Response vector, in the row order produced by sorting on
# (location_id, age_group_id, sex_id, year_id).
Y = data['value'].values

# HAQ covariate read from a single age-sex stratum (age_group_id 13, sex_id 1),
# which leaves the rows of that stratum only.
# NOTE(review): this presumes HAQ does not vary by age or sex -- confirm.
is_reference_stratum = (data['age_group_id'] == 13) & (data['sex_id'] == 1)
haq = data.loc[is_reference_stratum, 'haq'].values
haq = haq - np.mean(haq)  # center the covariate on its mean

# Dimension sizes are derived from the data rather than hard-coded
# (previously n_locs = 195 and T = 28), so they cannot silently go stale
# when the input file changes.
n_locs = data['location_id'].nunique()
n_ages = data['age_group_id'].nunique()
T = data['year_id'].nunique()

# The model cell passes haq with a location-by-year mask, so the covariate
# must hold exactly one value per location-year; fail early otherwise.
assert haq.size == n_locs * T, (
    'expected one HAQ value per location-year, got %d values for %d locations x %d years'
    % (haq.size, n_locs, T)
)

#### Build model

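$$ y = \beta \text{HAQ} + \beta_I \text{age-sex} + \pi_{0,l,a} + \pi_{1,l,a}\text{HAQ} $$

Here $\beta$ is the global slope on (mean-centered) HAQ, $\beta_I$ are the coefficients on the age-sex indicators, and $\pi_{0,l,a}$ and $\pi_{1,l,a}$ are the random intercept and the random HAQ slope for location $l$ and age group $a$.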

In [5]:
# Dimension sizes handed to LME; they follow the row sort order
# location -> age group -> sex -> year (the literal 2 is presumably the number
# of sexes -- confirm).
model_dims = [n_locs, n_ages, 2, T]

# Each boolean mask below has one entry per dimension in model_dims.
haq_covariate = (haq, [True, False, False, True])   # mask selects location and year
age_sex_indicator = [False, True, True, False]      # mask selects age group and sex

# Two random effects sharing a location-by-age mask: the first is tied to
# covariate index 0, the second to None (presumably the intercept, matching
# the pi_0 and pi_1 terms in the model equation -- confirm against LME).
random_effects = [
    (0, [True, True, False, False]),
    (None, [True, True, False, False]),
]

# NOTE(review): the meaning of the second positional argument (2) is not
# visible from this notebook; see the LME signature.
model = LME(
    model_dims, 2, Y, [haq_covariate],
    indicators=[age_sex_indicator],
    global_effects_indices=[0],
    global_intercept=False,
    random_effects_list=random_effects,
)

In [6]:
# Fit the model and report how long the fit took.
# `time` is imported in the notebook's top import cell. perf_counter() is a
# monotonic, high-resolution clock, so the measured interval is not affected
# by system clock adjustments the way a time.time() difference can be.
t0 = time.perf_counter()
model.optimize(inner_max_iter=1000)
print('elapsed', time.perf_counter()-t0)

n_groups 2340
k_beta 25
k_gamma 2
total number of fixed effects variables 28
fit with gamma fixed...
finished...elapsed 3.6957876682281494
elapsed 16.551188945770264


In [7]:
# Relative L2 difference between the model's yfit_no_random and the
# lme_fit column shipped with the data.
lme_fit = data['lme_fit'].values
fit_difference_norm = np.linalg.norm(model.yfit_no_random - lme_fit)
reference_norm = np.linalg.norm(lme_fit)
fit_difference_norm / reference_norm

0.007491440359318602

In [8]:
# Called for their effect on `model`; neither call displays a result here.
# Presumably these compute the posterior variances of the global and random
# effects that the draw step relies on -- TODO confirm in lme_forecast_general.
model.postVarGlobal()
model.postVarRandom()

In [9]:
# Draw samples from the fitted model; the call returns a pair that is unpacked
# into beta_samples and u_samples.
# NOTE(review): no random seed is set anywhere visible in this notebook, so the
# draws are presumably not reproducible across runs -- confirm how draw() seeds.
beta_samples, u_samples = model.draw()

In [10]:
# Sorted unique ids for each coordinate, keyed by the dimension name.
coord_dict = {
    dim: sorted(set(data[dim].values))
    for dim in ('location_id', 'age_group_id')
}
location_ids = coord_dict['location_id']
age_ids = coord_dict['age_group_id']

# Only row 0 of beta_samples is saved, reshaped to a single-row 2-D array and
# labelled 'haq'.
haq_beta_draws = beta_samples[0, :].reshape((1, -1))

# Dimension names and variable names for the two random effects in u_samples.
# NOTE(review): the model was built with random_effects_list ordered
# (0, ...) then (None, ...). If u_samples follows that order, the first entry
# is the HAQ slope and the labels below are swapped -- confirm against
# LME.draw and utils.saveDraws.
random_effect_dims = [
    ['location_id', 'age_group_id'],
    ['location_id', 'age_group_id'],
]
random_effect_names = ['pi_location_age', 'pi_haq_slope_location_age']

dataset = utils.saveDraws(
    haq_beta_draws, u_samples, ['haq'],
    random_effect_dims, random_effect_names, coord_dict,
)

In [11]:
# Display the object returned by utils.saveDraws to inspect its dimensions,
# coordinates and data variables.
dataset

<xarray.Dataset>
Dimensions:                    (age_group_id: 12, cov: 1, draw: 10, location_id: 195)
Coordinates:
  * location_id                (location_id) int64 6 7 8 10 ... 385 422 435 522
  * age_group_id               (age_group_id) int64 13 14 15 16 ... 30 31 32 235
  * draw                       (draw) int64 1 2 3 4 5 6 7 8 9 10
  * cov                        (cov) <U3 'haq'
Data variables:
    pi_location_age            (location_id, age_group_id, draw) float64 -0.02809 ... 0.0009205
    pi_haq_slope_location_age  (location_id, age_group_id, draw) float64 0.6678 ... -0.3904
    beta_global                (cov, draw) float64 -0.002608 ... -0.002358