## Check the mapping in the app data

In [None]:
# Change to the smartva repo (on the develop branch).
# Later cells import `smartva` and run `app.py` by relative path, so they
# presumably rely on this being the working directory.
# NOTE(review): this is a user-specific absolute path; point it at your own checkout.
%cd '/ihme/scratch/users/josephj7/repos/smartva'

In [7]:
# Pull in the current mapping data for child weights for both child and neonate modules
import smartva

# ODK to Presymptom mapping
from smartva.data.child_pre_symptom_data import VAR_CONVERSION_MAP as ch_odk_to_pre
from smartva.data.neonate_pre_symptom_data import VAR_CONVERSION_MAP as nn_odk_to_pre

# Additional headers
from smartva.data.child_pre_symptom_data import GENERATED_VARS_DATA as ch_add_headers
from smartva.data.neonate_pre_symptom_data import GENERATED_VARS_DATA as nn_add_headers

# Presymptom to Symptom mapping
from smartva.data.child_symptom_data import VAR_CONVERSION_MAP as ch_pre_to_symp
from smartva.data.neonate_symptom_data import VAR_CONVERSION_MAP as nn_pre_to_symp

# Pull in the conversion data and cutoffs
from smartva.data.child_symptom_data import DURATION_CUTOFF_DATA as ch_cutoffs
from smartva.data.neonate_symptom_data import DURATION_CUTOFF_DATA as nn_cutoffs
from smartva.data.common_data import WEIGHT_CONVERSION_DATA as weight_data

In [10]:
# Examine the current mapping
# ODK to Presymptom: print the presymptom name that child_1_8 and child_1_8a
# map to, first in the child module and then in the neonate module
for odk_to_pre in (ch_odk_to_pre, nn_odk_to_pre):
    for odk_var in ('child_1_8', 'child_1_8a'):
        print(odk_to_pre[odk_var])

c1_08a
c1_08b
c1_08a
c1_08b


In [16]:
# Presymptom to Symptom
# Look for any of these variables mapping from the presymptom to symptom in
# both child and neonate. A variable that is absent from a module's map is
# skipped; only the missing-key case is caught, so any other error surfaces.
# NOTE: `vars` shadows the builtin of the same name; the name is kept in case
# other cells refer to it.
vars = ['c1_08a', 'c1_08b', 'c1_08num']
for v in vars:
    for module_name, pre_to_symp in (('child', ch_pre_to_symp),
                                     ('neonate', nn_pre_to_symp)):
        try:
            symptom = pre_to_symp[v]
        except KeyError:
            # this module has no symptom mapped from v
            continue
        # format() instead of "+" so a non-string value cannot raise TypeError
        print("In the {0} module {1} maps to {2}".format(module_name, v, symptom))

In the child module c1_08num maps to s14
In the neonate module c1_08num maps to s14


In [15]:
# Examine the s14 cutoff of each module (child first, then neonate)
s14_cutoff_sources = [("Child cutoff: ", ch_cutoffs), ("Neonate cutoff: ", nn_cutoffs)]
for cutoff_label, cutoff_map in s14_cutoff_sources:
    print(cutoff_label + str(cutoff_map['s14']))

# Examine the weight conversion entry for child_1_8
print(weight_data['child_1_8'])


Child cutoff: 1000
Neonate cutoff: 2500
{1: 'child_1_8a', 2: 'child_1_8b'}


Why is the child cutoff (1000) lower than the neonate cutoff (2500)? In what world does it make sense to have a one-month-old under 1 kg? This must be a really high-risk symptom for very rare causes.

## Create test data

In [1]:
import numpy as np, pandas as pd

In [8]:
# Test plan: child and neonate records, weights recorded in both kg and g,
# with values both above and below the threshold.
# Start from an empty frame that only has the twelve row labels.
n_test_rows = 12
test_data = pd.DataFrame(index=range(n_test_rows))

In [9]:
# Fill in all the needed headers as empty (NaN) columns: the ODK-to-presymptom
# source variables and the additional generated headers, for both the child
# and neonate modules.
# The fourth source is nn_add_headers (the neonate additional headers);
# iterating nn_odk_to_pre a second time would leave those headers out.
header_sources = (ch_odk_to_pre, nn_odk_to_pre, ch_add_headers, nn_add_headers)
for header_source in header_sources:
    for k in header_source:
        test_data[k] = np.nan

In [29]:
# Study id: reuse the row index
test_data['sid'] = test_data.index

# Rows 0-5 are neonates (under 28 days); rows 6-11 are children (above 28 days)
rows_per_age_group = 6
test_data['gen_5_4'] = 4 # age in days
test_data['gen_5_4c'] = [15] * rows_per_age_group + [500] * rows_per_age_group
test_data['agedays'] = test_data['gen_5_4c'] # just in case
test_data['gen_5_4d'] = [1] * rows_per_age_group + [2] * rows_per_age_group

# Within each age group the first three rows record weight in grams
# and the last three record weight in kilograms
unit_codes_one_group = [1] * 3 + [2] * 3
test_data['child_1_8'] = unit_codes_one_group * 2

# Weights sit above and below the age-specific thresholds.
# In case the units are backwards, each unit also gets one huge value, and
# the smallest value is below the threshold whichever unit applies.
# So something is endorsed regardless of the units and regardless of whether
# endorsement means being above or below the threshold.
weights = [2000, 3000, 10000, 2.0, 3.0, 10000, 800, 1200, 10000, 0.8, 1.2, 10000]
for weight_col in ('child_1_8a', 'child_1_8b'):
    test_data[weight_col] = weights


In [31]:
# Show the age and weight columns that were given explicit (non-missing) values
filled_columns = ['gen_5_4', 'gen_5_4c', 'agedays', 'gen_5_4d',
                  'child_1_8', 'child_1_8a', 'child_1_8b']
test_data[filled_columns]

Unnamed: 0,gen_5_4,gen_5_4c,agedays,gen_5_4d,child_1_8,child_1_8a,child_1_8b
0,4,15,15,1,1,2000.0,2000.0
1,4,15,15,1,1,3000.0,3000.0
2,4,15,15,1,1,10000.0,10000.0
3,4,15,15,1,2,2.0,2.0
4,4,15,15,1,2,3.0,3.0
5,4,15,15,1,2,10000.0,10000.0
6,4,500,500,2,1,800.0,800.0
7,4,500,500,2,1,1200.0,1200.0
8,4,500,500,2,1,10000.0,10000.0
9,4,500,500,2,2,0.8,0.8


## Run smartva

In [19]:
# Write the test records to a CSV in the current working directory.
# The DataFrame index is dropped; the 'sid' column already holds the row id.
fname = 'test_data.csv'
test_data.to_csv(fname, index=False)

# Fresh temporary directory, passed to the app as its output folder
import tempfile
dname = tempfile.mkdtemp()


In [16]:
# List the repo's branches to confirm which one is currently checked out
!git branch

* [32mdevelop[m
  master[m


In [20]:
%%time
# Run the app from the command line on the saved test data.
# IPython substitutes $fname (input CSV) and $dname (output folder) from the
# notebook's Python variables before handing the line to the shell.
!python app.py $fname $dname --country "Unknown"


Starting analysis with options:
- Input file: test_data.csv
- Output folder: /tmp/tmpc8CnN6
- Country: None
- HIV Region: True
- Malaria Region: True
- HCE variables: True
- Free text variables: True

Preparing variable headers.
Initial data prep
|################################################################|Time: 0:00:00
Child :: Processing pre-symptom data
|################################################################|Time: 0:00:00
Child :: Processing symptom data
|################################################################|Time: 0:00:00
Child :: Processing tariffs
Child :: Generating validated VA cause list.
|################################################################|Time: 0:00:10
Child :: Generating VA cause list.
|################################################################|Time: 0:00:00
Child :: Generating cause rankings.
|################################################################|Time: 0:00:00
Neonate :: Processing pre-symptom data
|##################

## Analyze output

In [23]:
# Load the child symptom file from the run's intermediate-files folder
child_symptom_csv = dname + '/intermediate-files/child-symptom.csv'
child_out = pd.read_csv(child_symptom_csv)

In [24]:
# Show the s14 column of the child symptom output (1 would mean endorsed)
child_out.loc[:, 's14']

0    0
1    0
2    0
3    0
4    0
5    0
Name: s14, dtype: int64

In [26]:
# Load the neonate symptom file from the run's intermediate-files folder
neonate_symptom_csv = dname + '/intermediate-files/neonate-symptom.csv'
neonate_out = pd.read_csv(neonate_symptom_csv)

In [27]:
# Show the s14 column of the neonate symptom output (1 would mean endorsed)
neonate_out.loc[:, 's14']

0    0
1    0
2    0
3    0
4    0
5    0
Name: s14, dtype: int64

We got nothing: s14 is 0 for every row in both the child and the neonate output. Something should have been endorsed.