In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

# Two fixed seed values kept together as a tuple. They are not consumed in
# the cells shown here -- presumably used by later stochastic steps.
random_seeds = (20090425, 19700903)



In [2]:
# Directory prefix for the raw input files; the trailing slash matters
# because file names are appended by plain string concatenation.
DATA_IN = "../data/raw/"
# Presumably the destination for cleaned outputs (not referenced in the
# cells shown here -- confirm against later cells).
DATA_OUT = "../data/clean/"

# Data Import

## Line list

Line list covering 2015-2016 (through June 2016), in Stata format.

In [None]:
# Read the Stata line-list file from the raw-data directory.
# convert_dates=True asks pandas to convert Stata date variables on load.
line_list_raw = pd.read_stata(
    DATA_IN + 'linelist_latin_20160717.dta',
    convert_dates=True,
)
# line_list_raw.head()  # uncomment to preview the first rows

In [5]:
# Candidate date columns: 'onsetofdisease' plus every column whose name
# contains the substring 'date'.
line_list_date_cols = ['onsetofdisease'] + [
    name for name in line_list_raw.columns if 'date' in name
]

In [6]:
# Best-effort conversion of the candidate date columns to datetimes.
# A column that pandas cannot parse is left exactly as it was, but the
# failure is recorded (column name -> error message) instead of being
# swallowed silently, so unparsed columns are visible in the cell output.
unparsed_date_cols = {}
for col in line_list_date_cols:
    try:
        line_list_raw[col] = pd.to_datetime(line_list_raw[col])
    except ValueError as err:
        unparsed_date_cols[col] = str(err)

# Empty dict means every candidate column was converted.
unparsed_date_cols

Drop fields with >75% missing values.

In [7]:
# A column is kept only if it has at least this many non-missing values
# (25% of the row count, truncated to an integer).
min_non_missing = int(len(line_list_raw) * 0.25)
line_list_raw = line_list_raw.dropna(axis='columns', thresh=min_non_missing)
line_list_raw.shape

(49908, 189)

In [9]:
# Count how many columns there are of each dtype.
line_list_raw.dtypes.value_counts()

object            129
float32            27
datetime64[ns]     15
category            9
float64             7
int8                2
dtype: int64

In [None]:
# Disabled check, kept for reference: selects rows whose last '-'-separated
# component of `dateofbirth` sorts after the string '2016' (presumably the
# year part, i.e. implausible birth years -- confirm the date format first).
#line_list_raw[line_list_raw.dateofbirth.str.split('-').apply(lambda x: x[-1]).dropna() > '2016']

## Birth cohort

Routine and SIA vaccination coverage, birth cohort population size, and total suspected cases in each year since 1925

In [33]:
# Load the birth-cohort table from the raw-data directory, then preview
# its last five rows.
birth_cohort_raw = pd.read_csv(
    DATA_IN + 'birth cohort.csv',
)
birth_cohort_raw.tail()

Unnamed: 0,Age,Birth cohort,Measles.cases,MCV1,MCV2,SIA 1994 (M),SIA 1996 (M),SIA 2000 (M),SIA 2007 (M),SIA 2012 (MR),Target,Coverage.survey,serosurvey 2004,sero1996,sero9799,Population,Total cases
86,4,2011,53.0,0.98,0.99,,,,,,0.95,,,,,67706.8,0.0
87,3,2012,71.0,0.99,0.99,,,,,,0.95,,,,,67706.8,0.0
88,2,2013,158.0,0.99,0.99,,,,,,0.95,,,,,67706.8,0.0
89,1,2014,682.0,0.99,,,,,,,0.95,,,,,67706.8,0.0
90,0,2015,294.0,,,,,,,,,,,,,67706.8,18188.0


## Supplemental immunization

Coverage details for the outbreak response campaigns in 2015 and 2016, by age group and aimag (province).

In [76]:
# Skip the first 49 rows of the sheet, use the first remaining column as
# the index, and drop every column that contains a missing value.
sia_2015 = (
    pd.read_excel(DATA_IN + 'MR SIA 2015.xlsx', skiprows=49, index_col=0)
    .dropna(axis='columns')
)
sia_2015

Unnamed: 0,0,1,2,3,4,5
Arkhangai,860,1840,1843,1873,1808,1837
Bayan-Ulgii,1361,2203,2447,2233,2186,1872
Bayankhongor,928,1631,1797,1854,1710,2019
Bulgan,574,1084,1195,1139,1076,1430
Gobi-Altai,494,989,1132,1183,1186,1276
Gobisumber,210,404,376,362,357,391
Darkhan-uul,1205,2162,2081,2094,2071,2115
Dornogobi,780,1391,1483,1455,1419,1496
Dornod,555,797,1147,1335,1344,1953
Dundgobi,400,782,811,835,853,1127


In [63]:
# Skip the first 2 rows of the sheet and use the first remaining column as
# the index; then remove the row labelled 'TOTAL' and any row that still
# contains a missing value.
sia_2016 = (
    pd.read_excel(DATA_IN + 'MR SIA 2016.xlsx', skiprows=2, index_col=0)
    .drop('TOTAL')
    .dropna()
)

In [64]:
# Label the row index as 'age'.
sia_2016.index = sia_2016.index.rename('age')

In [65]:
# Reduce each index label to its first space-separated token and store it
# as an integer. Going through str(label) makes the cell safe to re-run:
# once the labels are already integers they simply round-trip unchanged,
# whereas the `.str` accessor would raise on an integer index.
# The existing index name is carried over explicitly.
sia_2016.index = pd.Index(
    [int(str(label).split(' ')[0]) for label in sia_2016.index],
    name=sia_2016.index.name,
)

In [66]:
# Preview the last five rows of the 2016 campaign table.
sia_2016.tail()

Unnamed: 0_level_0,Ar,Bu,Bh,Bu.1,Ga,Gs,Da,DoG,Do,Du,...,Um,Su,Se,Tu,Uvs,Khov,Khuv,Khe,UB,TOTAL
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
26,1125.0,1338.0,1292.0,765.0,727.0,310.0,1379.0,1161.0,1057.0,562.0,...,1016.0,913.0,1855.0,1461.0,937.0,1131.0,1684.0,958.0,25282.0,48428.0
27,1140.0,1263.0,1276.0,741.0,732.0,325.0,1264.0,1168.0,983.0,571.0,...,1112.0,889.0,1878.0,1363.0,926.0,1092.0,1694.0,920.0,24421.0,47394.0
28,1069.0,1240.0,1204.0,735.0,721.0,290.0,1364.0,1084.0,1001.0,530.0,...,1057.0,868.0,1706.0,1376.0,871.0,1056.0,1664.0,960.0,23952.0,46230.0
29,1067.0,1156.0,1195.0,728.0,685.0,266.0,1232.0,1093.0,952.0,557.0,...,1053.0,860.0,1713.0,1294.0,848.0,971.0,1577.0,930.0,21818.0,43462.0
30,1332.0,1290.0,1425.0,772.0,797.0,419.0,1444.0,1073.0,1066.0,608.0,...,1081.0,1138.0,1903.0,1612.0,916.0,1239.0,2022.0,1017.0,28994.0,54439.0


In [67]:
# (rows, columns) of the 2016 campaign table.
sia_2016.shape

(13, 23)

## Population

Population by age group in each province

In [38]:
# Load the denominators file with its first column as the row index, then
# preview the first five rows.
population = pd.read_csv(
    DATA_IN + 'denominators.csv',
    index_col=0,
)
population.head()

Unnamed: 0_level_0,Total,0-4,5-9,10-14,15-19,20-24,25-29,30-34,35-39,40-44,45-49,50-54,55-59,60-64,65-69,70+
Province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Arkhangai,92896,10440,8360,7883,9249,9469,8207,6856,6824,6294,5498,4643,3266,2202,1237,2468
Bayan-Ulgii,93165,11655,10219,9929,8794,8872,8531,7207,6199,5713,4633,3842,2747,1665,1006,2153
Bayankhongor,79310,9760,7429,6675,7812,7992,7436,6280,5806,5165,4383,3714,2436,1480,1008,1934
Bulgan,60324,6415,5351,4556,5423,5587,4977,4499,4592,4426,4066,3494,2619,1666,877,1776
Gobi-Altai,56698,5953,5392,5046,5778,5631,4837,4401,4345,3994,3389,2730,1974,1064,706,1458
