# Measles_Risk_County

### Task 1. Raw measles risk
Calculate raw risk for each county with $$ r_{ij}^{t} = C_{i}^{t} \times V_{ij}^{t} \times NME_{j}^{t} \times P_{j}^{t} $$
where <br/>
$i$ is the origin country, <br/>
$j$ is the US county, <br/>
$t$ is the year, <br/>
$r_{ij}^{t}$ is the measles risk from country $i$ to county $j$ in year $t$, <br/>
$C_{i}^{t}$ is the case incidence in Country $i$ in year $t$, <br/>
$V_{ij}^{t}$ is the travel volume (million) from country $i$ to county $j$ in year $t$, <br/>
$NME_{j}^{t}$ is the NME rate in county $j$ in year $t$, <br/>
$P_{j}^{t}$ is the county $j$ population in year $t$. <br/>
$$ r_{j}^{t} = \sum_{i} r_{ij}^{t} = (\sum_{i} C_{i}^{t} \times V_{ij}^{t}) \times NME_{j}^{t} \times P_{j}^{t}$$
where <br/>
$r_{j}^{t}$ is the measles risk of county $j$ in year $t$. <br/>

### Task 2. Rearrange travel volume
For counties where there is no international travel, update $V_{ij}^{t}$. <br/>
Task 2.1: calculate the average of raw risk in neighboring counties <br/>
Task 2.2: proportion to population <br/>

# Task 1: Calculate measles risk at the county level

In [1]:
# environment setting
import pandas as pd
# Analysis year; used below to build the output file name.
year = 2019
# Column label (string) of the country-population table to read populations from.
year_pop = '2015'  # we use 2015 data for 2015-2019
# Year of the IATA passenger-flow table to slice.
year_iata = 2017  # we use 2017 IATA data for 2018 and 2019
# Output directory with exactly ONE trailing separator, so `folder + file_name`
# gives a clean path. A raw string cannot end in a single backslash, and
# r'...\\' keeps both backslashes, so the separator is appended separately.
folder = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model' + '\\'
# Let long tables be displayed without truncation.
pd.set_option("display.max_rows", 999)

#### Import world population

In [2]:
# World population by country, one column per year.
# Source: http://worldpopulationreview.com/countries/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\Original_Country_populations.csv'
df_pop_raw = pd.read_csv(filepath_or_buffer=in_table)
# Row count as a quick sanity check of the load.
print(df_pop_raw.shape[0])
df_pop_raw.head()

233


Unnamed: 0,Country,Code,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,Afghanistan,4,25893,26617,27294,28004,28803,29709,30697,31732,32758,33736
1,Albania,8,3054,3024,2992,2963,2941,2927,2920,2919,2921,2923
2,Algeria,12,33778,34300,34861,35466,36118,36820,37566,38339,39113,39872
3,American Samoa,16,59,58,57,56,56,55,55,55,55,56
4,Andorra,20,81,83,84,84,84,84,82,81,79,78


In [3]:
# Country-code lookup table (name, alpha2, alpha3, num3).
# Source: http://worldpopulationreview.com/country-codes/
# Entries added by hand to the country-codes file: Caribbean Netherlands (535),
# Saint Helena (654), Kosovo (383) and Channel Islands (830; CHI is a
# self-defined ISO 3 code).
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\country_code.csv'
df_code = pd.read_csv(filepath_or_buffer=in_table)
# Row count as a quick sanity check of the load.
print(df_code.shape[0])
df_code.head()

237


Unnamed: 0,name,alpha2,alpha3,num3
0,Afghanistan,AF,AFG,4
1,Albania,AL,ALB,8
2,Algeria,DZ,DZA,12
3,American Samoa,AS,ASM,16
4,Andorra,AD,AND,20


In [4]:
# Attach the ISO alpha-3 code to every population row by joining the numeric
# country code ('Code') to the lookup table's 'num3'. The left join keeps every
# population row, leaving alpha3 as NaN where no code matches.
df_pop = pd.merge(df_pop_raw, df_code, how='left', left_on='Code', right_on='num3')
df_pop = df_pop[['Country', year_pop, 'alpha3']]
# Count the rows that ended up without a code. The mask must be summed:
# len() of the isnull() mask is just the row count, which made the previous
# check report 0 no matter how many codes were missing.
print(str(df_pop['alpha3'].isnull().sum()) + " row(s) have NaN as ISO 3 (alpha3).")
df_pop.head(5)

0 row(s) have NaN as ISO 3 (alpha3).


Unnamed: 0,Country,2015,alpha3
0,Afghanistan,33736,AFG
1,Albania,2923,ALB
2,Algeria,39872,DZA
3,American Samoa,56,ASM
4,Andorra,78,AND


#### Outbreak data

In [8]:
# Country names were edited in the input so they match the population table:
#   Saint Vincent and the Grenadines -> Saint Vincent and Grenadines
#   United Republic of Tanzania      -> Tanzania
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\Original_infectedCountries2019Lauren.csv'
df_outbreak_raw = pd.read_csv(filepath_or_buffer=in_table)
# 'Total' is the row-wise maximum of the three case-count columns.
df_outbreak_raw['Total'] = df_outbreak_raw.loc[
    :, ["Suspected", "Confirmed", "Suspected from the other source"]
].max(axis='columns')
print(df_outbreak_raw.shape[0])
df_outbreak_raw.head(3)

194


Unnamed: 0,Country,Suspected,Confirmed,Suspected from the other source,Total
0,Afghanistan,82.0,36.0,,82.0
1,Albania,250.0,137.0,,250.0
2,Algeria,,,,


In [9]:
# Join population and ISO alpha-3 code onto each outbreak row by country name;
# the left join keeps every outbreak row even when the name has no match.
df_outbreak = df_outbreak_raw.merge(df_pop, how='left', on='Country')
df_outbreak = df_outbreak.loc[:, ['alpha3', 'Country', 'Total', year_pop]]
# Report how many outbreak rows did not receive a code.
print(str(len(df_outbreak_raw) - df_outbreak['alpha3'].notnull().sum()) + " row(s) have NaN as ISO 3 (alpha3).")
df_outbreak.sort_values(by='alpha3').head()

0 row(s) have NaN as ISO 3 (alpha3).


Unnamed: 0,alpha3,Country,Total,2015
0,AFG,Afghanistan,82.0,33736
4,AGO,Angola,192.0,27859
1,ALB,Albania,250.0,2923
3,AND,Andorra,0.0,78
181,ARE,United Arab Emirates,28.0,9154


#### Outbreak data (OLD)

In [None]:
# Older outbreak input, keyed by ISO3 code.
# NOTE(review): this cell rebinds df_outbreak_raw; if it is executed after the
# 2019 outbreak cell it replaces that table. Presumably kept for reference
# only; confirm before a Restart & Run All.
in_table = r'C:\Users\Ensheng\Desktop\mapping\measles_world_year\ISO3_Outbreaks.csv'
df_outbreak_raw = pd.read_csv(in_table).loc[:, ['ISO3', 'Total']]
print(df_outbreak_raw.shape[0])
df_outbreak_raw.head(3)

In [None]:
# Older variant of the outbreak/population join, matching on ISO3 code
# instead of country name.
# NOTE(review): this cell rebinds df_outbreak, so running it overrides the
# table built from the 2019 outbreak file. Confirm it is meant to stay unexecuted.
df_outbreak = df_outbreak_raw.merge(df_pop, how='left', left_on='ISO3', right_on='alpha3')
df_outbreak = df_outbreak.loc[:, ['alpha3', 'Country', 'Total', year_pop]]
# Report how many outbreak rows did not receive a code.
print(str(len(df_outbreak_raw) - df_outbreak['alpha3'].notnull().sum()) + " row(s) have NaN as ISO 3 (alpha3).")
df_outbreak.sort_values(by='alpha3').head()

#### Import $V_{ij}^{t}$

In [10]:
# IATA passenger flows: one row per (county FIPS, origin country ISO, year).
in_table = r'C:\Users\Ensheng\Desktop\mapping\IATA\flow_XY.csv'
df_iata = pd.read_csv(filepath_or_buffer=in_table)
# Keep only the configured IATA year and the three columns used downstream.
df_iata = df_iata.loc[df_iata['year'].eq(year_iata), ['FIPS', 'ISO', 'paxVolume']]
print(df_iata.shape[0])
df_iata.head()

38109


Unnamed: 0,FIPS,ISO,paxVolume
332866,1033.0,MEX,2
332867,1033.0,CHE,2
332868,1045.0,ARG,16
332869,1045.0,ISL,7
332870,1045.0,ITA,103


#### Import $NME_{j}^{t}$ and $P_{j}^{t}$

In [11]:
# County-level table holding the NME rates and the county population.
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\ModelInputOutputAll 4_23.csv'
df_nme = pd.read_csv(filepath_or_buffer=in_table)
# Row count as a quick sanity check of the load.
print(df_nme.shape[0])
df_nme.head()

3142


Unnamed: 0,County Name,State,FIPS,2015_NME,2016_NME,State_Avg_NME,Population,Static,Year2011,Year2012,Year2013,Year2014,Year2015,Year2016,Year2017,Year2018,Year2019
0,Autauga,Alabama,1001,,,0.006,55504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Baldwin,Alabama,1003,,,0.006,212628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Barbour,Alabama,1005,,,0.006,25270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bibb,Alabama,1007,,,0.006,22668,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Blount,Alabama,1009,,,0.006,58013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Human-readable county label of the form "<County Name>, <State>".
df_nme['County'] = df_nme['County Name'].add(', ').add(df_nme['State'])

In [13]:
# Pick one NME rate per county, in order of preference:
# the 2016 county value, else the 2015 county value, else the state average.
# Counties with none of the three stay NaN.
df_nme['FIPS_NME'] = (
    df_nme['2016_NME']
    .fillna(df_nme['2015_NME'])
    .fillna(df_nme['State_Avg_NME'])
)

In [14]:
# Keep only the identifier, the NME inputs, the chosen NME and the population.
df_nme = df_nme.loc[:, ['FIPS', 'County', '2016_NME', '2015_NME', 'State_Avg_NME', 'FIPS_NME', 'Population']]
# List the counties for which no NME value could be resolved.
print("No NME for the following counties:")
df_nme[df_nme['FIPS_NME'].isna()]

No NME for the following counties:


Unnamed: 0,FIPS,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
3119,56001,"Albany, Wyoming",,,,,38332
3120,56003,"Big Horn, Wyoming",,,,,11906
3121,56005,"Campbell, Wyoming",,,,,46242
3122,56007,"Carbon, Wyoming",,,,,15303
3123,56009,"Converse, Wyoming",,,,,13809
3124,56011,"Crook, Wyoming",,,,,7410
3125,56013,"Fremont, Wyoming",,,,,39803
3126,56015,"Goshen, Wyoming",,,,,13378
3127,56017,"Hot Springs, Wyoming",,,,,4696
3128,56019,"Johnson, Wyoming",,,,,8476


#### Calculate $r_{ij}^{t}$

In [15]:
# Build one row per travel route (origin country -> county) carrying every
# factor of the risk formula. Both joins are left joins, so every IATA route
# is kept even if the country or the county has no match.
# Step 1: origin-country cases and population, matched on the ISO-3 code.
df_temp = df_iata.merge(df_outbreak, how='left', left_on='ISO', right_on='alpha3')
# Step 2: destination-county NME and population, matched on FIPS.
df_factors = df_temp.merge(df_nme, how='left', on='FIPS')
df_factors.head()

Unnamed: 0,FIPS,ISO,paxVolume,alpha3,Country,Total,2015,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,1033.0,MEX,2,MEX,Mexico,769.0,125891.0,"Colbert, Alabama",,,0.006,0.006,54500.0
1,1033.0,CHE,2,CHE,Switzerland,105.0,8320.0,"Colbert, Alabama",,,0.006,0.006,54500.0
2,1045.0,ARG,16,ARG,Argentina,5.0,43418.0,"Dale, Alabama",,,0.006,0.006,49226.0
3,1045.0,ISL,7,ISL,Iceland,1.0,330.0,"Dale, Alabama",,,0.006,0.006,49226.0
4,1045.0,ITA,103,ITA,Italy,385.0,59504.0,"Dale, Alabama",,,0.006,0.006,49226.0


In [16]:
# Give the factor columns explicit names (FIPS_* = county side, ISO_* = country
# side), then keep just the columns needed for the risk calculation, in order.
df_factors = df_factors.assign(
    FIPS_Pop=df_factors['Population'],
    ISO_Case=df_factors['Total'],
    ISO_Pop=df_factors[year_pop],
)
df_factors = df_factors.loc[
    :, ['FIPS', 'County', 'FIPS_NME', 'FIPS_Pop', 'ISO', 'Country', 'ISO_Case', 'ISO_Pop', 'paxVolume']
]
print(df_factors.shape[0])
df_factors.head()

38109


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume
0,1033.0,"Colbert, Alabama",0.006,54500.0,MEX,Mexico,769.0,125891.0,2
1,1033.0,"Colbert, Alabama",0.006,54500.0,CHE,Switzerland,105.0,8320.0,2
2,1045.0,"Dale, Alabama",0.006,49226.0,ARG,Argentina,5.0,43418.0,16
3,1045.0,"Dale, Alabama",0.006,49226.0,ISL,Iceland,1.0,330.0,7
4,1045.0,"Dale, Alabama",0.006,49226.0,ITA,Italy,385.0,59504.0,103


In [17]:
# Drop routes that cannot contribute to the risk: first those whose origin
# country has no case count, then those without a passenger volume.
# The remaining row count is printed after each step.
df_factors = df_factors.dropna(subset=['ISO_Case'])
print(df_factors.shape[0])
df_factors = df_factors.dropna(subset=['paxVolume'])
print(df_factors.shape[0])

30758
30758


#### Calculate $r_{j}^{t}$

In [18]:
# Per-route risk r_ij = (cases_i / population_i) * volume_ij * NME_j * population_j.
# The factors are applied left to right, in the same order as the formula.
df_factors['Route_Risk'] = (
    df_factors['ISO_Case']
    .div(df_factors['ISO_Pop'])
    .mul(df_factors['paxVolume'])
    .mul(df_factors['FIPS_NME'])
    .mul(df_factors['FIPS_Pop'])
)

In [19]:
# County risk r_j: sum of the route risks over all origin countries.
# NOTE(review): sum() skips NaN route risks, so a county whose routes are all
# NaN (e.g. no FIPS_NME) ends up with 0 rather than NaN. Confirm this is intended.
df_risk = (
    df_factors
    .groupby(['FIPS', 'County'])['Route_Risk']
    .sum()
    .reset_index()
    # Keep the aggregate under an explicit name next to the summed column.
    .assign(FIPS_RawRisk=lambda totals: totals['Route_Risk'])
)
df_risk.head()

Unnamed: 0,FIPS,County,Route_Risk,FIPS_RawRisk
0,1033.0,"Colbert, Alabama",12.24854,12.24854
1,1045.0,"Dale, Alabama",18804.88,18804.88
2,1073.0,"Jefferson, Alabama",4623331.0,4623331.0
3,1089.0,"Madison, Alabama",1785091.0,1785091.0
4,1097.0,"Mobile, Alabama",1675536.0,1675536.0


#### Normalize and list the Top 25

In [20]:
# Scale the raw risk so that the highest-risk county equals 1.0.
highest_risk = df_risk['FIPS_RawRisk'].max()
df_risk = df_risk.assign(Risk=df_risk['FIPS_RawRisk'].div(highest_risk))
# Rank 1 = highest risk.
df_risk = df_risk.assign(FIPS_Rank=df_risk['Risk'].rank(ascending=False))
df_risk = df_risk.loc[:, ['FIPS', 'County', 'FIPS_RawRisk', 'Risk', 'FIPS_Rank']]
# Show the 25 highest-risk counties.
df_risk.sort_values(by='Risk', ascending=False).head(25).reset_index()

Unnamed: 0,index,FIPS,County,FIPS_RawRisk,Risk,FIPS_Rank
0,115,17031.0,"Cook, Illinois",20383940000.0,1.0,1.0
1,88,12086.0,"Miami-Dade, Florida",11630600000.0,0.570577,2.0
2,45,6037.0,"Los Angeles, California",11136400000.0,0.546332,3.0
3,258,36081.0,"Queens, New-York",7994375000.0,0.39219,4.0
4,365,53033.0,"King, Washington",2735795000.0,0.134213,5.0
5,27,4013.0,"Maricopa, Arizona",2616899000.0,0.12838,6.0
6,231,32003.0,"Clark, Nevada",2382140000.0,0.116864,7.0
7,333,48201.0,"Harris, Texas",1667728000.0,0.081816,8.0
8,80,12011.0,"Broward, Florida",1661975000.0,0.081534,9.0
9,238,34013.0,"Essex, New-Jersey",1556097000.0,0.076339,10.0


In [21]:
# Attach each county's aggregated risk and rank to its individual routes.
# NOTE(review): both frames carry a 'County' column and the join key is FIPS
# only, so the merged table holds County_x and County_y. Left unchanged here
# in case later steps rely on those names.
df = df_factors.merge(df_risk, how='left', on='FIPS')
# Highest-risk counties first; within a county, highest-risk routes first.
result = df.sort_values(by=['Risk', 'Route_Risk'], ascending=False)
# Write the per-route table next to the other model files.
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '.csv'])
result.to_csv(output_csv, index=False, encoding='utf-8')