# Measles_Risk_County

### Task 1. Raw measles risk
Calculate raw risk for each county with $$ r_{ij}^{t} = C_{i}^{t} \times V_{ij}^{t} \times NME_{j}^{t} \times P_{j}^{t} $$
where <br/>
$i$ is the origin country, <br/>
$j$ is the US county, <br/>
$t$ is the year, <br/>
$r_{ij}^{t}$ is the measles risk from country $i$ to county $j$ in year $t$, <br/>
$C_{i}^{t}$ is the case incidence in country $i$ in year $t$, <br/>
$V_{ij}^{t}$ is the travel volume (million) from country $i$ to county $j$ in year $t$, <br/>
$NME_{j}^{t}$ is the NME rate in county $j$ in year $t$, <br/>
$P_{j}^{t}$ is the county $j$ population in year $t$. <br/>
$$ r_{j}^{t} = \sum_{i} r_{ij}^{t} = (\sum_{i} C_{i}^{t} \times V_{ij}^{t}) \times NME_{j}^{t} \times P_{j}^{t}$$
where <br/>
$r_{j}^{t}$ is the measles risk of county $j$ in year $t$, <br/>

### Task 2. Rearrange travel volume
For counties where there is no international travel, update $V_{ij}^{t}$. <br/>
Task 2.1: calculate the average of raw risk in neighboring counties <br/>
Task 2.2: allocate travel volume in proportion to population <br/>

## Task 1: Calculate measles risk in county level

In [1]:
# Environment settings: analysis year, reference-data years, working folder.
import pandas as pd

year = 2019        # analysis year; used in the output file names
year_pop = '2015'  # population column label; we use 2015 data for 2015-2019
year_iata = 2017   # IATA travel year; we use 2017 IATA data for 2018 and 2019
# A raw string cannot end in a single backslash, so the trailing separator is
# appended as a normal string. The earlier r'...\\' literal ended in TWO
# backslashes, producing paths such as '...diffusion_model\\MeaslesRisk_...'.
folder = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model' + '\\'
pd.set_option("display.max_rows", 999)  # raise the displayed-row limit

#### Import world population

In [2]:
# Country population table.
# ref: http://worldpopulationreview.com/countries/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\Original_Country_populations.csv'
df_pop_raw = pd.read_csv(in_table)
print(df_pop_raw.shape[0])  # row count of the raw table
df_pop_raw.head()

233


Unnamed: 0,Country,Code,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,Afghanistan,4,25893,26617,27294,28004,28803,29709,30697,31732,32758,33736
1,Albania,8,3054,3024,2992,2963,2941,2927,2920,2919,2921,2923
2,Algeria,12,33778,34300,34861,35466,36118,36820,37566,38339,39113,39872
3,American Samoa,16,59,58,57,56,56,55,55,55,55,56
4,Andorra,20,81,83,84,84,84,84,82,81,79,78


In [3]:
# Country code lookup (name, alpha2, alpha3, num3).
# ref: http://worldpopulationreview.com/country-codes/
# Manually added to country-codes: Caribbean Netherlands (535), Saint Helena (654),
# Kosovo (383), Channel Islands (830, CHI is a self-defined iso 3 code).
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\country_code.csv'
df_code = pd.read_csv(in_table)
print(df_code.shape[0])  # row count of the lookup table
df_code.head()

237


Unnamed: 0,name,alpha2,alpha3,num3
0,Afghanistan,AF,AFG,4
1,Albania,AL,ALB,8
2,Algeria,DZ,DZA,12
3,American Samoa,AS,ASM,16
4,Andorra,AD,AND,20


In [4]:
# Attach ISO alpha-3 codes to the population table by joining the table's
# numeric 'Code' column to the lookup's 'num3' column.
df_pop = pd.merge(df_pop_raw, df_code, how='left', left_on='Code', right_on='num3')
df_pop = df_pop[['Country', year_pop, 'alpha3']]
# Count the rows left without an alpha3 code by the left join. The earlier
# expression used len() of the boolean mask, which always equals the row count,
# so the message reported 0 no matter how many codes were missing.
n_missing_iso = df_pop['alpha3'].isnull().sum()
print(str(n_missing_iso) + " row(s) have NaN as ISO 3 (alpha3).")
df_pop.head(5)

0 row(s) have NaN as ISO 3 (alpha3).


Unnamed: 0,Country,2015,alpha3
0,Afghanistan,33736,AFG
1,Albania,2923,ALB
2,Algeria,39872,DZA
3,American Samoa,56,ASM
4,Andorra,78,AND


#### Outbreak data

In [5]:
# Outbreak counts per country.
# note: names were updated to match the population table -
#   Saint Vincent and the Grenadines -> Saint Vincent and Grenadines;
#   United Republic of Tanzania -> Tanzania
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\Original_infectedCountries2019Lauren.csv'
df_outbreak_raw = (
    pd.read_csv(in_table)
    # 'Total' is the row-wise maximum of the three reported counts
    # (an alternative left commented out by the author was "Suspected" alone).
    .assign(Total=lambda d: d[["Suspected", "Confirmed", "Suspected from the other source"]].max(axis=1))
    # replace NaN with 0, optional, won't affect the output
    .fillna(0)
)
print(df_outbreak_raw.shape[0])
df_outbreak_raw.head(3)

194


Unnamed: 0,Country,Suspected,Confirmed,Suspected from the other source,Total
0,Afghanistan,82.0,36.0,0.0,82.0
1,Albania,250.0,137.0,0.0,250.0
2,Algeria,0.0,0.0,0.0,0.0


In [6]:
# Join outbreak counts to country population / ISO code on the country name.
df_outbreak = df_outbreak_raw.merge(df_pop, how='left', on='Country')
df_outbreak = df_outbreak.loc[:, ['alpha3', 'Country', 'Total', year_pop]]
# Outbreak rows minus the merged rows that received an ISO code.
print(str(df_outbreak_raw.shape[0] - df_outbreak['alpha3'].notnull().sum()) + " row(s) have NaN as ISO 3 (alpha3).")
df_outbreak.sort_values(by='alpha3').head()

0 row(s) have NaN as ISO 3 (alpha3).


Unnamed: 0,alpha3,Country,Total,2015
0,AFG,Afghanistan,82.0,33736
4,AGO,Angola,192.0,27859
1,ALB,Albania,250.0,2923
3,AND,Andorra,0.0,78
181,ARE,United Arab Emirates,28.0,9154


#### Import $V_{ij}^{t}$

In [7]:
# IATA passenger flows: keep only the rows for the configured IATA year and the
# three columns used downstream.
in_table = r'C:\Users\Ensheng\Desktop\mapping\IATA\flow_XY.csv'
df_iata = pd.read_csv(in_table)
df_iata = df_iata.loc[df_iata['year'].eq(year_iata), ['FIPS', 'ISO', 'paxVolume']]
print(df_iata.shape[0])
df_iata.head()

38109


Unnamed: 0,FIPS,ISO,paxVolume
332866,1033.0,MEX,2
332867,1033.0,CHE,2
332868,1045.0,ARG,16
332869,1045.0,ISL,7
332870,1045.0,ITA,103


#### Import $NME_{j}^{t}$ and $P_{j}^{t}$

In [8]:
# County-level table holding the NME rate columns and 'Population'.
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\ModelInputOutputAll 4_23.csv'
df_nme = pd.read_csv(in_table)
print(df_nme.shape[0])
df_nme.head()

3142


Unnamed: 0,County Name,State,FIPS,2015_NME,2016_NME,State_Avg_NME,Population,Static,Year2011,Year2012,Year2013,Year2014,Year2015,Year2016,Year2017,Year2018,Year2019
0,Autauga,Alabama,1001,,,0.006,55504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Baldwin,Alabama,1003,,,0.006,212628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Barbour,Alabama,1005,,,0.006,25270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bibb,Alabama,1007,,,0.006,22668,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Blount,Alabama,1009,,,0.006,58013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Human-readable county label of the form "<County Name>, <State>".
df_nme['County'] = df_nme['County Name'].str.cat(df_nme['State'], sep=', ')

In [10]:
# County NME rate, taken from the first available source in priority order:
# 2016 county value, then 2015 county value, then the state average.
df_nme['FIPS_NME'] = (
    df_nme['2016_NME']
    .fillna(df_nme['2015_NME'])
    .fillna(df_nme['State_Avg_NME'])
)

In [11]:
# Keep only the columns used downstream, then list counties left without any NME value.
df_nme = df_nme.loc[:, ['FIPS', 'County', '2016_NME', '2015_NME', 'State_Avg_NME', 'FIPS_NME', 'Population']]
print("No NME for the following counties:")
df_nme[df_nme['FIPS_NME'].isnull()]

No NME for the following counties:


Unnamed: 0,FIPS,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
3119,56001,"Albany, Wyoming",,,,,38332
3120,56003,"Big Horn, Wyoming",,,,,11906
3121,56005,"Campbell, Wyoming",,,,,46242
3122,56007,"Carbon, Wyoming",,,,,15303
3123,56009,"Converse, Wyoming",,,,,13809
3124,56011,"Crook, Wyoming",,,,,7410
3125,56013,"Fremont, Wyoming",,,,,39803
3126,56015,"Goshen, Wyoming",,,,,13378
3127,56017,"Hot Springs, Wyoming",,,,,4696
3128,56019,"Johnson, Wyoming",,,,,8476


#### Calculate $r_{ij}^{t}$

In [12]:
# One row per travel record: attach origin-country outbreak data by ISO code,
# then destination-county NME / population by FIPS.
df_temp = df_iata.merge(df_outbreak, how='left', left_on='ISO', right_on='alpha3')
df_factors = df_temp.merge(df_nme, how='left', on='FIPS')
df_factors.head()

Unnamed: 0,FIPS,ISO,paxVolume,alpha3,Country,Total,2015,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,1033.0,MEX,2,MEX,Mexico,769.0,125891.0,"Colbert, Alabama",,,0.006,0.006,54500.0
1,1033.0,CHE,2,CHE,Switzerland,105.0,8320.0,"Colbert, Alabama",,,0.006,0.006,54500.0
2,1045.0,ARG,16,ARG,Argentina,5.0,43418.0,"Dale, Alabama",,,0.006,0.006,49226.0
3,1045.0,ISL,7,ISL,Iceland,1.0,330.0,"Dale, Alabama",,,0.006,0.006,49226.0
4,1045.0,ITA,103,ITA,Italy,385.0,59504.0,"Dale, Alabama",,,0.006,0.006,49226.0


In [13]:
# Give the factor columns descriptive names (FIPS_* = county, ISO_* = country)
# and put them in a fixed order.
df_factors = df_factors.rename(columns={
    'Population': 'FIPS_Pop',
    'Total': 'ISO_Case',
    year_pop: 'ISO_Pop',
})
df_factors = df_factors.loc[:, ['FIPS', 'County', 'FIPS_NME', 'FIPS_Pop', 'ISO', 'Country', 'ISO_Case', 'ISO_Pop', 'paxVolume']]
print(df_factors.shape[0])
df_factors.head()

38109


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume
0,1033.0,"Colbert, Alabama",0.006,54500.0,MEX,Mexico,769.0,125891.0,2
1,1033.0,"Colbert, Alabama",0.006,54500.0,CHE,Switzerland,105.0,8320.0,2
2,1045.0,"Dale, Alabama",0.006,49226.0,ARG,Argentina,5.0,43418.0,16
3,1045.0,"Dale, Alabama",0.006,49226.0,ISL,Iceland,1.0,330.0,7
4,1045.0,"Dale, Alabama",0.006,49226.0,ITA,Italy,385.0,59504.0,103


In [14]:
# Drop routes without outbreak data, then routes without travel volume,
# printing the remaining row count after each step.
df_factors = df_factors.dropna(subset=['ISO_Case'])
print(df_factors.shape[0])
df_factors = df_factors.dropna(subset=['paxVolume'])
print(df_factors.shape[0])

33477
33477


#### Calculate $r_{j}^{t}$

In [15]:
# Per-route raw risk r_ij = (cases / country population) * travel volume
#                           * county NME rate * county population.
# df_factors was produced by boolean filtering, so assigning a column onto it
# in place can raise SettingWithCopyWarning; assign() returns a new frame instead.
df_factors = df_factors.assign(
    Route_Risk=(df_factors['ISO_Case'] / df_factors['ISO_Pop'])
    * df_factors['paxVolume']
    * df_factors['FIPS_NME']
    * df_factors['FIPS_Pop']
)

In [16]:
# County raw risk r_j: sum of route risks over all origin countries.
df_risk = (
    df_factors
    .groupby(['FIPS', 'County'])
    .agg({'Route_Risk': 'sum'})
    .reset_index()
)
df_risk['FIPS_RawRisk'] = df_risk['Route_Risk']
df_risk.head()

Unnamed: 0,FIPS,County,Route_Risk,FIPS_RawRisk
0,1033.0,"Colbert, Alabama",12.24854,12.24854
1,1045.0,"Dale, Alabama",18804.88,18804.88
2,1073.0,"Jefferson, Alabama",4623331.0,4623331.0
3,1089.0,"Madison, Alabama",1785091.0,1785091.0
4,1097.0,"Mobile, Alabama",1675536.0,1675536.0


#### Normalize and list the Top 25

In [17]:
# Scale raw risk by the largest county value, rank counties (1 = highest risk),
# and show the top of the list.
highest_risk = df_risk['FIPS_RawRisk'].max()
df_risk['Risk'] = df_risk['FIPS_RawRisk'].div(highest_risk)
df_risk['FIPS_Rank'] = df_risk['Risk'].rank(ascending=False)
df_risk = df_risk.loc[:, ['FIPS', 'County', 'FIPS_RawRisk', 'Risk', 'FIPS_Rank']]
# reset_index() without drop keeps the pre-sort position as an 'index' column
df_risk = df_risk.sort_values(by='Risk', ascending=False).reset_index()
df_risk.head(25)

Unnamed: 0,index,FIPS,County,FIPS_RawRisk,Risk,FIPS_Rank
0,115,17031.0,"Cook, Illinois",20383940000.0,1.0,1.0
1,88,12086.0,"Miami-Dade, Florida",11630600000.0,0.570577,2.0
2,45,6037.0,"Los Angeles, California",11136400000.0,0.546332,3.0
3,258,36081.0,"Queens, New-York",7994375000.0,0.39219,4.0
4,365,53033.0,"King, Washington",2735795000.0,0.134213,5.0
5,27,4013.0,"Maricopa, Arizona",2616899000.0,0.12838,6.0
6,231,32003.0,"Clark, Nevada",2382140000.0,0.116864,7.0
7,333,48201.0,"Harris, Texas",1667728000.0,0.081816,8.0
8,80,12011.0,"Broward, Florida",1661975000.0,0.081534,9.0
9,238,34013.0,"Essex, New-Jersey",1556097000.0,0.076339,10.0


In [18]:
# Write the county-level raw-risk table to <folder>MeaslesRisk_US_<year>_raw.csv.
result = df_risk
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_raw.csv'])
result.to_csv(output_csv, encoding='utf-8', index=False)

In [19]:
# Route-level detail: every route with its county's aggregate risk attached.
df_complete = pd.merge(df_factors, df_risk, how='left', left_on='FIPS', right_on='FIPS')
# Highest-risk counties first; within a county, highest-risk routes first.
df_complete = df_complete.sort_values(by=['Risk', 'Route_Risk'], ascending=False)
# Rank routes within each county. Group by FIPS, not FIPS_Rank: FIPS_Rank uses
# the default average-method rank, so counties with equal risk share one value
# and grouping on it would pool their routes into a single ranking.
df_complete['Route_Rank'] = df_complete.groupby('FIPS')['Route_Risk'].rank(ascending=False, method='dense')
# 'County' exists in both merge inputs, so the merge suffixed it; keep one copy.
df_complete = df_complete.rename(index=str, columns={"County_x": "County"})
df_complete = df_complete.drop(columns=['County_y'])
print(len(df_complete))
df_complete.head(10)

33477


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume,Route_Risk,index,FIPS_RawRisk,Risk,FIPS_Rank,Route_Rank
10091,17031.0,"Cook, Illinois",0.051,5211263.0,ISR,Israel,3400.0,8065.0,50807,5692608000.0,115.0,20383940000.0,1.0,1.0,1.0
10014,17031.0,"Cook, Illinois",0.051,5211263.0,UKR,Ukraine,25368.0,44658.0,21332,3220562000.0,115.0,20383940000.0,1.0,1.0,2.0
9974,17031.0,"Cook, Illinois",0.051,5211263.0,PHL,Philippines,12734.0,101716.0,76592,2548427000.0,115.0,20383940000.0,1.0,1.0,3.0
9958,17031.0,"Cook, Illinois",0.051,5211263.0,MEX,Mexico,769.0,125891.0,952496,1546351000.0,115.0,20383940000.0,1.0,1.0,4.0
10031,17031.0,"Cook, Illinois",0.051,5211263.0,BRA,Brazil,10213.0,205962.0,63009,830389800.0,115.0,20383940000.0,1.0,1.0,5.0
10005,17031.0,"Cook, Illinois",0.051,5211263.0,THA,Thailand,2986.0,68658.0,41421,478776000.0,115.0,20383940000.0,1.0,1.0,6.0
10092,17031.0,"Cook, Illinois",0.051,5211263.0,IND,India,7976.0,1309054.0,267720,433532400.0,115.0,20383940000.0,1.0,1.0,7.0
9826,17031.0,"Cook, Illinois",0.051,5211263.0,MEX,Mexico,769.0,125891.0,211915,344038100.0,115.0,20383940000.0,1.0,1.0,8.0
9924,17031.0,"Cook, Illinois",0.051,5211263.0,JPN,Japan,1602.0,127975.0,101330,337123200.0,115.0,20383940000.0,1.0,1.0,9.0
9921,17031.0,"Cook, Illinois",0.051,5211263.0,ITA,Italy,385.0,59504.0,152038,261444700.0,115.0,20383940000.0,1.0,1.0,10.0


In [20]:
# Write the route-level table to <folder>MeaslesRisk_US_<year>_raw_route.csv.
result = df_complete
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_raw_route.csv'])
result.to_csv(output_csv, encoding='utf-8', index=False)

## Task 2: Travel volume proportional to the population (or pop density)

#### Import neighboring relationship table

In [21]:
# County adjacency table: one row per (source county, neighboring county) pair.
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\nbr.csv'
df_nbr = pd.read_csv(in_table)
df_nbr = df_nbr.loc[:, ['src_FIPS', 'nbr_FIPS']]
print(df_nbr.shape[0])
df_nbr.head()

18680


Unnamed: 0,src_FIPS,nbr_FIPS
0,1001.0,1021.0
1,1001.0,1047.0
2,1001.0,1051.0
3,1001.0,1085.0
4,1001.0,1101.0


In [22]:
# Counties that appear in the IATA data, with their total passenger volume.
df_iataCounty = (
    df_iata
    .groupby(['FIPS'])
    .agg({'paxVolume': 'sum'})
    .reset_index()
)
df_iataCounty = df_iataCounty.dropna(subset=['paxVolume'])
print(str(df_nme.shape[0]) + " counties in the US.")
print(str(df_iataCounty.shape[0]) + " counties have IATA travel data.")

3142 counties in the US.
394 counties have IATA travel data.


In [23]:
# Restrict the adjacency table to source counties that have IATA data ("hubs").
df_temp = df_nbr.merge(df_iataCounty, how='left', left_on='src_FIPS', right_on='FIPS')
df_hub = df_temp.dropna(subset=['paxVolume'])
# we will only work with these counties and their neighbors
print(str(df_hub.shape[0]) + " neighboring relationships remain.")
print(str(df_hub['src_FIPS'].nunique()) + " hub counties.")
df_hub.head(10)

2227 neighboring relationships remain.
390 hub counties.


Unnamed: 0,src_FIPS,nbr_FIPS,FIPS,paxVolume
97,1033.0,1059.0,1033.0,4.0
98,1033.0,1077.0,1033.0,4.0
99,1033.0,1079.0,1033.0,4.0
100,1033.0,28141.0,1033.0,4.0
131,1045.0,1005.0,1045.0,5087.0
132,1045.0,1031.0,1045.0,5087.0
133,1045.0,1061.0,1045.0,5087.0
134,1045.0,1067.0,1045.0,5087.0
135,1045.0,1069.0,1045.0,5087.0
136,1045.0,1109.0,1045.0,5087.0


In [24]:
# IATA counties that never appear as a source county in the hub adjacency subset.
print("The following (island) counties have IATA data but no neighboring counties: ")
print(set(df_iataCounty['FIPS'].unique()).difference(set(df_hub['src_FIPS'].unique())))

The following (island) counties have IATA data but no neighboring counties: 
{15001.0, 25019.0, 15003.0, 15007.0}


#### Update hub county list

In [25]:
# src_FIPS is the hub county; nbr_FIPS lists all neighboring counties along with
# the hub county itself. Adding the (hub, hub) self-pair also gives hub counties
# that have no neighbors at least one row, which clears out the island county issue.
#
# assign() is used instead of in-place column assignment because df_iataCounty
# was produced by filtering, and pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
df_iataCounty = df_iataCounty.assign(
    src_FIPS=df_iataCounty["FIPS"],
    nbr_FIPS=df_iataCounty["FIPS"],
)[["src_FIPS", "nbr_FIPS"]]
df_hub = df_hub[["src_FIPS", "nbr_FIPS"]]
df_hub = pd.concat([df_hub, df_iataCounty])
print(str(len(df_hub)) + " neighboring relationships remain.")
print(str(df_hub.src_FIPS.nunique()) + " hub counties.")
# reset_index() without drop keeps the pre-sort labels as an 'index' column
df_hub = df_hub.sort_values(["src_FIPS", "nbr_FIPS"]).reset_index()
df_hub.head(10)

2621 neighboring relationships remain.
394 hub counties.


Unnamed: 0,index,src_FIPS,nbr_FIPS
0,0,1033.0,1033.0
1,97,1033.0,1059.0
2,98,1033.0,1077.0
3,99,1033.0,1079.0
4,100,1033.0,28141.0
5,131,1045.0,1005.0
6,132,1045.0,1031.0
7,1,1045.0,1045.0
8,133,1045.0,1061.0
9,134,1045.0,1067.0


#### Merge county population

In [26]:
# Attach each neighboring county's attributes (including 'Population') to the hub pairs.
# Note: this reuses the name df_pop, replacing the country population table defined earlier.
df_pop = df_hub.merge(df_nme, how='left', left_on='nbr_FIPS', right_on='FIPS')
df_pop.head()

Unnamed: 0,index,src_FIPS,nbr_FIPS,FIPS,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,0,1033.0,1033.0,1033,"Colbert, Alabama",,,0.006,0.006,54500
1,97,1033.0,1059.0,1059,"Franklin, Alabama",,,0.006,0.006,31495
2,98,1033.0,1077.0,1077,"Lauderdale, Alabama",,,0.006,0.006,92538
3,99,1033.0,1079.0,1079,"Lawrence, Alabama",,,0.006,0.006,33049
4,100,1033.0,28141.0,28141,"Tishomingo, Mississippi",,,0.006,0.006,19542


#### Calculate population percentage

In [27]:
# Population share (in percent) of each county within its hub's catchment,
# i.e. the hub county plus its neighbors.
# Population per (hub, neighbor) pair.
df_pop_tmp = df_pop.groupby(['src_FIPS', 'nbr_FIPS'], as_index=False)['Population'].sum()
# Total catchment population of each hub, broadcast back onto every pair row.
# transform('sum') replaces the earlier groupby(level=0).apply(lambda ...) form,
# which relied on float(<Series>) and on apply() not prepending the group key
# to the result index - neither holds on current pandas versions.
hub_total_pop = df_pop_tmp.groupby('src_FIPS')['Population'].transform('sum')
df_poppct = df_pop_tmp.assign(POPPCT=100 * df_pop_tmp['Population'] / hub_total_pop)
df_poppct = df_poppct[['src_FIPS', 'nbr_FIPS', 'POPPCT']]

In [28]:
# Sanity check: should be the same as len(df_hub), the count of neighboring
# pairs + the count of hub counties.
print(df_poppct.shape[0])
df_poppct.head(15)

2621


Unnamed: 0,src_FIPS,nbr_FIPS,POPPCT
0,1033.0,1033.0,23.580416
1,1033.0,1059.0,13.626884
2,1033.0,1077.0,40.038248
3,1033.0,1079.0,14.299251
4,1033.0,28141.0,8.455202
5,1045.0,1005.0,8.216523
6,1045.0,1031.0,16.866796
7,1045.0,1045.0,16.005801
8,1045.0,1061.0,8.59077
9,1045.0,1067.0,5.575335


#### Calculate travel volume for each route

In [29]:
# Preview the hub-level travel records before they are redistributed.
df_iata.head()

Unnamed: 0,FIPS,ISO,paxVolume
332866,1033.0,MEX,2
332867,1033.0,CHE,2
332868,1045.0,ARG,16
332869,1045.0,ISL,7
332870,1045.0,ITA,103


In [30]:
# Expand every hub travel record into one row per county in the hub's catchment.
df_route = df_iata.merge(df_poppct, how='left', left_on='FIPS', right_on='src_FIPS')
print(df_route.shape[0])
df_route.head(15)

253571


Unnamed: 0,FIPS,ISO,paxVolume,src_FIPS,nbr_FIPS,POPPCT
0,1033.0,MEX,2,1033.0,1033.0,23.580416
1,1033.0,MEX,2,1033.0,1059.0,13.626884
2,1033.0,MEX,2,1033.0,1077.0,40.038248
3,1033.0,MEX,2,1033.0,1079.0,14.299251
4,1033.0,MEX,2,1033.0,28141.0,8.455202
5,1033.0,CHE,2,1033.0,1033.0,23.580416
6,1033.0,CHE,2,1033.0,1059.0,13.626884
7,1033.0,CHE,2,1033.0,1077.0,40.038248
8,1033.0,CHE,2,1033.0,1079.0,14.299251
9,1033.0,CHE,2,1033.0,28141.0,8.455202


In [31]:
# Split each hub's passenger volume across its catchment by population share (POPPCT is in percent).
df_route["IncomingTravel"] = df_route["paxVolume"].mul(df_route["POPPCT"]).div(100)
df_route.head(15)

Unnamed: 0,FIPS,ISO,paxVolume,src_FIPS,nbr_FIPS,POPPCT,IncomingTravel
0,1033.0,MEX,2,1033.0,1033.0,23.580416,0.471608
1,1033.0,MEX,2,1033.0,1059.0,13.626884,0.272538
2,1033.0,MEX,2,1033.0,1077.0,40.038248,0.800765
3,1033.0,MEX,2,1033.0,1079.0,14.299251,0.285985
4,1033.0,MEX,2,1033.0,28141.0,8.455202,0.169104
5,1033.0,CHE,2,1033.0,1033.0,23.580416,0.471608
6,1033.0,CHE,2,1033.0,1059.0,13.626884,0.272538
7,1033.0,CHE,2,1033.0,1077.0,40.038248,0.800765
8,1033.0,CHE,2,1033.0,1079.0,14.299251,0.285985
9,1033.0,CHE,2,1033.0,28141.0,8.455202,0.169104


In [32]:
# Total redistributed travel per (county, origin country), summed over all hubs
# whose catchment includes the county.
df_iata_new = (
    df_route
    .groupby(['nbr_FIPS', 'ISO'])
    .agg({'IncomingTravel': 'sum'})
    .reset_index()
)
print(df_iata_new.shape[0])
df_iata_new.head()

208207


Unnamed: 0,nbr_FIPS,ISO,IncomingTravel
0,1001.0,ABW,4.932538
1,1001.0,AFG,1.973015
2,1001.0,ALB,0.36994
3,1001.0,ARE,18.25039
4,1001.0,ARG,6.165672


In [33]:
# Update df_iata with travel volume for more counties: the redistributed table
# takes over the FIPS / ISO / paxVolume layout, replacing the hub-only df_iata.
df_iata_new = df_iata_new.assign(
    FIPS=df_iata_new["nbr_FIPS"],
    paxVolume=df_iata_new["IncomingTravel"],
)
df_iata = df_iata_new.loc[:, ["FIPS", "ISO", "paxVolume"]]

#### Calculate risk (same as Task 1)

#### Calculate $r_{ij}^{t}$

In [34]:
# Same joins as in Task 1, now on the redistributed travel table: outbreak data
# by ISO code, then county NME / population by FIPS.
df_temp = df_iata.merge(df_outbreak, how='left', left_on='ISO', right_on='alpha3')
df_factors = df_temp.merge(df_nme, how='left', on='FIPS')
df_factors.head()

Unnamed: 0,FIPS,ISO,paxVolume,alpha3,Country,Total,2015,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,1001.0,ABW,4.932538,,,,,"Autauga, Alabama",,,0.006,0.006,55504
1,1001.0,AFG,1.973015,AFG,Afghanistan,82.0,33736.0,"Autauga, Alabama",,,0.006,0.006,55504
2,1001.0,ALB,0.36994,ALB,Albania,250.0,2923.0,"Autauga, Alabama",,,0.006,0.006,55504
3,1001.0,ARE,18.25039,ARE,United Arab Emirates,28.0,9154.0,"Autauga, Alabama",,,0.006,0.006,55504
4,1001.0,ARG,6.165672,ARG,Argentina,5.0,43418.0,"Autauga, Alabama",,,0.006,0.006,55504


In [35]:
# Give the factor columns descriptive names (FIPS_* = county, ISO_* = country)
# and put them in a fixed order.
df_factors = df_factors.rename(columns={
    'Population': 'FIPS_Pop',
    'Total': 'ISO_Case',
    year_pop: 'ISO_Pop',
})
df_factors = df_factors.loc[:, ['FIPS', 'County', 'FIPS_NME', 'FIPS_Pop', 'ISO', 'Country', 'ISO_Case', 'ISO_Pop', 'paxVolume']]
print(df_factors.shape[0])
df_factors.head()

208207


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume
0,1001.0,"Autauga, Alabama",0.006,55504,ABW,,,,4.932538
1,1001.0,"Autauga, Alabama",0.006,55504,AFG,Afghanistan,82.0,33736.0,1.973015
2,1001.0,"Autauga, Alabama",0.006,55504,ALB,Albania,250.0,2923.0,0.36994
3,1001.0,"Autauga, Alabama",0.006,55504,ARE,United Arab Emirates,28.0,9154.0,18.25039
4,1001.0,"Autauga, Alabama",0.006,55504,ARG,Argentina,5.0,43418.0,6.165672


In [36]:
# Drop routes without outbreak data, then routes without travel volume,
# printing the remaining row count after each step.
df_factors = df_factors.dropna(subset=['ISO_Case'])
print(df_factors.shape[0])
df_factors = df_factors.dropna(subset=['paxVolume'])
print(df_factors.shape[0])

183259
183259


#### Calculate $r_{j}^{t}$

In [37]:
# Per-route raw risk r_ij = (cases / country population) * travel volume
#                           * county NME rate * county population.
# df_factors was produced by boolean filtering, so assigning a column onto it
# in place can raise SettingWithCopyWarning; assign() returns a new frame instead.
df_factors = df_factors.assign(
    Route_Risk=(df_factors['ISO_Case'] / df_factors['ISO_Pop'])
    * df_factors['paxVolume']
    * df_factors['FIPS_NME']
    * df_factors['FIPS_Pop']
)

In [38]:
# County raw risk r_j: sum of route risks over all origin countries.
df_risk = (
    df_factors
    .groupby(['FIPS', 'County'])
    .agg({'Route_Risk': 'sum'})
    .reset_index()
)
df_risk['FIPS_RawRisk'] = df_risk['Route_Risk']
df_risk.head()

Unnamed: 0,FIPS,County,Route_Risk,FIPS_RawRisk
0,1001.0,"Autauga, Alabama",5870.668337,5870.668337
1,1003.0,"Baldwin, Alabama",581245.534315,581245.534315
2,1005.0,"Barbour, Alabama",793.175736,793.175736
3,1007.0,"Bibb, Alabama",2743.589913,2743.589913
4,1009.0,"Blount, Alabama",17969.808073,17969.808073


#### Normalize and list the Top 25

In [43]:
# Scale raw risk by the largest county value, rank counties (1 = highest risk),
# and show the top of the list.
highest_risk = df_risk['FIPS_RawRisk'].max()
df_risk['Risk'] = df_risk['FIPS_RawRisk'].div(highest_risk)
df_risk['FIPS_Rank'] = df_risk['Risk'].rank(ascending=False)
df_risk = df_risk.loc[:, ['FIPS', 'County', 'FIPS_RawRisk', 'Risk', 'FIPS_Rank']]
# reset_index() without drop keeps the pre-sort position as an 'index' column
df_risk = df_risk.sort_values(by='Risk', ascending=False).reset_index()
df_risk.head(25)

Unnamed: 0,index,FIPS,County,FIPS_RawRisk,Risk,FIPS_Rank
0,0,17031.0,"Cook, Illinois",11979930000.0,1.0,1.0
1,1,12086.0,"Miami-Dade, Florida",7223368000.0,0.602956,2.0
2,2,6037.0,"Los Angeles, California",6631907000.0,0.553585,3.0
3,3,12011.0,"Broward, Florida",3610566000.0,0.301385,4.0
4,4,36047.0,"Kings, New-York",2499862000.0,0.208671,5.0
5,5,4013.0,"Maricopa, Arizona",2021321000.0,0.168726,6.0
6,6,36081.0,"Queens, New-York",1982117000.0,0.165453,7.0
7,7,53033.0,"King, Washington",1331833000.0,0.111172,8.0
8,8,32003.0,"Clark, Nevada",1140331000.0,0.095187,9.0
9,9,48201.0,"Harris, Texas",1130746000.0,0.094387,10.0


In [40]:
# Write the population-weighted county risk table to <folder>MeaslesRisk_US_<year>_pop.csv.
result = df_risk
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_pop.csv'])
result.to_csv(output_csv, encoding='utf-8', index=False)

In [41]:
# Route-level detail: every route with its county's aggregate risk attached.
df_complete = pd.merge(df_factors, df_risk, how='left', left_on='FIPS', right_on='FIPS')
# Highest-risk counties first; within a county, highest-risk routes first.
df_complete = df_complete.sort_values(by=['Risk', 'Route_Risk'], ascending=False)
# Rank routes within each county. Group by FIPS, not FIPS_Rank: FIPS_Rank uses
# the default average-method rank, so counties with equal risk share one value
# and grouping on it would pool their routes into a single ranking.
df_complete['Route_Rank'] = df_complete.groupby('FIPS')['Route_Risk'].rank(ascending=False, method='dense')
# 'County' exists in both merge inputs, so the merge suffixed it; keep one copy.
df_complete = df_complete.rename(index=str, columns={"County_x": "County"})
df_complete = df_complete.drop(columns=['County_y'])
print(len(df_complete))
df_complete.head(10)

183259


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume,Route_Risk,index,FIPS_RawRisk,Risk,FIPS_Rank,Route_Rank
40764,17031.0,"Cook, Illinois",0.051,5211263,ISR,Israel,3400.0,8065.0,29884.095087,3348327000.0,365,11979930000.0,1.0,1.0,1.0
40848,17031.0,"Cook, Illinois",0.051,5211263,UKR,Ukraine,25368.0,44658.0,12580.611616,1899336000.0,365,11979930000.0,1.0,1.0,2.0
40814,17031.0,"Cook, Illinois",0.051,5211263,PHL,Philippines,12734.0,101716.0,45148.21005,1502205000.0,365,11979930000.0,1.0,1.0,3.0
40789,17031.0,"Cook, Illinois",0.051,5211263,MEX,Mexico,769.0,125891.0,684340.958244,1111008000.0,365,11979930000.0,1.0,1.0,4.0
40705,17031.0,"Cook, Illinois",0.051,5211263,BRA,Brazil,10213.0,205962.0,37393.320287,492803100.0,365,11979930000.0,1.0,1.0,5.0
40839,17031.0,"Cook, Illinois",0.051,5211263,THA,Thailand,2986.0,68658.0,24388.37897,281899800.0,365,11979930000.0,1.0,1.0,6.0
40759,17031.0,"Cook, Illinois",0.051,5211263,IND,India,7976.0,1309054.0,157542.686617,255116800.0,365,11979930000.0,1.0,1.0,7.0
40768,17031.0,"Cook, Illinois",0.051,5211263,JPN,Japan,1602.0,127975.0,59703.541605,198632600.0,365,11979930000.0,1.0,1.0,8.0
40765,17031.0,"Cook, Illinois",0.051,5211263,ITA,Italy,385.0,59504.0,89670.504404,154197500.0,365,11979930000.0,1.0,1.0,9.0
40791,17031.0,"Cook, Illinois",0.051,5211263,MKD,Macedonia,165.0,2079.0,5694.951255,120124800.0,365,11979930000.0,1.0,1.0,10.0


In [42]:
# Write the route-level table to <folder>MeaslesRisk_US_<year>_pop_route.csv.
result = df_complete
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_pop_route.csv'])
result.to_csv(output_csv, encoding='utf-8', index=False)