# Measles_Risk_Population

### Task 1. Raw measles risk
Calculate raw risk for each county with $$ r_{ij}^{t} = C_{i}^{t} \times V_{ij}^{t} \times NME_{j}^{t} \times P_{j}^{t} $$
where <br/>
$i$ is the origin country, <br/>
$j$ is the US county, <br/>
$t$ is the year, <br/>
$r_{ij}^{t}$ is the measles risk from country $i$ to county $j$ in year $t$, <br/>
$C_{i}^{t}$ is the case incidence in Country $i$ in year $t$, <br/>
$V_{ij}^{t}$ is the travel volume (million) from country $i$ to county $j$ in year $t$, <br/>
$NME_{j}^{t}$ is the NME rate in county $j$ in year $t$, <br/>
$P_{j}^{t}$ is the county $j$ population in year $t$. <br/>
$$ r_{j}^{t} = \sum_{i} r_{ij}^{t} = (\sum_{i} C_{i}^{t} \times V_{ij}^{t}) \times NME_{j}^{t} \times P_{j}^{t}$$
where <br/>
$r_{j}^{t}$ is the total measles risk of county $j$ in year $t$, summed over all origin countries $i$.

### Task 2. Rearrange travel volume by population
For counties where there is no international travel, update $V_{ij}^{t}$. <br/>
Task 2.1: calculate the average of raw risk in neighboring counties <br/>
Task 2.2: distribute each hub county's travel volume across the hub and its neighboring counties in proportion to population <br/>

## Task 1: Calculate measles risk in county level

In [1]:
# Notebook-wide setup: imports, run timestamp, and analysis parameters
import datetime
import pandas as pd

# captured once; reused in the file name of every exported CSV
t = datetime.datetime.now()

# analysis year: filters the WHO monthly table and appears in output file names
year = 2019
# world-population column used as the country-level denominator
# NOTE(review): the earlier comment here said "we use 2015 data for 2015-2019",
# which does not match the column name below — confirm which is intended
year_pop = 'pop2019'
# IATA travel year (2017 data stands in for 2018 and 2019)
year_iata = 2017
# output directory; a raw string cannot end in a single backslash, so this
# literal ends in two
folder = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\\'

pd.set_option("display.max_rows", 999)

#### Import world population

In [2]:
# Load the world population table (one row per country)
# ref: http://worldpopulationreview.com/countries/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\world_population.csv'
df_pop = pd.read_csv(in_table)
print(df_pop.shape[0])  # row count
df_pop.head()

230


Unnamed: 0,Rank,name,pop2019,pop2018,GrowthRate,area,Density
0,39,Afghanistan,37209.007,36373.176,1.022979,652230.0,57.048905
1,138,Albania,2938.428,2934.363,1.001385,28748.0,102.213302
2,34,Algeria,42679.018,42008.054,1.015972,2381741.0,17.919252
3,208,American Samoa,55.727,55.679,1.000862,199.0,280.035176
4,202,Andorra,77.072,76.953,1.001546,468.0,164.683761


In [3]:
# Load the country-name to ISO code lookup (alpha2 / alpha3 / numeric)
# ref: http://worldpopulationreview.com/country-codes/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\country_code.csv'
df_code = pd.read_csv(in_table)
print(df_code.shape[0])  # row count
df_code.head()

237


Unnamed: 0,name,alpha2,alpha3,num3
0,Afghanistan,AF,AFG,4
1,Albania,AL,ALB,8
2,Algeria,DZ,DZA,12
3,American Samoa,AS,ASM,16
4,Andorra,AD,AND,20


#### Import WHO data (exclude 2019)

In [56]:
# WHO monthly measles case counts, restricted to the analysis year
# ref: https://www.who.int/immunization/monitoring_surveillance/burden/vpd/surveillance_type/active/measles_monthlydata/en/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\measlescasesbycountrybymonth.xls'
df_who = pd.read_excel(in_table, sheet_name='WEB')
is_target_year = df_who['Year'] == year
df_who = df_who[is_target_year]
print(df_who.shape[0])  # rows left after the year filter
df_who.head(3)

194


Unnamed: 0,Region,ISO3,Country,Year,January,February,March,April,May,June,July,August,September,October,November,December
8,AFR,AGO,Angola,2019,40.0,49.0,229.0,440.0,0.0,,,,,,,
17,AFR,BDI,Burundi,2019,0.0,6.0,34.0,12.0,0.0,,,,,,,
26,AFR,BEN,Benin,2019,290.0,154.0,45.0,4.0,0.0,,,,,,,


In [57]:
# Collapse the twelve monthly case counts into one annual total per country.
# Only the month columns are summed. The frame also holds text columns
# (Region, ISO3, Country) that pandas >= 2.0 refuses to add up row-wise, and
# summing "every column except Year" would fold an existing Total back into
# itself whenever this cell is re-run.
col_list = ['January', 'February', 'March', 'April', 'May', 'June',
            'July', 'August', 'September', 'October', 'November', 'December']
# assign() builds a new frame, so the year-filtered slice is never written to in place
df_who = df_who.assign(Total=df_who[col_list].sum(axis=1))
print(len(df_who))
df_outbreak_raw = df_who[['ISO3', 'Country', 'Total']]
df_outbreak_raw.head(3)

194


Unnamed: 0,ISO3,Country,Total
8,AGO,Angola,758.0
17,BDI,Burundi,52.0
26,BEN,Benin,493.0


#### WHO 2019 Suspected Data (Optional)

In [4]:
# WHO 2019 suspected-case totals; when run, this replaces df_outbreak_raw
# ref: https://www.who.int/immunization/monitoring_surveillance/burden/vpd/surveillance_type/active/measles_monthlydata/en/
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\WHO_2019_Suspected.xlsx'
df_who = pd.read_excel(in_table)
print(df_who.shape[0])  # row count
# empty cells (in any column) are treated as zero
df_outbreak_raw = df_who.fillna(value=0)
df_outbreak_raw.head(3)

194


Unnamed: 0,Country,ISO3,Total
0,Algeria,DZA,0.0
1,Angola,AGO,854.0
2,Benin,BEN,512.0


In [5]:
# Attach ISO codes to the population table, then population to the case totals.
df_pop3 = df_pop.merge(df_code, how='left', on='name')
df_outbreak = df_outbreak_raw.merge(df_pop3, how='left', left_on='ISO3', right_on='alpha3')
print(len(df_outbreak))
df_outbreak = df_outbreak[['alpha3', 'Country', 'Total', year_pop]]
# rows whose ISO3 found no match in the population/code table
n_unmatched = len(df_outbreak_raw) - df_outbreak.alpha3.notnull().sum()
print("{}".format(n_unmatched) + " row(s) have NaN as ISO 3 (alpha3).")
df_outbreak.sort_values(by='alpha3').head()

194
0 row(s) have NaN as ISO 3 (alpha3).


Unnamed: 0,alpha3,Country,Total,pop2019
82,AFG,Afghanistan,169.0,37209.007
1,AGO,Angola,854.0,31787.566
103,ALB,Albania,508.0,2938.428
104,AND,Andorra,0.0,77.072
101,ARE,United Arab Emirates,66.0,9682.088


#### Import $V_{ij}^{t}$

In [6]:
# IATA passenger flows: keep one year and only the county / country / volume columns
in_table = r'C:\Users\Ensheng\Desktop\mapping\IATA\flow_XY.csv'
df_iata = pd.read_csv(in_table)
# row filter and column selection in a single .loc call
df_iata = df_iata.loc[df_iata['year'] == year_iata, ['FIPS', 'ISO', 'paxVolume']]
print(df_iata.shape[0])
df_iata.head()

38109


Unnamed: 0,FIPS,ISO,paxVolume
332866,1033.0,MEX,2
332867,1033.0,CHE,2
332868,1045.0,ARG,16
332869,1045.0,ISL,7
332870,1045.0,ITA,103


#### Import $NME_{j}^{t}$ and $P_{j}^{t}$

In [7]:
# County table holding the NME columns and county population
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\ModelInputOutputAll 4_23.csv'
df_nme = pd.read_csv(in_table)
print(df_nme.shape[0])  # row count
df_nme.head()

3142


Unnamed: 0,County Name,State,FIPS,2015_NME,2016_NME,State_Avg_NME,Population,Static,Year2011,Year2012,Year2013,Year2014,Year2015,Year2016,Year2017,Year2018,Year2019
0,Autauga,Alabama,1001,,,0.006,55504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Baldwin,Alabama,1003,,,0.006,212628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Barbour,Alabama,1005,,,0.006,25270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bibb,Alabama,1007,,,0.006,22668,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Blount,Alabama,1009,,,0.006,58013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# "County, State" label; later used as the join key against the county-seat table
df_nme['County'] = df_nme['County Name'].str.cat(df_nme['State'], sep=', ')

In [9]:
# One NME rate per county, taking the first value that is present:
# 2016 county rate, then 2015 county rate, then the state average.
df_nme['FIPS_NME'] = (
    df_nme['2016_NME']
    .fillna(df_nme['2015_NME'])
    .fillna(df_nme['State_Avg_NME'])
)

In [10]:
# Keep only the columns used downstream, then list counties left without any NME value
nme_columns = ['FIPS', 'County', '2016_NME', '2015_NME', 'State_Avg_NME', 'FIPS_NME', 'Population']
df_nme = df_nme[nme_columns]
print("No NME for the following counties:")
df_nme[df_nme['FIPS_NME'].isna()]

No NME for the following counties:


Unnamed: 0,FIPS,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
3119,56001,"Albany, Wyoming",,,,,38332
3120,56003,"Big Horn, Wyoming",,,,,11906
3121,56005,"Campbell, Wyoming",,,,,46242
3122,56007,"Carbon, Wyoming",,,,,15303
3123,56009,"Converse, Wyoming",,,,,13809
3124,56011,"Crook, Wyoming",,,,,7410
3125,56013,"Fremont, Wyoming",,,,,39803
3126,56015,"Goshen, Wyoming",,,,,13378
3127,56017,"Hot Springs, Wyoming",,,,,4696
3128,56019,"Johnson, Wyoming",,,,,8476


#### Calculate $r_{ij}^{t}$

In [11]:
# One row per (county, origin country) route: add country cases/population,
# then county NME/population
df_temp = df_iata.merge(df_outbreak, how='left', left_on='ISO', right_on='alpha3')
df_factors = df_temp.merge(df_nme, how='left', on='FIPS')
df_factors.head()

Unnamed: 0,FIPS,ISO,paxVolume,alpha3,Country,Total,pop2019,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,1033.0,MEX,2,MEX,Mexico,1122.0,132328.035,"Colbert, Alabama",,,0.006,0.006,54500.0
1,1033.0,CHE,2,CHE,Switzerland,218.0,8608.259,"Colbert, Alabama",,,0.006,0.006,54500.0
2,1045.0,ARG,16,ARG,Argentina,14.0,45101.781,"Dale, Alabama",,,0.006,0.006,49226.0
3,1045.0,ISL,7,ISL,Iceland,7.0,340.566,"Dale, Alabama",,,0.006,0.006,49226.0
4,1045.0,ITA,103,ITA,Italy,385.0,59216.525,"Dale, Alabama",,,0.006,0.006,49226.0


In [12]:
# Give the model inputs explicit names and keep them in a fixed column order
factor_names = {'Population': 'FIPS_Pop', 'Total': 'ISO_Case', year_pop: 'ISO_Pop'}
factor_order = ['FIPS', 'County', 'FIPS_NME', 'FIPS_Pop', 'ISO', 'Country', 'ISO_Case', 'ISO_Pop', 'paxVolume']
df_factors = df_factors.rename(columns=factor_names)[factor_order]
print(df_factors.shape[0])
df_factors.head()

38109


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume
0,1033.0,"Colbert, Alabama",0.006,54500.0,MEX,Mexico,1122.0,132328.035,2
1,1033.0,"Colbert, Alabama",0.006,54500.0,CHE,Switzerland,218.0,8608.259,2
2,1045.0,"Dale, Alabama",0.006,49226.0,ARG,Argentina,14.0,45101.781,16
3,1045.0,"Dale, Alabama",0.006,49226.0,ISL,Iceland,7.0,340.566,7
4,1045.0,"Dale, Alabama",0.006,49226.0,ITA,Italy,385.0,59216.525,103


In [13]:
# Drop routes with no case count, then routes with no travel volume,
# printing the row count after each step
df_factors = df_factors.dropna(subset=['ISO_Case'])
print(df_factors.shape[0])
df_factors = df_factors.dropna(subset=['paxVolume'])
print(df_factors.shape[0])

33477
33477


#### Calculate $r_{j}^{t}$

In [14]:
# Route risk = country case rate x travel volume x county NME x county population
case_rate = df_factors['ISO_Case'] / df_factors['ISO_Pop']
df_factors['Route_Risk'] = case_rate * df_factors['paxVolume'] * df_factors['FIPS_NME'] * df_factors['FIPS_Pop']

In [15]:
# County raw risk = sum of its route risks
df_risk = df_factors.groupby(['FIPS', 'County'], as_index=False)['Route_Risk'].sum()
df_risk['FIPS_RawRisk'] = df_risk['Route_Risk']
df_risk.head()

Unnamed: 0,FIPS,County,Route_Risk,FIPS_RawRisk
0,1033.0,"Colbert, Alabama",22.10745,22.10745
1,1045.0,"Dale, Alabama",22448.39,22448.39
2,1073.0,"Jefferson, Alabama",4521877.0,4521877.0
3,1089.0,"Madison, Alabama",1484266.0,1484266.0
4,1097.0,"Mobile, Alabama",1391013.0,1391013.0


#### Normalize and list the Top 25

In [16]:
# County -> city lookup, used to label the ranked counties
# ref: https://en.wikipedia.org/wiki/List_of_the_most_populous_counties_in_the_United_States
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\County_Seat.xlsx'
df_seat = pd.read_excel(in_table)
print(df_seat.shape[0])  # row count
df_seat.head(3)

100


Unnamed: 0,County,City
0,"Los Angeles, California",Los Angeles
1,"Cook, Illinois",Chicago
2,"Harris, Texas",Houston


In [17]:
# Scale raw risk so the highest county is 1.0, rank counties, attach the city label
highest_risk = df_risk['FIPS_RawRisk'].max()
df_risk['Risk'] = df_risk['FIPS_RawRisk'].div(highest_risk)
df_risk['FIPS_Rank'] = df_risk['Risk'].rank(ascending=False)
df_risk = (
    df_risk
    .merge(df_seat, how='left', on='County')
    [['FIPS', 'County', 'City', 'FIPS_RawRisk', 'Risk', 'FIPS_Rank']]
    .sort_values(by='Risk', ascending=False)
    .reset_index()  # the pre-sort row label is kept as an 'index' column
)
df_risk.head(50)

Unnamed: 0,index,FIPS,County,City,FIPS_RawRisk,Risk,FIPS_Rank
0,115,17031.0,"Cook, Illinois",Chicago,19752440000.0,1.0,1.0
1,45,6037.0,"Los Angeles, California",Los Angeles,8138341000.0,0.412017,2.0
2,88,12086.0,"Miami-Dade, Florida",Miami,6147145000.0,0.311209,3.0
3,258,36081.0,"Queens, New York","Queens, NYC",5523428000.0,0.279633,4.0
4,365,53033.0,"King, Washington",Seattle,2635080000.0,0.133405,5.0
5,27,4013.0,"Maricopa, Arizona",Phoenix,2243461000.0,0.113579,6.0
6,231,32003.0,"Clark, Nevada",Las Vegas,1730028000.0,0.087586,7.0
7,333,48201.0,"Harris, Texas",Houston,1582767000.0,0.08013,8.0
8,80,12011.0,"Broward, Florida",Fort Lauderdale,1547997000.0,0.07837,9.0
9,104,15003.0,"Honolulu, Hawaii",Honolulu,1312289000.0,0.066437,10.0


In [18]:
# Write the county-level raw-risk table to a timestamped CSV
result = df_risk
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_raw_', t.strftime('%m%d%y%H%M'), '.csv'])
result.to_csv(output_csv, index=False, encoding='utf-8')

In [1]:
# Attach each county's overall risk and rank to its per-route rows, then rank
# the routes inside each county.
df_complete = pd.merge(df_factors, df_risk, how='left', on='FIPS')
df_complete = df_complete.sort_values(by=['Risk', 'Route_Risk'], ascending=False)
# Rank within the county itself. Grouping by FIPS_Rank pooled the routes of any
# counties whose risk tied, because rank() gives tied counties the same value.
df_complete['Route_Rank'] = df_complete.groupby('FIPS')['Route_Risk'].rank(ascending=False, method='dense')
# both inputs carry a County column; keep the left copy under its plain name
df_complete = df_complete.rename(index=str, columns={"County_x": "County"})
df_complete = df_complete.drop(columns=['County_y'])
print(len(df_complete))
df_complete.head(10)

NameError: name 'pd' is not defined

In [20]:
# Write the per-route raw-risk table to a timestamped CSV
result = df_complete
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_raw_route_', t.strftime('%m%d%y%H%M'), '.csv'])
result.to_csv(output_csv, index=False, encoding='utf-8')

## Task 2: Travel volume proportional to the population (or pop density)

#### Import neighboring relationship table

In [21]:
# County neighbor pairs: one row per (source county, neighboring county)
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\nbr.csv'
neighbor_columns = ['src_FIPS', 'nbr_FIPS']
df_nbr = pd.read_csv(in_table)[neighbor_columns]
print(df_nbr.shape[0])
df_nbr.head()

18680


Unnamed: 0,src_FIPS,nbr_FIPS
0,1001.0,1021.0
1,1001.0,1047.0
2,1001.0,1051.0
3,1001.0,1085.0
4,1001.0,1101.0


In [22]:
# Counties that appear in the IATA data, with their total passenger volume
county_volume = df_iata.groupby('FIPS', as_index=False)['paxVolume'].sum()
df_iataCounty = county_volume.dropna(subset=['paxVolume'])
print("{}".format(len(df_nme)) + " counties in the US.")
print("{}".format(len(df_iataCounty)) + " counties have IATA travel data.")

3142 counties in the US.
394 counties have IATA travel data.


In [23]:
# Keep only neighbor pairs whose source county has IATA data (the "hub" counties);
# all later steps work with these hubs and their neighbors
df_temp = df_nbr.merge(df_iataCounty, how='left', left_on='src_FIPS', right_on='FIPS')
df_hub = df_temp.dropna(subset=['paxVolume'])
print("{}".format(len(df_hub)) + " neighboring relationships remain.")
print("{}".format(df_hub['src_FIPS'].nunique()) + " hub counties.")
df_hub.head(10)

2227 neighboring relationships remain.
390 hub counties.


Unnamed: 0,src_FIPS,nbr_FIPS,FIPS,paxVolume
97,1033.0,1059.0,1033.0,4.0
98,1033.0,1077.0,1033.0,4.0
99,1033.0,1079.0,1033.0,4.0
100,1033.0,28141.0,1033.0,4.0
131,1045.0,1005.0,1045.0,5087.0
132,1045.0,1031.0,1045.0,5087.0
133,1045.0,1061.0,1045.0,5087.0
134,1045.0,1067.0,1045.0,5087.0
135,1045.0,1069.0,1045.0,5087.0
136,1045.0,1109.0,1045.0,5087.0


In [24]:
# Counties present in the IATA data that never occur as src_FIPS in the neighbor table
print("The following (island) counties have IATA data but no neighboring counties: ")
iata_fips = set(df_iataCounty.FIPS.unique())
hub_fips = set(df_hub.src_FIPS.unique())
print(iata_fips - hub_fips)

The following (island) counties have IATA data but no neighboring counties: 
{15001.0, 25019.0, 15003.0, 15007.0}


#### Update hub county list

In [25]:
# src_FIPS is the hub county; nbr_FIPS lists every neighboring county plus the
# hub itself. Adding the self-pair also brings in the island counties, which
# have IATA data but no rows in the neighbor table.
# The self-pairs are built in their own frame so df_iataCounty keeps its FIPS
# column and the cells above can still be re-run.
df_self = pd.DataFrame({
    "src_FIPS": df_iataCounty["FIPS"],
    "nbr_FIPS": df_iataCounty["FIPS"],
})
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
df_hub = pd.concat([df_hub[["src_FIPS", "nbr_FIPS"]], df_self])
print(str(len(df_hub)) + " neighboring relationships remain.")
print(str(df_hub.src_FIPS.nunique()) + " hub counties.")
# reset_index() keeps the pre-sort row label as an 'index' column
df_hub = df_hub.sort_values(["src_FIPS", "nbr_FIPS"]).reset_index()
df_hub.head(10)

2621 neighboring relationships remain.
394 hub counties.


Unnamed: 0,index,src_FIPS,nbr_FIPS
0,0,1033.0,1033.0
1,97,1033.0,1059.0
2,98,1033.0,1077.0
3,99,1033.0,1079.0
4,100,1033.0,28141.0
5,131,1045.0,1005.0
6,132,1045.0,1031.0
7,1,1045.0,1045.0
8,133,1045.0,1061.0
9,134,1045.0,1067.0


#### Merge county population

In [26]:
# Attach county population (and NME columns) to each hub/neighbor pair.
# NOTE: this rebinds df_pop, which the world-population cell also assigns.
df_pop = df_hub.merge(df_nme, how='left', left_on='nbr_FIPS', right_on='FIPS')
df_pop.head()

Unnamed: 0,index,src_FIPS,nbr_FIPS,FIPS,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,0,1033.0,1033.0,1033,"Colbert, Alabama",,,0.006,0.006,54500
1,97,1033.0,1059.0,1059,"Franklin, Alabama",,,0.006,0.006,31495
2,98,1033.0,1077.0,1077,"Lauderdale, Alabama",,,0.006,0.006,92538
3,99,1033.0,1079.0,1079,"Lawrence, Alabama",,,0.006,0.006,33049
4,100,1033.0,28141.0,28141,"Tishomingo, Mississippi",,,0.006,0.006,19542


#### Calculate population percentage

In [27]:
# Percentage of each hub's catchment population (hub + neighbors) held by
# each member county.
# Population per (hub, member) pair; repeated pairs, if any, are summed.
df_pop_tmp = df_pop.groupby(['src_FIPS', 'nbr_FIPS']).agg({'Population': 'sum'})
# Catchment total broadcast back onto every pair of the same hub. transform()
# keeps the (src_FIPS, nbr_FIPS) index as is, whereas groupby().apply()
# prepends the group key again on pandas >= 2.0, which makes reset_index()
# fail on the duplicated src_FIPS level.
hub_total = df_pop_tmp.groupby(level=0)['Population'].transform('sum')
df_poppct = (100 * df_pop_tmp['Population'] / hub_total) \
    .rename('POPPCT') \
    .reset_index()

In [28]:
# Row count should equal len(df_hub): the neighboring pairs plus one self-pair per hub county
print(df_poppct.shape[0])
df_poppct.head(15)

2621


Unnamed: 0,src_FIPS,nbr_FIPS,POPPCT
0,1033.0,1033.0,23.580416
1,1033.0,1059.0,13.626884
2,1033.0,1077.0,40.038248
3,1033.0,1079.0,14.299251
4,1033.0,28141.0,8.455202
5,1045.0,1005.0,8.216523
6,1045.0,1031.0,16.866796
7,1045.0,1045.0,16.005801
8,1045.0,1061.0,8.59077
9,1045.0,1067.0,5.575335


#### Calculate travel volume for each route

In [29]:
# Reminder of the route table layout before it is spread over neighbors
df_iata.head()

Unnamed: 0,FIPS,ISO,paxVolume
332866,1033.0,MEX,2
332867,1033.0,CHE,2
332868,1045.0,ARG,16
332869,1045.0,ISL,7
332870,1045.0,ITA,103


In [30]:
# Repeat every hub route once per county in the hub's catchment, carrying that county's share
df_route = df_iata.merge(df_poppct, how='left', left_on='FIPS', right_on='src_FIPS')
print(df_route.shape[0])
df_route.head(15)

253571


Unnamed: 0,FIPS,ISO,paxVolume,src_FIPS,nbr_FIPS,POPPCT
0,1033.0,MEX,2,1033.0,1033.0,23.580416
1,1033.0,MEX,2,1033.0,1059.0,13.626884
2,1033.0,MEX,2,1033.0,1077.0,40.038248
3,1033.0,MEX,2,1033.0,1079.0,14.299251
4,1033.0,MEX,2,1033.0,28141.0,8.455202
5,1033.0,CHE,2,1033.0,1033.0,23.580416
6,1033.0,CHE,2,1033.0,1059.0,13.626884
7,1033.0,CHE,2,1033.0,1077.0,40.038248
8,1033.0,CHE,2,1033.0,1079.0,14.299251
9,1033.0,CHE,2,1033.0,28141.0,8.455202


In [31]:
# Passengers assigned to each catchment county = hub volume x population share (POPPCT is a percentage)
df_route["IncomingTravel"] = df_route["paxVolume"].mul(df_route["POPPCT"]).div(100)
df_route.head(15)

Unnamed: 0,FIPS,ISO,paxVolume,src_FIPS,nbr_FIPS,POPPCT,IncomingTravel
0,1033.0,MEX,2,1033.0,1033.0,23.580416,0.471608
1,1033.0,MEX,2,1033.0,1059.0,13.626884,0.272538
2,1033.0,MEX,2,1033.0,1077.0,40.038248,0.800765
3,1033.0,MEX,2,1033.0,1079.0,14.299251,0.285985
4,1033.0,MEX,2,1033.0,28141.0,8.455202,0.169104
5,1033.0,CHE,2,1033.0,1033.0,23.580416,0.471608
6,1033.0,CHE,2,1033.0,1059.0,13.626884,0.272538
7,1033.0,CHE,2,1033.0,1077.0,40.038248,0.800765
8,1033.0,CHE,2,1033.0,1079.0,14.299251,0.285985
9,1033.0,CHE,2,1033.0,28141.0,8.455202,0.169104


In [32]:
# A county can sit in several hubs' catchments: total its incoming travel per origin country
df_iata_new = df_route.groupby(['nbr_FIPS', 'ISO'], as_index=False)['IncomingTravel'].sum()
print(df_iata_new.shape[0])
df_iata_new.head()

208207


Unnamed: 0,nbr_FIPS,ISO,IncomingTravel
0,1001.0,ABW,4.932538
1,1001.0,AFG,1.973015
2,1001.0,ALB,0.36994
3,1001.0,ARE,18.25039
4,1001.0,ARG,6.165672


In [33]:
# Replace df_iata with the redistributed volumes so the Task 1 steps can be repeated on more counties.
# NOTE: from here on df_iata no longer holds the original IATA rows.
df_iata_new = df_iata_new.assign(
    FIPS=df_iata_new["nbr_FIPS"],
    paxVolume=df_iata_new["IncomingTravel"],
)
df_iata = df_iata_new[["FIPS", "ISO", "paxVolume"]]
df_iata.head()

#### Calculate risk (same as Task 1)

#### Calculate $r_{ij}^{t}$

In [34]:
# One row per (county, origin country) route: add country cases/population,
# then county NME/population
df_temp = df_iata.merge(df_outbreak, how='left', left_on='ISO', right_on='alpha3')
df_factors = df_temp.merge(df_nme, how='left', on='FIPS')
df_factors.head()

Unnamed: 0,FIPS,ISO,paxVolume,alpha3,Country,Total,pop2019,County,2016_NME,2015_NME,State_Avg_NME,FIPS_NME,Population
0,1001.0,ABW,4.932538,,,,,"Autauga, Alabama",,,0.006,0.006,55504
1,1001.0,AFG,1.973015,AFG,Afghanistan,169.0,37209.007,"Autauga, Alabama",,,0.006,0.006,55504
2,1001.0,ALB,0.36994,ALB,Albania,508.0,2938.428,"Autauga, Alabama",,,0.006,0.006,55504
3,1001.0,ARE,18.25039,ARE,United Arab Emirates,66.0,9682.088,"Autauga, Alabama",,,0.006,0.006,55504
4,1001.0,ARG,6.165672,ARG,Argentina,14.0,45101.781,"Autauga, Alabama",,,0.006,0.006,55504


In [35]:
# Give the model inputs explicit names and keep them in a fixed column order
factor_names = {'Population': 'FIPS_Pop', 'Total': 'ISO_Case', year_pop: 'ISO_Pop'}
factor_order = ['FIPS', 'County', 'FIPS_NME', 'FIPS_Pop', 'ISO', 'Country', 'ISO_Case', 'ISO_Pop', 'paxVolume']
df_factors = df_factors.rename(columns=factor_names)[factor_order]
print(df_factors.shape[0])
df_factors.head()

208207


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume
0,1001.0,"Autauga, Alabama",0.006,55504,ABW,,,,4.932538
1,1001.0,"Autauga, Alabama",0.006,55504,AFG,Afghanistan,169.0,37209.007,1.973015
2,1001.0,"Autauga, Alabama",0.006,55504,ALB,Albania,508.0,2938.428,0.36994
3,1001.0,"Autauga, Alabama",0.006,55504,ARE,United Arab Emirates,66.0,9682.088,18.25039
4,1001.0,"Autauga, Alabama",0.006,55504,ARG,Argentina,14.0,45101.781,6.165672


In [36]:
# Drop routes with no case count, then routes with no travel volume,
# printing the row count after each step
df_factors = df_factors.dropna(subset=['ISO_Case'])
print(df_factors.shape[0])
df_factors = df_factors.dropna(subset=['paxVolume'])
print(df_factors.shape[0])

183259
183259


#### Calculate $r_{j}^{t}$

In [37]:
# Route risk = country case rate x travel volume x county NME x county population
case_rate = df_factors['ISO_Case'] / df_factors['ISO_Pop']
df_factors['Route_Risk'] = case_rate * df_factors['paxVolume'] * df_factors['FIPS_NME'] * df_factors['FIPS_Pop']

In [38]:
# County raw risk = sum of its route risks
df_risk = df_factors.groupby(['FIPS', 'County'], as_index=False)['Route_Risk'].sum()
df_risk['FIPS_RawRisk'] = df_risk['Route_Risk']
df_risk.head()

Unnamed: 0,FIPS,County,Route_Risk,FIPS_RawRisk
0,1001.0,"Autauga, Alabama",6729.798222,6729.798222
1,1003.0,"Baldwin, Alabama",481015.247795,481015.247795
2,1005.0,"Barbour, Alabama",946.856161,946.856161
3,1007.0,"Bibb, Alabama",2683.385252,2683.385252
4,1009.0,"Blount, Alabama",17575.483031,17575.483031


#### Normalize and list the Top 25

In [39]:
# Scale raw risk so the highest county is 1.0, rank counties, attach the city label
highest_risk = df_risk['FIPS_RawRisk'].max()
df_risk['Risk'] = df_risk['FIPS_RawRisk'].div(highest_risk)
df_risk['FIPS_Rank'] = df_risk['Risk'].rank(ascending=False)
df_risk = (
    df_risk
    .merge(df_seat, how='left', on='County')
    [['FIPS', 'County', 'City', 'FIPS_RawRisk', 'Risk', 'FIPS_Rank']]
    .sort_values(by='Risk', ascending=False)
    .reset_index()  # the pre-sort row label is kept as an 'index' column
)
df_risk.head(50)

Unnamed: 0,index,FIPS,County,City,FIPS_RawRisk,Risk,FIPS_Rank
0,365,17031.0,"Cook, Illinois",Chicago,11608790000.0,1.0,1.0
1,129,6037.0,"Los Angeles, California",Los Angeles,4867942000.0,0.419332,2.0
2,255,12086.0,"Miami-Dade, Florida",Miami,4218430000.0,0.363382,3.0
3,228,12011.0,"Broward, Florida",Fort Lauderdale,2109763000.0,0.181738,4.0
4,75,4013.0,"Maricopa, Arizona",Phoenix,1754361000.0,0.151124,5.0
5,1060,36047.0,"Kings, New York","Brooklyn, NYC",1727191000.0,0.148783,6.0
6,1074,36081.0,"Queens, New York","Queens, NYC",1369473000.0,0.117969,7.0
7,319,15003.0,"Honolulu, Hawaii",Honolulu,1312289000.0,0.113043,8.0
8,1687,53033.0,"King, Washington",Seattle,1282174000.0,0.110449,9.0
9,1504,48201.0,"Harris, Texas",Houston,1073141000.0,0.092442,10.0


In [40]:
# Write the population-adjusted county risk table to a timestamped CSV
result = df_risk
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_pop_', t.strftime('%m%d%y%H%M'), '.csv'])
result.to_csv(output_csv, index=False, encoding='utf-8')

In [41]:
# Attach each county's overall risk and rank to its per-route rows, then rank
# the routes inside each county.
df_complete = pd.merge(df_factors, df_risk, how='left', on='FIPS')
df_complete = df_complete.sort_values(by=['Risk', 'Route_Risk'], ascending=False)
# Rank within the county itself. Grouping by FIPS_Rank pooled the routes of any
# counties whose risk tied, because rank() gives tied counties the same value.
df_complete['Route_Rank'] = df_complete.groupby('FIPS')['Route_Risk'].rank(ascending=False, method='dense')
# both inputs carry a County column; keep the left copy under its plain name
df_complete = df_complete.rename(index=str, columns={"County_x": "County"})
df_complete = df_complete.drop(columns=['County_y'])
print(len(df_complete))
df_complete.head(10)

183259


Unnamed: 0,FIPS,County,FIPS_NME,FIPS_Pop,ISO,Country,ISO_Case,ISO_Pop,paxVolume,Route_Risk,index,City,FIPS_RawRisk,Risk,FIPS_Rank,Route_Rank
40848,17031.0,"Cook, Illinois",0.051,5211263,UKR,Ukraine,34251.0,43795.22,12580.611616,2614938000.0,365,Chicago,11608790000.0,1.0,1.0,1.0
40789,17031.0,"Cook, Illinois",0.051,5211263,MEX,Mexico,1122.0,132328.035,684340.958244,1542150000.0,365,Chicago,11608790000.0,1.0,1.0,2.0
40791,17031.0,"Cook, Illinois",0.051,5211263,MKD,The Republic of North Macedonia,829.0,2086.72,5694.951255,601303200.0,365,Chicago,11608790000.0,1.0,1.0,3.0
40764,17031.0,"Cook, Illinois",0.051,5211263,ISR,Israel,614.0,8583.916,29884.095087,568114900.0,365,Chicago,11608790000.0,1.0,1.0,4.0
40759,17031.0,"Cook, Illinois",0.051,5211263,IND,India,15240.0,1368737.513,157542.686617,466204200.0,365,Chicago,11608790000.0,1.0,1.0,5.0
40839,17031.0,"Cook, Illinois",0.051,5211263,THA,Thailand,3428.0,69306.16,24388.37897,320601200.0,365,Chicago,11608790000.0,1.0,1.0,6.0
40768,17031.0,"Cook, Illinois",0.051,5211263,JPN,Japan,2412.0,126854.745,59703.541605,301705900.0,365,Chicago,11608790000.0,1.0,1.0,7.0
40713,17031.0,"Cook, Illinois",0.051,5211263,CHN,China,10789.0,1420062.022,137213.650095,277066700.0,365,Chicago,11608790000.0,1.0,1.0,8.0
40782,17031.0,"Cook, Illinois",0.051,5211263,LTU,Lithuania,329.0,2864.459,8634.110308,263562800.0,365,Chicago,11608790000.0,1.0,1.0,9.0
40740,17031.0,"Cook, Illinois",0.051,5211263,FRA,France,628.0,65480.71,79671.721567,203078400.0,365,Chicago,11608790000.0,1.0,1.0,10.0


In [42]:
# Write the population-adjusted per-route table to a timestamped CSV
result = df_complete
output_csv = ''.join([folder, 'MeaslesRisk_US_', str(year), '_pop_route_', t.strftime('%m%d%y%H%M'), '.csv'])
result.to_csv(output_csv, index=False, encoding='utf-8')