# Neighboring_Counties

### Task 1. Raw measles risk
Calculate raw risk for each county with $$ r_{ij}^{t} = C_{i}^{t} \times V_{ij}^{t} \times NME_{j}^{t} \times P_{j}^{t} $$
where <br/>
$i$ is the origin country, <br/>
$j$ is the US county, <br/>
$t$ is the year, <br/>
$r_{ij}^{t}$ is the measles risk from country $i$ to county $j$ in year $t$, <br/>
$C_{i}^{t}$ is the incident cases in Country $i$ in year $t$, <br/>
$V_{ij}^{t}$ is the travel volume (million) from country $i$ to county $j$ in year $t$, <br/>
$NME_{j}^{t}$ is the NME rate in county $j$ in year $t$, <br/>
$P_{j}^{t}$ is the county $j$ population in year $t$. <br/>
$$ r_{j}^{t} = \sum_{i} r_{ij}^{t} $$
where <br/>
$r_{j}^{t}$ is the measles risk of county $j$ in year $t$, <br/>

### Task 2. Risk smoothing
For counties where is no international travel - directly update $r_{ij}^{t}$. <br/>
Task 2.1: calculate the average of raw risk in neighboring counties <br/>
Task 2.2: proportion to shared boundary length <br/>
Task 2.3: proportion to population <br/>

### Task 3. Travel volume update
For counties where is no international travel - update $V_{ij}^{t}$. <br/>
Task 3.1: calculate the average of international incoming travel volume in neighboring counties <br/>
Task 3.2: proportion to shared boundary length <br/>
Task 3.3: proportion to population <br/>
Task 3.4: diffusion by paramenter 1, 0.8, 0.6, 0.4, etc.

# Task 1: Calculate measles risk in county level

In [1]:
import pandas as pd
year = 2019
year_iata = 2017 # we will use 2017 IATA data for 2018 and 2019
folder = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\\'

#### Import $C_{i}^{t}$

In [2]:
in_table = r'C:\Users\Ensheng\Desktop\mapping\measles_world_year\world_measles_year.csv'
df_ci = pd.read_csv(in_table)
df_ci = df_ci.loc[df_ci['Year_x'] == year]
df_ci = df_ci[['ISO_3DIGIT', 'Total']]
print(len(df_ci))
df_ci.head(5)

244


Unnamed: 0,ISO_3DIGIT,Total
8,ABW,
17,AFG,82.0
26,AGO,192.0
35,AIA,
44,ALB,250.0


#### Import $V_{ij}^{t}$

In [3]:
# IATA data
in_table = r'C:\Users\Ensheng\Desktop\mapping\IATA\flow_XY.csv'
df_iata = pd.read_csv(in_table)
df_iata = df_iata.loc[df_iata['year'] == year_iata] # slice for certain year
df_iata = df_iata[['ISO', 'FIPS', 'paxVolume']]
print(len(df_iata))
df_iata.head(5)

3575


Unnamed: 0,ISO,FIPS,paxVolume
39301,MEX,1033.0,2
39302,CHE,1033.0,2
39303,JPN,1045.0,190
39304,KOR,1045.0,348
39305,MEX,1045.0,306


#### Import $NME_{j}^{t}$ and $P_{j}^{t}$

In [4]:
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\county_28980.csv'
df_nme = pd.read_csv(in_table)
df_nme = df_nme.loc[df_nme['Year'] == year] # slice for certain year
df_nme = df_nme[['GEOID', 'Labels', 'Avg_NME', 'Population']]
print(len(df_nme))
df_nme.head(5)

3220


Unnamed: 0,GEOID,Labels,Avg_NME,Population
25760,6075,"San Francisco, CA",0.006,884363.0
25761,25025,"Suffolk, MA",0.01,797939.0
25762,31007,"Banner, NE",0.014,742.0
25763,37181,"Vance, NC",0.016,44211.0
25764,48421,"Sherman, TX",0.016,3067.0


#### Calculate $r_{ij}^{t}$

In [5]:
r_temp = pd.merge(df_iata, df_ci, how='left', left_on='ISO',right_on='ISO_3DIGIT')
r_ij = pd.merge(df_nme, r_temp, how='left', left_on='GEOID',right_on='FIPS')
print(len(r_ij))
r_ij.head(15)

6399


Unnamed: 0,GEOID,Labels,Avg_NME,Population,ISO,FIPS,paxVolume,ISO_3DIGIT,Total
0,6075,"San Francisco, CA",0.006,884363.0,,,,,
1,25025,"Suffolk, MA",0.01,797939.0,ITA,25025.0,158713.0,ITA,385.0
2,25025,"Suffolk, MA",0.01,797939.0,MEX,25025.0,201987.0,MEX,769.0
3,25025,"Suffolk, MA",0.01,797939.0,PRI,25025.0,156280.0,PRI,
4,25025,"Suffolk, MA",0.01,797939.0,CAN,25025.0,409849.0,CAN,34.0
5,25025,"Suffolk, MA",0.01,797939.0,CHN,25025.0,177993.0,CHN,3909.0
6,25025,"Suffolk, MA",0.01,797939.0,DEU,25025.0,148558.0,DEU,161.0
7,25025,"Suffolk, MA",0.01,797939.0,DOM,25025.0,186301.0,DOM,30.0
8,25025,"Suffolk, MA",0.01,797939.0,ESP,25025.0,123038.0,ESP,48.0
9,25025,"Suffolk, MA",0.01,797939.0,FRA,25025.0,142002.0,FRA,313.0


In [6]:
# rename
r_ij.loc[:,('ci')] = r_ij['Total']
r_ij.loc[:,('vij')] = r_ij['paxVolume']
r_ij.loc[:,('nmej')] = r_ij['Avg_NME']
r_ij.loc[:,('pj')] = r_ij['Population']
r_ij = r_ij[['GEOID','Labels','ISO','ci','vij','nmej','pj']]
print(len(r_ij))

6399


In [7]:
# slice
r_ij = r_ij.loc[r_ij['ISO'].notnull()]
r_ij = r_ij.loc[r_ij['ci'].notnull()]
print(len(r_ij))

3146


In [8]:
# calculate r_ij
r_ij['rij'] = r_ij['ci'] * r_ij['vij'] * r_ij['nmej'] * r_ij['pj']

#### Calculate $r_{j}^{t}$

In [9]:
df_risk = r_ij.groupby(['GEOID','Labels'])['rij'].sum().reset_index()
df_risk.loc[:,('County')] = df_risk['Labels']
df_risk.loc[:,('risk')] = df_risk['rij']
df_risk = df_risk[['GEOID','County','risk']]

In [10]:
df_risk.head(5)

Unnamed: 0,GEOID,County,risk
0,1033,"Colbert, AL",571596.0
1,1045,"Dale, AL",283367200.0
2,1073,"Jefferson, AL",126452200000.0
3,1089,"Madison, AL",23762600000.0
4,1097,"Mobile, AL",81419420000.0


#### Normalize

In [11]:
highest_risk = df_risk['risk'].max()
df_risk.risk = df_risk.risk / highest_risk

#### List Top 25 risky counties

In [12]:
df_risk.sort_values('risk',ascending = False).head(30).reset_index()

Unnamed: 0,index,GEOID,County,risk
0,115,17031,"Cook, IL",1.0
1,45,6037,"Los Angeles, CA",0.583794
2,88,12086,"Miami-Dade, FL",0.453405
3,365,53033,"King, WA",0.217526
4,231,32003,"Clark, NV",0.163932
5,258,36081,"Queens, NY",0.137794
6,91,12095,"Orange, FL",0.132284
7,333,48201,"Harris, TX",0.129035
8,27,4013,"Maricopa, AZ",0.122941
9,104,15003,"Honolulu, HI",0.110584


# Task 2: Risk smoothing

#### Import the relationship table of neighboring counties

In [13]:
in_table = r'C:\Users\Ensheng\Desktop\mapping\diffusion_model\nbr.csv'
df_nbr = pd.read_csv(in_table)
df_nbr = df_nbr[['src_FIPS', 'nbr_FIPS', 'LENGTH']]
print(len(df_nbr))
df_nbr.head(5)

18680


Unnamed: 0,src_FIPS,nbr_FIPS,LENGTH
0,1001.0,1021.0,0.549297
1,1001.0,1047.0,0.344167
2,1001.0,1051.0,0.297455
3,1001.0,1085.0,0.41373
4,1001.0,1101.0,0.120051


In [137]:
print(str(len(df_nme)) + " counties in the US.")
print(str(len(df_risk)) + " counties with risk values after the first round.")
print(str(len(set(df_nme.GEOID) - set(df_risk.GEOID))) + " counties are still missing the risk value.")

3220 counties in the US.
394 counties with risk values after the first round.
2826 counties are still missing the risk value.


## Task 2.1: Method - averaging

In [138]:
df_temp = pd.merge(df_nme, df_risk, how='left', left_on='GEOID',right_on='GEOID')
df_us_tmp = df_temp.loc[df_temp['risk'].isnull()]
df_us_tmp = df_us_tmp[['GEOID','Labels','risk']]
print(len(df_us_tmp))
df_us_tmp.head(5)

2826


Unnamed: 0,GEOID,Labels,risk
0,6075,"San Francisco, CA",
2,31007,"Banner, NE",
3,37181,"Vance, NC",
4,48421,"Sherman, TX",
5,50011,"Franklin, VT",


In [139]:
df_temp = pd.merge(df_us_tmp, df_nbr, how='left', left_on='GEOID',right_on='src_FIPS')
df_us_risk = pd.merge(df_temp, df_risk, how='left', left_on='nbr_FIPS',right_on='GEOID')
df_us_risk.sort_values('GEOID_x').head(15)

Unnamed: 0,GEOID_x,Labels,risk_x,src_FIPS,nbr_FIPS,LENGTH,GEOID_y,County,risk_y
16134,1001,"Autauga, AL",,1001.0,1021.0,0.549297,,,
16138,1001,"Autauga, AL",,1001.0,1101.0,0.120051,1101.0,"Montgomery, AL",6e-06
16137,1001,"Autauga, AL",,1001.0,1085.0,0.41373,,,
16136,1001,"Autauga, AL",,1001.0,1051.0,0.297455,,,
16135,1001,"Autauga, AL",,1001.0,1047.0,0.344167,,,
8859,1003,"Baldwin, AL",,1003.0,1099.0,0.165312,,,
8860,1003,"Baldwin, AL",,1003.0,1129.0,0.040061,,,
8861,1003,"Baldwin, AL",,1003.0,12033.0,0.95841,12033.0,"Escambia, FL",6.9e-05
8856,1003,"Baldwin, AL",,1003.0,1025.0,0.27731,,,
8858,1003,"Baldwin, AL",,1003.0,1097.0,0.620398,1097.0,"Mobile, AL",7.4e-05


In [140]:
df_us_risk = df_us_risk.groupby(['GEOID_x','Labels']) \
.agg({'nbr_FIPS':'size', 'risk_y':'sum'}) \
.rename(columns={'nbr_FIPS':'count','risk_y':'sum_risk'}) \
.reset_index()
df_us_risk.head(5)

Unnamed: 0,GEOID_x,Labels,count,sum_risk
0,1001,"Autauga, AL",5,5.52798e-06
1,1003,"Baldwin, AL",6,0.0001436483
2,1005,"Barbour, AL",8,2.592624e-07
3,1007,"Bibb, AL",6,0.0001156955
4,1009,"Blount, AL",6,0.0001156955


In [141]:
df_us_risk["risk_ave"] = df_us_risk["sum_risk"]/df_us_risk["count"]
df_us_risk.head(5)

Unnamed: 0,GEOID_x,Labels,count,sum_risk,risk_ave
0,1001,"Autauga, AL",5,5.52798e-06,1.105596e-06
1,1003,"Baldwin, AL",6,0.0001436483,2.394138e-05
2,1005,"Barbour, AL",8,2.592624e-07,3.24078e-08
3,1007,"Bibb, AL",6,0.0001156955,1.928258e-05
4,1009,"Blount, AL",6,0.0001156955,1.928258e-05


In [142]:
df_us_risk.loc[:,('GEOID')] = df_us_risk['GEOID_x']
df_us_risk.loc[:,('County')] = df_us_risk['Labels']
df_us_risk.loc[:,('risk')] = df_us_risk['risk_ave']
df_us_risk = df_us_risk[['GEOID','County','risk']]
df_us_risk.head(10)

Unnamed: 0,GEOID,County,risk
0,1001,"Autauga, AL",1.105596e-06
1,1003,"Baldwin, AL",2.394138e-05
2,1005,"Barbour, AL",3.24078e-08
3,1007,"Bibb, AL",1.928258e-05
4,1009,"Blount, AL",1.928258e-05
5,1011,"Bullock, AL",1.105596e-06
6,1013,"Butler, AL",0.0
7,1015,"Calhoun, AL",0.0
8,1017,"Chambers, AL",0.0
9,1019,"Cherokee, AL",0.0


#### Merge with result from Task 1

In [151]:
df_us_risk_cmp = df_us_risk.append(df_risk)
print(len(df_us_risk_cmp))
df_us_risk_cmp.sort_values('risk',ascending = False).head(30).reset_index()

Unnamed: 0,index,GEOID,County,risk
0,115,17031,"Cook, IL",1.0
1,45,6037,"Los Angeles, CA",0.583794
2,88,12086,"Miami-Dade, FL",0.453405
3,527,17097,"Lake, IL",0.333333
4,500,17043,"DuPage, IL",0.25
5,365,53033,"King, WA",0.217526
6,523,17089,"Kane, IL",0.2
7,179,6111,"Ventura, CA",0.194605
8,616,18089,"Lake, IN",0.166667
9,571,17197,"Will, IL",0.166667


In [153]:
result = df_us_risk_cmp.sort_values('risk',ascending = False)
output_csv = folder + 'MeaslesRisk_US_' +  str(year) + '.csv'
result.to_csv(output_csv,index = False,encoding='utf-8')

## Task 2.2: Method - risk proportional to boundary length

In [19]:
print(str(len(df_nme)) + " counties in the US.")
print(str(len(df_risk)) + " counties with risk values after the first round.")
print(str(len(set(df_nme.GEOID) - set(df_risk.GEOID))) + " counties are still missing the risk value.")

3220 counties in the US.
394 counties with risk values after the first round.
2826 counties are still missing the risk value.


#### calculate boundary percentage

In [24]:
df_nbr.head(3)

Unnamed: 0,src_FIPS,nbr_FIPS,LENGTH
0,1001.0,1021.0,0.549297
1,1001.0,1047.0,0.344167
2,1001.0,1051.0,0.297455


In [34]:
df_nbr_tmp = df_nbr.groupby(['src_FIPS', 'nbr_FIPS']).agg({'LENGTH': 'sum'})
# Change: groupby df_nbr_tmp and divide by sum
df_len_pct = df_nbr_tmp.groupby(level=0) \
.apply(lambda x: 100 * x / float(x.sum())) \
.rename(columns={'LENGTH':'LENPCT'}) \
.reset_index()

In [36]:
df_len_pct.head(5)

Unnamed: 0,src_FIPS,nbr_FIPS,LENPCT
0,1001.0,1021.0,31.848829
1,1001.0,1047.0,19.955174
2,1001.0,1051.0,17.246777
3,1001.0,1085.0,23.988507
4,1001.0,1101.0,6.960713


#### Select counties with IATA data and list all their neighoring counties

In [101]:
df_temp = pd.merge(df_len_pct, df_risk, how='left', left_on='src_FIPS',right_on='GEOID')
print(len(df_temp))
df_us_tmp = df_temp.loc[df_temp['risk'].notnull()]
#df_us_tmp = df_us_tmp[['GEOID','Labels','risk']]
print(len(df_us_tmp))
df_us_tmp.head(5)

18680
2227


Unnamed: 0,src_FIPS,nbr_FIPS,LENPCT,GEOID,County,risk
97,1033.0,1059.0,31.300106,1033.0,"Colbert, AL",5.229728e-10
98,1033.0,1077.0,38.82833,1033.0,"Colbert, AL",5.229728e-10
99,1033.0,1079.0,13.797939,1033.0,"Colbert, AL",5.229728e-10
100,1033.0,28141.0,16.073626,1033.0,"Colbert, AL",5.229728e-10
131,1045.0,1005.0,21.503137,1045.0,"Dale, AL",2.592624e-07


#### Calculate new risk

In [102]:
df_us_tmp["updated_risk"] = df_us_tmp["risk"] * df_us_tmp["LENPCT"] / 100
len(df_us_tmp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


2227

In [105]:
df_us_tmp = df_us_tmp[['nbr_FIPS','updated_risk']]
df_risk22 = df_us_tmp.groupby('nbr_FIPS')['updated_risk'].sum().reset_index()
print (len(df_risk22))
df_risk22.head(5)

1614


Unnamed: 0,nbr_FIPS,updated_risk
0,1001.0,3.342633e-07
1,1003.0,5.39644e-05
2,1005.0,5.574954e-08
3,1007.0,1.951451e-06
4,1009.0,2.008204e-05


In [110]:
# add County
df_temp = pd.merge(df_risk22, df_nme, how='left', left_on='nbr_FIPS',right_on='GEOID')
df_temp.loc[:,('County')] = df_temp['Labels']
df_temp.loc[:,('risk')] = df_temp['updated_risk']
df_temp = df_temp[['GEOID','County','risk']]
df_risk22 = df_temp
print (len(df_risk22))
df_risk22.head(5)

1614


Unnamed: 0,GEOID,County,risk
0,1001,"Autauga, AL",3.342633e-07
1,1003,"Baldwin, AL",5.39644e-05
2,1005,"Barbour, AL",5.574954e-08
3,1007,"Bibb, AL",1.951451e-06
4,1009,"Blount, AL",2.008204e-05


#### Merge with result from Task 1

In [111]:
df_us_risk_cmp = df_risk22.append(df_risk)
print(len(df_us_risk_cmp))

2008


In [112]:
# consider the situation where a county receives travel flows from more than one neighboring counties
# note: for counties far away from any IATA county, we won't calculate it. Therefore, the we have 1813 counties with final risk value
# if a complete list for all US counties is needed, please rerun the script based on df_risk = df_us_risk_cmp
df_us_risk_cmp = df_us_risk_cmp.groupby(['GEOID','County'])['risk'].sum().reset_index()
print(len(df_us_risk_cmp))

1813


In [114]:
result = df_us_risk_cmp.sort_values('risk',ascending = False)
output_csv = folder + 'MeaslesRisk_US_' +  str(year) + '.csv'
result.to_csv(output_csv,index = False,encoding='utf-8')

#### List Top XX counties with the highest risk values

In [113]:
df_us_risk_cmp.sort_values('risk',ascending = False).head(30).reset_index()

Unnamed: 0,index,GEOID,County,risk
0,365,17031,"Cook, IL",1.0
1,129,6037,"Los Angeles, CA",0.584571
2,255,12086,"Miami-Dade, FL",0.460748
3,369,17043,"DuPage, IL",0.316024
4,418,17197,"Will, IL",0.294466
5,256,12087,"Monroe, FL",0.228244
6,1687,53033,"King, WA",0.217526
7,228,12011,"Broward, FL",0.202916
8,125,6029,"Kern, CA",0.201538
9,382,17097,"Lake, IL",0.191532


# Task 3: Update travel volume

## Task 3.1: Method - averaging travel volume

#### Import known travel volume for counties

In [201]:
df_travel = df_iata.groupby(['FIPS'])['paxVolume'].sum().reset_index()
print(str(len(df_nme)) + " counties in the US.")
print(str(len(df_travel)) + " counties have IATA travel data.")

3220 counties in the US.
394 counties have IATA travel data.


#### Find counties without paxVolume

In [202]:
df_temp = pd.merge(df_nme, df_travel, how='left', left_on='GEOID',right_on='FIPS')
df_notravel = df_temp.loc[df_temp['paxVolume'].isnull()]
print(str(len(df_notravel)) + " counties have NO IATA travel data.")
df_notravel.head(5)

2826 counties have NO IATA travel data.


Unnamed: 0,GEOID,Labels,Avg_NME,Population,FIPS,paxVolume
0,6075,"San Francisco, CA",0.006,884363.0,,
2,31007,"Banner, NE",0.014,742.0,,
3,37181,"Vance, NC",0.016,44211.0,,
4,48421,"Sherman, TX",0.016,3067.0,,
5,50011,"Franklin, VT",0.037,49025.0,,


#### Calcuate travel volume for counties without paxVolume

In [203]:
df_temp = pd.merge(df_notravel, df_nbr, how='left', left_on='GEOID',right_on='src_FIPS')
df_travel_nbr = pd.merge(df_temp, df_travel, how='left', left_on='nbr_FIPS',right_on='FIPS')
df_travel_nbr.head(5)

Unnamed: 0,GEOID,Labels,Avg_NME,Population,FIPS_x,paxVolume_x,src_FIPS,nbr_FIPS,LENGTH,FIPS_y,paxVolume_y
0,6075,"San Francisco, CA",0.006,884363.0,,,6075.0,6081.0,0.110851,6081.0,3632362.0
1,31007,"Banner, NE",0.014,742.0,,,31007.0,31033.0,0.043872,,
2,31007,"Banner, NE",0.014,742.0,,,31007.0,31105.0,0.674669,,
3,31007,"Banner, NE",0.014,742.0,,,31007.0,31123.0,0.261559,,
4,31007,"Banner, NE",0.014,742.0,,,31007.0,31157.0,0.682435,31157.0,23.0


In [204]:
df_travel_nbr = df_travel_nbr[['GEOID','Labels','Population','Avg_NME','nbr_FIPS','paxVolume_y']]
df_travel_nbr.sort_values('GEOID').head(15)

Unnamed: 0,GEOID,Labels,Population,Avg_NME,nbr_FIPS,paxVolume_y
16134,1001,"Autauga, AL",55504.0,0.006,1021.0,
16138,1001,"Autauga, AL",55504.0,0.006,1101.0,7872.0
16137,1001,"Autauga, AL",55504.0,0.006,1085.0,
16136,1001,"Autauga, AL",55504.0,0.006,1051.0,
16135,1001,"Autauga, AL",55504.0,0.006,1047.0,
8859,1003,"Baldwin, AL",212628.0,0.006,1099.0,
8860,1003,"Baldwin, AL",212628.0,0.006,1129.0,
8861,1003,"Baldwin, AL",212628.0,0.006,12033.0,36736.0
8856,1003,"Baldwin, AL",212628.0,0.006,1025.0,
8858,1003,"Baldwin, AL",212628.0,0.006,1097.0,18502.0


In [205]:
df_travel_nbr = df_travel_nbr.groupby(['GEOID','Labels','Population','Avg_NME']) \
.agg({'nbr_FIPS':'size', 'paxVolume_y':'sum'}) \
.rename(columns={'nbr_FIPS':'count','paxVolume_y':'sum_paxVolume'}) \
.reset_index()
print(len(df_travel_nbr))
df_travel_nbr.head(5)

2748


Unnamed: 0,GEOID,Labels,Population,Avg_NME,count,sum_paxVolume
0,1001,"Autauga, AL",55504.0,0.006,5,7872.0
1,1003,"Baldwin, AL",212628.0,0.006,6,55238.0
2,1005,"Barbour, AL",25270.0,0.006,8,3245.0
3,1007,"Bibb, AL",22668.0,0.006,6,72784.0
4,1009,"Blount, AL",58013.0,0.006,6,72784.0


In [206]:
df_travel_nbr["travel_ave"] = df_travel_nbr["sum_paxVolume"]/df_travel_nbr["count"]
df_travel_nbr.head(5)

Unnamed: 0,GEOID,Labels,Population,Avg_NME,count,sum_paxVolume,travel_ave
0,1001,"Autauga, AL",55504.0,0.006,5,7872.0,1574.4
1,1003,"Baldwin, AL",212628.0,0.006,6,55238.0,9206.333333
2,1005,"Barbour, AL",25270.0,0.006,8,3245.0,405.625
3,1007,"Bibb, AL",22668.0,0.006,6,72784.0,12130.666667
4,1009,"Blount, AL",58013.0,0.006,6,72784.0,12130.666667


#### Calcuate $r_{j}^{t}$

In [207]:
# calculate r_j
df_travel_nbr['risk'] = df_travel_nbr['travel_ave'] * df_travel_nbr['Avg_NME'] * df_travel_nbr['Population']

In [188]:
# normalize
df_travel_nbr.risk = df_travel_nbr.risk / highest_risk

In [189]:
df_travel_nbr.loc[:,('County')] = df_travel_nbr['Labels']
df_travel_nbr = df_travel_nbr[['GEOID','County','risk']]
df_travel_nbr.head(10)

Unnamed: 0,GEOID,County,risk
0,1001,"Autauga, AL",4.797119e-10
1,1003,"Baldwin, AL",1.074604e-08
2,1005,"Barbour, AL",5.626925e-11
3,1007,"Bibb, AL",1.509521e-09
4,1009,"Blount, AL",3.863235e-09
5,1011,"Bullock, AL",8.909899e-11
6,1013,"Butler, AL",0.0
7,1015,"Calhoun, AL",0.0
8,1017,"Chambers, AL",0.0
9,1019,"Cherokee, AL",0.0


In [192]:
len(df_travel_nbr)

2748

#### Merge with counties with IATA data

In [190]:
df_us_risk_cmp = df_travel_nbr.append(df_risk)
print(len(df_us_risk_cmp))
df_us_risk_cmp.sort_values('risk',ascending = False).head(30).reset_index()

3142


Unnamed: 0,index,GEOID,County,risk
0,115,17031,"Cook, IL",1.0
1,45,6037,"Los Angeles, CA",0.583794
2,88,12086,"Miami-Dade, FL",0.453405
3,365,53033,"King, WA",0.217526
4,231,32003,"Clark, NV",0.163932
5,258,36081,"Queens, NY",0.137794
6,91,12095,"Orange, FL",0.132284
7,333,48201,"Harris, TX",0.129035
8,27,4013,"Maricopa, AZ",0.122941
9,104,15003,"Honolulu, HI",0.110584


In [233]:
result = df_us_risk_cmp.sort_values('risk',ascending = False)
output_csv = folder + 'MeaslesRisk_US_' +  str(year) + '.csv'
result.to_csv(output_csv,index = False,encoding='utf-8')

## Task 3.2: Method - travel volume proportional to boundary length