# Compute county spread score

In [1]:
import numpy as np
import pandas as pd

In [2]:
# load the county-to-county commute table
# every FIPS code column is read as a string rather than as a number
table1 = pd.read_csv(
    "../../data/commute/table1.csv",
    index_col=0,
    dtype=dict.fromkeys(
        [
            'Resident County FIPS',
            'Work County FIPS',
            'Resident State FIPS',
            'Work State FIPS',
        ],
        str,
    ),
)

In [3]:
# peek at the first five rows of the loaded table
table1.head(n=5)

Unnamed: 0,Resident State FIPS,Resident County FIPS,Resident State Name,Resident County Name,Work State FIPS,Work County FIPS,Work State Name,Work County Name,commute,error
0,1,1001,Alabama,Autauga County,1,1001,Alabama,Autauga County,8828.0,752.0
1,1,1001,Alabama,Autauga County,1,1013,Alabama,Butler County,6.0,10.0
2,1,1001,Alabama,Autauga County,1,1021,Alabama,Chilton County,504.0,228.0
3,1,1001,Alabama,Autauga County,1,1043,Alabama,Cullman County,27.0,44.0
4,1,1001,Alabama,Autauga County,1,1047,Alabama,Dallas County,296.0,130.0


In [4]:
# preprocess: the state-level FIPS columns are not used below, so remove them
table1 = table1.drop(columns=['Resident State FIPS', 'Work State FIPS'])

In [5]:
# preprocess: keep only the rows that have a Work County FIPS
# (original author's note: the missing ones are usually foreign places)
table1 = table1.dropna(axis='index', how='any', subset=['Work County FIPS'])

## Method 1: use resident commute sum to normalize

In [6]:
# preprocess: lower bound of each commute count (commute minus error, floored at 0)
table1['resident_weight'] = np.maximum(0, table1['commute'].sub(table1['error']))
# add up those lower bounds over all rows that share a resident county
total_weight = table1['resident_weight'].groupby(table1['Resident County FIPS']).sum()

In [7]:
# preprocess: attach each resident county's total as 'resident_weight_total',
# then express every row's lower bound as a share of that total
table1 = table1.join(total_weight.rename('resident_weight_total'), on='Resident County FIPS')
# NOTE(review): a county whose total is 0 divides 0 by 0 and ends up NaN - confirm that is intended
table1['resident_weight'] = table1['resident_weight'].div(table1['resident_weight_total'])

In [8]:
# show the rows whose resident weight exceeds 0.99
table1.loc[table1['resident_weight'].gt(0.99)]

Unnamed: 0,Resident County FIPS,Resident State Name,Resident County Name,Work County FIPS,Work State Name,Work County Name,commute,error,resident_weight,resident_weight_total
2959,02013,Alaska,Aleutians East Borough,02013,Alaska,Aleutians East Borough,2504.0,148.0,1.000000,2356.0
2963,02016,Alaska,Aleutians West Census Area,02016,Alaska,Aleutians West Census Area,3788.0,114.0,1.000000,3674.0
3015,02050,Alaska,Bethel Census Area,02050,Alaska,Bethel Census Area,6017.0,215.0,0.999483,5805.0
3034,02070,Alaska,Dillingham Census Area,02070,Alaska,Dillingham Census Area,1782.0,86.0,0.998234,1699.0
3078,02110,Alaska,Juneau City and Borough,02110,Alaska,Juneau City and Borough,17273.0,435.0,0.999525,16846.0
...,...,...,...,...,...,...,...,...,...,...
137279,56027,Wyoming,Niobrara County,56027,Wyoming,Niobrara County,1006.0,123.0,1.000000,883.0
137381,56037,Wyoming,Sweetwater County,56037,Wyoming,Sweetwater County,21644.0,587.0,0.991431,21239.0
137399,56039,Wyoming,Teton County,56039,Wyoming,Teton County,13175.0,550.0,0.994251,12698.0
138115,72049,Puerto Rico,Culebra Municipio,72049,Puerto Rico,Culebra Municipio,661.0,166.0,1.000000,495.0


## Method 2: use work commute sum to normalize

In [9]:
# preprocess: lower bound of each commute count (commute minus error, floored at 0)
table1['work_weight'] = np.maximum(0, table1['commute'].sub(table1['error']))
# add up those lower bounds over all rows that share a work county
total_weight = table1['work_weight'].groupby(table1['Work County FIPS']).sum()

In [10]:
# preprocess: attach each work county's total as 'work_weight_total',
# then express every row's lower bound as a share of that total
table1 = table1.join(total_weight.rename('work_weight_total'), on='Work County FIPS')
# NOTE(review): a county whose total is 0 divides 0 by 0 and ends up NaN - confirm that is intended
table1['work_weight'] = table1['work_weight'].div(table1['work_weight_total'])

In [11]:
# show the rows whose work weight exceeds 0.99
table1.loc[table1['work_weight'].gt(0.99)]

Unnamed: 0,Resident County FIPS,Resident State Name,Resident County Name,Work County FIPS,Work State Name,Work County Name,commute,error,resident_weight,resident_weight_total,work_weight,work_weight_total
2959,02013,Alaska,Aleutians East Borough,02013,Alaska,Aleutians East Borough,2504.0,148.0,1.000000,2356.0,0.997038,2363.0
3015,02050,Alaska,Bethel Census Area,02050,Alaska,Bethel Census Area,6017.0,215.0,0.999483,5805.0,0.997250,5818.0
3034,02070,Alaska,Dillingham Census Area,02070,Alaska,Dillingham Census Area,1782.0,86.0,0.998234,1699.0,0.997647,1700.0
3041,02090,Alaska,Fairbanks North Star Borough,02090,Alaska,Fairbanks North Star Borough,49307.0,1028.0,0.988250,48853.0,0.996347,48456.0
3062,02100,Alaska,Haines Borough,02100,Alaska,Haines Borough,1271.0,181.0,0.984643,1107.0,0.997255,1093.0
...,...,...,...,...,...,...,...,...,...,...,...,...
131858,54071,West Virginia,Pendleton County,54071,West Virginia,Pendleton County,1988.0,169.0,0.807368,2253.0,0.990741,1836.0
137083,56013,Wyoming,Fremont County,56013,Wyoming,Fremont County,16698.0,508.0,0.984853,16439.0,0.996983,16239.0
137279,56027,Wyoming,Niobrara County,56027,Wyoming,Niobrara County,1006.0,123.0,1.000000,883.0,0.993251,889.0
138115,72049,Puerto Rico,Culebra Municipio,72049,Puerto Rico,Culebra Municipio,661.0,166.0,1.000000,495.0,1.000000,495.0


In [12]:
# keep the un-normalized lower bound as 'raw_weight' as well,
# then narrow the table down to the county pair and its three weights
table1['raw_weight'] = np.maximum(0, table1['commute'].sub(table1['error']))
final = table1[
    [
        'Resident County FIPS',
        'Work County FIPS',
        'resident_weight',
        'work_weight',
        'raw_weight',
    ]
]

In [13]:
# display the selected columns (county pair plus resident, work and raw weights)
final

Unnamed: 0,Resident County FIPS,Work County FIPS,resident_weight,work_weight,raw_weight
0,01001,01001,0.386708,0.715450,8076.0
1,01001,01013,0.000000,0.000000,0.0
2,01001,01021,0.013216,0.030582,276.0
3,01001,01043,0.000000,0.000000,0.0
4,01001,01047,0.007949,0.014844,166.0
...,...,...,...,...,...
139427,72153,72127,0.003106,0.000089,20.0
139428,72153,72133,0.000000,0.000000,0.0
139429,72153,72147,0.000000,0.000000,0.0
139430,72153,72149,0.000000,0.000000,0.0


In [14]:
# inspect the rows whose resident county is New York County, largest resident weight first
# FIPS reference: 36061 = New York County, 36005 = Bronx, 36047 = Kings
final.loc[final['Resident County FIPS'].eq('36061')].sort_values(by='resident_weight', ascending=False)

Unnamed: 0,Resident County FIPS,Work County FIPS,resident_weight,work_weight,raw_weight
78788,36061,36061,0.863014,0.304687,725962.0
78776,36061,36005,0.032239,0.075522,27119.0
78785,36061,36047,0.031432,0.032247,26440.0
78794,36061,36081,0.024835,0.030259,20891.0
78805,36061,36119,0.009248,0.018644,7779.0
...,...,...,...,...,...
78744,36061,28087,0.000000,0.000000,0.0
78745,36061,29095,0.000000,0.000000,0.0
78746,36061,29189,0.000000,0.000000,0.0
78747,36061,29510,0.000000,0.000000,0.0


In [15]:
# save the weights table; the row index is written out as the first CSV column
final.to_csv(path_or_buf="../../intermediate/05_spread_score.csv", index=True)