In [272]:
import pandas as pd
import numpy as np
import os
import datetime
import geopandas as gpd
from shapely.geometry import Point
from scipy.spatial.distance import cdist
from geopy import distance

In [273]:
#Takes in the county centers & neighboring county data, creates distance based on specific columns of county_centers
def dist_col(lat,long,county_centers,neighborcounties):
    #Creating GeoPandas DataFrames to do fast distance comparison
    C_C = gpd.GeoDataFrame({ #County_center gpd
        'fips': c,
        'geometry': (a, b),
    } for a, b, c in zip(county_centers[lat], county_centers[long], county_centers['fips']))
    C_C = C_C.set_index('fips')
    #making the distance array between neigboring counties
    dist = []
    for ind in neighborcounties.index:
        #Getting the fips for the origin and destination
        org = neighborcounties['orgfips'][ind]
        adj = neighborcounties['adjfips'][ind]
        #county center data only given up to 56000s
        if org < 60000 and adj < 60000:
            #checking if the locations have NaNs
            if ~np.isnan(C_C.loc[org]['geometry'][0]) and ~np.isnan(C_C.loc[adj]['geometry'][0]):
                dist.append(distance.distance(C_C.loc[org]['geometry'],C_C.loc[adj]['geometry']).km)
            else:
                #Setting default distance for counties w/ a NaN value as 0 (county did not exist)
                dist.append(0)
        else:
            #Setting default distance for counties w/o data as 200
            dist.append(200)
    return dist

In [303]:
#Loading in mobility data
county_centers = pd.read_csv('../../../../data/us/geolocation/county_centers.csv', encoding='latin1')
nearest_hei = pd.read_csv('../../../../data/us/geolocation/nearest_hei.csv', encoding='latin1')
neighborcounties = pd.read_csv('../../../../data/us/geolocation/neighborcounties.csv')

In [275]:
#Adding in distances between neighboring counties
neighborcounties['Center_00'] = dist_col('clat00','clon00',county_centers,neighborcounties)
neighborcounties['Center_10'] = dist_col('clat10','clon10',county_centers,neighborcounties)
neighborcounties['Pop_00'] = dist_col('pclat00','pclon00',county_centers,neighborcounties)
neighborcounties['Pop_10'] = dist_col('pclat10','pclon10',county_centers,neighborcounties)
neighborcounties.to_csv('neighborcounties.csv')

In [276]:
county_centers.head()

Unnamed: 0,fips,clon00,clat00,clon10,clat10,pclon00,pclat00,pclon10,pclat10
0,1001,-86.577176,32.523283,-86.64449,32.536382,-86.501832,32.500323,-86.494165,32.500389
1,1003,-87.74826,30.592781,-87.746067,30.659218,-87.76054,30.565383,-87.762381,30.548923
2,1005,-85.331312,31.856515,-85.405456,31.87067,-85.306746,31.847869,-85.310038,31.844036
3,1007,-87.123243,33.040054,-87.127148,33.015893,-87.127019,33.025947,-87.127659,33.030921
4,1009,-86.554768,33.978461,-86.567246,33.977448,-86.582617,33.962601,-86.591491,33.955243


In [283]:
neighborcounties.head()
#This is the counties and the distances in km between the neighboring counties, based on the 4 measures of county location

Unnamed: 0,orgfips,adjfips,instate,Center_00,Center_10,Pop_00,Pop_10
0,1001,1021,1,38.01735,36.062205,45.031875,45.583516
1,1001,1047,1,48.59487,49.584019,51.172049,51.851301
2,1001,1051,1,39.029865,47.598804,27.591414,26.068095
3,1001,1085,1,39.804126,43.085016,39.075715,39.269769
4,1001,1101,1,36.547481,55.509355,28.54022,29.103281


In [278]:
#Making a dataframe to give the number of neighbors & average distances from neighboring counties
neighborstats = pd.DataFrame(np.unique(neighborcounties['orgfips']), columns = ['fips'])
#making the number neighbor column
neighborstats['num_neighbors'] = neighborstats.apply(lambda x: 0, axis=1)
neighborstats = neighborstats.astype({'num_neighbors': 'int64'})
neighborstats['center_00'] = neighborstats.apply(lambda x: 0, axis=1)
neighborstats = neighborstats.astype({'center_00': 'float64'})
neighborstats['center_10'] = neighborstats.apply(lambda x: 0, axis=1)
neighborstats = neighborstats.astype({'center_10': 'float64'})
neighborstats['pop_00'] = neighborstats.apply(lambda x: 0, axis=1)
neighborstats = neighborstats.astype({'pop_00': 'float64'})
neighborstats['pop_10'] = neighborstats.apply(lambda x: 0, axis=1)
neighborstats = neighborstats.astype({'pop_10': 'float64'})
neighborstats = neighborstats.set_index('fips')

#summing over all entries to count number of neighbors
for ind in neighborcounties.index:
    #Getting the fips for the origin (signifies a neighbor)
    org = neighborcounties['orgfips'][ind]
    neighborstats['num_neighbors'][org] += 1
    neighborstats['center_00'][org] += neighborcounties['Center_00'][ind]
    neighborstats['center_10'][org] += neighborcounties['Center_10'][ind]
    neighborstats['pop_00'][org] += neighborcounties['Pop_00'][ind]
    neighborstats['pop_10'][org] += neighborcounties['Pop_10'][ind]
    
neighborstats['center_00'] = neighborstats['center_00']/neighborstats['num_neighbors']
neighborstats['center_10'] = neighborstats['center_10']/neighborstats['num_neighbors']
neighborstats['pop_00'] = neighborstats['pop_00']/neighborstats['num_neighbors']
neighborstats['pop_10'] = neighborstats['pop_10']/neighborstats['num_neighbors']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [287]:
neighborstats.head()
#This is a csv showing the number of neighbors each county has
#It also gives the mean distance from each county to its respective neighbors, when using
#one of the 4 measures for the county location


Unnamed: 0_level_0,num_neighbors,center_00,center_10,pop_00,pop_10
fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1001,5,40.398738,46.36788,38.282254,38.375193
1003,6,81.715372,78.91155,83.472329,84.558001
1005,8,48.226337,47.494927,49.141199,49.1628
1007,6,50.573753,50.372913,50.700097,50.598052
1009,6,48.806545,48.377201,47.908587,47.806852


In [280]:
neighborstats.to_csv('neighborstats.csv')