In [1]:
# Import dependencies
import pandas as pd

In [2]:
# File data
file_data = "../../Data/Raw_Data/CountiesLatLong.csv"
df = pd.read_csv(file_data)
df.head()

Unnamed: 0,county,county_ascii,county_full,county_fips,state_id,state_name,lat,lng,population
0,Los Angeles,Los Angeles,Los Angeles County,6037,CA,California,34.3207,-118.2248,10081570
1,Cook,Cook,Cook County,17031,IL,Illinois,41.8401,-87.8168,5198275
2,Harris,Harris,Harris County,48201,TX,Texas,29.8577,-95.3936,4646630
3,Maricopa,Maricopa,Maricopa County,4013,AZ,Arizona,33.349,-112.4915,4328810
4,San Diego,San Diego,San Diego County,6073,CA,California,33.0341,-116.7353,3316073


In [3]:
# Rename column headers for useful columns
df = df.rename({"county_full": "County", "county_fips": "FIPS", "state_name": "State", "lat": "Lat", "lng" : "Long" }, axis=1)
df.head()

Unnamed: 0,county,county_ascii,County,FIPS,state_id,State,Lat,Long,population
0,Los Angeles,Los Angeles,Los Angeles County,6037,CA,California,34.3207,-118.2248,10081570
1,Cook,Cook,Cook County,17031,IL,Illinois,41.8401,-87.8168,5198275
2,Harris,Harris,Harris County,48201,TX,Texas,29.8577,-95.3936,4646630
3,Maricopa,Maricopa,Maricopa County,4013,AZ,Arizona,33.349,-112.4915,4328810
4,San Diego,San Diego,San Diego County,6073,CA,California,33.0341,-116.7353,3316073


In [4]:
# Drop columns "county", "county_ascii", "state_id", "population"
df = df.drop(["county", "county_ascii", "state_id", "population"], axis=1)
df.head()

Unnamed: 0,County,FIPS,State,Lat,Long
0,Los Angeles County,6037,California,34.3207,-118.2248
1,Cook County,17031,Illinois,41.8401,-87.8168
2,Harris County,48201,Texas,29.8577,-95.3936
3,Maricopa County,4013,Arizona,33.349,-112.4915
4,San Diego County,6073,California,33.0341,-116.7353


In [5]:
# Reorder columns
df = df[["FIPS", "County", "State", "Lat", "Long"]]
df.head()

Unnamed: 0,FIPS,County,State,Lat,Long
0,6037,Los Angeles County,California,34.3207,-118.2248
1,17031,Cook County,Illinois,41.8401,-87.8168
2,48201,Harris County,Texas,29.8577,-95.3936
3,4013,Maricopa County,Arizona,33.349,-112.4915
4,6073,San Diego County,California,33.0341,-116.7353


In [6]:
# Sort on FIPS
df = df.sort_values(by=["FIPS"])
df.head()

Unnamed: 0,FIPS,County,State,Lat,Long
913,1001,Autauga County,Alabama,32.5349,-86.6427
312,1003,Baldwin County,Alabama,30.7275,-87.7226
1589,1005,Barbour County,Alabama,31.8696,-85.3932
1700,1007,Bibb County,Alabama,32.9986,-87.1265
893,1009,Blount County,Alabama,33.9809,-86.5674


In [7]:
# Reset index
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,FIPS,County,State,Lat,Long
0,1001,Autauga County,Alabama,32.5349,-86.6427
1,1003,Baldwin County,Alabama,30.7275,-87.7226
2,1005,Barbour County,Alabama,31.8696,-85.3932
3,1007,Bibb County,Alabama,32.9986,-87.1265
4,1009,Blount County,Alabama,33.9809,-86.5674


In [8]:
# How many counties are we counting
df.shape

(3142, 5)

In [11]:
# Import File Data for Tick Data
file_data = "../../Data/Cleaned_Data/Lyme_Tick_Present_by_County.csv"
tick_df = pd.read_csv(file_data)
tick_df = tick_df.drop(["Unnamed: 0"], axis=1)
tick_df.head()

Unnamed: 0,FIPS,State,County,Lyme_Burgdorferi_Present,Lyme_Mayonii_Present
0,1001,AL,Autauga County ...,0,0
1,1003,AL,Baldwin County ...,0,0
2,1005,AL,Barbour County ...,0,0
3,1007,AL,Bibb County ...,0,0
4,1009,AL,Blount County ...,0,0


In [12]:
# Merge lat/long with tick data
merge = pd.concat([df, tick_df], axis=1)
merge.head()

Unnamed: 0,FIPS,County,State,Lat,Long,FIPS.1,State.1,County.1,Lyme_Burgdorferi_Present,Lyme_Mayonii_Present
0,1001.0,Autauga County,Alabama,32.5349,-86.6427,1001,AL,Autauga County ...,0,0
1,1003.0,Baldwin County,Alabama,30.7275,-87.7226,1003,AL,Baldwin County ...,0,0
2,1005.0,Barbour County,Alabama,31.8696,-85.3932,1005,AL,Barbour County ...,0,0
3,1007.0,Bibb County,Alabama,32.9986,-87.1265,1007,AL,Bibb County ...,0,0
4,1009.0,Blount County,Alabama,33.9809,-86.5674,1009,AL,Blount County ...,0,0


In [13]:
# How many counties are we counting
merge.shape

(3145, 10)

In [14]:
# How many isnull
sum(merge.isnull().values.ravel())

15

In [15]:
# Which columns have null values
for col in merge:
    print(f'{col} has {merge[col].isnull().sum()} nulls')

FIPS has FIPS    3
FIPS    0
dtype: int64 nulls
County has County    3
County    0
dtype: int64 nulls
State has State    3
State    0
dtype: int64 nulls
Lat has 3 nulls
Long has 3 nulls
FIPS has FIPS    3
FIPS    0
dtype: int64 nulls
State has State    3
State    0
dtype: int64 nulls
County has County    3
County    0
dtype: int64 nulls
Lyme_Burgdorferi_Present has 0 nulls
Lyme_Mayonii_Present has 0 nulls


In [16]:
# Drop null values
merge = merge.dropna()
merge.head()

Unnamed: 0,FIPS,County,State,Lat,Long,FIPS.1,State.1,County.1,Lyme_Burgdorferi_Present,Lyme_Mayonii_Present
0,1001.0,Autauga County,Alabama,32.5349,-86.6427,1001,AL,Autauga County ...,0,0
1,1003.0,Baldwin County,Alabama,30.7275,-87.7226,1003,AL,Baldwin County ...,0,0
2,1005.0,Barbour County,Alabama,31.8696,-85.3932,1005,AL,Barbour County ...,0,0
3,1007.0,Bibb County,Alabama,32.9986,-87.1265,1007,AL,Bibb County ...,0,0
4,1009.0,Blount County,Alabama,33.9809,-86.5674,1009,AL,Blount County ...,0,0


In [17]:
# Remove duplicate columns
merge = merge.loc[:,~merge.columns.duplicated()]
merge.head()

Unnamed: 0,FIPS,County,State,Lat,Long,Lyme_Burgdorferi_Present,Lyme_Mayonii_Present
0,1001.0,Autauga County,Alabama,32.5349,-86.6427,0,0
1,1003.0,Baldwin County,Alabama,30.7275,-87.7226,0,0
2,1005.0,Barbour County,Alabama,31.8696,-85.3932,0,0
3,1007.0,Bibb County,Alabama,32.9986,-87.1265,0,0
4,1009.0,Blount County,Alabama,33.9809,-86.5674,0,0


In [19]:
# Import File Data for Case Counts
file_data = "../../Data/Cleaned_Data/LymeCaseCountsbyCounty_FIPS.csv"
lyme_df = pd.read_csv(file_data)
# lyme_df = lyme_df.drop(["Unnamed: 0"], axis=1)
lyme_df.head()

Unnamed: 0,Ctyname,Ctyname_Updated,Stname,Unique Identifier,FIPS,STCODE,CTYCODE,Cases2000,Cases2001,Cases2002,...,Cases2010,Cases2011,Cases2012,Cases2013,Cases2014,Cases2015,Cases2016,Cases2017,Cases2018,Cases2019
0,Wyoming,Wyoming,Wyoming,Wyoming | Wyoming,99945,56,999,0,0,0,...,0,1,1,1,0,0,0,0,0,3
1,Wisconsin,Wisconsin,Wisconsin,Wisconsin | Wisconsin,99944,55,999,1,0,1,...,0,0,2,215,2,6,1,2,2,0
2,West Virginia,West Virginia,West Virginia,West Virginia | West Virginia,99943,54,999,0,0,0,...,0,4,4,6,1,125,137,71,148,256
3,Washington,Washington,Washington,Washington | Washington,99942,53,999,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Virginia,Virginia,Virginia,Virginia | Virginia,99941,51,999,0,0,0,...,199,93,0,94,46,93,89,134,34,124
