In [73]:
import geopandas as gpd

### Read in the SLE GeoJSON for 2015 to 2018

In [74]:
# Load the 2015-2018 SLE GeoJSON into a GeoDataFrame (path is relative to the notebook's working directory)
sle_data = gpd.read_file('./data/sle/output/sle2015_2018.geojson')

In [75]:
# Preview the first five rows to check the columns and values that were read in
sle_data.head()

Unnamed: 0,date,city,region,positive,title,geometry
0,09/14/2015,Mecca,"Riverside County (California, USA)",SLEV,YTD Positive Sentinel Chicken,POINT (-116.0302618249568 33.5276819108859)
1,09/10/2015,Mecca,"Riverside County (California, USA)",SLEV,YTD Positive Sentinel Chicken,POINT (-116.0749179098212 33.54414473222415)
2,09/28/2015,Mecca,"Riverside County (California, USA)",SLEV,YTD Positive Sentinel Chicken,POINT (-116.0788138626815 33.54292818890128)
3,09/23/2015,Mecca,"Riverside County (California, USA)",SLEV,YTD Positive Sentinel Chicken,POINT (-116.0293803087907 33.5347901760566)
4,09/24/2015,Mecca,"Riverside County (California, USA)",SLEV,YTD Positive Sentinel Chicken,POINT (-116.0755536165777 33.54329531806545)


### Make sure all cities are properly capitalized

In [76]:
# Title-case every city name so spelling is consistent across rows.
# NOTE(review): str.title() also lowercases interior capitals
# (e.g. "McFarland" -> "Mcfarland") - confirm no such city names occur in the data.
sle_data['city'] = sle_data['city'].str.title()

### Loop through the regions and strip the trailing 'County (California, USA)' so only the county name remains

In [77]:
# Build a list of cleaned county names, one entry per row of the region column.
# Raw values look like "Riverside County (California, USA)"; only the county
# name itself ("Riverside") is kept.
regions = []

# Loop over the region column
for value in sle_data['region']:
    if not isinstance(value, str):
        # Missing regions (None / NaN) keep the 'None' placeholder string so the
        # list stays the same length as the dataframe
        regions.append('None')
        continue

    # Drop the parenthetical qualifier, e.g. " (California, USA)"
    county = value.split(' (')[0].strip()

    # Remove the trailing " County" word. Splitting on ' C' (the previous
    # approach) truncated multi-word names whose later words start with "C",
    # e.g. "Contra Costa County" -> "Contra", "Santa Clara County" -> "Santa".
    if county.endswith(' County'):
        county = county[:-len(' County')]

    regions.append(county)

### Drop the current region column and re-add with the values from the regions list

In [78]:
# Remove the raw region column ...
sle_data = sle_data.drop(columns=['region'])

# ... and append the cleaned values from the regions list as the new region column
sle_data['region'] = regions

### Change values in the title column

In [79]:
# Map the verbose "YTD Positive ..." titles to short specimen-type labels
title_labels = {
    'YTD Positive Sentinel Chicken': 'Sentinel Chicken',
    'YTD Positive Mosquito Pool': 'Mosquito Pool',
}

# Build one output value per row. Titles that are not in the mapping
# (including missing values) are kept unchanged, so the list always has the
# same length as the dataframe; silently skipping them would make the later
# column assignment fail with a length mismatch.
titles = [title_labels.get(title, title) for title in sle_data['title']]

### Drop the current title column and re-add with the values from the titles list

In [80]:
# Remove the raw title column ...
sle_data = sle_data.drop(columns=['title'])

# ... and append the shortened values from the titles list as the new title column
sle_data['title'] = titles

### Rearrange columns so the newly added columns match the previous arrangement

In [81]:
# Snapshot of the current column order (not used further in the cells shown here)
sle_cols = list(sle_data.columns)

# Target column order: the same layout the file had when it was read in
sle_cols_rearranged = [
    'date',
    'city',
    'region',
    'positive',
    'title',
    'geometry',
]

# Select the columns in the target order and rebind the dataframe
sle_data = sle_data[sle_cols_rearranged]

In [82]:
# Preview the first five rows to confirm the cleaned values and the column order
sle_data.head()

Unnamed: 0,date,city,region,positive,title,geometry
0,09/14/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0302618249568 33.5276819108859)
1,09/10/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0749179098212 33.54414473222415)
2,09/28/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0788138626815 33.54292818890128)
3,09/23/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0293803087907 33.5347901760566)
4,09/24/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0755536165777 33.54329531806545)


### Rename columns

In [83]:
# Give the columns their final names: region -> county, positive -> virus, title -> spectype
sle_data = sle_data.rename(
    columns={
        'region': 'county',
        'positive': 'virus',
        'title': 'spectype',
    }
)

In [84]:
# Preview the first five rows to confirm the new column names
sle_data.head()

Unnamed: 0,date,city,county,virus,spectype,geometry
0,09/14/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0302618249568 33.5276819108859)
1,09/10/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0749179098212 33.54414473222415)
2,09/28/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0788138626815 33.54292818890128)
3,09/23/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0293803087907 33.5347901760566)
4,09/24/2015,Mecca,Riverside,SLEV,Sentinel Chicken,POINT (-116.0755536165777 33.54329531806545)


In [85]:
# Write the cleaned data to a new GeoJSON file in the same output directory as the input file
sle_data.to_file('./data/sle/output/sle2015_2018_cleaned.geojson', driver="GeoJSON")