## Workflow to update a TX County data layer (shapefile) with the latest COVID-19 cases and load it to ArcGIS Online
4-23-20: It looks like TX DSHS updated how they are reporting their data. Keeping this workflow here for reference.

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import geopandas as gpd
import pandas as pd
import numpy as np
from arcgis.gis import GIS

In [None]:
# scrape Texas Department of State Health Services
url = 'https://www.dshs.state.tx.us/news/updates.shtm#coronavirus'

# Certificate verification is switched off to avoid a cert error, but only for
# this one request, instead of replacing ssl's process-wide default HTTPS context.
insecure_context = ssl.create_default_context()
insecure_context.check_hostname = False  # has to be disabled before verify_mode can be CERT_NONE
insecure_context.verify_mode = ssl.CERT_NONE

# read the page inside a `with` block so the HTTP response is always closed
with urlopen(url, context=insecure_context) as response:
    html = response.read()
soup = BeautifulSoup(html, 'lxml')

In [None]:
# get count of latest COVID-19
# Every usable table row is a (county name, case count) pair. Rows without any
# <td> cells, or with a different number of cells, are skipped so that they
# cannot break the two-column DataFrame built below.
list_for_df = []
for tr in soup.find_all('tr'):
    # strip surrounding whitespace so it cannot prevent a match when joining on CNTY_NM
    row = [cell.text.strip() for cell in tr.find_all('td')]
    if len(row) == 2:
        list_for_df.append(row)

# create df with counts (both columns hold text at this point)
count_df = pd.DataFrame(list_for_df, columns=['CNTY_NM', 'COUNT'])

In [None]:
# load the county boundaries shapefile with GeoPandas
shapefile_path = r'data/origs/Texas_County_Boundaries.shp'
county_gdf_raw = gpd.GeoDataFrame(gpd.read_file(shapefile_path))

# keep only the id, county name and geometry columns
keep_columns = ['OBJECTID', 'CNTY_NM', 'geometry']
county_gdf = county_gdf_raw[keep_columns]

# preview the first rows
county_gdf.head()

In [None]:
# merge count_df and county_gdf to add COVID19 count to geometry
# (left join: every county row is kept; counties absent from count_df get NaN)
merged_df = county_gdf.merge(count_df, on='CNTY_NM', how='left')

# convert "COUNT" field to int
# The scraped counts are text. Unmatched counties become 0, then thousands
# separators and stray whitespace are removed before the numeric cast; a value
# such as "1,234" would otherwise make the int64 conversion raise.
count_text = merged_df['COUNT'].fillna(0).astype(str)
count_text = count_text.str.replace(',', '', regex=False).str.strip()
merged_df['COUNT'] = pd.to_numeric(count_text).astype(np.int64)

In [None]:
# make sure projection looks good
# NOTE: `%matplotlib inline` is an IPython/Jupyter magic, not plain Python; this
# cell only runs inside a notebook/IPython session.
%matplotlib inline
# draw the merged layer as a quick visual check
merged_df.plot()

In [None]:
import shutil
from datetime import date
# today's date; inserted with str.format() into the output file names and the AGOL item title
today = date.today()

In [None]:
# build the dated output paths once
shp_path = 'data/import/tx_counties_covid19_{}.shp'.format(today)
prj_path = 'data/import/tx_counties_covid19_{}.prj'.format(today)

# write the merged layer to a shapefile
merged_df.to_file(shp_path)

# the export doesn't create a .prj file, so reuse the one from the original data
shutil.copyfile('data/origs/Texas_County_Boundaries.prj', prj_path)


In [None]:
# zip shapefile
# Only today's shapefile components (.shp, .shx, .dbf, .prj, ...) go into the
# archive. shutil.make_archive(..., root_dir='data/import') would archive the
# whole directory instead, including earlier days' files and the zip being written.
import zipfile
from pathlib import Path

import_dir = Path('data/import')
shp_stem = 'tx_counties_covid19_{}'.format(today)
zip_path = import_dir / (shp_stem + '.zip')

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
    for part in sorted(import_dir.glob(shp_stem + '.*')):
        # skip the archive itself (it already exists on disk while being written)
        if part.suffix.lower() != '.zip':
            # store files at the archive root, without the data/import/ prefix
            archive.write(part, arcname=part.name)

# show the archive location as the cell output
str(zip_path.resolve())

In [None]:
# values for GIS() from a file
# agol.csv is read as: first line = username, second line = password
with open('agol.csv') as f:
    # Strip only the line terminator. Slicing with [:-1] would cut off the last
    # real character whenever a line has no trailing newline (e.g. the final
    # line of the file). The `with` block also makes sure the file gets closed.
    username = f.readline().rstrip('\n')
    password = f.readline().rstrip('\n')
gis = GIS("https://www.arcgis.com", username, password)

In [None]:
# get the old id and delete it
# gis.content.get() returns None when no item with this id exists (for example
# after it was already deleted by a previous run), so guard the delete instead
# of failing with an AttributeError.
old_shp_id = gis.content.get('40000379f7fc442aa53499ff4c4d2db6')
if old_shp_id is not None:
    old_shp_id.delete()
else:
    print('No existing AGOL item found for the old shapefile id; nothing to delete.')

In [None]:
# add data to AGOL

# free-text fields of the item, kept as named strings so the properties stay readable
item_title = 'Shapefile_TX_Counties_COVID19_Count_{}'.format(today)
item_snippet = 'TX DSHS provides a table of COVID-19 cases by county. This resource puts those data in a spatial format'
item_description = '''This is a simple spatial reference tracking to the COVID-19 cases in Texas. The data source is the Texas Department of State Health Services. DSHS updates their counts everyday at noon (Central). Expect this layer to be updated with the latest counts not long after DSHS release their numbers.\n


Data source: https://www.dshs.state.tx.us/news/updates.shtm#coronavirus \n

Want to see how this was created? https://github.com/dandresen/arcgis-python-api/blob/master/COVID-19%20TX%20Counties.ipynb'''
item_license = '''This is a NON-OFFICIAL resource. The author does not assume any responsibly for errors in the data. The only data source used for COVID-19 counts is from the Texas Department of State Health Services website. Please visit their site to understand more about how the data was collected.'''

# create properties
tx_county_covid19_properties = dict(
    title=item_title,
    snippet=item_snippet,
    description=item_description,
    tags='tx, open data, COVID-19, COVID, TEXAS, shapefile',
    type='Shapefile',
    licenseInfo=item_license,
)

# locate today's zipped shapefile, add it to AGOL and share it with everyone
data_file_location = r'data/import/tx_counties_covid19_{}.zip'.format(today)
tx_county_covid19_file = gis.content.add(
    tx_county_covid19_properties,
    data=data_file_location,
)
tx_county_covid19_file.share(everyone=True)