## Workflow to update a TX county data layer with the latest COVID-19 cases

Still a work in progress...

In [None]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import geopandas as gpd
import pandas as pd
import numpy as np
from arcgis.gis import GIS

In [None]:
# scrape Texas Department of State Health Services
# NOTE(review): this swaps the process-wide default HTTPS context for an
# unverified one, so certificate checks are off for any later HTTPS request
# that relies on that default. Kept as the existing workaround for the cert
# error; fixing the local CA bundle would be the safer long-term option.
ssl._create_default_https_context = ssl._create_unverified_context # avoid cert error 
url = 'https://www.dshs.state.tx.us/news/updates.shtm#coronavirus'
# read the page inside a context manager so the HTTP response is always closed
with urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, 'lxml')

In [None]:
# get count of latest COVID-19
# Collect the stripped text of every <td> cell in each table row. Rows with no
# <td> cells come back empty; those, and any row that does not have exactly the
# two cells (county name, count) the DataFrame columns expect, are skipped so
# the DataFrame constructor does not raise on a ragged row.
list_for_df = []
for tr in soup.find_all('tr'):
    # strip padding/newlines so county names match the shapefile's CNTY_NM
    row = [cell.text.strip() for cell in tr.find_all('td')]
    if len(row) == 2:
        list_for_df.append(row)

# create df with counts (values are still strings at this point)
count_df = pd.DataFrame(list_for_df, columns=['CNTY_NM', 'COUNT'])

In [None]:
# load the Texas county boundaries shapefile with GeoPandas
county_gdf_raw = gpd.GeoDataFrame(
    gpd.read_file(r'data/origs/Texas_County_Boundaries.shp')
)
# keep only the object id, the county name (used as the join key) and the geometry
keep_columns = ['OBJECTID', 'CNTY_NM', 'geometry']
county_gdf = county_gdf_raw[keep_columns]
# preview the first rows
county_gdf.head()

In [None]:
# merge count_df and county_gdf to add COVID19 count to geometry
# a left join keeps every county, including those absent from the scraped table
merged_df = county_gdf.merge(count_df, on='CNTY_NM', how='left')

# convert "COUNT" field to int
# Scraped counts are strings: drop thousands separators (e.g. "1,234") and
# surrounding whitespace, which would otherwise make the int conversion raise.
# Non-string values (the NaN of unmatched counties) pass through unchanged.
cleaned_counts = merged_df["COUNT"].map(
    lambda value: value.replace(',', '').strip() if isinstance(value, str) else value
)
# counties without a scraped row are NaN after the left join -> count of 0
merged_df["COUNT"] = cleaned_counts.fillna(0).astype(np.int64)

In [None]:
# make sure projection looks good
# quick visual sanity check: enable inline plotting, then draw the merged layer
%matplotlib inline
merged_df.plot()

In [None]:
# capture the current local date
# NOTE(review): `today` is not used by any cell visible here; presumably meant
# for date-stamping the output. Confirm or remove.
from datetime import date
today = date.today()

In [None]:
# write to file
import shutil
import zipfile
from pathlib import Path

shp_path = Path('data/tx_counties_covid19.shp')
merged_df.to_file('data/tx_counties_covid19.shp')

# doesn't create a .prj file, so use the one from the original data
# (the geometry is unchanged since it was read, so the source projection applies)
orig_prj = Path('data/origs/Texas_County_Boundaries.prj')
out_prj = shp_path.with_suffix('.prj')
if not out_prj.exists():
    shutil.copyfile(orig_prj, out_prj)

# zip every part of the shapefile (.shp, .shx, .dbf, .prj, ...) into
# data/tx_counties_covid19.zip, the archive the upload step reads
zip_path = shp_path.with_suffix('.zip')
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
    for part in sorted(shp_path.parent.glob(shp_path.stem + '.*')):
        # the archive itself matches the pattern; never add it to itself
        if part.suffix.lower() != '.zip':
            archive.write(part, arcname=part.name)

In [None]:
# values for GIS() from a file 
# agol.csv is read as: first line = username, second line = password
with open('agol.csv') as f:
    # strip only the line terminator; slicing off the last character would
    # truncate the value when the final line has no trailing newline
    username = f.readline().rstrip('\r\n')
    password = f.readline().rstrip('\r\n')
gis = GIS("https://www.arcgis.com", username, password)

In [None]:
# here is where we start to add data to AGOL

# create properties
# tags are given as one comma-separated string; the previous form placed bare
# strings inside the dict literal (and lacked a comma before 'type'), which is
# a syntax error
tx_county_covid19_properties = {
    'title': 'TX Counties COVID19 Count',
    'tags': 'tx, open data, COVID-19, COVID, TEXAS',
    'type': 'Shapefile',
}

# get the file location
data_file_location = r'data/tx_counties_covid19.zip'

In [None]:
# add the data to agol
# passes the item properties and the zipped shapefile path to gis.content.add;
# the result is kept so the next step can call publish() on it
tx_county_covid19_file = gis.content.add(tx_county_covid19_properties, data=data_file_location)

In [None]:
# call the publish method on the uploaded shapefile item; it returns another
# arcgis.gis.Item instance for the feature layer
tx_county_covid19_feature_layer_item = tx_county_covid19_file.publish()


#### Info for layer
DSHS provides a table of COVID-19 cases by county. This resource maps those counts.

This is a simple spatial reference for tracking COVID-19 cases in Texas. The data source is the Texas Department of State Health Services. DSHS updates their counts every day at noon (Central). Expect this layer to be updated with the latest counts not long after DSHS releases their numbers.

Data source: https://www.dshs.state.tx.us/news/updates.shtm#coronavirus

Want to see how this was created? https://github.com/dandresen/arcgis-python-api/blob/master/COVID-19%20TX%20Counties.ipynb

This is a NON-OFFICIAL resource. The author does not assume any responsibility for errors in the data. The only data source used for COVID-19 counts is the Texas Department of State Health Services website. Please visit their site to understand more about how the data was collected.  

In [None]:
######################################################################

### The above workflow got the data on AGOL. I've just been doing the rest (symbols, info for layer, etc) in the UI.

### I'm still thinking through how to update this daily... that work will be below

In [None]:
# The shapefile can be updated, but the Feature Service cannot. Maybe just delete all of the items 
# and republish every day?