In [349]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [436]:
#Get DailyCounts by US County
# Index article whose "fancy box" links to one live-updates page per state.
url = "https://www.livescience.com/coronavirus-updates-united-states.html"
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html")

link_body = soup.find('div', {'class' : 'fancy_box_body'})
if link_body is None:
    # Fail loudly instead of silently scraping zero pages.
    raise RuntimeError("No <div class='fancy_box_body'> found on " + url)

# Anchors without an href cannot be fetched, so they are dropped here.
links = [anchor.get('href') for anchor in link_body.find_all('a') if anchor.get('href')]

# A list item of the form "<name>: <count>". The name may contain letters,
# dots, apostrophes and whitespace; the count must have at least one digit and
# may use thousands separators ("1,234"), which are removed after matching.
pattern = re.compile(r"([A-Za-z.\s']*):\s([0-9][0-9,]*)")

# One entry per reachable page: {'Link': url, 'Cases': [(name, count_text), ...]}
results = []
for link in links:
    try:
        response = requests.get(link, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        # One broken or relative link should not abort the whole scrape.
        print('Skipping', link, '-', err)
        continue
    soup = BeautifulSoup(response.text, "html")
    cases = []
    for item in soup.find_all('li'):
        m = pattern.match(item.text)
        if m:
            cases.append((m.group(1), m.group(2).replace(',', '')))
    results.append({'Link' : link, 'Cases' : cases})

# Keep only pages that produced at least one match and turn each
# (name, count) tuple into a {'County': ..., 'Cases': ...} record.
# The records are built inline so this step does not depend on a helper that
# is only defined further down in the cell.
non_zero = [
    {
        'Link' : result['Link'],
        'Cases' : [dict(zip(('County', 'Cases'), pair)) for pair in result['Cases']],
    }
    for result in results
    if len(result['Cases']) > 0
]

def get_list_of_dict(keys, list_of_tuples):
    """
    Build one dictionary per tuple by pairing ``keys`` with the tuple's values.

    keys           -- sequence of dictionary keys, matched to values by position
    list_of_tuples -- iterable of value tuples, one per output dictionary

    Returns a list of dictionaries in the same order as ``list_of_tuples``.
    Pairing follows ``zip``: surplus keys or surplus values are ignored.
    """
    records = []
    for values in list_of_tuples:
        record = dict(zip(keys, values))
        records.append(record)
    return records

# Scraped names can carry stray leading/trailing whitespace, which would
# defeat the FIPS lookup below, so every page's county names are stripped.
for res in non_zero:
    for county in res['Cases']:
        county['County'] = county['County'].strip()

# State names as they appear in the article URLs: lower-case, hyphenated.
states = list(pd.read_csv('States.csv')['State'])
states = [state.lower() for state in states]
states = [re.sub(' ', '-', state) for state in states]

# This combined page mentions two state names in its URL; it is filed under Maryland.
MARYLAND_DC_LINK = 'https://www.livescience.com/maryland-washington-dc-coronavirus-updates.html'


def _state_slug_for_link(link, state_slugs):
    """Return the longest slug from ``state_slugs`` contained in ``link``, or None.

    Taking the longest match stops a state whose name is a substring of
    another ('kansas' in 'arkansas', 'virginia' in 'west-virginia') from
    claiming the other state's page.
    """
    matches = [slug for slug in state_slugs if slug in link]
    return max(matches, key=len) if matches else None


for res in non_zero:
    if res['Link'] == MARYLAND_DC_LINK:
        state_name = 'maryland'
    else:
        slug = _state_slug_for_link(res['Link'], states)
        if slug is None:
            # Not a state page: leave its rows without a 'State' key.
            continue
        state_name = re.sub('-', ' ', slug)
    for entry in res['Cases']:
        entry.update({'State' : state_name})

import addfips
af = addfips.AddFIPS()
#af.get_county_fips('Nye', state='Nevada')

# add_county_fips stores the looked-up code under row['fips'].
# Rows without a 'State' cannot be looked up, so they are left out.
all_cases = []
for res in non_zero:
    for row in res['Cases']:
        if 'State' not in row:
            continue
        af.add_county_fips(row, county_field="County", state_field="State")
        all_cases.append(row)

# Naming the columns keeps the frame well-formed even when nothing was scraped.
all_cases = pd.DataFrame(all_cases, columns=['County', 'Cases', 'State', 'fips'])

import numpy as np
# Rows whose county could not be resolved to a FIPS code are dropped.
COVIDCounts = all_cases.replace(to_replace='None', value=np.nan).dropna()

# Rename by name, not by position, so the labels cannot be attached to the
# wrong columns if the column order differs.
COVIDCounts = COVIDCounts.rename(columns={
    'State' : 'STATE',
    'Cases' : 'COUNT',
    'County' : 'COUNTY',
    'fips' : 'COUNTYFIPS',
})

COVIDCounts = COVIDCounts.to_dict('records')

In [445]:
#Get Hospital Data
Hospitals = pd.read_csv('Hospitals.csv')

Hospitals = Hospitals[Hospitals['STATUS'] == 'OPEN'].copy()
# BEDS is coerced to a number before filtering and summing. Comparing against
# the string '-999' does nothing on an integer column, and summing a text
# column would concatenate the digits instead of adding them.
Hospitals['BEDS'] = pd.to_numeric(Hospitals['BEDS'], errors='coerce')
Hospitals = Hospitals[Hospitals['BEDS'] != -999]
types = ['GENERAL ACUTE CARE', 'CHILDRENS']
Hospitals = Hospitals[Hospitals.TYPE.isin(types)]
# dropna also removes rows whose BEDS value could not be parsed.
Hospitals = Hospitals.dropna().astype({'BEDS' : int})

grouped = Hospitals.groupby('COUNTYFIPS')

bedsPerCounty = grouped['BEDS'].sum().reset_index().to_dict('records')
# FIPS -> total beds in the county, for constant-time lookup per hospital.
beds_by_fips = {county['COUNTYFIPS'] : county['BEDS'] for county in bedsPerCounty}

# FIPS (as text) -> scraped case count. Entries whose count is not an integer
# are skipped; when a FIPS code appears more than once the last entry wins.
cases_by_fips = {}
for entry in COVIDCounts:
    try:
        cases_by_fips[str(entry['COUNTYFIPS'])] = int(entry['COUNT'])
    except (TypeError, ValueError):
        continue

Hospital_dicts = Hospitals.to_dict('records')
for hospital in Hospital_dicts:
    beds_in_county = beds_by_fips[hospital['COUNTYFIPS']]
    hospital['BedsInCounty'] = beds_in_county
    # Share of the county's beds held by this hospital; NaN when the county
    # total is zero, rather than a ZeroDivisionError.
    if beds_in_county:
        hospital['MarketShare'] = hospital['BEDS'] / beds_in_county
    else:
        hospital['MarketShare'] = float('nan')
    fips_key = str(hospital['COUNTYFIPS'])
    if fips_key in cases_by_fips:
        hospital['CasesInCounty'] = cases_by_fips[fips_key]

Hospitals = pd.DataFrame(Hospital_dicts)

In [447]:
#Add ICU Bed info
# Left join: every hospital row is kept, with ICU columns filled in where the
# two tables agree on all of their shared columns.
result_icu = pd.read_csv('ICU_BEDS.csv')

res = pd.merge(left=Hospitals, right=result_icu, how='left')

In [454]:
#Add State-level counts
covid = pd.read_csv('CovidCasesByStateMarch18.csv')

# US rows only, reduced to the state name and the three count columns.
covid = covid.loc[
    covid['Country/Region'] == 'US',
    ['Province/State', 'Confirmed', 'Deaths', 'Recovered'],
]
covid = covid.rename(columns={
    'Province/State' : 'STATE',
    'Confirmed' : 'CONFIRMED_CASES',
    'Deaths' : 'DEATHS',
    'Recovered' : 'RECOVERED',
})

# Lookup table read without a header row: full state name -> abbreviation.
states = pd.read_csv('name-abbr.csv', names = ['STATE', 'ABBREVIATION'])

# The two frames share only the STATE column, so that is the join key.
covid = pd.merge(covid, states)

# After the join the full name moves to FULL_STATE and the abbreviation
# takes over the STATE label.
covid = covid.rename(columns={
    'STATE' : 'FULL_STATE',
    'CONFIRMED_CASES' : 'CONFIRMED_CASES_STATE',
    'ABBREVIATION' : 'STATE',
})

In [476]:
#Reformat data and add patient counts
final = pd.merge(res, covid, how = 'outer', on = 'STATE')

COVIDCounts = pd.DataFrame(COVIDCounts)

# The left join keeps every hospital row; county columns present on both sides
# get the _x (hospital side) / _y (scraped side) suffixes.
final_fin = pd.merge(final, COVIDCounts, how = 'left', on = 'COUNTYFIPS')

final_fin = final_fin[['LATITUDE', 'LONGITUDE', 'NAME', 'ADDRESS', 'CITY', 'STATE_x', 'COUNTY_x', 'ZIP', 'TELEPHONE', 'TYPE',
       'STATUS', 'COUNTYFIPS', 'SOURCE', 'WEBSITE', 'OWNER', 'BedsInCounty',
       'MarketShare', 'ProviderNumber',  'BEDS', 'ICU_Beds',
       'CONFIRMED_CASES_STATE', 'DEATHS', 'RECOVERED', 'COUNT']]

final_fin.columns = ['LATITUDE', 'LONGITUDE', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP', 'TELEPHONE', 'TYPE',
       'STATUS', 'COUNTYFIPS', 'SOURCE', 'WEBSITE', 'OWNER', 'TOTAL_BEDS_IN_COUNTY',
       'MARKET_SHARE', 'PROVIDERNUMBER', 'TOTAL_HOSPITAL_BEDS', 'ICU_HOSPITAL_BEDS',
       'CONFIRMED_CASES_STATE', 'DEATHS_STATE', 'RECOVERED_STATE', 'COUNTY_CASES']

# Hospitals in counties with no scraped case count are dropped.
final_fin = final_fin.dropna(subset=['COUNTY_CASES'])

final_records = final_fin.to_dict('records')


# Estimated patients per hospital = county cases x the hospital's share of the
# county's beds. Only conversion failures are tolerated, and the fallback is a
# real float NaN so the PATIENTS column stays numeric.
for record in final_records:
    try:
        current_patients = float(record['COUNTY_CASES']) * float(record['MARKET_SHARE'])
    except (TypeError, ValueError):
        current_patients = float('nan')
    record['PATIENTS'] = current_patients

In [482]:
#Add County Population Data
population_columns = ['GEO.display-label', 'respop72018']
populations = pd.read_csv('CountyPopulations.csv', encoding = 'unicode-escape')[population_columns]

# The first data row is skipped before the columns are relabelled.
populations = populations.iloc[1:]
populations = populations.rename(columns={
    'GEO.display-label' : 'Location',
    'respop72018' : 'Population',
})
populations = populations.to_dict('records')

# 'Location' is split on commas into "<county>, <state>"; the FIPS lookup
# then stores its result under the record's 'fips' key.
for record in populations:
    parts = record['Location'].split(',')
    record['County'] = parts[0].strip()
    record['State'] = parts[1].strip()
    af.add_county_fips(record, county_field="County", state_field="State")

populations = pd.DataFrame(populations)
populations = populations[['Population', 'fips']].rename(columns={
    'Population' : 'POPULATION',
    'fips' : 'COUNTYFIPS',
})

# Joined on the columns the two frames have in common.
full_data = pd.merge(left=populations, right=pd.DataFrame(final_records))

In [488]:
# Write the combined table to disk without the DataFrame index column.
full_data.to_csv(path_or_buf='COVID_Sample_2019.csv', index=False)