In [6]:
import requests
import json
import pandas as pd
from config import covid_api
from config import google_api
import gmaps

# NOTES:
# https://api.covid19api.com/world/total
# This is all data - returns 10MB+
# https://api.covid19api.com/all


In [7]:
# From COVID-19 API
# https://covid19api.com/

#base_url = 'https://api.covid19api.com/summary'
#headers = {"X-Access-Token":covid_api}

#response = requests.get(base_url, headers=headers).json()
#print(json.dumps(response, indent=4, sort_keys=True))
#response['Countries']

In [9]:
# -----------------------
# Reports overall fisk factor groups for North Carolina, based on census data
# -----------------------

# Import CRE csv from
# https://www2.census.gov/data/experimental-data-products/community-resilience-estimates/2020/

cre_path = "data/cre-2018-a11.csv"
cre_df = pd.read_csv(cre_path)

# Break down by North Carolina
nc_cre_df = cre_df.loc[cre_df['STABREV']=='NC']

# Clean up the columns
nc_cre_cleaned_df = nc_cre_df[['rfgrp','prednum','popuni']]

# Group by risk factors and add up
nc_overall_rf_df = nc_cre_cleaned_df.groupby('rfgrp').sum()

# Build new series to hold percentages
nc_overall_rf_df['percentage'] = ''

# Iterate over rows to calculate percentages and store in the new series 
for index, row in nc_overall_rf_df.iterrows():
    pct = (row['prednum'] / row['popuni'])*100
    nc_overall_rf_df.at[index,'percentage'] = pct

# Display the dataframe
nc_overall_rf_df


Unnamed: 0_level_0,prednum,popuni,percentage
rfgrp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0RF,5793084,20438202,28.3444
1-2RF,9733922,20438202,47.6261
3PLRF,4911196,20438202,24.0295


In [186]:
# -----------------------
# Reports fisk factor groups, by county, for North Carolina, based on census data
# -----------------------

# Break down NC data by county and risk factor levels
nc_county_rf_df = nc_cre_df.groupby(['ctname','rfgrp']).sum()
#nc_county_rf_df = nc_cre_df.groupby(['ctname']).sum()

# Keep only the series we need
nc_county_rf_df = nc_county_rf_df[['prednum','popuni']]

# Make a new series to hold percentages
nc_county_rf_df['percentage'] = ''

# Iterate over rows to calculate percentages and store in the new series 
for index, row in nc_county_rf_df.iterrows():
    pct = (row['prednum'] / row['popuni'])*100
    nc_county_rf_df.at[index,'percentage'] = round(pct,2)

# Save as CSV
nc_county_rf_df.to_csv(r'NC_counties_risk factors.csv')

# Display the dataframe
nc_county_rf_df


Unnamed: 0_level_0,Unnamed: 1_level_0,prednum,popuni,percentage
ctname,rfgrp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Alamance County, NC",0RF,96568,323580,29.84
"Alamance County, NC",1-2RF,149414,323580,46.18
"Alamance County, NC",3PLRF,77598,323580,23.98
"Alexander County, NC",0RF,19578,72192,27.12
"Alexander County, NC",1-2RF,34884,72192,48.32
...,...,...,...,...
"Yadkin County, NC",1-2RF,33378,74996,44.51
"Yadkin County, NC",3PLRF,21412,74996,28.55
"Yancey County, NC",0RF,8614,35740,24.1
"Yancey County, NC",1-2RF,16542,35740,46.28


In [167]:
# -----------------------
# This is a NYTimes report on national COVID-19 cases and deaths, sorted by county
# From: https://github.com/nytimes/covid-19-data (us-counties.csv)
# -----------------------

# Pull in the live data
counties_path = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
# If the above breaks, we can refer to the saved version of the csv, below:
#counties_path = "data/us-counties.csv"

# Save to a dataframe
covid_df = pd.read_csv(counties_path)

# Sort for NC
covid_county_df = covid_df.loc[covid_df['state']=='North Carolina']

# Drop irrelevant column(s)
covid_county_clean_df = covid_county_df.drop(['fips'], 1)

# Diplay the dataframe
covid_county_clean_df


Unnamed: 0,date,county,state,cases,deaths
483,2020-03-03,Wake,North Carolina,1,0
520,2020-03-04,Wake,North Carolina,1,0
567,2020-03-05,Wake,North Carolina,1,0
630,2020-03-06,Chatham,North Carolina,1,0
631,2020-03-06,Wake,North Carolina,1,0
...,...,...,...,...,...
339530,2020-07-16,Wayne,North Carolina,2055,34
339531,2020-07-16,Wilkes,North Carolina,641,7
339532,2020-07-16,Wilson,North Carolina,1036,27
339533,2020-07-16,Yadkin,North Carolina,395,5


In [168]:
# Add up all values, and group, to see totals per county
covid_county_sorted_df = covid_county_clean_df.groupby(['county']).sum()

# Sort by cases, highest to lowest
sorted_cases = covid_county_sorted_df.sort_values(by=['cases'],ascending=False).reset_index()
# Sort by deaths, highest to lowest
sorted_deaths = covid_county_sorted_df.sort_values(by=['deaths'],ascending=False).reset_index()

# Build a new dataframe to hold top counties and the value, sorted by cases and deaths
summary_df = pd.DataFrame(columns=['Top_cases','Cases','Top_deaths','Deaths'])

# Add the cases to the new dataframe
summary_df['Top_cases'] = sorted_cases['county'].values
summary_df['Cases'] = sorted_cases['cases'].values

# Add the deaths to the new dataframe
summary_df['Top_deaths'] = sorted_deaths['county'].values
summary_df['Deaths'] = sorted_deaths['deaths'].values

# Show the summary
summary_df

Unnamed: 0,Top_cases,Cases,Top_deaths,Deaths
0,Mecklenburg,560949,Mecklenburg,9117
1,Wake,261754,Guilford,6556
2,Durham,196794,Durham,4097
3,Guilford,147057,Henderson,3559
4,Forsyth,146709,Wake,3268
...,...,...,...,...
95,Clay,712,Haywood,0
96,Avery,559,Madison,0
97,Hyde,536,Avery,0
98,Madison,490,Beaufort,0


In [279]:
# -----------------------
# Report population, by county, with risk factors
# That is, drop all 0RF values
# -----------------------

# Reset the index on nc_county_rf_df so we can combine series
county_reset_df = nc_county_rf_df.reset_index()

# Drop the row with 0 risk factors
county_risk = county_reset_df.drop(county_reset_df[county_reset_df['rfgrp'] == '0RF'].index) 


county_risk_grp = county_risk.groupby('ctname').sum()
county_risk_grp['popuni'] = county_risk_grp['popuni']/2
county_risk_grp.reset_index(inplace=True)
county_risk_grp.rename(columns={'ctname':'county','prednum':'pop_at_risk','popuni':'total_pop'}, inplace=True)

# Clean up the county names, so we can combine data in next step
county_risk_grp['county'].replace(' County, NC','',regex=True,inplace=True)

# Display dataframe
county_risk_grp



Unnamed: 0,county,pop_at_risk,total_pop
0,Alamance,227012,323580.0
1,Alexander,52614,72192.0
2,Alleghany,16824,22292.0
3,Anson,35352,45898.0
4,Ashe,40196,54136.0
...,...,...,...
95,Wayne,185502,245294.0
96,Wilkes,101954,136236.0
97,Wilson,121610,161844.0
98,Yadkin,54790,74996.0


In [283]:
# -----------------------
# Combine dataframes from NYTimes data and CRE data
# -----------------------

# Reset index on covid_county_sorted_df so we can merge on county name
county_totals = covid_county_sorted_df.reset_index()

# Do the merge
combined_data = pd.merge(county_risk_grp, county_totals, on='county', how='outer')

# Display the dataframe
combined_data


Unnamed: 0,county,pop_at_risk,total_pop,cases,deaths
0,Alamance,227012,323580.0,51906,1827
1,Alexander,52614,72192.0,4589,17
2,Alleghany,16824,22292.0,1856,0
3,Anson,35352,45898.0,7356,50
4,Ashe,40196,54136.0,3036,37
...,...,...,...,...,...
95,Wayne,185502,245294.0,111033,1698
96,Wilkes,101954,136236.0,36219,368
97,Wilson,121610,161844.0,39885,1157
98,Yadkin,54790,74996.0,16505,239


In [298]:
# -----------------------
# Summary that compares 2 factors from above dataframe 
# -----------------------

for i, row in combined_data.iterrows():
    print(f'{row[0]} county deaths by cases: {round((row[4]/row[3])*100,2)}%')
    print(f'{row[0]} county deaths by population at risk: {round((row[4]/row[1])*100,2)}%')
    print(f'{row[0]} county deaths by total population: {round((row[4]/row[2])*100,2)}%')
    print(f'{row[0]} county cases by population at risk: {round((row[3]/row[1])*100,2)}%')
    print(f'{row[0]} county cases rate by total population: {round((row[3]/row[2])*100,2)}%')




Alamance county deaths by cases: 3.52%
Alamance county deaths by population at risk: 0.8%
Alamance county deaths by total population: 0.56%
Alamance county cases by population at risk: 22.86%
Alamance county cases rate by total population: 16.04%
Alexander county deaths by cases: 0.37%
Alexander county deaths by population at risk: 0.03%
Alexander county deaths by total population: 0.02%
Alexander county cases by population at risk: 8.72%
Alexander county cases rate by total population: 6.36%
Alleghany county deaths by cases: 0.0%
Alleghany county deaths by population at risk: 0.0%
Alleghany county deaths by total population: 0.0%
Alleghany county cases by population at risk: 11.03%
Alleghany county cases rate by total population: 8.33%
Anson county deaths by cases: 0.68%
Anson county deaths by population at risk: 0.14%
Anson county deaths by total population: 0.11%
Anson county cases by population at risk: 20.81%
Anson county cases rate by total population: 16.03%
Ashe county deaths b

In [10]:
# -----------------------
# COVID testing location data
# From https://github.com/codersagainstcovidorg/covid19testing-backend
# -----------------------

testing_df = pd.DataFrame(columns=['Name','Lat','Lon','City','State','Collecting','Evaluating'])
testing_df

base_url = "http://api.findcovidtesting.com/api/v1/location"
response = requests.get(base_url).json()

In [11]:
# -----------------------
# Use this to see how the response is structured:
#response[0]

# -----------------------
# Relevant info seems to be:
#     location_name
#     is_collecting_samples - boolean
#     is_evaluating_symptoms - boolean
#     location_address_locality = city
#     location_address_region = state
#     location_latitude
#     location_longitude


In [12]:
# Iterate through the response and store selected variables in dataframe

for i in range(len(response)):
    if response[i]['location_address_region'] == 'NC':
        testing_df.loc[i,'Name'] = response[i]['location_name']
        testing_df.loc[i,'Lat'] = response[i]['location_latitude']
        testing_df.loc[i,'Lon'] = response[i]['location_longitude']
        testing_df.loc[i,'City'] = response[i]['location_address_locality']
        testing_df.loc[i,'State'] = response[i]['location_address_region']
        testing_df.loc[i,'Collecting'] = response[i]['is_collecting_samples']
        testing_df.loc[i,'Evaluating'] = response[i]['is_evaluating_symptoms']

# Display the dataframe       
testing_df

Unnamed: 0,Name,Lat,Lon,City,State,Collecting,Evaluating
28,MEDAC Urgent Care - Military Cutoff,34.2249,-77.8305,Wilmington,NC,True,True
34,StarMed Family Practice and Urgent Care,35.2077,-80.7544,Charlotte,NC,True,True
43,Fayetteville VA Coastal Health Care System,35.0879,-78.8768,Fayetteville,NC,True,True
46,"Ernesto Graham MD, PA Obstetric's & Gynecology",35.0401,-78.9348,Fayetteville,NC,True,True
68,AppHealthCare - Alleghany Health Center,36.4924,-81.1459,Sparta,NC,True,True
...,...,...,...,...,...,...,...
9708,Dosher Urgent Care,33.9335,-78.0665,Brunswick County,NC,True,True
9751,Windsor Center (Guilford County Division of Pu...,36.0643,-79.7705,,NC,True,False
9825,Macon County Public Health,35.1969,-83.3715,Franklin,NC,True,True
9936,Ingles Parking Lot,35.5991,-82.4085,Swannanoa,NC,True,False


In [13]:
gmaps.configure(api_key=google_api)

# Build the map
# Set center and zoom level
fig = gmaps.figure(map_type='HYBRID',center=(35.3,-79.5), zoom_level=7)
locations = testing_df[['Lat', 'Lon']]
hover = testing_df['Name'].tolist()

symbols = gmaps.symbol_layer(locations, fill_color='white', stroke_color='white', scale = 2, info_box_content = hover)
fig.add_layer(symbols)


# Display the map
fig

Figure(layout=FigureLayout(height='420px'))