In [79]:
import requests
import pandas as pd
import numpy as np
import math
import json
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6
from IPython.display import Markdown as md
import os

output_notebook()
url = "http://bing.com/covid/data"
r = requests.get(url)
j = r.json()
with open('state_populations.json') as data:
    state_pop = pd.read_json(data)
#j = r.json()
#print(j)
#df = pd.DataFrame(j['areas'][2])
df = pd.json_normalize(j['areas'][2], 'areas')
#df['totalConfirmed'].sum()
index = df['displayName'].to_list()
totalConfirmed = df['totalConfirmed'].to_list()
totalDeaths = df['totalDeaths'].to_list()
totalRecovered = df['totalRecovered'].to_list()
df2 = pd.DataFrame({'totalConfirmed': totalConfirmed,
                    'totalDeaths': totalDeaths,
                    'totalRecovered': totalRecovered}, index=index)
df2['totalRecovered'].fillna(0, inplace=True)
df2['deathRate'] = df2['totalDeaths'].div(df2['totalConfirmed'], fill_value=0)
df2['recoveryRate'] = df2['totalRecovered'].div(df2['totalConfirmed'], fill_value=0)
covid = pd.merge(left=df2, right=state_pop[['state_name','population']], how='left', left_index=True, right_on='state_name')
covid['infectionRate'] = covid['totalConfirmed'].div(covid['population'], fill_value=0)
covid = covid[['state_name', 'population', 'totalConfirmed', 'infectionRate', 'totalDeaths', 'deathRate', 'totalRecovered', 'recoveryRate']]
covid.set_index('state_name', inplace=True)

total_population = int(covid['population'].sum())
total_confirmed = int(covid['totalConfirmed'].sum())
total_deaths = int(covid['totalDeaths'].sum())
total_recoveries = int(covid['totalRecovered'].sum())
infection_rate = total_confirmed / total_population
death_rate = total_deaths / total_confirmed
recovery_rate = total_recoveries / total_confirmed

covid['expected'] = covid['population'].mul(infection_rate)
covid['delta'] = covid['totalConfirmed'].sub(covid['expected'])
covid['deltaRate'] = covid['totalConfirmed'].div(covid['delta'])

In [90]:
var_list = [covid.loc['Tennessee', 'infectionRate']]
var_list.append(covid.loc['Tennessee', 'deathRate'])
var_list.append(int(covid.loc['Tennessee', 'delta']))
var_list.append(covid.loc['Tennessee', 'deltaRate'])
var_list.append(int(covid.loc['Tennessee', 'expected']))
var_list.append(int(covid.loc['Tennessee', 'totalConfirmed']))

tennessee = """**`Tennessee:`**  
Infection rate is **`{:.4%}`**   
Death rate is **`{:.4%}`**   
Actual/expected delta is **`{:,}`** or **`{:.4%}`**  
Expected Infections: **`{:,}`**  
Actual Infections: **`{:,}`**
""".format(*var_list)

#tennessee = "**`Tennessee`**:  "
#tennessee += "The current infection rate is **`{:.4%}`**.  ".format(covid.loc['Tennessee', 'infectionRate'])
#tennessee += "The current death rate is **`{:.4%}`**.  ".format(covid.loc['Tennessee', 'deathRate'])
#tennessee += "The current actual/expected delta is **`{:,}`** or **`{:.4%}`**.  ".format(int(covid.loc['Tennessee', 'delta']), covid.loc['Tennessee', 'deltaRate'])
md(tennessee)

**`Tennessee:`**  
Infection rate is **`0.0114%`**   
Death rate is **`0.3827%`**   
Actual/expected delta is **`-550`** or **`-142.4623%`**  
Expected Infections: **`1,334`**  
Actual Infections: **`784`**


In [9]:
df.describe()

Unnamed: 0,totalConfirmed,totalDeaths,totalRecovered,lat,long
count,51.0,40.0,16.0,51.0,51.0
mean,1165.470588,19.75,18.75,39.577923,-93.475285
std,4296.87286,48.074061,40.824013,6.322399,19.157176
min,39.0,1.0,1.0,19.598518,-155.518555
25%,106.0,2.0,1.75,35.712851,-103.008621
50%,290.0,6.5,3.0,39.356419,-89.656242
75%,753.5,12.5,8.5,43.342552,-78.941738
max,30811.0,285.0,124.0,63.174122,-69.203949


In [73]:
source = ColumnDataSource(data=covid)

title_p = "COVID Confirmed: {:,} - Death Rate: {:.4%} - Recovery Rate: {:.4%}".format(total_confirmed, death_rate, recovery_rate)
title_q = "COVID Deaths: {:,} - Death Rate: {:.4%}".format(total_deaths, death_rate)
title_z = "COVID Recovered: {:,} - Recovery Rate: {:.4%}".format(total_recoveries, recovery_rate)
title_x = "COVID US National Infection Rate: {:.4%}".format(infection_rate)
title_y = "COVID US Confirmed / Expected Delta"

p = figure(x_range=index, plot_height=800, plot_width=1500, title=title_p)
p.vbar(x='state_name', top='totalConfirmed', width=0.1, source=source, line_color='white')
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = covid['totalConfirmed'].max() + 1000
p.xaxis.major_label_orientation = math.pi / 4

q = figure(x_range=index, plot_height=800, plot_width=1500, title=title_q)
q.vbar(x='state_name', top='totalDeaths', width=0.1, source=source, line_color='white')
q.xgrid.grid_line_color = None
q.y_range.start = 0
q.y_range.end = covid['totalDeaths'].max() + 100
q.xaxis.major_label_orientation = math.pi / 4

z = figure(x_range=index, plot_height=800, plot_width=1500, title=title_z)
z.vbar(x='state_name', top='totalRecovered', width=0.1, source=source, line_color='white')
z.xgrid.grid_line_color = None
z.y_range.start = 0
z.y_range.end = covid['totalRecovered'].max() + 50
z.xaxis.major_label_orientation = math.pi / 4

x = figure(x_range=index, plot_height=800, plot_width=1500, title=title_x)
x.vbar(x='state_name', top='infectionRate', width=0.1, source=source)
x.xgrid.grid_line_color = None
x.y_range.start = 0
x.y_range.end = covid['infectionRate'].max() + 0.0005
x.xaxis.major_label_orientation = math.pi / 4

y = figure(x_range=index, plot_height=800, plot_width=1500, title=title_y)
y.vbar(x='state_name', top='delta', width=0.1, source=source)
y.xgrid.grid_line_color = None
y.y_range.start = covid['delta'].min() - 500
y.y_range.end = covid['delta'].max() + 500
y.xaxis.major_label_orientation = math.pi / 4

show(x)
show(y)
show(p)
show(q)
show(z)

In [69]:
#ax = df2.plot.bar(rot=45, figsize=(25,5))

covid.head(80)

Unnamed: 0_level_0,population,totalConfirmed,infectionRate,totalDeaths,deathRate,totalRecovered,recoveryRate,expected,delta
state_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
New York,19440469,30811,0.001585,108.0,0.003505,285.0,0.00925,3757.372127,27053.627873
New Jersey,8936574,4402,0.000493,65.0,0.014766,2.0,0.000454,1727.223456,2674.776544
California,39937489,2617,6.6e-05,55.0,0.021016,6.0,0.002293,7718.94999,-5101.94999
Washington,7797095,2469,0.000317,123.0,0.049818,124.0,0.050223,1506.989745,962.010255
Michigan,10045029,2295,0.000228,43.0,0.018736,0.0,0.0,1941.460979,353.539021
Illinois,12659682,1865,0.000147,19.0,0.010188,2.0,0.001072,2446.81012,-581.81012
Massachusetts,6976597,1838,0.000263,11.0,0.005985,1.0,0.000544,1348.407341,489.592659
Louisiana,4645184,1795,0.000386,65.0,0.036212,0.0,0.0,897.801637,897.198363
Florida,21992985,1682,7.6e-05,22.0,0.01308,0.0,0.0,4250.711689,-2568.711689
Georgia,10736059,1247,0.000116,40.0,0.032077,0.0,0.0,2075.020353,-828.020353


In [70]:
covid.describe()

Unnamed: 0,population,totalConfirmed,infectionRate,totalDeaths,deathRate,totalRecovered,recoveryRate,expected,delta
count,51.0,51.0,51.0,40.0,51.0,51.0,51.0,51.0,51.0
mean,6496457.0,1255.607843,0.000137,17.75,0.013803,11.882353,0.018221,1255.607843,-3.733832e-14
std,7450656.0,4309.066252,0.000228,28.368229,0.013232,45.825603,0.073424,1440.031507,4032.39
min,567025.0,39.0,2.2e-05,1.0,0.0,0.0,0.0,109.592208,-5101.95
25%,1802113.0,123.5,4.9e-05,2.75,0.003187,0.0,0.0,348.304825,-732.7003
50%,4499692.0,346.0,7.2e-05,6.5,0.011111,0.0,0.0,869.681555,-305.2298
75%,7587794.0,893.5,0.000117,12.5,0.021072,2.0,0.002425,1466.537024,-104.0437
max,39937490.0,30811.0,0.001585,123.0,0.056911,285.0,0.425087,7718.94999,27053.63


In [37]:
df3 = pd.read_json(url)
df3.head(50)

Unnamed: 0,id,displayName,areas,totalConfirmed,totalDeaths,totalRecovered,lastUpdated
0,world,Global,"{'id': 'chinamainland', 'displayName': 'China ...",453074,20519,113121,2020-03-25T18:52:29.833Z
1,world,Global,"{'id': 'italy', 'displayName': 'Italy', 'areas...",453074,20519,113121,2020-03-25T18:52:29.833Z
2,world,Global,"{'id': 'unitedstates', 'displayName': 'United ...",453074,20519,113121,2020-03-25T18:52:29.833Z
3,world,Global,"{'id': 'spain', 'displayName': 'Spain', 'areas...",453074,20519,113121,2020-03-25T18:52:29.833Z
4,world,Global,"{'id': 'germany', 'displayName': 'Germany', 'a...",453074,20519,113121,2020-03-25T18:52:29.833Z
5,world,Global,"{'id': 'iran', 'displayName': 'Iran', 'areas':...",453074,20519,113121,2020-03-25T18:52:29.833Z
6,world,Global,"{'id': 'france', 'displayName': 'France', 'are...",453074,20519,113121,2020-03-25T18:52:29.833Z
7,world,Global,"{'id': 'switzerland', 'displayName': 'Switzerl...",453074,20519,113121,2020-03-25T18:52:29.833Z
8,world,Global,"{'id': 'southkorea', 'displayName': 'South Kor...",453074,20519,113121,2020-03-25T18:52:29.833Z
9,world,Global,"{'id': 'unitedkingdom', 'displayName': 'United...",453074,20519,113121,2020-03-25T18:52:29.833Z


In [13]:
#pt = df3.pivot_table()

countries = pd.json_normalize(j['areas'])
countries.head()

Unnamed: 0,id,displayName,areas,totalConfirmed,totalDeaths,totalRecovered,lastUpdated,lat,long,parentId
0,chinamainland,China (mainland),[],81218,3281.0,73650.0,2020-03-25T18:32:26.616Z,36.563114,103.735809,world
1,italy,Italy,"[{'id': 'lombardy_italy', 'displayName': 'Lomb...",74386,7503.0,9362.0,2020-03-25T18:32:26.616Z,43.529028,12.162184,world
2,unitedstates,United States,"[{'id': 'newyork_unitedstates', 'displayName':...",61062,838.0,387.0,2020-03-25T18:32:26.616Z,39.495914,-98.989983,world
3,spain,Spain,"[{'id': 'madrid_spain', 'displayName': 'Madrid...",47610,3434.0,5367.0,2020-03-25T18:32:26.616Z,40.387696,-3.557391,world
4,germany,Germany,"[{'id': 'nrw_germany', 'displayName': 'NRW', '...",35740,186.0,3540.0,2020-03-25T18:32:26.616Z,51.121806,10.400695,world


In [14]:
states = pd.json_normalize(countries['areas'][2])
states.head()

Unnamed: 0,id,displayName,areas,totalConfirmed,totalDeaths,totalRecovered,lastUpdated,lat,long,parentId
0,newyork_unitedstates,New York,"[{'id': 'newyork_newyork_unitedstates', 'displ...",30811,285.0,,2020-03-25T18:32:26.616Z,42.93708,-75.610695,unitedstates
1,newjersey_unitedstates,New Jersey,"[{'id': 'atlantic_newjersey_unitedstates', 'di...",3675,44.0,2.0,2020-03-25T18:32:26.616Z,40.138779,-74.67691,unitedstates
2,california_unitedstates,California,"[{'id': 'losangeles_california_unitedstates', ...",2266,42.0,6.0,2020-03-25T18:32:26.616Z,37.2551,-119.617523,unitedstates
3,washington_unitedstates,Washington,"[{'id': 'king_washington_unitedstates', 'displ...",2469,123.0,124.0,2020-03-25T18:32:26.616Z,47.411297,-120.556267,unitedstates
4,louisiana_unitedstates,Louisiana,[{'id': 'jeffersonparish_louisiana_unitedstate...,1388,46.0,,2020-03-25T18:32:26.616Z,30.966511,-91.851906,unitedstates


In [52]:
counties = pd.json_normalize(data=j['areas'][2]['areas'], record_path='areas')
counties.head(100)

Unnamed: 0,id,displayName,areas,totalConfirmed,totalDeaths,totalRecovered,lastUpdated,lat,long,parentId
0,newyork_newyork_unitedstates,New York,[],17856,,,2020-03-25T18:57:37.994Z,40.774364,-73.969772,newyork_unitedstates
1,westchester_newyork_unitedstates,Westchester,[],4691,,,2020-03-25T18:57:37.994Z,41.151657,-73.753326,newyork_unitedstates
2,nassau_newyork_unitedstates,Nassau,[],3285,,,2020-03-25T18:57:37.994Z,40.756714,-73.591393,newyork_unitedstates
3,suffolk_newyork_unitedstates,Suffolk,[],2260,,,2020-03-25T18:57:37.994Z,40.966293,-72.687111,newyork_unitedstates
4,rockland_newyork_unitedstates,Rockland,[],968,,,2020-03-25T18:57:37.994Z,41.152374,-74.024010,newyork_unitedstates
...,...,...,...,...,...,...,...,...,...,...
95,fresno_california_unitedstates,Fresno,[],13,,,2020-03-25T18:57:37.994Z,36.758274,-119.649193,california_unitedstates
96,stanislaus_california_unitedstates,Stanislaus,[],11,,,2020-03-25T18:57:37.994Z,37.559326,-120.997665,california_unitedstates
97,imperial_california_unitedstates,Imperial,[],6,,,2020-03-25T18:57:37.994Z,33.039684,-115.365303,california_unitedstates
98,madera_california_unitedstates,Madera,[],6,,1.0,2020-03-25T18:57:37.994Z,37.218010,-119.762726,california_unitedstates
