In [73]:
#import the required libraries 
import plotly.graph_objects as go
import plotly as py
import plotly.express as px
import pandas as pd
import numpy as np

In [49]:
# Code to read in data supplied by Professsor Mick
df_states_current = pd.read_json("https://covidtracking.com/api/states")
df_states_daily   = pd.read_json("https://covidtracking.com/api/states/daily")
df_states_info    = pd.read_json("https://covidtracking.com/api/states/info")
df_us_current     = pd.read_json("https://covidtracking.com/api/us")
df_us_daily       = pd.read_json("https://covidtracking.com/api/us/daily")
df_counties       = pd.read_json("https://covidtracking.com/api/counties")
df_tracker_urls   = pd.read_json("https://covidtracking.com/api/urls")
df_in_the_press   = pd.read_json("https://covidtracking.com/api/press")

In [170]:
# View some of the data
df_states_current.head()

Unnamed: 0,checkTimeEt,commercialScore,dataQualityGrade,dateChecked,dateModified,death,fips,grade,hash,hospitalized,...,onVentilatorCurrently,pending,posNeg,positive,positiveScore,recovered,score,state,total,totalTestResults
0,5/08 16:17,1.0,C,2020-05-08T20:17:00Z,2020-05-08T04:00:00Z,10,2,A,359d7caae8003430cab7ae640201799a78153394,,...,,,25473,377,1.0,305.0,4.0,AK,25473,25473
1,5/08 16:17,1.0,B,2020-05-08T20:17:00Z,2020-05-08T04:00:00Z,375,1,B,2f2783166bf83be747de4f63e6bb311bf53b35ec,1207.0,...,,,120114,9221,1.0,,3.0,AL,120114,120114
2,5/08 15:14,1.0,B,2020-05-08T19:14:00Z,2020-05-08T12:15:00Z,88,5,A,44c8dec1ac6c68ca90d49dbd1405484447a8b740,466.0,...,14.0,,63994,3694,1.0,2159.0,4.0,AR,63994,63994
3,5/08 14:43,1.0,A+,2020-05-08T18:43:00Z,2020-05-08T04:00:00Z,517,4,B,8439256def5bc17a88c19db7ee3b8aaa6e35041d,1482.0,...,197.0,,119907,10526,1.0,1747.0,3.0,AZ,119907,119907
4,5/08 15:16,1.0,B,2020-05-08T19:16:00Z,2020-05-08T18:00:00Z,2585,6,B,03dfa95a8fa0dfa6daf98863e624d55af434dfde,,...,,,875272,62512,1.0,,3.0,CA,875272,875272


In [167]:
# View all of the columns, since not everything was shown above
df_states_current.columns

Index(['checkTimeEt', 'commercialScore', 'dataQualityGrade', 'dateChecked',
       'dateModified', 'death', 'fips', 'grade', 'hash', 'hospitalized',
       'hospitalizedCumulative', 'hospitalizedCurrently', 'inIcuCumulative',
       'inIcuCurrently', 'lastUpdateEt', 'negative', 'negativeRegularScore',
       'negativeScore', 'notes', 'onVentilatorCumulative',
       'onVentilatorCurrently', 'pending', 'posNeg', 'positive',
       'positiveScore', 'recovered', 'score', 'state', 'total',
       'totalTestResults'],
      dtype='object')

In [172]:
# Bar chart of COVID-19 deaths per state
deaths = df_states_current[["state","death"]]
figure = px.bar(deaths, x = "state", y = "death", labels = {'death':'Corona Virus Deaths'})
figure.update_layout(title = 'COVID Deaths per State',xaxis = {'categoryorder':'total descending'})
figure.show()

In [176]:
# Bar chart of COVID-19 positive cases per state
states_positive = df_states_current[["state","positive"]]

figure2 = px.bar(states_positive, x = "state", y = "positive", labels = {'positive':'Positive COVID-19 Cases', 'state':'States'})
figure2.update_layout(title = 'Covid Positive Cases per State',xaxis = {'categoryorder':'total descending'})
figure2.show()

In [181]:
# I was curious of the positive and negative case distribution. 
# Specifically, I assumed the states with the most positive cases, would
# also have the most negative cases, as the concern of being infected
# would be higher. The main factor of why I questioned this was, was due
# to the shortage of tests. While states with larger populations would
# appear to need more tests, that doesn't seem necessary when there is a
# shortage. Therefore, I belive states with more cases should take 
# priority while there is a limited supply of testing kits.

pos_cases = df_states_current[['state', 'positive']]
neg_cases = df_states_current[['state', 'posNeg']]
total_cases = pos_cases + neg_cases

fig = go.Figure(data=[
    go.Bar(name = 'Positive', x = pos_cases['state'], y = pos_cases['positive']),
    go.Bar(name = 'Negative', x = neg_cases['state'], y = neg_cases['posNeg'])
])
# Change the bar mode
fig.update_layout(title = 'Negative & Positive Cases per State',
                  xaxis = {'categoryorder':'total descending'})
fig.update_layout(barmode='stack')
fig.show()

In [161]:
# After looking at the previous graph, I was curious what percent of
# people tested were positive. To figure this out, I used the sum of the
# posNeg cases, and the positive cases. The result was what I used for the
# total. I am aware the intial dataset had a total column, however the 
# number was unpredictable, as it did not equal the positive plus the 
# negPos columns, and/or was less than either a positive or posNeg values.

all = neg_cases['posNeg'] + pos_cases['positive']
pos_ratio = (pos_cases['positive'] / all) * 100
df = pd.DataFrame(pos_ratio)
df['state'] = pos_cases['state']
df

Unnamed: 0,0,state
0,1.458414,AK
1,7.129547,AL
2,5.457393,AR
3,8.070044,AZ
4,6.665927,CA
5,16.588581,CO
6,21.190308,CT
7,17.868177,DC
8,17.777455,DE
9,7.094328,FL


In [183]:
# PR had the same numbers for in multiple rows, so I decided to remove
# this due to the fact that it was inaccurate as you can see below.
chart = px.bar(df, x = "state", y = 0, labels = {'0':'Positive Corona Virus Cases'}, color = 0)
chart.update_layout(title = 'Percent Tested Positive By State',xaxis = {'categoryorder':'total descending'})
chart.show()

In [134]:
# Row 51 has the value for PR
neg_cases['posNeg']

0       25473
1      120114
2       63994
3      119907
4      875272
5       94536
6      120541
7       27115
8       28264
9      513341
10     227567
11      36618
12      70261
13      31270
14     399714
15     130128
16      47708
17      81401
18     202416
19     366023
20     152187
21      23466
22     260253
23     101270
24     108721
25      81191
26      20945
27     178613
28      42501
29      40412
30      30672
31     298759
32      89032
33      53344
34    1121543
35     184316
36      89857
37      72693
38     270559
39      85266
40      73442
41      21293
42     243578
43     477118
44     138688
45     129945
46      19008
47     230680
48     106855
49      59436
50      12038
51       2156
52         83
53       3777
54       2336
55       1092
Name: posNeg, dtype: int64

In [135]:
# Row 51 has the value for PR
pos_cases['positive']

0        377
1       9221
2       3694
3      10526
4      62512
5      18801
6      32411
7       5899
8       6111
9      39199
10     32106
11       629
12     11457
13      2178
14     73760
15     23146
16      6501
17      6129
18     30855
19     75333
20     30485
21      1374
22     46326
23     10088
24      9489
25      9090
26       458
27     13868
28      1425
29      7190
30      2843
31    135454
32      4493
33      5884
34    330407
35     23016
36      4424
37      3068
38     54238
39     10779
40      7142
41      3144
42     14441
43     36609
44      5919
45     22342
46       919
47     16231
48      9590
49      1310
50       635
51      2156
52         0
53       151
54        15
55        68
Name: positive, dtype: int64

In [162]:
# Here, PR (51) is removed from the data frame and the headers are renamed
df2 = df.drop([51])
df2_col = ['Percent Positive', 'State']
df2.columns = df2_col
df2

Unnamed: 0,Percent Positive,State
0,1.458414,AK
1,7.129547,AL
2,5.457393,AR
3,8.070044,AZ
4,6.665927,CA
5,16.588581,CO
6,21.190308,CT
7,17.868177,DC
8,17.777455,DE
9,7.094328,FL


In [164]:
# Fixed bar chart showing the percent of positive cases per state, based 
# on the total number of positive and negative tests
chart2 = px.bar(df2, x = "State", y = 'Percent Positive', labels = {'Percent Positive':'Percent Positive'}, color = 'Percent Positive')
chart2.update_layout(title = 'Percent Tested Positive (Out of everyone tested)',xaxis = {'categoryorder':'total descending'})
chart2.show()