In [2]:
# Setting up the environment.
import numpy as np
import pandas as pd
from scipy import stats

In [3]:
# Fetch the Johns Hopkins CSSE COVID-19 daily report (file 03-26-2020.csv)
# directly from the project's GitHub repository.
daily_report_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-26-2020.csv'

# index_col=0 turns the first CSV column into the row index.
df = pd.read_csv(
    daily_report_url,
    index_col=0,
)

In [4]:
# Build the per-county US table used for plotting.
#
# The whole transformation is one method chain that derives a fresh frame
# from `df`: no column is ever assigned on a filtered slice (which can
# trigger pandas' SettingWithCopyWarning), and re-running the cell always
# yields the same `df1`.

df1 = (
    df[["Admin2", "Province_State", "Country_Region", "Confirmed", "Deaths", "Combined_Key"]]  # columns to keep
    .dropna()  # drop rows missing any of the kept columns
    .loc[lambda d: d["Confirmed"] != 0]  # drop rows with 0 confirmed cases
    .loc[lambda d: d["Country_Region"] == "US"]  # keep US rows only
    .loc[lambda d: d["Province_State"] != "Wuhan Evacuee"]  # not a US state
    .sort_values(by=["Province_State", "Confirmed"], ascending=True)  # by state, then confirmed
    .assign(
        # Strip ", US" so the hover text reads "County, State".
        # regex=False makes the literal match explicit instead of relying on
        # the pandas-version-dependent default.
        Combined_Key=lambda d: d["Combined_Key"].str.replace(", US", "", regex=False)
    )
    .reset_index()  # move the index (FIPS) back into a regular column
    .rename(
        columns={
            "Province_State": "State",
            "Admin2": "County",
            "Country_Region": "Country",
            "Combined_Key": "County/State",
        }
    )
)
df1.head(5)

Unnamed: 0,FIPS,County,State,Country,Confirmed,Deaths,County/State
0,1013.0,Butler,Alabama,US,1,0,"Butler, Alabama"
1,1019.0,Cherokee,Alabama,US,1,0,"Cherokee, Alabama"
2,1023.0,Choctaw,Alabama,US,1,0,"Choctaw, Alabama"
3,1027.0,Clay,Alabama,US,1,0,"Clay, Alabama"
4,1029.0,Cleburne,Alabama,US,1,0,"Cleburne, Alabama"


In [9]:
import plotly.express as px
import plotly.io as pio

# Treemap nested State -> County; tile size is the number of confirmed cases.
# Hovering shows the "County/State" label plus the confirmed and death counts.
hover_columns = ["Confirmed", "Deaths"]
fig = px.treemap(
    df1,
    path=['State', 'County'],
    values='Confirmed',
    hover_name="County/State",
    hover_data=hover_columns,
)
fig.show()

# Also write the figure to a standalone HTML file; auto_open=True opens it
# once it has been written.
pio.write_html(fig, file='Index.html', auto_open=True)