<a href="https://colab.research.google.com/github/jammy-bot/va-covid-plotly/blob/master/va_covid_plots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Virginia COVID-19 Cases

In [24]:
import pandas as pd

# importing plotly express for plot animation
try:
    import plotly.express as px
except:
    !pip install plotly
    import plotly.express as px

## Obtain Data

In [25]:
data_df = pd.read_csv("/content/VDH-COVID-19-PublicUseDataset-Cases.csv", 
                      dtype={"fips": str})

# viewing dataframe shape and first / last rows
print(data_df.shape, "\n")
display(data_df.head())

(18088, 7) 



Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths
0,03/17/2020,51001,Accomack,Eastern Shore,0,0,0
1,03/17/2020,51003,Albemarle,Thomas Jefferson,0,0,0
2,03/17/2020,51005,Alleghany,Alleghany,0,0,0
3,03/17/2020,51007,Amelia,Piedmont,0,0,0
4,03/17/2020,51009,Amherst,Central Virginia,0,0,0


# Scrub Data

__View dataset info__

In [26]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18088 entries, 0 to 18087
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Report Date          18088 non-null  object
 1   FIPS                 18088 non-null  int64 
 2   Locality             18088 non-null  object
 3   VDH Health District  18088 non-null  object
 4   Total Cases          18088 non-null  int64 
 5   Hospitalizations     18088 non-null  int64 
 6   Deaths               18088 non-null  int64 
dtypes: int64(4), object(3)
memory usage: 989.3+ KB


There are no rows missing data.

We will make a copy of the dataframe, for data preparation.

In [27]:
# instantiating a copy of the dataframe as cities_df
cities_df = data_df.copy()

# viewing first rows of dataframe
cities_df.head()

Unnamed: 0,Report Date,FIPS,Locality,VDH Health District,Total Cases,Hospitalizations,Deaths
0,03/17/2020,51001,Accomack,Eastern Shore,0,0,0
1,03/17/2020,51003,Albemarle,Thomas Jefferson,0,0,0
2,03/17/2020,51005,Alleghany,Alleghany,0,0,0
3,03/17/2020,51007,Amelia,Piedmont,0,0,0
4,03/17/2020,51009,Amherst,Central Virginia,0,0,0


In [28]:
# converting column name spaces to "_" and converting to lower case
cities_df.columns = [x.lower().replace(" ", "_") for x in cities_df.columns]

# `report_date` needs to remain a string for plotly express

# convert the `locality` column to string type
cities_df["locality"] = cities_df["locality"].astype(str)

# dropping the health district column
cities_df.drop(["vdh_health_district"], axis=1, inplace=True)

# viewing info after adjustments
cities_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18088 entries, 0 to 18087
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   report_date       18088 non-null  object
 1   fips              18088 non-null  int64 
 2   locality          18088 non-null  object
 3   total_cases       18088 non-null  int64 
 4   hospitalizations  18088 non-null  int64 
 5   deaths            18088 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 848.0+ KB


## Explore Data

__Limit the localities of interest.__

In [29]:
# viewing statistical information for numerical data
# over a slice of two cities from the dataframe
cities_df[cities_df["locality"] == (
    "Chesapeake" or "Norfolk" or "Virginia Beach"
    )].describe()

Unnamed: 0,fips,total_cases,hospitalizations,deaths
count,136.0,136.0,136.0,136.0
mean,51550.0,608.110294,83.661765,11.933824
std,0.0,587.878,55.712519,8.226607
min,51550.0,0.0,0.0,0.0
25%,51550.0,165.0,38.0,5.0
50%,51550.0,429.5,82.0,13.0
75%,51550.0,848.0,124.25,18.5
max,51550.0,2391.0,199.0,27.0


In [30]:
# viewing the number of unique localities in the dataset
cities_df["locality"].nunique()

133

In [31]:
print(sorted(set(x for x in cities_df.locality)))

['Accomack', 'Albemarle', 'Alexandria', 'Alleghany', 'Amelia', 'Amherst', 'Appomattox', 'Arlington', 'Augusta', 'Bath', 'Bedford', 'Bland', 'Botetourt', 'Bristol', 'Brunswick', 'Buchanan', 'Buckingham', 'Buena Vista City', 'Campbell', 'Caroline', 'Carroll', 'Charles City', 'Charlotte', 'Charlottesville', 'Chesapeake', 'Chesterfield', 'Clarke', 'Colonial Heights', 'Covington', 'Craig', 'Culpeper', 'Cumberland', 'Danville', 'Dickenson', 'Dinwiddie', 'Emporia', 'Essex', 'Fairfax', 'Fairfax City', 'Falls Church', 'Fauquier', 'Floyd', 'Fluvanna', 'Franklin City', 'Franklin County', 'Frederick', 'Fredericksburg', 'Galax', 'Giles', 'Gloucester', 'Goochland', 'Grayson', 'Greene', 'Greensville', 'Halifax', 'Hampton', 'Hanover', 'Harrisonburg', 'Henrico', 'Henry', 'Highland', 'Hopewell', 'Isle of Wight', 'James City', 'King George', 'King William', 'King and Queen', 'Lancaster', 'Lee', 'Lexington', 'Loudoun', 'Louisa', 'Lunenburg', 'Lynchburg', 'Madison', 'Manassas City', 'Manassas Park', 'Marti

In [32]:
# listing localiies for visual EDA
select_cities =['Chesapeake', 'Norfolk', 'Richmond City', 'Virginia Beach']

# filtering `cities_df` for the selected localities
selected = cities_df.locality.isin(select_cities)

# instantiating a new dataframe with filtered localities, only
select_df = cities_df[selected]

print(select_df.shape)

select_df.tail()

(544, 6)


Unnamed: 0,report_date,fips,locality,total_cases,hospitalizations,deaths
17950,07/29/2020,51760,Richmond City,2831,270,39
18054,07/30/2020,51550,Chesapeake,2391,199,27
18073,07/30/2020,51710,Norfolk,3080,165,22
18079,07/30/2020,51760,Richmond City,2857,273,38
18084,07/30/2020,51810,Virginia Beach,3979,193,43


## Bar Plot, Total Cases by Locality

In [37]:
fig = px.bar(select_df,  
             x ="locality",  
             y ="total_cases", 
             color ='deaths', 
             animation_frame ='report_date', 
             hover_name ='locality',  
             range_y =[0, 4250]) 
fig.show()

## Bar Plot, Deaths by Locality

In [34]:
fig = px.bar(select_df,  
             x ="locality",  
             y ="deaths", 
             color ='total_cases', 
             animation_frame ='report_date', 
             hover_name ='locality',  
             range_y =[0, 50]) 
fig.show()

## Scatter Plot: May - July, 2020 Totals, Deaths vs Cases by Locality

In [40]:
fig = px.scatter( 
    select_df[select_df.report_date > "04/30/2020"],  
    x ="deaths",  
    y ="total_cases",  
    animation_frame ="report_date",  
    animation_group ="locality", 
    size ="total_cases",  
    color ="locality",  
    hover_name ="locality",  
    facet_col ="locality", 
    size_max = 40, 
    range_x =[0, 200], 
    range_y =[-10, 4500] 
) 
fig.show()

## Scatter Plot: May - July, 2020 Totals, Deaths vs Hospitalizations by Locality

In [39]:
fig = px.scatter( 
    select_df[select_df.report_date > "04/30/2020"],  
    x ="deaths",  
    y ="hospitalizations",  
    animation_frame ="report_date",  
    animation_group ="locality", 
    size ="total_cases",  
    color ="locality",  
    hover_name ="locality",  
    facet_col ="locality", 
    size_max = 40, 
    range_x =[0, 75], 
    range_y =[-10, 325] 
) 
fig.show()

## Scatter Plot: May - July, 2020 Totals, Hospitalizations vs Cases by Locality

In [41]:
fig = px.scatter( 
    select_df[select_df.report_date > "04/30/2020"],  
    x ="hospitalizations",  
    y ="total_cases",  
    animation_frame ="report_date",  
    animation_group ="locality", 
    size ="total_cases",  
    color ="locality",  
    hover_name ="locality",  
    facet_col ="locality", 
    size_max = 50, 
    range_x =[0, 300], 
    range_y =[-10, 4500] 
) 
fig.show()