<a href="https://colab.research.google.com/github/bryandaetz1/SB_County_COVID-19_Data/blob/master/SB_Covid_Data_Visualizations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install plotly==4.12.0

Collecting plotly==4.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/af86e9d9bf1a3e4f2dabebeabd02a32e8ddf671a5d072b3af2b011efea99/plotly-4.12.0-py2.py3-none-any.whl (13.1MB)
[K     |████████████████████████████████| 13.1MB 311kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.12.0


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [3]:
cases_by_age = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_age_10-20-20.csv')
cases_by_area = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_area_10-20-20.csv')
cases_by_gender = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_gender_10-20-20.csv')
ethnicity = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/ethnicity_10-20-20.csv')
recovery_status = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/recovery_status_10-20-20.csv')
testing_status = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/testing_status_10-20-20.csv')
transmission_method = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/transmission_method_10-20-20.csv')

## Still Need to Add

In [4]:
testing_status.head()

Unnamed: 0,Testing Status,Total,Date
0,Cumulative Positive,9671,"October 20, 2020"
1,Cumulative Negative,170215,"October 20, 2020"
2,Cumulative Inconclusive,449,"October 20, 2020"
3,Cumulative Pending6,191,"October 20, 2020"
4,Cumulative Invalid,89,"October 20, 2020"


In [5]:
transmission_method.head()

Unnamed: 0,Transmission Method,Total,Date
0,Community Close Contact Transmission,3357,"October 20, 2020"
1,Persons Incarcerated at Federal Prison in Lomp...,1026,"October 20, 2020"
2,Travel Transmission,12,"October 20, 2020"
3,Community Transmission,2197,"October 20, 2020"
4,Unknown,2971,"October 20, 2020"


In [6]:
transmission_method['Transmission Method'].value_counts()

Travel Transmission                                               75
Under Investigation                                               75
Persons Incarcerated at Federal Prison in Lompoc Close Contact    75
Community Close Contact Transmission                              75
Community Transmission                                            75
Unknown                                                           56
Name: Transmission Method, dtype: int64

In [7]:
# horizontal bar chart where values for each gender are going in opposite directions?, may not work if counting pending or unknown gender
cases_by_gender.head()

Unnamed: 0,Number of Cases by Gender,Daily,Community,Federal Prisonin Lompoc,Total (Community & Prison),Date
0,Female,16,4250,2,4252.0,"October 20, 2020"
1,Male,13,4300,1011,5311.0,"October 20, 2020"
2,Unknown,1,95,13,108.0,"October 20, 2020"
3,Pending,0,0,0,,"October 20, 2020"
4,Female,9,4234,2,4236.0,"October 19, 2020"


## Recovery Status

In [8]:
recovery_status.head()

Unnamed: 0,Recovery Status,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Still Infectious Cases,118.0,0.0,118.0,"October 20, 2020"
1,Recovered Cases,8411.0,1023.0,9434.0,"October 20, 2020"
2,Recovering in Hospital,,,16.0,"October 20, 2020"
3,Recovering in ICU,,,5.0,"October 20, 2020"
4,Pending Information,,,,"October 20, 2020"


In [9]:
recovery_status['Recovery Status'].value_counts()

Recovered Cases           75
Recovering in Hospital    75
Recovering in ICU         75
Pending Information       75
Still Infectious Cases    57
Active Cases              13
Active Cases*              5
Recovering at Home         3
Name: Recovery Status, dtype: int64

In [10]:
#cleaning up recovery status values
recovery_status['Recovery Status'].replace({'Still Infectious Cases':'Active Cases', 
                                            'Active Cases*':'Active Cases'},
                                           inplace = True)

#cleaning up column names
recovery_status.rename({'Community ':'Community',
                        'Federal Prison in Lompoc':'Lompoc Federal Prison',
                        'Total(Community & Prison)':'Total'},
                       axis = 1,
                       inplace = True)

In [11]:
recovery_status['Recovery Status'].value_counts()

Active Cases              75
Recovered Cases           75
Recovering in Hospital    75
Recovering in ICU         75
Pending Information       75
Recovering at Home         3
Name: Recovery Status, dtype: int64

In [12]:
#getting subset of data
recovery = recovery_status[recovery_status['Recovery Status'].isin(['Active Cases','Recovered Cases'])]

In [13]:
recovery.isna().sum() 

Recovery Status          0
Community                0
Lompoc Federal Prison    0
Total                    0
Date                     0
dtype: int64

In [14]:
recovery.dtypes

Recovery Status           object
Community                 object
Lompoc Federal Prison    float64
Total                    float64
Date                      object
dtype: object

In [15]:
#converting community column to numeric
recovery['Community'] = pd.to_numeric(recovery['Community'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
#function to return active cases for a given column and date
def active_cases(date, column_name):
  count = int(recovery.loc[(recovery['Recovery Status'] == 'Active Cases') & (recovery['Date'] == date), column_name].values)
  return count

#function to return recovered cases for a given column and date
def recovered_cases(date, column_name):
  count = int(recovery.loc[(recovery['Recovery Status'] == 'Recovered Cases') & (recovery['Date'] == date), column_name].values)
  return count

### Validating that Functions are Working

In [17]:
recovery.head(2)

Unnamed: 0,Recovery Status,Community,Lompoc Federal Prison,Total,Date
0,Active Cases,118,0.0,118.0,"October 20, 2020"
1,Recovered Cases,8411,1023.0,9434.0,"October 20, 2020"


In [18]:
for col in recovery.columns[1:4]:
  print(col, '\n', active_cases('October 20, 2020', col), '\n')

Community 
 118 

Lompoc Federal Prison 
 0 

Total 
 118 



In [19]:
for col in recovery.columns[1:4]:
  print(col, '\n', recovered_cases('October 20, 2020', col), '\n')

Community 
 8411 

Lompoc Federal Prison 
 1023 

Total 
 9434 



# Visualizations

In [20]:
#view available plotly templates for visualizations
import plotly.io as pio
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

#Cases by Age

In [21]:
cases_by_age.shape

(521, 6)

In [22]:
cases_by_age.head()

Unnamed: 0,Number of Cases by Age,Daily,Community,Federal Prisonin Lompoc,Total(Community & Prison),Date
0,0 – 17,1,807.0,0.0,807.0,"October 20, 2020"
1,18 – 29,10,2731.0,138.0,2869.0,"October 20, 2020"
2,30 – 49,9,2977.0,544.0,3521.0,"October 20, 2020"
3,50 – 69,9,1609.0,311.0,1920.0,"October 20, 2020"
4,70+3,1,519.0,33.0,552.0,"October 20, 2020"


In [23]:
#rename columns
cases_by_age.rename(columns = {'Number of Cases by Age':'Age',
                               'Federal Prisonin Lompoc':'Lompoc Federal Prison',
                               'Total(Community & Prison)':'Total'}, inplace = True)

In [24]:
cases_by_age.head()

Unnamed: 0,Age,Daily,Community,Lompoc Federal Prison,Total,Date
0,0 – 17,1,807.0,0.0,807.0,"October 20, 2020"
1,18 – 29,10,2731.0,138.0,2869.0,"October 20, 2020"
2,30 – 49,9,2977.0,544.0,3521.0,"October 20, 2020"
3,50 – 69,9,1609.0,311.0,1920.0,"October 20, 2020"
4,70+3,1,519.0,33.0,552.0,"October 20, 2020"


In [25]:
cases_by_age.Age.value_counts()

Age Suppressed    75
30 – 49           75
0 – 17            75
18 – 29           75
50 – 69           75
Pending           71
70+               62
70+3              13
Name: Age, dtype: int64

In [26]:
#fix duplicate age groups
cases_by_age['Age'].replace({'70+3':'70+'}, inplace = True)

In [27]:
#creating new dataframe without age suppressed and pending age values for visualization
age_viz = cases_by_age[~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]

In [28]:
age_viz.isna().sum()

Age                      0
Daily                    0
Community                0
Lompoc Federal Prison    0
Total                    0
Date                     0
dtype: int64

In [29]:
#transforming dataframe into wide format
#cases_by_age_viz = cases_by_age[['Age','Community','Lompoc Federal Prison','Date']][~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]
#cases_by_age_viz = pd.melt(cases_by_age_viz, id_vars=['Age','Date'], var_name='Location', value_name='Count')
#cases_by_age_viz.head()

#### Creating Function for Data Transformations Required for Visualization

In [30]:
#def cases_by_age_transform(cases_by_age):
#  #import dataframe
#  cases_by_age = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_age_10-20-20.csv')

  #rename columns
#  cases_by_age.rename(columns = {'Number of Cases by Age':'Age',
#                                 'Federal Prisonin Lompoc':'Lompoc Federal Prison',
#                                 'Daily ':'Daily'}, inplace = True) 

  #fix duplicate age groups
#  cases_by_age.Age.replace({'70+3':'70+'}, inplace = True)

  #dropping null values
#  cases_by_age.dropna(axis = 0, inplace = True)

  #transforming dataframe into wide format
#  cases_by_age_viz = cases_by_age[['Age','Community','Lompoc Federal Prison','Date']][~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]
#  cases_by_age_viz = pd.melt(cases_by_age_viz,id_vars=['Age','Date'],var_name='Location', value_name='Count')

#  return cases_by_age_viz


In [31]:
#viewing plotly express qualitative color scheme options
fig = px.colors.qualitative.swatches()
fig.show()

In [32]:
#def create_barplot(date):
#  fig = px.bar(cases_by_age_viz[cases_by_age_viz['Date'] == date], 
#               x = 'Count', 
#               y = 'Age',
#               orientation = 'h', 
#               color = 'Location', 
#               template = 'plotly_dark',
#               color_discrete_sequence = px.colors.qualitative.D3,
#               title = 'Total Covid Cases by Age Group as of ' + date)
  
#  fig.update_layout(
#      hovermode = 'closest',
#      title_font_family = 'Courier',
#      title_font_color = 'white',
#      title_font_size = 24,
#      legend = dict(
#          orientation = 'h',
#          yanchor = 'bottom',
#          y = 1.02,
#          xanchor = 'left',
#          x = 0,
#          font = dict(
#              family = 'Courier',
#              size = 12,
#              color = 'white'
#          )
#      )
#  )

#  fig.show()

In [33]:
age_viz.head()

Unnamed: 0,Age,Daily,Community,Lompoc Federal Prison,Total,Date
0,0 – 17,1,807.0,0.0,807.0,"October 20, 2020"
1,18 – 29,10,2731.0,138.0,2869.0,"October 20, 2020"
2,30 – 49,9,2977.0,544.0,3521.0,"October 20, 2020"
3,50 – 69,9,1609.0,311.0,1920.0,"October 20, 2020"
4,70+,1,519.0,33.0,552.0,"October 20, 2020"


In [177]:
age_viz.Date.unique()[:10]

array(['October 20, 2020', 'October 19, 2020', 'October 16, 2020',
       'October 15, 2020', 'October 14, 2020', 'October 13, 2020',
       'October 12, 2020', 'October 9, 2020', 'October 8, 2020',
       'October 7, 2020'], dtype=object)

In [180]:
age_viz.columns[2:5].to_list()

['Community', 'Lompoc Federal Prison', 'Total']

In [158]:
def create_age_barplot(date, column_name):
  fig = px.bar(age_viz[age_viz['Date'] == date],
               x = 'Age',
               y = column_name,
               color = 'Age',
               color_discrete_sequence = px.colors.qualitative.D3,
               template = 'plotly_dark',
               title = column_name + ' Cases by Age as of ' + date)
  
  fig.update_layout(
      hovermode = 'closest',
      font_family = 'Courier',
      font_color = 'white',
      title_font_size = 24,
      yaxis = dict(title = None),
      xaxis = dict(title = 'Age Group'),
      showlegend = False
      #legend = dict(
      #    orientation = 'h',
      #    yanchor = 'bottom',
      #    y = 1.02,
      #    xanchor = 'left',
      #    x=0
      #)
  )

  fig.show()

In [159]:
create_age_barplot('October 20, 2020','Community')

In [160]:
create_age_barplot('October 20, 2020','Lompoc Federal Prison')

In [161]:
create_age_barplot('October 20, 2020','Total')

# Cases by Gender

In [38]:
cases_by_gender.head()

Unnamed: 0,Number of Cases by Gender,Daily,Community,Federal Prisonin Lompoc,Total (Community & Prison),Date
0,Female,16,4250,2,4252.0,"October 20, 2020"
1,Male,13,4300,1011,5311.0,"October 20, 2020"
2,Unknown,1,95,13,108.0,"October 20, 2020"
3,Pending,0,0,0,,"October 20, 2020"
4,Female,9,4234,2,4236.0,"October 19, 2020"


In [39]:
cases_by_gender.columns.to_list()

['Number of Cases by Gender',
 'Daily',
 'Community',
 'Federal Prisonin Lompoc',
 ' Total\xa0 (Community & Prison)',
 'Date']

In [40]:
#rename columns
cases_by_gender.rename(columns = {'Number of Cases by Gender':'Gender',
                                  'Federal Prisonin Lompoc':'Lompoc Federal Prison',
                                  ' Total\xa0 (Community & Prison)':'Total'}, inplace = True)

In [41]:
cases_by_gender['Gender'].value_counts()

Male       75
Pending    75
Female     75
Unknown    75
Name: Gender, dtype: int64

In [42]:
#creating new dataframe for visualizations
cases_by_gender['Gender'].replace({'Unknown':'Other'}, inplace = True)

gender_viz = cases_by_gender[cases_by_gender['Gender'].isin(['Male','Female','Other'])]

In [43]:
gender_viz.isna().sum()

Gender                   0
Daily                    0
Community                0
Lompoc Federal Prison    0
Total                    0
Date                     0
dtype: int64

In [101]:
gender_viz.head()

Unnamed: 0,Gender,Daily,Community,Lompoc Federal Prison,Total,Date
0,Female,16,4250,2,4252.0,"October 20, 2020"
1,Male,13,4300,1011,5311.0,"October 20, 2020"
2,Other,1,95,13,108.0,"October 20, 2020"
4,Female,9,4234,2,4236.0,"October 19, 2020"
5,Male,15,4287,1011,5298.0,"October 19, 2020"


In [107]:
#calculating totals by date to calculate percentage of total for each gender
totals_community = gender_viz.groupby('Date')['Community'].agg('sum')
totals_prison = gender_viz.groupby('Date')['Lompoc Federal Prison'].agg('sum')
totals = gender_viz.groupby('Date')['Total'].agg('sum')

In [108]:
#creating new columns showing the case count as a percentage of the total
gender_viz['Percentage of Community Cases'] = gender_viz.apply(lambda x: round(((x.Community / totals_community[totals_community.index == x.Date].values[0]) * 100), 2), axis = 1)

gender_viz['Percentage of Prison Cases'] = gender_viz.apply(lambda x: round(((x['Lompoc Federal Prison'] / totals_prison[totals_prison.index == x.Date].values[0]) * 100), 2), axis = 1)

gender_viz['Percentage of Total Cases'] = gender_viz.apply(lambda x: round(((x.Total / totals[totals.index == x.Date].values[0]) * 100), 2), axis = 1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [109]:
gender_viz.head()

Unnamed: 0,Gender,Daily,Community,Lompoc Federal Prison,Total,Date,Percentage of Community Cases,Percentage of Prison Cases,Percentage of Total Cases
0,Female,16,4250,2,4252.0,"October 20, 2020",49.16,0.19,43.97
1,Male,13,4300,1011,5311.0,"October 20, 2020",49.74,98.54,54.92
2,Other,1,95,13,108.0,"October 20, 2020",1.1,1.27,1.12
4,Female,9,4234,2,4236.0,"October 19, 2020",49.15,0.19,43.94
5,Male,15,4287,1011,5298.0,"October 19, 2020",49.76,98.54,54.95


In [110]:
#creating dictionary so that the following function can be run with a single input across the entire dashboard
column_dict = {'Community':'Percentage of Community Cases',
               'Lompoc Federal Prison':'Percentage of Prison Cases',
               'Total':'Percentage of Total Cases'}

In [128]:
def create_pie_chart(date, column_name):
  fig = px.pie(gender_viz[gender_viz['Date'] == date],
               values = column_dict[column_name],
               names = 'Gender',
               color = 'Gender',
               template = 'plotly_dark',
               title = column_dict[column_name] + ' by Gender<br>as of ' + date,
               color_discrete_sequence = px.colors.qualitative.D3,
               width = 650)
  
  fig.update_layout(font_family = 'Courier',
                    font_color = 'white',
                    title_font_size = 24,
                    legend = dict(
                        orientation = 'h',
                        yanchor = 'bottom',
                        y = -0.2,
                        xanchor = 'left',
                        x = -0.2
                    ))

  fig.show()

In [129]:
create_pie_chart('October 20, 2020','Community')

In [130]:
create_pie_chart('October 20, 2020','Lompoc Federal Prison')

In [131]:
create_pie_chart('October 20, 2020','Total')

In [44]:
#def create_gender_barplot(date, column_name):
#  fig = px.bar(gender_viz[gender_viz['Date'] == date],
#               x = column_name,
#               y = 'Gender',
#               orientation = 'h',
               #x = 'Gender',
               #y = column_name,
#               color = 'Gender',
#               template = 'plotly_dark',
#               color_discrete_sequence = px.colors.qualitative.D3,
#               title = column_name + ' Cases by Gender as of ' + date)
  
#  fig.update_layout(
#      hovermode = 'closest',
#      font_family = 'Courier',
#      font_color = 'white',
#      title_font_size = 24,
#      legend = dict(
#          orientation = 'h',
#          yanchor = 'bottom',
#          y = 1.02,
#          xanchor = 'left',
#          x = 0
#      ),
#      xaxis = dict(title=None)
#  )

#  fig.show()

# Cases by Ethnicity

In [48]:
ethnicity.head()

Unnamed: 0,RACE/ETHNICITY,Daily,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Hispanic/Latino,9,5398.0,372.0,5770.0,"October 20, 2020"
1,White,1,918.0,377.0,1295.0,"October 20, 2020"
2,Asian,1,119.0,51.0,170.0,"October 20, 2020"
3,Black/African American,0,49.0,145.0,194.0,"October 20, 2020"
4,American Indian/Native Alaskan,0,6.0,28.0,34.0,"October 20, 2020"


In [49]:
ethnicity['RACE/ETHNICITY'].value_counts()

Hispanic/Latino                                      75
Total                                                75
Unknown Ethnicity                                    75
Unknown Race, Non-Hispanic                           75
Missing                                              75
Native Hawaiian or Pacific Islander, Non-Hispanic    57
Multiracial, Non-Hispanic                            57
American Indian/Native Alaskan, Non-Hispanic         57
White, Non-Hispanic                                  57
Black/African American, Non-Hispanic                 57
Other, Non-Hispanic                                  57
Asian, Non-Hispanic                                  57
Suppressed                                           40
Suppressed/Inmate                                    35
Asian                                                13
Multiracial                                          13
White                                                13
Other                                           

In [50]:
#looks like some duplicate values are the result of extra whitespaces, removing extra whitespaces
ethnicity['RACE/ETHNICITY'] = ethnicity['RACE/ETHNICITY'].str.strip()

In [51]:
#looks like the change to remove "Non-Hispanic" from the various categories was a recent change, going to convert historical data to this format for consistency
ethnicity[ethnicity['RACE/ETHNICITY'].isin(['Native Hawaiian or Pacific Islander','Other','Asian','Multiracial','White'])].head(15)

Unnamed: 0,RACE/ETHNICITY,Daily,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
1,White,1,918.0,377.0,1295.0,"October 20, 2020"
2,Asian,1,119.0,51.0,170.0,"October 20, 2020"
5,Native Hawaiian or Pacific Islander,0,5.0,1.0,6.0,"October 20, 2020"
6,Multiracial,0,84.0,1.0,85.0,"October 20, 2020"
7,Other,0,82.0,0.0,82.0,"October 20, 2020"
14,White,1,917.0,377.0,1294.0,"October 19, 2020"
15,Asian,0,116.0,51.0,167.0,"October 19, 2020"
18,Native Hawaiian or Pacific Islander,0,5.0,1.0,6.0,"October 19, 2020"
19,Multiracial,0,83.0,1.0,84.0,"October 19, 2020"
20,Other,0,82.0,0.0,82.0,"October 19, 2020"


In [52]:
#creating dictionary to replace values
value_dict = {
              'Asian, Non-Hispanic':'Asian',
              'Other, Non-Hispanic':'Other',
              'Native Hawaiian or Pacific Islander, Non-Hispanic':'Native Hawaiian or Pacific Islander',
              'American Indian/Native Alaskan, Non-Hispanic':'American Indian/Native Alaskan',
              'White, Non-Hispanic':'White',
              'Multiracial, Non-Hispanic':'Multiracial',
              'Black/African American, Non-Hispanic':'Black/African American',
              'Suppressed/Inmate':'Suppressed'
}

ethnicity['RACE/ETHNICITY'].replace(value_dict, inplace = True)

In [53]:
#confirming that this worked
ethnicity['RACE/ETHNICITY'].value_counts()

Native Hawaiian or Pacific Islander    75
Unknown Race, Non-Hispanic             75
Total                                  75
American Indian/Native Alaskan         75
Suppressed                             75
Unknown Ethnicity                      75
White                                  75
Black/African American                 75
Missing                                75
Asian                                  75
Other                                  75
Multiracial                            75
Hispanic/Latino                        75
Name: RACE/ETHNICITY, dtype: int64

In [54]:
# converting date column to pandas datetime format
#ethnicity['Date'] = pd.to_datetime(ethnicity['Date'])

In [132]:
#creating new dataframe for visualizations
ethnicity_viz = ethnicity.loc[ethnicity['RACE/ETHNICITY'].isin(['White',                               #filtering based on values in race/ethnicity column
                                                         'American Indian/Native Alaskan',
                                                         'Asian',
                                                         'Multiracial',
                                                         'Hispanic/Latino',
                                                         'Native Hawaiian or Pacific Islander',
                                                         'Black/African American']),
                                                        ['RACE/ETHNICITY','Community','Federal Prison in Lompoc','Total(Community & Prison)','Date']]    #selecting only columns I'll need for visualization
                                           
ethnicity_viz.head(20)

Unnamed: 0,RACE/ETHNICITY,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Hispanic/Latino,5398.0,372.0,5770.0,"October 20, 2020"
1,White,918.0,377.0,1295.0,"October 20, 2020"
2,Asian,119.0,51.0,170.0,"October 20, 2020"
3,Black/African American,49.0,145.0,194.0,"October 20, 2020"
4,American Indian/Native Alaskan,6.0,28.0,34.0,"October 20, 2020"
5,Native Hawaiian or Pacific Islander,5.0,1.0,6.0,"October 20, 2020"
6,Multiracial,84.0,1.0,85.0,"October 20, 2020"
13,Hispanic/Latino,5386.0,372.0,5758.0,"October 19, 2020"
14,White,917.0,377.0,1294.0,"October 19, 2020"
15,Asian,116.0,51.0,167.0,"October 19, 2020"


In [133]:
ethnicity_viz.isna().sum()

RACE/ETHNICITY               0
Community                    0
Federal Prison in Lompoc     0
Total(Community & Prison)    0
Date                         0
dtype: int64

In [134]:
#cleaning up column names
ethnicity_viz.rename({'RACE/ETHNICITY':'Ethnicity',
                      'Federal Prison in Lompoc':'Lompoc Federal Prison',
                      'Total(Community & Prison)':'Total'}, 
                     axis = 1, 
                     inplace = True)

In [135]:
#rather than use the total values from original dataframe (which included groups like unknown and suppressed that I removed), I'm calculating the totals based solely on the ethicity values that I'm using for the visualization
#totals_community = ethnicity_viz.groupby('Date')['Community'].agg('sum')
#totals_prison = ethnicity_viz.groupby('Date')['Lompoc Federal Prison'].agg('sum')
#totals = ethnicity_viz.groupby('Date')['Total'].agg('sum')

In [136]:
#creating new column showing the case count as a percentage of the total
#ethnicity_viz['Percentage of Community Cases'] = ethnicity_viz.apply(lambda x: round(((x.Community / totals_community[totals_community.index == x.Date].values[0]) * 100), 2), axis = 1)

#ethnicity_viz['Percentage of Prison Cases'] = ethnicity_viz.apply(lambda x: round(((x['Lompoc Federal Prison'] / totals_prison[totals_prison.index == x.Date].values[0]) * 100), 2), axis = 1)

#ethnicity_viz['Percentage of Total Cases'] = ethnicity_viz.apply(lambda x: round(((x.Total / totals[totals.index == x.Date].values[0]) * 100), 2), axis = 1)

In [137]:
ethnicity_viz.head()

Unnamed: 0,Ethnicity,Community,Lompoc Federal Prison,Total,Date
0,Hispanic/Latino,5398.0,372.0,5770.0,"October 20, 2020"
1,White,918.0,377.0,1295.0,"October 20, 2020"
2,Asian,119.0,51.0,170.0,"October 20, 2020"
3,Black/African American,49.0,145.0,194.0,"October 20, 2020"
4,American Indian/Native Alaskan,6.0,28.0,34.0,"October 20, 2020"


In [61]:
#creating dictionary so that the following function can be run with a single input across the entire dashboard
#column_dict = {'Community':'Percentage of Community Cases',
#               'Lompoc Federal Prison':'Percentage of Prison Cases',
#               'Total':'Percentage of Total Cases'}

In [62]:
#def create_pie_chart(date, column_name):
#  fig = px.pie(ethnicity_viz[ethnicity_viz['Date'] == date],
#               values = column_dict[column_name],
#               names = 'Ethnicity',
#               color = 'Ethnicity',
#               template = 'plotly_dark',
#               title = column_dict[column_name] + ' by Ethnicity<br>as of ' + date,
#               color_discrete_sequence = px.colors.qualitative.D3,
#               width = 800)
  
#  fig.update_layout(font_family = 'Courier',
#                    font_color = 'white',
#                    title_font_size = 24)
#
#  fig.show()

In [150]:
def create_ethnicity_barplot(date, column_name):
  fig = px.bar(ethnicity_viz[ethnicity_viz['Date'] == date],
               x = column_name,
               y = 'Ethnicity',
               color = 'Ethnicity',
               orientation = 'h',
               template = 'plotly_dark',
               title = column_name + ' Cases by Ethnicity as of ' + date,
               color_discrete_sequence = px.colors.qualitative.D3)
  
  fig.update_layout(
      hovermode = 'closest',
      font_family = 'Courier',
      font_color = 'white',
      title_font_size = 24,
      xaxis = dict(title=None),
      yaxis = dict(title=None),
      showlegend = False
      #legend = dict(
      #    orientation = 'h',
      #    yanchor = 'bottom',
      #    y = -0.2,
      #    xanchor = 'left',
      #    x=-0.21
      #)
  )

  fig.show()

In [151]:
create_ethnicity_barplot('October 20, 2020','Community')

In [152]:
create_ethnicity_barplot('October 20, 2020','Lompoc Federal Prison')

In [153]:
create_ethnicity_barplot('October 20, 2020','Total')

# Cases by Area

In [66]:
cases_by_area.head()

Unnamed: 0,Geographic Area,Daily Cases,Total Confirmed Cases,Recovered by Region,Still infectious by Region,Number of Deaths,Date
0,SOUTH COUNTY UNINCORPORATED AREA includes comm...,1,211.0,199,5,7.0,"October 20, 2020"
1,CITY OF SANTA BARBARA and the unincorporated a...,2,1263.0,1243,7,13.0,"October 20, 2020"
2,CITY OF GOLETA,1,273.0,265,4,4.0,"October 20, 2020"
3,COMMUNITY OF ISLA VISTA,4,298.0,265,32,1.0,"October 20, 2020"
4,UNINCORPORATED AREA OF THE GOLETA VALLEY AND G...,0,216.0,211,3,2.0,"October 20, 2020"


In [67]:
cases_by_area['Geographic Area'].value_counts()

Out of County                                                                                                                          75
FEDERAL PRISON IN LOMPOC                                                                                                               75
SOUTH COUNTY UNINCORPORATED AREA includes communities of Montecito, Summerland and the City of Carpinteria                             75
CITY OF GOLETA                                                                                                                         75
Pending                                                                                                                                75
CITY OF SANTA MARIA                                                                                                                    75
CITY OF SANTA BARBARA and the unincorporated area of Mission Canyon                                                                    75
COMMUNITY OF ISLA VISTA           

In [68]:
#cleaning up Geographic Area column
area_dict = {'SOUTH COUNTY UNINCORPORATED AREA includes communities of Montecito, Summerland and the City of Carpinteria':'South County Unincorporated Area',
             'CITY OF SANTA BARBARA and the unincorporated area of Mission Canyon':'Santa Barbara',
             'CITY OF GOLETA':'Goleta',
             'COMMUNITY OF ISLA VISTA':'Isla Vista',
             'UNINCORPORATED AREA OF THE GOLETA VALLEY AND GAVIOTA':'Goleta Valley/Gaviota',
             'SANTA YNEZ VALLEY including the Cities of Solvang & Buellton, and the communities of Santa Ynez, Los Alamos, Los Olivos and Ballard':'Santa Ynez Valley',
             'CITY OF LOMPOC and the communities of Mission Hills and Vandenberg Village':'Lompoc',
             'FEDERAL PRISON IN LOMPOC':'Federal Prison in Lompoc',
             'CITY OF SANTA MARIA':'Santa Maria',
             'COMMUNITY OF ORCUTT':'Orcutt',
             'UNINCORPORATED AREAS of Sisquoc, Casmalia, Garey,\xa0 Cuyama, New Cuyama, and the City of Guadalupe':'Other Unincorporated Areas',
             'Total**':'Total',
             'Total*':'Total'}

cases_by_area['Area'] = cases_by_area['Geographic Area'].replace(area_dict)

In [69]:
cases_by_area.Area.value_counts()

Out of County                       75
Federal Prison in Lompoc            75
Total                               75
Orcutt                              75
Goleta                              75
Isla Vista                          75
Pending                             75
Santa Ynez Valley                   75
Lompoc                              75
Goleta Valley/Gaviota               75
Other Unincorporated Areas          75
Santa Maria                         75
South County Unincorporated Area    75
Santa Barbara                       75
Name: Area, dtype: int64

In [70]:
cases_by_area.isna().sum()

Geographic Area                0
Daily Cases                   75
Total  Confirmed Cases        75
Recovered by Region            0
Still infectious by Region     0
Number of Deaths              80
Date                           0
Area                           0
dtype: int64

In [71]:
cases_by_area.dtypes

Geographic Area                object
Daily Cases                    object
Total  Confirmed Cases        float64
Recovered by Region             int64
Still infectious by Region      int64
Number of Deaths              float64
Date                           object
Area                           object
dtype: object

In [72]:
#cleaning up Daily Cases column and converting to numeric
cases_by_area['Daily Cases'] = cases_by_area['Daily Cases'].apply(lambda x: x[:-1] if '*' in str(x) else x)    #there are a few cases where the number was follwed by an asterisk, removing asterisks
cases_by_area['Daily Cases'] = cases_by_area['Daily Cases'].str.strip().replace('—',np.nan)   #changing missing values to null so they don't appear on graph 
cases_by_area['Daily Cases'] = pd.to_numeric(cases_by_area['Daily Cases'])     #converting to numeric

In [73]:
#converting Date column to pandas datetime format
cases_by_area['Date'] = pd.to_datetime(cases_by_area['Date'])

In [74]:
#creating new dataframe for visualizations
areas_to_plot = ['Lompoc',
                 'Isla Vista',
                 'Orcutt',
                 'Federal Prison in Lompoc',
                 'Goleta Valley/Gaviota',
                 'Goleta',
                 'Santa Barbara',
                 'Santa Ynez Valley',
                 'Santa Maria']

#dropping original Geographic Area column and filtering for specific areas
area_viz = cases_by_area[cases_by_area['Area'].isin(areas_to_plot)][cases_by_area.columns[1:]]    

#renaming columns
area_viz.rename({'Daily Cases':'New Cases',
                 'Total\xa0 Confirmed Cases':'Total Confirmed Cases',
                 'Recovered by Region':'Recovered Cases',
                 'Still infectious by Region':'Active Cases'},
                axis = 1,
                inplace = True)

In [75]:
area_viz.head()

Unnamed: 0,New Cases,Total Confirmed Cases,Recovered,Active Cases,Number of Deaths,Date,Area
1,2.0,1263.0,1243,7,13.0,2020-10-20,Santa Barbara
2,1.0,273.0,265,4,4.0,2020-10-20,Goleta
3,4.0,298.0,265,32,1.0,2020-10-20,Isla Vista
4,0.0,216.0,211,3,2.0,2020-10-20,Goleta Valley/Gaviota
5,2.0,178.0,165,6,7.0,2020-10-20,Santa Ynez Valley


In [76]:
fig = px.line(area_viz,
              x = 'Date',
              y = 'Active Cases',
              color = 'Area')
fig.show()

In [165]:
#fixing outlier in Active Cases column for Lompoc, looks like an extra zero was added by mistake
area_viz.loc[(area_viz['Area'] == 'Lompoc') & (area_viz['Active Cases'] > 200), ['Active Cases']] = 22

#fixing outlier in Number of Deaths column for Santa Maria, looks like an extra 3 was added by mistake
area_viz.loc[(area_viz['Area'] == 'Santa Maria') & (area_viz['Number of Deaths'] > 300), ['Number of Deaths']] = 34

In [166]:
fig = px.line(area_viz,
              x = 'Date',
              y = 'Active Cases',
              title = 'Active Cases' + ' by Region',
              template = 'plotly_dark',
              #color_discrete_sequence = px.colors.qualitative.D3,
              color = 'Area')
fig.update_layout(
    hovermode = 'closest',
    title_font_family = 'Courier',
    title_font_color = 'white',
    title_font_size = 24,
    legend = dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='left',
        x=0,
        font = dict(
            family = 'Courier',
            size = 12,
            color = 'white')
        )
)

                       
fig.show()

In [167]:
#turning this into function so it can be used for any of the columns in the area_viz dataframe
def create_line_plot(column_name):
  fig = px.line(area_viz,
              x = 'Date',
              y = column_name,
              title = column_name + ' by Region',
              template = 'plotly_dark',
              color = 'Area')
  
  fig.update_layout(
      hovermode = 'closest',
      title_font_size = 24,
      font_family = 'Courier',    #setting font style and color globally, could also set locally by using a font dictionary for the legend or title_font_family/color for title
      font_color = 'white',
      legend = dict(
          orientation='h',
          yanchor='bottom',
          y=1.02,
          xanchor='left',
          x=0
          )
  )
                  
  fig.show()

In [80]:
area_viz.columns.to_list()

['New Cases',
 'Total Confirmed Cases',
 'Recovered',
 'Active Cases',
 'Number of Deaths',
 'Date',
 'Area']

In [81]:
create_line_plot('New Cases')

In [82]:
create_line_plot('Total Confirmed Cases')

In [162]:
create_line_plot('Recovered Cases')

In [163]:
create_line_plot('Active Cases')

In [168]:
create_line_plot('Number of Deaths')