<a href="https://colab.research.google.com/github/bryandaetz1/SB_County_COVID-19_Data/blob/master/SB_Covid_Data_Visualizations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install plotly==4.12.0

Collecting plotly==4.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/af86e9d9bf1a3e4f2dabebeabd02a32e8ddf671a5d072b3af2b011efea99/plotly-4.12.0-py2.py3-none-any.whl (13.1MB)
[K     |████████████████████████████████| 13.1MB 321kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.12.0


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [3]:
cases_by_age = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_age.csv')
cases_by_area = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_area.csv')
cases_by_gender = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_gender.csv')
ethnicity = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/ethnicity.csv')
recovery_status = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/recovery_status.csv')
testing_status = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/testing_status.csv')
transmission_method = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/transmission_method.csv')

## Still Need to Add

In [None]:
testing_status.head()

Unnamed: 0,Testing Status,Total,Date
0,Cumulative Positive,11205,"November 25, 2020"
1,Cumulative Negative,228406,"November 25, 2020"
2,Cumulative Inconclusive,869,"November 25, 2020"
3,Cumulative Pending,205,"November 25, 2020"
4,Cumulative Invalid,420,"November 25, 2020"


In [None]:
transmission_method.head()

Unnamed: 0,Transmission Method,Total,Date
0,Community Close Contact Transmission,3954,"November 25, 2020"
1,Persons Incarcerated at Federal Prison in Lomp...,1027,"November 25, 2020"
2,Travel Transmission,103,"November 25, 2020"
3,Community Transmission,2555,"November 25, 2020"
4,Unknown,3277,"November 25, 2020"


In [None]:
transmission_method['Transmission Method'].value_counts()

Community Close Contact Transmission                              102
Persons Incarcerated at Federal Prison in Lompoc Close Contact    102
Community Transmission                                            102
Travel Transmission                                               102
Under Investigation                                               102
Unknown                                                            83
Name: Transmission Method, dtype: int64

In [None]:
# horizontal bar chart where values for each gender are going in opposite directions?, may not work if counting pending or unknown gender
cases_by_gender.head()

Unnamed: 0,Number of Cases by Gender,Daily,Community,Federal Prisonin Lompoc,Total (Community & Prison),Date
0,Female,18,5068,2,5070.0,"November 25, 2020"
1,Male,22,5006,1012,6018.0,"November 25, 2020"
2,Unknown,0,104,13,117.0,"November 25, 2020"
3,Pending,0,0,0,,"November 25, 2020"
4,Female,57,5051,2,5053.0,"November 24, 2020"


## Recovery Status

In [None]:
recovery_status.head()

Unnamed: 0,Recovery Status,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Still Infectious Cases,391.0,1.0,392.0,"November 25, 2020"
1,Recovered Cases,9655.0,1023.0,10678.0,"November 25, 2020"
2,Recovering in Hospital,,,23.0,"November 25, 2020"
3,Recovering in ICU,,,4.0,"November 25, 2020"
4,Pending Information,,,,"November 25, 2020"


In [None]:
recovery_status['Recovery Status'].value_counts()

Recovering in Hospital    102
Recovered Cases           102
Pending Information       102
Recovering in ICU         102
Still Infectious Cases     84
Active Cases               13
Active Cases*               5
Recovering at Home          3
Name: Recovery Status, dtype: int64

In [None]:
#cleaning up recovery status values
recovery_status['Recovery Status'].replace({'Still Infectious Cases':'Active Cases', 
                                            'Active Cases*':'Active Cases'},
                                           inplace = True)

#cleaning up column names
recovery_status.rename({'Community ':'Community',
                        'Federal Prison in Lompoc':'Lompoc Federal Prison',
                        'Total(Community & Prison)':'Total'},
                       axis = 1,
                       inplace = True)

In [None]:
recovery_status['Recovery Status'].value_counts()

Recovering in Hospital    102
Recovered Cases           102
Pending Information       102
Active Cases              102
Recovering in ICU         102
Recovering at Home          3
Name: Recovery Status, dtype: int64

In [None]:
#getting subset of data
recovery = recovery_status[recovery_status['Recovery Status'].isin(['Active Cases','Recovered Cases'])]

In [None]:
recovery.isna().sum() 

Recovery Status          0
Community                0
Lompoc Federal Prison    0
Total                    0
Date                     0
dtype: int64

In [None]:
recovery.dtypes

Recovery Status           object
Community                 object
Lompoc Federal Prison    float64
Total                    float64
Date                      object
dtype: object

In [None]:
#converting community column to numeric
recovery['Community'] = pd.to_numeric(recovery['Community'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
#function to return active cases for a given column and date
def active_cases(date, column_name):
  count = int(recovery.loc[(recovery['Recovery Status'] == 'Active Cases') & (recovery['Date'] == date), column_name].values)
  text = 'Active Cases: %d' %count
  return text

#function to return recovered cases for a given column and date
def recovered_cases(date, column_name):
  count = int(recovery.loc[(recovery['Recovery Status'] == 'Recovered Cases') & (recovery['Date'] == date), column_name].values)
  text = 'Recovered Cases: %d' %count
  return text

### Validating that Functions are Working

In [None]:
recovery.head(2)

Unnamed: 0,Recovery Status,Community,Lompoc Federal Prison,Total,Date
0,Active Cases,391,1.0,392.0,"November 25, 2020"
1,Recovered Cases,9655,1023.0,10678.0,"November 25, 2020"


In [None]:
for col in recovery.columns[1:4]:
  print(col, '\n', active_cases('October 20, 2020', col), '\n')

Community 
 Active Cases: 118 

Lompoc Federal Prison 
 Active Cases: 0 

Total 
 Active Cases: 118 



In [None]:
for col in recovery.columns[1:4]:
  print(col, '\n', recovered_cases('October 20, 2020', col), '\n')

Community 
 Recovered Cases: 8411 

Lompoc Federal Prison 
 Recovered Cases: 1023 

Total 
 Recovered Cases: 9434 



# Visualizations

In [None]:
#view available plotly templates for visualizations
import plotly.io as pio
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

#Cases by Age

In [None]:
cases_by_age.shape

(710, 6)

In [None]:
cases_by_age.head()

Unnamed: 0,Number of Cases by Age,Daily,Community,Federal Prisonin Lompoc,Total(Community & Prison),Date
0,0 – 17,5,950.0,0.0,950.0,"November 25, 2020"
1,18 – 29,12,3335.0,138.0,3473.0,"November 25, 2020"
2,30 – 49,10,3387.0,544.0,3931.0,"November 25, 2020"
3,50 – 69,11,1908.0,312.0,2220.0,"November 25, 2020"
4,70+,2,597.0,33.0,630.0,"November 25, 2020"


In [None]:
#rename columns
cases_by_age.rename(columns = {'Number of Cases by Age':'Age',
                               'Federal Prisonin Lompoc':'Lompoc Federal Prison',
                               'Total(Community & Prison)':'Total'}, inplace = True)

In [None]:
cases_by_age.head()

Unnamed: 0,Age,Daily,Community,Lompoc Federal Prison,Total,Date
0,0 – 17,5,950.0,0.0,950.0,"November 25, 2020"
1,18 – 29,12,3335.0,138.0,3473.0,"November 25, 2020"
2,30 – 49,10,3387.0,544.0,3931.0,"November 25, 2020"
3,50 – 69,11,1908.0,312.0,2220.0,"November 25, 2020"
4,70+,2,597.0,33.0,630.0,"November 25, 2020"


In [None]:
cases_by_age.Age.value_counts()

In [None]:
#fix duplicate age groups
cases_by_age['Age'].replace({'70+3':'70+'}, inplace = True)

In [None]:
#creating new dataframe without age suppressed and pending age values for visualization
age_viz = cases_by_age[~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]

In [None]:
age_viz.isna().sum()

Age                      0
Daily                    0
Community                0
Lompoc Federal Prison    0
Total                    0
Date                     0
dtype: int64

In [None]:
#transforming dataframe into wide format
#cases_by_age_viz = cases_by_age[['Age','Community','Lompoc Federal Prison','Date']][~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]
#cases_by_age_viz = pd.melt(cases_by_age_viz, id_vars=['Age','Date'], var_name='Location', value_name='Count')
#cases_by_age_viz.head()

#### Creating Function for Data Transformations Required for Visualization

In [None]:
#def cases_by_age_transform(cases_by_age):
#  #import dataframe
#  cases_by_age = pd.read_csv('https://raw.githubusercontent.com/bryandaetz1/SB_County_COVID-19_Data/master/CSV_Files/cases_by_age_10-20-20.csv')

  #rename columns
#  cases_by_age.rename(columns = {'Number of Cases by Age':'Age',
#                                 'Federal Prisonin Lompoc':'Lompoc Federal Prison',
#                                 'Daily ':'Daily'}, inplace = True) 

  #fix duplicate age groups
#  cases_by_age.Age.replace({'70+3':'70+'}, inplace = True)

  #dropping null values
#  cases_by_age.dropna(axis = 0, inplace = True)

  #transforming dataframe into wide format
#  cases_by_age_viz = cases_by_age[['Age','Community','Lompoc Federal Prison','Date']][~cases_by_age['Age'].isin(['Age Suppressed','Pending'])]
#  cases_by_age_viz = pd.melt(cases_by_age_viz,id_vars=['Age','Date'],var_name='Location', value_name='Count')

#  return cases_by_age_viz


In [None]:
#viewing plotly express qualitative color scheme options
fig = px.colors.qualitative.swatches()
fig.show()

In [None]:
#def create_barplot(date):
#  fig = px.bar(cases_by_age_viz[cases_by_age_viz['Date'] == date], 
#               x = 'Count', 
#               y = 'Age',
#               orientation = 'h', 
#               color = 'Location', 
#               template = 'plotly_dark',
#               color_discrete_sequence = px.colors.qualitative.D3,
#               title = 'Total Covid Cases by Age Group as of ' + date)
  
#  fig.update_layout(
#      hovermode = 'closest',
#      title_font_family = 'Courier',
#      title_font_color = 'white',
#      title_font_size = 24,
#      legend = dict(
#          orientation = 'h',
#          yanchor = 'bottom',
#          y = 1.02,
#          xanchor = 'left',
#          x = 0,
#          font = dict(
#              family = 'Courier',
#              size = 12,
#              color = 'white'
#          )
#      )
#  )

#  fig.show()

In [None]:
age_viz.head()

Unnamed: 0,Age,Daily,Community,Lompoc Federal Prison,Total,Date
0,0 – 17,5,950.0,0.0,950.0,"November 25, 2020"
1,18 – 29,12,3335.0,138.0,3473.0,"November 25, 2020"
2,30 – 49,10,3387.0,544.0,3931.0,"November 25, 2020"
3,50 – 69,11,1908.0,312.0,2220.0,"November 25, 2020"
4,70+,2,597.0,33.0,630.0,"November 25, 2020"


In [None]:
age_viz.Date.unique()[:10]

array(['November 25, 2020', 'November 24, 2020', 'November 23, 2020',
       'November 22, 2020', 'November 21, 2020', 'November 20, 2020',
       'November 19, 2020', 'November 18, 2020', 'November 17, 2020',
       'November 16, 2020'], dtype=object)

In [None]:
age_viz.columns[2:5].to_list()

['Community', 'Lompoc Federal Prison', 'Total']

In [None]:
age_viz2 = age_viz[age_viz['Date'] == 'November 25, 2020']

In [None]:
age_viz2

Unnamed: 0,Age,Daily,Community,Lompoc Federal Prison,Total,Date
0,0 – 17,5,950.0,0.0,950.0,"November 25, 2020"
1,18 – 29,12,3335.0,138.0,3473.0,"November 25, 2020"
2,30 – 49,10,3387.0,544.0,3931.0,"November 25, 2020"
3,50 – 69,11,1908.0,312.0,2220.0,"November 25, 2020"
4,70+,2,597.0,33.0,630.0,"November 25, 2020"


In [None]:
age_viz2.shape[0]

5

In [None]:
['lightgray']*age_viz2.shape[0]

list

In [None]:
age_viz2['Community'].max()

3387.0

In [None]:
index = age_viz2[age_viz2['Community'] == age_viz2['Community'].max()].index.values[0]

In [None]:
age_viz2['Community'][index]

3387.0

In [None]:
colors = ['lightslategray'] * age_viz2.shape[0]
colors[index] = 'crimson'
colors

['lightslategray',
 'lightslategray',
 'crimson',
 'lightslategray',
 'lightslategray']

In [None]:
def create_age_barplot(date, column_name):
  age_plot = age_viz[age_viz['Date'] == date].reset_index(drop=True)
  index = age_plot[age_plot[column_name] == age_plot[column_name].max()].index.values[0]
  
  colors = ['#4b4c4f']*age_plot.shape[0]
  colors[index] = '#ed2009'

  fig = px.bar(age_plot,
               y = 'Age',
               x = column_name,
               orientation = 'h',
               color = 'Age',
               color_discrete_sequence = colors,
               template = 'plotly_dark',
               title = column_name + ' Cases by Age as of ' + date)
  
  fig.update_layout(
      hovermode = 'closest',
      font_family = 'Courier',
      font_color = 'white',
      title_font_size = 24,
      yaxis = dict(title = None),
      xaxis = dict(title = 'Age Group'),
      showlegend = False
      
  )
  
  fig.show()

In [None]:
create_age_barplot('October 20, 2020','Community')

In [None]:
create_age_barplot('October 20, 2020','Lompoc Federal Prison')

In [None]:
create_age_barplot('November 25, 2020','Total')

# Cases by Gender

In [None]:
cases_by_gender.head()

In [None]:
cases_by_gender.columns.to_list()

In [None]:
#rename columns
cases_by_gender.rename(columns = {'Number of Cases by Gender':'Gender',
                                  'Federal Prisonin Lompoc':'Lompoc Federal Prison',
                                  ' Total\xa0 (Community & Prison)':'Total'}, inplace = True)

In [None]:
cases_by_gender['Gender'].value_counts()

In [None]:
#creating new dataframe for visualizations
cases_by_gender['Gender'].replace({'Unknown':'Other'}, inplace = True)

gender_viz = cases_by_gender[cases_by_gender['Gender'].isin(['Male','Female','Other'])]

In [None]:
gender_viz.isna().sum()

In [None]:
gender_viz.head()

In [None]:
#calculating totals by date to calculate percentage of total for each gender
totals_community = gender_viz.groupby('Date')['Community'].agg('sum')
totals_prison = gender_viz.groupby('Date')['Lompoc Federal Prison'].agg('sum')
totals = gender_viz.groupby('Date')['Total'].agg('sum')

In [None]:
#creating new columns showing the case count as a percentage of the total
gender_viz['Percentage of Community Cases'] = gender_viz.apply(lambda x: round(((x.Community / totals_community[totals_community.index == x.Date].values[0]) * 100), 2), axis = 1)

gender_viz['Percentage of Prison Cases'] = gender_viz.apply(lambda x: round(((x['Lompoc Federal Prison'] / totals_prison[totals_prison.index == x.Date].values[0]) * 100), 2), axis = 1)

gender_viz['Percentage of Total Cases'] = gender_viz.apply(lambda x: round(((x.Total / totals[totals.index == x.Date].values[0]) * 100), 2), axis = 1)

In [None]:
gender_viz.head()

In [None]:
#creating dictionary so that the following function can be run with a single input across the entire dashboard
column_dict = {'Community':'Percentage of Community Cases',
               'Lompoc Federal Prison':'Percentage of Prison Cases',
               'Total':'Percentage of Total Cases'}

In [None]:
def create_pie_chart(date, column_name):
  fig = px.pie(gender_viz[gender_viz['Date'] == date],
               values = column_dict[column_name],
               names = 'Gender',
               color = 'Gender',
               template = 'plotly_dark',
               title = column_dict[column_name] + ' by Gender<br>as of ' + date,
               color_discrete_sequence = px.colors.qualitative.D3,
               width = 650)
  
  fig.update_layout(font_family = 'Courier',
                    font_color = 'white',
                    title_font_size = 24,
                    legend = dict(
                        orientation = 'h',
                        yanchor = 'bottom',
                        y = -0.2,
                        xanchor = 'left',
                        x = -0.2
                    ))

  fig.show()

In [None]:
create_pie_chart('October 20, 2020','Community')

In [None]:
create_pie_chart('October 20, 2020','Lompoc Federal Prison')

In [None]:
create_pie_chart('October 20, 2020','Total')

In [None]:
#def create_gender_barplot(date, column_name):
#  fig = px.bar(gender_viz[gender_viz['Date'] == date],
#               x = column_name,
#               y = 'Gender',
#               orientation = 'h',
               #x = 'Gender',
               #y = column_name,
#               color = 'Gender',
#               template = 'plotly_dark',
#               color_discrete_sequence = px.colors.qualitative.D3,
#               title = column_name + ' Cases by Gender as of ' + date)
  
#  fig.update_layout(
#      hovermode = 'closest',
#      font_family = 'Courier',
#      font_color = 'white',
#      title_font_size = 24,
#      legend = dict(
#          orientation = 'h',
#          yanchor = 'bottom',
#          y = 1.02,
#          xanchor = 'left',
#          x = 0
#      ),
#      xaxis = dict(title=None)
#  )

#  fig.show()

# Cases by Ethnicity

In [4]:
ethnicity.head()

Unnamed: 0,RACE/ETHNICITY,Daily,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Hispanic/Latino,105,7210.0,373,7583.0,"December 15, 2020"
1,White,31,1861.0,376,2237.0,"December 15, 2020"
2,Asian,4,233.0,51,284.0,"December 15, 2020"
3,Black/African American,0,92.0,146,238.0,"December 15, 2020"
4,American Indian/Native Alaskan,0,8.0,28,36.0,"December 15, 2020"


In [None]:
ethnicity['RACE/ETHNICITY'].value_counts()

Missing                                              102
Unknown Race, Non-Hispanic                           102
Total                                                102
Hispanic/Latino                                      102
Unknown Ethnicity                                    102
Suppressed/Inmate                                     62
Native Hawaiian or Pacific Islander, Non-Hispanic     57
Multiracial, Non-Hispanic                             57
White, Non-Hispanic                                   57
Black/African American, Non-Hispanic                  57
Asian, Non-Hispanic                                   57
American Indian/Native Alaskan, Non-Hispanic          57
Other, Non-Hispanic                                   57
Suppressed                                            40
Black/African American                                34
American Indian/Native Alaskan                        34
Native Hawaiian or Pacific Islander                   32
White                          

In [5]:
#looks like some duplicate values are the result of extra whitespaces, removing extra whitespaces
ethnicity['RACE/ETHNICITY'] = ethnicity['RACE/ETHNICITY'].str.strip()

In [6]:
#looks like the change to remove "Non-Hispanic" from the various categories was a recent change, going to convert historical data to this format for consistency
ethnicity[ethnicity['RACE/ETHNICITY'].isin(['Native Hawaiian or Pacific Islander','Other','Asian','Multiracial','White'])].head(15)

Unnamed: 0,RACE/ETHNICITY,Daily,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
1,White,31,1861.0,376,2237.0,"December 15, 2020"
2,Asian,4,233.0,51,284.0,"December 15, 2020"
5,Native Hawaiian or Pacific Islander,0,10.0,2,12.0,"December 15, 2020"
6,Multiracial,0,144.0,1,145.0,"December 15, 2020"
7,Other,3,109.0,1,110.0,"December 15, 2020"
14,White,50,1831.0,376,2207.0,"December 14, 2020"
15,Asian,5,226.0,51,277.0,"December 14, 2020"
18,Native Hawaiian or Pacific Islander,0,10.0,2,12.0,"December 14, 2020"
19,Multiracial,0,143.0,1,144.0,"December 14, 2020"
20,Other,7,105.0,1,106.0,"December 14, 2020"


In [7]:
#creating dictionary to replace values
value_dict = {
              'Asian, Non-Hispanic':'Asian',
              'Other, Non-Hispanic':'Other',
              'Native Hawaiian or Pacific Islander, Non-Hispanic':'Native Hawaiian or Pacific Islander',
              'American Indian/Native Alaskan, Non-Hispanic':'American Indian/Native Alaskan',
              'White, Non-Hispanic':'White',
              'Multiracial, Non-Hispanic':'Multiracial',
              'Black/African American, Non-Hispanic':'Black/African American',
              'Suppressed/Inmate':'Suppressed'
}

ethnicity['RACE/ETHNICITY'].replace(value_dict, inplace = True)

In [8]:
#confirming that this worked
ethnicity['RACE/ETHNICITY'].value_counts()

White                                  117
Other                                  117
Multiracial                            117
Asian                                  117
Total                                  117
Native Hawaiian or Pacific Islander    117
American Indian/Native Alaskan         117
Unknown Ethnicity                      117
Suppressed                             117
Hispanic/Latino                        117
Unknown Race, Non-Hispanic             117
Missing                                117
Black/African American                 117
Name: RACE/ETHNICITY, dtype: int64

In [None]:
# converting date column to pandas datetime format
#ethnicity['Date'] = pd.to_datetime(ethnicity['Date'])

In [31]:
#creating new dataframe for visualizations
ethnicity_viz = ethnicity.loc[ethnicity['RACE/ETHNICITY'].isin(['White',                               #filtering based on values in race/ethnicity column
                                                         'American Indian/Native Alaskan',
                                                         'Asian',
                                                         'Multiracial',
                                                         'Hispanic/Latino',
                                                         'Native Hawaiian or Pacific Islander',
                                                         'Black/African American']),
                                                        ['RACE/ETHNICITY','Community','Federal Prison in Lompoc','Total(Community & Prison)','Date']]    #selecting only columns I'll need for visualization
                                           
ethnicity_viz.head(20)

Unnamed: 0,RACE/ETHNICITY,Community,Federal Prison in Lompoc,Total(Community & Prison),Date
0,Hispanic/Latino,7210.0,373,7583.0,"December 15, 2020"
1,White,1861.0,376,2237.0,"December 15, 2020"
2,Asian,233.0,51,284.0,"December 15, 2020"
3,Black/African American,92.0,146,238.0,"December 15, 2020"
4,American Indian/Native Alaskan,8.0,28,36.0,"December 15, 2020"
5,Native Hawaiian or Pacific Islander,10.0,2,12.0,"December 15, 2020"
6,Multiracial,144.0,1,145.0,"December 15, 2020"
13,Hispanic/Latino,7085.0,373,7458.0,"December 14, 2020"
14,White,1831.0,376,2207.0,"December 14, 2020"
15,Asian,226.0,51,277.0,"December 14, 2020"


In [32]:
ethnicity_viz.isna().sum()

RACE/ETHNICITY               0
Community                    0
Federal Prison in Lompoc     0
Total(Community & Prison)    0
Date                         0
dtype: int64

In [33]:
#cleaning up column names
ethnicity_viz.rename({'RACE/ETHNICITY':'Ethnicity',
                      'Federal Prison in Lompoc':'Lompoc Federal Prison',
                      'Total(Community & Prison)':'Total'}, 
                     axis = 1, 
                     inplace = True)

In [34]:
ethnicity_viz.dtypes

Ethnicity                 object
Community                float64
Lompoc Federal Prison     object
Total                    float64
Date                      object
dtype: object

In [35]:
#converting lompoc federal prison column to numeric
ethnicity_viz['Lompoc Federal Prison'] = pd.to_numeric(ethnicity_viz['Lompoc Federal Prison'].str.strip())

In [None]:
#rather than use the total values from original dataframe (which included groups like unknown and suppressed that I removed), I'm calculating the totals based solely on the ethicity values that I'm using for the visualization
#totals_community = ethnicity_viz.groupby('Date')['Community'].agg('sum')
#totals_prison = ethnicity_viz.groupby('Date')['Lompoc Federal Prison'].agg('sum')
#totals = ethnicity_viz.groupby('Date')['Total'].agg('sum')

In [None]:
#creating new column showing the case count as a percentage of the total
#ethnicity_viz['Percentage of Community Cases'] = ethnicity_viz.apply(lambda x: round(((x.Community / totals_community[totals_community.index == x.Date].values[0]) * 100), 2), axis = 1)

#ethnicity_viz['Percentage of Prison Cases'] = ethnicity_viz.apply(lambda x: round(((x['Lompoc Federal Prison'] / totals_prison[totals_prison.index == x.Date].values[0]) * 100), 2), axis = 1)

#ethnicity_viz['Percentage of Total Cases'] = ethnicity_viz.apply(lambda x: round(((x.Total / totals[totals.index == x.Date].values[0]) * 100), 2), axis = 1)

In [12]:
ethnicity_viz.head()

Unnamed: 0,Ethnicity,Community,Lompoc Federal Prison,Total,Date
0,Hispanic/Latino,7210.0,373,7583.0,"December 15, 2020"
1,White,1861.0,376,2237.0,"December 15, 2020"
2,Asian,233.0,51,284.0,"December 15, 2020"
3,Black/African American,92.0,146,238.0,"December 15, 2020"
4,American Indian/Native Alaskan,8.0,28,36.0,"December 15, 2020"


In [36]:
ethnicity_plot2 = ethnicity_viz[ethnicity_viz['Date'] == 'December 15, 2020'].reset_index(drop=True)
#index = ethnicity_plot2[ethnicity_plot2[column_name] == ethnicity_plot2[column_name].max()].index.values[0] 
ethnicity_plot2['Lompoc Federal Prison'].max()

376

In [18]:
#dictionary of colors for app
#original background color was #111111
colors = {'background': '#18191A',
          'text':'#E4E6EB',
          'text2':'#B0B3BB',
          'paper_bgcolor':'#242526',
          'plot_bgcolor':'#242526'
          }

In [19]:
#getting max value in Total column to use as max for x-range in plot
eth_viz_max = ethnicity_viz['Total'].max()

In [20]:
#function to create barplot for ethnicity data
def create_ethnicity_barplot(date, column_name):
  ethnicity_plot = ethnicity_viz[ethnicity_viz['Date'] == date].reset_index(drop=True)
  index = ethnicity_plot[ethnicity_plot[column_name] == ethnicity_plot[column_name].max()].index.values[0]  
  
  bar_colors = ['#4b4c4f']*ethnicity_plot.shape[0]
  bar_colors[index] = '#ed2009'
    
  fig = px.bar(ethnicity_plot,
               x = column_name,
               y = 'Ethnicity',
               opacity = 0.65,
               range_x = [0,eth_viz_max], #max for range calculated based on max from dataframe
               color = 'Ethnicity',
               color_discrete_sequence = bar_colors,
               orientation = 'h',
               template = 'plotly_dark',
               title = 'Cases by Ethnicity')
               
  
  fig.update_layout(
      hovermode = 'closest',
      font_family = 'Courier',
      font_color = 'white',
      title_font_size = 24,
      xaxis = dict(title=None),
      yaxis = dict(title=None),
      showlegend = False,
      paper_bgcolor = colors['paper_bgcolor'],
      plot_bgcolor = colors['plot_bgcolor'])

  return fig

In [22]:
create_ethnicity_barplot('December 15, 2020','Community')

In [37]:
create_ethnicity_barplot('December 15, 2020','Lompoc Federal Prison')

In [38]:
create_ethnicity_barplot('October 20, 2020','Total')

# Cases by Area

In [None]:
cases_by_area.head()

In [None]:
cases_by_area['Geographic Area'].value_counts()

In [None]:
#cleaning up Geographic Area column
area_dict = {'SOUTH COUNTY UNINCORPORATED AREA includes communities of Montecito, Summerland and the City of Carpinteria':'South County Unincorporated Area',
             'CITY OF SANTA BARBARA and the unincorporated area of Mission Canyon':'Santa Barbara',
             'CITY OF GOLETA':'Goleta',
             'COMMUNITY OF ISLA VISTA':'Isla Vista',
             'UNINCORPORATED AREA OF THE GOLETA VALLEY AND GAVIOTA':'Goleta Valley/Gaviota',
             'SANTA YNEZ VALLEY including the Cities of Solvang & Buellton, and the communities of Santa Ynez, Los Alamos, Los Olivos and Ballard':'Santa Ynez Valley',
             'CITY OF LOMPOC and the communities of Mission Hills and Vandenberg Village':'Lompoc',
             'FEDERAL PRISON IN LOMPOC':'Federal Prison in Lompoc',
             'CITY OF SANTA MARIA':'Santa Maria',
             'COMMUNITY OF ORCUTT':'Orcutt',
             'UNINCORPORATED AREAS of Sisquoc, Casmalia, Garey,\xa0 Cuyama, New Cuyama, and the City of Guadalupe':'Other Unincorporated Areas',
             'Total**':'Total',
             'Total*':'Total'}

cases_by_area['Area'] = cases_by_area['Geographic Area'].replace(area_dict)

In [None]:
cases_by_area.Area.value_counts()

In [None]:
cases_by_area.isna().sum()

In [None]:
cases_by_area.dtypes

In [None]:
#cleaning up Daily Cases column and converting to numeric
cases_by_area['Daily Cases'] = cases_by_area['Daily Cases'].apply(lambda x: x[:-1] if '*' in str(x) else x)    #there are a few cases where the number was follwed by an asterisk, removing asterisks
cases_by_area['Daily Cases'] = cases_by_area['Daily Cases'].str.strip().replace('—',np.nan)   #changing missing values to null so they don't appear on graph 
cases_by_area['Daily Cases'] = pd.to_numeric(cases_by_area['Daily Cases'])     #converting to numeric

In [None]:
#converting Date column to pandas datetime format
cases_by_area['Date'] = pd.to_datetime(cases_by_area['Date'])

In [None]:
#creating new dataframe for visualizations
areas_to_plot = ['Lompoc',
                 'Isla Vista',
                 'Orcutt',
                 'Federal Prison in Lompoc',
                 'Goleta Valley/Gaviota',
                 'Goleta',
                 'Santa Barbara',
                 'Santa Ynez Valley',
                 'Santa Maria']

#dropping original Geographic Area column and filtering for specific areas
area_viz = cases_by_area[cases_by_area['Area'].isin(areas_to_plot)][cases_by_area.columns[1:]]    

#renaming columns
area_viz.rename({'Daily Cases':'New Cases',
                 'Total\xa0 Confirmed Cases':'Total Confirmed Cases',
                 'Recovered by Region':'Recovered Cases',
                 'Still infectious by Region':'Active Cases'},
                axis = 1,
                inplace = True)

In [None]:
area_viz.head()

In [None]:
fig = px.line(area_viz,
              x = 'Date',
              y = 'Active Cases',
              color = 'Area')
fig.show()

In [None]:
#fixing outlier in Active Cases column for Lompoc, looks like an extra zero was added by mistake
area_viz.loc[(area_viz['Area'] == 'Lompoc') & (area_viz['Active Cases'] > 200), ['Active Cases']] = 22

#fixing outlier in Number of Deaths column for Santa Maria, looks like an extra 3 was added by mistake
area_viz.loc[(area_viz['Area'] == 'Santa Maria') & (area_viz['Number of Deaths'] > 300), ['Number of Deaths']] = 34

In [None]:
fig = px.line(area_viz,
              x = 'Date',
              y = 'Active Cases',
              title = 'Active Cases' + ' by Region',
              template = 'plotly_dark',
              #color_discrete_sequence = px.colors.qualitative.D3,
              color = 'Area')
fig.update_layout(
    hovermode = 'closest',
    title_font_family = 'Courier',
    title_font_color = 'white',
    title_font_size = 24,
    legend = dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='left',
        x=0,
        font = dict(
            family = 'Courier',
            size = 12,
            color = 'white')
        )
)

                       
fig.show()

In [None]:
#turning this into function so it can be used for any of the columns in the area_viz dataframe
def create_line_plot(column_name):
  fig = px.line(area_viz,
              x = 'Date',
              y = column_name,
              title = column_name + ' by Region',
              template = 'plotly_dark',
              color = 'Area')
  
  fig.update_layout(
      hovermode = 'closest',
      title_font_size = 24,
      font_family = 'Courier',    #setting font style and color globally, could also set locally by using a font dictionary for the legend or title_font_family/color for title
      font_color = 'white',
      legend = dict(
          orientation='h',
          yanchor='bottom',
          y=1.02,
          xanchor='left',
          x=0
          )
  )
                  
  fig.show()

In [None]:
area_viz.columns.to_list()

In [None]:
create_line_plot('New Cases')

In [None]:
create_line_plot('Total Confirmed Cases')

In [None]:
create_line_plot('Recovered Cases')

In [None]:
create_line_plot('Active Cases')

In [None]:
create_line_plot('Number of Deaths')