In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pygal 
from pygal import Config
from pywaffle import Waffle
from flask import Flask
from pygal.style import Style
# import sqviz as vz

%matplotlib inline

In [2]:
import os, ssl
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and
getattr(ssl, '_create_unverified_context', None)):
    ssl._create_default_https_context = ssl._create_unverified_context

In [3]:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
df = pd.read_csv(url, error_bad_lines=False)

In [4]:
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [108]:
def get_maine_df():
    ''' Make a df for just the Maine data from the API'''
    df_maine = pd.read_csv(
        'http://mecovid19data.almandhunter.com/api/v0/countydata.csv?')
    return df_maine


def combine_counties(df_maine):
    ''' Create a df with total cases and deaths in Maine for each day '''
    # find the total cases, deaths for each day
    df_state_tot = df_maine.groupby('date').sum()
    return df_state_tot


In [109]:
df_all = (get_maine_df()
      .pipe(combine_counties))

In [122]:
df_all.head()

Unnamed: 0_level_0,confirmed,recovered,hospitalizations,deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-11,0,0,0,0
2020-03-12,1,0,0,0
2020-03-13,2,0,0,0
2020-03-14,3,0,0,0
2020-03-15,12,0,0,0


In [111]:
np.where(df_all.index=='2020-03-26')[0]

array([15])

In [112]:
def insert_press_herald_recovered(df):
    recovered_insert = [16,24,36,41,41,68]
    fill_dates = ['2020-03-26','2020-03-27','2020-03-28','2020-03-29','2020-03-30','2020-03-31']
    for idx,rec in enumerate(recovered_insert):
        df.loc[fill_dates[idx]]['recovered'] = rec
    return df

In [121]:
df_all = insert_press_herald_recovered(df_all)
df_all.head()

Unnamed: 0_level_0,confirmed,recovered,hospitalizations,deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-11,0,0,0,0
2020-03-12,1,0,0,0
2020-03-13,2,0,0,0
2020-03-14,3,0,0,0
2020-03-15,12,0,0,0


In [114]:
df_maine = df[df.state == 'Maine']

In [115]:
df_maine.date.max()

'2020-04-21'

In [116]:
df_maine.county.unique()

array(['Androscoggin', 'Cumberland', 'Lincoln', 'Unknown', 'Kennebec',
       'Oxford', 'York', 'Penobscot', 'Sagadahoc', 'Knox', 'Waldo',
       'Franklin', 'Somerset', 'Hancock', 'Aroostook', 'Washington',
       'Piscataquis'], dtype=object)

In [117]:
df_state_tot = df_maine.groupby('date').sum()

In [118]:
df_state_tot.head()

Unnamed: 0_level_0,fips,cases,deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-03-12,23001.0,1,0
2020-03-13,46006.0,2,0
2020-03-14,46006.0,3,0
2020-03-15,69021.0,12,0
2020-03-16,69021.0,17,0


df_state_tot = df_state_tot.head().reset_index()

In [119]:
dates = df_state_tot.index.values.tolist()
cases = df_state_tot.cases.values.tolist()
deaths = df_state_tot.deaths.values.tolist()

In [120]:
dates_major = dates[0::3]

## Plot Total Cases and Deaths

In [123]:
bar_chart = pygal.Bar(x_label_rotation=20, show_minor_x_labels=False, show_legend=False, y_title = 'Number of Cases')
bar_chart.title = 'Total COVID-19 Cases in Maine'
bar_chart.x_labels = dates
bar_chart.x_labels_major = dates_major
bar_chart.add('Number of Cases', cases)

bar_chart.render_to_file('plots/total_cases_maine.svg')                          # Save the svg to a file

In [124]:
bar_chart = pygal.Bar(x_label_rotation=20, show_minor_x_labels=False, show_legend=False, y_title='Number of Deaths')
bar_chart.title = 'Total COVID-19 Deaths in Maine'
bar_chart.x_labels = dates
bar_chart.x_labels_major = dates_major

bar_chart.add('Deaths', deaths)

bar_chart.render_to_file('plots/total_deaths_maine.svg')                          # Save the svg to a file

## Plot New Daily Cases

In [125]:
df_state_tot['new_cases'] = df_state_tot.cases.diff()
df_state_tot['new_cases'][0] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [126]:
df_state_tot.head()

Unnamed: 0_level_0,fips,cases,deaths,new_cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-12,23001.0,1,0,1.0
2020-03-13,46006.0,2,0,1.0
2020-03-14,46006.0,3,0,1.0
2020-03-15,69021.0,12,0,9.0
2020-03-16,69021.0,17,0,5.0


In [127]:
bar_chart = pygal.Bar(x_label_rotation=20, 
                      show_minor_x_labels=False,
                      show_legend=False,
                      y_title = 'Number of New Cases',
                      x_title = 'Day')
bar_chart.title = 'Daily New COVID-19 Cases in Maine'
bar_chart.x_labels = dates
bar_chart.x_labels_major = dates_major
bar_chart.add('Number of New Cases', df_state_tot.new_cases.to_list(), spacing=0)

bar_chart.render_to_file('plots/new_cases_maine.svg')                          # Save the svg to a file

#### Make a df of zero cases for the 13 days before or first case

In [128]:
df_zeros = pd.DataFrame.from_dict({'date':['2020-02-28','2020-02-29','2020-03-01','2020-03-02','2020-03-03','2020-03-04',
                                           '2020-03-05','2020-03-06','2020-03-07','2020-03-08','2020-03-09',
                                           '2020-03-10','2020-03-11'],
                                   'deaths':[0]*13,
                                   'fips':[0]*13,
                                   'cases':[0]*13,
                                   'new_cases':[0]*13})

In [129]:
df_zeros.set_index('date', inplace=True)

In [133]:
df_state_tot = pd.concat([df_zeros, df_state_tot], sort=True)

#### Moving Average New Cases

In [134]:
df_state_tot['moving_avg'] = df_state_tot.new_cases.rolling(window=14).mean()   

In [136]:
df_state_tot.head()

Unnamed: 0_level_0,cases,deaths,fips,moving_avg,new_cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-28,0,0,0.0,,0.0
2020-02-29,0,0,0.0,,0.0
2020-03-01,0,0,0.0,,0.0
2020-03-02,0,0,0.0,,0.0
2020-03-03,0,0,0.0,,0.0


In [149]:
def format_date_str(df):
    date_list = df.index.values.tolist()
    dates = []
    for date_ in date_list:
        dates.append(date_[-5:-3] + '/' + date_[-2:])
    return dates

In [150]:
dates = format_date_str(df_state_tot)

In [98]:
from pygal.graph.graph import Graph

In [99]:
# Create a class extending Graph
class LineBar(Graph):
    # All the series are bar except the last which is line
    def _plot(self):
        for i, serie in enumerate(self.series, 1):
            if i == len(self.series):
                self.line(serie)
            else:
                self.bar(serie)

# Add bar properties
LineBar.bar = pygal.Bar.bar
LineBar._compute = pygal.Bar._compute
LineBar._bar = pygal.Bar._bar
LineBar._series_margin = pygal.Bar._series_margin
LineBar._serie_margin = pygal.Bar._serie_margin
LineBar._tooltip_and_print_values = pygal.Bar._tooltip_and_print_values

# Add line properties
LineBar.line = pygal.Line.line
LineBar._fill = pygal.Line._fill
LineBar._self_close = False

In [100]:
bar_chart = LineBar(x_label_rotation=20, 
                      show_minor_x_labels=False,
                      show_legend=False,
                      y_title = 'Number of New Cases',
                      x_title = 'Day')
bar_chart.title = 'Daily New COVID-19 Cases in Maine'
bar_chart.x_labels = dates
bar_chart.x_labels_major = dates_major
bar_chart.add('Number of New Cases', df_state_tot.new_cases.to_list(), spacing=0)
bar_chart.add('14-Day Moving Average', df_state_tot.moving_avg.to_list(), spacing=0)



bar_chart.render_to_file('plots/test.svg')                          # Save the svg to a file

## Plot Daily Deaths

In [17]:
df_state_tot['new_deaths'] = df_state_tot.deaths.diff()
df_state_tot['new_deaths'][0] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [18]:
df_state_tot.tail()

Unnamed: 0_level_0,fips,cases,deaths,new_cases,new_deaths
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-04-01,299203.0,344,7,41.0,2.0
2020-04-02,299203.0,376,7,32.0,0.0
2020-04-03,345235.0,432,9,56.0,2.0
2020-04-04,345235.0,456,10,24.0,1.0
2020-04-05,345235.0,470,10,14.0,0.0


In [19]:
bar_chart = pygal.Bar(x_label_rotation=20, 
                      show_minor_x_labels=False,
                      show_legend=False,
                      y_title = 'Number of Deaths')
bar_chart.title = 'Daily New COVID-19 Deaths in Maine'
bar_chart.x_labels = dates
bar_chart.x_labels_major = dates_major
bar_chart.add('Number of New Deaths', df_state_tot.new_deaths.to_list())

bar_chart.render_to_file('plots/new_deaths_maine.svg')                          # Save the svg to a file

## Plot County-Level Data

In [20]:
df_maine_today = df_maine[df_maine.date == df_maine.date.max()]

In [21]:
df_maine_today

Unnamed: 0,date,county,state,fips,cases,deaths
34213,2020-04-05,Androscoggin,Maine,23001.0,20,0
34214,2020-04-05,Aroostook,Maine,23003.0,1,0
34215,2020-04-05,Cumberland,Maine,23005.0,238,7
34216,2020-04-05,Franklin,Maine,23007.0,4,0
34217,2020-04-05,Hancock,Maine,23009.0,2,0
34218,2020-04-05,Kennebec,Maine,23011.0,22,1
34219,2020-04-05,Knox,Maine,23013.0,9,0
34220,2020-04-05,Lincoln,Maine,23015.0,8,0
34221,2020-04-05,Oxford,Maine,23017.0,11,0
34222,2020-04-05,Penobscot,Maine,23019.0,22,0


In [22]:
df_maine_today.sort_values(by=['cases'], ascending=False, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [23]:
bar_chart = pygal.Bar(x_label_rotation=20, show_legend=False, y_title='Number of Cases', x_title='County')
bar_chart.title = 'COVID-19 Cases by County (March 31, 2020)'
bar_chart.x_labels = df_maine_today.county.to_list()
bar_chart.add('Cases', df_maine_today.cases.to_list())

bar_chart.render_to_file('plots/current_cases_by_county.svg')                          # Save the svg to a file

## County Case Growth with recovered

In [24]:
df_current_county = pd.read_csv('data/countydata_journal.csv', index_col=None)
df_current_county = df_current_county[df_current_county.end.isna() == True]
df_current_county.sort_values(by=['confirmed'], ascending=False, inplace=True)

In [25]:
df_current_county

Unnamed: 0,county,confirmed,recovered,hospitalizations,deaths,start,end
49,Cumberland,238,84,40,7,2020-04-05T15:08:03.676Z,
54,York,101,31,19,2,2020-04-05T15:08:03.676Z,
52,Penobscot,22,13,4,0,2020-04-05T15:08:03.676Z,
51,Kennebec,22,6,7,1,2020-04-05T15:08:03.676Z,
48,Androscoggin,20,6,6,0,2020-04-05T15:08:03.676Z,
44,Sagadahoc,13,3,5,0,2020-04-05T00:21:23.722Z,
42,Oxford,11,4,1,0,2020-04-05T00:21:23.722Z,
55,Unknown,11,0,3,0,2020-04-05T15:08:03.676Z,
31,Knox,9,2,1,0,2020-04-03T20:27:45.594Z,
7,Lincoln,8,2,0,0,2020-04-02T02:58:03.998Z,


In [26]:
def large_style_bar():
    custom_style = Style(
        colors=['#3F51B5', '#F44336', '#009688'],
        title_font_size=18,
        label_font_size=14,
        major_label_font_size=14,
        legend_font_size=14
    )
    return custom_style

def small_style_bar():
    custom_style = Style(
        colors=['#3F51B5', '#F44336', '#009688'],
        title_font_size=30,
        label_font_size=24,
        major_label_font_size=24,
        legend_font_size=24
    )
    return custom_style

In [27]:
def plot_current_cases_by_county_breakdown(size):
    if size == 'small':
        custom_style = small_style_bar()
        custom_style.title_font_size = 26
        custom_style.label_font_size = 20
        x_rot = 40
    else:
        custom_style = large_style_bar()
        x_rot=20
        custom_style.legend_font_size = 12
        
    # calculate current cases
    df_current_county['active_cases'] = df_current_county.confirmed - df_current_county.deaths - df_current_county.recovered

    # plot the data
    bar_chart = pygal.StackedBar(style=custom_style,
                          x_label_rotation=x_rot,
                          y_title='Number of Cases',
                          x_title='County')
    title_text = 'COVID-19 Cases by County' + ' (' + str(df_maine_today.date.max()) + ')'
    bar_chart.title = title_text
    bar_chart.x_labels = df_current_county.county.to_list()
    bar_chart.add('Active Cases', df_current_county.active_cases.values.tolist())
    bar_chart.add('Deaths', df_current_county.deaths.values.tolist())
    bar_chart.add('Recovered Cases', df_current_county.recovered.values.tolist())
    
    bar_chart.render_to_file('plots/current_cases_by_county_breakdown.svg')                          # Save the svg to a file

In [30]:
df_current_county

Unnamed: 0,county,confirmed,recovered,hospitalizations,deaths,start,end
49,Cumberland,238,84,40,7,2020-04-05T15:08:03.676Z,
54,York,101,31,19,2,2020-04-05T15:08:03.676Z,
52,Penobscot,22,13,4,0,2020-04-05T15:08:03.676Z,
51,Kennebec,22,6,7,1,2020-04-05T15:08:03.676Z,
48,Androscoggin,20,6,6,0,2020-04-05T15:08:03.676Z,
44,Sagadahoc,13,3,5,0,2020-04-05T00:21:23.722Z,
42,Oxford,11,4,1,0,2020-04-05T00:21:23.722Z,
55,Unknown,11,0,3,0,2020-04-05T15:08:03.676Z,
31,Knox,9,2,1,0,2020-04-03T20:27:45.594Z,
7,Lincoln,8,2,0,0,2020-04-02T02:58:03.998Z,


In [31]:
plot_current_cases_by_county_breakdown('small')

##  County Case Growth

In [32]:
def create_days_to_double_data(df, days_to_double):
    cases= [1]
    n_days = len(df.date.unique())
    d = range(1,len(df.date.unique()))
    for day in d:
        cases.append(round(2**(day/days_to_double),2))
    return cases

Color pallette source: https://clrs.cc/

In [33]:
def get_custom_style():
    custom_style = Style(
    colors=['#85144B', '#111111', '#7FDBFF', '#39CCCC', '#3D9970', '#2ECC40', '#01FF70',
            '#FFDC00', '#FF851B', '#FF4136', '#F012BE', '#B10DC9', '#00008b', '#0074D9',
            '#6e6e6e', '#9e9e9e', '#dbdbdb'],
    label_font_size=14,
    major_guide_stroke_dasharray= '1.5,1.5'
    )

    return custom_style

def case_by_county_config():
    config = Config()
    custom_style = get_custom_style()
    config.style=custom_style
    config.x_label_rotation=20
    config.show_minor_x_labels=False
    config.y_labels_major_every=3
    config.show_minor_y_labels=False
    config.truncate_legend=-1 

    return config

In [34]:
def plot_county_lines(df_maine, line_chart):
    for county in df_maine.county.unique():
        if len(list(df_maine.date.unique())) == len(df_maine[df_maine.county==county].cases):
            case_data = df_maine[df_maine.county==county].cases
        else:
            len_diff = len(list(df_maine.date.unique())) - len(df_maine[df_maine.county==county].cases)
            case_data = df_maine[df_maine.county==county].cases.to_list()
            case_data = [0]*len_diff + case_data
        line_chart.add(county, case_data, dots_size=1.5)

In [35]:
config = case_by_county_config()

line_chart = pygal.Line(config,
                        y_title='Number of Cases',
                        height=500,
                        width=700)
line_chart.title = 'COVID-19 Case Growth by County'
line_chart.x_labels = list(df_maine.date.unique())
line_chart.x_labels_major = list(df_maine.date.unique())[0::3]
#add a line for each county
plot_county_lines(df_maine, line_chart)

line_chart.render_to_file('plots/growth_by_county.svg')                          # Save the svg to a file

In [36]:
config = case_by_county_config()

line_chart = pygal.Line(config,
                        y_title='Cases',
                        logarithmic=True,
                        height=500,
                        width=800                       )
line_chart.title = 'COVID-19 Case Growth by County (log scale)'
line_chart.x_labels = list(df_maine.date.unique())
line_chart.x_labels_major = list(df_maine.date.unique())[0::3]

#add a line for each county
plot_county_lines(df_maine, line_chart)

ref_style = stroke_style={'width':2.5}
line_chart.add('Cases Double every 4 Days', create_days_to_double_data(df_maine, 4),
              stroke_style=ref_style, dots_size=1)
line_chart.add('Cases Double every 5 Days', create_days_to_double_data(df_maine, 5),
               stroke_style=ref_style, dots_size=1)
line_chart.add('Cases Double every Week', create_days_to_double_data(df_maine, 7),
               stroke_style=ref_style, dots_size=1)


line_chart.render_to_file('plots/growth_by_county_log.svg')                          # Save the svg to a file

###### Cumberland

In [37]:
bar_chart = pygal.Bar(x_label_rotation=20, show_legend=False, y_title='Number of Cases', show_minor_x_labels=False)
bar_chart.title = 'COVID-19 Cases in Cumberland County'
bar_chart.x_labels = df_maine[df_maine.county=='Cumberland'].date.to_list()
bar_chart.x_labels_major = df_maine[df_maine.county=='Cumberland'].date.to_list()[0::3]
bar_chart.add('Cases', df_maine[df_maine.county=='Cumberland'].cases.to_list())

bar_chart.render_to_file('plots/cumberland_cases.svg')                          # Save the svg to a file

#### York

In [38]:
bar_chart = pygal.Bar(x_label_rotation=20, show_legend=False, y_title='Number of Cases', show_minor_x_labels=False)
bar_chart.title = 'COVID-19 Cases in York County'
bar_chart.x_labels = df_maine[df_maine.county=='York'].date.to_list()
bar_chart.x_labels_major = df_maine[df_maine.county=='York'].date.to_list()[0::3]
bar_chart.add('Cases', df_maine[df_maine.county=='York'].cases.to_list())

bar_chart.render_to_file('plots/york_cases.svg')                          # Save the svg to a file

#### Kennebec

In [5]:
bar_chart = pygal.Bar(x_label_rotation=20, show_legend=False, y_title='Number of Cases', show_minor_x_labels=False)
bar_chart.title = 'COVID-19 Cases in Kennebec County'
bar_chart.x_labels = df_maine[df_maine.county=='Kennebec'].date.to_list()
bar_chart.x_labels_major = df_maine[df_maine.county=='Kennebec'].date.to_list()[0::3]
bar_chart.add('Cases', df_maine[df_maine.county=='Kennebec'].cases.to_list())

bar_chart.render_to_file('plots/kennebec_cases.svg')                          # Save the svg to a file

NameError: name 'df_maine' is not defined

## Race and Ethnicity Data

source: https://data.census.gov/cedsci/profile?q=Maine&g=0400000US23&tid=ACSDP1Y2018.DP05

In [64]:
df_ethnicity = pd.DataFrame.from_dict({'race':['American Indian or Alaskan Native', 'Asian or Pacific Islander',
                         'Black or African American', 'White', 'Two or More Races', 'Other Race'],
                 'perc_maine_pop': [0.6, 1.2, 1.4, 94.3, 2.2, 0.2],
                 'count_covid': [2, 20, 73, 829, 2, 26]
                })

In [65]:
df_ethnicity['perc_covid_cases'] = round(df_ethnicity.count_covid/df_ethnicity.count_covid.sum()*100, 1)

In [66]:
df_ethnicity.sort_values(by=['perc_covid_cases'], ascending=False, inplace=True)
df_ethnicity

Unnamed: 0,race,perc_maine_pop,count_covid,perc_covid_cases
3,White,94.3,829,87.1
2,Black or African American,1.4,73,7.7
5,Other Race,0.2,26,2.7
1,Asian or Pacific Islander,1.2,20,2.1
0,American Indian or Alaskan Native,0.6,2,0.2
4,Two or More Races,2.2,2,0.2


In [69]:
bar_chart = pygal.HorizontalBar()
bar_chart.title = 'Case Rate by Race and Ethnicity'
bar_chart.x_labels = df_ethnicity.race
bar_chart.add('% of COVID Cases', df_ethnicity.perc_covid_cases.to_list())
bar_chart.add('% of Maine Population', df_ethnicity.perc_maine_pop.to_list())


bar_chart.render_to_file('plots/ethnicity.svg')                          # Save the svg to a file

In [70]:
bar_chart = pygal.Bar()
bar_chart.title = 'Case Rate by Race and Ethnicity'
bar_chart.x_labels = df_ethnicity.race
bar_chart.add('% of COVID Cases', df_ethnicity.perc_covid_cases.to_list())
bar_chart.add('% of Maine Population', df_ethnicity.perc_maine_pop.to_list())


bar_chart.render_to_file('plots/ethnicity.svg')                          # Save the svg to a file

In [68]:
xy_chart = pygal.XY(show_legend=False,
                    stroke=False,
                    dots_size=3
                   )
title_text = '% of Maine Population vs % of COVID Cases' + ' (May 9, 2020)'
xy_chart.title = title_text
# plot the data for each county
for index, row in df_ethnicity.iterrows():
    pop_data = [(row['perc_maine_pop'], row['perc_covid_cases'])]
    xy_chart.add(row.race, pop_data)
        
xy_chart.render_to_file('plots/scatter_ethnicity.svg')                          # Save the svg to a file

## Cases Per Capita

#### County Population

https://data.census.gov/cedsci/profile?g=0500000US23005&q=Cumberland
https://data.census.gov/cedsci/profile?g=0500000US23031&q=York
https://data.census.gov/cedsci/profile?g=0500000US23017&q=Oxford
https://data.census.gov/cedsci/profile?g=0500000US23023&q=Sagadahoc
https://data.census.gov/cedsci/profile?g=0500000US23001&q=Androscoggin
    https://data.census.gov/cedsci/profile?g=0500000US23015&q=Lincoln
        https://data.census.gov/cedsci/profile?g=0500000US23011&q=Kennebec
https://data.census.gov/cedsci/profile?g=0500000US23007&q=Franklin
    https://data.census.gov/cedsci/profile?g=0500000US23027&q=Waldo
        https://data.census.gov/cedsci/profile?g=0500000US23025&q=Somerset
            https://data.census.gov/cedsci/profile?g=0500000US23009&q=Hancock
https://data.census.gov/cedsci/profile?g=0500000US23029&q=Washington
https://data.census.gov/cedsci/profile?g=0500000US23021&q=Piscataquis
    https://data.census.gov/cedsci/profile?g=0500000US23019&q=Penobscot
        https://data.census.gov/cedsci/profile?g=0500000US23003&q=Aroostook

In [52]:
population_data = {'county':['Cumberland', 'York', 'Oxford', 'Sagadahoc', 'Androscoggin',
                             'Lincoln', 'Kennebec', 'Franklin', 'Knox', 'Waldo', 'Somerset',
                             'Hancock', 'Washington', 'Piscataquis', 'Penobscot', 'Aroostook',
                             'Unknown'],
                   'population':[290944, 203102,  57325,  35277, 107444,  34067, 121545,
                                 30019,  39823,  39418,  50710,  54541,  31694,  16877,
                                 151748,  68269, np.nan],
                   'county_area_sq_mile':[835.5, 990.5, 2076.3, 253.9, 467.8,
                                455.7, 867.3, 1696.5, 365, 729.7, 3923.3,
                                1586.6, 2562, 3959.9, 3396.3, 6669.3,
                                np.nan]}

In [53]:
df_population = pd.DataFrame.from_dict(population_data)
df_population['pop_density'] = df_population.population/df_population.county_area_sq_mile

In [57]:
df_maine_today = df_maine_today.merge(df_population, on='county')
df_maine_today['cases_per_ten_thousand'] = df_maine_today.cases/ (df_maine_today.population/10000)
df_maine_today = df_maine_today.round({'cases_per_ten_thousand':1})

In [62]:
df_maine_today = df_maine_today.round({'pop_density':1})
df_maine_today

Unnamed: 0,date,county,state,fips,cases,deaths,cases_per_ten_thousand,population,county_area_sq_mile,pop_density
0,2020-04-05,Cumberland,Maine,23005.0,238,7,8.2,290944.0,835.5,348.2
1,2020-04-05,York,Maine,23031.0,101,2,5.0,203102.0,990.5,205.0
2,2020-04-05,Kennebec,Maine,23011.0,22,1,1.8,121545.0,867.3,140.1
3,2020-04-05,Penobscot,Maine,23019.0,22,0,1.4,151748.0,3396.3,44.7
4,2020-04-05,Androscoggin,Maine,23001.0,20,0,1.9,107444.0,467.8,229.7
5,2020-04-05,Sagadahoc,Maine,23023.0,13,0,3.7,35277.0,253.9,138.9
6,2020-04-05,Oxford,Maine,23017.0,11,0,1.9,57325.0,2076.3,27.6
7,2020-04-05,Unknown,Maine,,11,0,,,,
8,2020-04-05,Knox,Maine,23013.0,9,0,2.3,39823.0,365.0,109.1
9,2020-04-05,Lincoln,Maine,23015.0,8,0,2.3,34067.0,455.7,74.8


In [73]:
for index, row in df_maine_today.iterrows():
    pop_data = [(row['pop_density'], row['cases_per_ten_thousand'])]
    print(row.county, pop_data)

Cumberland [(348.2, 8.2)]
York [(205.0, 5.0)]
Kennebec [(140.1, 1.8)]
Penobscot [(44.7, 1.4)]
Androscoggin [(229.7, 1.9)]
Sagadahoc [(138.9, 3.7)]
Oxford [(27.6, 1.9)]
Unknown [(nan, nan)]
Knox [(109.1, 2.3)]
Lincoln [(74.8, 2.3)]
Franklin [(17.7, 1.3)]
Somerset [(12.9, 0.8)]
Waldo [(54.0, 0.8)]
Hancock [(34.4, 0.4)]
Aroostook [(10.2, 0.1)]
Washington [(12.4, 0.3)]


In [None]:
# Drop the Unknown county column
unknown_idx = df_maine_today[df_maine_today.county=='Unknown'].index
df_maine_today = df_maine_today.drop(labels=unknown_idx, axis=0)

In [None]:
# Drop the Unknown county row
unknown_idx = df_maine_today[df_maine_today.county=='Unknown'].index
df_maine_today = df_maine_today.drop(labels=unknown_idx, axis=0)

# plot the data
bar_chart = pygal.Bar(x_label_rotation=20,
                      show_legend=False,
                      y_title='Cases per 10,000 Residents',
                      x_title='County')
title_text = 'COVID-19 Cases per 10,000 Residents' + ' (' + \
              str(df_maine_today.date.max()) + ')'
bar_chart.title = title_text
bar_chart.x_labels = df_maine_today.county.to_list()
bar_chart.add('Cases per 10,000 Residents', df_maine_today.cases_per_ten_thousand.to_list())

bar_chart.render_to_file('plots/current_cases_per_hundred_thous.svg')                          # Save the svg to a file

## Make a df with Press Herald Data (https://www.pressherald.com/2020/03/17/track-maines-coronavirus-cases-by-county/)


*press herald data used for recovered counts for March 26-31

In [None]:
def append_recovered_data(df):
    recovered = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,24,36,41,41,68,80,94,113]
    if len(recovered) != len(df):
        recovered.append(np.nan)
    df['recovered'] = recovered
    return df

In [None]:
df_state_tot = append_recovered_data(df_state_tot)
df_state_tot.tail()

In [None]:
df_state_tot['active_cases'] = df_state_tot.cases - df_state_tot.deaths - df_state_tot.recovered

In [None]:
bar_chart = pygal.StackedBar(x_label_rotation=20, show_minor_x_labels=False)
bar_chart.title = 'Maine COVID-19 Cases by Status'
bar_chart.x_labels = df_state_tot.index.values.tolist()
bar_chart.x_labels_major = df_state_tot.index.values.tolist()[0::3]

bar_chart.add('Active Cases', df_state_tot.active_cases.values.tolist())
bar_chart.add('Deaths', df_state_tot.deaths.values.tolist())
bar_chart.add('Recovered Cases', df_state_tot.recovered.values.tolist())


bar_chart.render_to_file('plots/case_status.svg')                          # Save the svg to a file

## Hospitalized Patients 

In [None]:
hospitalized=[None, None,None,None,None,None,None,None,None,None,49,57,63,68,75]

In [None]:
hosp_dates = ['2020-03-20', '2020-03-21',
 '2020-03-22',
 '2020-03-23',
 '2020-03-24',
 '2020-03-25',
 '2020-03-26',
 '2020-03-27',
 '2020-03-28',
 '2020-03-29',
 '2020-03-30',
 '2020-03-31',
 '2020-04-01',
 '2020-04-02',
 '2020-04-03',
 '2020-04-04']

In [None]:
custom_style = Style(
    colors=['#08519c', '#3182bd'],
    label_font_size=14,
    major_guide_stroke_dasharray= '1.5,1.5'
)

line_chart = pygal.Line(style=custom_style,
                        include_x_axis=True,
                        x_label_rotation=20,
                        show_minor_x_labels=False,
                        y_labels_major_every=2,
                        show_minor_y_labels=False,
                        truncate_legend=-1,
                        x_title = 'Date')
line_chart.title = 'Number of Patients Hospitalized for COVID-19 in Maine'
line_chart.x_labels = hosp_dates
line_chart.x_labels_major = hosp_dates[0::2]
line_chart.add('Count Hospitalized', hospitalized,
               stroke_style={'dasharray': '3, 6', 'width':2.5})

line_chart.render_to_file('plots/hospitalized.svg')                          # Save the svg to a file

## Patient Age

In [None]:
df_age = pd.DataFrame.from_dict({'age_range':['< 20','20s', '30s', '40s', '50s', '60s', '70s','80+'], 
                                 'cases': [9,43,35,67,87,96,58,37]})
df_age.head()

In [None]:
total_count = df_age.cases.sum()
df_age['percent_of_tot'] = df_age.cases/total_count*100
df_age = df_age.round({'percent_of_tot':1})
df_age.head()

In [None]:
fig = plt.figure(
    FigureClass=Waffle, 
    rows=4,
    values=df_age.percent_of_tot,
    labels=list(df_age.age_range),
    legend={'loc': 'lower left',
            'bbox_to_anchor': (-.01, -0.5),
            'ncol': len(df_age.percent_of_tot),
            'fontsize': 12,
            'title':'Age of Patient'
           },
    title={'label': 'Case Distribution by Patient Age',
           'loc': 'left',
           'pad':10,
           'fontdict': {'fontsize': 20}},
    figsize=(10, 8)  # figsize is a parameter of plt.figure

)

In [None]:
bar_chart = pygal.Bar(x_label_rotation=20,
                      show_legend=False,
                      y_title='Percent of Cases (%)',
                      x_title='Age Group')
bar_chart.title = 'Case Distribution by Patient Age'
bar_chart.x_labels = df_age.age_range
bar_chart.add('% of Cases', df_age.percent_of_tot.to_list())

bar_chart.render_to_file('plots/age_range.svg')                          # Save the svg to a file

## Plot available hospital beds, ICU, ventilators

In [156]:
def find_occupied_assets(the_dict, total_asset, available_asset, return_col_name='occupied'):
    occupied_asset = []
    for idx in range(0, len(the_dict['date'])):
        if the_dict[total_asset][idx] is not None and the_dict[available_asset][idx] is not None:
            occupied_asset.append(the_dict[total_asset][idx] - the_dict[available_asset][idx])
        else:
            occupied_asset.append(None)
    
    the_dict[return_col_name] = occupied_asset
    return the_dict

In [157]:
def find_total_vent_including_alt(the_dict):
    total_vent_including_alt = []
    for idx in range(0, len(the_dict['date'])):
        if the_dict['total_ventilators'][idx] is not None and the_dict['alternative_ventilators'][idx] is not None:
            total_vent_including_alt.append(the_dict['total_ventilators'][idx] + the_dict['alternative_ventilators'][idx])
        else:
            total_vent_including_alt.append(None)
            
    the_dict['total_vent_including_alt'] = total_vent_including_alt
    return the_dict

In [158]:
def create_hospital_assets_dict():
    hosp_assets_dict = {'date':['2020-03-20','2020-03-21', '2020-03-22', '2020-03-23','2020-03-24',
                            '2020-03-25','2020-03-26','2020-03-27','2020-03-28','2020-03-29',
                            '2020-03-30','2020-03-31', '2020-04-01','2020-04-02','2020-04-03'],
                         'total_icu_beds':[135, None, None, None, None,
                                           151, 151, 164, None, None,
                                           176, 190, 272, 285, 289],
                         'available_icu_beds': [56, None, None, None, 77,
                                                83, 86, 86, None, None,
                                                92, 90, 124, 122, 110],
                         'total_ventilators':[291, None, None, None, None,
                                              306, 307, 308, None, None,
                                              309, 330, 348, 334, 324],
                         'available_ventilators':[218, None, None, None, 248,
                                                  248, 250, 247, None, None,
                                                  253, 262, 271, 266, 267],
                         'alternative_ventilators':[None, None, None, None, None,
                                                    None, None, 58, None, None,
                                                    87, 89, 128, 186, 199],
                         'respiratory_therapists':[None, None, None, None, 84,
                                                   88, None, None, None, None,
                                                   None, None, None, None, 127]
                        }
    # Calculate the number of occupied ICU Beds
    hosp_assets_dict = find_occupied_assets(hosp_assets_dict, 'total_icu_beds', 'available_icu_beds',
                                            return_col_name='occupied_icu_beds')
    hosp_assets_dict = find_occupied_assets(hosp_assets_dict, 'total_ventilators', 'available_ventilators',
                                            return_col_name='occupied_ventilators')
    hosp_assets_dict = find_total_vent_including_alt(hosp_assets_dict)

    return hosp_assets_dict

In [160]:
hosp_assets_dict = create_hospital_assets_dict()



list

*numbers are going up because more hospitals are reporting to CDC

In [None]:
hosp_assets_dict = create_hospital_assets_dict()

custom_style = Style(
    colors=['#08519c', '#3182bd', '#6baed6'],
    label_font_size=14,
    major_guide_stroke_dasharray= '1.5,1.5',
    legend_font_size= 10
)

line_chart = pygal.Line(style=custom_style,
                        dots_size=2,
                        x_label_rotation=20,
                        show_minor_x_labels=False,
                        y_labels_major_every=2,
                        show_minor_y_labels=False,
                        width=750,
                        height=400,
                        truncate_legend=-1
                       )

line_chart.title = 'Statewide Ventilator Availablity'
line_chart.x_labels = hosp_assets_dict['date']
line_chart.x_labels_major = hosp_assets_dict['date'][0::3]

line_chart.add('Total Ventilators (including alternative)', hosp_assets_dict['total_vent_including_alt'],
               stroke_style={'width':2.5}, show_dots=1, dots_size=1)
line_chart.add('Total Traditional Ventilators', hosp_assets_dict['total_ventilators'],
               stroke_style={'width':2.5}, show_dots=1, dots_size=1)
line_chart.add('Occupied Ventilators', hosp_assets_dict['occupied_ventilators'],
              stroke_style={'dasharray': '3, 6', 'width':2.5})

line_chart.render_to_file('plots/ventilators.svg')                          # Save the svg to a file

In [None]:
hosp_assets_dict = create_hospital_assets_dict()


custom_style = Style(
    colors=['#08519c', '#3182bd'],
    label_font_size=14,
    major_guide_stroke_dasharray= '1.5,1.5'
)

line_chart = pygal.Line(style=custom_style,
                        dots_size=2.5,
                        x_label_rotation=20,
                        truncate_legend=-1,
                        show_minor_x_labels=False,
                        y_labels_major_every=2,
                        show_minor_y_labels=False
                        )
line_chart.title = 'Statewide ICU Bed Availablity'
line_chart.x_labels = hosp_assets_dict['date']
line_chart.x_labels_major = hosp_assets_dict['date'][0::3]

line_chart.add('Total ICU Beds', hosp_assets_dict['total_icu_beds'], stroke_style={'width':2.5},
               show_dots=1, dots_size=1)
line_chart.add('Occupied ICU Beds', hosp_assets_dict['occupied_icu_beds'],
               stroke_style={'dasharray': '3, 6', 'width':2.5})
# stroke_style={'width':2.5}


line_chart.render_to_file('plots/icu_beds.svg')                          # Save the svg to a file