# COVID-19 Interactive Map by State

### Authors: Christopher McKenzie, Andrew Chuah, Jayant Kumar, Malika Thakur

The purpose of this program is to:
1. Pull COVID-19 data from GitHub
2. Pull the locations of PNGs which contain this data on graphs
3. Combine all data and PNG locations into one dataframe
4. Create a map of the lakes, land, and states of the US
5. Combine those maps together into one
6. Merge dataframe with US map
7. Output an interactive map into an html file

In [1]:
import pandas as pd             
import datetime as dt
import numpy as np

import requests, io             # internet and input tools  
import os                       # operating system tools (check files)

from census import Census

import geopandas as gpd # this is the main geopandas 
from shapely.geometry import Polygon # also needed

#Imports below are for interactive map creation
import json
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)

from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer, all_palettes
from bokeh.plotting import figure
from bokeh.models import Title
from bokeh.layouts import gridplot

from bokeh.plotting import figure, save
from bokeh.models import Panel, Tabs

from bokeh.resources import CDN
from bokeh.embed import file_html

from bokeh.io import output_file, show
from bokeh.models import Div

In [2]:
class CovidMapCreator:


    """1. Pulls COVID-19 cases and death data
    2. Pulls COVID-19 vaccination data
    3. Merges both into one dataframe
    4. Pulls PNGs of tables of cases + deaths, and vaccinations
    5. Pulls lake, land, and state shape files
    6. Merges COVID data with US shapefile data
    7. Creates map
    """

    
#     def __init__(self):

#         """Initialize functions"""
        
#         df = self.import_csvs_into_df("https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_cases.csv",
#                                  "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_vaccinations.csv")
        
#         self.import_pngs_into_df(df, "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/docs/Cases_PNGs/",
#                             "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/docs/Vaccinations_PNGs/")
        

    def import_csvs_into_df(self, cases_file: str, vaccine_file: str):

        """Creates a dataframe by importing the csv file with COVID-19 Cases and Deaths"""

        #Used to limit size of dataframe to only significant data
        cases_col = ['State/Territory',
        'Total Cases','Total Deaths',
        'Death Rate per 100000', '7-Day Cases Rate per 100000']
        
        #Used to limit df to only 50 states, and exclude territory data
        state_names = ["Alaska", "Alabama", "Arkansas", "Arizona",
        "California", "Colorado", "Connecticut", "Delaware", "Florida",
        "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana",
        "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland",
        "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi",
        "Montana", "North Carolina", "North Dakota", "Nebraska",
        "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York",
        "New York State", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
        "Rhode Island", "South Carolina", "South Dakota", "Tennessee",
        "Texas", "Utah", "Virginia", "Vermont", "Washington",
        "Wisconsin", "West Virginia", "Wyoming"]

        df = pd.read_csv(cases_file, skiprows=2, usecols=cases_col)

        #Limits to only those in state_name list
        df = df[df["State/Territory"].isin(state_names)]
        
        #We need to reset the index after removing other states
        #Drop = True: Does not insert a brand new index column into dataframe columns after reset.
        df = df.reset_index(drop=True)

        #Repeat the same process for vaccine date

        vac_col = ['State/Territory/Federal Entity',
        'Total Doses Delivered', 'Percent of Total Pop with at least One Dose by State of Residence',
        'Total Doses Administered by State where Administered',
        'Percent of Total Pop Fully Vaccinated by State of Residence',
        'People Fully Vaccinated by State of Residence']

        df_vac = pd.read_csv(vaccine_file, skiprows=2, usecols=vac_col)
        
        #Rename columns to fit in bar graphs later
        #Inplace needs to be set to True so modifications are made within dataframe.
        #Otherwise, we would need to create a separate copy of the df to keep changes.
        df_vac.rename(columns={'State/Territory/Federal Entity' : "State/Territory",
                    'Total Doses Administered by State where Administered' :
                    'Total Doses Administered', 
                    'Percent of Total Pop with at least One Dose by State of Residence':
                    'Percent Pop One Dose',
                    'Percent of Total Pop Fully Vaccinated by State of Residence': 'Percent Fully Vaccinated',
                    'People Fully Vaccinated by State of Residence': 'People Fully Vaccinated'}, inplace=True)

        df_vac = df_vac[df_vac["State/Territory"].isin(state_names)]
        df_vac = df_vac.reset_index(drop=True)

        #Merge the vaccination data into the main dataframe
        df = df.join(df_vac["Total Doses Delivered"])
        df = df.join(df_vac["Percent Pop One Dose"])
        df = df.join(df_vac["Total Doses Administered"])
        df = df.join(df_vac["Percent Fully Vaccinated"])
        df = df.join(df_vac["People Fully Vaccinated"])

        #These data in these columns need to be converted to int. Currently str type
        df[["Total Cases",'Total Deaths',
            'Death Rate per 100000',
            '7-Day Cases Rate per 100000']] = df[["Total Cases",'Total Deaths',
                                           'Death Rate per 100000',
                                            '7-Day Cases Rate per 100000']].astype(int)

        df[["Total Doses Delivered", "Percent Pop One Dose",
        "Total Doses Administered",
        "Percent Fully Vaccinated",
        "People Fully Vaccinated"]] = df[["Total Doses Delivered",
                                          "Percent Pop One Dose",
                                          "Total Doses Administered",
                                          "Percent Fully Vaccinated",
                                            "People Fully Vaccinated"]].astype(int)

        return df
    
    #GOOD
    def create_lake_map(self, path=os.getcwd()):
        
        """Creates a map of the lakes."""
    
        lake_shapes = path + "\\shapefiles\\lake\\ne_10m_lakes.shx"
        
        #Returns GeoDataFrame object we can work with
        lake_map = gpd.read_file(lake_shapes)
        #Sets coordinate reference system of map to current map projection: espg:2163
        lake_map = lake_map.to_crs({'init': "epsg:2163"})

        return lake_map
    
    #GOOD
    def create_land_map(self, path=os.getcwd()):
        
        """Creates a map of the land."""
        
        land_shapes = path + "\\shapefiles\\land\\ne_50m_land.shx"
        land_map = gpd.read_file(land_shapes)
        land_map = land_map.to_crs({'init': "epsg:2163"})

        return land_map
    
    #GOOD
    def create_us_map(self, lake_map, land_map, path=os.getcwd()):
        
        """Merges state, land, and lake maps to create a map of the US"""

        state_shapes = path + "\\shapefiles\\state\\tl_2017_us_state.shx"
        us_map = gpd.read_file(state_shapes)
        us_map = us_map.to_crs({'init': "epsg:2163"})

        #Overlay land shapes over state shapes.
        #We use intersection so it only returns the shapes in both geodataframes
        #US map displays the shapes of the states more accurately with land map geodataframe
        us_map = gpd.overlay(us_map, land_map, how='intersection')
        
        #Try without great lakes
        #Overlay the great lakes over the US map
        #We use difference to cut out the lakes from the US Map
        #Displays US Map more accurately
        great_lakes = ["Lake Superior", "Lake Michigan", "Lake Erie", "Lake Superior", "Lake Huron"]
        us_map = gpd.overlay(us_map, lake_map[lake_map.name.isin(great_lakes)], how='difference')

        #Reduces line size and shortens load time significantly
        us_map["geometry"] = us_map["geometry"].simplify(2000)
        
        return us_map
        
    def merge_gdf_df(self, us_map, df):
        
        """Merges US map with COVID-19 data from dataframe"""
        
        #Merge df data into US Map
        #Removed indicator=True - only adds a column saying where they merged. Not needed
        us_map = us_map.merge(df, left_on='NAME',
                            right_on='State/Territory', how="left")
        
        #STATEFP is given ID for each state
        #We set STATEFP as index so we can drop territories by row
        us_map.set_index("STATEFP", inplace=True)
        
        #This removes all territory data (Puerto Rico, Guam, Virgin Islands, etc)
        drop_list = ["11","60","66","69","72","78"]
        us_map.drop(drop_list, inplace=True)
        us_map.reset_index()
        
        #Create labels for the map PNGs
        #Removes decimal and adds commas to numbers >999
        #We need to create separate formatted columns of each label
        #Otherwise the segment cuts will not work later
        
        ##CHANGE LABEL NAMES HERE
        us_map["cases_label"] = us_map["Total Cases"].round(0)
        us_map["cases_label"] = us_map["cases_label"].map('{:,.0f}'.format)
        us_map["deaths_label"] = us_map["Total Deaths"].round(0)
        us_map["deaths_label"] = us_map["deaths_label"].map('{:,.0f}'.format)
        us_map["death_rate_label"] = us_map["Death Rate per 100000"].round(0)
        us_map["death_rate_label"] = us_map["death_rate_label"].map('{:,.0f}'.format)
        us_map['cases_rate_label'] = us_map['7-Day Cases Rate per 100000'].round(0)
        us_map['cases_rate_label'] = us_map["cases_rate_label"].map('{:,.0f}'.format)

        #Vaccine map
        us_map["deliv_label"] = us_map["Total Doses Delivered"].round(0)
        us_map["deliv_label"] = us_map["deliv_label"].map('{:,.0f}'.format)
        us_map["admin_label"] = us_map["Total Doses Administered"].round(0)
        us_map["admin_label"] = us_map["admin_label"].map('{:,.0f}'.format)
        us_map["one_dose_label"] = us_map["Percent Pop One Dose"].round(0)
        us_map["one_dose_label"] = us_map["one_dose_label"].map('{:,.0f}'.format)
        us_map["percent_fully_label"] = us_map["Percent Fully Vaccinated"].round(0)
        us_map["percent_fully_label"] = us_map["percent_fully_label"].map('{:,.0f}'.format)
        us_map["num_fully_label"] = us_map["People Fully Vaccinated"].round(0)
        us_map["num_fully_label"] = us_map["num_fully_label"].map('{:,.0f}'.format)

        return us_map

    def create_case_map(self, us_map):
        
        
        #Create ticks for the color bar
        int_case_ticks = [0, 10000, 25000, 50000,
                        100000, 250000, 500000,
                        750000, 1000000, 2000000, np.inf]
        
        #Segments data based on ticks 
        us_map["int_case_ticks"]= pd.cut(us_map["Total Cases"], int_case_ticks, labels=range(0,10))
        
        
        ###CHANGE NAME OF THIS
        #Convert to us_map to geojson
        #Needed to create interactive map with colors, hover tools, etc
        source = GeoJSONDataSource(geojson = us_map.to_json())
        
        palette = (all_palettes['OrRd'][9])[::-1]
        
        #MAYBE CHANGE NAME
        #Maps palette colors from low to high
        #Needed for patching in colors into states
        color = LinearColorMapper(palette=palette, low=0, high=9)
        
        #Adds some of the values of ticks to right side of color bar
        #Maybe we should just show all values?
        case_ticks = {2:'1,000', 4:'100,000', 6:'500,000', 8:'2,000,000'}
        
        ##FIGURE THIS OUT AT THE END
        #Today's date/ To be used in titles
        today = dt.date.today()
        d = today.strftime("%B %d, %Y")
        
        # Create figure object. First one
        case_map = figure(active_scroll='wheel_zoom',
                active_drag='pan',
                plot_height = 800,
                plot_width = 1300, 
                toolbar_location = None)
        
        #This keeps the grid lines out
        #Check which ones you need and don't need. Maybe not the last one
        case_map.xgrid.grid_line_color = None
        case_map.ygrid.grid_line_color = None
        case_map.axis.visible = False
        
        case_map.background_fill_color = "grey"
        case_map.background_fill_alpha = 0.25
        case_map.border_fill_color = "#F5F5F5"
        
        #Add patch renderer to figure.
        #Adding colors and lines to states?
        states = case_map.patches('xs','ys', source=source,
                        fill_color = {"field" :'int_case_ticks',
                                        "transform" : color},
                        line_color = "gray", 
                        line_width = 0.5, 
                        fill_alpha = 1)

        state_line = case_map.multi_line('xs','ys', source=source,
                        line_color = "black", 
                        line_width = 0.5)
        
        #CHECK OUT THE DOCUMENTATION
        #Adding the color bar
        color_bar = ColorBar(color_mapper=color, 
                     label_standoff = 8,
                     width = 20, height = 700,
                     border_line_color = None,
                     orientation = "vertical",
                     location=(0,0), major_label_overrides = case_ticks,
                     major_label_text_align="left",
                     major_tick_line_alpha = .25)

        color_bar.background_fill_color = "#F5F5F5"
        case_map.add_layout(color_bar, "right")
        
        #CHANGE NAMES MAYBE IDK
        #Text/titles strings to be inserted in map
        title = "COVID-19 Cases by State as of " + d + " || Total Cases: " +\
        f"{int(us_map['Total Cases'].sum()):,d}"
        descip = "Data from the CDC COVID-19 Data Tracker"
        author = "Created by Team Coach"
        
        #CHANGE FONT SIZES
        #Adding strings to map
        case_map.add_layout(Title(text=descip, text_font_style="italic", text_font_size="9pt"), 'above')
        case_map.add_layout(Title(text=title, text_font_size="11pt"), 'above')
        case_map.add_layout(Title(text=author, text_font_style="italic", text_font_size="9pt"), 'below')
        
        TOOLTIPS = [
            ("State", "@NAME"),
            ("Total Cases", "@cases_label"),
            ('7-Day Cases Rate Per 100K', '@cases_rate_label'),
            ("Total Deaths", "@deaths_label"),
            ("Death Rate Per 100K", "@death_rate_label")
        ]
        case_map.add_tools(HoverTool(renderers = [states],
                      tooltips = TOOLTIPS))
        
        return case_map
        
    def create_vac_map(self, us_map):
        
        
        
        int_vac_ticks = [400000, 800000, 1000000, 2000000,
                        3000000, 4000000, 5000000, 10000000,
                        15000000, 30000000, np.inf]
        us_map["int_vac_ticks"]= pd.cut(us_map["Total Doses Delivered"], int_vac_ticks, labels=range(0,10))
        
        source = GeoJSONDataSource(geojson = us_map.to_json())
        
        palette = (all_palettes['GnBu'][9])[::-1]
        color_mapper_vac = LinearColorMapper(palette=palette, low=0, high=9)
        
        vac_ticks = {2:'800,000', 4:'3,000,000', 6:'5,000,000', 8:'30,000,000'}
        
        today = dt.date.today()
        d = today.strftime("%B %d, %Y")
        
        vac_map = figure(active_scroll='wheel_zoom',
                active_drag='pan',
                plot_height = 800,
                plot_width = 1300, 
                toolbar_location = None)
        
        vac_map.xgrid.grid_line_color = None
        vac_map.ygrid.grid_line_color = None        
        vac_map.axis.visible = False
        vac_map.background_fill_color = "grey"
        vac_map.background_fill_alpha = 0.25
        vac_map.border_fill_color = "#F5F5F5"


        states = vac_map.patches('xs','ys', source=source,
                        fill_color = {"field" :'int_vac_ticks',
                                        "transform" : color_mapper_vac},
                        line_color = "gray", 
                        line_width = 0.5, 
                        fill_alpha = 1)

        state_line = vac_map.multi_line('xs','ys', source=source,
                        line_color = "black", 
                        line_width = 0.5)
        
        #Label_standoff - how many pixels tick labels will be away from colorbar
        color_bar = ColorBar(color_mapper = color_mapper_vac, 
                     label_standoff = 8,
                     width = 20, height = 700,
                     border_line_color = None,
                     orientation = "vertical",
                     location=(0,0), major_label_overrides = vac_ticks,
                     major_label_text_align="left",
                     major_tick_line_alpha = .25)
        
        color_bar.background_fill_color = "#F5F5F5"
        vac_map.add_layout(color_bar, "right")
        
        
        title_vaccines = "COVID-19 Vaccinations by State as of " + d + " || Total Doses Delivered: " +\
        f"{int(us_map['Total Doses Delivered'].sum()):,d}"
        descip = "Data from the CDC COVID-19 Data Tracker"
        author = "Created by Team Coach"

        vac_map.add_layout(Title(text=descip, text_font_style="italic", text_font_size="9pt"), 'above')
        vac_map.add_layout(Title(text=title_vaccines, text_font_size="11pt"), 'above')
        vac_map.add_layout(Title(text=author, text_font_style="italic", text_font_size="9pt"), 'below')
        
        TOOLTIPS = [
            ("State", "@NAME"),
            ("Total Doses Delivered", "@deliv_label"),
            ("Total Doses Administered", "@admin_label"),
            ("% Total Pop. w/ 1 Dose", "@one_dose_label"+"%"),
            ("% Total Pop. Fully Vacc.", "@percent_fully_label"+"%"),
            ("Total Fully Vaccinated", "@num_fully_label")
        ]

        vac_map.add_tools(HoverTool(renderers = [states],
                      tooltips = TOOLTIPS))
        
        return vac_map

    def create_interactive_map(self, case_map, vac_map, file_path=os.getcwd()):
        
        
        div = Div(text="""<b>1.</b> The tabs on the top left will allow you to switch between COVID-19 maps regarding cases and vaccinations.
        <p><b>2.</b> Use your cursor to highlight states of interest.</p>
        <p><b>3.</b> Once a state is highlighted, information regarding COVID-19 cases or vaccinations will be displayed, depending on which tab is selected.</p>
        <p><b>4.</b> You may use the scroll wheel to zoom in or out of any point in the map.</p>
        <p><b>5.</b> You may use the left click button to reposition the map.</p>
        <br>
        <p>Please leave your feedback on using this map by clicking on this link: <a href='https://forms.gle/8AzJx1AnGhkhWQtk6'>Google Form Survey</a></p>
        """,
        width=400, height=100, style={"font-size": "large"})
        
        case_tab = Panel(child=case_map, title="Cases")
        vac_tab = Panel(child=vac_map, title="Vaccines")
        tut_tab = Panel(child=div, title='Tutorial')

        tabs = Tabs(tabs=[ tut_tab, case_tab, vac_tab ])

        file_path = os.getcwd()
        

        #doc_path = file_path +"\\Documents"
        doc_path = file_path +"\\docs"

        outfp = doc_path + "\\us_covid_map.html"

        # Save the map
        save(tabs, outfp)

        # Not sure if this is important, but seemed to start working once
        # I ran it
        html = file_html(tabs, CDN, outfp)

        return html
        
        

In [3]:
example = CovidMapCreator()

In [4]:
df = example.import_csvs_into_df("https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_cases.csv",
                                 "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_vaccinations.csv")    

In [5]:
# df = example.import_pngs_into_df(df, "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/docs/Cases_PNGs/",
#                             "https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/docs/Vaccinations_PNGs/")

In [6]:
lake_map = example.create_lake_map()

  return _prepare_from_string(" ".join(pjargs))


In [7]:
land_map = example.create_land_map()

In [8]:
us_map = example.create_us_map(lake_map, land_map)

In [9]:
us_map = example.merge_gdf_df(us_map, df)

In [10]:
case_map = example.create_case_map(us_map)

In [11]:
vac_map = example.create_vac_map(us_map)

In [12]:
example.create_interactive_map(case_map, vac_map)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


\\": {\\"7-Day Cases Rate per 100000\\": 96.0, \\"ALAND\\": 1478588231566, \\"AWATER\\": 277723861311, \\"DIVISION\\": \\"9\\", \\"Death Rate per 100000\\": 46.0, \\"FUNCSTAT\\": \\"A\\", \\"GEOID\\": \\"02\\", \\"INTPTLAT\\": \\"+63.2813242\\", \\"INTPTLON\\": \\"-152.5730397\\", \\"LSAD\\": \\"00\\", \\"MTFCC\\": \\"G4000\\", \\"NAME\\": \\"Alaska\\", \\"People Fully Vaccinated\\": 263603.0, \\"Percent Fully Vaccinated\\": 36.0, \\"Percent Pop One Dose\\": 42.0, \\"REGION\\": \\"4\\", \\"STATENS\\": \\"01785533\\", \\"STUSPS\\": \\"AK\\", \\"State/Territory\\": \\"Alaska\\", \\"Total Cases\\": 65409.0, \\"Total Deaths\\": 341.0, \\"Total Doses Administered\\": 562875.0, \\"Total Doses Delivered\\": 769205.0, \\"admin_label\\": \\"562,875\\", \\"cases_label\\": \\"65,409\\", \\"cases_rate_label\\": \\"96\\", \\"death_rate_label\\": \\"46\\", \\"deaths_label\\": \\"341\\", \\"deliv_label\\": \\"769,205\\", \\"featurecla\\": \\"Land\\", \\"int_case_ticks\\": 3, \\"min_zoom\\": 3.0, \\"n

In [13]:
##DOC PATH CAUSES PROBLEM IN THE FINAL METHOD