In [52]:
import pandas as pd             
import matplotlib.pyplot as plt 
import datetime as dt
import numpy as np

# these are new 
import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import shutil                   # file management tools 
import os                       # operating system tools (check files)

import geopandas as gpd # this is the main geopandas 
from shapely.geometry import Point, Polygon # also needed

import pyarrow as pa
import pyarrow.parquet as pq


from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset


#THIS PART IS FOR CREATING THE MAP
import json
from bokeh.io import show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, 
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)

from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer, all_palettes
from bokeh.plotting import figure
from bokeh.models import Title
from bokeh.layouts import gridplot

from bokeh.plotting import figure, save
from bokeh.models import Panel, Tabs

from bokeh.resources import CDN
from bokeh.embed import file_html

from bokeh.io import output_file, show
from bokeh.models import Div

In [53]:
class CovidMapCreator:


    """1. Pulls COVID-19 cases and death data
    2. Pulls COVID-19 vaccination data
    3. Merges both into one dataframe
    4. Pulls PNGs of tables of cases + deaths, and vaccinations
    5. Pulls lake, land, and state shape files
    6. Merges COVID data with US shapefile data
    7. Creates map
    """

    def __init__(self):

        """Initialize functions"""
        self.test = "test"
#         self.cases_file = cases_file
#         self.vaccine_file = vaccine_file
        
#         self.import_cvs_into_df(cases_file, vaccine_file)
        

    def import_csvs_into_df(self, cases_file: str, vaccine_file: str):

        """Import the csv file with COVID-19 Cases and Deaths"""

        #Used to limit size of dataframe to only significant data
        cases_col = ['State/Territory',
        'Total Cases', 'Confirmed Cases', 'Cases in Last 7 Days',
        'Case Rate per 100000', 'Total Deaths']
        
        #Used to limit df to only 50 states, and exclude territory data
        state_names = ["Alaska", "Alabama", "Arkansas", "Arizona",
        "California", "Colorado", "Connecticut", "Delaware", "Florida",
        "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana",
        "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland",
        "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi",
        "Montana", "North Carolina", "North Dakota", "Nebraska",
        "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York",
        "New York State", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
        "Rhode Island", "South Carolina", "South Dakota", "Tennessee",
        "Texas", "Utah", "Virginia", "Vermont", "Washington",
        "Wisconsin", "West Virginia", "Wyoming"]

        df = pd.read_csv(cases_file, skiprows=3, usecols=cases_col)

        #Limits to only those in state_name list
        df = df[df["State/Territory"].isin(state_names)]
        df = df.reset_index(drop=True)

        #total_cases = df['Total Cases'].sum()

        #Repeat the same process for vaccine date

        vac_col = ['State/Territory/Federal Entity',
        'Total Doses Delivered', 'Doses Delivered per 100K',
        'Total Doses Administered by State where Administered',
        'Doses Administered per 100k by State where Administered']

        df_vac = pd.read_csv(vaccine_file, skiprows=3, usecols=vac_col)

        df_vac.rename(columns={'State/Territory/Federal Entity' : "State/Territory",
                    'Total Doses Administered by State where Administered' :
                    'Total Doses Administered',
                    'Doses Administered per 100k by State where Administered' : 
                    'Doses Administered per 100k'}, inplace=True)

        df_vac = df_vac[df_vac["State/Territory"].isin(state_names)]
        #Do we need this?
        df_vac = df_vac.reset_index(drop=True)
        total_vaccines = df_vac["Total Doses Delivered"].sum()

        #Now merge the vaccination data into the main dataframe
        df = df.join(df_vac["Total Doses Delivered"])
        df = df.join(df_vac["Doses Delivered per 100K"])
        df = df.join(df_vac["Total Doses Administered"])
        df = df.join(df_vac["Doses Administered per 100k"])

        #Figure out how to make this neat
        #Change to int types
        df[["Total Cases", 'Confirmed Cases', 'Cases in Last 7 Days', 'Case Rate per 100000', 'Total Deaths']] = df[["Total Cases", 'Confirmed Cases', 'Cases in Last 7 Days', 'Case Rate per 100000', 'Total Deaths']].astype(int)

        df[["Total Doses Delivered", "Doses Delivered per 100K", "Total Doses Administered", "Doses Administered per 100k"]] = df[["Total Doses Delivered", "Doses Delivered per 100K", "Total Doses Administered", "Doses Administered per 100k"]].astype(int)

        return df

    def import_pngs_into_df(self, df, cases_html: str, vac_html: str):
        #Now we get merge pngs of data tables into the df
        cwd = os.getcwd()
        doc_path = cwd + "\\Docs\\"
        df["cases_file_location"] = cases_html + df['State/Territory'].astype(str) + "Cases.png"
        df["vaccine_file_location"] = vac_html + df['State/Territory'].astype(str) + "Vaccinations.png"

        return df

    def create_lake_map(self, cwd = os.getcwd()):
        map_projection = "epsg:2163"

        #First we start setting up the lakes
        lake_shapes = cwd + "\\shapefiles\\lake\\ne_10m_lakes.shx"
        #Returns GeoDataFrame object we can work with
        lake_map = gpd.read_file(lake_shapes)
        #Sets coordinate reference system of map to current map projection: espg:2163
        lake_map = lake_map.to_crs({'init': map_projection})

        return lake_map

    def create_land_map(self, cwd = os.getcwd()):
        map_projection = "epsg:2163"

        land_shapes = cwd + "\\shapefiles\\land\\ne_50m_land.shx"
        land_map = gpd.read_file(land_shapes)
        land_map = land_map.to_crs({'init': map_projection})
        #Why this cutoff?
        land_map = land_map.iloc[0:1200]

        return land_map
    
    def create_us_map(self, lake_map, land_map, cwd=os.getcwd()):
        #Next we work on shape files of the lakes, land, and states
        #Need this for specific map display we have. Refer to bookmarks to see how other epsgs look
        map_projection = "epsg:2163"
        #Finally, we add the state shape and create a new variable, us_map. Do we even use the others?
        state_shapes = cwd + "\\shapefiles\\state\\tl_2017_us_state.shx"
        us_map = gpd.read_file(state_shapes)
        us_map = us_map.to_crs({'init': map_projection})

        #Add in lake and land shapefile data
        #Puts one on top of the other i guess?
        us_map = gpd.overlay(us_map, land_map, how='intersection')
        
        #Now overlay the lakes
        great_lakes = ["Lake Superior", "Lake Michigan", "Lake Erie", "Lake Superior", "Lake Huron"]
        us_map = gpd.overlay(us_map, lake_map[lake_map.name.isin(great_lakes)], how='difference')

        #Simplifies geometry, how so?
        us_map["geometry"] = us_map["geometry"].simplify(200)
        return us_map
        



    # def import_shapefiles_into_map(self, df, cases_html: str, vac_html: str) -> gdf:
    #     cwd = os.getcwd()
    #     #Next we work on shape files of the lakes, land, and states
    #     #Need this for specific map display we have. Refer to bookmarks to see how other epsgs look
    #     map_projection = "epsg:2163"

    #     #First we start setting up the lakes
    #     lake_shapes = cwd + "\\shapefiles\\lake\\ne_10m_lakes.shx"
    #     #Returns GeoDataFrame object we can work with
    #     lake_map = gpd.read_file(lake_shapes)
    #     #Sets coordinate reference system of map to current map projection: espg:2163
    #     lake_map = lake_map.to_crs({'init': map_projection})

    #     #Then set up the land shapes using the same process
    #     land_shapes = cwd + "\\shapefiles\\land\\ne_50m_land.shx"
    #     land_map = gpd.read_file(land_shapes)
    #     land_map = land_map.to_crs({'init': map_projection})
    #     #Why this cutoff?
    #     land_map = land_map.iloc[0:1200]

    #     #Finally, we add the state shape and create a new variable, us_map. Do we even use the others?
    #     state_shapes = cwd + "\\shapefiles\\state\\tl_2017_us_state.shx"
    #     us_map = gpd.read_file(state_shapes)
    #     us_map = us_map.to_crs({'init': map_projection})

    #     #Simplifies geometry, how so?
    #     us_map["geometry"] = us_map["geometry"].simplify(200)
    #     return us_map
    

    def merge_gdf_df(self, us_map, df):
        #First merge df data into us_map
        us_map = us_map.merge(df, left_on='NAME',
                            right_on='State/Territory', how="left",
                            indicator=True)
        
        #STATEFP is given ID for each state
        us_map.set_index("STATEFP", inplace = True)
        #This removes all territory data, irrelevant
        drop_list = ["72","78","69","66","60","11"]
        us_map.drop(drop_list, inplace = True)
        #Why do we reset the index?
        us_map.reset_index()
        
        #These remove the 0 and add the comma for the numbers greater than 999
        us_map["cases_label"] = us_map["Total Cases"].round(0)
        us_map["cases_label"] = us_map["cases_label"].map('{:,.0f}'.format)
        us_map["vaccines_label"] = us_map["Total Doses Delivered"].round(0)
        us_map["vaccines_label"] = us_map["vaccines_label"].map('{:,.0f}'.format)

        # #Setting up for cases
        # q_cases = [0,50000,100000,250000,500000,650000,800000,1000000,2000000,3000000,np.inf]
        # us_map["q_cases"]= pd.cut(us_map["Total Cases"],q_cases, labels=range(0,10))
        # us_map["q_vaccines"]= pd.cut(us_map["Total Doses Delivered"],q_cases, labels=range(0,10))

        # #Replaceing N/As?
        # us_map["q_cases"].fillna(0, inplace = True)
        # us_map["q_vaccines"].fillna(0, inplace = True)

        # us_map["cases_label"].replace("nan", "N.R.", inplace=True)
        # us_map["vaccines_label"].replace("nan", "N.R.", inplace=True)
    
        return us_map

    def create_interactive_map(self, us_map):

        #Setting up for cases
        q_cases = [0,50000,100000,250000,500000,650000,800000,1000000,2000000,3000000,np.inf]
        us_map["q_cases"]= pd.cut(us_map["Total Cases"],q_cases, labels=range(0,10))
        us_map["q_vaccines"]= pd.cut(us_map["Total Doses Delivered"],q_cases, labels=range(0,10))

        #Replaceing N/As?
        us_map["q_cases"].fillna(0, inplace = True)
        us_map["q_vaccines"].fillna(0, inplace = True)

        us_map["cases_label"].replace("nan", "N.R.", inplace=True)
        us_map["vaccines_label"].replace("nan", "N.R.", inplace=True)

        #Creating the interactive map
        state_geosource = GeoJSONDataSource(geojson = us_map.to_json())

        palette = all_palettes['Viridis'][11]
        reversed_palette = palette[::-1]

        color_mapper_case = LinearColorMapper(palette = reversed_palette, low=0, high=9)
        color_mapper_vac = LinearColorMapper(palette = reversed_palette, low=0, high=9)
        tick_labels = {2:str(q_cases[1]), 4:str(q_cases[4]), 6:str(q_cases[6]), 8:str(q_cases[9])}

        today = dt.date.today()
        d = today.strftime("%B %d, %Y")

        title = "COVID-19 Cases by State as of " + d + " || Total Cases: " + f"{int(us_map['Total Cases'].sum()):,d}"

        color_bar = ColorBar(color_mapper = color_mapper_case, 
                            label_standoff = 8,
                            width = 20, height = 420,
                            border_line_color = None,
                            orientation = "vertical",
                            location=(0,0), major_label_overrides = tick_labels,
                            major_tick_line_alpha = .25)

        # Create figure object. First one
        p = figure( 
                plot_height = 600 ,
                plot_width = 950, 
                toolbar_location = None)

        descip = "Colorbar by # of COVID-19 cases; Hover tool plots cases by day.\n"
        descip = descip + "Data from https://covid.cdc.gov/covid-data-tracker/#cases_totalcases"
        p.add_layout(Title(text=descip, text_font_style="italic", text_font_size="9pt"), 'above')
        p.add_layout(Title(text=title, text_font_size="11pt"), 'above')

        author = "Created by Andrew Chuah"
        p.add_layout(Title(text=author, text_font_style="italic", text_font_size="9pt"), 'below')

        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = None
        # Add patch renderer to figure.

        states = p.patches('xs','ys', source = state_geosource,
                        fill_color = {"field" :'q_cases',
                                        "transform" : color_mapper_case},
                        line_color = "gray", 
                        line_width = 0.25, 
                        fill_alpha = 1)

        state_line = p.multi_line('xs','ys', source = state_geosource,
                        line_color = "black", 
                        line_width = 0.25)

        p.axis.visible = False
        p.background_fill_color = "grey"
        p.background_fill_alpha = 0.25

        p.border_fill_color = "#F5F5F5"
        color_bar.background_fill_color = "#F5F5F5"

        p.toolbar.autohide = True

        p.add_layout(color_bar, "right")

        title_vaccines = "COVID-19 Vaccinations by State as of " + d + " || Total Doses Delivered: " + f"{int(us_map['Total Doses Delivered'].sum()):,d}"

        color_bar = ColorBar(color_mapper = color_mapper_vac, 
                            label_standoff = 8,
                            width = 20, height = 420,
                            border_line_color = None,
                            orientation = "vertical",
                            location=(0,0), major_label_overrides = tick_labels,
                            major_tick_line_alpha = .25)

        # Create figure object. Second one
        pvac = figure( 
                plot_height = 600,
                plot_width = 950, 
                toolbar_location = None)

        descip = "Colorbar by # of COVID-19 vaccinations; Hover tool plots by day.\n"
        descip = descip + " Data from https://covid.cdc.gov/covid-data-tracker/#vaccinations"
        pvac.add_layout(Title(text=descip, text_font_style="italic", text_font_size="9pt"), 'above')
        pvac.add_layout(Title(text=title_vaccines, text_font_size="11pt"), 'above')

        author = "Created by Andrew Chuah"
        pvac.add_layout(Title(text=author, text_font_style="italic", text_font_size="9pt"), 'below')

        pvac.xgrid.grid_line_color = None
        pvac.ygrid.grid_line_color = None
        # Add patch renderer to figure.

        states = pvac.patches('xs','ys', source = state_geosource,
                        fill_color = {"field" :'q_vaccines',
                                        "transform" : color_mapper_vac},
                        line_color = "gray", 
                        line_width = 0.25, 
                        fill_alpha = 1)

        state_line = pvac.multi_line('xs','ys', source = state_geosource,
                        line_color = "black", 
                        line_width = 0.25)

        pvac.axis.visible = False
        pvac.background_fill_color = "grey"
        pvac.background_fill_alpha = 0.25

        pvac.border_fill_color = "#F5F5F5"
        color_bar.background_fill_color = "#F5F5F5"

        pvac.toolbar.autohide = True

        pvac.add_layout(color_bar, "right")

        tab1 = Panel(child=p, title="Cases")
        tab2 = Panel(child=pvac, title="Vaccines")

        tabs = Tabs(tabs=[ tab1, tab2 ])

        file_path = os.getcwd()
        
        #DOC PATH CAUSES PROBLEMS!!
        doc_path = file_path +"\\Documents"

        outfp = doc_path + "\\us_covid_map.html"

        # Save the map
        save(tabs, outfp)

        # Not sure if this is important, but seemed to start working once
        # I ran it
        html = file_html(tabs, CDN, outfp)

        return html

In [54]:
example = CovidMapCreator()

In [55]:
df = example.import_csvs_into_df("https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_cases.csv","https://raw.githubusercontent.com/camckenzie/COVID-19-Interactive-Map/main/covid19_vaccinations.csv")

In [56]:
df = example.import_pngs_into_df(df, "https://github.com/camckenzie/COVID-19-Interactive-Map/tree/main/Docs/Cases_PNGs/",
                           "https://github.com/camckenzie/COVID-19-Interactive-Map/tree/main/Docs/Vaccinations_PNGs/")

In [57]:
lake_map = example.create_lake_map()

  return _prepare_from_string(" ".join(pjargs))


In [58]:
land_map = example.create_land_map()

In [59]:
us_map = example.create_us_map(lake_map, land_map)

In [60]:
us_map = example.merge_gdf_df(us_map, df)

In [61]:
example.create_interactive_map(us_map)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


'\n\n\n\n<!DOCTYPE html>\n<html lang="en">\n  \n  <head>\n    \n      <meta charset="utf-8">\n      <title>C:\\Users\\Chris\\Documents\\us_covid_map.html</title>\n      \n      \n        \n          \n        \n        \n          \n        <script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-2.2.3.min.js" integrity="sha384-T2yuo9Oe71Cz/I4X9Ac5+gpEa5a8PpJCDlqKYO0CfAuEszu1JrXLl8YugMqYe3sM" crossorigin="anonymous"></script>\n        <script type="text/javascript">\n            Bokeh.set_log_level("info");\n        </script>\n        \n      \n      \n    \n  </head>\n  \n  \n  <body>\n    \n      \n        \n          \n          \n            \n              <div class="bk-root" id="68774208-74a6-4ad8-a081-ab458f28e5f1" data-root-id="1699"></div>\n            \n          \n        \n      \n      \n        <script type="application/json" id="2081">\n          {"731b92d0-8de1-4815-8149-7f8e6f679415":{"roots":{"references":[{"attributes":{"axis":{"id":"1665"},"dim

In [62]:
##DOC PATH CAUSES PROBLEM IN THE FINAL METHOD