<a href="https://colab.research.google.com/github/bdmello1/Covid_-geospatial_map/blob/master/Project_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
# import packages
import pandas as pd
import plotly.express as px
# import libraries to access geojson file (for choropleth map)
from urllib.request import urlopen
import json
# data from JHU CSSE
data_covid = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-14-2020.csv"
# make dataframe
df_covid_0414 = pd.read_csv(data_covid)
# preview data
df_covid_0414.head()
# keep only US data
df_covid_0414_us = df_covid_0414[df_covid_0414["Country_Region"]== "US"]
# save csv data from us census population
data_population = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv"
# import county population total data from 2019
df_countypop = pd.read_csv(data_population,encoding='latin1')
# preview data
df_countypop.head()
# convert state and county columns to strings
df_countypop["STATE"] = df_countypop["STATE"].astype(str)
df_countypop["COUNTY"] = df_countypop["COUNTY"].astype(str)
# fill in preceding zeros to make state 2 digits and county 3 digits
df_countypop["STATE"] = df_countypop["STATE"].str.zfill(2)
df_countypop["COUNTY"] = df_countypop["COUNTY"].str.zfill(3)
# sum state and county FIPS to create a column that contains the full county fips code
df_countypop["FIPS"] = df_countypop["STATE"] + df_countypop["COUNTY"]
# preview data
df_countypop.head()
# remove null FIPS codes
df_covid_0414_us = df_covid_0414_us[df_covid_0414_us["FIPS"].notnull()]
# convert JHU df FIPS to integer
df_covid_0414_us["FIPS"] = df_covid_0414_us["FIPS"].astype(int)
df_covid_0414_us["FIPS"] = df_covid_0414_us["FIPS"].astype(str)
# fill in preceding zeros to make FIPS five digits
df_covid_0414_us["FIPS"] = df_covid_0414_us["FIPS"].str.zfill(5)
# preview data
df_covid_0414_us.tail()
# get column names of population dataset
df_countypop.columns.tolist()

# merge 2019 population data onto covid-19 df
df_covid_0414_us = pd.merge(df_covid_0414_us,
                           df_countypop[["FIPS","POPESTIMATE2019"]],
                           on = "FIPS",
                           how = "left")
# preview merged data
df_covid_0414_us.tail()
# make column for number of cases per 1000 people and deaths per 1000 people
df_covid_0414_us["cases_per1000"] = (df_covid_0414_us["Confirmed"]/df_covid_0414_us["POPESTIMATE2019"])*1000
df_covid_0414_us["deaths_per1000"] = (df_covid_0414_us["Deaths"]/df_covid_0414_us["POPESTIMATE2019"])*1000
# round per capita values
df_covid_0414_us["cases_per1000"] = round(df_covid_0414_us["cases_per1000"], 2)
df_covid_0414_us["deaths_per1000"] = round(df_covid_0414_us["deaths_per1000"], 2)
# preview datat
df_covid_0414_us.head()
# set up mapbox access token
px.set_mapbox_access_token("mapbox_token")
# bubble map of number of cases
bubble_map = px.scatter_mapbox(df_covid_0414_us, 
                               lat = "Lat",
                               lon = "Long_",
                               size = "Confirmed",
                               hover_data = ["Confirmed", "Deaths", "cases_per1000", "deaths_per1000"] ,
                               hover_name = "Combined_Key",
                               center = dict(lat=37.0902, 
                                            lon= -95.7129), # center of the US
                               zoom = 2.5,
                               title = "Confirmed COVID-19 Cases in the United States",
                               labels = {"Confirmed": "Cumulative Confirmed Cases",
                                         "Deaths": "Cumulative Confirmed Deaths",
                                         "cases_per1000": "Cases per 1,000 People",
                                         "deaths_per1000": "Deaths per 1,000 People"
                                        } 
                              )
# show bubble map
bubble_map
# each row contributes to the density of the heatmap
density_heatmap = px.density_mapbox(df_covid_0414,
                                    lat= "column of latitude values", 
                                    lon= "column of longitude values", 
                                    z= "column of values that are the density values", 
                                    hover_data = df_covid_0414_us["cases_per1000"] ,
                                    hover_name = "column with additional data that you want bolded in the hover pop-up", 
                                    labels = {"cases_per1000": "Cases per 1,000 People",
                                         "deaths_per1000": "Deaths per 1,000 People"
                                    }
                              )

# copy row and append row equal to the number of cases for that county
df_covid_0414_repeat = df_covid_0414_us.reindex(df_covid_0414_us.index.repeat(df_covid_0414_us["Confirmed"])).reset_index()
# preview first 11 rows of data
df_covid_0414_repeat.head(11)
# rename Long_ column to long and Lat column to lat to upload to mapbox
df_covid_0414_repeat = df_covid_0414_repeat.rename(columns = {"Lat": "lat",
                                                                     "Long_": "lon"})
# density heatmap of confirmed case data
density_heatmap = px.density_mapbox(df_covid_0414_repeat, # dataframe with data for the heatmap 
                                    lat = "lat", 
                                    lon = "lon", 
                                    z = "Confirmed", 
                                    hover_data = ["Confirmed", "Deaths", "cases_per1000", "deaths_per1000"],
                                    hover_name = "Combined_Key",
                                    center = dict(lat=37.0902, 
                                                  lon= -95.7129), # center of the US
                                    zoom = 2.5, 
                                    labels = {"Confirmed": "Cumulative Confirmed Cases",
                                         "Deaths": "Cumulative Confirmed Deaths",
                                         "cases_per1000": "Cases per 1,000 People",
                                         "deaths_per1000": "Deaths per 1,000 People"
                                        }, 
                                    color_continuous_scale= "Inferno",
                                    title = "Density Heatmap of Confirmed COVID-19 Cases in the US",
                                    radius = 25,
                                   )
# this takes significan't memory and might not work on your computer
#density_heatmap

# instead, we can view if we export as an html
density_heatmap.write_html("density_heatmap_covid19_april14.html")
# save geojson file in our notebook
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    us_counties_shape = json.load(response)
#us_counties_shape
us_counties_shape
# upload the nyt covid-19 data
covid_nyt_data = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
# make data into a dataframe
df_county_nyt = pd.read_csv(covid_nyt_data)
# preview data
df_county_nyt.tail()
# look at datatypes

# convert fips to an integer then string then forward fill 0s to get 5 digits
#remove null values
df_county_nyt = df_county_nyt[df_county_nyt["fips"].notnull()]
# edit datatypes
df_county_nyt["fips"] = df_county_nyt["fips"].astype(int)
df_county_nyt["fips"] = df_county_nyt["fips"].astype(str)
df_county_nyt["fips"] = df_county_nyt["fips"].str.zfill(5)

# look at dataframe data types
df_county_nyt.info()
# convert date column to datetime
df_county_nyt["date"] = pd.to_datetime(df_county_nyt["date"])
# make new column to make a more readable date
####df_county_nyt.head()
# make column with month name
df_county_nyt["month_name"] = df_county_nyt["date"].dt.month_name()
# make column with month day
df_county_nyt["day_of_month"] = df_county_nyt["date"].dt.day
# preview new columns
df_county_nyt.head()
# combine month and day into a string
df_county_nyt["month_day_name"] = df_county_nyt["month_name"] + " " + df_county_nyt["day_of_month"].astype(str)
# preview data
df_county_nyt.head()
# merge 2019 population data onto nyt covid-19 df
df_county_nyt = pd.merge(df_county_nyt,
                         df_countypop[["FIPS","POPESTIMATE2019"]],
                         left_on = "fips",
                         right_on = "FIPS",
                         how = "left")
# make columns to calculate number of cases and deaths per 100000 residents
# round the values to the 3rd decimal
df_county_nyt["cases_per100000"] = round(((df_county_nyt["cases"]/df_county_nyt["POPESTIMATE2019"])*100000),3)
df_county_nyt["deaths_per100000"] = round(((df_county_nyt["deaths"]/df_county_nyt["POPESTIMATE2019"])*100000),3)
# make columns to calculate number of cases and deaths per 1000 residents
# round the values to the 3rd decimal
df_county_nyt["cases_per1000"] = round(((df_county_nyt["cases"]/df_county_nyt["POPESTIMATE2019"])*1000),3)
df_county_nyt["deaths_per1000"] = round(((df_county_nyt["deaths"]/df_county_nyt["POPESTIMATE2019"])*1000),3)
# preview data
df_county_nyt.head()
# print color HEX codes
print(px.colors.sequential.Plasma)
# make a logai=rithmic color scale
covid_colorscale = [
        [0, '#0d0887'],       #0 cases
        [1./1000, '#7201a8'], #10 cases
        [1./100, '#bd3786'],  #100 cases
        [1./10, '#ed7953'],   #1000 cases
        [1., '#fdca26'],      #10000 cases
]
# make a map of covid cases on april 14, 2020
covid_uscounty_color = px.choropleth(df_county_nyt, # dataframe with data for choropleth
                                     geojson=us_counties_shape, # shape, geospatial data geojson
                                     locations='fips', # column in geojson that denotes the shapes
                                     color='cases_per100000', # column in df that denotes the color scale
                                     animation_group = "month_day_name",
                                     animation_frame = "month_day_name",
                                     hover_name = df_county_nyt["county"] + ", " + df_county_nyt["state"],
                                     hover_data = ["cases", "deaths", "cases_per100000", "deaths_per100000"],
                                     color_continuous_scale=covid_colorscale, # custom color scale to better show exponential growth
                                     scope="usa", # scope of map (world, USA, or any continent)
                                     labels={'cases':'Cummulative Reported COVID-19 Cases',
                                             "deaths": "Cummulative Reported COVID-19 Deaths",
                                             "month_day_name": "Date",
                                             "fips": "FIPS Code",
                                            "cases_per100000": "Cummulative Reported Cases per 100,000 People",
                                             "deaths_per100000": "Cummulative Reported Deaths per 100,000 People"
                                            }, # renaming any columns used
                                     title = "Spread of COVID-19 Cases in US Counties Per Capita<br>January 21, 2020-April 15, 2020"
                                          
                          )
#covid_uscounty

###!pip install plotly --upgrade plotly
# export animation to html
covid_uscounty_color.write_html("covid19_cases_us_county.html")

ValueError: ignored