Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reverse colorbar #65

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 23 additions & 78 deletions analysis/db/us_map/choroplethMap.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
import plotly.figure_factory as ff
import numpy as np
import plotly.io as pio
from pyprojroot import here
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
from urllib.request import urlopen
import json
import pathlib as pl
import json

# Download the county-level GeoJSON from the plotly/datasets repository;
# `counties` is what the choropleth call receives as its `geojson` argument.
with urlopen(
    'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
) as response:
    counties = json.load(response)

# TODO: Change range_color numbers in fig = px.choropleth()
# TODO: Build the map for a specific date
# TODO: Build line/bar graphs to check case numbers per state over a period of time
# TODO: Draw another version of this map, but accounts for population density per county (per capita count)
# TODO: See if rate is changing, counts over time (a 14 day sliding window count)
# Choropleth map with time slider and hover text

# US confirmed-case time series, read directly from the
# CSSEGISandData/COVID-19 repository on GitHub.
confirmed_df = pd.read_csv(
    'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/'
    'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
)
# Resource for State_FIPS: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697
loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a link to where you got this data from.

# Population dataset for 2019.
# Resource for PopulationEstimates: https://www.ers.usda.gov/data-products/county-level-data-sets/download-data/
pop_df = pd.read_excel(
    here('./data/db/original/maps/PopulationEstimates.xls')
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did this dataset come from?


# Render each FIPStxt value as a zero-padded, five-character string with no
# decimals so it can serve as a join key named 'fips_str'.
pop_df['fips_str'] = pop_df['FIPStxt'].map('{:05.0f}'.format)
# Only the key, the area name and the 2019 estimate are kept.
pop_df = pop_df.loc[:, ['fips_str', 'Area_Name', 'POP_ESTIMATE_2019']]

# Attach the FIPS lookup rows to the case data by matching
# loc_df.Name against confirmed_df.Admin2.
merged_df = loc_df.merge(confirmed_df, left_on='Name', right_on='Admin2')

Expand All @@ -33,83 +33,28 @@

# Parse the month/day/two-digit-year labels in 'date' into real datetimes
# (ISO 8601 when printed).
molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y")

fips = molten_df['fips_str'].tolist()

# Add the population estimate to every row that shares a 'fips_str' key.
molten_pop_df = molten_df.merge(pop_df, on='fips_str')

# Sum 'value' for each combination of the grouping columns below.
grouped_by = (
    molten_pop_df
    .groupby(['fips_str', 'date_iso', 'State', 'Admin2', 'POP_ESTIMATE_2019'])['value']
    .sum()
    .reset_index()
)
# Per capita value: summed count divided by the 2019 population estimate.
grouped_by['total_per_cap'] = grouped_by['value'].div(grouped_by['POP_ESTIMATE_2019'])

# Confirmed cases on a specific day.
plot_data = grouped_by.loc[grouped_by['date_iso'] == '2020-04-01']
# Column to plot: 'value' = raw count, 'total_per_cap' = per capita.
value = 'value'

fig = px.choropleth(molten_df,
# confirmed cases per capita/raw count
fig = px.choropleth(plot_data,
geojson=counties,
locations=fips,
color='value',
animation_frame='date',
hover_data=['State', 'value'],
color_continuous_scale="Viridis",
range_color=(0, 300),
locations=plot_data.fips_str,
color=value,
# animation_frame='date',
hover_data=['State', 'Admin2', value, 'POP_ESTIMATE_2019'],
color_continuous_scale='viridis_r',
range_color=(0, plot_data[value].max()),
scope="usa",
title='Confirmed cases',
labels={'value': 'confirmed cases'}
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
# fig.show()

# save out figure
# save out working data
pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True)
pio.write_html(fig,
file=str(here("./output/maps/choropleth_us_cases.html", warn=False)),
auto_open=False)


# ChoroplethMap using FIPS from merged data
'''
confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/'
'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv')

loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx'))
merged_df = pd.merge(loc_df, confirmed_df, right_on='Admin2', left_on='Name')

merged_df['fips_str'] = merged_df['FIPS_x'].apply(lambda x: f'{x:05.0f}') # left pad with 0 for 5 digits

colorscale = ["#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1",
"#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9",
"#08519c", "#0b4083", "#08306b"]

molten_df = merged_df.melt(
id_vars=['FIPS_x', 'Name', 'State', 'UID', 'iso2', 'iso3', 'code3', 'FIPS_y', 'Admin2',
'Province_State', 'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'fips_str'],
var_name=['date']
)

molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format

endpts = list(np.linspace(0, 3000, len(colorscale) - 1))
fips = molten_df['fips_str'].tolist()
values = molten_df['date_iso'].tolist()

fig = ff.create_choropleth(
fips=fips,
# values=values,
scope=['usa',
# 'Alaska',
# 'Puerto Rico',
# 'American Samoa',
# 'Commonwealth of the Northern Mariana Islands', 'Guam',
# 'United States Virgin Islands'
],
binning_endpoints=endpts,
colorscale=colorscale,
show_state_data=True,
show_hover=True,
centroid_marker={'opacity': 0},
asp=2.9,
title='Confirmed cases on April 22',
legend_title='# confirmed cases',
text=molten_df['fips']
)

fig.layout.template = None
fig.show()
'''



auto_open=True)
47 changes: 47 additions & 0 deletions analysis/db/us_map/graphs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from pyprojroot import here
import plotly.io as pio
import seaborn as sns
from urllib.request import urlopen
import pathlib as pl


# Build `grouped_counts`: summed confirmed-case values per date, state and
# county (Admin2) for the two states selected below.

# US confirmed-case time series, read directly from the
# CSSEGISandData/COVID-19 repository on GitHub.
confirmed_df = pd.read_csv(
    'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/'
    'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
)
loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx'))

# NOTE(review): the join key is the county name alone (Name == Admin2). If
# the same name occurs in more than one state, rows would match across
# states -- confirm against State_FIPS.xlsx whether a state key is needed.
merged_df = pd.merge(loc_df, confirmed_df, right_on='Admin2', left_on='Name')

merged_df['fips_str'] = merged_df['FIPS_x'].apply(lambda x: f'{x:05.0f}')  # left pad with 0 for 5 digits

# Wide -> long: every column not listed in `id_vars` is unpivoted into a
# ('date', 'value') pair of columns.
molten_df = merged_df.melt(
    id_vars=['FIPS_x', 'Name', 'State', 'UID', 'iso2', 'iso3', 'code3', 'FIPS_y', 'Admin2',
             'Province_State', 'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'fips_str'],
    # `var_name` is documented as a scalar; a one-element list is rejected by
    # newer pandas releases when the columns are not a MultiIndex.
    var_name='date',
)

molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y")  # change date to ISO8601 standard format

# States to compare.
state1 = 'Washington'
state2 = 'California'
subset = molten_df.loc[
    molten_df.Province_State.isin([state1, state2]),
    ['Province_State', 'Admin2', 'value', 'date_iso'],
]
# Alternatives kept for reference:
# subset = molten_df[['Province_State', 'Admin2', 'value', 'date_iso']]
# subset = subset[subset.date_iso == '2020-04-01']  # for violinplot to show on certain date
grouped_counts = subset.groupby(['date_iso', 'Province_State', 'Admin2'])['value'].sum().reset_index()

# Non-interactive graphs (currently disabled). To render one, uncomment a
# single `ax = ...` line together with the two `plt` calls at the end.
# ax = sns.lineplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts)  # show cases per state monthly
# ax = sns.stripplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts)
# ax = sns.violinplot(x='date_iso', y='value', hue='Province_State', data=grouped_counts, palette="Set2", split=True,
#                     scale="count", inner="quartile")
# ax = sns.countplot(x="date_iso", hue='Province_State', data=grouped_counts)  # works better if there are certain dates
# plt.tight_layout()
# plt.show()
Comment on lines +38 to +46
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you comment these out? We could also add general values to the dashboard.