From f8c38bc4dff6bad9ceeb35af6f8fdc1b66a16966 Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Wed, 24 Jun 2020 19:56:38 -0400 Subject: [PATCH 1/6] reverse colorbar --- analysis/db/us_map/choroplethMap.py | 70 ++++------------------------- 1 file changed, 9 insertions(+), 61 deletions(-) diff --git a/analysis/db/us_map/choroplethMap.py b/analysis/db/us_map/choroplethMap.py index 0bdd092..6a107ad 100644 --- a/analysis/db/us_map/choroplethMap.py +++ b/analysis/db/us_map/choroplethMap.py @@ -1,13 +1,13 @@ -import plotly.figure_factory as ff import numpy as np import plotly.io as pio from pyprojroot import here +import seaborn as sns +import matplotlib.pyplot as plt import plotly.express as px import pandas as pd from urllib.request import urlopen -import json import pathlib as pl - +import json with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response: counties = json.load(response) @@ -34,7 +34,10 @@ molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format fips = molten_df['fips_str'].tolist() -# max_val = molten_df['value'].max() + +# plt.show() +# color_map = plt.cm.get_cmap('viridis') +# reversed_viridis = color_map.reversed() fig = px.choropleth(molten_df, @@ -43,73 +46,18 @@ color='value', animation_frame='date', hover_data=['State', 'value'], - color_continuous_scale="Viridis", + color_continuous_scale='viridis_r', range_color=(0, 300), scope="usa", title='Confirmed cases', labels={'value': 'confirmed cases'} ) fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) -# fig.show() # save out figure # save out working data pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True) pio.write_html(fig, file=str(here("./output/maps/choropleth_us_cases.html", warn=False)), - auto_open=False) - - -# ChoroplethMap using FIPS from merged data -''' -confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/' - 'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv') - -loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx')) -merged_df = pd.merge(loc_df, confirmed_df, right_on='Admin2', left_on='Name') - -merged_df['fips_str'] = merged_df['FIPS_x'].apply(lambda x: f'{x:05.0f}') # left pad with 0 for 5 digits - -colorscale = ["#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1", - "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9", - "#08519c", "#0b4083", "#08306b"] - -molten_df = merged_df.melt( - id_vars=['FIPS_x', 'Name', 'State', 'UID', 'iso2', 'iso3', 'code3', 'FIPS_y', 'Admin2', - 'Province_State', 'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'fips_str'], - var_name=['date'] -) - -molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format - -endpts = list(np.linspace(0, 3000, len(colorscale) - 1)) -fips = molten_df['fips_str'].tolist() -values = molten_df['date_iso'].tolist() - -fig = ff.create_choropleth( - fips=fips, - # values=values, - scope=['usa', - # 'Alaska', - # 'Puerto Rico', - # 'American Samoa', - # 'Commonwealth of the Northern Mariana Islands', 'Guam', - # 'United States Virgin Islands' - ], - binning_endpoints=endpts, - colorscale=colorscale, - show_state_data=True, - show_hover=True, - centroid_marker={'opacity': 0}, - asp=2.9, - title='Confirmed cases on April 22', - legend_title='# confirmed cases', - text=molten_df['fips'] -) - -fig.layout.template = None -fig.show() -''' - - + auto_open=True) From 30f787cf6348f1268f6a7f241167bf5b794c855c Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Fri, 26 Jun 2020 13:08:30 -0400 Subject: [PATCH 2/6] add map with confirmed cases per capita --- analysis/db/us_map/choroplethMap.py | 51 +++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/analysis/db/us_map/choroplethMap.py b/analysis/db/us_map/choroplethMap.py index 6a107ad..0b7b5a7 100644 --- a/analysis/db/us_map/choroplethMap.py +++ b/analysis/db/us_map/choroplethMap.py @@ -11,16 +11,17 @@ with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response: counties = json.load(response) -# TODO: Change range_color numbers in fig = px.choropleth() -# TODO: Build the map for a specific date # TODO: Build line/bar graphs to check case numbers per state over a period of time -# TODO: Draw another version of this map, but accounts for population density per county (per capita count) # TODO: See if rate is changing, counts over time (a 14 day sliding window count) -# Choropleth map with time slider and hover text +# TODO: Try to merge PopulationEstimates.xls to confirmed_df and remove State_FIPS.xlsx confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/' 'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv') loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx')) +pop_df = pd.read_excel(here('./data/db/original/maps/PopulationEstimates.xls')) # population dataset for 2019 + +pop_df['fips_str'] = pop_df['FIPStxt'].apply(lambda x: f'{x:05.0f}') +pop_df = pop_df[['fips_str', 'Area_Name', 'POP_ESTIMATE_2019']] merged_df = pd.merge(loc_df, confirmed_df, right_on='Admin2', left_on='Name') @@ -32,22 +33,44 @@ ) molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format +# fips = molten_df['fips_str'].tolist() -fips = molten_df['fips_str'].tolist() +molten_pop_df = pd.merge(molten_df, pop_df, on='fips_str') # add population per county +grouped_by = molten_pop_df.groupby(['fips_str', 'date_iso', 'Admin2', 'POP_ESTIMATE_2019'])['value'].sum().reset_index() +grouped_by['value'] = grouped_by['value']/grouped_by['POP_ESTIMATE_2019'] # get per capita value -# plt.show() -# color_map = plt.cm.get_cmap('viridis') -# reversed_viridis = color_map.reversed() +plot_data = grouped_by[grouped_by.date_iso == '2020-04-01'] +# confirmed cases per capita +fig = px.choropleth(plot_data, + geojson=counties, + locations=plot_data.fips_str, + color='value', + # animation_frame='date', + hover_data=['Admin2', 'value', 'POP_ESTIMATE_2019'], + color_continuous_scale='viridis_r', + range_color=(0, plot_data['value'].max()), + scope="usa", + title='Confirmed cases per capita', + labels={'value': 'confirmed cases'} + ) +fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) +pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True) +pio.write_html(fig, + file=str(here("./output/maps/choropleth_us_cases.html", warn=False)), + auto_open=False) -fig = px.choropleth(molten_df, +''' +# overall confirmed cases data +plot_data = molten_df[molten_df.date_iso == '2020-04-01'] +fig = px.choropleth(plot_data, geojson=counties, - locations=fips, + locations=plot_data.fips_str, color='value', - animation_frame='date', + # animation_frame='date', hover_data=['State', 'value'], color_continuous_scale='viridis_r', - range_color=(0, 300), + range_color=(0, 500), scope="usa", title='Confirmed cases', labels={'value': 'confirmed cases'} @@ -59,5 +82,5 @@ pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True) pio.write_html(fig, file=str(here("./output/maps/choropleth_us_cases.html", warn=False)), - auto_open=True) - + auto_open=False) +''' \ No newline at end of file From 30f3723bc9b97f88912a47de18a8ff8b82142656 Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Fri, 26 Jun 2020 16:16:29 -0400 Subject: [PATCH 3/6] add lineplot to see the rise of confirmed case numbers --- analysis/db/us_map/graphs.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 analysis/db/us_map/graphs.py diff --git a/analysis/db/us_map/graphs.py b/analysis/db/us_map/graphs.py new file mode 100644 index 0000000..56e24d4 --- /dev/null +++ b/analysis/db/us_map/graphs.py @@ -0,0 +1,36 @@ +import matplotlib.pyplot as plt +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import numpy as np +from pyprojroot import here +import plotly.io as pio +import seaborn as sns +from urllib.request import urlopen +import pathlib as pl + + +confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/' + 'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv') +loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx')) + +merged_df = pd.merge(loc_df, confirmed_df, right_on='Admin2', left_on='Name') + +merged_df['fips_str'] = merged_df['FIPS_x'].apply(lambda x: f'{x:05.0f}') # left pad with 0 for 5 digits +molten_df = merged_df.melt( + id_vars=['FIPS_x', 'Name', 'State', 'UID', 'iso2', 'iso3', 'code3', 'FIPS_y', 'Admin2', + 'Province_State', 'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'fips_str'], + var_name=['date'] +) + +molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format +# state = molten_df.loc[molten_df.Province_State == 'Virginia', molten_df.Province_State == 'New York'] +# molten_df['date_iso'] = molten_df.loc[molten_df.date_iso == '2020-04-01', molten_df.date_iso == '2020-04-05'] +subset = molten_df.loc[molten_df.Province_State == 'Virginia', ['Province_State', 'Admin2', 'value', 'date_iso']] + +grouped_counts = subset.groupby(['date_iso', 'Province_State', 'Admin2'])['value'].sum().reset_index() + +ax = sns.lineplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts) # show cases per state monthly +# ax = sns.stripplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts) +# plt.tight_layout() +plt.show() From c065f06b0f88ef89faf3afe19fd86104424f49b0 Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Fri, 26 Jun 2020 18:26:43 -0400 Subject: [PATCH 4/6] add noninteractive graphs --- analysis/db/us_map/graphs.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/analysis/db/us_map/graphs.py b/analysis/db/us_map/graphs.py index 56e24d4..7034e4a 100644 --- a/analysis/db/us_map/graphs.py +++ b/analysis/db/us_map/graphs.py @@ -26,11 +26,22 @@ molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format # state = molten_df.loc[molten_df.Province_State == 'Virginia', molten_df.Province_State == 'New York'] # molten_df['date_iso'] = molten_df.loc[molten_df.date_iso == '2020-04-01', molten_df.date_iso == '2020-04-05'] -subset = molten_df.loc[molten_df.Province_State == 'Virginia', ['Province_State', 'Admin2', 'value', 'date_iso']] - +state1 = 'Washington' +state2 = 'California' +subset = molten_df.loc[(molten_df.Province_State.isin([state1, state2])), + ['Province_State', 'Admin2', 'value', 'date_iso']] +# subset = molten_df[['Province_State', 'Admin2', 'value', 'date_iso']] +# subset = subset[subset.date_iso == '2020-04-01'] # for violinplot to show on certain date grouped_counts = subset.groupby(['date_iso', 'Province_State', 'Admin2'])['value'].sum().reset_index() -ax = sns.lineplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts) # show cases per state monthly +# Noninteractive graps +''' +# ax = sns.lineplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts) # show cases per state monthly # ax = sns.stripplot(x="date_iso", y="value", hue='Province_State', data=grouped_counts) +# ax = sns.violinplot(x='date_iso', y='value', hue='Province_State', data=grouped_counts, palette="Set2", split=True, +# scale="count", inner="quartile") +# ax = sns.countplot(x="date_iso", hue='Province_State', data=grouped_counts) # works better if there are certain dates # plt.tight_layout() -plt.show() +# plt.show() +''' + From c086b3319ae5bf9f1d0d46da8caa29060e1f7d25 Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Thu, 2 Jul 2020 10:32:07 -0400 Subject: [PATCH 5/6] add datasets' original links --- analysis/db/us_map/choroplethMap.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/analysis/db/us_map/choroplethMap.py b/analysis/db/us_map/choroplethMap.py index 0b7b5a7..beb64e5 100644 --- a/analysis/db/us_map/choroplethMap.py +++ b/analysis/db/us_map/choroplethMap.py @@ -17,7 +17,9 @@ confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/' 'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv') +# Resource for State_FIPS: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697 loc_df = pd.read_excel(here('./data/db/original/maps/State_FIPS.xlsx')) +# Resource for PopulationEstimates: https://www.ers.usda.gov/data-products/county-level-data-sets/download-data/ pop_df = pd.read_excel(here('./data/db/original/maps/PopulationEstimates.xls')) # population dataset for 2019 pop_df['fips_str'] = pop_df['FIPStxt'].apply(lambda x: f'{x:05.0f}') @@ -37,7 +39,7 @@ molten_pop_df = pd.merge(molten_df, pop_df, on='fips_str') # add population per county grouped_by = molten_pop_df.groupby(['fips_str', 'date_iso', 'Admin2', 'POP_ESTIMATE_2019'])['value'].sum().reset_index() -grouped_by['value'] = grouped_by['value']/grouped_by['POP_ESTIMATE_2019'] # get per capita value +grouped_by['total_per_cap'] = grouped_by['value']/grouped_by['POP_ESTIMATE_2019'] # get per capita value plot_data = grouped_by[grouped_by.date_iso == '2020-04-01'] @@ -45,11 +47,11 @@ fig = px.choropleth(plot_data, geojson=counties, locations=plot_data.fips_str, - color='value', + color='total_per_cap', # animation_frame='date', - hover_data=['Admin2', 'value', 'POP_ESTIMATE_2019'], + hover_data=['Admin2', 'total_per_cap', 'POP_ESTIMATE_2019'], color_continuous_scale='viridis_r', - range_color=(0, plot_data['value'].max()), + range_color=(0, plot_data['total_per_cap'].max()), scope="usa", title='Confirmed cases per capita', labels={'value': 'confirmed cases'} From d1909caf54baff1521455ba58977814db239d8b6 Mon Sep 17 00:00:00 2001 From: Makhsuda Date: Thu, 2 Jul 2020 23:16:41 -0400 Subject: [PATCH 6/6] change structure of the code --- analysis/db/us_map/choroplethMap.py | 48 ++++++----------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/analysis/db/us_map/choroplethMap.py b/analysis/db/us_map/choroplethMap.py index beb64e5..d5cd0e3 100644 --- a/analysis/db/us_map/choroplethMap.py +++ b/analysis/db/us_map/choroplethMap.py @@ -8,13 +8,10 @@ from urllib.request import urlopen import pathlib as pl import json + with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response: counties = json.load(response) -# TODO: Build line/bar graphs to check case numbers per state over a period of time -# TODO: See if rate is changing, counts over time (a 14 day sliding window count) -# TODO: Try to merge PopulationEstimates.xls to confirmed_df and remove State_FIPS.xlsx - confirmed_df = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/' 'csse_covid_19_time_series/time_series_covid19_confirmed_US.csv') # Resource for State_FIPS: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697 @@ -35,54 +32,29 @@ ) molten_df['date_iso'] = pd.to_datetime(molten_df['date'], format="%m/%d/%y") # change date to ISO8601 standard format -# fips = molten_df['fips_str'].tolist() molten_pop_df = pd.merge(molten_df, pop_df, on='fips_str') # add population per county -grouped_by = molten_pop_df.groupby(['fips_str', 'date_iso', 'Admin2', 'POP_ESTIMATE_2019'])['value'].sum().reset_index() -grouped_by['total_per_cap'] = grouped_by['value']/grouped_by['POP_ESTIMATE_2019'] # get per capita value +grouped_by = molten_pop_df.groupby(['fips_str', 'date_iso', 'State', 'Admin2', 'POP_ESTIMATE_2019'])['value'].sum().reset_index() +grouped_by['total_per_cap'] = grouped_by['value'] / grouped_by['POP_ESTIMATE_2019'] # get per capita value -plot_data = grouped_by[grouped_by.date_iso == '2020-04-01'] +plot_data = grouped_by[grouped_by.date_iso == '2020-04-01'] # confirmed cases on a specific day +value = 'value' # 'value' = raw count, 'total_per_cap' = per capita -# confirmed cases per capita +# confirmed cases per capita/raw count fig = px.choropleth(plot_data, geojson=counties, locations=plot_data.fips_str, - color='total_per_cap', + color=value, # animation_frame='date', - hover_data=['Admin2', 'total_per_cap', 'POP_ESTIMATE_2019'], + hover_data=['State', 'Admin2', value, 'POP_ESTIMATE_2019'], color_continuous_scale='viridis_r', - range_color=(0, plot_data['total_per_cap'].max()), - scope="usa", - title='Confirmed cases per capita', - labels={'value': 'confirmed cases'} - ) -fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) -pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True) -pio.write_html(fig, - file=str(here("./output/maps/choropleth_us_cases.html", warn=False)), - auto_open=False) - -''' -# overall confirmed cases data -plot_data = molten_df[molten_df.date_iso == '2020-04-01'] -fig = px.choropleth(plot_data, - geojson=counties, - locations=plot_data.fips_str, - color='value', - # animation_frame='date', - hover_data=['State', 'value'], - color_continuous_scale='viridis_r', - range_color=(0, 500), + range_color=(0, plot_data[value].max()), scope="usa", title='Confirmed cases', labels={'value': 'confirmed cases'} ) fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) - -# save out figure -# save out working data pl.Path(here("./output/maps", warn=False)).mkdir(parents=True, exist_ok=True) pio.write_html(fig, file=str(here("./output/maps/choropleth_us_cases.html", warn=False)), - auto_open=False) -''' \ No newline at end of file + auto_open=True)