## Maps

We use `geopandas` and `geoplot` to plot the maps. The shapefile is the Vietnamese Government's administrative-boundary dataset at level 1 (`adm = 1`).

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import cmasher as cmr
import numpy as np

### Setup

In [None]:
# Shared colormap for every map below: only the 0.3-1 range of 'Reds' is kept,
# so the palest shades at the bottom of the colormap are never used.
cmap = cmr.get_sub_cmap('Reds', start=0.3, stop=1)

### Timeline: 2017 - 2019

In [None]:
# Inputs: a CSV with one row per province ('Province', 'Patient' columns are used
# below) and the level-1 boundary shapefile (province names in 'ADM1_EN').
stats_path = '../Data/province_stat.csv'
boundaries_path = '../Data/vnm_admbnda_adm1_gov_20201027.shp'

df = pd.read_csv(stats_path)
gdf = gpd.read_file(boundaries_path)

In [None]:
# Province names exactly as the shapefile spells them; a set gives O(1) lookups.
shape_names = set(gdf['ADM1_EN'])

# Some provinces in the CSV are missing the ' city' suffix used by the shapefile.
# The suffix is added only where the suffixed name really exists in the shapefile,
# so a name that is unknown for another reason is left as it is.
suffixed = df['Province'] + ' city'
needs_suffix = ~df['Province'].isin(shape_names) & suffixed.isin(shape_names)
df['Province'] = df['Province'].mask(needs_suffix, suffixed)

# Report names that still have no shape instead of hiding them.
unmatched = sorted(set(df['Province'].dropna()) - shape_names)
if unmatched:
    print('Provinces without a matching shape:', unmatched)

# Some shapefile provinces are absent from the CSV. Add them with zero patients,
# using a single concat rather than one concat per province.
absent = gdf.loc[~gdf['ADM1_EN'].isin(df['Province']), 'ADM1_EN'].drop_duplicates().tolist()
if absent:
    filler = pd.DataFrame({'Province': absent, 'Patient': [0] * len(absent)})
    df = pd.concat([df, filler], ignore_index=True)

df = df.sort_values(by='Province', ignore_index=True)

In [None]:
# Display the reconciled per-province table (last expression of the cell).
df

In [None]:
# Total patients per province name; rows sharing a name are summed.
cases_by_province = df.groupby('Province')['Patient'].sum()
gdf = gdf.sort_values(by='ADM1_EN')

# Drop the column left by an earlier run of this cell; insert() refuses duplicates.
if 'Patient' in gdf.columns:
    gdf = gdf.drop('Patient', axis=1)

# Attach the counts by province name rather than by row position, so a difference
# in row order or row count between the two tables cannot move a value onto the
# wrong province. Shapes without a count get 0.
patients = gdf['ADM1_EN'].map(cases_by_province).fillna(0)
gdf.insert(0, 'Patient', patients)

# The figure size is set here: GeoDataFrame.plot() ignores figsize when ax is given.
f, ax = plt.subplots(1, figsize=(10, 10))
gdf_ax = gdf.plot(column='Patient', cmap=cmap, ax=ax, legend=True)
gdf_ax.set_axis_off()
# legend=True adds the colorbar as the second axes of the figure; label it.
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel('Number of reported cases, 2017 - 2019 (Vietnam)')

### Timeline: 2017

In [None]:
# Per-province counts split by year (the '2017', '2018' and '2019' columns are used below).
yearly_path = '../Data/year_and_province.csv'
df_years = pd.read_csv(yearly_path)

In [None]:
# Province names exactly as the shapefile spells them; a set gives O(1) lookups.
shape_names = set(gdf['ADM1_EN'])

# Some provinces in the CSV are missing the ' city' suffix used by the shapefile.
# The suffix is added only where the suffixed name really exists in the shapefile,
# so a name that is unknown for another reason is left as it is.
suffixed = df_years['Province'] + ' city'
needs_suffix = ~df_years['Province'].isin(shape_names) & suffixed.isin(shape_names)
df_years['Province'] = df_years['Province'].mask(needs_suffix, suffixed)

# Report names that still have no shape instead of hiding them.
unmatched = sorted(set(df_years['Province'].dropna()) - shape_names)
if unmatched:
    print('Provinces without a matching shape:', unmatched)

# Some shapefile provinces are absent from the CSV. Add them with zero cases for
# every year, using a single concat rather than one concat per province.
absent = gdf.loc[~gdf['ADM1_EN'].isin(df_years['Province']), 'ADM1_EN'].drop_duplicates().tolist()
if absent:
    zeros = [0] * len(absent)
    filler = pd.DataFrame({'Province': absent, '2017': zeros, '2018': zeros, '2019': zeros})
    df_years = pd.concat([df_years, filler], ignore_index=True)

df_years = df_years.sort_values(by='Province', ignore_index=True)

In [None]:
# Display the reconciled per-year table (last expression of the cell).
df_years

In [None]:
# Work on a sorted copy so the shared gdf is not modified by this cell.
gdf_2017 = gdf.sort_values(by='ADM1_EN')

# 2017 cases per province name (rows sharing a name are summed), attached by name
# rather than by row position so the values depend only on df_years and cannot
# land on the wrong province. Shapes without a count get 0.
cases_2017 = df_years.groupby('Province')['2017'].sum()
patients = gdf_2017['ADM1_EN'].map(cases_2017).fillna(0)
gdf_2017.insert(0, 'Patient_2017', patients)

# The figure size is set here: GeoDataFrame.plot() ignores figsize when ax is given.
f, ax = plt.subplots(1, figsize=(10, 10))
gdf_ax = gdf_2017.plot(column='Patient_2017', cmap=cmap, ax=ax, legend=True)
gdf_ax.set_axis_off()
# legend=True adds the colorbar as the second axes of the figure; label it with
# the year that is actually plotted.
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel('Number of reported cases, 2017 (Vietnam)')

### Timeline: 2018

In [None]:
# Work on a sorted copy so the shared gdf is not modified by this cell.
gdf_2018 = gdf.sort_values(by='ADM1_EN')

# 2018 cases per province name (rows sharing a name are summed), attached by name
# rather than by row position so the values depend only on df_years and cannot
# land on the wrong province. Shapes without a count get 0.
cases_2018 = df_years.groupby('Province')['2018'].sum()
patients = gdf_2018['ADM1_EN'].map(cases_2018).fillna(0)
gdf_2018.insert(0, 'Patient_2018', patients)

# The figure size is set here: GeoDataFrame.plot() ignores figsize when ax is given.
f, ax = plt.subplots(1, figsize=(10, 10))
gdf_ax = gdf_2018.plot(column='Patient_2018', cmap=cmap, ax=ax, legend=True)
gdf_ax.set_axis_off()
# legend=True adds the colorbar as the second axes of the figure; label it.
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel('Number of reported cases, 2018 (Vietnam)')

### Timeline: 2019

In [None]:
# Work on a sorted copy so the shared gdf is not modified by this cell.
gdf_2019 = gdf.sort_values(by='ADM1_EN')

# 2019 cases per province name (rows sharing a name are summed), attached by name
# rather than by row position so the values depend only on df_years and cannot
# land on the wrong province. Shapes without a count get 0.
cases_2019 = df_years.groupby('Province')['2019'].sum()
patients = gdf_2019['ADM1_EN'].map(cases_2019).fillna(0)
gdf_2019.insert(0, 'Patient_2019', patients)

# The figure size is set here: GeoDataFrame.plot() ignores figsize when ax is given.
f, ax = plt.subplots(1, figsize=(10, 10))
gdf_ax = gdf_2019.plot(column='Patient_2019', cmap=cmap, ax=ax, legend=True)
gdf_ax.set_axis_off()
# legend=True adds the colorbar as the second axes of the figure; label it.
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel('Number of reported cases, 2019 (Vietnam)')

### Comparison side-by-side

In [None]:
# One panel per year, left to right: (frame, column to colour by, panel title).
yearly_panels = (
    (gdf_2017, 'Patient_2017', 'Number of reported cases, 2017'),
    (gdf_2018, 'Patient_2018', 'Number of reported cases, 2018'),
    (gdf_2019, 'Patient_2019', 'Number of reported cases, 2019'),
)

f, axes = plt.subplots(nrows=1, ncols=3, figsize=(30, 10), sharex=True, sharey=True)

# NOTE(review): every panel is drawn with its own colorbar and no shared
# vmin/vmax, so the same colour can stand for different counts in different years.
for panel, (frame, column, title) in zip(axes, yearly_panels):
    frame.plot(column=column, cmap=cmap, ax=panel, legend=True)
    panel.set_title(title)
    panel.set_axis_off()

f.suptitle('Residence of reported cases in Vietnam, 2017 - 2019', fontsize=20)

## Histogram

In [None]:
# Table used by the histograms below; 'admission_date', 'Clinical classification'
# and 'outcome_died' are the columns read from it.
cases_path = '../Data/out.csv'
data = pd.read_csv(cases_path)

### Critical vs. Non critical

In [None]:
import datetime
from matplotlib.dates import date2num

In [None]:
# Print the class counts explicitly: a bare expression that is not the last
# statement of a cell displays nothing.
print(data['Clinical classification'].value_counts())

# Reduce each admission timestamp to its calendar date, then order the rows
# chronologically.
data['admission_date'] = pd.to_datetime(data['admission_date']).dt.date
data = data.sort_values(by='admission_date')
print(data['admission_date'])

In [None]:
from matplotlib.dates import DateFormatter

start, end = data['admission_date'].min(), data['admission_date'].max()
one_day = datetime.timedelta(days=1)

# Admission dates per class, in the same order as the legend labels below:
# 'Clinical classification' == 0 first (non-critical), then == 1 (critical).
pack = [
    data.loc[data['Clinical classification'] == 0, 'admission_date'].to_numpy(),
    data.loc[data['Clinical classification'] == 1, 'admission_date'].to_numpy(),
]

# Weekly bin edges counted from the first admission. The number of weeks is
# rounded up (and is at least 1) so the last edge reaches `end`; stopping at the
# last whole week would leave the final days outside every bin, and hist()
# silently drops values that fall outside the bins.
n_weeks = max(1, -(-(end - start).days // 7))
weeks = [start + 7 * k * one_day for k in range(n_weeks + 1)]

numweek = date2num(weeks)

# Draw stacked bar chart, divided by week, based on Clinical classification
f, ax = plt.subplots(figsize=(20, 10))
n, bins, patches = ax.hist(pack, bins=numweek, stacked=True, label=['Non-Critical', 'Critical'])
ax.xaxis.set_major_formatter(DateFormatter("%y-%m-%d"))
# One tick on every second bin edge, i.e. every two weeks.
ax.xaxis.set_ticks(numweek[::2])
ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Cases')
ax.set_title('Critical vs. Non-critical')
plt.gcf().autofmt_xdate(rotation=90)

### Deceased vs. Alive

In [None]:
# Display how many rows carry each 'outcome_died' value (last expression of the cell).
data['outcome_died'].value_counts()

In [None]:
start, end = data['admission_date'].min(), data['admission_date'].max()
one_day = datetime.timedelta(days=1)

# Admission dates per outcome, in the same order as the legend labels below:
# 'outcome_died' == 0 first (alive), then == 1 (deceased).
pack = [
    data.loc[data['outcome_died'] == 0, 'admission_date'].to_numpy(),
    data.loc[data['outcome_died'] == 1, 'admission_date'].to_numpy(),
]
print(pack[0].size, pack[1].size)

# Weekly bin edges counted from the first admission. The number of weeks is
# rounded up (and is at least 1) so the last edge reaches `end`; stopping at the
# last whole week would leave the final days outside every bin, and hist()
# silently drops values that fall outside the bins.
n_weeks = max(1, -(-(end - start).days // 7))
weeks = [start + 7 * k * one_day for k in range(n_weeks + 1)]

numweek = date2num(weeks)

# Draw stacked bar chart, divided by week, based on outcome_died
f, ax = plt.subplots(figsize=(20, 10))
n, bins, patches = ax.hist(pack, bins=numweek, stacked=True, label=['Alive', 'Deceased'])
ax.xaxis.set_major_formatter(DateFormatter("%y-%m-%d"))
# One tick on every second bin edge, i.e. every two weeks.
ax.xaxis.set_ticks(numweek[::2])
ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Cases')
ax.set_title('Alive vs. Deceased')
plt.gcf().autofmt_xdate(rotation=90)