In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd

import re

import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import shapefile
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import seaborn as sns

from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset

from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.patches import PathPatch

import folium
from folium.plugins import MarkerCluster

In [3]:
# Geocoding

from geopy.geocoders import Nominatim
from geopy.geocoders import Bing
from geopy.exc import GeocoderTimedOut

In [4]:
# Run-date stamp reused in figure captions and in some output file names
from datetime import date

# Calendar date at the moment the cell is executed
today = date.today()
# Rendered as month-day-year
time_stamp = date.strftime(today, "%m-%d-%Y")

# Loading the data

In [5]:
# Read the DOH case-information data drop into the working frame
df = pd.read_csv(
    'Raw Data/DOH COVID Data Drop_ 20200417 - 05 Case Information.csv'
)
# Preview the first rows
df.head()

Unnamed: 0,CaseNo,Age,AgeGroup,Sex,DateRepConf,DateRecover,DateDied,RemovalType,DateRepRem,Admitted,RegionRes,ProvCityRes
0,C100119,38.0,35 to 39,Female,1/30/2020,2/8/2020,,Recovered,2/10/2020,,Negros Oriental,Dumaguete City (Capital)
1,C100264,44.0,40 to 44,Male,2/3/2020,,2/1/2020,Died,2/2/2020,,Negros Oriental,Dumaguete City (Capital)
2,C100660,60.0,60 to 64,Female,2/5/2020,1/31/2020,,Recovered,2/10/2020,Yes,Bohol,Panglao
3,C100776,48.0,45 to 49,Male,3/6/2020,3/19/2020,,Recovered,3/27/2020,Yes,NCR,Taguig City
4,C101015,62.0,60 to 64,Male,3/6/2020,,3/11/2020,Died,3/12/2020,Yes,Rizal,Cainta


In [6]:
# Column dtypes straight after loading, before any date parsing
df.dtypes

CaseNo          object
Age            float64
AgeGroup        object
Sex             object
DateRepConf     object
DateRecover     object
DateDied        object
RemovalType     object
DateRepRem      object
Admitted        object
RegionRes       object
ProvCityRes     object
dtype: object

In [7]:
# Number of rows in the case table
len(df)

5878

Total number of confirmed cases in the country.

# Cleaning

### Making date columns into datetime

In [8]:
# Parse the four month/day/year text columns into datetimes in one pass
df[['DateRepConf', 'DateRecover', 'DateRepRem', 'DateDied']] = (
    df[['DateRepConf', 'DateRecover', 'DateRepRem', 'DateDied']]
    .apply(pd.to_datetime, format='%m/%d/%Y')
)

### Changing region names to match the shapefile's region names

In [9]:
# Rename RegionRes values so they match the names used by the shapefile.
# The result is assigned back to the column rather than calling
# `df.RegionRes.replace(..., inplace=True)`: an in-place call on a Series pulled
# out of the frame is chained assignment, which pandas warns about and which no
# longer updates `df` once Copy-on-Write is active.
df['RegionRes'] = df['RegionRes'].replace({
    "Cotabato City (not a province)": "Maguindanao",
    "NCR": "Metropolitan Manila",
    "Cotabato (North Cotabato)": "North Cotabato",
    "Samar (Western Samar)": "Samar",
})

### Problematic in OSM

For some reason, OSM geocodes these towns to the town boundary rather than to the town itself, so we replace their addresses with landmarks:

Mogpog -> Mogpog Health Center, Mogpog

Torrijos -> St. Ignatius of Loyola Church, Torrijos

Mahinog -> St. Michael Parish Church, Mahinog

Tarangnan -> St. Francis of Assisi Parish Church, Tarangnan

Panglao -> Panglao plaza

Jamindan -> Jamindan Public Market, Jamindan

Mambusao -> Capiz State University - Poblacion Campus

Pandan -> Pandan Market, Pandan

San Jose -> San Jose Rural Health Unit, Romblon

Roxas City -> Roxas Airport


Incorrect names:
Jala-Jala -> Jalajala

In [None]:
# df.ProvCityRes.replace(to_replace ="Mogpog", value ="Mogpog Health Center, Mogpog", inplace=True) 
# df.ProvCityRes.replace(to_replace ="Torrijos", value ="St. Ignatius of Loyola Church, Torrijos", inplace=True) 
# df.ProvCityRes.replace(to_replace ="Mahinog", value ="St. Michael Parish Church, Mahinog", inplace=True) 
# df.ProvCityRes.replace(to_replace ="Panglao", value ="Panglao plaza", inplace=True) 

In [10]:
# Fix the spelling of Jala-Jala. Assigned back to the column because an
# `inplace=True` replace on `df.ProvCityRes` is chained assignment and does not
# reliably write through to `df` (it is a no-op under pandas Copy-on-Write).
df['ProvCityRes'] = df['ProvCityRes'].replace("Jala-Jala", "Jalajala")

# Time series

### Number of cases per day

In [11]:
# Cases confirmed per reporting date, in chronological order
df_number_of_cases = (
    df.DateRepConf
      .value_counts()
      .sort_index()
      .to_frame(name='new_cases')
)
# Running total of confirmed cases
df_number_of_cases['total_cases'] = df_number_of_cases['new_cases'].cumsum()
df_number_of_cases.tail()

Unnamed: 0,new_cases,total_cases
2020-04-13,284,4932
2020-04-14,291,5223
2020-04-15,230,5453
2020-04-16,207,5660
2020-04-17,218,5878


In [12]:
# Sanity check: the daily counts should add up to the total number of cases

df_number_of_cases['new_cases'].sum()

5878

### Plot

In [14]:
fig, ax = plt.subplots(figsize=(8, 6))
plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')
ax.set_title('Confirmed COVID-19 Positive Cases')

# Top of the y-axis, in thousands: latest cumulative total plus 1.5 of headroom
y_max = df_number_of_cases['total_cases'].iloc[-1] / 1000 + 1.5

# Story: one dashed marker line plus a caption per event.
# Each entry is (date of the line, date where the caption starts, caption height, caption).
timeline = [
    ('2020-03-06', '2020-03-05', 1,
     '   First local\n   case'),
    ('2020-03-9', '2020-03-07', 4,
     '   President\n   Duterte\n   declares a\n   state of\n   public health\n   emergency'),
    ('2020-03-10', '2020-03-08', 2.3,
     '   Classes in all\n   levels are\n   suspended from\n   March 10-14'),
    ('2020-03-12', '2020-03-11', 1,
     'President Duterte\nplaces Manila in\nCommunity\nQuarantine'),
    ('2020-03-16', '2020-03-14', 4.58,
     'President Duterte\nplaces the\ncountry in\na state of\ncalamity.\n\n\nEnhanced\nCommunity\nQuarantine takes\neffect throughout\nLuzon until\nApril 12'),
    ('2020-03-17', '2020-03-15', 2.6,
     'Airline companies\nstart cancellations\nof domestic flights'),
    ('2020-03-24', '2020-03-22', y_max-1.3,
     'Congress and\nSenate grant\nPresident Duterte\nemergency\npowers'),
    # Disabled event:
    # ('2020-03-28', '2020-03-26', 2.4,
    #  'Number of\nconfirmed\npositive cases\nsurpass the\n1000-mark'),
    ('2020-03-30', '2020-03-28', y_max-1.3,
     '   Cebu City is\n   placed on\n   Enhanced\n   Community\n   Quarantine'),
    ('2020-04-03', '2020-04-01', 1,
     'FDA approves\nUP-NIH test kits'),
    ('2020-04-04', '2020-04-02', y_max-1.3,
     '   Davao City is\n   placed on\n   Enhanced\n   Community\n   Quarantine'),
    ('2020-04-06', '2020-04-05', 4.58,
     'All regions\nhave at\nleast one\nconfirmed\ncase'),
    ('2020-04-07', '2020-04-06', 1.8,
     'President Duterte\nextends the Luzon\nEnhanced\nCommunity\nQuarantine to\nApril 30'),
    ('2020-04-14', '2020-04-12', 1,
     'Targeted Mass\ntesting begins\nin selected\ncities'),
    ('2020-04-17', '2020-04-15', 2.3,
     'Sitio Zapatera,\nBrgy. Luz of\nCebu City, the\nepicenter of\nCOVID19 cases\nin Central Vis-\nayas, is placed\nin total lock-\ndown'),
]
for line_date, caption_date, caption_y, caption in timeline:
    ax.axvline(pd.to_datetime(line_date), lw=0.8, color='violet', linestyle='--', dashes=(10, 10))
    ax.text(pd.to_datetime(caption_date), caption_y, caption, fontsize=7)

# Plots: cumulative total as a line, daily new cases as stems topped with diamonds
(df_number_of_cases['total_cases'] / 1000).plot(
    ax=ax, marker='o', label='Total', lw=1.5, color='#4da6ff', ms=3.5)

ax.vlines(df_number_of_cases.index, ymin=0, ymax=df_number_of_cases.new_cases / 1000,
          lw=1.5, colors='#ff4d4d', label='New')

ax.scatter(x=df_number_of_cases.index, y=df_number_of_cases.new_cases / 1000,
           color='#ff4d4d', marker='D', s=4)

# Designing: a month-day tick every five days
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))

ax.set_xlabel(' ')
ax.set_xlim(pd.to_datetime('2020-03-05'), pd.to_datetime('2020-04-21'))
plt.xticks(rotation=25)

ax.set_ylabel('Count (in thousands)')
ax.set_ylim(0, y_max)

plt.legend(loc=2, frameon=True)
# Source note below the x-axis
ax.text(pd.to_datetime('2020-03-05'), -0.8,
        'Data from the Department of Health as of {}'.format(time_stamp), fontsize=8)

plt.savefig('images/total_cases', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

### New cases only

In [15]:
fig, ax = plt.subplots()
plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')

# Daily new cases in hundreds: bars with a line traced over the bar tops
ax.bar(df_number_of_cases.index, df_number_of_cases['new_cases'] / 100,
       width=0.8, color='#005aff')
ax.plot(df_number_of_cases.index, df_number_of_cases['new_cases'] / 100,
        color="#ff005a")

ax.set_xlim(pd.to_datetime('2020-03-05'), pd.to_datetime('2020-04-18'))
ax.set_ylim(0, 6)

# A month-day tick every five days
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))

ax.set_title('New Cases')
ax.set_xlabel(' ')
ax.set_ylabel('Count (in hundreds)')
plt.xticks(rotation=25)

# Source note below the x-axis
ax.text(pd.to_datetime('2020-03-05'), -0.8,
        'Data from the Department of Health as of {}'.format(time_stamp), fontsize=8)

plt.savefig('images/new_cases', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

### Deaths and Recoveries

In [16]:
# Recoveries: recovery dates of the cases marked 'Recovered', counted per day

df_number_of_recoveries = (
    df.loc[df['RemovalType'] == 'Recovered', 'DateRecover']
      .value_counts()
      .sort_index()
      .to_frame(name='new_recoveries')
)
# Running total of recoveries
df_number_of_recoveries['total_recoveries'] = df_number_of_recoveries['new_recoveries'].cumsum()
df_number_of_recoveries.tail()

Unnamed: 0,new_recoveries,total_recoveries
2020-04-12,15,339
2020-04-13,11,350
2020-04-14,9,359
2020-04-15,9,368
2020-04-16,3,371


In [17]:
# Deaths: dates of death of the cases marked 'Died', counted per day

df_number_of_deaths = (
    df.loc[df['RemovalType'] == 'Died', 'DateDied']
      .value_counts()
      .sort_index()
      .to_frame(name='new_deaths')
)
# Running total of deaths
df_number_of_deaths['total_deaths'] = df_number_of_deaths['new_deaths'].cumsum()
df_number_of_deaths.head()

Unnamed: 0,new_deaths,total_deaths
2020-02-01,1,1
2020-03-11,3,4
2020-03-12,3,7
2020-03-13,4,11
2020-03-14,4,15


In [18]:
fig, ax = plt.subplots(figsize=(8, 6))
plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')
ax.set_title('Recoveries and Deaths')


# Cumulative recoveries and cumulative deaths on the same axes
df_number_of_recoveries['total_recoveries'].plot(
    ax=ax, label='Total Recoveries', lw=1.5, color='#4da6ff', ms=3.5)
df_number_of_deaths['total_deaths'].plot(
    ax=ax, label='Total Deaths', lw=1.5, color='#ff4d4d', ms=3.5)

# A month-day tick every five days
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))

ax.set_xlabel(' ')
ax.set_xlim(pd.to_datetime('2020-03-05'), pd.to_datetime('2020-04-17'))

ax.legend()

# Dont save yet, incomplete

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x220b485be48>

### Exponential Plot

In [19]:
fig, ax = plt.subplots(figsize=(8, 6))
plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')

# Cumulative totals with the first five reporting dates skipped. The x-axis is
# the row position inside that slice, so it equals "days elapsed" only if no
# calendar day is missing from the index -- TODO confirm.
cases_since_10th = df_number_of_cases['total_cases'][5:]
no_days_since_10th = np.arange(len(cases_since_10th))
ax.plot(no_days_since_10th, cases_since_10th,
        lw=0.5, color='k')

# Reference line growing by 20% per day (disabled)
#ax.plot(no_days_since_10th[:30], 1.2**no_days_since_10th[:30])

# Customization
plt.title('Number of Confirmed Positive Cases (as of {})'.format(time_stamp))
plt.xlabel('Days Since the 10th Case (March 10, 2020)')
plt.ylabel('Count')

ax.set_yscale('log')
# `**` is exponentiation; the previous `10^0` was a bitwise XOR that evaluates
# to 10, which put the bottom of the log axis at 10 instead of 1.
plt.ylim((10**0, 10**6))

plt.grid(True)
plt.savefig('images/exponential', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

# Age Analysis

In [20]:
# Number of cases whose age is missing
num_nan_ages = df.Age.isna().sum()
num_nan_ages

2

### Different bins

In [21]:
fig = plt.figure()

plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')

ax = plt.subplot(111)

# Five-year age bands from 0 up to 105
bins = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105])
# Missing ages are dropped explicitly; their number is reported in the title
n, bins, patches = ax.hist(df.Age.dropna(), bins, histtype='bar', color='#936ED4',rwidth=0.8, width=2, align='left')
# Highlight the band with the most cases
patches[np.argmax(n)].set_fc('#FCB249')

# Label each bar with its count, slightly above the bar top
for band_start, band_count in zip(bins[:-1], n):
    ax.text(band_start, band_count + 20, int(band_count),
            color='black', fontweight='bold',
            horizontalalignment='center', verticalalignment='center', fontsize=8)

if num_nan_ages > 0:
    ax.set_title('Number of Infected Per Age Group ({} for validation)'.format(num_nan_ages))
else:
    ax.set_title('Number of Infected Per Age Group')

# One tick per band, on the band's lower edge. The "lo-hi" labels are built
# from the bin edges themselves instead of being read back from the rendered
# tick texts, and there is exactly one label per tick: the previous version set
# 21 labels on 22 fixed ticks, which newer Matplotlib releases reject.
new_labels = ['{}-{}'.format(int(lo), int(hi) - 1) for lo, hi in zip(bins[:-1], bins[1:])]
ax.set_xticks(bins[:-1])
ax.set_xlim(-10,104)
ax.set_ylim(0,np.max(n)+150)

# Removing spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)

# Hiding the y-axis (the counts are printed on the bars)
ax.get_yaxis().set_visible(False)

# Removing tick marks
ax.tick_params(axis=u'both', which=u'both',length=0)

# Time stamp
ax.text(81, -120, 'As of {}'.format(time_stamp), fontsize=8)

ax.set_xticklabels(new_labels, rotation=40)

<IPython.core.display.Javascript object>

[Text(0, 0, '0-4'),
 Text(5, 0, '5-9'),
 Text(10, 0, '10-14'),
 Text(15, 0, '15-19'),
 Text(20, 0, '20-24'),
 Text(25, 0, '25-29'),
 Text(30, 0, '30-34'),
 Text(35, 0, '35-39'),
 Text(40, 0, '40-44'),
 Text(45, 0, '45-49'),
 Text(50, 0, '50-54'),
 Text(55, 0, '55-59'),
 Text(60, 0, '60-64'),
 Text(65, 0, '65-69'),
 Text(70, 0, '70-74'),
 Text(75, 0, '75-79'),
 Text(80, 0, '80-84'),
 Text(85, 0, '85-89'),
 Text(90, 0, '90-94'),
 Text(95, 0, '95-99'),
 Text(100, 0, '100-104')]

In [22]:
# Sanity check: histogram counts plus the cases without an age

n.sum() + num_nan_ages

5878.0

This matches the total number of cases.

### Horizontal bar graph

In [23]:
fig = plt.figure(figsize=(9,6))

plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')
ax = plt.subplot(111)

if num_nan_ages > 0:
    ax.set_title('Number of Infected Per Age Group ({} for validation)'.format(num_nan_ages))
else:
    ax.set_title('Number of Infected Per Age Group')


# Five-year age bands from 0 up to 105
bins = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105])
# Missing ages are dropped explicitly; their number is reported in the title
n, bins, patches = ax.hist(df.Age.dropna(), bins, histtype='bar', orientation="horizontal",
                           color='#936ED4', rwidth=0.5,  align='left')#,width=2 ,
# Highlight the band with the most cases
patches[np.argmax(n)].set_fc('#FCB249')


# Label each bar with its count, just past the end of the bar
for band_start, band_count in zip(bins[:-1], n):
    ax.text(band_count + 20, band_start - 0.4, int(band_count),
            color='black', fontweight='bold',
            horizontalalignment='center', verticalalignment='center', fontsize=10)


# One tick per band, on the band's lower edge. The "lo - hi" labels are built
# from the bin edges themselves instead of being read back from the rendered
# tick texts, and there is exactly one label per tick: the previous version set
# 21 labels on 22 fixed ticks, which newer Matplotlib releases reject.
# (The axis set-up used to be written out twice; the repeat is removed.)
new_labels = ['{} - {}'.format(int(lo), int(hi) - 1) for lo, hi in zip(bins[:-1], bins[1:])]
ax.set_yticks(bins[:-1])
ax.set_yticklabels(new_labels)
ax.set_ylim(-5,104)
ax.set_xlim(0,np.max(n)+150)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)

# Hiding the x-axis (the counts are printed next to the bars)
ax.get_xaxis().set_visible(False)

# Timestamp and source
ax.text(0, -10, 'Data from the Department of Health as of {}'.format(time_stamp), fontsize=8)

plt.savefig('images/num_infected', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

### Deaths

In [24]:
fig = plt.figure(figsize=(9,6))

plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')
ax = plt.subplot(111)

ax.set_title('Deaths Per Age Group')

# Five-year age bands from 0 up to 105
bins = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105])
# Ages of the cases marked 'Died'; rows without an age are dropped explicitly
n, bins, patches = ax.hist(df.loc[df.RemovalType=='Died', 'Age'].dropna(), bins, histtype='bar', orientation="horizontal",
                           color='#998AD3', rwidth=0.5,  align='left')#,width=2 ,#8ac4d3
# Highlight the band with the most deaths
patches[np.argmax(n)].set_fc('#fdb927')


# Label each bar with its count, just past the end of the bar
for band_start, band_count in zip(bins[:-1], n):
    ax.text(band_count + 1, band_start - 0.3, int(band_count),
            color='black', fontweight='bold',
            horizontalalignment='center', verticalalignment='center', fontsize=10)

# One tick per band, on the band's lower edge. The "lo - hi" labels are built
# from the bin edges themselves instead of being read back from the rendered
# tick texts, and there is exactly one label per tick: the previous version set
# 21 labels on 22 fixed ticks, which newer Matplotlib releases reject.
new_labels = ['{} - {}'.format(int(lo), int(hi) - 1) for lo, hi in zip(bins[:-1], bins[1:])]
ax.set_yticks(bins[:-1])
ax.set_yticklabels(new_labels)

# The axis set-up used to be written out twice; only the limits that took
# effect (the second set) are kept.
ax.set_ylim(-5,104)
ax.set_xlim(0,np.max(n)+5)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)

# Hiding the x-axis (the counts are printed next to the bars)
ax.get_xaxis().set_visible(False)

# Timestamp and source
ax.text(0, -10, 'Data from the Department of Health as of {}'.format(time_stamp), fontsize=8)

plt.savefig('images/death_age_gp', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

In [25]:
# Sum of the histogram counts currently held in n -- presumably those of the deaths histogram drawn just before; relies on cell order
np.sum(n)

387.0

Number of deaths counted in the histogram.

### Recoveries

In [26]:
fig = plt.figure(figsize=(9,6))

plt.rcParams.update({'font.size': 10})
plt.style.use('seaborn-dark')
ax = plt.subplot(111)

ax.set_title('Recoveries Per Age Group')

# Five-year age bands from 0 up to 105
bins = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105])
# Ages of the cases marked 'Recovered'; rows without an age are dropped explicitly
n, bins, patches = ax.hist(df.loc[df.RemovalType=='Recovered', 'Age'].dropna(), bins, histtype='bar', orientation="horizontal",
                           color='#8aa0d3', rwidth=0.5,  align='left')#,width=2 ,
# Highlight the band with the most recoveries
patches[np.argmax(n)].set_fc('#d38aa0')


# Label each bar with its count, just past the end of the bar
for band_start, band_count in zip(bins[:-1], n):
    ax.text(band_count + 1, band_start - 0.3, int(band_count),
            color='black', fontweight='bold',
            horizontalalignment='center', verticalalignment='center', fontsize=10)

# One tick per band, on the band's lower edge. The "lo - hi" labels are built
# from the bin edges themselves instead of being read back from the rendered
# tick texts, and there is exactly one label per tick: the previous version set
# 21 labels on 22 fixed ticks, which newer Matplotlib releases reject.
new_labels = ['{} - {}'.format(int(lo), int(hi) - 1) for lo, hi in zip(bins[:-1], bins[1:])]
ax.set_yticks(bins[:-1])
ax.set_yticklabels(new_labels)

# The axis set-up used to be written out twice with different limits; only the
# limits that took effect (the second set) are kept.
ax.set_ylim(-5,104)
ax.set_xlim(0,np.max(n)+5)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)

# Hiding the x-axis (the counts are printed next to the bars)
ax.get_xaxis().set_visible(False)

# Timestamp and source
ax.text(0, -10, 'Data from the Department of Health as of {}'.format(time_stamp), fontsize=8)

plt.savefig('images/recoveries_age_gp', dpi=300, bbox_inches='tight')

<IPython.core.display.Javascript object>

In [27]:
# Sum of the histogram counts currently held in n -- presumably those of the recoveries histogram drawn just before; relies on cell order
np.sum(n)

487.0

Number of recoveries counted in the histogram.

# Mapping the provinces with cases

In [28]:
# Cases per RegionRes value, largest first, as a two-column table
# ('Province', 'counts') numbered from 0; rows without a RegionRes are left out
df_regions = (
    df.RegionRes
      .dropna()
      .value_counts()
      .rename_axis('Province')
      .reset_index(name='counts')
)
df_regions[:10]

Unnamed: 0,Province,counts
0,Metropolitan Manila,3823
1,Rizal,303
2,Laguna,235
3,Cavite,195
4,Bulacan,97
5,Cebu,90
6,Davao del Sur,83
7,Batangas,74
8,Bataan,72
9,Pampanga,60


In [29]:
# Sanity check: number of cases that carry a RegionRes value

df_regions['counts'].sum()

5436

In [30]:
# Cases not assigned to any province yet ("for validation"):
# all cases minus those counted in df_regions

num_prov_TBA = len(df) - df_regions['counts'].sum()
num_prov_TBA

442

In [31]:
# Loading the shapefile
# NOTE(review): phl1 is not referenced again in the visible cells; the maps read the same file through Basemap.readshapefile -- confirm whether this reader is still needed
phl1 = shapefile.Reader("Shapefiles/gadm36_PHL_1") 

In [None]:
fig, ax = plt.subplots(figsize=(8,12))

# Mention the unassigned cases in the title only when there are any
if num_prov_TBA > 0:
    plt.title('Provinces with Confirmed COVID19 Cases ({} for validation)'.format(num_prov_TBA))
elif num_prov_TBA == 0:
    plt.title('Provinces with Confirmed COVID19 Cases')

# Transverse-Mercator map bounded by the given lower-left / upper-right corners
m = Basemap(projection='tmerc', resolution='f',
            llcrnrlon=116.3, llcrnrlat=4.4,
            urcrnrlon=127.8, urcrnrlat=21.1,
            lat_0=14, lon_0=120)

# Reading the shapefile without drawing it; shapes and attributes land in m.shf / m.shf_info
m.readshapefile('Shapefiles/gadm36_PHL_1', 'shf', linewidth=0.1, drawbounds=False)

# Coloring the province
def province_color(condition):
    """Return polygon patches for the provinces selected by ``condition``.

    Parameters
    ----------
    condition : boolean mask
        Row selector applied to ``df_regions``; the ``Province`` values of the
        selected rows are the provinces to draw.

    Returns
    -------
    list of matplotlib.patches.Polygon
        One closed polygon for every shape in ``m.shf`` whose ``NAME_1``
        attribute is one of the selected province names.
    """
    # Set lookup instead of scanning the list of names for every shape
    selected = set(df_regions[condition].Province)
    return [
        # `closed` is passed by keyword: newer Matplotlib releases no longer
        # accept it positionally
        Polygon(np.array(shape), closed=True)
        for info, shape in zip(m.shf_info, m.shf)
        if info['NAME_1'] in selected
    ]

# Choropleth bands, lightest first:
# (lowest count in the band, first count above the band, fill colour, legend label)
case_bands = [
    (1, 10, '#ffe4d3', r'1-9'),
    (10, 50, '#ffcbac', r'10-49'),
    (50, 200, '#ffa671', r'50-199'),
    (200, 500, '#ff8236', r'200-499'),
    (500, 1000, '#db5200', r'500-999'),
    (1000, np.inf, '#ff2d0f', r'$\geq$ 1000'),
]
legend_handles = []
for band_min, band_stop, shade, band_label in case_bands:
    in_band = (df_regions.counts >= band_min) & (df_regions.counts < band_stop)
    ax.add_collection(PatchCollection(province_color(in_band),
                                      facecolor=shade, linewidths=0.1, zorder=2))
    legend_handles.append(mpatches.Patch(color=shade, label=band_label))

# Legend, highest band first, on a plain white box
leg = plt.legend(loc=2, handles=legend_handles[::-1], frameon=True)
frame = leg.get_frame()
frame.set_facecolor('white')
frame.set_edgecolor('white')


# Make sure that the lakes do not get filled
m.drawmapboundary(fill_color='#c5d0fe')
m.fillcontinents(color='white', lake_color='#c5d0fe')
m.readshapefile('Shapefiles/gadm36_PHL_1', 'shf', linewidth=0.1, drawbounds=True)

# Top provinces
## A second axes in the upper-right corner holds the table
left, bottom, width, height = 0.75, 0.75, 0.2, 0.2
ax2 = fig.add_axes([left, bottom, width, height])
ax2.set_title('Provinces with the Most Cases', fontsize=10, x=0.405, y=.98)

## Making the table: first ten rows of df_regions as two column vectors
top_10_prov_ls = df_regions.Province[:10].values.reshape(-1, 1)
top_10_prov_count = df_regions.counts[:10].values.reshape(-1, 1)
table = ax2.table(cellText=np.hstack((top_10_prov_ls, top_10_prov_count)),
                  colWidths=[0.2]*100,
                  loc='upper right',
                  fontsize=10,
                  cellLoc='left',
                  edges='closed')
table.auto_set_column_width(col=list(range(len(df_regions))))
ax2.axis("off")
ax2.axis('tight')

# Source and credit line
ax.text(0, -40000,
        'Data from the Department of Health as of {}\nMade by Val Anthony Balagon using Matplotlib and Basemap'.format(time_stamp),
        fontsize=8)
fig.tight_layout()

In [None]:
# Export the current figure at 800 dpi; the run date is appended to the file name
plt.savefig('images/provinces_w_cases_{}'.format(time_stamp), dpi=800, bbox_inches='tight')

In [None]:
# Number of rows in df_regions
len(df_regions)

`len(df_regions)` out of 81 provinces have at least one confirmed positive COVID-19 case.

### Locations of each case

In [32]:
# Keep only the cases that have both a city/municipality and a province
df_locs = df[(df.ProvCityRes.notnull()) & (df.RegionRes.notnull())][['CaseNo', 'RegionRes', 'ProvCityRes']].copy()

# Replacing Metropolitan Manila to Metro Manila.
# Assigned back to the column because an `inplace=True` replace on
# `df_locs.RegionRes` is chained assignment and does not reliably write through
# to the frame (it is a no-op under pandas Copy-on-Write).
df_locs['RegionRes'] = df_locs['RegionRes'].replace('Metropolitan Manila', 'Metro Manila')

# Removes parenthesis in cities
def remove_parenthesis(text):
    """Strip a trailing " (...)" qualifier from a place name.

    "Dumaguete City (Capital)" becomes "Dumaguete City". Text that does not
    contain the pattern "<name> (<qualifier>)" is returned unchanged; that
    includes strings such as "Foo(bar)", which used to raise IndexError.
    """
    # Raw string so that `\(` reaches the regex engine as an escaped parenthesis
    match = re.search(r'(.+) \(.+\)', text)
    return match.group(1) if match else text
    
# Removes "City of"
def remove_cityof(txt):
    """Return what follows "City of " in a place name.

    "City of Manila" becomes "Manila". Text without a "City of <name>" part is
    returned unchanged; that includes a bare "City of" with nothing after it,
    which used to raise IndexError.
    """
    match = re.search(r'City of (.+)', txt)
    return match.group(1) if match else txt
    
# Clean the city names: drop the "(...)" qualifier first, then the "City of" prefix
df_locs['ProvCityRes'] = df_locs['ProvCityRes'].apply(
    lambda name: remove_cityof(remove_parenthesis(name))
)

# Combining the city and province string for geocoding
df_locs['comp_address'] = df_locs['ProvCityRes'] + ', ' + df_locs['RegionRes']

# Cases per address, largest first, as a two-column table
# ('com_address', 'counts') numbered from 0
df_locs = (
    df_locs['comp_address']
    .value_counts()
    .rename_axis('com_address')
    .reset_index(name='counts')
)
df_locs.head()

Unnamed: 0,com_address,counts
0,"Quezon City, Metro Manila",1016
1,"Manila, Metro Manila",487
2,"Makati, Metro Manila",332
3,"Parañaque, Metro Manila",331
4,"Mandaluyong, Metro Manila",271


In [33]:
# Show every row and column of df_locs; the display limits are lifted for this block only
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    display(df_locs)

Unnamed: 0,com_address,counts
0,"Quezon City, Metro Manila",1016
1,"Manila, Metro Manila",487
2,"Makati, Metro Manila",332
3,"Parañaque, Metro Manila",331
4,"Mandaluyong, Metro Manila",271
5,"Pasig, Metro Manila",244
6,"San Juan, Metro Manila",195
7,"Taguig City, Metro Manila",193
8,"Caloocan City, Metro Manila",159
9,"Pasay City, Metro Manila",131


In [None]:
# NOTE(review): the Bing Maps key is hard-coded in the notebook, so anyone with
# the file can use it. Consider rotating it and loading it from outside the notebook.
geolocator = Bing('AsCIIUyQuVTJ4Df_KgfhNFOzKq2G18CNh2pKyXuDIfD7BReMtoz9COVbWsqbi4Jh')
def do_geocode_Bing(g, max_retries=3):
    """Geocode one address with the module-level Bing ``geolocator``.

    Prints the resolved address and returns the location object produced by
    ``geolocator.geocode``.

    Parameters
    ----------
    g : str
        Address to look up.
    max_retries : int, optional
        Number of additional attempts made after a ``GeocoderTimedOut``.

    Raises
    ------
    GeocoderTimedOut
        When every attempt timed out.
    ValueError
        When the service returns no match for ``g``.
    """
    attempts = max(1, max_retries + 1)
    for attempt in range(attempts):
        try:
            loc = geolocator.geocode(g)
        except GeocoderTimedOut:
            # Retry here. The previous version called `do_geocode`, a name that
            # does not exist, so any timeout ended in a NameError.
            if attempt == attempts - 1:
                raise
            continue
        if loc is None:
            # No match: fail with the offending address instead of an
            # AttributeError on `loc.address`
            raise ValueError('No geocoding result for {!r}'.format(g))
        print(loc.address)
        return loc

In [35]:
# Geocoding 
coords = []
for i, x in enumerate(df_locs.com_address):
    location = do_geocode_Bing(x)
    coords.append((location.latitude, location.longitude))

Quezon City, NCR, Philippines
Manila, NCR, Philippines
Makati, NCR, Philippines
Parañaque, NCR, Philippines
Mandaluyong, NCR, Philippines
Pasig, NCR, Philippines
San Juan del Monte, NCR, Philippines
Taguig, NCR, Philippines
Caloocan, NCR, Philippines
Pasay, NCR, Philippines
Las Piñas, NCR, Philippines
Muntinlupa, NCR, Philippines
Marikina, NCR, Philippines
Antipolo, Calabarzon, Philippines
Davao, Davao Region, Philippines
Cainta, Calabarzon, Philippines
Cebu, Central Visayas, Philippines
Bacoor, Calabarzon, Philippines
Valenzuela, NCR, Philippines
San Pedro, Calabarzon, Philippines
Calamba, Calabarzon, Philippines
Imus, Calabarzon, Philippines
Taytay, Calabarzon, Philippines
Binangonan, Calabarzon, Philippines
Dasmariñas, Calabarzon, Philippines
Balanga, Central Luzon, Philippines
Santa Rosa, Calabarzon, Philippines
Malabon, NCR, Philippines
Binãn, Calabarzon, Philippines
Navotas, NCR, Philippines
Lipa, Calabarzon, Philippines
Pateros, NCR, Philippines
Santa Cruz, Calabarzon, Philippin

Alfonso, Calabarzon, Philippines
Pio Duran, Bicol Region, Philippines
Digos, Davao Region, Philippines
Piat, Cagayan Valley, Philippines
Panganiban, Bicol Region, Philippines
Laur, Central Luzon, Philippines
Panglao, Central Visayas, Philippines
Butuan, Caraga, Philippines
Tiaong, Calabarzon, Philippines
Cabiao, Central Luzon, Philippines
Jamindan, Western Visayas, Philippines
Echague, Cagayan Valley, Philippines
Ibajay, Western Visayas, Philippines
Sipocot, Bicol Region, Philippines
Arakan, Soccsksargen, Philippines
Santo Tomas, Ilocos Region, Philippines
Zaragoza, Central Luzon, Philippines
Lumba-Bayabao, Autonomous Reg. in Muslim Mindanao, Philippines
Matanao, Davao Region, Philippines
Tanay, Calabarzon, Philippines
La Carlota City, Western Visayas, Philippines
Pitogo, Calabarzon, Philippines
Bokod, Cordillera Administrative Region, Philippines
Tagudin, Ilocos Region, Philippines
Talisay, Calabarzon, Philippines
San Marcelino, Central Luzon, Philippines
Mambusao, Western Visayas, Ph

In [36]:
# Attach the geocoded pairs as a new column; relies on coords having one entry per df_locs row, in row order
df_locs['coordinates'] = coords #latitude, longitude
df_locs.head()

Unnamed: 0,com_address,counts,coordinates
0,"Quezon City, Metro Manila",1016,"(14.647660255432129, 121.05149841308594)"
1,"Manila, Metro Manila",487,"(14.588640213012695, 120.98454284667969)"
2,"Makati, Metro Manila",332,"(14.568719863891602, 121.0281982421875)"
3,"Parañaque, Metro Manila",331,"(14.471159934997559, 121.02198791503906)"
4,"Mandaluyong, Metro Manila",271,"(14.578350067138672, 121.03278350830078)"


### Map

In [37]:
# Number of cases without a usable city/province pair ("for validation"):
# all cases minus those counted in df_locs

num_TBA_city = len(df) - df_locs['counts'].sum()

In [38]:
# Split the coordinate pairs into parallel latitude and longitude lists
lats = [pair[0] for pair in df_locs.coordinates]
longs = [pair[1] for pair in df_locs.coordinates]

In [43]:
fig, ax = plt.subplots(figsize=(8,12))

# Mention the unlocated cases in the title only when there are any
if num_TBA_city == 0:
    plt.title('Locations with Confirmed COVID19 Cases')
if num_TBA_city > 0:
    plt.title('Locations with Confirmed COVID19 Cases ({} for validation)'.format(num_TBA_city))

# Map of the whole country; no projection is passed (the tmerc option is commented out)
qual = 'f'
m = Basemap(llcrnrlon=116.3,llcrnrlat=4.4, urcrnrlon=127.3, urcrnrlat=21.1, resolution=qual, 
            lat_0 = 14, lon_0 = 120) #, projection='tmerc' 

m.drawmapboundary(fill_color='#c5d0fe')
m.fillcontinents(color='#FEF3C5', lake_color='#c5d0fe')
m.drawcoastlines(linewidth=0.1)
m.readshapefile('Shapefiles/gadm36_PHL_1','shf', linewidth=0.1, drawbounds = True)

# Scatter Plot: one translucent marker per address, marker area = 4 x case count
x,y = m(longs, lats)
ax.scatter(x,y, c='red', edgecolors='red', alpha=.3, s=df_locs.counts * 4, zorder=10)

# Table
left, bottom, width, height = [0.16, 0.73, 0.2, 0.2]
ax2 = fig.add_axes([left, bottom, width, height])
ax2.set_title('Cities and Municipalities\nwith the Most Cases', fontsize=10,x=0.215, y=.97)

## Making the table: first ten rows of df_locs as two column vectors
top_10_cities_ls = np.array((df_locs.com_address[:10]))[np.newaxis].T
top_10_cities_count = np.array((df_locs.counts[:10]))[np.newaxis].T
table = ax2.table(cellText=np.hstack((top_10_cities_ls, top_10_cities_count)),
                  colWidths = [0.2]*100,
                  loc='upper right',
                  fontsize=10,
                  cellLoc='left',
                  edges='closed')
table.auto_set_column_width(col=list(range(len(df_locs))))
ax2.axis("off")
ax2.axis('tight')


# Map inset of Metro Manila (12x zoom, upper-right corner)
axins = zoomed_inset_axes(ax, 12, loc=1)
m2 = Basemap(llcrnrlon=120.9, llcrnrlat=14.34, urcrnrlon=121.145, urcrnrlat=14.79, 
             resolution=qual, lat_0 = 14.34, lon_0 = 121, ax=axins) 
# NOTE(review): the next three calls go through `m`, not `m2`. `m` was built
# without ax=, so presumably they land on the current pyplot axes -- confirm
# whether `m2` (bound to axins) was intended.
m.drawmapboundary(fill_color='#c5d0fe')
m.fillcontinents(color='#FEF3C5', lake_color='#c5d0fe')
m.drawcoastlines(linewidth=0.1)
m2.readshapefile('Shapefiles/gadm36_PHL_2', 'shf', linewidth=0.1,  drawbounds=False)

# Highlight Manila: fill every shape whose NAME_1 is 'Metropolitan Manila'
patches = []
for info, shape in zip(m2.shf_info, m2.shf):
    if info['NAME_1'] == 'Metropolitan Manila':
        # `closed` is passed by keyword: newer Matplotlib releases no longer
        # accept it positionally
        patches.append(Polygon(np.array(shape), closed=True))
axins.add_collection(PatchCollection(patches, facecolor='#d0fec5', linewidths=0.1, zorder=2))
m2.readshapefile('Shapefiles/gadm36_PHL_2', 'shf', linewidth=0.1,  drawbounds=True)

# NOTE(review): the inset points are converted with `m` rather than `m2` -- confirm both maps share map coordinates
x_ins, y_ins = m(longs, lats)
axins.scatter(x_ins, y_ins, c='red', alpha=.3, s=df_locs.counts * 0.7, edgecolors='red', zorder=10)
mark_inset(ax, axins, loc1=2, loc2=4, fc="none", ec="0.5")

fig.tight_layout()
# Source and credit line
ax.text(116.3, 4, 'Data from the Department of Health as of {}\nMade by Val Anthony Balagon using Matplotlib and Basemap'.format(time_stamp), fontsize=8)

<IPython.core.display.Javascript object>

The dedent function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use inspect.cleandoc instead.
  # Remove the CWD from sys.path while we load stuff.
The dedent function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use inspect.cleandoc instead.
  from ipykernel import kernelapp as app
The dedent function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use inspect.cleandoc instead.
The dedent function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use inspect.cleandoc instead.
The dedent function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use inspect.cleandoc instead.


Text(116.3, 4, 'Data from the Department of Health as of 04-18-2020\nMade by Val Anthony Balagon using Matplotlib and Basemap')

In [40]:
# Save the case map through its own Figure handle rather than pyplot's
# "current figure", so the output does not depend on which figure was touched last.
fig.savefig('images/map_of_cases_{}'.format(time_stamp), dpi=800, bbox_inches='tight')

# Folium

### Testing Sites

In [None]:
# Read the laboratories CSV and preview its first five rows.
df_testing_sites = pd.read_csv(
    'Raw Data/laboratories.csv'
)
df_testing_sites.head(5)

### Addresses not known

TB Reference Center CHD

Metro Pacific Hospital Holdings, Inc. - Muntinlupa

In [None]:
# Removing
# NOTE(review): rows are selected by position (48 and 59) -- presumably the two
# facilities named in the "Addresses not known" note; confirm against the CSV.
# Because the selection is positional, re-running this cell drops two more rows.
rows_to_remove = df_testing_sites.index[[48, 59]]
df_testing_sites.drop(rows_to_remove, inplace=True)

# Renumber the remaining rows 0..n-1.
df_testing_sites.index = list(range(len(df_testing_sites)))

# with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
#     display(df_testing_sites)

### Cleaning

In [None]:
# Rename facilities: (value as it appears in the data, replacement value).
# The replacements are applied one after another, in the order listed, to the whole frame.
facility_renames = [
    ("UP National Institutes of Health", "UP Manila"),
    ("Tarlac Provincial Lab", "Tarlac Provincial Hospital"),
    ("New World Diagnostics", "New World Diagnostics Quezon City"),
    ("Philippine Red Cross", "Philippine Red Cross National Headquarters"),
    ("Dr. Jorge P. Royeca City Hospital", "Dr. Jorge P. Royeca Hospital"),
    ("Victoriano Luna Hospital", "Victoriano Luna Medical Center"),
    ("Bicol Public Health Laboratory", "Bicol Regional Diagnostic and Reference Laboratory"),
    ("St. Luke's Medical Center - BGC", "St. Luke's Medical Center Global City"),
    ("Philippine Genome Center, UP Diliman", "Philippine Genome Center"),
    ("Laguna Provincial Hospital", "Laguna Medical Center"),
    ("The Medical City", "The Medical City Ortigas"),
    ("Detoxicare Molecular Diagnostics Laboratory", "High Pointe Medical Hub"),
    ("Governor Celestino Gallares Memorial Medical Center",
     "Governor Celestino Gallares Memorial Hospital"),
    ("Chinese General Hospital", "Chinese General Hospital and Medical Center"),
    ("Mariano Marcos Hospital and Medical Center",
     "Mariano Marcos Memorial Hospital and Medical Center"),
    ("De La Salle Medical and Health Sciences Institute - Center for Tuberculosis Laboratory",
     "De La Salle University Medical Center"),  # De La Salle Medical and Health Sciences Institute
    ("Zamboanga City Medical Center - Clinical Laboratory", "Zamboanga City Medical Center"),
    ("Department of Agriculture Field Office - Zamboanga",
     "Department of Agriculture (RSO-Zamboanga City)"),
    ("Philippine Genome Center, UP Mindanao", "University of the Philippines Mindanao"),
    ("San Pablo College and Medical Center", "San Pablo Colleges Medical Center"),
    ("Kaiser Medical Center, Inc.", "Kaiser Medical Center, Philippines"),
    ("Green City Medical Center", "GreenCity Medical Center"),
    ("Divine Word Hospital", "Divine Word Hospital, Bulacan"),
]

for old_name, new_name in facility_renames:
    df_testing_sites.replace(to_replace=old_name, value=new_name, inplace=True)

### Geocoding

In [None]:
geolocator = Nominatim(user_agent="anonymous_joke_lol")
def do_geocode_nom(g, max_retries=3):
    """Geocode one query string with the module-level Nominatim geolocator.

    Parameters
    ----------
    g : str
        Query passed to ``geolocator.geocode``.
    max_retries : int, optional
        How many additional attempts to make after a ``GeocoderTimedOut``
        (so at most ``max_retries + 1`` requests are sent).

    Returns
    -------
    The value returned by ``geolocator.geocode(g)``; its address is printed
    when a result is found. ``None`` is returned (and a notice printed) when
    the geocoder has no match for ``g``.

    Raises
    ------
    GeocoderTimedOut
        If every attempt times out.
    """
    for attempt in range(max_retries + 1):
        try:
            loc = geolocator.geocode(g)
        except GeocoderTimedOut:
            # Retry here instead of delegating to a helper defined in a later
            # cell, and give up after a bounded number of attempts.
            if attempt == max_retries:
                raise
            continue
        if loc is None:
            # No match: report it instead of failing on `loc.address`.
            print('No geocoding result for {!r}'.format(g))
        else:
            print(loc.address)
        return loc

In [None]:
# Geocode every facility name, then keep only its (latitude, longitude) pair.
site_locations = df_testing_sites['health_facility'].apply(do_geocode_nom)
df_testing_sites['coordinates'] = site_locations.apply(lambda loc: (loc.latitude, loc.longitude))

In [None]:
import matplotlib

# Figure and country-wide base map for the testing-laboratory plot.
fig = plt.figure(figsize=(8,12))
ax = fig.add_subplot(111)

reso = 'f'
plt.title('Testing Laboratories and their Accreditation Status')
m = Basemap(llcrnrlon=116.3,llcrnrlat=4.4, urcrnrlon=127.3, urcrnrlat=21.1, resolution=reso, 
            lat_0 = 14, lon_0 = 120, ax=ax) 

# Design
m.drawmapboundary(fill_color='#c5d0fe')
m.fillcontinents(color='#FEF3C5', lake_color='#c5d0fe')
m.drawcoastlines(linewidth=0.1)

# Reading the shapefile
# Project-relative path (same form as the other readshapefile calls in this
# notebook) so the cell is not tied to one machine's directory layout.
m.readshapefile('Shapefiles/gadm36_PHL_1',
                'shf', linewidth=0.1, drawbounds = True)

# Conversion of lats longs wrt to the projection
def testing_scatter(map_, axis_, n, marker, label, size, color):
    lats = [i[0] for i in df_testing_sites[df_testing_sites.stage == n].coordinates]
    longs = [i[1] for i in df_testing_sites[df_testing_sites.stage == n].coordinates]
    xpt, ypt = map_(longs, lats)
    return axis_.scatter(xpt, ypt, c=color, zorder=10, marker=marker, alpha=.7, s=size, label=label)

# One row per accreditation stage:
# (stage, marker, legend label, colour, marker size on main map, marker size on inset)
stage_styles = [
    (1, '.', 'Stage 1', '#ffa41b', 90, 50),    # Self-Assessment
    (2, 'x', 'Stage 2', '#400082', 90, 50),    # Validation
    (3, 'p', 'Stage 3', '#f09675', 90, 50),    # Personnel Training
    (4, 'v', 'Stage 4', '#fe346e', 100, 80),   # Proficiency Testing
    (5, '*', 'Stage 5', '#01e432', 200, 100),  # Fullscale Implementation
]

# Main map: one scatter layer per stage, then a white-framed legend.
for stage_no, stage_marker, stage_label, stage_color, main_size, _ in stage_styles:
    testing_scatter(m, ax, stage_no, stage_marker, stage_label, main_size, color=stage_color)
leg = ax.legend(loc=2, frameon=True)
frame = leg.get_frame()
for paint_frame in (frame.set_facecolor, frame.set_edgecolor):
    paint_frame('white')

# Map inset of Metro Manila
axins = zoomed_inset_axes(m.ax, 12, loc=1)#bbox_to_anchor=(700,1020)
m2 = Basemap(
    llcrnrlon=120.9, llcrnrlat=14.34,
    urcrnrlon=121.145, urcrnrlat=14.79,
    resolution=reso, lat_0=14.34, lon_0=121, ax=axins,
)
m2.drawmapboundary(fill_color='#c5d0fe')
m2.fillcontinents(color='#FEF3C5', lake_color='#c5d0fe')
m2.drawcoastlines(linewidth=0.1)

# Reading the provincial shapefile (shapes are loaded but not drawn yet)
m2.readshapefile('Shapefiles/gadm36_PHL_2', 'shf', linewidth=0.1, drawbounds=False)

# Highlight Manila
patches = [
    Polygon(np.array(shape), True)
    for info, shape in zip(m2.shf_info, m2.shf)
    if info['NAME_1'] == 'Metropolitan Manila'
]
axins.add_collection(PatchCollection(patches, facecolor='#d0fec5', linewidths=0.1, zorder=2))

# Drawing the shapefile
m2.readshapefile('Shapefiles/gadm36_PHL_2', 'shf', linewidth=0.1, drawbounds=True)

# Plotting the same stages on the inset, with the smaller inset sizes
for stage_no, stage_marker, stage_label, stage_color, _, inset_size in stage_styles:
    testing_scatter(m2, axins, stage_no, stage_marker, stage_label, inset_size, color=stage_color)

mark_inset(ax, axins, loc1=2, loc2=4, fc="none", ec="0.5")

# Data-source caption, then write the figure to disk.
caption = 'Data from the Philippine Department of Health as of {}'.format(time_stamp)
ax.text(116.3, 4, caption, fontsize=8)
fig.savefig('images/locs_of_testing_sites_{}'.format(time_stamp), dpi=800, bbox_inches='tight')#

### Folium Interactive

In [None]:
# Centre the interactive map on the mean latitude / longitude of all testing sites.
site_lats = [coord[0] for coord in df_testing_sites.coordinates]
site_longs = [coord[1] for coord in df_testing_sites.coordinates]
m = folium.Map(
    location=[np.mean(site_lats), np.mean(site_longs)],
    tiles='https://{s}.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}{r}.png',
    attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>',
    zoom_start=5.4,
)
# Markers are collected in this cluster by `mapper`.
mc = MarkerCluster()

def mapper(stage,color,icon='default'):
    """Add one marker per testing site at ``stage`` to the global cluster ``mc``.

    Reads ``df_testing_sites`` (columns ``stage``, ``coordinates`` and
    ``health_facility``). Each marker's popup is the facility name followed
    by a "Stage: <stage>" line; ``color`` and ``icon`` are passed to ``folium.Icon``.
    """
    at_stage = df_testing_sites[df_testing_sites.stage == stage]

    # Coordinates of each facility
    coords = list(at_stage.coordinates)

    # Name of each facility with its status
    stage_suffix = '\nStage: {}'.format(str(stage))
    facilities = [name + stage_suffix for name in at_stage.health_facility]

    for coord, facility in zip(coords, facilities):
        marker_icon = folium.Icon(color=color, icon=icon)
        folium.Marker(coord, radius=6, popup=facility, icon=marker_icon).add_to(mc)

# Arguments for each mapper call, stages 1 to 5 in order.
# Stage 5 is the only one that passes an explicit icon ('star').
stage_marker_args = [
    (1, 'orange'),
    (2, 'blue'),
    (3, 'purple'),
    (4, 'green'),
    (5, 'red', 'star'),
]
for marker_args in stage_marker_args:
    mapper(*marker_args)


# Legend

# Attach the marker cluster to the map and display it as the cell output.
m.add_child(mc)
m

In [None]:
# Write the interactive map `m` to images/labs.html.
m.save('images/labs.html')

# Comparison with other countries

# Archived

In [None]:
# # From google sheets directly
# df = pd.read_csv('https://docs.google.com/spreadsheets/d/' + 
#                    '1rLAZH7wN9DkigkFD070gy5YbEPr-RPQaqPvnePrh31E' +
#                    '/export?gid=30186846&format=csv')
# df.head()

In [None]:
# Reading the excel file

# excel = pd.ExcelFile('Raw Data/doh_data_drop_04-14-2020.xlsx')  
# df = pd.read_excel(excel, 'Case Information')
# df.head()

In [None]:
# Removing the 16:00 in the date

# def remove_hr(date):
#     # not null
#     if pd.notnull(date):
#     # Makes variable into a text
#         txt = str(date)
#         return pd.to_datetime(txt.split()[0])
#     else:
#         date
        
# df.DateRepConf = df.DateRepConf.apply(remove_hr)
# df.DateRepRem = df.DateRepRem.apply(remove_hr)
# df.DateRecover = df.DateRecover.apply(remove_hr)
# df.DateDied = df.DateDied.apply(remove_hr)


# df.head()
# # with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
# #     display(df)

# Bing Maps

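API key: (redacted — do not store keys in the notebook; read it at runtime, e.g. from a `BING_MAPS_API_KEY` environment variable, and rotate the key that was previously written here)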

In [None]:
from geopy.geocoders import Bing

In [None]:

def do_geocode(g, geocoder=None, max_retries=3):
    """Geocode one query string, retrying a bounded number of times on timeout.

    Parameters
    ----------
    g : str
        Query passed to the geocoder's ``geocode`` method.
    geocoder : object with a ``geocode`` method, optional
        Geocoder to use. Defaults to the module-level ``geolocator``.
    max_retries : int, optional
        How many additional attempts to make after a ``GeocoderTimedOut``
        (so at most ``max_retries + 1`` requests are sent).

    Returns
    -------
    The value returned by ``geocode(g)``; its address is printed when a
    result is found. ``None`` is returned (and a notice printed) when the
    geocoder has no match for ``g``.

    Raises
    ------
    GeocoderTimedOut
        If every attempt times out.
    """
    if geocoder is None:
        geocoder = geolocator
    for attempt in range(max_retries + 1):
        try:
            loc = geocoder.geocode(g)
        except GeocoderTimedOut:
            # Bounded retry instead of unbounded recursion.
            if attempt == max_retries:
                raise
            continue
        if loc is None:
            # No match: report it instead of failing on `loc.address`.
            print('No geocoding result for {!r}'.format(g))
        else:
            print(loc.address)
        return loc

# Testing the addresses

In [None]:
# Run every address in df_locs through do_geocode; the resulting Series is the cell output.
df_locs['com_address'].apply(do_geocode)