# P0: COVID-19 Progress to zero metric - Countries

P0 metric defined by: 
Juan M. Lavista Ferres MSCa1 , Ruth B Etzioni PhDb2 and William B. Weeks MD, MBA, PhDc1

Data Visualization by Adriana Moscatelli

In [1]:
import pandas as pd
import numpy as np

## Data Source: European Centre for Disease Prevention and Control

In [2]:
# Load the ECDC COVID-19 case-distribution table directly from the ECDC
# open-data endpoint (network access is required to run this cell).
ECDC_CSV_URL = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
master_data_EDC = pd.read_csv(ECDC_CSV_URL)

In [3]:
import plotly.express as px

# Bar chart of daily reported cases for a single country.
# Both axes must come from the same country's rows: taking the dates from one
# country and the counts from another pairs each date with unrelated values.
brazil = master_data_EDC.loc[master_data_EDC['countriesAndTerritories'] == 'Brazil']
fig = px.bar(brazil, x='dateRep', y='cases')
# Flip the x axis direction (kept from the original chart).
fig.update_xaxes(autorange="reversed")
fig.show()

# Total reported cases for the same country, as a sanity check on the chart.
brazil['cases'].sum()

1228114

### Calculation of the latest progress to zero (P0) datapoint.

P0 at time i is defined as one minus the ratio of A(i) to B(i), where A is the 7-day
moving average of new cases ending at day i and B is the 7-day moving average of
new cases corresponding to the historical peak. The measure could be defined using
a shorter interval for the moving average, or it could be defined using a weighted
moving average, but to illustrate the idea, we use a simple 7-day moving average.
Because the measure would not be stable with small numbers of cases, we recommend limiting measure application to geographies with at least 100 COVID-19
cases confirmed within a region.
Defining a 7-day moving average of cases and i as the current day - 6, the calculations
are provided below:

\begin{equation*}
ma(j) = \frac{1}{7}\sum_{k=j-6}^{j} newcases[k] \tag{1}
\end{equation*}

\begin{equation*}
P0 = 1 - \frac{ma(i)}{\max(ma(7), \ldots, ma(i))} \tag{2}
\end{equation*}

In [4]:
import datetime as dt

# Use the same date column name as the US table (Progress_to_zero_US).
master_data_EDC = master_data_EDC.rename(columns={"dateRep": "Date"})


def _parse_report_date(raw):
    """Turn a 'dd/mm/yyyy' string into a datetime."""
    return dt.datetime.strptime(raw, '%d/%m/%Y')


# Real datetimes are needed to find the latest date and for the per-country
# grouping done later on.
master_data_EDC['Date'] = master_data_EDC['Date'].map(_parse_report_date)

In [5]:
# We remove the _ from country names. Important for the map visualization
# Country names use underscores in place of spaces; the map visualization
# needs the names written with spaces.
raw_country_names = master_data_EDC["countriesAndTerritories"]
master_data_EDC["countriesAndTerritories"] = raw_country_names.str.replace('_', ' ')

In [6]:
# We add a column named geo_level to be used for the filtering in powerbi
# Constant geo_level column, used as a filter in Power BI.
master_data_EDC = master_data_EDC.assign(geo_level='Country')

In [7]:
# Fix Taiwan's country code
# Taiwan's country code is 'CNG1925' in the data; swap it for 'TWN'.
# The replacement is applied to every column of the frame.
master_data_EDC = master_data_EDC.replace(to_replace='CNG1925', value='TWN')

In [8]:
# Fix error for Spain. The report date is max-1day
# Fix for Spain: its report dates run one day behind the latest date in the
# data, so every Spanish row is moved forward by one day.
#
# Only the Spanish rows are touched, using plain datetime arithmetic. The
# previous form, Series.apply(pd.DateOffset(1)), called the offset object as a
# function (DateOffset.__call__), which pandas deprecated and later removed,
# and it also computed the shifted date for every row of every country.
is_spain = master_data_EDC['countriesAndTerritories'] == 'Spain'
master_data_EDC.loc[is_spain, 'Date'] = (
    master_data_EDC.loc[is_spain, 'Date'] + pd.Timedelta(days=1)
)

## Progress to Zero at the Country level

In [10]:
# For consistency with US PTZ table we rename cases to Confirmed and deaths to Deaths
# For consistency with the US PTZ table, rename cases -> Confirmed and
# deaths -> Deaths.
master_data_EDC = master_data_EDC.rename(columns={"cases": "Confirmed", "deaths": "Deaths"})

# 7-day moving average of confirmed cases and deaths, per country.
#
# ma(j) is the mean of days j-6..j, i.e. a trailing window that INCLUDES day j.
# Rows are sorted chronologically before grouping so the window never depends
# on the row order of the downloaded file. The previous
# rolling(7).mean().shift(-7) only worked when rows were newest-first, and
# even then each row received the mean of the 7 days *before* it, excluding
# its own day.
# transform() keeps the original index labels, so the results line up with the
# right rows when assigned back to the unsorted frame.
# NOTE(review): the window counts rows, not calendar days; this assumes one
# row per country per day -- confirm against the feed.
_chronological_by_country = master_data_EDC.sort_values('Date').groupby('countriesAndTerritories')

master_data_EDC['Confirmed_moving_average'] = _chronological_by_country['Confirmed'].transform(
    lambda daily: daily.rolling(7, min_periods=7).mean()
)
master_data_EDC['Deaths_moving_average'] = _chronological_by_country['Deaths'].transform(
    lambda daily: daily.rolling(7, min_periods=7).mean()
)

In [11]:
# master_data_EDC['geoId'].isnull().values.any()
# master_data_EDC['countriesAndTerritories'].isnull().values.any()
# master_data_EDC['countryterritoryCode'].isnull().values.any()

In [12]:
# master_data_EDC.countriesAndTerritories.unique()

In [13]:
# Get the peak value of the moving average at the Country level
peak_confirmed_countries = master_data_EDC.groupby('countriesAndTerritories')['Confirmed_moving_average'].max()
master_data_EDC['Peak_confirmed'] = master_data_EDC['countriesAndTerritories'].map(peak_confirmed_countries)

peak_deaths_countries = master_data_EDC.groupby('countriesAndTerritories')['Deaths_moving_average'].max()
master_data_EDC['Peak_deaths'] = master_data_EDC['countriesAndTerritories'].map(peak_deaths_countries)

In [14]:
# Get the latest average value
# We find the last date - 6 and store the value
from datetime import datetime, timedelta

# The latest date present anywhere in the table is taken as the date of the
# last observation (no 6-day offset is applied).
last_obs_countries = master_data_EDC['Date'].max()
is_last_obs = master_data_EDC['Date'] == last_obs_countries

# Moving averages on that date only. Assigning these shorter Series back to
# the frame aligns on the index, so every other row is left as NaN.
last_avg_confirmed = master_data_EDC['Confirmed_moving_average'][is_last_obs]
last_avg_deaths = master_data_EDC['Deaths_moving_average'][is_last_obs]

master_data_EDC['Last_avg_obs_confirmed'] = last_avg_confirmed
master_data_EDC['Last_avg_obs_deaths'] = last_avg_deaths

In [15]:
# Compute the Progress to Zero metric for confirmed cases and deaths averages and the peak for countries
# Progress to Zero = 1 - (moving average / peak moving average), computed
# row by row for confirmed cases and for deaths.
confirmed_vs_peak = master_data_EDC['Confirmed_moving_average'].div(master_data_EDC['Peak_confirmed'])
deaths_vs_peak = master_data_EDC['Deaths_moving_average'].div(master_data_EDC['Peak_deaths'])

master_data_EDC['Progress_to_zero_confirmed'] = 1 - confirmed_vs_peak
master_data_EDC['Progress_to_zero_deaths'] = 1 - deaths_vs_peak

In [16]:
# Cleaning values. We only want to see Progress to Zero for the last observation
# Keep the peak and Progress to Zero values only on rows that carry a
# last-observation average; every other row is blanked to NaN.
has_last_confirmed = master_data_EDC['Last_avg_obs_confirmed'].notnull()
has_last_deaths = master_data_EDC['Last_avg_obs_deaths'].notnull()

for column_name, keep_rows in (
    ('Peak_confirmed', has_last_confirmed),
    ('Peak_deaths', has_last_deaths),
    ('Progress_to_zero_confirmed', has_last_confirmed),
    ('Progress_to_zero_deaths', has_last_deaths),
):
    master_data_EDC[column_name] = master_data_EDC[column_name].where(keep_rows)

In [17]:
# master_data_EDC['popData2018'] = pd.to_numeric(master_data_EDC['popData2018'], errors='coerce')

In [18]:
# Round every numeric column to two decimal places before export.
master_data_EDC = master_data_EDC.round(decimals=2)

In [19]:
# Display the column dtypes as a check on the table before it is exported.
master_data_EDC.dtypes

Date                          datetime64[ns]
day                                    int64
month                                  int64
year                                   int64
Confirmed                              int64
Deaths                                 int64
countriesAndTerritories               object
geoId                                 object
countryterritoryCode                  object
popData2019                          float64
continentExp                          object
geo_level                             object
Confirmed_moving_average             float64
Deaths_moving_average                float64
Peak_confirmed                       float64
Peak_deaths                          float64
Last_avg_obs_confirmed               float64
Last_avg_obs_deaths                  float64
Progress_to_zero_confirmed           float64
Progress_to_zero_deaths              float64
dtype: object

In [20]:
# Write the full table to CSV in the working directory. The DataFrame index
# is written too, since to_csv includes it by default.
master_data_EDC.to_csv(path_or_buf='progress_to_zero_metric_Countries.csv')

In [21]:
import os

import plotly.graph_objects as go
import plotly.io as pio

# World choropleth of the latest P0 (confirmed cases) per country, shown
# inline and exported to Index.html and images/PTZ_countries_map.png.

# Rows for the date of the last observation, with the columns the map needs.
PTZ_countries = master_data_EDC.loc[
    master_data_EDC['Date'] == last_obs_countries,
    ['countriesAndTerritories', 'Progress_to_zero_confirmed', 'countryterritoryCode'],
]

# Country names shown in the hover label.
loc_countries = PTZ_countries["countriesAndTerritories"]

fig = go.Figure(data=go.Choropleth(
    locations=PTZ_countries['countryterritoryCode'],
    z=PTZ_countries['Progress_to_zero_confirmed'],
    colorscale='matter',
    reversescale=True,
    autocolorscale=False,
    marker_line_color='#ebccd6',
    colorbar=dict(title='Percentage',
                  tickformat=',.0%'),
    hoverlabel=dict(font=dict(size=16)),
    showlegend=True,
    text=loc_countries,
    hovertemplate=' %{text} P0 = <b>%{z: ,.0%}</b><extra></extra>',
))

fig.update_layout(
    # The href value is quoted: plotly's pseudo-HTML only recognizes quoted
    # attribute values, so an unquoted URL does not produce a link.
    title_text=(
        '<a href="https://www.medrxiv.org/content/10.1101/2020.05.21.20109298/"><b>P0: Progress to zero</b> </a>'
        '<br><i>A simple metric to measure COVID-19 progress by country/region</i>'
        '<br>P0 corresponds to the percentage decline from a previously recorded peak level.'
        '<br> The metric ranges from 0% (representing a geography that has not yet peaked) to 100% '
        '<br>(representing a geography wherein 0 cases have been recorded for at least seven days).'
    ),
    font=dict(size=10),
    margin=dict(l=20, r=20, t=160, b=20),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'),
    showlegend=False,
    annotations=[dict(xref='paper',
                      yref='paper',
                      x=0, y=-0.02,
                      showarrow=False,
                      # ISO year-month-day; '%Y-%d-%m' printed the day and
                      # month in swapped positions.
                      text='Data: ECDC - Updated: ' + last_obs_countries.strftime('%Y-%m-%d'))]
)

# displayModeBar is a plotly.js config option and has to be passed through
# `config`; as a bare keyword argument to show() it is ignored.
fig.show(config={'displayModeBar': False})

pio.write_html(fig, file='Index.html', auto_open=True)

# Create the output folder if needed; exist_ok avoids the separate
# exists-then-mkdir check.
os.makedirs("images", exist_ok=True)

fig.write_image("images/PTZ_countries_map.png")