In [108]:
import pandas as pd
import numpy as np
from matplotlib.dates import date2num

In [2]:
# Read only the columns used by the analysis; the 'date' column is handed to
# read_csv's date parser so it can be sorted and plotted as a date later on.
cols = ['location', 'date', 'total_cases', 'new_cases']
dates = ['date']
df = pd.read_csv("csv/owid-covid-data.csv", parse_dates=dates, usecols=cols)
# show one randomly chosen row as a quick sanity check
df.sample()

Unnamed: 0,location,date,total_cases,new_cases
5930,Indonesia,2020-03-17,134,17


In [159]:
# keep only the rows whose total_cases is strictly above 100
fdf = df[df['total_cases'].gt(100)]

In [187]:
def calc_peaks(df, country='India', plot=True):
    """Find the dates at which the smoothed daily-new-cases trend changes direction.

    The rows for ``country`` are sorted by date, ``new_cases`` is smoothed with
    a centred 6-row rolling mean, and a slope is estimated as the difference
    between the smoothed value 3 rows ahead and 3 rows behind, divided by 6.
    Every row after which the sign of that slope changes is reported, so turns
    from rising to falling and from falling to rising are both included.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``location``, ``date``, ``total_cases`` and
        ``new_cases``. It is not modified.
    country : str, default 'India'
        Value of ``location`` whose rows are analysed.
    plot : bool, default True
        When true, additionally draw total cases, daily new cases (with the
        rolling mean overlaid) and the slope as three subplots, and mark the
        first detected date with a vertical line.

    Returns
    -------
    list
        Values from the ``date`` column, in ascending date order. Empty when
        ``country`` has no rows or no sign change is found. The same list is
        returned whether or not ``plot`` is set.
    """
    # filter sort and prep data
    grp_df = (
        df.loc[df['location'] == country]
        .sort_values(by='date')
        .reset_index(drop=True)
    )
    # nothing to analyse (and nothing to draw) when the country has no rows
    if grp_df.empty:
        return []

    # calc daily new cases avg
    grp_df['avg_new_cases'] = grp_df['new_cases'].rolling(6, min_periods=1, center=True).mean()
    # calc slope of new cases; the first 3 rows have no look-behind value and use 0 instead
    grp_df['change_in_new_cases'] = (
        grp_df['avg_new_cases'].shift(-3) - grp_df['avg_new_cases'].shift(3, fill_value=0)
    ) / 6

    # the last 3 rows have no look-ahead value, so carry the last known slope forward
    slope_sign = np.sign(grp_df['change_in_new_cases'].ffill().to_numpy(dtype=float))
    sign_step = np.diff(slope_sign)
    # NaN counts as non-zero, so steps that involve an unknown slope must be
    # masked out explicitly or they would be reported as sign changes
    sign_change = np.flatnonzero((sign_step != 0) & ~np.isnan(sign_step))
    peaks = [grp_df['date'].iloc[pos] for pos in sign_change]

    # return if plotting not required
    if not plot:
        return peaks

    # plot all 3 cols as subplots
    _, ax_new, ax_chg = grp_df.plot(
        x='date',
        y=['total_cases', 'new_cases', 'change_in_new_cases'],
        title=[
            label + country
            for label in ['Total cases in ', 'Daily new cases in ', 'Change in daily new cases in ']
        ],
        grid=True,
        figsize=(9, 10),
        subplots=True,
        sharex=False,
    )

    # add moving avg to new_cases chart
    grp_df.plot(x='date', y='avg_new_cases', grid=True, ax=ax_new)
    # add x axis to change_in_new_cases chart
    ax_chg.axhline(y=0, linewidth=2, color='r')

    # add a vertical line for the first detected date only
    for peak_date in peaks[:1]:
        for ax in (ax_new, ax_chg):
            ax.axvline(x=peak_date, linewidth=2, color='r')

    return peaks

In [201]:
COUNTRY_DATA = [
    {
        "location": "India",
        "healthdata_peak": "",
        "sutd_peak": "2020-04-20"
    },
    {
        "location": "China",
        "healthdata_peak": "",
        "sutd_peak": "2020-02-08"
    },
    {
        "location": "South Korea",
        "healthdata_peak": "",
        "sutd_peak": "2020-03-02"
    },
    {
        "location": "United States",
        "healthdata_peak": "2020-04-15",
        "sutd_peak": "2020-04-10"
    },
    {
        "location": "Brazil",
        "healthdata_peak": "",
        "sutd_peak": "2020-04-21"
    },
    {
        "location": "United Kingdom",
        "healthdata_peak": "2020-04-10",
        "sutd_peak": "2020-04-12"
    },
    {
        "location": "Italy",
        "healthdata_peak": "2020-03-27",
        "sutd_peak": "2020-03-29"
    },
    {
        "location": "Spain",
        "healthdata_peak": "2020-04-01",
        "sutd_peak": "2020-04-02"
    },
    {
        "location": "Germany",
        "healthdata_peak": "2020-04-16",
        "sutd_peak": "2020-04-01"
    },
    {
        "location": "France",
        "healthdata_peak": "2020-04-05",
        "sutd_peak": "2020-04-03"
    },
    {
        "location": "Portugal",
        "healthdata_peak": "2020-04-03",
        "sutd_peak": "2020-04-06"
    },
    {
        "location": "United Arab Emirates",
        "healthdata_peak": "",
        "sutd_peak": "2020-04-27"
    },
    {
        "location": "Saudi Arabia",
        "healthdata_peak": "",
        "sutd_peak": "2020-04-27"
    },
    {
        "location": "Pakistan",
        "healthdata_peak": "",
        "sutd_peak": "2020-04-27"
    }
]

In [203]:
# Print one pipe-separated line per country: the first date reported by
# calc_peaks (blank when it reports none) next to the two reference dates.
print("Country | Our study | HealthData | SUTD")
for c in COUNTRY_DATA:
    peaks = calc_peaks(fdf, c['location'], False)
    # only the first reported date is compared
    our_study = str(peaks[0].date()) if len(peaks) else ''
    print(" | ".join([c['location'], our_study, c['healthdata_peak'], c['sutd_peak']]))

Country | Our study | HealthData | SUTD
India |  |  | 2020-04-20
China | 2020-02-06 |  | 2020-02-08
South Korea | 2020-03-02 |  | 2020-03-02
United States | 2020-04-09 | 2020-04-15 | 2020-04-10
Brazil |  |  | 2020-04-21
United Kingdom | 2020-04-12 | 2020-04-10 | 2020-04-12
Italy | 2020-03-25 | 2020-03-27 | 2020-03-29
Spain | 2020-04-01 | 2020-04-01 | 2020-04-02
Germany | 2020-04-02 | 2020-04-16 | 2020-04-01
France | 2020-04-01 | 2020-04-05 | 2020-04-03
Portugal | 2020-04-01 | 2020-04-03 | 2020-04-06
United Arab Emirates | 2020-04-17 |  | 2020-04-27
Saudi Arabia | 2020-03-27 |  | 2020-04-27
Pakistan | 2020-03-20 |  | 2020-04-27
