# Notebook used to estimate epidemic waves

In [1]:
import pandas as pd
from functools import reduce
pd.options.mode.chained_assignment = None
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('data/world_r_eff.csv',
        usecols=['country','date','R'],
        parse_dates=['date'],
        squeeze=True
        ).sort_index()

In [3]:
trend_length = 14
logic_func = lambda r: reduce(lambda x, y: x + 1 if y else 0, r)

In [4]:
df_total = pd.DataFrame(columns=['country',	'date',	'R', 'increasing', 'decreasing', 'upward_slope_days', 'downward_slope_days' ])
for country in df.country.unique():
    print(country)
    df_country = df.loc[df['country'] == country]
    
    # counts number consecutive days with R>=1 for a give date
    ser = df.loc[df['country'] == country]['R'] >= 1
    df_country['increasing'] = ser.expanding().apply(logic_func)

    # counts number consecutive days with R<1 for a give date
    ser = df_country['R'] < 1
    df_country['decreasing'] = ser.expanding().apply(logic_func)

    # # counts number consecutive days with upward slope
    ser = df_country['increasing'] >= trend_length
    df_country['upward_slope_days'] = ser.expanding().apply(logic_func)

    # # counts number consecutive days with downward_slope
    ser = df_country['decreasing'] >= trend_length
    df_country['downward_slope_days'] = ser.expanding().apply(logic_func)
    
    # append df to total_df
    df_total = pd.concat([df_total, df_country])

Afghanistan
Albania
Algeria
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Brazil
Brunei
Bolivia
Bosnia and Herzegovina
Botswana
Bulgaria
Burkina Faso
Burma
Burundi
Cabo Verde
Cambodia
Cameroon
Canada
Central African Republic
Chad
Chile
China
Colombia
Comoros
Congo (Brazzaville)
Congo (Kinshasa)
Costa Rica
Cote d'Ivoire
Croatia
Cuba
Cyprus
Czechia
Denmark
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Finland
France
Gabon
Gambia
Georgia
Germany
Ghana
Greece
Grenada
Guatemala
Guinea
Guinea-Bissau
Guyana
Haiti
Honduras
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Kosovo
Kuwait
Kyrgyzstan
Latvia
Lebanon
Lesotho
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
Madagascar
Malawi
Malaysia
Maldives
Mali
Malta
Mauritania
Mauritius
Mexico
Moldova
Monaco
Mongolia
Monte

In [5]:
df_total[50:80]

Unnamed: 0,country,date,R,increasing,decreasing,upward_slope_days,downward_slope_days
50,Afghanistan,2020-05-18,1.389535,51.0,0.0,38.0,0.0
51,Afghanistan,2020-05-19,1.391146,52.0,0.0,39.0,0.0
52,Afghanistan,2020-05-20,1.384887,53.0,0.0,40.0,0.0
53,Afghanistan,2020-05-21,1.374943,54.0,0.0,41.0,0.0
54,Afghanistan,2020-05-22,1.364635,55.0,0.0,42.0,0.0
55,Afghanistan,2020-05-23,1.35353,56.0,0.0,43.0,0.0
56,Afghanistan,2020-05-24,1.330758,57.0,0.0,44.0,0.0
57,Afghanistan,2020-05-25,1.308847,58.0,0.0,45.0,0.0
58,Afghanistan,2020-05-26,1.286269,59.0,0.0,46.0,0.0
59,Afghanistan,2020-05-27,1.266329,60.0,0.0,47.0,0.0


In [11]:
df_total.max()

country                           Zimbabwe
date                   2021-12-04 00:00:00
R                                 5.764254
increasing                           390.0
decreasing                           401.0
upward_slope_days                    377.0
downward_slope_days                  388.0
dtype: object

In [12]:
df_total.loc[df_total['decreasing'] == 2394]

Unnamed: 0,country,date,R,increasing,decreasing,upward_slope_days,downward_slope_days


In [14]:
# counts downward slopes and finds duration of them
ser = df_total['downward_slope_days'] > 0
count_down = df_total['downward_slope_days'] [ser & (ser != ser.shift(-1))].value_counts(normalize=True)
count_down_df = count_down.to_frame().reset_index().rename(columns={'index':'duration','downward_slope_days':'fraction' })
count_down_df.duration += 13
count_down_df.round({'duration':0, 'fraction':2})
count_down_df = count_down_df.sort_values(by=['duration'])
count_down_df

Unnamed: 0,duration,frequency
0,14.0,0.032733
7,15.0,0.019640
5,16.0,0.022913
1,17.0,0.031097
50,18.0,0.008183
...,...,...
133,210.0,0.001637
121,213.0,0.001637
138,216.0,0.001637
131,293.0,0.001637


In [18]:
# counts downward slopes and finds duration of them
ser = df_total['upward_slope_days'] > 0
count_up = df_total['upward_slope_days'] [ser & (ser != ser.shift(-1))].value_counts(normalize=True)
count_up_df = count_up.to_frame().reset_index().rename(columns={'index':'duration','upward_slope_days':'fraction' })
count_up_df.duration += 13
count_up_df.round({'duration':0, 'fraction':2})
count_up_df = count_up_df.sort_values(by=['duration'])
count_up_df

TypeError: decimals must be an integer, a dict-like or a Series

In [17]:
plt.bar(count_up_df['duration'], count_up_df['frequency'], color='b')
ax.set_major_formatter(FormatStrFormatter('%.2f'))
plt.title('Upward Periods')
plt.xlabel('Duration (days)')
plt.ylabel('Fraction')

NameError: name 'ax' is not defined