In [None]:
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('dark_background')


In [None]:
# When true, every figure is also written to disk as a PNG.
SAVE_IMAGES = True

# Vaccine codes found in the `Vaccine` column -> names shown in plot legends.
# The plotting cells iterate this mapping, so its order is the plotting order.
VACCINES = dict(
    COM='Pfizer',
    AZ='AstraZeneca',
    MOD='Moderna',
    JANSS='Janssen',
)


def vaccine_name(vaccine):
    """Return the display name for a vaccine code, or the code itself when unknown."""
    if vaccine in VACCINES:
        return VACCINES[vaccine]
    return vaccine


In [None]:
def f(value, plus=False):
    """Format a number with '.' as thousands separator and ',' as decimal mark.

    Parameters:
        value: an integer (Python or numpy), a float, anything `float()` accepts
            (e.g. a numeric string), or None.
        plus: when true, strictly positive values get a leading '+'.

    Returns:
        The formatted string, or None when `value` is None.
    """
    import numbers  # local import keeps this cell self-contained

    if value is None:
        return None
    # Any integral type (including numpy/pandas integers such as np.int64) is
    # formatted as an integer. An exact `type(value) == int` check sent those
    # through float() and rendered them with a spurious ",0" suffix.
    # bool is deliberately excluded so it keeps going through float() as before.
    if isinstance(value, numbers.Integral) and not isinstance(value, bool):
        value = int(value)
    else:
        value = float(value)
    # format(..., ",") yields 1,234.5 - swap both separators in one pass.
    r = format(value, ",").translate(str.maketrans(",.", ".,"))
    return f"+{r}" if plus and value > 0 else r


In [None]:
# https://qap.ecdc.europa.eu/public/extensions/COVID-19/vaccine-tracker.html#distribution-tab
# https://www.ecdc.europa.eu/en/publications-data/data-covid-19-vaccination-eu-eea
!curl -s --output '/tmp/ecdc_vacinas.csv' "https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv"
!tail -1 '/tmp/ecdc_vacinas.csv'


In [None]:
def _iso_week_to_date(year_week):
    """Return the Saturday of an ISO-8601 week label such as '2021-W05' as a datetime."""
    # %G / %V / %u are the ISO year, ISO week number and ISO weekday (6 = Saturday).
    # The previous "%Y-W%W-%w" pattern numbers weeks from the first Monday of the
    # calendar year, which is one week off from ISO numbering in years such as 2020
    # and therefore needed hard-coded corrections for individual weeks.
    return datetime.datetime.strptime(f"{year_week}-6", "%G-W%V-%u")


# Load the downloaded ECDC file and order it by reporting week, then country.
data = pd.read_csv('/tmp/ecdc_vacinas.csv')
data = data.sort_values(['YearWeekISO', 'ReportingCountry'])
# Each weekly record is anchored on the Saturday of its ISO week.
data['date'] = data['YearWeekISO'].apply(_iso_week_to_date)


In [None]:
# Peek at the last row after sorting by (YearWeekISO, ReportingCountry).
data.tail(1)


In [None]:
# Distinct vaccine codes present in the dataset (compare with the VACCINES mapping).
data['Vaccine'].unique()


In [None]:
# Keep only Portugal's rows; the country column is constant afterwards, so drop it.
df = data.loc[data['ReportingCountry'] == 'PT'].drop(columns='ReportingCountry')
# FirstDoseRefused is dropped too when it holds no values at all for Portugal.
if df['FirstDoseRefused'].isnull().all():
    df = df.drop(columns='FirstDoseRefused')

# Note: PT may have one week fewer than other countries.
dfpt = df
df.tail()


In [None]:
# Vaccine codes that appear in Portugal's rows.
df['Vaccine'].unique()


In [None]:
# Portugal rows for TargetGroup 'ALL' and Region 'PT'
# (presumably the national aggregate - confirm against the ECDC data dictionary).
df = dfpt.copy()
is_all_groups = df['TargetGroup'] == 'ALL'
is_region_pt = df['Region'] == 'PT'
dft = df.loc[is_all_groups & is_region_pt].drop(columns=['Population', 'Denominator'])
dft


In [None]:
# First doses reported for AstraZeneca, one row per reporting week.
dft.loc[dft['Vaccine'] == 'AZ', ['FirstDose']]


In [None]:
# Janssen rows with a running total of FirstDose in a CUMSUM column.
janssen = dft.loc[dft['Vaccine'] == 'JANSS'].assign(
    CUMSUM=lambda rows: rows['FirstDose'].cumsum()
)
janssen


In [None]:
# Totals of Janssen first doses given and doses received.
dft.loc[dft['Vaccine'] == 'JANSS', ['FirstDose', 'NumberDosesReceived']].sum()


In [None]:
# Mirror Janssen's FirstDose into SecondDose, so that a Janssen first dose is
# counted wherever second doses are counted.
is_janssen = dft['Vaccine'] == 'JANSS'
dft.loc[is_janssen, 'SecondDose'] = dft.loc[is_janssen, 'FirstDose']

# Snapshot that the following cells copy as their starting point.
dftt = dft.copy()

dft.loc[is_janssen]


In [None]:
# Project each week's first doses forward to the date its second dose falls due.
# Days between first and second dose, per vaccine code:
#   AZ    - 84 days (12 weeks, a multiple of 7)
#   JANSS - 0 days (the first dose is also the second)
#   COM   - 21 days before 2021-03-01, 28 days from then on (handled in the loop)
#   other - 28 days
SECOND_DOSE_DELAY_DAYS = {'AZ': 84, 'JANSS': 0}
DEFAULT_DELAY_DAYS = 28
PFIZER_28_DAYS_FROM = datetime.datetime(2021, 3, 1)

dft = dftt.copy()
rows = []
for first_dose_date, v, d1 in zip(dft['date'], dft['Vaccine'], dft['FirstDose']):
    if v == 'COM':
        delay_days = 28 if first_dose_date >= PFIZER_28_DAYS_FROM else 21
    else:
        delay_days = SECOND_DOSE_DELAY_DAYS.get(v, DEFAULT_DELAY_DAYS)
    rows.append([first_dose_date + datetime.timedelta(days=delay_days), v, d1])

# Adjustment for the 21/28-day interval (original note: "AJUSTE 21-28 dias").
# NOTE(review): the 20,000 figure is hard-coded and its source is not documented here.
rows.append([datetime.datetime(2021, 3, 27), 'COM', 20_000])

df2 = (
    pd.DataFrame(rows, columns=["d", "Vaccine", "Prediction"])
    .sort_values("d")
    .set_index("d")
)
df_prev = df2.copy()
df2


In [None]:
# Second doses per reporting date: projected from first doses (dashed lines, round
# markers) against the reported figures (solid lines, square markers), per vaccine
# and in total.
df2 = df_prev.copy()
dft = dftt.copy().set_index('date')

# Colour palette: only referenced by disabled colour options, so the plots below
# use matplotlib's default colour cycle.
CC = "#00876c,#57a18b,#8cbcac,#bed6ce,#f1f1f1,#f1c6c6,#ec9c9d,#ff7700,#0077ff,#e27076,#d43d51".split(",")
C = {
    'COM': CC[0:2],
    'AZ': CC[2:4],
    'MOD': CC[4:6],
    'JANSS': CC[6:8],
    'T': CC[8:10],
}

fig, axes = plt.subplots(figsize=(15,5))

k, kk = 'Prediction', 'Previsão'
for v in VACCINES:
    axes.plot(df2.loc[df2.Vaccine == v, k], label=f"{VACCINES[v]} {kk}", lw=2, marker='o', linestyle="--", markersize=4)
# Aggregate only the plotted column instead of summing every column (strings included).
axes.plot(df2.groupby(level=0)[k].sum(), label=f"Total {kk}", lw=4, marker='o', linestyle="--", markersize=8)

k, kk = 'SecondDose', 'Vacinados'
for v in VACCINES:
    axes.plot(dft.loc[dft.Vaccine == v, k], label=f"{VACCINES[v]} {kk}", lw=2, marker='s', markersize=4)
axes.plot(dft.groupby(level=0)[k].sum(), label=f"Total {kk}", lw=4, marker='s', markersize=9)

# One tick per distinct date appearing in either series.
plt.xticks(sorted(set(dft.index) | set(df2.index)), rotation=90)
# Vertical markers: today and 28 days ahead.
today = datetime.datetime.today()
for when in (today, today + datetime.timedelta(days=28)):
    axes.axvline(x=when, linewidth=2, ls="--", color='white', alpha=0.75)
axes.legend(loc="upper left")
plt.tight_layout()
if SAVE_IMAGES:
    plt.savefig("../../temp/vacinas_ecdc_previsao.png")
plt.show()


In [None]:
# Shared y-axis limit for the per-vaccine charts: the largest weekly FirstDose or
# SecondDose value over the known vaccines, plus 10% headroom.
maxY = 0
for v in VACCINES:
    peaks = dft.loc[dft['Vaccine'] == v, ['FirstDose', 'SecondDose']].max()
    maxY = max(maxY, peaks['FirstDose'], peaks['SecondDose'])
print(maxY)
maxY = maxY * 1.1

In [None]:

def _finish_forecast_plot(axes, filename, ylim=None):
    """Add the shared decorations to the current figure, then save and show it.

    axes: the Axes the series were drawn on.
    filename: PNG path, written only when SAVE_IMAGES is true.
    ylim: optional upper y limit; the lower limit is then fixed at 0.
    """
    # One tick per distinct date appearing in either series.
    plt.xticks(sorted(set(dft.index) | set(df2.index)), rotation=90)
    # Vertical markers: today and 28 days ahead.
    today = datetime.datetime.today()
    for when in (today, today + datetime.timedelta(days=28)):
        axes.axvline(x=when, linewidth=2, ls="--", color='white', alpha=0.75)
    if ylim is not None:
        plt.ylim(0, ylim)
    axes.legend(loc="upper left")
    plt.tight_layout()
    if SAVE_IMAGES:
        plt.savefig(filename)
    plt.show()
    print()


# Totals over all vaccines: projected vs. reported second doses.
fig, axes = plt.subplots(figsize=(15,5))
k, kk = 'Prediction', 'Previsão'
axes.plot(df2.groupby(level=0)[k].sum(), label=f"Total {kk}", lw=4, marker='o', markersize=8, alpha=0.5)
k, kk = 'SecondDose', 'Vacinados'
axes.plot(dft.groupby(level=0)[k].sum(), label=f"Total {kk}", lw=4, marker='s', markersize=8, linestyle="--", alpha=0.5)
_finish_forecast_plot(axes, "../../temp/vacinas_ecdc_previsao_total.png")

# One chart per vaccine, all sharing the y scale computed in maxY.
for v in VACCINES:
    fig, axes = plt.subplots(figsize=(15,5))
    k, kk = 'Prediction', 'Previsão'
    axes.plot(df2.loc[df2.Vaccine == v, k], label=f"{VACCINES[v]} {kk}", lw=4, marker='o', markersize=8, alpha=0.5)
    kk = 'Vacinados'
    # Janssen is read from FirstDose, as before.
    k = 'SecondDose' if v != "JANSS" else 'FirstDose'
    # The legend uses the display label (kk), consistent with the other charts,
    # rather than the raw column name.
    axes.plot(dft.loc[dft.Vaccine == v, k], label=f"{VACCINES[v]} {kk}", lw=4, marker='s', markersize=8, linestyle="--", alpha=0.5)
    _finish_forecast_plot(axes, f"../../temp/vacinas_ecdc_previsao_{v}.png", ylim=maxY)


In [None]:
import math

# Cumulative totals over time: projected vs. reported second doses.
fig, axes = plt.subplots(figsize=(15,5))
k, kk = 'Prediction', 'Previsão'
predicted_total = df2.groupby(level=0)[k].sum().cumsum()
axes.plot(predicted_total, label=f"Total {kk}", lw=4, marker='o', markersize=8, alpha=0.5)
k, kk = 'SecondDose', 'Vacinados'
vaccinated_total = dft.groupby(level=0)[k].sum().cumsum()
axes.plot(vaccinated_total, label=f"Total {kk}", lw=4, marker='s', markersize=8, linestyle="--", alpha=0.5)

# One horizontal guide per million, up to the final projected total.
# .iloc[-1] gives a scalar; passing a one-element Series to math.ceil relied on
# implicit Series-to-float conversion.
millions = math.ceil(predicted_total.iloc[-1] / 1_000_000.0) if len(predicted_total) else 0
for i in range(1, millions + 1):
    plt.axhline(y=i*1000000, ls="--", alpha=0.75)

# One tick per distinct date appearing in either series.
plt.xticks(sorted(set(dft.index) | set(df2.index)), rotation=90)
# Vertical markers: today and 28 days ahead.
today = datetime.datetime.today()
for when in (today, today + datetime.timedelta(days=28)):
    axes.axvline(x=when, linewidth=2, ls="--", color='white', alpha=0.75)
axes.legend(loc="upper left")
plt.tight_layout()
if SAVE_IMAGES:
    plt.savefig("../../temp/vacinas_ecdc_previsao_accumulado.png")
plt.show()



In [None]:
# Received doses ("Recebidas") against doses administered, summed per ISO week.
# Only rows with a positive NumberDosesReceived are kept.
dfr = dft[dft['NumberDosesReceived'] > 0].copy()
# For Janssen, SecondDose was overwritten with a copy of FirstDose earlier in the
# notebook, so subtracting both columns would count every Janssen dose twice.
second_doses_given = dfr['SecondDose'].where(dfr['Vaccine'] != 'JANSS', 0)
dfr['Resto'] = dfr['NumberDosesReceived'] - dfr['FirstDose'] - second_doses_given
# Sum numeric columns only, so string columns are not concatenated.
dfr = dfr.groupby('YearWeekISO').sum(numeric_only=True)
axes = dfr[['FirstDose', 'SecondDose', 'NumberDosesReceived', 'Resto']].plot(figsize=(15,5))
axes.legend(loc="upper left")
plt.show()


In [None]:
# Last two rows of dft (at this point the date-indexed copy made in the plotting cell).
dft.tail(2)

In [None]:
# Remaining stock per vaccine: doses received minus doses administered.
# Sum numeric columns only, so string columns are not concatenated.
x = dft.groupby('Vaccine').sum(numeric_only=True)
# For Janssen, SecondDose was overwritten with a copy of FirstDose earlier in the
# notebook, so subtracting both columns would count every Janssen dose twice.
second_doses_given = x['SecondDose'].where(x.index != 'JANSS', 0)
x['stock'] = x['NumberDosesReceived'] - x['FirstDose'] - second_doses_given - x['UnknownDose']
# Stock as a percentage of the doses received.
x['perc'] = 100 * x['stock'] / x['NumberDosesReceived']
x

In [None]:
# Reference snapshot (date not recorded): received=2.983.590 stock=278.957 az=127.624 (34%) mod=101.751 (17%) pf=49.582 (2.5%)


In [None]:
# All Janssen rows for Portugal (dfpt is not filtered by region or target group).
dfpt[dfpt['Vaccine'] == "JANSS"]


In [None]:
# Show the full ECDC frame (all countries), including the derived `date` column.
data


In [None]:
# Per-country comparison: doses received, first doses and second doses, each as a
# percentage of the population.
# Countries seen in the data:
# array(['BG', 'CY', 'CZ', 'DE', 'EE', 'EL', 'FI', 'FR', 'HR', 'HU', 'IE',
#       'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'NO', 'PL', 'PT', 'SE', 'SI',
#       'AT', 'BE', 'DK', 'ES', 'IS', 'SK', 'RO', 'LI'], dtype=object)
# NOTE(review): rows are kept only when NumberDosesReceived > 0, so doses given in
# rows that report no deliveries are left out of the FirstDose/SecondDose sums -
# confirm this is intended.
has_deliveries = data['NumberDosesReceived'] > 0
is_all_groups = data['TargetGroup'] == 'ALL'
df = data.loc[has_deliveries & is_all_groups].copy()

dose_columns = ['NumberDosesReceived', 'FirstDose', 'SecondDose']
doses = df.groupby("ReportingCountry")[dose_columns].sum().reset_index()
pop = df[['ReportingCountry', 'Population']].drop_duplicates()

df = doses.merge(pop, how="left", on="ReportingCountry").assign(
    perc_doses=lambda t: 100 * t['NumberDosesReceived'] / t['Population'],
    perc_doses1=lambda t: 100 * t['FirstDose'] / t['Population'],
    perc_doses2=lambda t: 100 * t['SecondDose'] / t['Population'],
)

# Rank countries by doses received per inhabitant, highest first.
df.sort_values('perc_doses', ascending=False).reset_index(drop=True)


In [None]:
# Distinct Region codes across all countries.
data['Region'].unique()
