In [None]:
import datetime
import requests
import pandas as pd
import numpy as np
from pathlib import Path

import matplotlib.pyplot as plt
%matplotlib inline
# Switch matplotlib to its 'dark_background' style sheet for figures drawn after this point.
plt.style.use('dark_background')


In [None]:
# https://github.com/owid/covid-19-data/blob/43901c094631609ee3131f755f619917e6a72943/scripts/scripts/vaccinations/src/vax/batch/portugal.py

def read(source_url: str) -> pd.DataFrame:
    """Load the vaccination CSV found at *source_url*.

    Only the columns needed by the later pipeline steps are kept; any other
    column present in the file is dropped while parsing.

    Args:
        source_url: Anything ``pd.read_csv`` accepts (URL, path or buffer).

    Returns:
        A DataFrame holding the selected columns under their original
        (Portuguese) names.
    """
    wanted_columns = [
        "data",
        "vacinas",
        "pessoas_vacinadas_completamente",
        "pessoas_vacinadas_parcialmente",
        # HACK: extra column pulled in on top of the four above.
        "pessoas_inoculadas",
    ]
    return pd.read_csv(source_url, usecols=wanted_columns)

def rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the source column names mapped to English.

    Columns that are not listed in the mapping (for example
    ``pessoas_vacinadas_parcialmente``) keep their original name.
    """
    column_map = {
        "data": "date",
        "vacinas": "total_vaccinations",
        "pessoas_vacinadas_completamente": "people_fully_vaccinated",
        # HACK: extra column carried through the pipeline.
        "pessoas_inoculadas": "people_innoculated",
    }
    return df.rename(columns=column_map)

def format_date(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose ``date`` column is an ISO ``YYYY-MM-DD`` string.

    The incoming ``date`` values are parsed as day-month-year
    (``%d-%m-%Y``) and then converted back to strings.
    """
    parsed_dates = pd.to_datetime(df.date, format="%d-%m-%Y")
    iso_dates = parsed_dates.astype(str)
    return df.assign(date=iso_dates)


def calculate_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """Derive ``people_vaccinated`` and keep only the output columns.

    ``people_vaccinated`` is computed as the partially vaccinated count plus
    the fully vaccinated count. The returned frame contains exactly the
    columns listed below, in that order.
    """
    vaccinated_total = df.pessoas_vacinadas_parcialmente + df.people_fully_vaccinated
    output_columns = [
        "date",
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
        # HACK: extra column carried through the pipeline.
        "people_innoculated",
    ]
    with_metric = df.assign(people_vaccinated=vaccinated_total)
    return with_metric[output_columns]

def sanity_checks(df: pd.DataFrame) -> pd.DataFrame:
    """Verify that no row reports more people vaccinated than doses given.

    Missing values in either column are treated as 0 for the comparison.

    Args:
        df: Frame with ``total_vaccinations`` and ``people_vaccinated`` columns.

    Returns:
        *df* itself, unchanged, so the function can be used with ``.pipe``.

    Raises:
        AssertionError: If any row has ``people_vaccinated`` greater than
            ``total_vaccinations``.
    """
    total = df.total_vaccinations.fillna(0)
    vaccinated = df.people_vaccinated.fillna(0)
    violations = ~(total >= vaccinated)
    # Raise explicitly instead of using `assert`: assert statements are
    # removed when Python runs with -O, which would silently skip the check.
    # AssertionError is kept so existing handlers still match.
    if violations.any():
        raise AssertionError(
            "people_vaccinated exceeds total_vaccinations in "
            f"{int(violations.sum())} row(s); first offending index: "
            f"{violations[violations].index[0]!r}"
        )
    return df

# https://github.com/owid/covid-19-data/blob/43901c094631609ee3131f755f619917e6a72943/scripts/scripts/vaccinations/src/vax/utils/utils.py

def make_monotonic(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows until every vaccination metric is non-decreasing over time.

    The algorithm assumes that the most recent values are the correct ones,
    and therefore removes earlier rows whose (forward-filled) value is higher
    than the value that follows them.

    Args:
        df: Frame with a ``date`` column plus ``total_vaccinations``,
            ``people_vaccinated`` and ``people_fully_vaccinated``.

    Returns:
        A frame sorted by ``date`` with the offending rows removed.
    """
    df = df.sort_values("date")
    metrics = ("total_vaccinations", "people_vaccinated", "people_fully_vaccinated")
    for metric in metrics:
        # `Series.is_monotonic` was removed in pandas 2.0;
        # `is_monotonic_increasing` is the equivalent check and is available
        # on older versions as well.
        while not df[metric].ffill().fillna(0).is_monotonic_increasing:
            filled = df[metric].ffill()
            # Change from each row to the next one; negative means the
            # current row is higher than its successor.
            diff = filled.shift(-1) - filled
            # Keep rows that do not exceed their successor. NaN diffs (the
            # last row, or leading gaps) are kept as well.
            df = df[(diff >= 0) | (diff.isna())]
    return df

# Location of the vaccination CSV that feeds the pipeline below.
source_url = 'https://raw.githubusercontent.com/dssg-pt/covid19pt-data/master/vacinas.csv'

# Download the data and push it through each cleaning step in order.
# NOTE: two further steps, enrich_vaccine_name and enrich_columns, were
# present here only as commented-out calls and are deliberately not run.
df = (
    read(source_url=source_url)
    .pipe(rename_columns)
    .pipe(format_date)
    .pipe(calculate_metrics)
    .pipe(sanity_checks)
    .pipe(make_monotonic)
    .sort_values("date")
)

# Bare expression so the notebook renders the resulting frame.
df
# https://github.com/owid/covid-19-data/blob/43901c094631609ee3131f755f619917e6a72943/public/data/vaccinations/country_data/Portugal.csv
