In [1]:
import pandas as pd
from datetime import datetime, timedelta
import calendar

def last_month_day(date_str):
    year, month = map(int, date_str.split('-'))
    last_day = calendar.monthrange(year, month)[1]

    return f"{date_str}-{last_day}"

def get_sunday_of_week(year_week):

    # Create a datetime object for the first day of the week
    first_day = datetime.strptime(f'{year_week}-1', "%Y-W%W-%w")

    # Calculate the number of days to Sunday (6 represents Sunday)
    days_to_sunday = (6 - first_day.weekday()) % 7

    # Add the number of days to get the Sunday of that week
    sunday = first_day + timedelta(days=days_to_sunday)

    return sunday

# iso code 
iso_df = pd.read_csv("../../../../supporting-files/locations_iso2_codes.csv")

In [2]:
regions_remove = ['FI194', 'FI19', 'FI1D', 'FI193', 'FI1D7', 'FI1B1', 'FI1C4', 'FI195', 'FI200', 'FI1D9',
                   'FI1C', 'FI197', 'FI1C2', 'FI1D2', 'FI1B', 'FI1C3', 'FI1C1', 'FI1D1', 'FI1D3', 'FI1D5', 
                   'FI1C5', 'FI20', 'FI1D8', 'PTCSR02', 'PTCSR05', 'PTCSR07', 'PTCSR01', 'PTCSR04', 'SE11',
                   'SE12', 'SE22', 'SE33', 'SE32', 'SE21', 'SE23', 'SE31']

# new: https://opendata.ecdc.europa.eu/covid19/COVID-19_VC_data_from_May_2024/csv/
new = pd.read_csv("./new.csv")
new = new.loc[~new.Region.isin(regions_remove)]

new_grp = new.groupby(["YearWeekISO", "TargetGroup", "ReportingCountry"], as_index=False)["NumberOfIndivOneDose"].sum()
new_grp.rename(columns={"YearWeekISO": "year_month",
                        "TargetGroup": "target_group", 
                        "ReportingCountry": "iso2_code",
                        "NumberOfIndivOneDose": "doses_administered"}, inplace=True)
new_grp["doses_administered"] = new_grp.doses_administered.astype(int)
new_grp["dose"] = None
new_grp["date"] = new_grp.year_month.apply(last_month_day)
new_grp.drop("year_month", axis=1, inplace=True)
new_grp["iso2_code"] = new_grp.iso2_code.apply(lambda x : x if x != "EL" else "GR")
new_grp = pd.merge(left=new_grp, right=iso_df, on="iso2_code", how="left")
new_grp = new_grp[["date", "location_name", "iso2_code", "target_group", "dose", "doses_administered"]]

new_grp.to_csv("../covid_vax_post23.csv", index=False)
new_grp.head()

Unnamed: 0,date,location_name,iso2_code,target_group,dose,doses_administered
0,2023-09-30,Belgium,BE,ALL,,431019
1,2023-09-30,Bulgaria,BG,ALL,,3455
2,2023-09-30,Cyprus,CY,ALL,,1093
3,2023-09-30,Czechia,CZ,ALL,,14003
4,2023-09-30,Germany,DE,ALL,,201281


In [3]:
regions_remove = ['FI1C1', 'FI1B1', 'FI20', 'FI1D1', 'FI1C3', 'FI197', 'FI1D9', 'FI1D5', 'FI1C5', 'FI1C2', 'FI195', 'FI194', 'FI193', 'FI1D3',
                  'FI1D2', 'FI200', 'FI1D7', 'FI1D8', 'FI1C4', 'FI19', 'FI1C', 'FI1B', 'FI1D', 
                  'FRJ', 'FRM', 'FRY2', 'FRY5', 'FRH', 'FRK', 'UNK', 'FRY3', 'FR1', 'FRD', 'FRL', 'FRY1', 'FRY4', 'FRB', 'FRE', 'FRC', 'FRF', 'FRI', 'FRG', 
                  'ITI1', 'ITI2', 'ITI3', 'ITI4', 'ITH2', 'ITH1', 'ITC4', 'ITC2', 'ITC1', 'ITH3', 'ITG2', 'ITG1', 'ITF6', 'ITF5', 'ITF4', 'ITF3',
                   'ITF2', 'ITF1', 'ITH5', 'ITC3', 'ITH4', 
                   'LT02', 'LT01',
                   'PL52', 'PL21', 'PL22', 'PL71',
                    'PL81', 'PL82', 'PL92X', 'PL43', 'PL41', 'PL62', 'PL63', 'PL51',
                    'PL72', 'PL42', 'PL84', 'PL61', 
                    'PTCSR04', 'PTCSR07',
                   'PTCSR05', 'PTCSR01', 'PTCSR03', 'PTCSR02', 'PTCSR06',
                    'SE11', 'SE12', 'SE21', 'SE22', 'SE23', 'SE31', 'SE32', 'SE33'
                  ]

# old: https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/
old = pd.read_csv("./old_.csv")
old = old.loc[~old.Region.isin(regions_remove)]

old_grp = old.groupby(["YearWeekISO", "TargetGroup", "ReportingCountry"], as_index=False)[['FirstDose',	
                                                                                            'SecondDose',	
                                                                                            'DoseAdditional1',	
                                                                                            'DoseAdditional2',	
                                                                                            'DoseAdditional3',	
                                                                                            'DoseAdditional4',	
                                                                                            'DoseAdditional5',	
                                                                                            'UnknownDose']].sum()

old_grp_reshaped = old_grp.melt(id_vars=['YearWeekISO', 'TargetGroup', 'ReportingCountry'], 
                                value_vars=['FirstDose', 'SecondDose', 'DoseAdditional1', 'DoseAdditional2', 'DoseAdditional3', 'DoseAdditional4', 'DoseAdditional5', 'UnknownDose'])

old_grp_reshaped.rename(columns={"YearWeekISO": "year_week",
                        "TargetGroup": "target_group", 
                        "ReportingCountry": "iso2_code",
                        "variable": "dose",
                        "value": "doses_administered"}, inplace=True)

old_grp_reshaped["date"] = old_grp_reshaped.year_week.apply(get_sunday_of_week)
old_grp_reshaped.drop("year_week", axis=1, inplace=True)
old_grp_reshaped["iso2_code"] = old_grp_reshaped.iso2_code.apply(lambda x : x if x != "EL" else "GR")
old_grp_reshaped = pd.merge(left=old_grp_reshaped, right=iso_df, on="iso2_code", how="left")
old_grp_reshaped = old_grp_reshaped[["date", "location_name", "iso2_code", "target_group", "dose", "doses_administered"]]

old_grp_reshaped.to_csv("../covid_vax_pre23.csv", index=False)
old_grp_reshaped.head()

Unnamed: 0,date,location_name,iso2_code,target_group,dose,doses_administered
0,2020-12-20,Denmark,DK,ALL,FirstDose,2
1,2020-12-20,Denmark,DK,Age25_49,FirstDose,2
2,2020-12-20,Denmark,DK,HCW,FirstDose,0
3,2020-12-20,Denmark,DK,LTCF,FirstDose,0
4,2020-12-27,Denmark,DK,ALL,FirstDose,9
