In [40]:
import pandas as pd
import json
import pycountry # Will probably require pip install pycountry
import datetime
import dateutil.parser as dparser
import collections

In [14]:
# Source CSV: Our World in Data (owid) vaccinations-by-manufacturer table.
# NOTE: the URL points at the repository's `master` branch, so the file contents
# can change between runs of this notebook.
address = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations-by-manufacturer.csv'
# Read the CSV directly from the URL (requires network access).
df = pd.read_csv(address)

In [15]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Chile,2020-12-24,Pfizer/BioNTech,420
1,Chile,2020-12-25,Pfizer/BioNTech,5198
2,Chile,2020-12-26,Pfizer/BioNTech,8338
3,Chile,2020-12-27,Pfizer/BioNTech,8649
4,Chile,2020-12-28,Pfizer/BioNTech,8649
...,...,...,...,...
2963,United States,2021-04-25,Moderna,100031926
2964,United States,2021-04-25,Pfizer/BioNTech,120373717
2965,United States,2021-04-26,Johnson&Johnson,8087269
2966,United States,2021-04-26,Moderna,100830533


In [122]:
def convert_var_dict(lst_counts_variants):
    '''
    Collapse a list of dictionaries into one dictionary.

    Every item of lst_counts_variants is read through its 'value' and
    'count' keys and contributes one VALUE : COUNT pair, e.g.
    {
        'B.1' : 2,
        'B.1.7' : 5
    }
    When the same 'value' appears more than once, the last item wins.
    An empty list gives an empty dictionary.
    '''
    return {item['value']: item['count'] for item in lst_counts_variants}

def create_entry(vaccine_data):
    '''
    Build the first entry of a country's weekly vaccine table from one row.

    vaccine_data must provide 'date' (a date string), 'vaccine' and
    'total_vaccinations'.

    The result has exactly one week and one vaccine:
    {
        WEEK_STAMP : { VACCINE_TYPE : COUNT }
    }

    WEEK_STAMP is "<ISO week>-<ISO year>" of the row's date, with no zero
    padding, e.g.
    {
        "52-2020" : { "Pfizer/BioNTech" : 420 }
    }
    '''
    row_day = dparser.parse(vaccine_data['date']).date()
    # isocalendar() starts with (ISO year, ISO week); the weekday is not needed.
    iso_year, iso_week = row_day.isocalendar()[:2]
    week_stamp = '{}-{}'.format(iso_week, iso_year)
    return {week_stamp: {vaccine_data['vaccine']: vaccine_data['total_vaccinations']}}

# Add one more row (week stamp + vaccine total) to an existing entry
def add_entry(entry, vaccine_data):
    '''
    Add one row's total to the week it falls in and return `entry`.

    entry        : dictionary shaped like the result of create_entry,
                   { WEEK_STAMP : { VACCINE_TYPE : COUNT } }.
                   It is modified in place and also returned.
    vaccine_data : provides 'date' (a date string), 'vaccine' and
                   'total_vaccinations'.

    WEEK_STAMP is "<ISO week>-<ISO year>" of the row's date. Values of the
    same vaccine within the same week are summed. A vaccine that shows up
    for the first time in an already existing week is added next to the
    vaccines recorded so far (previously it replaced the whole week and the
    other vaccines were lost).
    '''
    date_obj = dparser.parse(vaccine_data['date']).date()
    year, curr_week = date_obj.isocalendar()[0:2]
    date = str(curr_week) + '-' + str(year)

    vac_type = vaccine_data['vaccine']
    # Reuse the week's dict when it exists; never rebuild it, so that other
    # vaccines already stored for this week survive.
    week_totals = entry.setdefault(date, {})
    if vac_type in week_totals:
        week_totals[vac_type] = week_totals[vac_type] + vaccine_data['total_vaccinations']
    else:
        week_totals[vac_type] = vaccine_data['total_vaccinations']

    return entry


# Add one more row to an existing entry, carrying totals forward week to week
def add_entry_cummulative(entry, vaccine_data):
    '''
    Add one row's total to its week, on top of the previous week's totals.

    entry        : dictionary shaped like the result of create_entry,
                   { WEEK_STAMP : { VACCINE_TYPE : COUNT } }.
                   It is modified in place and also returned.
    vaccine_data : provides 'date' (a date string), 'vaccine' and
                   'total_vaccinations'.

    WEEK_STAMP is "<ISO week>-<ISO year>" of the row's date. The first time
    a week is seen, its dict starts as a copy of the previous ISO week's
    dict (when that week is present in `entry`); every row of the week is
    then added on top of that.

    Changes compared with the earlier implementation:
      * the previous week is found by going back 7 days, so the week before
        "1-2021" is "53-2020" rather than the non-existent "0-2021";
      * the previous week's totals are carried over once per week, not once
        per row;
      * a vaccine that is new for an existing week no longer replaces the
        whole week.

    NOTE(review): the carry-over happens when a week is first seen, so rows
    are assumed to arrive in chronological order -- confirm with the caller.
    A week with no rows at all still breaks the chain, as before.
    '''
    date_obj = dparser.parse(vaccine_data['date']).date()
    year, curr_week = date_obj.isocalendar()[0:2]
    date = str(curr_week) + '-' + str(year)

    if date not in entry:
        # Same weekday one week earlier always lies in the previous ISO week,
        # including across the ISO year boundary.
        prev_year, prev_week = (date_obj - datetime.timedelta(days=7)).isocalendar()[0:2]
        prev_date = str(prev_week) + '-' + str(prev_year)
        # Copy, so later additions never leak back into the previous week.
        entry[date] = dict(entry.get(prev_date, {}))

    vac_type = vaccine_data['vaccine']
    week_totals = entry[date]
    if vac_type in week_totals:
        week_totals[vac_type] = week_totals[vac_type] + vaccine_data['total_vaccinations']
    else:
        week_totals[vac_type] = vaccine_data['total_vaccinations']

    return entry

In [123]:
# `df2` is only assigned in a later cell, so evaluating it here raises NameError
# on a fresh kernel (Restart & Run All). Render the same location-indexed view
# directly from `df` instead.
df.set_index('location')

Unnamed: 0_level_0,date,vaccine,total_vaccinations
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chile,2020-12-24,Pfizer/BioNTech,420
Chile,2020-12-25,Pfizer/BioNTech,5198
Chile,2020-12-26,Pfizer/BioNTech,8338
Chile,2020-12-27,Pfizer/BioNTech,8649
Chile,2020-12-28,Pfizer/BioNTech,8649
...,...,...,...
United States,2021-04-25,Moderna,100031926
United States,2021-04-25,Pfizer/BioNTech,120373717
United States,2021-04-26,Johnson&Johnson,8087269
United States,2021-04-26,Moderna,100830533


In [124]:
# Index by country so that iterrows() yields (country, row) pairs.
df2 = df.set_index('location')

# Both tables are filled by the same row-by-row pass: the first row of a
# country goes through create_entry, every later row through the table's own
# "extend" function.
country_vaccines = {}
country_vaccines_cummulative = {}
for table, extend in (
    (country_vaccines, add_entry),
    (country_vaccines_cummulative, add_entry_cummulative),
):
    for country, rest in df2.iterrows():
        if country not in table:
            table[country] = create_entry(rest)
        else:
            table[country] = extend(table[country], rest)

In [127]:
# Write each table to its own JSON file in the current working directory.
for out_path, payload in (
    ('vaccination_world_weekly.json', country_vaccines),
    ('vaccination_world_weekly_cummulative.json', country_vaccines_cummulative),
):
    with open(out_path, 'w') as f:
        json.dump(payload, f)