In [1]:
import csv
from datetime import datetime

# Modified from https://automatetheboringstuff.com/chapter14/
def process_csv(filename):
    """Read a UTF-8 encoded CSV file and return all of its rows.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one list of strings per row; nothing is skipped, so a
        header row (if the file has one) is the first element.
    """
    # newline="" hands line-ending handling to the csv module, which keeps
    # newlines embedded in quoted fields intact. The with-block closes the
    # file even when parsing raises.
    with open(filename, encoding="utf-8", newline="") as csv_file:
        return list(csv.reader(csv_file))

def cell(csv_header, csv_data, row_idx, col_name):
    """Fetch one table entry and convert it to the most specific type.

    Parameters:
        csv_header: list of column names.
        csv_data: list of rows, each row a list of strings.
        row_idx: index of the row inside csv_data.
        col_name: name of the column; must be present in csv_header.

    Returns:
        None for an empty string, otherwise the value as int if it parses
        as one, else as float if it parses as one, else the unchanged text.
    """
    column = csv_header.index(col_name)
    raw = csv_data[row_idx][column]
    if raw == "":
        return None

    # Try the numeric conversions from strictest to loosest.
    for convert in (int, float):
        try:
            return convert(raw)
        except ValueError:
            continue

    return raw

def extract_column(csv_header, csv_data, name):
    """Return the converted values of column `name`, one per row of csv_data.

    Each value goes through cell(), so empty strings become None and
    numeric text becomes int or float.
    """
    return [
        cell(csv_header, csv_data, row_idx, name)
        for row_idx in range(len(csv_data))
    ]

def unique(l):
    """Return the distinct elements of `l` as a list, in first-seen order.

    Accepts any iterable of hashable items. Building the result through a
    dict (insertion-ordered) rather than a set makes the output
    deterministic from run to run.
    """
    return list(dict.fromkeys(l))

def remove_none(l):
    """Lazily drop every None entry from `l`.

    Other falsy values (0, "", ...) are kept. The result is a filter
    iterator, not a list.
    """
    def is_present(item):
        return item is not None

    return filter(is_present, l)

def coerce_str(l):
    """Return a new list holding str(item) for every item of `l`."""
    return [str(item) for item in l]

# Load the file once, then split the header row from the data rows so that
# vax_data only ever holds data rows.
vax_rows = process_csv("vaccinations.csv")
vax_header = vax_rows[0]
vax_data = vax_rows[1:]

def vax_cell(row_idx, col_name):
    """Return the converted value at row `row_idx`, column `col_name` of the
    vaccination table (see cell() for the conversion rules)."""
    # cell() takes four arguments; the row index and column name must be
    # forwarded or the call raises TypeError.
    return cell(vax_header, vax_data, row_idx, col_name)

def vax_col(name):
    """Return column `name` of the vaccination table as a list of converted
    values, one per data row."""
    return extract_column(csv_header=vax_header, csv_data=vax_data, name=name)

In [2]:
# Show the column names taken from the first row of the CSV file.
vax_header

['country',
 'date',
 'daily_vaccinations',
 'total_vaccinations',
 'people_vaccinated',
 'people_fully_vaccinated',
 'population']

In [3]:
# Q1: What dates are mentioned in the dataset?
# Distinct values of the 'date' column.
unique(vax_col('date'))

['10/10/2021',
 '10/15/2021',
 '10/14/2021',
 '10/13/2021',
 '10/11/2021',
 '10/16/2021',
 '10/12/2021']

In [4]:
# Q2: What countries are listed in the dataset?
# De-duplicate the 'country' column and list the names alphabetically.
sorted(set(vax_col('country')))

['Albania',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Botswana',
 'Brazil',
 'Brunei',
 'Bulgaria',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Congo',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Curacao',
 'Cyprus',
 'Czechia',
 'Denmark',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'El Salvador',
 'England',
 'Equatorial Guinea',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Faeroe Islands',
 'Fiji',
 'Finland',
 'France',
 'French Polynesia',
 'Gabon',
 'Georgia',
 'Germany',
 'Gibraltar',
 'Greece',
 'Greenland',
 'Guatemala',
 'Guernsey',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 'Hong Kong',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 'Iran',
 'Ireland',
 'Isle of Man',
 'Israel',
 

In [5]:
#Q3: What are the populations of all the countries in the dataset?
# Build the country column once and reuse it for the keys and for the loop.
country_column = vax_col('country')

# Every country starts at None; a later non-missing population value
# overwrites whatever was stored before.
pop = dict.fromkeys(country_column)
for country, population in zip(country_column, vax_col('population')):
    if population is not None:
        pop[country] = population
pop

{'Albania': 2872972,
 'Angola': 33933280,
 'Anguilla': 15125,
 'Antigua and Barbuda': 98731,
 'Argentina': 45604583,
 'Armenia': 2967346,
 'Aruba': 107192,
 'Austria': 9043326,
 'Azerbaijan': 10223013,
 'Bahamas': 396924,
 'Bahrain': 1748320,
 'Bangladesh': 166320063,
 'Barbados': 287705,
 'Belarus': 9442870,
 'Belgium': 11632619,
 'Benin': 12448178,
 'Bermuda': 62091,
 'Bhutan': 779919,
 'Botswana': 2397551,
 'Brazil': 213990906,
 'Brunei': 441526,
 'Bulgaria': 6896057,
 'Cambodia': 16946879,
 'Cameroon': 27245512,
 'Canada': 38068055,
 'Cayman Islands': 66497,
 'Central African Republic': 4919943,
 'Chad': 16976945,
 'Chile': 19212158,
 'China': 1444200065,
 'Colombia': 51268576,
 'Congo': 5655107,
 'Costa Rica': 5138943,
 "Cote d'Ivoire": 27054208,
 'Croatia': 4081642,
 'Cuba': 11317671,
 'Curacao': 164795,
 'Cyprus': 896022,
 'Czechia': 10724425,
 'Denmark': 5813397,
 'Dominica': 72170,
 'Dominican Republic': 10953914,
 'Ecuador': 17888343,
 'El Salvador': 6518621,
 'England': 5654

In [6]:
#Q4: How many countries have daily vaccination data for the date '10/12/2021'?
# Collect the distinct countries whose row for that date has a value, then count them.
len({
    country
    for date, country, daily in zip(vax_col('date'), vax_col('country'), vax_col('daily_vaccinations'))
    if date == "10/12/2021" and daily is not None
})

78

In [7]:
def daily_vaccinations_on(given_date):
    """Map each country to its 'daily_vaccinations' value on `given_date`.

    Countries with a row for that date but no value map to None. If a
    country has several rows for the date, the last one wins.
    """
    rows = zip(vax_col('date'), vax_col('country'), vax_col('daily_vaccinations'))
    return {country: daily for date, country, daily in rows if date == given_date}

In [8]:
# Q5: How many vaccinations are estimated to have been done for each country on '10/13/2021'?
# A value of None means the country has a row for that date but no figure.
daily_vaccinations_on('10/13/2021')


{'Albania': 6147,
 'Angola': None,
 'Anguilla': None,
 'Antigua and Barbuda': None,
 'Argentina': None,
 'Armenia': None,
 'Aruba': 201,
 'Austria': 26429,
 'Azerbaijan': None,
 'Bahamas': None,
 'Bahrain': 7192,
 'Bangladesh': 444078,
 'Barbados': 1230,
 'Belarus': None,
 'Belgium': 9622,
 'Benin': None,
 'Bermuda': None,
 'Bhutan': None,
 'Botswana': None,
 'Brazil': None,
 'Brunei': None,
 'Bulgaria': 5548,
 'Cambodia': 285711,
 'Cameroon': None,
 'Canada': 89992,
 'Cayman Islands': 366,
 'Central African Republic': None,
 'Chad': None,
 'Chile': 221139,
 'China': 1423000,
 'Colombia': 273573,
 'Congo': None,
 'Costa Rica': None,
 "Cote d'Ivoire": None,
 'Croatia': 6144,
 'Cuba': 230815,
 'Curacao': None,
 'Cyprus': None,
 'Czechia': 6425,
 'Denmark': 2766,
 'Dominica': None,
 'Dominican Republic': 61965,
 'Ecuador': 44181,
 'El Salvador': 30384,
 'England': 57143,
 'Equatorial Guinea': None,
 'Estonia': 1697,
 'Eswatini': None,
 'Ethiopia': 12386,
 'Faeroe Islands': None,
 'Fiji': 

In [9]:
#Q6: How many vaccinations are estimated to have been done for each country?
# Build the country column once and reuse it for the keys and for the loop.
country_column = vax_col('country')

# Every country starts at None and stays None if it has no daily figure at
# all; otherwise the non-missing daily figures are added up.
vax_sum = dict.fromkeys(country_column)
for country, daily in zip(country_column, vax_col('daily_vaccinations')):
    if daily is None:
        continue
    running_total = vax_sum[country]
    vax_sum[country] = daily if running_total is None else running_total + daily

vax_sum

{'Albania': 12781,
 'Angola': None,
 'Anguilla': None,
 'Antigua and Barbuda': None,
 'Argentina': 1548234,
 'Armenia': None,
 'Aruba': 585,
 'Austria': 124986,
 'Azerbaijan': 105571,
 'Bahamas': None,
 'Bahrain': 48396,
 'Bangladesh': 2838296,
 'Barbados': 5519,
 'Belarus': None,
 'Belgium': 25552,
 'Benin': None,
 'Bermuda': None,
 'Bhutan': None,
 'Botswana': None,
 'Brazil': 6752141,
 'Brunei': None,
 'Bulgaria': 39213,
 'Cambodia': 1767528,
 'Cameroon': None,
 'Canada': 542813,
 'Cayman Islands': 617,
 'Central African Republic': None,
 'Chad': None,
 'Chile': 740426,
 'China': 10439000,
 'Colombia': 1251585,
 'Congo': None,
 'Costa Rica': None,
 "Cote d'Ivoire": None,
 'Croatia': 33831,
 'Cuba': 1286245,
 'Curacao': 53,
 'Cyprus': 6816,
 'Czechia': 38673,
 'Denmark': 9139,
 'Dominica': None,
 'Dominican Republic': 355554,
 'Ecuador': 181736,
 'El Salvador': 149292,
 'England': 351675,
 'Equatorial Guinea': None,
 'Estonia': 12129,
 'Eswatini': 7504,
 'Ethiopia': 111553,
 'Faeroe 

In [10]:
 #Q7: Which country is estimated to have completed the greatest number of vaccinations across the dates mentioned in the dataset?
# Skip countries without any reported figure, then take the name that goes
# with the largest total.
max(
    (entry for entry in vax_sum.items() if entry[1] is not None),
    key=lambda entry: entry[1],
)[0]

'India'

In [11]:
 #Q8: How many vaccinations are estimated to have been done across the whole world on each date mentioned in the dataset?

from functools import reduce

def bucketize_reduce(d, row, key, value):
    """Reducer step: append value(row) to the list stored under key(row).

    `d` is modified in place (a new list is created the first time a key is
    seen) and is also returned so the function can be used with reduce().
    """
    d.setdefault(key(row), []).append(value(row))
    return d

def bucketize(l, key = lambda x: x[0], value = lambda x: x[1]):
    """Group the rows of `l` into a dict of lists.

    Parameters:
        l: iterable of rows.
        key: function giving the bucket a row belongs to (default: row[0]).
        value: function giving what is stored for a row (default: row[1]).

    Returns:
        A dict mapping each key to the list of values of its rows, in the
        order the rows were seen.
    """
    buckets = {}
    for row in l:
        # Use the caller-supplied accessors rather than fixed positions.
        buckets.setdefault(key(row), []).append(value(row))
    return buckets


# Group the non-missing daily figures by date, then add up each group.
buckets = bucketize(
    (date, daily)
    for date, daily in zip(vax_col('date'), vax_col('daily_vaccinations'))
    if daily is not None
)
{date: sum(daily_values) for date, daily_values in buckets.items()}

{'10/12/2021': 20497398,
 '10/13/2021': 17692532,
 '10/10/2021': 15646236,
 '10/11/2021': 13364434,
 '10/14/2021': 16587172,
 '10/15/2021': 20678860,
 '10/16/2021': 11698636}

In [68]:
# Q9: For each country in the dataset, when is the last day for which we have data about the number of fully vaccinated people?


def filter_fv_country(country, date_format="%m/%d/%Y"):
    """Find the latest date for which a country has a non-missing value.

    Parameters:
        country: pair (name, records), where records is a sequence of
            (date_string, value) pairs.
        date_format: strptime format of the date strings; the default
            matches the MM/DD/YYYY dates used in this dataset.

    Returns:
        (name, latest_date_string), or (name, None) when every value is
        missing.

    Raises:
        ValueError: if a date string with a non-missing value does not
            match date_format.
    """
    dates = [record[0] for record in country[1] if record[1] is not None]
    if not dates:
        return (country[0], None)

    # Compare as real dates: plain string comparison would rank
    # '9/30/2021' above '10/01/2021'.
    latest = max(dates, key=lambda date: datetime.strptime(date, date_format))
    return (country[0], latest)
    

# Pair every country with its (date, people_fully_vaccinated) record, group
# the records per country, and reduce each group to its latest date.
fv_by_country = bucketize(
    zip(vax_col('country'), zip(vax_col('date'), vax_col('people_fully_vaccinated')))
)
dict(filter_fv_country(entry) for entry in fv_by_country.items())

{'Albania': '10/13/2021',
 'Angola': '10/14/2021',
 'Anguilla': '10/15/2021',
 'Antigua and Barbuda': '10/13/2021',
 'Argentina': '10/16/2021',
 'Armenia': '10/11/2021',
 'Aruba': '10/15/2021',
 'Austria': '10/16/2021',
 'Azerbaijan': '10/16/2021',
 'Bahamas': '10/15/2021',
 'Bahrain': '10/16/2021',
 'Bangladesh': '10/16/2021',
 'Barbados': '10/16/2021',
 'Belarus': '10/10/2021',
 'Belgium': '10/14/2021',
 'Benin': '10/12/2021',
 'Bermuda': '10/15/2021',
 'Bhutan': '10/10/2021',
 'Botswana': '10/14/2021',
 'Brazil': '10/16/2021',
 'Brunei': '10/14/2021',
 'Bulgaria': '10/16/2021',
 'Cambodia': '10/16/2021',
 'Cameroon': '10/11/2021',
 'Canada': '10/16/2021',
 'Cayman Islands': '10/14/2021',
 'Central African Republic': '10/14/2021',
 'Chad': '10/11/2021',
 'Chile': '10/15/2021',
 'China': None,
 'Colombia': '10/14/2021',
 'Congo': '10/14/2021',
 'Costa Rica': '10/11/2021',
 "Cote d'Ivoire": '10/11/2021',
 'Croatia': '10/16/2021',
 'Cuba': '10/15/2021',
 'Curacao': '10/16/2021',
 'Cypru