## UK Deaths by Vaccination Status


In [7]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

# Load the table and keep only the four columns this notebook works with.
data = pd.read_csv('table.csv')
df = pd.DataFrame(data, columns=['Year', 'Month', 'Vaccination status', 'Age-standardised mortality rate / 100,000 person-years'])

# Column layout of the pivoted (wide) table: 'Date' first, then one column
# per vaccination status, in order of first appearance in the file.
statuses = df['Vaccination status']
columns = ['Date'] + [status for status in statuses.unique() if status != 'Date']

# Number of 'Unvaccinated' rows. The code further down uses this to size its
# per-date arrays, i.e. it assumes exactly one 'Unvaccinated' row per date.
row_count = int((statuses == 'Unvaccinated').sum())

# Pivot the long table (one row per date and vaccination status) into a wide
# one: one row per date, with a 'Date' column plus one rate column per status.
#
# The loop relies on the row order of the input: within each date the
# 'Unvaccinated' row supplies the date label, and the 'Ever vaccinated' row
# closes the record, so it has to be the last status listed for that date.

# Per-date running sum of the vaccinated rates (divided by `counts` later on
# to obtain a mean).
averages = np.zeros(row_count)

# We keep a count of actual data points per date, since some data is missing
# by definition. For example, "Second dose, at least 6 months ago" is not
# available until 6 months into the data, so it must not contribute to the
# average until then; otherwise it would artificially bring the average down.
counts = np.zeros(row_count)

# Finished per-date records. They are collected in a plain list and turned
# into a DataFrame once at the end: DataFrame.append was removed in pandas 2.0
# (and copied the whole frame on every call before that).
records = []
record = {}
row_index = 0

for year, month, status, rate in zip(
        df['Year'],
        df['Month'],
        df['Vaccination status'],
        df['Age-standardised mortality rate / 100,000 person-years']):
    # 'x' marks a rate that is not available; it is stored (and therefore
    # plotted) as 0 but never counted towards the average.
    has_rate = rate != 'x'
    frate = float(rate) if has_rate else 0

    if status == 'Unvaccinated':
        record['Date'] = f"{year}.{month}"
    elif has_rate and status != 'Ever vaccinated':
        # 'Ever vaccinated' is left out of the average -- presumably because
        # it already aggregates the individual dose groups (TODO confirm).
        counts[row_index] += 1
        averages[row_index] += frate

    record[status] = frate

    if status == 'Ever vaccinated':
        records.append(record)
        record = {}
        row_index += 1

# Statuses that never appeared for a given date are left as NaN in that row.
dp = pd.DataFrame(records, columns=columns)

# One figure per vaccinated status, each drawn against the unvaccinated rate.
# The x axis is the integer position of the date in the pivoted table;
# `dates` itself is only used to determine how many positions there are.
dates = dp[columns[0]]
xs = list(range(len(dates)))

# columns[0] is 'Date' and columns[1] is the first status encountered in the
# data, which is used as the unvaccinated baseline; it is the same for every
# figure, so it is looked up once, outside the loop.
unvax = dp[columns[1]]

for label in columns[2:]:
    vax = dp[label]
    p = figure(width=1000, height=400)
    p.line(xs, unvax, color='blue', legend_label=columns[1])
    p.line(xs, vax, color='green', legend_label=label)
    show(p)

# At this point `averages` holds, for each date, the SUM of the rates of the
# vaccinated groups that reported data, and `counts` holds how many groups
# went into that sum. Dividing gives the mean rate (per 100,000 person-years)
# across those groups.
# NOTE(review): this is an unweighted mean over groups, not weighted by each
# group's person-years -- confirm that this is the intended comparison.
#
# A date with no vaccinated data at all (count 0) is set to NaN explicitly
# instead of going through a 0/0 division and its RuntimeWarning.
averages = np.divide(
    averages,
    counts,
    out=np.full_like(averages, np.nan),
    where=counts > 0,
)
p = figure(width=1000, height=400)
# The baseline series is read straight from the pivoted table rather than
# from a variable left over from the per-status plotting loop, which would be
# undefined if that loop never ran.
p.line(xs, dp[columns[1]], color='blue', legend_label=columns[1])
p.line(xs, averages, color='green', legend_label="Average vaccinated")
show(p)
