# COVID19 testing

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Usual-Python-set-up" data-toc-modified-id="Usual-Python-set-up-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Usual Python set-up</a></span></li><li><span><a href="#Grab-some-testing-data" data-toc-modified-id="Grab-some-testing-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Grab some testing data</a></span></li><li><span><a href="#Plot-national-test-positivity-rates" data-toc-modified-id="Plot-national-test-positivity-rates-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Plot national test positivity rates</a></span></li><li><span><a href="#Finished" data-toc-modified-id="Finished-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Finished</a></span></li></ul></div>

## Usual Python set-up

In [1]:
# imports
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.units as munits
import datetime
from pathlib import Path

In [2]:
# local imports
# Add the sibling ../bin directory to the module search path before importing
# plotstuff (presumably that is where plotstuff.py lives - confirm). The path
# is relative, so it resolves against the kernel's working directory.
sys.path.append(r'../bin')
import plotstuff as ps

In [3]:
# display settings - let pandas show up to 999 rows / columns before truncating
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

In [4]:
# plotting stuff
plt.style.use('ggplot')

# matplotlib stuff for date formatting xticklabels:
# one shared concise-date converter registered for each date-like type
converter = mdates.ConciseDateConverter()
for date_type in (np.datetime64, datetime.date, datetime.datetime):
    munits.registry[date_type] = converter

# chart output locations (the base directory is created if it does not exist)
CHART_DIRECTORY_BASE = '../charts'
Path(CHART_DIRECTORY_BASE).mkdir(parents=True, exist_ok=True)
CHART_DIRECTORY = f'{CHART_DIRECTORY_BASE}/!Test-'

%matplotlib inline

## Grab some testing data

In [5]:
# Download the complete OWID COVID-19 dataset (one row per location per date).
data = pd.read_csv(
    'https://github.com/owid/covid-19-data/'
    'raw/master/public/data/owid-covid-data.csv',
    header=0)

data['date'] = pd.to_datetime(data['date']) # fix dates

# Report how current the download is. max() is used rather than sorting the
# whole column and taking the last element: it is a single pass and it skips
# missing dates, whereas a sort places NaT last and would report NaT.
print(f'Data to: {data["date"].max()}')

Data to: 2021-06-13 00:00:00


In [6]:
# quick look at which columns the downloaded table provides
data.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'new_vaccinations_smoothed', 'total_vaccinations_per_hun

In [7]:
# distinct values of the location column (the output shows it holds
# aggregates such as 'Africa' and 'Asia' as well as individual countries)
data.location.unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Asia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao',
       'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethi

## Plot national test positivity rates

In [8]:
def get_national_data(nation, positivity, all_tests,
                      content_threshold=100, max_missingness=32):
    """Select one nation's positivity-rate and testing series for plotting.

    Every rejection prints a short "Exit N" explanation and returns
    (None, None), so a caller can simply skip nations that are not plottable.

    Parameters
    ----------
    nation : column label identifying the nation in both tables.
    positivity : DataFrame of positivity rates, one column per nation,
        indexed by date.
    all_tests : DataFrame of testing volumes, one column per nation,
        indexed by date.
    content_threshold : minimum span, in days, between the first and last
        rate observation; also the number of rate observations below which
        a long gap in the rate data disqualifies the nation. Default 100.
    max_missingness : longest tolerated run of consecutive missing rate
        observations when there are fewer than content_threshold
        observations. Default 32, i.e. at least monthly data on a daily
        index.

    Returns
    -------
    (rate, test) : two Series. rate has its missing values dropped; test is
        trimmed to start at its first valid value but keeps interior gaps.
        (None, None) is returned when the nation is rejected.
    """

    # both tables need a column for this nation
    if nation not in positivity.columns:
        print(f'Exit 0 for {nation} - no positivity data')
        return None, None
    if nation not in all_tests.columns:
        print(f'Exit 1 for {nation} - no test data')
        return None, None

    rate = positivity[nation]
    test = all_tests[nation]
    if test.min() < 0:
        print(f'Exit 2 for {nation} - min number of tests less that zero')
        return None, None

    # first_valid_index() returns None for an all-missing series; compare
    # with None explicitly rather than relying on the label's truthiness
    from_test = test.first_valid_index()
    from_rate = rate.first_valid_index()
    if from_test is None or from_rate is None:
        print(f'Exit 3 for {nation} - first test: {from_test}; first positivity rate: {from_rate}')
        return None, None

    # drop the leading run of missing values and anything dated in the future
    today = pd.to_datetime("today")
    test = test[(test.index >= from_test) & (test.index <= today)].copy()
    rate = rate[(rate.index >= from_rate) & (rate.index <= today)].copy()

    # Not enough rate data - noting some nations only have periodic data.
    # Each missing observation is labelled with the running count of valid
    # observations before it, so consecutive missing observations share a
    # label and the label frequencies are the gap lengths.
    gap_labels = rate.notna().cumsum()[rate.isna()]
    longest_gap = int(gap_labels.value_counts().max()) if len(gap_labels) else 0
    content = int(rate.notna().sum())
    if longest_gap > max_missingness and content < content_threshold:
        print(f'Exit 4 {nation} - longest gap: {longest_gap} days;  data points: {content}')
        return None, None
    rate = rate.dropna() # will not plot a continuous line with missing values

    # the elapsed-time check below needs a first and a last observation
    if len(rate) < 2:
        print(f'Exit 5 {nation} - less than two rate data points')
        return None, None

    elapsed_days = (rate.index[-1] - rate.index[0]) / pd.Timedelta(days=1)
    if elapsed_days < content_threshold:
        print(f'Exit 6 {nation} - longest period with data: {elapsed_days} days')
        return None, None

    return rate, test # data as Series

In [9]:
def plot_national_rate_test(nation, rate, test):
    """Chart one nation's testing volume and test positivity on twin axes.

    test is drawn as bars against the left-hand axis and rate as a dashed
    line against the right-hand axis. The finished axes are then passed to
    ps.finalise_plot together with a title, footer and save path.
    """

    MIN_POSITIVITY = 5 # per cent
    MARGINS = 0.01
    LABEL_TOP = 0.96    # figure-fraction height of the hand-placed axis labels
    LABEL_INSET = 0.02  # figure-fraction inset of those labels from each side

    fig, ax_tests = plt.subplots()
    ax_tests.xaxis_date()
    ax_rate = ax_tests.twinx()
    ax_tests.bar(
        test.index, test,
        color='#dd0000',
        label='Daily New Tests per Thousand Pop (Smoothed)',
    )
    ax_rate.plot(
        rate.index, rate,
        color='#0000dd', lw=2.0, ls='--',
        label='Positivity Rate',
    )

    # messy axis stuff: no built-in axis labels, and a right-hand scale that
    # always reaches at least MIN_POSITIVITY with a little headroom
    ax_tests.set_ylabel(None)
    ax_tests.set_xlabel(None)
    ax_rate.set_ylabel(None)
    rate_ceiling = max(MIN_POSITIVITY, rate.max()) * 1.02
    ax_rate.set_ylim((0, rate_ceiling))
    ax_rate.grid(False)

    # y-axis labels - the hard way: figure-level text in the top corners
    corner_labels = (
        (1.0 - LABEL_INSET, 'right', '% Positive'),
        (LABEL_INSET, 'left', "Daily Tests/'000"),
    )
    for x_position, alignment, caption in corner_labels:
        fig.text(x_position, LABEL_TOP, caption,
                 ha=alignment, va='top', fontsize=11,
                 color='#333333')

    # put in a legend covering the artists on both axes
    test_handles, test_labels = ax_tests.get_legend_handles_labels()
    rate_handles, rate_labels = ax_rate.get_legend_handles_labels()
    ax_tests.legend(test_handles + rate_handles,
                    test_labels + rate_labels,
                    loc='upper left')

    # tidy up margins
    ax_tests.margins(MARGINS)
    ax_rate.margins(MARGINS)

    ps.finalise_plot(ax_tests,
                     title=f'COVID Test and Positivity Rates: {nation}',
                     set_size_inches=(8, 4),
                     rfooter='Source: https://github.com/owid/covid-19-data/',
                     tight_layout_pad=1,
                     save_as=f'{CHART_DIRECTORY_BASE}/{nation}-tests-pos-rate.png',
                     #display=True,
                    )

In [10]:
# get relevant tables from complete OWID data
# (reshaped wide: one row per date, one column per location)

# positivity rate, multiplied by 100 to read as per cent
all_positivity = data.pivot(
    index='date',
    columns='location',
    values='positive_rate',
).mul(100)

# smoothed daily new tests per thousand
all_tests = data.pivot(
    index='date',
    columns='location',
    values='new_tests_smoothed_per_thousand',
)

In [11]:
# Extract and plot tests and positivity rates for each nation;
# nations rejected by get_national_data (which prints why) are skipped

for nation in all_positivity.columns:

    rate, test = get_national_data(nation, all_positivity, all_tests)
    if rate is not None and test is not None:
        plot_national_rate_test(nation, rate, test)

Exit 3 for Afghanistan - first test: None; first positivity rate: None
Exit 3 for Africa - first test: None; first positivity rate: None
Exit 3 for Algeria - first test: None; first positivity rate: None
Exit 3 for Angola - first test: None; first positivity rate: None
Exit 3 for Anguilla - first test: None; first positivity rate: None
Exit 6 Antigua and Barbuda - longest period with data: 2.0 days
Exit 3 for Aruba - first test: None; first positivity rate: None
Exit 3 for Asia - first test: None; first positivity rate: None
Exit 6 Azerbaijan - longest period with data: 36.0 days
Exit 3 for Bahamas - first test: None; first positivity rate: None
Exit 3 for Barbados - first test: None; first positivity rate: None
Exit 6 Belize - longest period with data: 15.0 days
Exit 3 for Benin - first test: None; first positivity rate: None
Exit 3 for Bermuda - first test: None; first positivity rate: None
Exit 3 for Bonaire Sint Eustatius and Saba - first test: None; first positivity rate: None
Exi

## Finished

In [12]:
# marker confirming the notebook ran through to the last cell
print('Done')

Done
