In [None]:
# default_exp core

# module name here

> API details.

In [11]:
%config IPCompleter.use_jedi = False

In [259]:
#hide
from nbdev.showdoc import *

import warnings, logging, os
import pandas as pd, numpy as np
from datetime import datetime

In [460]:
#export

first_doses  = 51_032_361
second_doses = 25_733_678

data_dir = 'data'
logger = logging.getLogger()
logger.setLevel(logging.INFO)

def read_data():
    filenames = {'main': '2021VAERSDATA.csv', 'vaccine': '2021VAERSVAX.csv', 
                 'symptoms': '2021VAERSSYMPTOMS.csv'}
    data = {}
    for key, filename in filenames.items():
        fpath = os.path.join(data_dir, filename)
        logger.info(f"Reading {key} file {filename}... ")
        try:
            d = data[key] = pd.read_csv(fpath)
        except UnicodeDecodeError:
            d = pd.read_csv(fpath, encoding = 'windows-1254')
        d.columns = [c.lower() for c in d.columns]
        data[key] = d
        logger.info("done.")
    data['reports'] = data['main'].merge(data['vaccine'], on='vaers_id')
    logger.info(f"Merged 'main' {data['main'].shape} and 'vaccine' " +
                 f"{data['vaccine'].shape} into 'reports' {data['reports'].shape}.")
    del(data['main'])
    return data


def read_line_defensively(f, line_number, skips):
    lines_skipped = 0
    while True:
        try:
            return f.readline(), lines_skipped
        except UnicodeDecodeError:
            logger.warning(f"unicode error on line {line_number + lines_skipped}.")
            lines_skipped += 1
        

def read_csv_carefully(fpath):
    rows = []
    total_skips = 0
    with open(fpath, 'r') as f:
        header = [colname.lower() for colname in f.readline().split(',')]
        line = f.readline()
        i = 0
        while line:
            row = line.split(',')
            rows.append(row)
            line, skipped = read_line_defensively(f, i, total_skips)
            total_skips += skipped
            i += skipped + 1
        log_msg = f"Skipped {total_skips} of {i} lines."
        if total_skips > 0:
            logger.warning(log_msg)
        else:
            logger.info(log_msg)
    return pd.DataFrame(rows, columns = header)


def filter_to_covid(data) -> None:
    all_people_to_keep = set()
    for key in ('vaccine', 'reports'):        
        covids = data[key]['vax_type'] == 'COVID19'
        s = covids.sum()
        logger.info(f"Filtering '{key}' to just the {s:,} COVID vaccine rows.")
        people_to_keep = data[key].loc[covids, 'vaers_id']
        all_people_to_keep = all_people_to_keep.union(people_to_keep)
    for key in data:
        keep_rows = data[key]['vaers_id'].isin(all_people_to_keep)
        data[key] = data[key].loc[keep_rows, :]
    
        
        
def get_death_count(data):
    data['reports']['died'] = data['reports']['died'].apply(lambda b: 'Y' if b == 'Y' else 'N')
    death_count = data['reports'].groupby(['died']).size().reset_index(name='adverse event people')
    death_count['prop adverse that died'] = (
        death_count['adverse event people'] / death_count['adverse event people'].sum())
    death_count['first doses']  = first_doses
    death_count['second doses']  = second_doses
    death_count['first doses / adverse'] = (death_count['first doses'] / death_count['adverse event people']).astype(int)
    death_count['second doses / adverse'] = (death_count['second doses'] / death_count['adverse event people']).astype(int)
    return death_count

In [461]:
data['vaccine']['vaers_id']

0         916600
1         916601
2         916602
3         916603
4         916604
          ...   
14932    1057082
14933    1057281
14934    1057348
14935    1057363
14936    1057795
Name: vaers_id, Length: 14937, dtype: int64

In [462]:
data = read_data()
filter_to_covid(data)

INFO:root:Reading main file 2021VAERSDATA.csv... 
INFO:root:done.
INFO:root:Reading vaccine file 2021VAERSVAX.csv... 
INFO:root:done.
INFO:root:Reading symptoms file 2021VAERSSYMPTOMS.csv... 
INFO:root:done.
INFO:root:Merged 'main' (14701, 35) and 'vaccine' (14937, 8) into 'reports' (14937, 42).
INFO:root:Filtering 'vaccine' to just the 14,562 COVID vaccine rows.
INFO:root:Filtering 'reports' to just the 14,562 COVID vaccine rows.


In [463]:
latest_date = max(
    [datetime.strptime(d, '%m/%d/%Y') 
    for d in data['reports']['todays_date'] if not pd.isnull(d)]) \
    .strftime('%B %d, %Y')
print(f"Latest date with data: {latest_date}.")

Latest date with data: February 26, 2021.


In [464]:
death_count = get_death_count(data)
death_count

Unnamed: 0,died,adverse event people,prop adverse that died,first doses,second doses,first doses / adverse,second doses / adverse
0,N,13442,0.921821,51032361,25733678,3796,1914
1,Y,1140,0.078179,51032361,25733678,44765,22573


In [465]:
def expand_sex(s: str):
    if s == 'M':
        return 'male'
    elif s == 'F':
        return 'female'
    else:
        return s
    
def expand_died(died: str):
    if died == 'Y':
        return 'died'
    elif died == 'N':
        return 'had an adverse event'
    else:
        raise ValueError(f"Unknown value {died}.")
        

def get_row_desc(row):
    sex = expand_sex(row['sex'])
    event_type = expand_died(row['died'])
    return f"{int(row.age_yrs)} year-old {sex} {event_type}: {row['symptom_text']}"
    

In [466]:
data['symptoms']

Unnamed: 0,vaers_id,symptom1,symptomversion1,symptom2,symptomversion2,symptom3,symptomversion3,symptom4,symptomversion4,symptom5,symptomversion5
0,916600,Dysphagia,23.1,Epiglottitis,23.1,,,,,,
1,916601,Anxiety,23.1,Dyspnoea,23.1,,,,,,
2,916602,Chest discomfort,23.1,Dysphagia,23.1,Pain in extremity,23.1,Visual impairment,23.1,,
3,916603,Dizziness,23.1,Fatigue,23.1,Mobility decreased,23.1,,,,
4,916604,Injection site erythema,23.1,Injection site pruritus,23.1,Injection site swelling,23.1,Injection site warmth,23.1,,
...,...,...,...,...,...,...,...,...,...,...,...
21688,1057082,SARS-CoV-2 test positive,23.1,Streptococcus test negative,23.1,Taste disorder,23.1,Vomiting,23.1,,
21689,1057281,Death,23.1,,,,,,,,
21690,1057348,Death,23.1,Dysarthria,23.1,Dysstasia,23.1,Fatigue,23.1,Feeding disorder,23.1
21691,1057363,Death,23.1,Dementia,23.1,,,,,,


In [468]:
class SymptomCounter:
    def __init__(self, symptoms_frame):
        self.people_to_symptoms = {}
        self.symptoms_to_counts = {}
        self.symptoms = self.symptoms_to_counts.keys()
        self.people = self.people_to_symptoms.keys()
        for i, row in symptoms_frame.iterrows():
            self.process_row(row)
        
        
    def process_row(self, row):
        person = row['vaers_id']
        if person not in self.people_to_symptoms:
            self.people_to_symptoms[person] = set()
            for key in ('symptom1', 'symptom2'):                
                symptom = row[key]
                if pd.isnull(symptom):
                    continue
                self.people_to_symptoms[person].add(symptom)
                if symptom not in self.symptoms_to_counts:
                    self.symptoms_to_counts[symptom] = 0
                self.symptoms_to_counts[symptom] += 1

sc = SymptomCounter(data['symptoms'])


In [477]:
symptom_counts_df = pd.DataFrame.from_dict(sc.symptoms_to_counts, orient='index').reset_index()
symptom_counts_df.columns = ['symptom', 'people']
symptom_counts_df['proportion_of_adverse'] = symptom_counts_df['people'] / len(sc.people)
symptom_counts_df.sort_values('people', ascending=False, inplace=True)
with pd.option_context('display.max_rows', 500):
    display(symptom_counts_df.head(100))

Unnamed: 0,symptom,people,proportion_of_adverse
11,Chills,1660,0.115254
26,Headache,1001,0.069499
5,Dizziness,1001,0.069499
6,Fatigue,941,0.065334
7,Injection site erythema,805,0.055891
25,Arthralgia,728,0.050545
192,Death,721,0.050059
33,Asthenia,664,0.046102
98,SARS-CoV-2 test positive,592,0.041103
191,COVID-19,542,0.037631


In [388]:
max_rows = 100
for i, row in data['reports'].iterrows():
    if i > max_rows:
        break
    print(get_row_desc(row))
    print()

33 year-old female had an adverse event: Right side of epiglottis swelled up and hinder swallowing pictures taken Benadryl Tylenol taken

73 year-old female had an adverse event: Approximately 30 min post vaccination administration patient demonstrated SOB and anxiousness. Assessed at time of event: Heart sounds normal, Lung sounds clear. Vitals within normal limits for patient. O2 91% on 3 liters NC Continuous flow.   2 consecutive nebulized albuterol treatments were administered. At approximately 1.5 hours post reaction, patients' SOB and anxiousness had subsided and the patient stated that they were feel "much better".

23 year-old female had an adverse event: About 15 minutes after receiving the vaccine, the patient complained about her left arm hurting. She also complained of chest tightness and difficulty swallowing. Patient also had vision changes. We gave the patient 1 tablet of Benadryl 25 mg and called EMS services. EMS checked her out and we advised the patient to go to the 