Setup

In [None]:
# install packages
import Pkg;
Pkg.add("CSV");
Pkg.add("DataFrames");
Pkg.add("FreqTables");
Pkg.add("StatsBase");

In [171]:
# load packages
using CSV;
using DataFrames;
using FreqTables;
using StatsBase;

In [173]:
# setup filepaths
path_source = string(@__DIR__,"\\..\\source");
path_dev = string(@__DIR__,"\\..\\dev");
path_output = string(@__DIR__,"\\..\\output");

Read in dataset

In [174]:
# read in cleaned combined VAERS file
df = CSV.read(joinpath(path_dev,"19-21VAERSCOMB_clean.csv"), DataFrame);
names(df)

8-element Vector{String}:
 "VAERS_ID"
 "RECVDATE"
 "AGE_YRS"
 "VAX_NAME"
 "VAX_TYPE"
 "VAX_MANU"
 "SYMPTOMS"
 "SERIOUS_EVENT"

In [175]:
size(df)

(103050, 8)

In [176]:
first(select(df, ["VAERS_ID", "VAX_NAME", "SYMPTOMS", "SERIOUS_EVENT"]), 10)

Unnamed: 0_level_0,VAERS_ID,VAX_NAME,SYMPTOMS
Unnamed: 0_level_1,Int64,String,String
1,794156,INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT),"Set([""Injection site joint pain"", ""Injected limb mobility decreased""])"
2,794157,ZOSTER (SHINGRIX),"Set([""Apathy"", ""Injection site pain"", ""Injection site pruritus"", ""Asthenia"", ""Arthralgia"", ""Injection site erythema"", ""Injection site warmth"", ""Injection site swelling"", ""Night sweats"", ""Listless""])"
3,794158,ZOSTER (SHINGRIX),"Set([""Pain"", ""Headache"", ""Nausea"", ""Pyrexia"", ""Chills""])"
4,794160,ZOSTER (SHINGRIX),"Set([""Lip swelling"", ""Lip blister"", ""Pain"", ""Influenza like illness"", ""Asthenia"", ""Injection site erythema"", ""Fatigue"", ""Injection site swelling"", ""Chills""])"
5,794161,ZOSTER (SHINGRIX),"Set([""Pyrexia""])"
6,794163,ZOSTER (SHINGRIX),"Set([""Abdominal pain"", ""Nausea"", ""Pyrexia"", ""Headache"", ""Arthralgia"", ""Fatigue"", ""Dizziness"", ""Myalgia""])"
7,794164,ZOSTER (SHINGRIX),"Set([""Injection site pain""])"
8,794165,INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE),"Set([""Extra dose administered"", ""No adverse event""])"
9,794165,PNEUMO (PREVNAR13),"Set([""Extra dose administered"", ""No adverse event""])"
10,794166,INFLUENZA (SEASONAL) (FLUCELVAX QUADRIVALENT),"Set([""Bursitis"", ""Injection site reaction""])"


In [177]:
# Convert SYMPTOMS strings back to an array of strings
a = fill([], size(df,1));

for rownumber in 1:size(df, 1)
    str_symptoms = df[rownumber,:SYMPTOMS]
    str_index_start = findfirst(isequal('['), str_symptoms)+2
    str_index_end = findfirst(isequal(']'), str_symptoms)-2
    a[rownumber] = split(df[rownumber,:SYMPTOMS][str_index_start:str_index_end], "\", \"")
end;

df.SYMPTOMS = a;
first(select(df, ["VAERS_ID", "VAX_NAME", "SYMPTOMS", "SERIOUS_EVENT"]), 10)

Unnamed: 0_level_0,VAERS_ID,VAX_NAME,SYMPTOMS
Unnamed: 0_level_1,Int64,String,Array…
1,794156,INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT),"[""Injection site joint pain"", ""Injected limb mobility decreased""]"
2,794157,ZOSTER (SHINGRIX),"[""Apathy"", ""Injection site pain"", ""Injection site pruritus"", ""Asthenia"", ""Arthralgia"", ""Injection site erythema"", ""Injection site warmth"", ""Injection site swelling"", ""Night sweats"", ""Listless""]"
3,794158,ZOSTER (SHINGRIX),"[""Pain"", ""Headache"", ""Nausea"", ""Pyrexia"", ""Chills""]"
4,794160,ZOSTER (SHINGRIX),"[""Lip swelling"", ""Lip blister"", ""Pain"", ""Influenza like illness"", ""Asthenia"", ""Injection site erythema"", ""Fatigue"", ""Injection site swelling"", ""Chills""]"
5,794161,ZOSTER (SHINGRIX),"[""Pyrexia""]"
6,794163,ZOSTER (SHINGRIX),"[""Abdominal pain"", ""Nausea"", ""Pyrexia"", ""Headache"", ""Arthralgia"", ""Fatigue"", ""Dizziness"", ""Myalgia""]"
7,794164,ZOSTER (SHINGRIX),"[""Injection site pain""]"
8,794165,INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE),"[""Extra dose administered"", ""No adverse event""]"
9,794165,PNEUMO (PREVNAR13),"[""Extra dose administered"", ""No adverse event""]"
10,794166,INFLUENZA (SEASONAL) (FLUCELVAX QUADRIVALENT),"[""Bursitis"", ""Injection site reaction""]"


EDA

In [178]:
# Frequency table of vaccines
freq_vax = sort(freqtable(df, :VAX_NAME), rev=true);
freq_vax

112-element Named Vector{Int64}
VAX_NAME                                                │ 
────────────────────────────────────────────────────────┼──────
COVID19 (COVID19 (PFIZER-BIONTECH))                     │ 23630
ZOSTER (SHINGRIX)                                       │ 21666
COVID19 (COVID19 (MODERNA))                             │ 20815
PNEUMO (PNEUMOVAX)                                      │  4810
COVID19 (COVID19 (JANSSEN))                             │  2774
INFLUENZA (SEASONAL) (FLUZONE QUADRIVALENT)             │  2154
INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE)                │  2044
INFLUENZA (SEASONAL) (FLUCELVAX QUADRIVALENT)           │  1912
INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE QUADRIVALENT)   │  1819
INFLUENZA (SEASONAL) (AFLURIA QUADRIVALENT)             │  1562
INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT)             │  1531
PNEUMO (PREVNAR13)                                      │  1408
⋮                                                             ⋮
BCG (NO BRAND

In [179]:
# Create dictionary for VAX_NAME:SYMPTOM
# Step 1: create the dict 
vax_to_symptoms_dict = Dict{String, Set{String}}()
# Step 2: populate the keys (VAERS_ID) of the dict
for rownumber in 1:size(df, 1)
    vax_name = df[rownumber, :VAX_NAME]
    if !haskey(vax_to_symptoms_dict, vax_name)
        # this is the set where we will store all of the symptoms for this VAX_NAME
        vax_to_symptoms_dict[vax_name] = Set{String}()
    end
end
# Step 3: populate the values (SYMPTOMS) of the dict
for rownumber in 1:size(df, 1)
    vax_name = df[rownumber, :VAX_NAME]
    symptoms = df[rownumber, :SYMPTOMS]
    for symptom in symptoms 
        push!(vax_to_symptoms_dict[vax_name], symptom)
    end
end
# View dict
vax_to_symptoms_dict

Dict{String, Set{String}} with 112 entries:
  "PNEUMO (PREVNAR)"        => Set(["Laboratory test", "Pyrexia", "Tenderness",…
  "RABIES (RABIE-VAX)"      => Set(["Blood magnesium normal", "Red blood cell s…
  "INFLUENZA (SEASONAL) (F… => Set(["Sinus headache", "Pregnancy", "Cellulitis"…
  "INFLUENZA (SEASONAL) (F… => Set(["Oropharyngeal pain", "Pyrexia", "Injection…
  "DT ADSORBED (NO BRAND N… => Set(["Burning sensation", "Dry mouth", "Cellulit…
  "YELLOW FEVER (NO BRAND … => Set(["Pain", "Mobility decreased", "Muscle disor…
  "COVID19 (COVID19 (UNKNO… => Set(["Tachypnoea", "Full blood count", "Troponin…
  "ANTHRAX (NO BRAND NAME)" => Set(["Arteriogram coronary normal", "Stevens-Joh…
  "MENINGOCOCCAL B (TRUMEN… => Set(["Abdominal pain lower", "Computerised tomog…
  "HEP A (NO BRAND NAME)"   => Set(["Burning sensation", "Electrocardiogram nor…
  "TD ADSORBED (TDVAX)"     => Set(["Full blood count", "Abdominal pain", "Irri…
  "HEP B (NO BRAND NAME)"   => Set(["Burning sensation", "Product

In [182]:
# Create dict of most reported symptoms for all 112 vaccines
symptoms_all_dupes = reduce(vcat, df.SYMPTOMS)
symptoms_freq_dict = StatsBase.countmap(symptoms_all_dupes)
list_symptoms = unique(reduce(vcat, df.SYMPTOMS))
freq_per_symptom = [symptoms_freq_dict[val] for val in list_symptoms]
perm = sortperm(freq_per_symptom; rev=true)
#print(freq_per_symptom[perm])
list_symptoms_mostlikely = list_symptoms[perm]

5565-element Vector{Any}:
 "Headache"
 "Pyrexia"
 "Pain"
 "Chills"
 "Fatigue"
 "Injection site pain"
 "Pain in extremity"
 "Nausea"
 "Injection site erythema"
 "Dizziness"
 "Myalgia"
 "Injection site swelling"
 "Erythema"
 ⋮
 "Therapy cessation"
 "Gastric haemorrhage"
 "Progressive multifocal leukoencephalopathy"
 "Nephrostomy"
 "Nail injury"
 "Vertebral artery stenosis"
 "Mini-tracheostomy"
 "Bladder mass"
 "Gallbladder mass"
 "Scan myocardial perfusion abnormal"
 "Eosinophils urine"
 "Flight of ideas"

In [186]:
# Create dict of most reported symptoms for COVID vaccines
df_covid19 = filter(row -> row.VAX_TYPE in ["COVID19"], df)

symptoms_all_dupes = reduce(vcat, df_covid19.SYMPTOMS)
symptoms_freq_dict = StatsBase.countmap(symptoms_all_dupes)
list_symptoms = unique(reduce(vcat, df_covid19.SYMPTOMS))
freq_per_symptom = [symptoms_freq_dict[val] for val in list_symptoms]
perm = sortperm(freq_per_symptom; rev=true)
#print(freq_per_symptom[perm])
list_symptoms_mostlikely = list_symptoms[perm]

4347-element Vector{Any}:
 "Headache"
 "Pyrexia"
 "Chills"
 "Fatigue"
 "Pain"
 "Nausea"
 "Dizziness"
 "Injection site pain"
 "Pain in extremity"
 "Myalgia"
 "Injection site erythema"
 "Arthralgia"
 "Dyspnoea"
 ⋮
 "Pancreatitis chronic"
 "Escherichia sepsis"
 "Axillary nerve injury"
 "Nail injury"
 "Vertebral artery stenosis"
 "Mini-tracheostomy"
 "Bladder mass"
 "Gallbladder mass"
 "Scan myocardial perfusion abnormal"
 "Eosinophils urine"
 "Flight of ideas"
 "Tilt table test positive"

Contingency tables of Serious Events

In [6]:
# Frequency table of serious events
freq_serious = sort(freqtable(df, :SERIOUS_EVENT), rev=true);
freq_serious

2-element Named Vector{Int64}
SERIOUS_EVENT  │ 
───────────────┼──────
0              │ 92996
1              │ 10054

In [227]:
df.ZOSTER = (df.VAX_NAME .== "ZOSTER (SHINGRIX)");
tbl = freqtable(df, :ZOSTER, :SERIOUS_EVENT)

2×2 Named Matrix{Int64}
ZOSTER ╲ SERIOUS_EVENT │     0      1
───────────────────────┼─────────────
false                  │ 71998   9386
true                   │ 20998    668

In [215]:
# Example contingency tables for VAX of interest = "COVID19 (COVID19 (PFIZER-BIONTECH))" and serious report
df.COVID19_PFIZER = (df.VAX_NAME .== "COVID19 (COVID19 (PFIZER-BIONTECH))");
tbl = freqtable(df, :COVID19_PFIZER, :SERIOUS_EVENT)

2×2 Named Matrix{Int64}
COVID19_PFIZER ╲ SERIOUS_EVENT │     0      1
───────────────────────────────┼─────────────
false                          │ 72987   6433
true                           │ 20009   3621

In [216]:
get_PRR(tbl)

1.8918257590414533

In [217]:
df.COVID19_MODERNA = (df.VAX_NAME .== "COVID19 (COVID19 (MODERNA))");
tbl = freqtable(df, :COVID19_MODERNA, :SERIOUS_EVENT)

2×2 Named Matrix{Int64}
COVID19_MODERNA ╲ SERIOUS_EVENT │     0      1
────────────────────────────────┼─────────────
false                           │ 75792   6443
true                            │ 17204   3611

In [218]:
get_PRR(tbl)

2.214214235673132

In [219]:
df.COVID19_JANSSEN = (df.VAX_NAME .== "COVID19 (COVID19 (JANSSEN))");
tbl = freqtable(df, :COVID19_JANSSEN, :SERIOUS_EVENT)

2×2 Named Matrix{Int64}
COVID19_JANSSEN ╲ SERIOUS_EVENT │     0      1
────────────────────────────────┼─────────────
false                           │ 90376   9900
true                            │  2620    154

In [220]:
get_PRR(tbl)

0.5623103420652087

Among the COVID-19 vaccines, what are the top reported symptoms

Functions for aims analysis

In [221]:
function get_freqtable(df, vax_name_str, symptom_str)
    test = select(df, ["VAX_NAME", "SYMPTOMS"])
    # Create dummy variable for vax_name
    test.VAX_IND = (test.VAX_NAME .== vax_name_str)
    # Create dummy variable for symptom
    test.SYMPTOM_IND = zeros(size(test, 1))
    for rownumber in 1:size(test, 1)
        if symptom_str in test[rownumber,:SYMPTOMS]
            test[rownumber,:SYMPTOM_IND] = 1
        end
    end
    
    # Create frequency table
    tbl = freqtable(test, :VAX_IND, :SYMPTOM_IND)
    return tbl
end;

function get_PRR(tbl)
    # Calculate PRR
    a = tbl[2,2]
    b = tbl[2,1]
    c = tbl[1,2]
    d = tbl[1,1]
    PRR = (a/(a+b))/(c/(c+d))
    
    return PRR
end;

In [222]:
get_freqtable(df, "COVID19 (COVID19 (PFIZER-BIONTECH))", "Headache")

2×2 Named Matrix{Int64}
VAX_IND ╲ SYMPTOM_IND │   0.0    1.0
──────────────────────┼─────────────
false                 │ 66105  13315
true                  │ 18065   5565

In [223]:
get_PRR(get_freqtable(df, "COVID19 (COVID19 (PFIZER-BIONTECH))", "Headache"))

1.404721271689326

In [224]:
get_freqtable(df, "COVID19 (COVID19 (MODERNA))", "Headache")

2×2 Named Matrix{Int64}
VAX_IND ╲ SYMPTOM_IND │   0.0    1.0
──────────────────────┼─────────────
false                 │ 67741  14494
true                  │ 16429   4386

In [225]:
get_PRR(get_freqtable(df, "COVID19 (COVID19 (MODERNA))", "Headache"))

1.1955304771966409

Stratified analysis by Age Group