Setup

In [None]:
# Install the third-party packages used by this notebook.
# All names are passed in a single call so Pkg resolves and updates the
# environment once instead of once per package.
import Pkg;
Pkg.add(["CSV", "DataFrames", "FreqTables"]);

In [2]:
# load packages
using CSV;
using DataFrames;
using FreqTables;

In [3]:
# Set up project directories relative to this notebook's directory.
# joinpath inserts the platform's own separator, so the paths are valid on
# Linux/macOS as well as Windows (a hard-coded "\\" only works on Windows).
path_source = joinpath(@__DIR__, "..", "source");
path_dev = joinpath(@__DIR__, "..", "dev");
path_output = joinpath(@__DIR__, "..", "output");

Read in dataset

In [4]:
# Preview the raw rows of the cleaned file.
# Column types are keyed by name rather than by position: the file has 8
# columns, so a 7-element positional vector cannot line up with them.
# SYMPTOMS is stored as the text "Set([...])", so it is read as a String.
# RECVDATE and AGE_YRS are left to CSV's type inference.
CSV.File(
    joinpath(path_dev, "19-21VAERSCOMB_clean.csv");
    types=Dict(
        :VAERS_ID => Int,
        :VAX_NAME => String,
        :VAX_TYPE => String,
        :VAX_MANU => String,
        :SYMPTOMS => String,
        :SERIOUS_EVENT => Int,
    ),
)

103050-element CSV.File{false}:
 CSV.Row: (VAERS_ID = 794156, RECVDATE = Dates.Date("2019-01-01"), AGE_YRS = 69.0, VAX_NAME = "INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT)", VAX_TYPE = "FLU4", VAX_MANU = "GLAXOSMITHKLINE BIOLOGICALS", SYMPTOMS = "Set([\"Injection site joint pain\", \"Injected limb mobility decreased\"])", SERIOUS_EVENT = 0)
 CSV.Row: (VAERS_ID = 794157, RECVDATE = Dates.Date("2019-01-01"), AGE_YRS = 68.0, VAX_NAME = "ZOSTER (SHINGRIX)", VAX_TYPE = "VARZOS", VAX_MANU = "GLAXOSMITHKLINE BIOLOGICALS", SYMPTOMS = "Set([\"Apathy\", \"Injection site pain\", \"Injection site pruritus\", \"Asthenia\", \"Arthralgia\", \"Injection site erythema\", \"Injection site warmth\", \"Injection site swelling\", \"Night sweats\", \"Listless\"])", SERIOUS_EVENT = 0)
 CSV.Row: (VAERS_ID = 794158, RECVDATE = Dates.Date("2019-01-01"), AGE_YRS = 62.0, VAX_NAME = "ZOSTER (SHINGRIX)", VAX_TYPE = "VARZOS", VAX_MANU = "GLAXOSMITHKLINE BIOLOGICALS", SYMPTOMS = "Set([\"Pain\", \"Headache\", \"Nausea\

In [6]:
# Read the cleaned, combined VAERS file from path_dev into the DataFrame `df`
# (the trailing `;` suppresses the cell's display of the assignment),
# then list the column names as a quick check of what was loaded.
df = CSV.read(joinpath(path_dev,"19-21VAERSCOMB_clean.csv"), DataFrame);
names(df)

8-element Vector{String}:
 "VAERS_ID"
 "RECVDATE"
 "AGE_YRS"
 "VAX_NAME"
 "VAX_TYPE"
 "VAX_MANU"
 "SYMPTOMS"
 "SERIOUS_EVENT"

In [7]:
# Show the first 10 rows, restricted to the identifier, vaccine, symptom and
# seriousness columns.
first(select(df, :VAERS_ID, :VAX_NAME, :SYMPTOMS, :SERIOUS_EVENT), 10)

Unnamed: 0_level_0,VAERS_ID,VAX_NAME,SYMPTOMS
Unnamed: 0_level_1,Int64,String,String
1,794156,INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT),"Set([""Injection site joint pain"", ""Injected limb mobility decreased""])"
2,794157,ZOSTER (SHINGRIX),"Set([""Apathy"", ""Injection site pain"", ""Injection site pruritus"", ""Asthenia"", ""Arthralgia"", ""Injection site erythema"", ""Injection site warmth"", ""Injection site swelling"", ""Night sweats"", ""Listless""])"
3,794158,ZOSTER (SHINGRIX),"Set([""Pain"", ""Headache"", ""Nausea"", ""Pyrexia"", ""Chills""])"
4,794160,ZOSTER (SHINGRIX),"Set([""Lip swelling"", ""Lip blister"", ""Pain"", ""Influenza like illness"", ""Asthenia"", ""Injection site erythema"", ""Fatigue"", ""Injection site swelling"", ""Chills""])"
5,794161,ZOSTER (SHINGRIX),"Set([""Pyrexia""])"
6,794163,ZOSTER (SHINGRIX),"Set([""Abdominal pain"", ""Nausea"", ""Pyrexia"", ""Headache"", ""Arthralgia"", ""Fatigue"", ""Dizziness"", ""Myalgia""])"
7,794164,ZOSTER (SHINGRIX),"Set([""Injection site pain""])"
8,794165,INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE),"Set([""Extra dose administered"", ""No adverse event""])"
9,794165,PNEUMO (PREVNAR13),"Set([""Extra dose administered"", ""No adverse event""])"
10,794166,INFLUENZA (SEASONAL) (FLUCELVAX QUADRIVALENT),"Set([""Bursitis"", ""Injection site reaction""])"


Exploratory data analysis (EDA)

In [5]:
# Count rows per vaccine name and order the counts from largest to smallest
freq_vax = freqtable(df, :VAX_NAME) |> t -> sort(t; rev=true);
freq_vax

112-element Named Vector{Int64}
VAX_NAME                                                │ 
────────────────────────────────────────────────────────┼──────
COVID19 (COVID19 (PFIZER-BIONTECH))                     │ 23630
ZOSTER (SHINGRIX)                                       │ 21666
COVID19 (COVID19 (MODERNA))                             │ 20815
PNEUMO (PNEUMOVAX)                                      │  4810
COVID19 (COVID19 (JANSSEN))                             │  2774
INFLUENZA (SEASONAL) (FLUZONE QUADRIVALENT)             │  2154
INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE)                │  2044
INFLUENZA (SEASONAL) (FLUCELVAX QUADRIVALENT)           │  1912
INFLUENZA (SEASONAL) (FLUZONE HIGH-DOSE QUADRIVALENT)   │  1819
INFLUENZA (SEASONAL) (AFLURIA QUADRIVALENT)             │  1562
INFLUENZA (SEASONAL) (FLUARIX QUADRIVALENT)             │  1531
PNEUMO (PREVNAR13)                                      │  1408
VACCINE NOT SPECIFIED (NO BRAND NAME)                   │  1286
TDAP (BOOSTRI

In [6]:
# Count rows per SERIOUS_EVENT value and order the counts from largest to smallest
freq_serious = freqtable(df, :SERIOUS_EVENT) |> t -> sort(t; rev=true);
freq_serious

2-element Named Vector{Int64}
SERIOUS_EVENT  │ 
───────────────┼──────
0              │ 92996
1              │ 10054

In [7]:
# Example contingency table: serious report vs. vaccine of interest.
# Add a Boolean indicator column marking Pfizer-BioNTech rows, then
# cross-tabulate it against SERIOUS_EVENT.
df[!, :COVID19_PFIZER] = df.VAX_NAME .== "COVID19 (COVID19 (PFIZER-BIONTECH))";
tbl = freqtable(df, :SERIOUS_EVENT, :COVID19_PFIZER)

2×2 Named Matrix{Int64}
SERIOUS_EVENT ╲ COVID19_PFIZER │ false   true
───────────────────────────────┼─────────────
0                              │ 72987  20009
1                              │  6433   3621

In [8]:
# Express each cell of the Pfizer table as a share of the grand total
tbl |> prop

2×2 Named Matrix{Float64}
SERIOUS_EVENT ╲ COVID19_PFIZER │     false       true
───────────────────────────────┼─────────────────────
0                              │  0.708268   0.194168
1                              │  0.062426  0.0351383

In [9]:
# Add a Boolean indicator column marking Moderna rows, then cross-tabulate
# it against SERIOUS_EVENT.
df[!, :COVID19_MODERNA] = df.VAX_NAME .== "COVID19 (COVID19 (MODERNA))";
tbl = freqtable(df, :SERIOUS_EVENT, :COVID19_MODERNA)

2×2 Named Matrix{Int64}
SERIOUS_EVENT ╲ COVID19_MODERNA │ false   true
────────────────────────────────┼─────────────
0                               │ 75792  17204
1                               │  6443   3611

In [10]:
# Express each cell of the Moderna table as a share of the grand total
tbl |> prop

2×2 Named Matrix{Float64}
SERIOUS_EVENT ╲ COVID19_MODERNA │     false       true
────────────────────────────────┼─────────────────────
0                               │  0.735488   0.166948
1                               │  0.062523  0.0350412

In [11]:
# Add a Boolean indicator column marking Janssen rows, then cross-tabulate
# it against SERIOUS_EVENT.
df[!, :COVID19_JANSSEN] = df.VAX_NAME .== "COVID19 (COVID19 (JANSSEN))";
tbl = freqtable(df, :SERIOUS_EVENT, :COVID19_JANSSEN)

2×2 Named Matrix{Int64}
SERIOUS_EVENT ╲ COVID19_JANSSEN │ false   true
────────────────────────────────┼─────────────
0                               │ 90376   2620
1                               │  9900    154

In [12]:
# Express each cell of the Janssen table as a share of the grand total
tbl |> prop

2×2 Named Matrix{Float64}
SERIOUS_EVENT ╲ COVID19_JANSSEN │      false        true
────────────────────────────────┼───────────────────────
0                               │   0.877011   0.0254246
1                               │  0.0960699  0.00149442