In [2]:
# libraries
import pandas as pd
import re
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from datasage.styles import DARK_THEME,LIGHT_THEME,theme
from datasage.core import Leonardo
import matplotlib.colors
import matplotlib.font_manager as fm
import mplcyberpunk
# Project helper from datasage.core; its setup_google_font() method is called in the settings cell.
leo = Leonardo()
# Apply the light theme imported from datasage.styles to all matplotlib figures
# (DARK_THEME is imported as well but not used here).
plt.style.use(LIGHT_THEME)

In [3]:
# --- Display settings ---
# The value returned by the project helper is used as matplotlib's font family
# (presumably a family name registered by setup_google_font -- confirm in datasage).
font_family = leo.setup_google_font()
plt.rcParams.update({'font.family': font_family})
# Optional high-resolution rendering:
# plt.rcParams['figure.dpi'] = 300
# Widen text columns so long free-text fields are readable in DataFrame output.
pd.set_option('display.max_colwidth', 300)

# --- Palette ---
# Three anchor colours and a continuous colormap interpolated between them.
colors = ['#949799', '#242728', '#0085a1']
ccmap = matplotlib.colors.LinearSegmentedColormap.from_list("", colors)
# Uncomment to preview the palette and font:
# sns.palplot(colors,size=1)
# plt.title('Palette and font',size=9)
# plt.show()

## Reading data
Data source: [COVID-19 vaccine adverse reactions (VAERS) dataset on Kaggle](https://www.kaggle.com/datasets/ayushggarg/covid19-vaccine-adverse-reactions/data?select=VAERSDATA.csv)

In [40]:
# Load the three VAERS tables (report data, symptoms, vaccines) with
# lower-cased column names so later cells can use one naming convention.
vd, vs, vv = (
    pd.read_csv(csv_path).rename(columns=str.lower)
    for csv_path in ('vaers_data.csv', 'vaers_symptoms.csv', 'vaers_vax.csv')
)
# Only COVID-19 vaccine rows are analysed.
vv = vv.query("vax_type=='COVID19'")

## Vaccines

In [5]:
# --- Cleaning ---
# Keep only rows from the three known manufacturers.
vv = vv[vv['vax_manu'].isin(['PFIZER\\BIONTECH', 'MODERNA', 'JANSSEN'])]
# Keep only lot numbers that contain at least one digit. `na=False` already
# rejects missing lots, so no separate dropna is needed and the mask is built
# on the same frame it filters.
vv = vv[vv['vax_lot'].str.contains(r'\d', na=False)]
# Normalise lot numbers: upper-case, trimmed, no inner spaces.
# `.assign` returns a new frame, avoiding a chained-assignment warning on the
# filtered slice.
vv = vv.assign(
    vax_lot=vv['vax_lot'].str.upper().str.strip().str.replace(' ', '', regex=False)
)
# Exclude placeholder lots (only zeros, or exactly "1") and lots flagged as
# unknown. The lots were upper-cased just above, so the marker must be matched
# as 'UNK'; a lowercase 'unk' pattern can never match here.
is_placeholder_lot = vv['vax_lot'].str.match(r'^0+$|^1$')
is_unknown_lot = vv['vax_lot'].str.contains('UNK', regex=False)
vv = vv[~is_placeholder_lot & ~is_unknown_lot]

# --- Selecting top lots per manufacturer ---
# To inspect the ranking interactively:
# pd.set_option('display.max_rows', None)
# print(vv.groupby(['vax_manu', 'vax_lot'])['vaers_id'].count().groupby(level=0).nlargest(50))
# pd.reset_option('display.max_rows')

# Number of reports for every (manufacturer, lot) pair.
reports_per_lot = vv.groupby(['vax_manu', 'vax_lot'])['vaers_id'].count()
# The 50 most-reported lots within each manufacturer, as a list of index tuples.
top_lots = (
    reports_per_lot
    .groupby(level=0)
    .nlargest(50)
    .index
    .to_list()
)
# The lot number is read from position 2 of each index tuple.
top_lot_numbers = [lot_key[2] for lot_key in top_lots]
# NOTE(review): rows are matched on lot number alone, so a row is kept when its
# lot is in the top 50 of *any* manufacturer -- confirm this is intended.
vv = vv.loc[vv['vax_lot'].isin(top_lot_numbers)]

# Keep only rows with a known dose-series label.
known_dose_labels = ['1', '2', '7+', '4', '5', '6', '3']
vv = vv.loc[vv['vax_dose_series'].isin(known_dose_labels)]
# Identical rows describe the same vaccine administration; keep one of each.
vv = vv.drop_duplicates()

# --- Select valid cases ---
# Per report: how many vaccine rows it has and which distinct dose labels occur.
result = vv.groupby('vaers_id').agg(
    vaccine_count=('vax_type', 'count'),
    vax_dose_series=('vax_dose_series', lambda doses: list(set(doses))),
)

# A report is valid only when its rows form a complete series starting at
# dose 1: one row -> {1}, two rows -> {1, 2}, three rows -> {1, 2, 3}.
# Any other row count has no entry here and is therefore rejected.
expected_doses = {
    1: {'1'},
    2: {'1', '2'},
    3: {'1', '2', '3'},
}
valid_cases = pd.Series(
    [
        set(doses) == expected_doses.get(count)
        for count, doses in zip(result['vaccine_count'], result['vax_dose_series'])
    ],
    index=result.index,
    dtype=bool,
)

# Report ids that passed the check (the groupby index holds vaers_id).
valid_vaers_ids = result.loc[valid_cases].index.to_list()

# Restrict the vaccine rows to those reports.
vv = vv.loc[vv['vaers_id'].isin(valid_vaers_ids)]

# Build one row per report, with list-valued columns ordered by dose number.
# The numeric sort key is added with `.assign` on a temporary copy, so `vv`
# itself is never mutated: no chained-assignment warning from writing to a
# filtered slice, and no in-place drop needed afterwards.
vaccines = (vv.assign(dose_num=lambda rows: rows['vax_dose_series']
                                                .replace('7+', '7')  # '7+' sorts as 7
                                                .astype(int))
              .sort_values(['vaers_id', 'dose_num'])
              .groupby('vaers_id')
              .agg(
                  vaccine_count=('vax_type', 'count'),
                  dose_series=('vax_dose_series', list),
                  vax_lots=('vax_lot', list),
                  vax_manus=('vax_manu', list),
                  vax_routes=('vax_route', list),
                  vax_sites=('vax_site', list),
              )
              .reset_index())

## Symptoms

In [6]:
# Names of the five wide-format symptom columns and their version columns.
symptom_cols = [f'symptom{i}' for i in range(1, 6)]
version_cols = [f'symptomversion{i}' for i in range(1, 6)]

# One three-column frame per (symptom, version) pair, all sharing the same
# column names so they can be stacked.
all_data = [
    vs[['vaers_id', symptom_col, version_col]].rename(
        columns={symptom_col: 'symptom', version_col: 'symptomversion'}
    )
    for symptom_col, version_col in zip(symptom_cols, version_cols)
]

# Stack into long format, drop empty symptom slots, then normalise:
# symptom names upper-cased, version stored as a string.
all_symptoms = (
    pd.concat(all_data, ignore_index=True)
      .dropna(subset=['symptom'])
      .assign(
          symptom=lambda rows: rows['symptom'].str.upper(),
          symptomversion=lambda rows: rows['symptomversion'].astype(str),
      )
)

In [7]:
# pd.set_option('display.max_rows', None)
# print(all_symptoms.groupby(['symptomversion','symptom'])['symptom'].count().sort_index(ascending=False))
# pd.reset_option('display.max_rows')

## Final dataframe

In [12]:
# Attach the per-report vaccine summary to the report data. The inner join
# keeps only reports that survived the vaccine filtering.
f = pd.merge(vd, vaccines, on='vaers_id', how='inner')

In [42]:
# Columns to show for reports flagged as deaths; the result is transposed so
# each report is a column and long text fields stay readable.
death_report_columns = [
    'vaers_id', 'cage_yr', 'vax_lots', 'vax_manus', 'vax_date', 'onset_date',
    'datedied', 'vaccine_count', 'hospital', 'symptom_text', 'cur_ill',
    'recovd', 'age_yrs',
]
f.loc[f['died'] == 'Y', death_report_columns].T

Unnamed: 0,2295,4433,4545,4594,4628,4646,4882,5032,5989,6177,...,183207,183219,183225,183242,183251,183255,183276,183277,183278,183308
vaers_id,909095,914604,914805,914895,914961,914994,915562,915880,918065,918518,...,2758930,2760244,2761411,2762889,2763896,2765106,2767890,2768018,2768019,2775465
cage_yr,66.0,74.0,63.0,78.0,88.0,90.0,88.0,99.0,64.0,50.0,...,44.0,77.0,69.0,85.0,60.0,74.0,51.0,59.0,28.0,40.0
vax_lots,[011J20A],[EH9899],[EH9899],[EL1284],[EL0142],[EL0142],[EL0142],[037K20A],[025J20-2A],[011L20A],...,[ER8727],[011M20A],[010A21A],"[EN5318, EN6201]",[012M20A],"[EN6198, ER8730]",[1805020],[EW0158],[EN6203],"[EN6200, ER8732]"
vax_manus,[MODERNA],[PFIZER\BIONTECH],[PFIZER\BIONTECH],[PFIZER\BIONTECH],[PFIZER\BIONTECH],[PFIZER\BIONTECH],[PFIZER\BIONTECH],[MODERNA],[MODERNA],[MODERNA],...,[PFIZER\BIONTECH],[MODERNA],[MODERNA],"[PFIZER\BIONTECH, PFIZER\BIONTECH]",[MODERNA],"[PFIZER\BIONTECH, PFIZER\BIONTECH]",[JANSSEN],[PFIZER\BIONTECH],[PFIZER\BIONTECH],"[PFIZER\BIONTECH, PFIZER\BIONTECH]"
vax_date,12/23/2020,12/16/2020,12/28/2020,12/28/2020,12/30/2020,12/30/2020,12/30/2020,12/30/2020,12/30/2020,12/31/2020,...,03/17/2021,01/03/2022,11/05/2021,01/27/2021,02/04/2021,05/02/2023,05/01/2021,04/15/2021,03/21/2021,11/04/2021
onset_date,12/25/2020,12/20/2020,12/29/2020,12/30/2020,12/30/2020,12/30/2020,12/30/2020,12/30/2020,01/01/2021,12/31/2020,...,03/18/2024,04/09/2024,12/08/2021,03/24/2021,03/30/2024,05/02/2024,09/05/2023,04/22/2024,05/03/2024,06/03/2024
datedied,12/25/2020,12/20/2020,12/29/2020,12/30/2020,12/30/2020,12/30/2020,12/30/2020,12/31/2020,01/01/2021,12/31/2020,...,03/18/2024,04/09/2024,01/20/2022,03/24/2021,03/30/2024,05/02/2024,09/07/2023,04/22/2024,05/03/2024,06/03/2024
vaccine_count,1,1,1,1,1,1,1,1,1,1,...,1,1,1,2,1,2,1,1,1,2
hospital,,,,,,,,,,,...,,,Y,Y,,,,Y,,
symptom_text,"on 12/24/2020 the resident was sleepy and stayed in bed most of the shift. He stated he was doing okay but requested pain medication for his legs at 250PM. At 255AM on 12/25/2020 the resident was observed in bed lying still, pale, eyes half open and foam coming from mouth and unresponsive. He...",Spouse awoke 12/20 and found spouse dead. Client was not transferred to hospital.,RESIDENT CODED AND EXPIRED,"Injection given on 12/28/20 - no adverse events and no issues yesterday; Death today, 12/30/20, approx.. 2am today (unknown if related - Administrator marked as natural causes)",pt passed away with an hour to hour and 1/2 of receiving vaccine. per nursing home staff they did not expect pt to make it many more days. pt was unresponsive in room when shot was given. per nursing home staff pt was 14 + days post covid,pt was a nursing home pt. pt received first dose of covid vaccine. pt was monitored for 15 minutes after getting shot. staff reported that pt was 15 days post covid. Pt passed away with in 90 minutes of getting vaccine,"pt received vaccine at covid clinic on 12/30 at approximately 3:30, pt vomited 4 minutes after receiving shot--dark brown vomit, staff reported pt had vomited night before. Per staff report pt became short of breath between 6 and 7 pm that night. Pt had DNR on file. pt passed away at approxima...",Patient died within 12 hours of receiving the vaccine.,1/1/2020: Residents was found unresponsive. Pronounced deceased at 6:02pm,syncopal episode - arrested - CPR - death,...,"Online report of sudden death in 47-year-old EAU countermeasure recipient deemed fully vaccinated against COVID-19. Post from 08/23/21 dead recipient posted got my second on May 1st. DOSE#1 date 03/17/21 LOT#ER8727, DOSE#2 date 05/01/21 LOT# UNK. Review of social media posts don't indicate any a...",Patient expired 4/9/2024.,"Online report of hospitalization and death in 70-year-old male recipient of three Moderna inoculations. 
DOSE#1 date 02/26/21 LOT#010A21A, DOSE#2 date 03/31/21 LOT# N/A, DOSE#3 date 11/05/21 LOT#NA Site of dose #2 was a drive thru inoculation site, DOSE#3 was at a pharmacy location. An online rep...","I am just reporting a possible adverse event. Just want a documentation made. My mom received her vaccinations on 1/27/2021 and 2/18/2021. On 3/8/2021, she experienced a major stroke and succumbed to stroke on 3/24/2021, Obviously, can not say that this was due to the vaccine but wanted to repor...",Online report of sudden death in 64-year-old male recipient of EUA medical countermeasure aka Moderna COVID-19 vaccine. Victim was likely inoculated a second time in 2021 after DOSE#1. DOSE#1 Date 02/04/2021 LOT#012M20A. A review of social media indicates spinal taps in November 2015. Other prio...,Patient expired on 5/2/2024.,Covid-19 Breakthrough Infection,Online report of sudden death in a 62-year-old EUA medical countermeasure recipient. DOSE#1 Date 04/15/2021 LOT#EW0158. Unable to ascertain information on subsequent doses. Victim routinely traveled on long flights. According to online information his kidneys and liver were donated to three diff...,"Online report of sudden death in a 31-year-old EUA countermeasure recipient. Victim was a healthy professional dancer. Location of DOSE#1 pharmacy #unk-DOSE#1 date 02/27/21 LOT#EN6203, DATE of DOSE#2 03/21/21 LOT#UNK, ONLINE COPY of VAX CARD gives pharmacy for DOSE#2. This is a case of interstat...","Online report of sudden death of 41-year-old EUA medical countermeasure recipient. Dose#1 Date 03/10/21 LOT#EN6200, DOSE#2 Date 03/29/21 LOT#ER8732 DOSE#3 Date 11/04/21 LOT# FF2593. Social media posts indicate no post inoculation adverse events nor any subsequent inoculations. Victim posted on s..."
