# Covid Risk Dataset

In [1]:
import json
import pickle
import random
import re
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings("ignore")
%matplotlib inline

## Import the datasets

In [2]:
# Paths of the two VAERS 2022 exports: the report table and the vaccine table.
data_dataset = 'data/2022VAERSDATA.csv'
vax_dataset = 'data/2022VAERSVAX.csv'

# Report columns that read_csv should parse as dates.
dates = ['RECVDATE', 'RPT_DATE', 'DATEDIED', 'VAX_DATE', 'ONSET_DATE', 'TODAYS_DATE']

# Both files are decoded as cp1252
# (codecs noted by the author: latin, latin1, cp1252, ISO-8859-1).
data = pd.read_csv(data_dataset, parse_dates=dates, encoding='cp1252')
vdata = pd.read_csv(vax_dataset, encoding='cp1252')

# Index both tables by the report id and inner-join them on that index.
data = data.set_index('VAERS_ID')
vdata = vdata.set_index('VAERS_ID')
df = data.merge(vdata, left_index=True, right_index=True)

# Keep only the rows whose vaccine type is one of the COVID-19 codes.
vax_types = ['COVID19', 'COVID19-2']
df = df.loc[df['VAX_TYPE'].isin(vax_types)]

## Clean up the data

In [3]:
# preview the first rows of the merged frame after the COVID-only filter
df.head()

Unnamed: 0_level_0,RECVDATE,STATE,AGE_YRS,CAGE_YR,CAGE_MO,SEX,RPT_DATE,SYMPTOM_TEXT,DIED,DATEDIED,...,OFC_VISIT,ER_ED_VISIT,ALLERGIES,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
VAERS_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996874,2022-01-01,,,,,U,NaT,SUSPECTED CLINICAL VACCINATION FAILURE; SUSPEC...,,NaT,...,,,,COVID19,JANSSEN,,1,OT,,COVID19 (COVID19 (JANSSEN))
1996875,2022-01-01,OH,,,,M,NaT,SUSPECTED CLINICAL VACCINATION FAILURE; SUSPEC...,,NaT,...,,,,COVID19,JANSSEN,204A21A,1,,,COVID19 (COVID19 (JANSSEN))
1996876,2022-01-01,MS,34.0,,,F,NaT,Irregular menstrual cycle.; period extremely h...,,NaT,...,,,,COVID19,PFIZER\BIONTECH,Fd8448,2,,LA,COVID19 (COVID19 (PFIZER-BIONTECH))
1996877,2022-01-01,CT,,,,F,NaT,Breakthrough and heavy periods; longer period ...,,NaT,...,,,,COVID19,PFIZER\BIONTECH,EP6955,1,,LA,COVID19 (COVID19 (PFIZER-BIONTECH))
1996878,2022-01-01,,74.0,,,M,NaT,tired; Patient reports feeling achy; chills; t...,,NaT,...,,,,COVID19,PFIZER\BIONTECH,,3,,,COVID19 (COVID19 (PFIZER-BIONTECH))


> Since I'm trying to predict whether someone will die from taking the shot, I need to remove any features that the patient would not know before taking the shot, as well as the ones that I think would not play a significant role in the outcome.

In [4]:
# Columns removed from the merged frame before any further processing.
# The list is assembled from three groups and sorted, which produces exactly
# the alphabetical list obtained by writing every name out by hand.
drop_features = sorted(
    # the columns that are parsed as dates when the report file is loaded
    ['DATEDIED', 'ONSET_DATE', 'RECVDATE', 'RPT_DATE', 'TODAYS_DATE', 'VAX_DATE']
    # vaccine-record (VAX_*) columns
    + ['VAX_LOT', 'VAX_NAME', 'VAX_ROUTE', 'VAX_SITE', 'VAX_TYPE']
    # every other dropped column
    + [
        'BIRTH_DEFECT', 'CAGE_MO', 'CAGE_YR', 'ER_ED_VISIT', 'ER_VISIT',
        'FORM_VERS', 'HOSPDAYS', 'HOSPITAL', 'LAB_DATA', 'NUMDAYS', 'RECOVD',
        'SPLTTYPE', 'SYMPTOM_TEXT', 'V_ADMINBY', 'V_FUNDBY', 'X_STAY',
    ]
)

# Columns that binarize() collapses to a 0/1 flag.
binarize_features = [
    'allergies',
    'cur_ill',
    'history',
    'other_meds',
    'prior_vax',
]

# Columns whose 'y'/'n' values (missing counts as 'n') are mapped to 1/0.
binary_features = [
    'died',
    'l_threat',
    'disable',
    'ofc_visit',
]

# NOTE(review): not referenced by any other visible cell, and V_FUNDBY is in
# drop_features -- confirm whether this list is still needed.
other_features = ['v_fundby']
none_values = {
    '"none" per patient',
    '"unidentified" per grandmother',
    '(does not apply)',
    ',none',
    '-',
    '- nka',
    '- nkda',
    '- none',
    '- none known',
    '--- 1/6/2022 10:48 pm --- nka',
    '-na-',
    '...',
    '2no',
    '?',
    'allergies: nka',
    'decline',
    'denied',
    'denies',
    'denies any allergies to medications,foods',
    'denies.',
    'did not indicate.',
    'did not provide any information.',
    'dka.',
    'do not know',
    "don't know",
    'done',
    'i do not',
    "i don't know",
    "i don't want to add anything.",
    'kna',
    'known',
    'm/a',
    'mkda',
    'mom reported no allergies',
    'mom stated no on admission',
    'mone',
    'n / a',
    "n'a",
    'n-a',
    'n./a',
    'n.a',
    'n.a.',
    'n.k.a',
    'n.k.d. a',
    'n.k.d.a',
    'n.k.d.a.',
    'n/',
    'n/a',
    'n/a - none',
    'n/a per mother',
    'n/a to the best of my knowledge',
    'n/a unknown',
    'n/a.',
    'n/a. pt did not verbalize any allergies.',
    'n/k',
    'n/k/a',
    'n0',
    'n?a',
    'na',
    'na/',
    'nak',
    'ndka',
    'ndka, nka',
    'ndka.',
    'never',
    'nil',
    'nk',
    'nka',
    'nka  knda',
    'nka  reported by father',
    'nka / nkda',
    'nka as per mother',
    'nka nkda',
    'nka no food or drug allergies otherwise noted',
    'nka reported',
    'nka to meds, unknown to others',
    'nka until this 3rd (booster) vaccine',
    'nka, knda, no known environmental or food allergies.',
    'nka, none reported.',
    'nka.',
    'nka/nkda',
    'nkda',
    'nkda  nkfa',
    'nkda at the time',
    'nkda listed in patient profile.',
    'nkda listed on form.',
    'nkda nka',
    'nkda nkfa',
    'nkda no food allergies either',
    'nkda no known food allergies',
    'nkda no other allergies',
    'nkda or allergies reported',
    'nkda or to foods.',
    'nkda reported',
    'nkda, food allergies, or other products',
    'nkda, food allergy, or other allergy',
    'nkda, kkfa',
    'nkda, nka',
    'nkda, nkea.',
    'nkda, nkfa',
    'nkda, no environmental allergies, no food allergies',
    'nkda, no food allergies',
    'nkda, no known allergies',
    'nkda, no known food allergies',
    'nkda, no known food or product allergies',
    'nkda, no known other allergies.',
    'nkda, no reported food allergies',
    'nkda, none',
    'nkda.',
    'nkda/ no known foord allergies',
    'nkda/nka',
    'nkda/nkfa',
    'nkda/no know allergies',
    'nkda; nka',
    'nkda; nkfa',
    'nkda; nkfa.',
    'nkda; no allergies to food or any other products',
    'nkda= no',
    'nkda`',
    'nkdfa',
    'nkfda',
    'nkma',
    'nkma / no food allergies',
    'nkma and no food',
    'nkma.',
    'nkma/nka',
    'nknda',
    'nkne',
    'nknown',
    'no',
    'no  not that aware of.',
    'no according to patient questionnaire',
    'no active allergies',
    'no adverse reactions reported',
    'no allegies',
    'no allergic reactions to vaccines.',
    'no allergies',
    'no allergies any medications and food',
    'no allergies at the moment.',
    'no allergies at the time',
    'no allergies at the time of vaccination.',
    'no allergies currently.',
    'no allergies documented',
    'no allergies found so far.',
    'no allergies indicated',
    'no allergies listed at time of vaccine.',
    'no allergies listed for medications,food components, vaccine components, or latex',
    'no allergies noted',
    'no allergies on file',
    'no allergies per mom',
    'no allergies per patient informed consent form',
    'no allergies per screening checklist',
    'no allergies per vaccination checklist',
    'no allergies reported',
    'no allergies reported by parent.',
    'no allergies reported.',
    'no allergies s at the moment.',
    'no allergies stated;nka',
    'no allergies to any food or medications were reported.',
    'no allergies to any food or medications.',
    'no allergies to any food or other products.',
    "no allergies to any medications or food or any products that i'm aware of.",
    'no allergies to anything',
    'no allergies to food or medications',
    'no allergies to medication or food',
    'no allergies to medication or food that i know of.',
    'no allergies to medication to food, or other products.',
    'no allergies to medications',
    'no allergies to medications and food',
    'no allergies to medications or food',
    'no allergies to medications, food or other products.',
    'no allergies to medications, food, or other products noted at the time of vaccination.',
    'no allergies to medications, food, or other products.',
    'no allergies to medications, foods, or other products.',
    'no allergies to meds or foods',
    'no allergies to my knowledge.',
    'no allergies to previous vaccines',
    'no allergies, food or other products.',
    'no allergies, nkda',
    'no allergies.',
    'no allergy',
    'no allergy reported.',
    "no allergy's",
    'no documented allergies',
    'no documented known allergies',
    'no drug allergies',
    'no drug allergies known',
    'no e',
    'no food allergy, no drug allergy',
    'no food or drug allergies',
    'no food or drug allergies reported.',
    'no food, drug, latex, or venom allergies.none per pt.',
    'no history of allergic reaction to covid-19 vaccines, polysorbate, or any vaccine or injectable medication.',
    "no history of allergic reactions per patient's vaccine consent form",
    'no know',
    'no know allergies.',
    'no know allergy',
    'no know drug  or food allergies',
    'no know drug allergies',
    'no know drug allergy',
    'no know drug or food allergy',
    'no known',
    'no known  allergies on file.',
    'no known alergies',
    'no known allegies',
    'no known allergies',
    'no known allergies according to consent form',
    'no known allergies according to our records',
    'no known allergies at this time.',
    'no known allergies discussed',
    'no known allergies documented.',
    'no known allergies informed by mother',
    'no known allergies listed',
    'no known allergies listed on pqcf form',
    'no known allergies no known medication allergies',
    'no known allergies noted',
    'no known allergies noted on file',
    'no known allergies on file at the pharmacy',
    'no known allergies per her profile',
    'no known allergies per mom.',
    'no known allergies per parent',
    'no known allergies per patient',
    'no known allergies per profile',
    'no known allergies reported',
    'no known allergies reported.',
    'no known allergies stated by patient.',
    'no known allergies to any medication, food, or other products',
    'no known allergies to any of the above',
    'no known allergies to anything',
    'no known allergies to food or medication',
    'no known allergies to food or medications',
    'no known allergies to food, medications or other products.',
    'no known allergies to medication',
    'no known allergies to medication or foods',
    'no known allergies to medication, food, or other products.',
    'no known allergies to medications foods or environmental agents',
    'no known allergies to medications or food',
    'no known allergies to medications or food.',
    'no known allergies to medications, food or other products noted at the time of vaccination.',
    'no known allergies to medications, food or other products.',
    'no known allergies to medications, food, or other products as reported by patient',
    'no known allergies to medications, food, or other products noted at the time of vaccination.',
    'no known allergies to medications, food, or other products noted.',
    'no known allergies to medications, foods, or other products.',
    'no known allergies, no known drug allergies',
    "no known allergies, per var patient didn't list any allergies",
    'no known allergies.',
    'no known allergies. no prior reactions two first and second doses of moderna.',
    'no known allergy',
    'no known allergy reported',
    'no known allergys',
    'no known documented allergies.',
    'no known drug allergie',
    'no known drug allergies',
    'no known drug allergies noted',
    'no known drug allergies or food allergies',
    'no known drug allergies, no known food allergies',
    'no known drug allergies.',
    'no known drug allergies; no known food allergies',
    'no known drug allergy',
    'no known drug allergy, others unknown.',
    'no known drug allergy.',
    'no known drug allergy;',
    'no known drug or food allergies',
    'no known drug or food allergies.',
    'no known drug or food allergy',
    'no known drug, food, latex, or venom allergies.',
    'no known drug, food, or other allergies.',
    'no known drug, food, other product allergies.',
    'no known drug/food allergies',
    'no known durg allergy',
    'no known food allergies or drug allergies.',
    'no known food or drug allergies',
    'no known food or drug allergies.',
    'no known food or medication allergies.',
    'no known food, drug, latex, or venom allergies',
    'no known food, drug, latex, or venom allergies.',
    'no known food, drug, latex, or venom allergy',
    'no known food/drug allergies',
    'no known food/environmental or drug allergies',
    'no known med allergies',
    'no known medical or food allergies',
    'no known medication allergies',
    'no known medication allergies, cats, dogs. nuts, seafood',
    'no known medication or other allergies',
    'no known meds, food or other products',
    'no known or stated drug, food, or other allergies',
    'no listed allergies',
    'no medication allergies  reported',
    'no medication or food allergies',
    'no medication or food allergies known.',
    'no medication or food allergies.',
    'no new allergies. or sensitivities',
    'no none allergies',
    'no not really',
    'no official allergies; allergy test to take place 02/2022',
    'no other reactions before vaccine.',
    'no per pt',
    'no reported allergies',
    'no reported allergies at time of vaccination',
    'no reported allergies.',
    'no reported drug and food allergies',
    'no seasional allergies',
    'no sever allergies noted.',
    'no stated',
    'no true allergies',
    'no true allergies.',
    'no vaccine or injectable medication allergies.',
    'no, according to written and verbal questions asked',
    'no.',
    'no.   not that i know of at time of booster vaccine',
    'no.no.',
    'no/unknown',
    'nobe',
    'noe',
    'nome',
    'non',
    'non known',
    'non known of',
    'non known. non stated',
    'non listed or given',
    'non reported',
    'non stated',
    'non that i am aware of',
    'nona',
    'none',
    'none  given',
    "none  i 'm  aware of",
    'none  known',
    'none  that i am aware of.',
    'none (nkda)',
    'none / denies',
    'none acknowledged',
    'none as per mother',
    'none as stated by father',
    'none as stated by patient.',
    'none at the time but now taking antihistamine and vitamin d',
    'none at time',
    'none at time of vaccination',
    'none aware',
    'none aware of',
    'none aware of.',
    'none declared',
    'none detected. blood test done 3/8/2022',
    'none disclosed',
    'none disclosed at time of assessment.',
    'none disclosed.',
    'none documented',
    'none ever before',
    'none for now',
    'none for the moment',
    'none given',
    'none indicated',
    'none indicated on patient intake form.',
    'none indicated on screening questionnaire',
    'none indicated on the system',
    'none indicated on vaccination form',
    'none indicatedd',
    'none know',
    'none know of',
    'none know,',
    'none knowm',
    'none known',
    'none known allergies',
    'none known at the time',
    'none known at the time.',
    'none known at this time',
    'none known at this time.',
    'none known at time of vaccination',
    'none known at time of vaccine admin',
    'none known before now.',
    'none known of',
    'none known of at that time',
    'none known prior to vaccination',
    'none known. family history of food allergies.',
    'none known/encounter at time of vaccination.',
    'none known/reported',
    'none listed',
    'none listed by patient/parent',
    'none listed in todays chart note for todays visit or medical history.',
    'none listed on consent',
    'none listed on consent form',
    'none listed on immunization form',
    'none listed.',
    'none mentioned',
    'none none',
    'none noted',
    'none noted at the time',
    'none noted in emr',
    'none noted on chart',
    'none noted on consent form',
    'none noted on consent form.',
    'none noted on form',
    'none noted on pre-screening vaccination paperwork for flu and covid shots.',
    'none noted on var',
    'none noted.',
    'none of any kind',
    "none of which i'm aware",
    'none on file',
    'none on file, pt stated no allergies also',
    "none on file, said wasn't allergic to anything",
    'none or unknown',
    'none per consent form',
    'none per guardian',
    'none per guardians',
    'none per informed consent',
    'none per mother',
    'none per patient',
    'none per patient profile',
    "none per patient's mother",
    'none per pt',
    'none per pt hx',
    'none per screening form',
    'none per vaccine administration record',
    'none per var form',
    'none previously',
    'none prior',
    'none prior to moderna vaccine',
    'none provided',
    'none recorded',
    'none report',
    'none reported',
    'none reported / nka',
    'none reported at appointment.',
    'none reported at the time',
    'none reported at time of vaccination',
    'none reported at time of vaccination.',
    'none reported by client',
    'none reported by father',
    'none reported by mother/patient.',
    'none reported by parent',
    'none reported by patient',
    'none reported by pt',
    'none reported by the patient',
    'none reported on case report from hospital',
    'none reported on pre-vaccination checklist',
    'none reported on vars form',
    'none reported via cdc pre vaccination checklist',
    'none reported.',
    'none so far.',
    'none specified',
    'none stated and patient filled out none on sheet',
    'none stated by patient',
    'none stated by patient.',
    'none stated on var',
    'none that  is known of',
    'none that am aware',
    'none that are known.',
    'none that i am aware of',
    "none that i can think of, huh, please, i've tried to share unto those in misc top - down own public official's. it's just senseless this has been allowed to go on for 20 plus years & yeah, i'm not the only one that's locally witnessed it. sigh, i just want my home life back (however), you know. housing solution's could have been last 20 plus year's - (using existing plentiful space's) sro's (single room occupancy) conversions 2/ large bathroom at the end of hallway as well as 2 rooms on each floor (w/ wall out between them) thus converting them 2 support services as well as 2 room's converted into a small kitchen area like in office like building's for just each floor's guest (each guest must have a floor id stating their room #. plus small kitchen area's on each floor via some room's converted into such, thus, so everyone is not piling all into the 1st floor lobby, & i know because i've stay in sro's before year's ago. this could have been done the last 20 plus years throughout cities but would have required involved from real estate industry (itself)!",
    'none that i know',
    'none that i know of or have experienced.',
    'none that i know of.',
    "none that i'm aware of",
    "none that i'm aware of.",
    'none that know of',
    'none that we are aware',
    'none that we are aware of',
    'none that we know of',
    'none that were reported',
    'none to  other vaccine or injectable meds',
    'none to immunizations',
    'none to knowledge',
    'none to knowlegde',
    'none to my knowledge',
    'none to my knowledge.',
    'none to note',
    'none to report',
    'none to rph knowledge',
    'none to vaccines',
    'none voiced',
    'none was reported at this present time',
    'none were listed',
    'none yet',
    'none!',
    'none,',
    'none, but now maybe j&j',
    'none, just the hives now for the past 2-3 weeks',
    'none, nkda',
    'none, patient confirmed',
    'none.',
    'none. *please note, i am not sure of the exact date the condition i am reporting began.',
    'none. healthy before this psycho vaccine   you will pay for what you?re doing to people',
    'none. i have never had an allergic reaction to any vaccines ever and have always kept up to date on all the vaccinations recommended by my doctor and the cdc. i also have no food allergies and no medication allergies.',
    'none. no personal nor family history of allergies ever.',
    'none. there was no reaction.',
    'none..',
    'none/ unknown',
    'nonenone',
    'nonexistent',
    'nonne',
    'nono',
    'nonr',
    'nons',
    'nonw',
    'noone',
    'nope',
    'not  known',
    'not able to determine',
    'not according to her consent form',
    'not allergic',
    'not allergic  any of the ingredients in the vaccine',
    'not allergic to anything',
    'not allergies',
    'not any',
    'not any to date',
    'not applicable',
    'not applicable (reporting due to administering booster dose earlier than patient was suppose to get it)',
    'not applicable in relation to vaccine error.',
    'not applicable.',
    'not assessed',
    'not at this time.',
    'not available',
    'not aware',
    'not aware of',
    'not aware of any',
    'not aware of any allergies',
    'not before this',
    'not currently',
    'not i know.',
    'not known',
    'not known per mom and dad',
    'not known.',
    'not listed',
    'not on file',
    'not prior to vaccination',
    'not provided',
    'not reported',
    'not sure',
    'not sure.',
    'not that i am aware of',
    'not that i am aware of.',
    'not that i am aware off.',
    'not that i an aware of',
    'not that i know',
    'not that i know of',
    'not that i know of; no',
    "not that i'm aware of.",
    'not that patient know of.',
    'nothing',
    'nothing i know of',
    'nothing known',
    'nothing per mother',
    'nothing that i am aware of.',
    'nothing that i know of',
    "nothing that i'm aware of.",
    'nothing was reported',
    'notjing',
    'noun',
    'now',
    'np',
    'patient denies any allergies to medications, food, or other products.',
    'pt reported no known allergies',
    'reports none',
    'she is not allergic to any medication, food, or product.',
    'the patient had no known allergies.',
    'there are not any known drug allergies',
    'ukn',
    'uknown',
    'unable to assess',
    'unable to reach parent to obtain information',
    'unk',
    'unkinown',
    'unkknown',
    'unknow',
    'unknown',
    'unknown (possible moderna allergy from this event)',
    'unknown -- none reported',
    'unknown -answered no for prior serious allergic reactions',
    'unknown but the hospital may have her records',
    'unknown history, not our patient.',
    'unknown to filer',
    'unknown to my knowledge-not in chart',
    'unknown to reporter',
    'unknown, parent unable to provide information',
    "unknown- can't reach patient again",
    'unknown-no severe allergies noted',
    'unknown.',
    'unknown.  no allergy known to pfizer product given.',
    'unknown.  no pfizer product allergies.',
    'unknown. not listed',
    'unknown/ not listed',
    'unknown; none disclosed',
    'unknownf',
    'unknows',
    'unkown',
    'unsure',
    'was not assessed',
    'zero',
}

In [5]:
# convert features into binary values
def binarize(binarize_features=binarize_features, frame=None):
    """Collapse each listed column to a 0/1 flag, modifying the frame in place.

    A cell becomes 0 when it is missing or already equal to 0, and 1 for any
    other value.

    Parameters
    ----------
    binarize_features : iterable of str
        Column names to convert; each name is lower-cased before the lookup.
        Defaults to the notebook-level ``binarize_features`` list.
    frame : pandas.DataFrame, optional
        Frame to modify. When omitted, the notebook-level ``df`` is used,
        which is what the function always did before this parameter existed.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a (lower-cased) column name is not present in the frame.
    """
    target = df if frame is None else frame
    for bin_feat in binarize_features:
        bin_feat = bin_feat.lower()
        # Assign the result back instead of calling fillna(..., inplace=True)
        # on target[bin_feat]: that form acts on an intermediate object, is
        # flagged by pandas as chained assignment, and leaves the frame
        # untouched under Copy-on-Write -- NaN would then compare != 0 and be
        # flagged as 1.
        target[bin_feat] = target[bin_feat].fillna(0).ne(0).astype('int64')

In [6]:
# drop unused features
df.drop(drop_features, axis=1, inplace=True)

# clean up the data: lower-case the column names, then split the columns into
# text (object dtype) and non-text lists and lower-case every text value
df.columns = df.columns.str.lower()
categorical = list(df.dtypes[df.dtypes == 'object'].index)
numerical = list(df.dtypes[df.dtypes != 'object'].index)

for c in categorical:
    df[c] = df[c].str.lower()

# take care of null values
# Results are assigned back to the frame rather than using
# df[col].fillna(..., inplace=True): that chained form works on an intermediate
# object and does not update df under pandas Copy-on-Write (the warning it
# raises is hidden here because warnings are globally ignored in the import cell).
df['vax_dose_series'] = df['vax_dose_series'].fillna(0)
df['vax_dose_series'] = df['vax_dose_series'].replace({'unk': np.nan, '7+': 7})
df[binary_features] = df[binary_features].fillna('n')
df[binary_features] = df[binary_features].replace({'n': 0, 'y': 1})
df[binarize_features] = df[binarize_features].fillna(0)
# free-text answers that mean "no allergies / unknown" count as no allergy
df['allergies'] = df['allergies'].apply(lambda x: 0 if x in none_values else x)
df['age_yrs'] = df['age_yrs'].fillna(df['age_yrs'].mean())
df.rename(columns={'vax_manu': 'vax_name'}, inplace=True)
df['vax_name'] = df['vax_name'].replace({'unknown manufacturer': 'unknown'})
binarize()

# vax_dose_series is loaded as text but is handled as a number from here on.
# Move it by name: the previous categorical.pop() only picked it because it
# happened to be the last object-dtype column.
categorical.remove('vax_dose_series')
numerical.append('vax_dose_series')
categorical.remove('died')
# the list was built before the rename above, so it still holds the old name
categorical.remove('vax_manu')

In [7]:
# fill the vax_dose_series values that are still null at this point (e.g. the
# 'unk' entries mapped to NaN during cleaning) by sampling from the
# distribution of the known values
dose_distrubution = df.vax_dose_series.value_counts(normalize=True)
missing_doses = df.vax_dose_series.isnull()
# Seeded generator so that Restart & Run All reproduces the same imputation;
# the seed is the same value used as random_state when balancing the dataset.
dose_rng = np.random.default_rng(1)
df.loc[missing_doses, 'vax_dose_series'] = dose_rng.choice(
    dose_distrubution.index,
    size=int(missing_doses.sum()),
    p=dose_distrubution.values
)
# both columns are free of nulls now, so they can be stored as small integers
df[['age_yrs', 'vax_dose_series']] = df[['age_yrs', 'vax_dose_series']].astype(np.int16)

In [8]:
# fill missing state values by sampling from the distribution of the known states
state_distrubution = df.state.value_counts(normalize=True)
missing_states = df.state.isnull()
# Seeded generator so that Restart & Run All reproduces the same imputation;
# the seed is the same value used as random_state when balancing the dataset.
state_rng = np.random.default_rng(1)
df.loc[missing_states, 'state'] = state_rng.choice(
    state_distrubution.index,
    size=int(missing_states.sum()),
    p=state_distrubution.values
)

### Handle the hugely unbalanced dataset

In [9]:
# Balance the dataset: keep every row with died == 1 and draw an equally
# sized random sample of the died == 0 rows.
dead = (df['died'] == 1).sum()
shuffled_df = df.sample(frac=1, random_state=1)
dead_df = shuffled_df.loc[shuffled_df['died'].eq(1)]
alive_df = shuffled_df.loc[shuffled_df['died'].eq(0)].sample(n=dead, random_state=1)
# died rows come first, followed by the sampled survivors
vax_deaths_df = pd.concat([dead_df, alive_df])

In [10]:
# preview the balanced frame; its first rows are the died == 1 part of the concat
vax_deaths_df.head()

Unnamed: 0_level_0,state,age_yrs,sex,died,l_threat,disable,other_meds,cur_ill,history,prior_vax,ofc_visit,allergies,vax_name,vax_dose_series
VAERS_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2110766,wi,90,f,1,0,0,0,0,1,0,0,0,pfizer\biontech,2
2005680,wi,64,f,1,0,0,0,0,1,0,0,0,pfizer\biontech,2
2148183,ar,77,m,1,0,0,1,1,1,0,0,0,moderna,1
2161790,wa,51,f,1,0,0,0,0,0,0,0,0,pfizer\biontech,2
2216072,sd,92,m,1,0,0,0,0,1,0,0,0,pfizer\biontech,3


In [11]:
# distinct state codes present in the balanced frame
vax_deaths_df.state.unique()

array(['wi', 'ar', 'wa', 'sd', 'ak', 'az', 'ky', 'mi', 'me', 'ny', 'ca',
       'ia', 'id', 'sc', 'tn', 'fl', 'tx', 'nm', 'mn', 'al', 'ok', 'oh',
       'ut', 'nv', 'pr', 'mt', 'nj', 'nh', 'mo', 'co', 'in', 'ma', 'ga',
       'il', 'la', 'ne', 'pa', 'or', 'va', 'wv', 'ct', 'md', 'hi', 'nc',
       'ks', 'ms', 'ri', 'nd', 'dc', 'de', 'vt', 'gu', 'wy', 'as'],
      dtype=object)

In [12]:
# smallest age_yrs value in the balanced frame
vax_deaths_df.age_yrs.min()

0

In [13]:
# largest age_yrs value in the balanced frame
vax_deaths_df.age_yrs.max()

107

In [14]:
# distinct sex codes present in the balanced frame
vax_deaths_df.sex.unique()

array(['f', 'm', 'u'], dtype=object)

In [15]:
# check that l_threat only holds the 0/1 codes produced during cleaning
vax_deaths_df.l_threat.unique()

array([0, 1])

In [16]:
# check that disable only holds the 0/1 codes produced during cleaning
vax_deaths_df.disable.unique()

array([0, 1])

In [17]:
# check that other_meds was reduced to a 0/1 flag by binarize()
vax_deaths_df.other_meds.unique()

array([0, 1])

In [18]:
# check that cur_ill was reduced to a 0/1 flag by binarize()
vax_deaths_df.cur_ill.unique()

array([0, 1])

In [19]:
# check that history was reduced to a 0/1 flag by binarize()
vax_deaths_df.history.unique()

array([1, 0])

In [20]:
# check that prior_vax was reduced to a 0/1 flag by binarize()
vax_deaths_df.prior_vax.unique()

array([0, 1])

In [21]:
# check that ofc_visit only holds the 0/1 codes produced during cleaning
vax_deaths_df.ofc_visit.unique()

array([0, 1])

In [22]:
# check that allergies was reduced to a 0/1 flag by binarize()
vax_deaths_df.allergies.unique()

array([0, 1])

In [23]:
# distinct manufacturer names (the renamed vax_manu column) in the balanced frame
vax_deaths_df.vax_name.unique()

array(['pfizer\\biontech', 'moderna', 'janssen', 'unknown', 'novavax'],
      dtype=object)

In [24]:
# distinct dose numbers after imputation and the int16 cast
vax_deaths_df.vax_dose_series.unique()

array([2, 1, 3, 4, 0, 5, 7, 6], dtype=int16)