In [2]:
import pandas as pd

In [59]:
# Load the VAERS data file with the vaccinated people details.
# - encoding_errors='ignore' skips bytes that cannot be decoded instead of raising.
# - low_memory=False parses the file in one pass so column dtypes are inferred
#   from the whole column rather than chunk by chunk.
vaers_data_path = r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\2021VAERSDATA.csv"
df = pd.read_csv(vaers_data_path, low_memory=False, encoding_errors='ignore')
df.columns

Index(['VAERS_ID', 'RECVDATE', 'STATE', 'AGE_YRS', 'CAGE_YR', 'CAGE_MO', 'SEX',
       'RPT_DATE', 'SYMPTOM_TEXT', 'DIED', 'DATEDIED', 'L_THREAT', 'ER_VISIT',
       'HOSPITAL', 'HOSPDAYS', 'X_STAY', 'DISABLE', 'RECOVD', 'VAX_DATE',
       'ONSET_DATE', 'NUMDAYS', 'LAB_DATA', 'V_ADMINBY', 'V_FUNDBY',
       'OTHER_MEDS', 'CUR_ILL', 'HISTORY', 'PRIOR_VAX', 'SPLTTYPE',
       'FORM_VERS', 'TODAYS_DATE', 'BIRTH_DEFECT', 'OFC_VISIT', 'ER_ED_VISIT',
       'ALLERGIES'],
      dtype='object')

In [60]:
# Load the VAERS symptoms file (illness symptoms) with pandas' default parser settings.
vaers_symptoms_path = r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\2021VAERSSYMPTOMS.csv"
df2 = pd.read_csv(vaers_symptoms_path)
df2.columns

Index(['VAERS_ID', 'SYMPTOM1', 'SYMPTOMVERSION1', 'SYMPTOM2',
       'SYMPTOMVERSION2', 'SYMPTOM3', 'SYMPTOMVERSION3', 'SYMPTOM4',
       'SYMPTOMVERSION4', 'SYMPTOM5', 'SYMPTOMVERSION5'],
      dtype='object')

In [61]:
# Load the VAERS vaccine file (vaccine manufacturing details) with pandas' default parser settings.
vaers_vax_path = r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\2021VAERSVAX.csv"
df3 = pd.read_csv(vaers_vax_path)
df3.columns

Index(['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES',
       'VAX_ROUTE', 'VAX_SITE', 'VAX_NAME'],
      dtype='object')

In [62]:
# Reshape the symptoms table from wide (SYMPTOM1..SYMPTOM5 per row) to long
# (one VAERS_ID / SYMPTOM pair per row).

# Drop the SYMPTOMVERSION1..5 columns and index by report id, so that only the
# symptom name columns remain to be stacked.
symptom_version_cols = [f'SYMPTOMVERSION{i}' for i in range(1, 6)]
df2_clean = df2.drop(columns=symptom_version_cols).set_index('VAERS_ID')

# Stack the symptom columns into a single Series keyed by (VAERS_ID, column label).
# The explicit dropna() removes empty symptom slots: the legacy stack() dropped
# them implicitly, but the newer stack implementation keeps NaN cells, so relying
# on the default would silently add NaN symptom rows on newer pandas versions.
df2_stack = (
    df2_clean.stack()
    .dropna()
    .rename('SYMPTOM')
    .reset_index(level=1, drop=True)  # discard the SYMPTOMn column-label level
    .reset_index()                    # VAERS_ID back to a column, fresh RangeIndex
)
df2_stack.head()

Unnamed: 0,VAERS_ID,SYMPTOM
0,916600,Dysphagia
1,916600,Epiglottitis
2,916601,Anxiety
3,916601,Dyspnoea
4,916602,Chest discomfort


In [63]:
# Build the vaccine-hospitalization frame: join the report details (df) with the
# vaccine details (df3) on the report id. The inner join keeps only VAERS_IDs
# that appear in both frames.
vh_df = df.merge(df3, on='VAERS_ID', how='inner')
vh_df.columns

Index(['VAERS_ID', 'RECVDATE', 'STATE', 'AGE_YRS', 'CAGE_YR', 'CAGE_MO', 'SEX',
       'RPT_DATE', 'SYMPTOM_TEXT', 'DIED', 'DATEDIED', 'L_THREAT', 'ER_VISIT',
       'HOSPITAL', 'HOSPDAYS', 'X_STAY', 'DISABLE', 'RECOVD', 'VAX_DATE',
       'ONSET_DATE', 'NUMDAYS', 'LAB_DATA', 'V_ADMINBY', 'V_FUNDBY',
       'OTHER_MEDS', 'CUR_ILL', 'HISTORY', 'PRIOR_VAX', 'SPLTTYPE',
       'FORM_VERS', 'TODAYS_DATE', 'BIRTH_DEFECT', 'OFC_VISIT', 'ER_ED_VISIT',
       'ALLERGIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES',
       'VAX_ROUTE', 'VAX_SITE', 'VAX_NAME'],
      dtype='object')

In [64]:
# Build the vaccine-symptom frame: attach the vaccine details (df3) to every
# stacked symptom row that shares the same VAERS_ID. The inner join keeps only
# ids that appear in both frames.
vs_df = df2_stack.merge(df3, on='VAERS_ID', how='inner')
vs_df.head()

Unnamed: 0,VAERS_ID,SYMPTOM,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
0,916600,Dysphagia,COVID19,MODERNA,037K20A,1,IM,LA,COVID19 (COVID19 (MODERNA))
1,916600,Epiglottitis,COVID19,MODERNA,037K20A,1,IM,LA,COVID19 (COVID19 (MODERNA))
2,916601,Anxiety,COVID19,MODERNA,025L20A,1,IM,RA,COVID19 (COVID19 (MODERNA))
3,916601,Dyspnoea,COVID19,MODERNA,025L20A,1,IM,RA,COVID19 (COVID19 (MODERNA))
4,916602,Chest discomfort,COVID19,PFIZER\BIONTECH,EL1284,1,IM,LA,COVID19 (COVID19 (PFIZER-BIONTECH))


In [65]:
# Saving data frames.
# index=False: both frames carry only the default positional index produced by
# merge; writing it would add a nameless first column that shows up as
# "Unnamed: 0" when the CSV is read back.
vh_df.to_csv(r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\vaccine-hospitalization.csv",
             index=False)
vs_df.to_csv(r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\vaccine-symptom.csv",
             index=False)

In [11]:
# Death statistics
# Load the cleaned workbook and keep only the rows whose DIED flag is 'Y'.
clean_df = pd.read_excel(r"D:\Shai\COVID-19 World Vaccine Adverse Reactions\vaccine-hospitalization after power bi cleaning.xlsx")
death = clean_df.loc[clean_df['DIED'] == 'Y']

# Each statistic is a one-element Series keyed by its column name:
# count() / notnull().sum() count the non-null cells in that column.
death_count = death[['DIED']].count()
death_bd = death[['BIRTH_DEFECT']].notnull().sum()
death_allergy = death[['ALLERGIES']].notnull().sum()
death_history = death[['HISTORY']].notnull().sum()

# Print the scalar counts rather than the Series objects; printing the Series
# interleaves the report with column labels and "dtype: int64" lines.
print(f"Dead: {death_count['DIED']}  "
      f"Birth disability: {death_bd['BIRTH_DEFECT']}  "
      f"Allergy: {death_allergy['ALLERGIES']}  "
      f"History: {death_history['HISTORY']}")

Dead:  DIED    1850
dtype: int64 Birth disability:  BIRTH_DEFECT    1
dtype: int64  Allergy:  ALLERGIES    981
dtype: int64  History:  HISTORY    1278
dtype: int64
