In [71]:
import sys

import numpy as np
import pandas as pd
from pymongo import MongoClient
from tqdm import tqdm

# The statistical helpers live in a sibling directory that is not a package,
# so that directory has to be on sys.path before the import below.
sys.path.append('../statistical_calculations')
import disproportionaly_analysis

# Handle to the `reports` collection of the `vaers` database on the local MongoDB server.
client = MongoClient('mongodb://localhost:27017/')
db = client['vaers']
col = db['reports']

In [47]:
# Symptom terms that the analysis loop flags as "Y" in its 'Adverse reaction'
# column; any symptom not listed here is flagged "N".
Pfizer_adverse_reactions = [
    "Vomiting",
    "Lymphadenopathy",
    "Rash",
    "Pruritus",
    "Urticaria",
    "Angioedema",
    "Anaphylaxis",
    "Decreased appetite",
    "Insomnia",
    "Headache",
    "Dizziness",
    "Lethargy",
    # "Acute peripheral facial paralysis",  (disabled entry, kept for reference)
    "Facial paralysis",
    "Paraesthesia",
    "Hypoaesthesia",
    "Myocarditis",
    "Pericarditis",
    "Diarrhoea",
    "Nausea",
    "Hyperhidrosis",
    "Night sweats",
    "Erythema multiforme",
    "Arthralgia",
    "Myalgia",
    "Pain in extremity",
    "Heavy menstrual bleeding",
    "Injection site pain",
    "Fatigue",
    "Chills",
    "Pyrexia",
    "Injection site swelling",
    "Injection site redness",
    "Asthenia",
    "Malaise",
    "Injection site pruritus",
    "Extensive swelling of vaccinated limb",
    "Facial swelling",
]

In [48]:
# Load the saved arrays: `order` (used below as the symptom column) and `y_true`
# (used below as the 0/1 flag column).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, and
# unpickling can execute arbitrary code — only load files from a trusted source.
order = np.load('../Array_Generation/Arrays_Pfizer/order_Pfizer.npy', allow_pickle=True)
y_true = np.load('../Array_Generation/Arrays_Pfizer/y_true_Pfizer.npy', allow_pickle=True)

In [49]:
# Pair every symptom with its y_true flag.
df = pd.DataFrame({
    'y_true': y_true,
    'symptom': order
})

n_per_class = 10  # rows drawn from each y_true group
seed = 42         # fixed seed so the same rows are drawn on every run

# Draw n_per_class samples where y_true is 1
df_y_true_1 = df[df['y_true'] == 1].sample(n=n_per_class, random_state=seed)

# Draw n_per_class samples where y_true is 0
df_y_true_0 = df[df['y_true'] == 0].sample(n=n_per_class, random_state=seed)

# Stack the two draws: y_true == 1 rows first, then y_true == 0 rows
df_sampled = pd.concat([df_y_true_1, df_y_true_0])

In [50]:
df_sampled

Unnamed: 0,y_true,symptom
4606,1,Decreased appetite
5603,1,Dizziness
8002,1,Pyrexia
7963,1,Diarrhoea
3383,1,Heavy menstrual bleeding
6835,1,Lethargy
5959,1,Injection site swelling
4008,1,Vomiting
8755,1,Chills
3611,1,Injection site pain


In [51]:
manufacturer = 'PFIZER\\BIONTECH'
vaccine = 'COVID19'
# Symptoms co-reported with the vaccine fewer than this many times are skipped.
MIN_CASE_COUNT = 3
values = []

reactions = df_sampled['symptom'].tolist()

# N and D do not depend on the symptom, so they are queried once up front
# rather than on every iteration.
# NOTE(review): estimated_document_count() reads collection metadata instead of
# counting documents, while the other counts are exact; `de` below inherits any
# difference between the two.
N = col.estimated_document_count()

# Reports with at least one vax_data entry matching both manufacturer and vaccine type.
D_query = {
    "vax_data": {
        "$elemMatch": {
            "VAX_MANU": manufacturer,
            "VAX_TYPE": vaccine
        }
    }
}
D = col.count_documents(D_query)

for symptom in tqdm(reactions):
    # Reports whose symptoms array contains this symptom.
    E_query = {
        "symptoms": {
            "$elemMatch": {
                "$in": [symptom]
            }
        }
    }
    E = col.count_documents(E_query)

    # Reports matching both the vaccine and the symptom.
    DE_query = {'$and': [D_query, E_query]}
    DE = col.count_documents(DE_query)

    if DE < MIN_CASE_COUNT:
        continue

    De = D - DE                  # vaccine reports without the symptom
    dE = E - DE                  # symptom reports without the vaccine
    de = N - (DE + De + dE)      # everything else in the collection

    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    # Each ratio is paired with its standard deviation, which feeds the interval helper.
    rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table)
    sd_rrr = disproportionaly_analysis.sd_rrr(contingency_table)
    rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)

    prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table)
    sd_prr = disproportionaly_analysis.sd_prr(contingency_table)
    prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)

    ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table)
    sd_ror = disproportionaly_analysis.sd_ror(contingency_table)
    ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)

    # chi_square is computed but not stored in the result row.
    chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
    IC = disproportionaly_analysis.information_component(contingency_table)
    IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)

    adverse_reaction = "Y" if symptom in Pfizer_adverse_reactions else "N"

    values.append({
        'manufacturer': manufacturer,
        'symptom': symptom,
        'Adverse reaction': adverse_reaction,
        'DE': DE,
        'dE': dE,
        'De': De,
        'de': de,
        'IC': IC,
        'IC Lower': IC_ci_lower,
        'IC Upper': IC_ci_upper,
        'rrr': rrr,
        'rrr Lower': rrr_ci_lower,
        'rrr Upper': rrr_ci_upper,
        'prr': prr,
        'prr Lower': prr_ci_lower,
        'prr Upper': prr_ci_upper,
        'ror': ror,
        'ror Lower': ror_ci_lower,
        'ror Upper': ror_ci_upper,
    })

100%|██████████| 20/20 [01:18<00:00,  3.93s/it]


In [52]:
# One row per symptom that met the minimum DE count, holding its counts and ratios.
# NOTE(review): this rebinds `df`, which earlier held the y_true/symptom frame.
df = pd.DataFrame(values)

In [53]:
df

Unnamed: 0,manufacturer,symptom,Adverse reaction,DE,dE,De,de,IC,IC Lower,IC Upper,rrr,rrr Lower,rrr Upper,prr,prr Lower,prr Upper,ror,ror Lower,ror Upper
0,PFIZER\BIONTECH,Decreased appetite,Y,12094,19953,932095,1600873,0.035922,0.035685,0.036159,1.025212,1.004124,1.046742,1.040493,1.0174,1.06411,1.041019,1.017622,1.064953
1,PFIZER\BIONTECH,Dizziness,Y,76118,90521,868071,1530305,0.311403,0.311364,0.311443,1.240914,1.230727,1.251186,1.443496,1.430135,1.456981,1.482384,1.467646,1.497271
2,PFIZER\BIONTECH,Pyrexia,Y,96430,242286,847759,1378540,-0.370703,-0.37073,-0.370675,0.773406,0.7682,0.778647,0.683221,0.678443,0.688033,0.647189,0.642071,0.652346
3,PFIZER\BIONTECH,Diarrhoea,Y,26365,43495,917824,1577331,0.035977,0.035869,0.036086,1.025251,1.011029,1.039673,1.040557,1.024977,1.056374,1.041722,1.025687,1.058008
4,PFIZER\BIONTECH,Heavy menstrual bleeding,Y,12559,3757,931630,1617069,1.064254,1.063961,1.064547,2.091088,2.043242,2.140054,5.738407,5.533512,5.950888,5.802284,5.594116,6.018198
5,PFIZER\BIONTECH,Lethargy,Y,8273,13846,935916,1606980,0.023016,0.022671,0.023361,1.016081,0.990847,1.041959,1.02569,0.99825,1.053885,1.025917,0.998233,1.054369
6,PFIZER\BIONTECH,Injection site swelling,Y,10447,91545,933742,1529281,-1.845476,-1.845695,-1.845256,0.278264,0.272755,0.283884,0.1959,0.192005,0.199874,0.186903,0.183129,0.190756
7,PFIZER\BIONTECH,Vomiting,Y,31751,66427,912438,1554399,-0.186778,-0.186865,-0.186692,0.878566,0.867711,0.889556,0.820522,0.809819,0.831366,0.814277,0.803261,0.825443
8,PFIZER\BIONTECH,Chills,Y,63788,116250,880401,1504576,-0.055125,-0.055169,-0.055081,0.962511,0.954157,0.970939,0.941941,0.933204,0.950759,0.937734,0.928395,0.947167
9,PFIZER\BIONTECH,Injection site pain,Y,41772,124846,902417,1495980,-0.554116,-0.554179,-0.554054,0.681074,0.673986,0.688237,0.574365,0.568204,0.580593,0.554663,0.548385,0.561012


In [54]:
# Total number of reports in the collection, estimated from collection metadata
# (not an exact count).
N = col.estimated_document_count()
print(N)

2565015


In [70]:
# Every row's four contingency cells must add up to the collection total N.
# NOTE(review): `de` was derived as N - (DE + De + dE), so this can only fail if
# N differs from the value used when the rows were built — confirm that is the intent.
if not df.empty:  # an empty result frame has no count columns to check
    cell_totals = df[['DE', 'dE', 'De', 'de']].sum(axis=1)
    mismatched = df.loc[cell_totals != N, 'symptom'].tolist()
    assert not mismatched, f"contingency cells do not sum to N={N} for: {mismatched}"

In [None]:
reactions