In [214]:
import sys
sys.path.append('../statistical_calculations')  # Adds the statistical_calculations directory to the path
import disproportionaly_analysis
from tqdm import tqdm
import json

# Connect to your MongoDB instance
client = MongoClient('mongodb://localhost:27017/')
db = client['vaers']  
collection = db['combinations']

## create adverse reaction list for each manufacturer

### PFIZER/BIONTECH

In [285]:
Pfizer_adverse_reactions = [
    "Vomiting",
    "Lymphadenopathy",
    "Rash", 
    "Pruritus", 
    "Urticaria", 
    "Angioedema",
    "Anaphylaxis",
    "Decreased appetite",
    "Insomnia",
    "Headache",
    "Dizziness",
    "Lethargy",
    #"Acute peripheral facial paralysis",
    "Facial paralysis",
    "Paraesthesia",
    "Hypoaesthesia",
    "Myocarditis",
    "Pericarditis",
    "Diarrhoea",
    "Nausea",
    "Hyperhidrosis",
    "Night sweats",
    "Erythema multiforme",
    "Arthralgia",
    "Myalgia",
    "Pain in extremity",
    "Heavy menstrual bleeding",
    "Injection site pain",
    "Fatigue",
    "Chills",
    "Pyrexia",
    "Injection site swelling",
    "Injection site redness",
    "Asthenia",
    "Malaise",
    "Injection site pruritus",
    "Extensive swelling of vaccinated limb",
    "Facial swelling"
]

In [286]:
len(Pfizer_adverse_reactions)

38

In [287]:
from tqdm import tqdm
import disproportionaly_analysis

signals_Pfizer = []

for reaction in Pfizer_adverse_reactions:
    query = {
      "vaccine": "COVID19",
      "manufacturer": "PFIZER\BIONTECH",
      "symptom": reaction   
    }
    
    results = list(collection.find(query))
    
    if not results:
        result_dict = {
            'symptom': reaction,
            'status': "Not found",
        }
        signals_Pfizer.append(result_dict)
        continue
    
    for result in tqdm(results):
        DE = result.get("DE")
        dE = result.get("dE")
        De = result.get("De")
        de = result.get("de")
        
        # Ensure all necessary data is available
        if None in [DE, dE, De, de]:
            continue
        
        contingency_table = [
            [DE, dE],
            [De, de]
        ]
        
        # Compute statistical measures
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
        
        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
        
        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
        
        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
        
        # Determine if the reaction meets criteria for a signal
        method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and ror_ci_lower > 1
        
        
        if method_1 or method_2:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Detected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Pfizer.append(result_dict)
        else:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Undetected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Pfizer.append(result_dict)


100%|██████████| 1/1 [00:00<00:00, 5737.76it/s]
100%|██████████| 1/1 [00:00<00:00, 8630.26it/s]
100%|██████████| 1/1 [00:00<00:00, 5053.38it/s]
100%|██████████| 1/1 [00:00<00:00, 7738.57it/s]
100%|██████████| 1/1 [00:00<00:00, 6017.65it/s]
100%|██████████| 1/1 [00:00<00:00, 6710.89it/s]
100%|██████████| 1/1 [00:00<00:00, 3167.90it/s]
100%|██████████| 1/1 [00:00<00:00, 5940.94it/s]
100%|██████████| 1/1 [00:00<00:00, 5907.47it/s]
100%|██████████| 1/1 [00:00<00:00, 4539.29it/s]
100%|██████████| 1/1 [00:00<00:00, 14315.03it/s]
100%|██████████| 1/1 [00:00<00:00, 11459.85it/s]
100%|██████████| 1/1 [00:00<00:00, 12595.51it/s]
100%|██████████| 1/1 [00:00<00:00, 8371.86it/s]
100%|██████████| 1/1 [00:00<00:00, 10205.12it/s]
100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]
100%|██████████| 1/1 [00:00<00:00, 8594.89it/s]
100%|██████████| 1/1 [00:00<00:00, 6765.01it/s]
100%|██████████| 1/1 [00:00<00:00, 12595.51it/s]
100%|██████████| 1/1 [00:00<00:00, 13981.01it/s]
100%|██████████| 1/1 [00:00<00:00

In [218]:
count_method_1_true = sum(entry['Chi-square_Test'] for entry in detected_signals_Pfizer_product_info)
count_method_2_true = sum(entry['CI_Test'] for entry in detected_signals_Pfizer_product_info)
count_both_true = sum(entry['Chi-square_Test'] and entry['CI_Test'] for entry in detected_signals_Pfizer_product_info)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 2
Number of entries where method_1 is true: 2
Number of entries where method_2 is true: 22


In [289]:
file_name_filtered = "Signals_Pfizer.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_Pfizer, f)

In [288]:
detected_count = 0
undetected_count = 0
not_found_count = 0

for result in signals_Pfizer:
    status = result['status']
    if status == 'Detected':
        detected_count += 1
    elif status == 'Undetected':
        undetected_count += 1
    elif status == 'Not found':
        not_found_count += 1

print("Detected count:", detected_count)
print("Undetected count:", undetected_count)
print("Not found count:", not_found_count)


Detected count: 22
Undetected count: 12
Not found count: 4


['Anaphylaxis', 'Injection site redness', 'Facial swelling']

### MODERNA

In [324]:
Moderna_adverse_reactions = [
    "Lymphadenopathy",
    "Anaphylaxis",
    "Hypersensitivity",
    "Decreased appetite",
    "Irritability", 
    "Crying",
    "Headache",
    "Sleepiness",
    "Dizziness",
    #"Acute peripheral facial paralysis",
    "Facial paralysis", #instead of facial paralysis
    "Hypoaesthesia",
    "Paraesthesia",
    "Myocarditis",
    "Pericarditis",
    "Nausea", 
    "Vomiting",
    "Diarrhoea",
    "Abdominal pain",
    "Rash",
    "Urticaria",
    "Erythema multiforme",
    "Mechanical urticaria",
    "Chronic urticaria",
    "Myalgia",
    "Arthralgia",
    "Heavy menstrual bleeding",
    "Injection site pain",
    "Fatigue",
    "Chills",
    "Pyrexia",
    "Injection site swelling",
    "Injection site erythema",
    "Injection site urticaria",
    "Injection site rash",
    "Delayed injection site reaction",
    "Injection site pruritus",
    "Facial swelling",
    "Extensive swelling of vaccinated limb"
]


In [325]:
len(Moderna_adverse_reactions)

38

In [326]:
from tqdm import tqdm
import disproportionaly_analysis

signals_Moderna = []

for reaction in Moderna_adverse_reactions:
    query = {
      "vaccine": "COVID19",
      "manufacturer": "MODERNA",
      "symptom": reaction   
    }
    
    results = list(collection.find(query))
    
    if not results:
        result_dict = {
            'symptom': reaction,
            'status': "Not found",
        }
        signals_Moderna.append(result_dict)
        continue
    
    for result in tqdm(results):
        DE = result.get("DE")
        dE = result.get("dE")
        De = result.get("De")
        de = result.get("de")
        
        # Ensure all necessary data is available
        if None in [DE, dE, De, de]:
            continue
        
        contingency_table = [
            [DE, dE],
            [De, de]
        ]
        
        # Compute statistical measures
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
        
        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
        
        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
        
        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
        
        # Determine if the reaction meets criteria for a signal
        method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and ror_ci_lower > 1
        
        
        if method_1 or method_2:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Detected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Moderna.append(result_dict)
        else:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Undetected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Moderna.append(result_dict)


100%|██████████| 1/1 [00:00<00:00, 11881.88it/s]
100%|██████████| 1/1 [00:00<00:00, 3761.71it/s]
100%|██████████| 1/1 [00:00<00:00, 7061.12it/s]
100%|██████████| 1/1 [00:00<00:00, 6069.90it/s]
100%|██████████| 1/1 [00:00<00:00, 5991.86it/s]
100%|██████████| 1/1 [00:00<00:00, 6034.97it/s]
100%|██████████| 1/1 [00:00<00:00, 5127.51it/s]
100%|██████████| 1/1 [00:00<00:00, 5761.41it/s]
100%|██████████| 1/1 [00:00<00:00, 5197.40it/s]
100%|██████████| 1/1 [00:00<00:00, 5622.39it/s]
100%|██████████| 1/1 [00:00<00:00, 5841.65it/s]
100%|██████████| 1/1 [00:00<00:00, 6009.03it/s]
100%|██████████| 1/1 [00:00<00:00, 5833.52it/s]
100%|██████████| 1/1 [00:00<00:00, 12446.01it/s]
100%|██████████| 1/1 [00:00<00:00, 6168.09it/s]
100%|██████████| 1/1 [00:00<00:00, 5349.88it/s]
100%|██████████| 1/1 [00:00<00:00, 9279.43it/s]
100%|██████████| 1/1 [00:00<00:00, 4136.39it/s]
100%|██████████| 1/1 [00:00<00:00, 6492.73it/s]
100%|██████████| 1/1 [00:00<00:00, 6626.07it/s]
100%|██████████| 1/1 [00:00<00:00, 557

In [306]:
count_method_1_true = sum(entry['Chi-square_Test'] for entry in detected_signals_Moderna_product_info)
count_method_2_true = sum(entry['CI_Test'] for entry in detected_signals_Moderna_product_info)
count_both_true = sum(entry['Chi-square_Test'] and entry['CI_Test'] for entry in detected_signals_Moderna_product_info)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 3
Number of entries where method_1 is true: 3
Number of entries where method_2 is true: 23


In [327]:
file_name_filtered = "Signals_Moderna.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_Moderna, f)

In [328]:
detected_count = 0
undetected_count = 0
not_found_count = 0

for result in signals_Moderna:
    status = result['status']
    if status == 'Detected':
        detected_count += 1
    elif status == 'Undetected':
        undetected_count += 1
    elif status == 'Not found':
        not_found_count += 1

print("Detected count:", detected_count)
print("Undetected count:", undetected_count)
print("Not found count:", not_found_count)


Detected count: 23
Undetected count: 10
Not found count: 5


### JANSSEN

In [290]:
Janssen_adverse_reactions = [
    "Lymphadenopathy",
    "Immune thrombocytopenia",
    "Anaphylaxis",
    "Headache",
    "Dizziness",
    "Tremor",
    "Urticaria",
    "Hypersensitivity",
    "Paraesthesia",
    "Hypoaesthesia",
    "Facial paralysis",
    "Tinnitus",
    "Guillain-Barre syndrome",
    #"Transverse myelitis", not in DB
    "Myelitis", #instead of Transverse myelitis
    "Venous thromboembolism",
    "Thrombosis in combination with thrombocytopenia",
    "Myocarditis", 
    "Pericarditis",
    "Capillary leak syndrome",
    #"Small vessel cutaneous vasculitis", not in DB
    "Cutaneous vasculitis", #instead of Small vessel cutaneous vasculitis"
    "Nausea",
    "Cough", 
    "Oropharyngeal pain",
    "Sneezing",
    "Diarrhoea",
    "Vomiting",
    "Rash",
    "Hyperhidrosis",
    "Myalgia",
    "Arthralgia",
    "Muscular weakness",
    "Back pain",
    "Pain in extremity",
    "Fatigue",
    "Injection site pain",
    "Injection site swelling",
    "Chills",
    "Pyrexia"
]

In [291]:
len(Janssen_adverse_reactions)

38

In [294]:
from tqdm import tqdm
import disproportionaly_analysis

signals_Janssen = []

for reaction in Janssen_adverse_reactions:
    query = {
      "vaccine": "COVID19",
      "manufacturer": "JANSSEN",
      "symptom": reaction   
    }
    
    results = list(collection.find(query))
    
    if not results:
        result_dict = {
            'symptom': reaction,
            'status': "Not found",
        }
        signals_Janssen.append(result_dict)
        continue
    
    for result in tqdm(results):
        DE = result.get("DE")
        dE = result.get("dE")
        De = result.get("De")
        de = result.get("de")
        
        # Ensure all necessary data is available
        if None in [DE, dE, De, de]:
            continue
        
        contingency_table = [
            [DE, dE],
            [De, de]
        ]
        
        # Compute statistical measures
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
        
        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
        
        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
        
        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
        
        # Determine if the reaction meets criteria for a signal
        method_1 = rrr > 2 and prr > 2 and ror > 2  and IC >0 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 #and ror_ci_lower > 1 and prr_ci_lower > 1 and ror_ci_lower > 1
        
        
        if method_1 or method_2:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Detected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Janssen.append(result_dict)
        else:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Undetected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Janssen.append(result_dict)


100%|██████████| 1/1 [00:00<00:00, 10280.16it/s]
100%|██████████| 1/1 [00:00<00:00, 6105.25it/s]
100%|██████████| 1/1 [00:00<00:00, 12446.01it/s]
100%|██████████| 1/1 [00:00<00:00, 5833.52it/s]
100%|██████████| 1/1 [00:00<00:00, 12052.60it/s]
100%|██████████| 1/1 [00:00<00:00, 5899.16it/s]
100%|██████████| 1/1 [00:00<00:00, 13066.37it/s]
100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]
100%|██████████| 1/1 [00:00<00:00, 12483.05it/s]
100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]
100%|██████████| 1/1 [00:00<00:00, 12520.31it/s]
100%|██████████| 1/1 [00:00<00:00, 5940.94it/s]
100%|██████████| 1/1 [00:00<00:00, 6061.13it/s]
100%|██████████| 1/1 [00:00<00:00, 6204.59it/s]
100%|██████████| 1/1 [00:00<00:00, 9000.65it/s]
100%|██████████| 1/1 [00:00<00:00, 10512.04it/s]
100%|██████████| 1/1 [00:00<00:00, 4712.70it/s]
100%|██████████| 1/1 [00:00<00:00, 6213.78it/s]
100%|██████████| 1/1 [00:00<00:00, 4457.28it/s]
100%|██████████| 1/1 [00:00<00:00, 7503.23it/s]
100%|██████████| 1/1 [00:00<00:0

In [234]:
count_method_1_true = sum(entry['Chi-square_Test'] for entry in detected_signals_Janssen_product_info)
count_method_2_true = sum(entry['CI_Test'] for entry in detected_signals_Janssen_product_info)
count_both_true = sum(entry['Chi-square_Test'] and entry['CI_Test'] for entry in detected_signals_Janssen_product_info)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 2
Number of entries where method_1 is true: 2
Number of entries where method_2 is true: 23


In [296]:
file_name_filtered = "Signals_Janssen.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_Janssen, f)

In [295]:
detected_count = 0
undetected_count = 0
not_found_count = 0

for result in signals_Janssen:
    status = result['status']
    if status == 'Detected':
        detected_count += 1
    elif status == 'Undetected':
        undetected_count += 1
    elif status == 'Not found':
        not_found_count += 1

print("Detected count:", detected_count)
print("Undetected count:", undetected_count)
print("Not found count:", not_found_count)


Detected count: 23
Undetected count: 12
Not found count: 3


### NOVAVAX

In [252]:
Novavax_adverse_reactions = [
    "Headache",
    "Nausea",
    "Vomiting",
    "Myalgia",
    "Arthralgia",
    "Injection site tenderness",
    "Injection site pain",
    "Fatigue",
    "Malaise",
    "Injection site redness",
    "Injection site swelling",
    "Pyrexia",
    "Pain in extremity",
    "Injection site pruritus",
    "Chills",
    "Injection site warmth",
    "Lymphadenopathy",
    "Anaphylaxis",
    "Paraesthesia",
    "Hypoaesthesia",
    "Myocarditis",
    "Pericarditis",
    "Hypertension",
    "Rash",
    "Erythema",
    "Pruritus",
    "Urticaria"
]


In [253]:
len(Novavax_adverse_reactions)

27

In [277]:
from tqdm import tqdm
import disproportionaly_analysis

signals_Novavax = []

for reaction in Novavax_adverse_reactions:
    query = {
      "vaccine": "COVID19",
      "manufacturer": "NOVAVAX",
      "symptom": reaction   
    }
    
    results = list(collection.find(query))
    
    if not results:
        result_dict = {
            'symptom': reaction,
            'status': "Not found",
        }
        signals_Novavax.append(result_dict)
        continue
    
    for result in tqdm(results):
        DE = result.get("DE")
        dE = result.get("dE")
        De = result.get("De")
        de = result.get("de")
        
        # Ensure all necessary data is available
        if None in [DE, dE, De, de]:
            continue
        
        contingency_table = [
            [DE, dE],
            [De, de]
        ]
        
        # Compute statistical measures
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
        
        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
        
        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
        
        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
        
        # Determine if the reaction meets criteria for a signal
        method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and ror_ci_lower > 1
        
        
        if method_1 or method_2:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Detected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Novavax.append(result_dict)
        else:
            result_dict = {
                'vaccine': result.get("vaccine"),
                'manufacturer': result.get("manufacturer"),
                'symptom': result.get("symptom"),
                'status': "Undetected",
                'DE': DE,
                'dE': dE,
                'De': De,
                'de': de,
                'rrr': [rrr, [rrr_ci_lower, rrr_ci_upper]],
                'prr': [prr, [prr_ci_lower, prr_ci_upper]],
                'ror': [ror, [ror_ci_lower, ror_ci_upper]],
                'chi_square': chi_square,
                'IC': [IC, [IC_ci_lower, IC_ci_upper]],
                'Chi-square_Test': method_1,
                'CI_Test': method_2  
            }
            signals_Novavax.append(result_dict)


100%|██████████| 1/1 [00:00<00:00, 11335.96it/s]
100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]
100%|██████████| 1/1 [00:00<00:00, 3401.71it/s]
100%|██████████| 1/1 [00:00<00:00, 8405.42it/s]
100%|██████████| 1/1 [00:00<00:00, 6132.02it/s]
100%|██████████| 1/1 [00:00<00:00, 7543.71it/s]
100%|██████████| 1/1 [00:00<00:00, 6978.88it/s]
100%|██████████| 1/1 [00:00<00:00, 1540.32it/s]
100%|██████████| 1/1 [00:00<00:00, 5966.29it/s]
100%|██████████| 1/1 [00:00<00:00, 5849.80it/s]
100%|██████████| 1/1 [00:00<00:00, 5983.32it/s]
100%|██████████| 1/1 [00:00<00:00, 8371.86it/s]
100%|██████████| 1/1 [00:00<00:00, 6150.01it/s]
100%|██████████| 1/1 [00:00<00:00, 6087.52it/s]
100%|██████████| 1/1 [00:00<00:00, 6017.65it/s]
100%|██████████| 1/1 [00:00<00:00, 6096.37it/s]
100%|██████████| 1/1 [00:00<00:00, 6087.52it/s]
100%|██████████| 1/1 [00:00<00:00, 2898.62it/s]
100%|██████████| 1/1 [00:00<00:00, 6168.09it/s]
100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]
100%|██████████| 1/1 [00:00<00:00, 13

In [278]:
signals_Novavax

[{'vaccine': 'COVID19',
  'manufacturer': 'NOVAVAX',
  'symptom': 'Headache',
  'status': 'Undetected',
  'DE': 47,
  'dE': 299984,
  'De': 387,
  'de': 2264597,
  'rrr': [0.9258314044875885, [0.7067661232458569, 1.2127969371238332]],
  'prr': [0.9258197841212054, [0.7067572501345857, 1.2127817189098693]],
  'ror': [0.9168108173348918, [0.6773028879494799, 1.2410135697590121]],
  'chi_square': 0.23787410583975377,
  'IC': [-0.11117859476557136, [-0.15932968886490653, -0.06302750066623619]],
  'Chi-square_Test': False,
  'CI_Test': False},
 {'vaccine': 'COVID19',
  'manufacturer': 'NOVAVAX',
  'symptom': 'Nausea',
  'status': 'Undetected',
  'DE': 36,
  'dE': 171313,
  'De': 398,
  'de': 2393268,
  'rrr': [1.241712651945192, [0.9081307897201052, 1.6978284708042712]],
  'prr': [1.241763445845655, [0.9081679315425532, 1.697897934822888]],
  'ror': [1.263631496223654, [0.8983762932749257, 1.7773894638599732]],
  'chi_square': 1.5656146052053002,
  'IC': [0.3123313542534876, [0.251283693520

In [256]:
count_method_1_true = sum(entry['Chi-square_Test'] for entry in detected_signals_Novavax_product_info)
count_method_2_true = sum(entry['CI_Test'] for entry in detected_signals_Novavax_product_info)
count_both_true = sum(entry['Chi-square_Test'] and entry['CI_Test'] for entry in detected_signals_Novavax_product_info)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 1
Number of entries where method_1 is true: 1
Number of entries where method_2 is true: 3


In [280]:
file_name_filtered = "Signals_Novavax.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_Novavax, f)

In [279]:
detected_count = 0
undetected_count = 0
not_found_count = 0

for result in signals_Novavax:
    status = result['status']
    if status == 'Detected':
        detected_count += 1
    elif status == 'Undetected':
        undetected_count += 1
    elif status == 'Not found':
        not_found_count += 1

print("Detected count:", detected_count)
print("Undetected count:", undetected_count)
print("Not found count:", not_found_count)


Detected count: 3
Undetected count: 20
Not found count: 4
