In [1]:
from pymongo import MongoClient
import sys
sys.path.append('../statistical_calculations')  # Adds the statistical_calculations directory to the path
import disproportionaly_analysis
from tqdm import tqdm

# Connect to your MongoDB instance
client = MongoClient('mongodb://localhost:27017/')
db = client['vaers']  
collection = db['combinations cleaned']

## mRNA

In [2]:
query = {
  "vaccine": { "$regex": "COVID19", "$options": "i" },
  "$or": [
    { "manufacturer": "MODERNA" },
    { "manufacturer": "PFIZER\\BIONTECH" }
  ]
}

results = collection.find(query)
detected_signals_mRNA = []

for result in tqdm(results):
  DE = result.get("DE")
  if DE >= 3:
      dE = result.get("dE")
      De = result.get("De")
      de = result.get("de")
      contingency_table = [
        [DE, dE],
        [De, de]
      ]
      rrr  = disproportionaly_analysis.relative_reporting_ratio(contingency_table)
      sd_rrr = disproportionaly_analysis.sd_rrr(contingency_table)
      rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
      
      prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table)
      sd_prr = disproportionaly_analysis.sd_prr(contingency_table)
      prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
      
      ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table)
      sd_ror = disproportionaly_analysis.sd_ror(contingency_table)
      ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
      
      
      chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
      
      IC = disproportionaly_analysis.information_component(contingency_table)
      IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
      
      
    
      method = IC_ci_lower > 0 
        
      if method:
          result_dict = {
              'vaccine': result.get("vaccine"),
              'manufacturer': result.get("manufacturer"),
              'symptom': result.get("symptom"),
              'IC': [IC, [IC_ci_lower, IC_ci_upper]],
              'method': method,
          }
          detected_signals_mRNA.append(result_dict)
  


20510it [00:00, 51837.54it/s]


In [13]:
len(detected_signals_mRNA)

10182

## Protein

In [3]:
query = {
  "vaccine": { "$regex": "COVID19", "$options": "i" },
  "manufacturer": "NOVAVAX"
}

results = collection.find(query)
detected_signals_protein = []

for result in tqdm(results):
  DE = result.get("DE")
  if DE >= 3: 
      dE = result.get("dE")
      De = result.get("De")
      de = result.get("de")
      contingency_table = [
        [DE, dE],
        [De, de]
      ]
      rrr  = disproportionaly_analysis.relative_reporting_ratio(contingency_table)
      sd_rrr = disproportionaly_analysis.sd_rrr(contingency_table)
      rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
      
      prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table)
      sd_prr = disproportionaly_analysis.sd_prr(contingency_table)
      prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
      
      ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table)
      sd_ror = disproportionaly_analysis.sd_ror(contingency_table)
      ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
      
      
      chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
      
      IC = disproportionaly_analysis.information_component(contingency_table)
      IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
      
    
      method = IC_ci_lower > 0 
        
      if method:
          result_dict = {
              'vaccine': result.get("vaccine"),
              'manufacturer': result.get("manufacturer"),
              'symptom': result.get("symptom"),
              'IC': [IC, [IC_ci_lower, IC_ci_upper]],
              'method': method,
          }
          detected_signals_protein.append(result_dict)
  


159it [00:00, 2548.98it/s]


In [4]:
len(detected_signals_protein)

96

## Vector

In [7]:
query = {
  "vaccine": { "$regex": "COVID19", "$options": "i" },
  "manufacturer": "JANSSEN"
}

results = collection.find(query)
detected_signals_vector = []

for result in tqdm(results): 
  DE = result.get("DE")
  if DE >= 3: 
      dE = result.get("dE")
      De = result.get("De")
      de = result.get("de")
      contingency_table = [
        [DE, dE],
        [De, de]
      ]
      rrr  = disproportionaly_analysis.relative_reporting_ratio(contingency_table)
      sd_rrr = disproportionaly_analysis.sd_rrr(contingency_table)
      rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
      
      prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table)
      sd_prr = disproportionaly_analysis.sd_prr(contingency_table)
      prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
      
      ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table)
      sd_ror = disproportionaly_analysis.sd_ror(contingency_table)
      ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
      
      
      chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
      
      IC = disproportionaly_analysis.information_component(contingency_table)
      IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
      
    
      method = IC_ci_lower > 0 
        
      if method:
          result_dict = {
              'vaccine': result.get("vaccine"),
              'manufacturer': result.get("manufacturer"),
              'symptom': result.get("symptom"),
              'IC': [IC, [IC_ci_lower, IC_ci_upper]],
              'method': method,
          }
          detected_signals_vector.append(result_dict)
 


3678it [00:00, 22173.14it/s]


In [8]:
len(detected_signals_vector)

1835

## Unknown

In [9]:
query = {
  "vaccine": { "$regex": "COVID19", "$options": "i" },
  "manufacturer": "UNKNOWN MANUFACTURER"
}

results = collection.find(query)
detected_signals_unknown = []

for result in tqdm(results):
  DE = result.get("DE")
  if DE >= 3: 
      dE = result.get("dE")
      De = result.get("De")
      de = result.get("de")
      contingency_table = [
        [DE, dE],
        [De, de]
      ]
      rrr  = disproportionaly_analysis.relative_reporting_ratio(contingency_table)
      sd_rrr = disproportionaly_analysis.sd_rrr(contingency_table)
      rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)
      
      prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table)
      sd_prr = disproportionaly_analysis.sd_prr(contingency_table)
      prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)
      
      ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table)
      sd_ror = disproportionaly_analysis.sd_ror(contingency_table)
      ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)
      
      
      chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
      
      IC = disproportionaly_analysis.information_component(contingency_table)
      IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
      
    
      method = IC_ci_lower > 0 
        
      if method:
          result_dict = {
              'vaccine': result.get("vaccine"),
              'manufacturer': result.get("manufacturer"),
              'symptom': result.get("symptom"),
              'IC': [IC, [IC_ci_lower, IC_ci_upper]],
              'method': method,
          }
          detected_signals_unknown.append(result_dict)
      


0it [00:00, ?it/s]


In [10]:
len(detected_signals_unknown)

0

In [12]:
count_method = sum(entry['method'] for entry in detected_signals_unknown)


print("Number of entries where both method is true:", count_method)


Number of entries where both method is true: 0


In [32]:
len(detected_signals_mRNA)

5972

In [33]:
count_method_1_true = sum(entry['method_1'] for entry in detected_signals_mRNA)
count_method_2_true = sum(entry['method_2'] for entry in detected_signals_mRNA)
count_both_true = sum(entry['method_1'] and entry['method_2'] for entry in detected_signals_mRNA)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 2023
Number of entries where method_1 is true: 2023
Number of entries where method_2 is true: 5972


In [16]:
len(detected_signals_protein)

49

In [25]:
count_method_1_true = sum(entry['method_1'] for entry in detected_signals_protein)
count_method_2_true = sum(entry['method_2'] for entry in detected_signals_protein)
count_both_true = sum(entry['method_1'] and entry['method_2'] for entry in detected_signals_protein)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 37
Number of entries where method_1 is true: 37
Number of entries where method_2 is true: 49


In [17]:
len(detected_signals_vector)

985

In [26]:
count_method_1_true = sum(entry['method_1'] for entry in detected_signals_vector)
count_method_2_true = sum(entry['method_2'] for entry in detected_signals_vector)
count_both_true = sum(entry['method_1'] and entry['method_2'] for entry in detected_signals_vector)

print("Number of entries where both method_1 and method_2 are true:", count_both_true)
print("Number of entries where method_1 is true:", count_method_1_true)
print("Number of entries where method_2 is true:", count_method_2_true)

Number of entries where both method_1 and method_2 are true: 468
Number of entries where method_1 is true: 468
Number of entries where method_2 is true: 985
