In [6]:
from pymongo import MongoClient
import sys
sys.path.append('../statistical_calculations')  # Adds the statistical_calculations directory to the path
import disproportionaly_analysis
from tqdm import tqdm
import json
from datetime import datetime, timedelta 
from calendar import monthrange

# Open a client against the local MongoDB server, then pick the 'vaers'
# database and its 'reports cleaned' collection used by every query below.
client = MongoClient('mongodb://localhost:27017/')
db = client.get_database('vaers')
col = db.get_collection('reports cleaned')

In [7]:
# Lower bound for the month-by-month backward search: the monthly loops in the
# cells below stop as soon as their cutoff date falls before this value.
end_date = datetime(2020, 11, 30)

# Define a function to reduce the date range by a month
def reduce_date_range_by_month(start_date):
    """Return midnight on the last day of the month preceding `start_date`.

    Only the year and month of `start_date` are used; its day and time of day
    are ignored. January rolls back to December of the previous year.
    """
    # Count months on a single 0-based axis so stepping back from January
    # needs no special case.
    month_index = start_date.year * 12 + (start_date.month - 1) - 1
    prev_year, prev_month_zero_based = divmod(month_index, 12)
    prev_month = prev_month_zero_based + 1

    # monthrange returns (weekday of day 1, number of days in the month).
    days_in_prev_month = monthrange(prev_year, prev_month)[1]
    return datetime(prev_year, prev_month, days_in_prev_month)

def reduce_date_range_by_day(start_date):
    """Return the moment exactly one day before `start_date`."""
    one_day = timedelta(days=1)
    return start_date - one_day

### PFIZER/BIONTECH

In [8]:
# Read the Pfizer/BioNTech signal entries from their JSON file.
file_name_filtered = "Arrays/Signals_Pfizer.json"
with open(file_name_filtered, 'r') as signals_file:
    Signals_Pfizer = json.loads(signals_file.read())

In [9]:
# Keep every entry whose status is anything other than "Not found".
filtered_signals_Pfizer = list(
    filter(lambda entry: entry["status"] != "Not found", Signals_Pfizer)
)

In [10]:
# Number of Pfizer/BioNTech entries that remain after the status filter.
len(filtered_signals_Pfizer)

34

In [11]:
def _ic_signal_at(reaction, cutoff):
    """Test one vaccine/symptom pair for an IC-based signal at a cutoff date.

    Counts the reports in `col` received on or before `cutoff`, builds the 2x2
    vaccine/symptom contingency table and checks the lower confidence bound of
    the information component (IC).

    Defined inside this cell so the cell only depends on the setup cells at
    the top of the notebook.

    Args:
        reaction: Signal entry; its "manufacturer", "vaccine" and "symptom"
            values select the vaccine and the symptom.
        cutoff: datetime; only reports with RECVDATE <= cutoff are counted.

    Returns:
        None when fewer than 3 reports combine the vaccine and the symptom
        (the minimum count this notebook requires before evaluating);
        otherwise the result of `IC_ci_lower > 0`.
    """
    received = {"RECVDATE": {"$lte": cutoff}}
    D_query = {
        **received,
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        **received,
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Check the joint count first: below the threshold none of the other
    # (read-only) counts are needed, so three queries are saved.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    N = col.count_documents(received)
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    De = D - DE  # vaccine reported without the symptom
    dE = E - DE  # symptom reported without the vaccine
    de = N - (DE + De + dE)  # neither
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    # Only the IC lower bound decides the signal, so the RRR/PRR/ROR and
    # chi-square statistics are no longer computed here.
    IC = disproportionaly_analysis.information_component(contingency_table)
    IC_ci_lower, _IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
    return IC_ci_lower > 0


# For each Pfizer/BioNTech signal, find the earliest cutoff date at which the
# IC criterion is met: first at month-end resolution, then day by day inside
# the month that was found.
signals_dict_Pfizer = []
for i, reaction in enumerate(tqdm(filtered_signals_Pfizer), start=1):
    symptom = reaction['symptom']

    # Phase 1: walk back over month-end cutoffs, remembering the earliest one
    # at which the signal is present.
    start_date = datetime(2023, 12, 31)
    first_found = None
    while True:
        detected = _ic_signal_at(reaction, start_date)
        if detected is None:
            # Fewer than 3 combined reports: stop searching further back.
            break
        if detected:
            first_found = start_date
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Pfizer.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # Phase 2: refine to the earliest day of that month with the signal present.
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    start_date = first_found
    while True:
        detected = _ic_signal_at(reaction, start_date)
        if detected is None:
            break
        if detected:
            first_found = start_date
        if start_date.day == 1:
            # The first day of the month is the last date to check.
            break
        start_date = reduce_date_range_by_day(start_date)

    signals_dict_Pfizer.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  0%|          | 0/34 [00:00<?, ?it/s]

found month 2021-01-31 00:00:00 for symptom 1 Vomiting, checking day


  3%|▎         | 1/34 [07:51<4:19:05, 471.07s/it]

signal 1 Vomiting found at 2021-01-09 00:00:00
found month 2020-12-31 00:00:00 for symptom 2 Lymphadenopathy, checking day


  6%|▌         | 2/34 [13:51<3:36:28, 405.90s/it]

signal 2 Lymphadenopathy found at 2020-12-20 00:00:00


  9%|▉         | 3/34 [18:19<2:57:19, 343.21s/it]

Saved Rash as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 4 Pruritus, checking day


 12%|█▏        | 4/34 [24:31<2:57:11, 354.38s/it]

signal 4 Pruritus found at 2020-12-17 00:00:00
found month 2020-12-31 00:00:00 for symptom 5 Urticaria, checking day


 15%|█▍        | 5/34 [30:23<2:50:51, 353.51s/it]

signal 5 Urticaria found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 6 Angioedema, checking day


 18%|█▊        | 6/34 [34:07<2:24:27, 309.56s/it]

signal 6 Angioedema found at 2020-12-20 00:00:00
found month 2020-12-31 00:00:00 for symptom 7 Decreased appetite, checking day


 21%|██        | 7/34 [40:10<2:27:11, 327.10s/it]

signal 7 Decreased appetite found at 2020-12-28 00:00:00
found month 2021-02-28 00:00:00 for symptom 8 Insomnia, checking day


 24%|██▎       | 8/34 [47:35<2:37:55, 364.45s/it]

signal 8 Insomnia found at 2021-02-08 00:00:00
found month 2020-12-31 00:00:00 for symptom 9 Headache, checking day


 26%|██▋       | 9/34 [54:22<2:37:21, 377.66s/it]

signal 9 Headache found at 2020-12-15 00:00:00
found month 2020-12-31 00:00:00 for symptom 10 Dizziness, checking day


 29%|██▉       | 10/34 [1:00:49<2:32:14, 380.60s/it]

signal 10 Dizziness found at 2020-12-15 00:00:00
found month 2021-01-31 00:00:00 for symptom 11 Lethargy, checking day


 32%|███▏      | 11/34 [1:08:26<2:34:51, 403.96s/it]

signal 11 Lethargy found at 2021-01-13 00:00:00
found month 2020-12-31 00:00:00 for symptom 12 Facial paralysis, checking day


 35%|███▌      | 12/34 [1:11:43<2:05:07, 341.25s/it]

signal 12 Facial paralysis found at 2020-12-18 00:00:00
found month 2020-12-31 00:00:00 for symptom 13 Paraesthesia, checking day


 38%|███▊      | 13/34 [1:16:25<1:53:05, 323.12s/it]

signal 13 Paraesthesia found at 2020-12-15 00:00:00
found month 2020-12-31 00:00:00 for symptom 14 Hypoaesthesia, checking day


 41%|████      | 14/34 [1:22:06<1:49:32, 328.64s/it]

signal 14 Hypoaesthesia found at 2020-12-17 00:00:00
found month 2021-03-31 00:00:00 for symptom 15 Myocarditis, checking day


 44%|████▍     | 15/34 [1:26:00<1:35:02, 300.12s/it]

signal 15 Myocarditis found at 2021-03-03 00:00:00
found month 2021-02-28 00:00:00 for symptom 16 Pericarditis, checking day


 47%|████▋     | 16/34 [1:29:39<1:22:43, 275.73s/it]

signal 16 Pericarditis found at 2021-02-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 17 Diarrhoea, checking day


 50%|█████     | 17/34 [1:33:50<1:16:00, 268.29s/it]

signal 17 Diarrhoea found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 18 Nausea, checking day


 53%|█████▎    | 18/34 [1:40:19<1:21:13, 304.59s/it]

signal 18 Nausea found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 19 Hyperhidrosis, checking day


 56%|█████▌    | 19/34 [1:46:28<1:20:58, 323.87s/it]

signal 19 Hyperhidrosis found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 20 Night sweats, checking day


 59%|█████▉    | 20/34 [1:51:13<1:12:50, 312.14s/it]

signal 20 Night sweats found at 2020-12-21 00:00:00


 62%|██████▏   | 21/34 [1:53:28<56:08, 259.08s/it]  

Saved Erythema multiforme as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 22 Arthralgia, checking day


 65%|██████▍   | 22/34 [1:58:00<52:34, 262.87s/it]

signal 22 Arthralgia found at 2020-12-17 00:00:00
found month 2020-12-31 00:00:00 for symptom 23 Myalgia, checking day


 68%|██████▊   | 23/34 [2:04:11<54:07, 295.20s/it]

signal 23 Myalgia found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 24 Pain in extremity, checking day


 71%|███████   | 24/34 [2:10:20<52:55, 317.51s/it]

signal 24 Pain in extremity found at 2020-12-17 00:00:00
found month 2021-01-31 00:00:00 for symptom 25 Heavy menstrual bleeding, checking day


 74%|███████▎  | 25/34 [2:13:35<42:06, 280.73s/it]

signal 25 Heavy menstrual bleeding found at 2021-01-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 26 Injection site pain, checking day


 76%|███████▋  | 26/34 [2:19:33<40:30, 303.83s/it]

signal 26 Injection site pain found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 27 Fatigue, checking day


 79%|███████▉  | 27/34 [2:25:38<37:35, 322.26s/it]

signal 27 Fatigue found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 28 Chills, checking day


 82%|████████▏ | 28/34 [2:31:28<33:03, 330.64s/it]

signal 28 Chills found at 2020-12-16 00:00:00
found month 2021-01-31 00:00:00 for symptom 29 Pyrexia, checking day


 85%|████████▌ | 29/34 [2:39:16<30:58, 371.69s/it]

signal 29 Pyrexia found at 2021-01-08 00:00:00


 88%|████████▊ | 30/34 [2:43:14<22:06, 331.70s/it]

Saved Injection site swelling as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 31 Asthenia, checking day


 91%|█████████ | 31/34 [2:48:55<16:43, 334.43s/it]

signal 31 Asthenia found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 32 Malaise, checking day


 94%|█████████▍| 32/34 [2:54:36<11:12, 336.32s/it]

signal 32 Malaise found at 2020-12-16 00:00:00


 97%|█████████▋| 33/34 [2:58:31<05:05, 305.87s/it]

Saved Injection site pruritus as first_found = None because it was not detected by the end of an month


100%|██████████| 34/34 [3:00:35<00:00, 318.68s/it]

Saved Extensive swelling of vaccinated limb as first_found = None because it was not detected by the end of an month





In [12]:
# Persist the Pfizer/BioNTech first-detection records as JSON.
file_name_filtered = "Arrays/First_found_filtered_Signals_Pfizer_IC_CI.json"
with open(file_name_filtered, 'w') as results_file:
    json.dump(signals_dict_Pfizer, results_file)

In [13]:
# Display the collected Pfizer/BioNTech first-detection records for inspection.
signals_dict_Pfizer

[{'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Vomiting',
  'first_found': '2021-01-09'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Lymphadenopathy',
  'first_found': '2020-12-20'},
 {'manufacturer': 'PFIZER\\BIONTECH', 'symptom': 'Rash', 'first_found': 'NaN'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Pruritus',
  'first_found': '2020-12-17'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Urticaria',
  'first_found': '2020-12-22'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Angioedema',
  'first_found': '2020-12-20'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Decreased appetite',
  'first_found': '2020-12-28'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Insomnia',
  'first_found': '2021-02-08'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Headache',
  'first_found': '2020-12-15'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Dizziness',
  'first_found': '2020-12-15'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'sym

### MODERNA

In [14]:
# Read the Moderna signal entries from their JSON file.
file_name_filtered = "Arrays/Signals_Moderna.json"
with open(file_name_filtered, 'r') as signals_file:
    Signals_Moderna = json.loads(signals_file.read())

In [15]:
# Keep every entry whose status is anything other than "Not found".
filtered_signals_Moderna = list(
    filter(lambda entry: entry["status"] != "Not found", Signals_Moderna)
)

In [16]:
# Number of Moderna entries that remain after the status filter.
len(filtered_signals_Moderna)

33

In [17]:
def _ic_signal_at(reaction, cutoff):
    """Test one vaccine/symptom pair for an IC-based signal at a cutoff date.

    Counts the reports in `col` received on or before `cutoff`, builds the 2x2
    vaccine/symptom contingency table and checks the lower confidence bound of
    the information component (IC).

    Defined inside this cell so the cell only depends on the setup cells at
    the top of the notebook.

    Args:
        reaction: Signal entry; its "manufacturer", "vaccine" and "symptom"
            values select the vaccine and the symptom.
        cutoff: datetime; only reports with RECVDATE <= cutoff are counted.

    Returns:
        None when fewer than 3 reports combine the vaccine and the symptom
        (the minimum count this notebook requires before evaluating);
        otherwise the result of `IC_ci_lower > 0`.
    """
    received = {"RECVDATE": {"$lte": cutoff}}
    D_query = {
        **received,
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        **received,
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Check the joint count first: below the threshold none of the other
    # (read-only) counts are needed, so three queries are saved.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    N = col.count_documents(received)
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    De = D - DE  # vaccine reported without the symptom
    dE = E - DE  # symptom reported without the vaccine
    de = N - (DE + De + dE)  # neither
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    # Only the IC lower bound decides the signal, so the RRR/PRR/ROR and
    # chi-square statistics are no longer computed here.
    IC = disproportionaly_analysis.information_component(contingency_table)
    IC_ci_lower, _IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
    return IC_ci_lower > 0


# For each Moderna signal, find the earliest cutoff date at which the IC
# criterion is met: first at month-end resolution, then day by day inside the
# month that was found.
signals_dict_Moderna = []
for i, reaction in enumerate(tqdm(filtered_signals_Moderna), start=1):
    symptom = reaction['symptom']

    # Phase 1: walk back over month-end cutoffs, remembering the earliest one
    # at which the signal is present.
    start_date = datetime(2023, 12, 31)
    first_found = None
    while True:
        detected = _ic_signal_at(reaction, start_date)
        if detected is None:
            # Fewer than 3 combined reports: stop searching further back.
            break
        if detected:
            first_found = start_date
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Moderna.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # Phase 2: refine to the earliest day of that month with the signal present.
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    start_date = first_found
    while True:
        detected = _ic_signal_at(reaction, start_date)
        if detected is None:
            break
        if detected:
            first_found = start_date
        if start_date.day == 1:
            # The first day of the month is the last date to check.
            break
        start_date = reduce_date_range_by_day(start_date)

    signals_dict_Moderna.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  0%|          | 0/33 [00:00<?, ?it/s]

found month 2020-12-31 00:00:00 for symptom 1 Lymphadenopathy, checking day


  3%|▎         | 1/33 [02:16<1:12:44, 136.38s/it]

signal 1 Lymphadenopathy found at 2020-12-29 00:00:00


  6%|▌         | 2/33 [04:42<1:13:20, 141.95s/it]

Saved Hypersensitivity as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 3 Decreased appetite, checking day


  9%|▉         | 3/33 [07:43<1:19:53, 159.80s/it]

signal 3 Decreased appetite found at 2020-12-25 00:00:00


 12%|█▏        | 4/33 [10:09<1:14:39, 154.48s/it]

Saved Irritability as first_found = None because it was not detected by the end of an month


 15%|█▌        | 5/33 [12:40<1:11:30, 153.23s/it]

Saved Crying as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 6 Headache, checking day


 18%|█▊        | 6/33 [16:21<1:19:22, 176.39s/it]

signal 6 Headache found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 7 Dizziness, checking day


 21%|██        | 7/33 [19:46<1:20:26, 185.64s/it]

signal 7 Dizziness found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 8 Facial paralysis, checking day


 24%|██▍       | 8/33 [22:30<1:14:26, 178.67s/it]

signal 8 Facial paralysis found at 2020-12-28 00:00:00
found month 2020-12-31 00:00:00 for symptom 9 Hypoaesthesia, checking day


 27%|██▋       | 9/33 [25:42<1:13:06, 182.76s/it]

signal 9 Hypoaesthesia found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 10 Paraesthesia, checking day


 30%|███       | 10/33 [28:55<1:11:16, 185.94s/it]

signal 10 Paraesthesia found at 2020-12-22 00:00:00
found month 2021-05-31 00:00:00 for symptom 11 Myocarditis, checking day


 33%|███▎      | 11/33 [31:40<1:05:49, 179.54s/it]

signal 11 Myocarditis found at 2021-05-24 00:00:00
found month 2021-06-30 00:00:00 for symptom 12 Pericarditis, checking day


 36%|███▋      | 12/33 [34:18<1:00:37, 173.21s/it]

signal 12 Pericarditis found at 2021-06-01 00:00:00
found month 2020-12-31 00:00:00 for symptom 13 Nausea, checking day


 39%|███▉      | 13/33 [37:08<57:21, 172.07s/it]  

signal 13 Nausea found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 14 Vomiting, checking day


 42%|████▏     | 14/33 [40:22<56:34, 178.66s/it]

signal 14 Vomiting found at 2020-12-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 15 Diarrhoea, checking day


 45%|████▌     | 15/33 [43:28<54:16, 180.90s/it]

signal 15 Diarrhoea found at 2020-12-28 00:00:00


 48%|████▊     | 16/33 [45:55<48:22, 170.73s/it]

Saved Abdominal pain as first_found = None because it was not detected by the end of an month
found month 2021-05-31 00:00:00 for symptom 17 Rash, checking day


 52%|█████▏    | 17/33 [50:33<54:06, 202.89s/it]

signal 17 Rash found at 2021-05-05 00:00:00
found month 2021-01-31 00:00:00 for symptom 18 Urticaria, checking day


 55%|█████▍    | 18/33 [55:02<55:40, 222.73s/it]

signal 18 Urticaria found at 2021-01-05 00:00:00
found month 2021-01-31 00:00:00 for symptom 19 Erythema multiforme, checking day


 58%|█████▊    | 19/33 [58:25<50:35, 216.82s/it]

signal 19 Erythema multiforme found at 2021-01-07 00:00:00
found month 2021-01-31 00:00:00 for symptom 20 Mechanical urticaria, checking day


 61%|██████    | 20/33 [1:00:37<41:27, 191.36s/it]

signal 20 Mechanical urticaria found at 2021-01-13 00:00:00
found month 2020-12-31 00:00:00 for symptom 21 Myalgia, checking day


 64%|██████▎   | 21/33 [1:03:13<36:11, 180.94s/it]

signal 21 Myalgia found at 2020-12-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 22 Arthralgia, checking day


 67%|██████▋   | 22/33 [1:06:27<33:52, 184.73s/it]

signal 22 Arthralgia found at 2020-12-24 00:00:00
found month 2021-01-31 00:00:00 for symptom 23 Heavy menstrual bleeding, checking day


 70%|██████▉   | 23/33 [1:08:11<26:46, 160.63s/it]

signal 23 Heavy menstrual bleeding found at 2021-01-27 00:00:00
found month 2020-12-31 00:00:00 for symptom 24 Injection site pain, checking day


 73%|███████▎  | 24/33 [1:11:31<25:52, 172.48s/it]

signal 24 Injection site pain found at 2020-12-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 25 Fatigue, checking day


 76%|███████▌  | 25/33 [1:15:00<24:27, 183.39s/it]

signal 25 Fatigue found at 2020-12-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 26 Chills, checking day


 79%|███████▉  | 26/33 [1:18:24<22:07, 189.58s/it]

signal 26 Chills found at 2020-12-22 00:00:00
found month 2020-12-31 00:00:00 for symptom 27 Pyrexia, checking day


 82%|████████▏ | 27/33 [1:22:11<20:04, 200.67s/it]

signal 27 Pyrexia found at 2020-12-25 00:00:00
found month 2021-01-31 00:00:00 for symptom 28 Injection site swelling, checking day


 85%|████████▍ | 28/33 [1:26:43<18:30, 222.08s/it]

signal 28 Injection site swelling found at 2021-01-03 00:00:00
found month 2021-01-31 00:00:00 for symptom 29 Injection site erythema, checking day


 88%|████████▊ | 29/33 [1:31:20<15:54, 238.50s/it]

signal 29 Injection site erythema found at 2021-01-06 00:00:00
found month 2020-12-31 00:00:00 for symptom 30 Injection site urticaria, checking day


 91%|█████████ | 30/33 [1:33:59<10:44, 214.70s/it]

signal 30 Injection site urticaria found at 2020-12-31 00:00:00
found month 2021-01-31 00:00:00 for symptom 31 Injection site rash, checking day


 94%|█████████▍| 31/33 [1:38:20<07:37, 228.51s/it]

signal 31 Injection site rash found at 2021-01-05 00:00:00
found month 2020-12-31 00:00:00 for symptom 32 Injection site pruritus, checking day


 97%|█████████▋| 32/33 [1:41:13<03:32, 212.06s/it]

signal 32 Injection site pruritus found at 2020-12-30 00:00:00
found month 2022-03-31 00:00:00 for symptom 33 Extensive swelling of vaccinated limb, checking day


100%|██████████| 33/33 [1:43:51<00:00, 188.82s/it]

signal 33 Extensive swelling of vaccinated limb found at 2022-03-01 00:00:00





In [18]:
# Persist the Moderna first-detection records as JSON.
file_name_filtered = "Arrays/First_found_filtered_Signals_Moderna_IC_CI.json"
with open(file_name_filtered, 'w') as results_file:
    json.dump(signals_dict_Moderna, results_file)

In [19]:
# Number of Moderna records collected by the search loop.
len(signals_dict_Moderna)

33

### JANSSEN

In [20]:
# Read the Janssen signal entries from their JSON file.
file_name_filtered = "Arrays/Signals_Janssen.json"
with open(file_name_filtered, 'r') as signals_file:
    Signals_Janssen = json.loads(signals_file.read())

In [21]:
# Keep every entry whose status is anything other than "Not found".
filtered_signals_Janssen = list(
    filter(lambda entry: entry["status"] != "Not found", Signals_Janssen)
)

In [22]:
from datetime import datetime

# For every Janssen signal, find the earliest cutoff date at which the lower
# bound of the IC confidence interval is above zero.  The search runs in two
# passes: month ends first (coarse), then single days inside the earliest
# flagged month (fine).
def _ic_signal_at(reaction, cutoff):
    """Evaluate the IC criterion on all reports received up to ``cutoff``.

    Args:
        reaction: dict with the keys "manufacturer", "vaccine" and "symptom".
        cutoff: datetime; only documents with RECVDATE <= cutoff are counted.

    Returns:
        None when fewer than three reports combine the vaccine and the
        symptom (the criterion is not evaluated in that case); otherwise the
        truth value of ``IC_ci_lower > 0``.
    """
    received = {"RECVDATE": {"$lte": cutoff}}
    D_query = {
        **received,
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        **received,
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Count the drug+event cell first: below three reports nothing else is
    # needed, which saves the three remaining count queries.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    N = col.count_documents(received)
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    # 2x2 contingency table: rows = vaccine yes/no, columns = symptom yes/no.
    De = D - DE
    dE = E - DE
    de = N - (DE + De + dE)
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    # Only the IC lower bound decides here; the RRR/PRR/ROR/chi-square values
    # that used to be computed alongside were never read and are dropped.
    IC = disproportionaly_analysis.information_component(contingency_table)
    IC_ci_lower, _IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
    return IC_ci_lower > 0


signals_dict_Janssen = []
i = 0
for i, reaction in enumerate(tqdm(filtered_signals_Janssen), start=1):
    symptom = reaction['symptom']

    # Pass 1: walk backwards over month ends.  The walk does not stop at the
    # first month without a signal, so the earliest flagged month end wins.
    # It ends once fewer than three reports remain or the cutoff would fall
    # before end_date.
    first_found = None
    start_date = datetime(2023, 12, 31)
    while True:
        signal = _ic_signal_at(reaction, start_date)
        if signal is None:
            break
        if signal:
            first_found = start_date
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Janssen.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    print(f"found month {first_found} for symptom {i} {symptom}, checking day")

    # Pass 2: walk backwards day by day inside the flagged month, down to the
    # 1st (or until fewer than three reports remain).  As in pass 1 the
    # earliest flagged day wins.
    start_date = first_found
    while True:
        signal = _ic_signal_at(reaction, start_date)
        if signal is None:
            break
        if signal:
            first_found = start_date
        if start_date.day == 1:
            break
        start_date = reduce_date_range_by_day(start_date)

    signals_dict_Janssen.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  3%|▎         | 1/35 [00:56<31:44, 56.02s/it]

Saved Lymphadenopathy as first_found = None because it was not detected by the end of an month
found month 2021-04-30 00:00:00 for symptom 2 Immune thrombocytopenia, checking day


  6%|▌         | 2/35 [02:05<35:11, 63.98s/it]

signal 2 Immune thrombocytopenia found at 2021-04-21 00:00:00
found month 2021-03-31 00:00:00 for symptom 3 Headache, checking day


  9%|▊         | 3/35 [04:23<52:12, 97.90s/it]

signal 3 Headache found at 2021-03-04 00:00:00
found month 2021-03-31 00:00:00 for symptom 4 Dizziness, checking day


 11%|█▏        | 4/35 [06:18<53:58, 104.46s/it]

signal 4 Dizziness found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 5 Tremor, checking day


 14%|█▍        | 5/35 [07:56<51:01, 102.05s/it]

signal 5 Tremor found at 2021-03-05 00:00:00


 17%|█▋        | 6/35 [08:53<42:01, 86.94s/it] 

Saved Urticaria as first_found = None because it was not detected by the end of an month


 20%|██        | 7/35 [09:46<35:22, 75.79s/it]

Saved Hypersensitivity as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 8 Paraesthesia, checking day


 23%|██▎       | 8/35 [11:26<37:36, 83.57s/it]

signal 8 Paraesthesia found at 2021-03-07 00:00:00
found month 2021-03-31 00:00:00 for symptom 9 Hypoaesthesia, checking day


 26%|██▌       | 9/35 [13:08<38:36, 89.09s/it]

signal 9 Hypoaesthesia found at 2021-03-05 00:00:00
found month 2021-04-30 00:00:00 for symptom 10 Facial paralysis, checking day


 29%|██▊       | 10/35 [14:35<36:54, 88.58s/it]

signal 10 Facial paralysis found at 2021-04-14 00:00:00
found month 2021-03-31 00:00:00 for symptom 11 Tinnitus, checking day


 31%|███▏      | 11/35 [15:55<34:24, 86.01s/it]

signal 11 Tinnitus found at 2021-03-08 00:00:00
found month 2021-08-31 00:00:00 for symptom 12 Guillain-Barre syndrome, checking day


 34%|███▍      | 12/35 [17:17<32:26, 84.65s/it]

signal 12 Guillain-Barre syndrome found at 2021-08-04 00:00:00


 37%|███▋      | 13/35 [17:56<25:56, 70.76s/it]

Saved Myelitis as first_found = None because it was not detected by the end of an month


 40%|████      | 14/35 [18:40<21:58, 62.78s/it]

Saved Myocarditis as first_found = None because it was not detected by the end of an month


 43%|████▎     | 15/35 [19:23<18:57, 56.88s/it]

Saved Pericarditis as first_found = None because it was not detected by the end of an month
found month 2021-07-31 00:00:00 for symptom 16 Capillary leak syndrome, checking day


 46%|████▌     | 16/35 [20:04<16:29, 52.08s/it]

signal 16 Capillary leak syndrome found at 2021-07-30 00:00:00
found month 2021-06-30 00:00:00 for symptom 17 Cutaneous vasculitis, checking day


 49%|████▊     | 17/35 [21:17<17:32, 58.45s/it]

signal 17 Cutaneous vasculitis found at 2021-06-02 00:00:00
found month 2021-03-31 00:00:00 for symptom 18 Nausea, checking day


 51%|█████▏    | 18/35 [23:16<21:41, 76.53s/it]

signal 18 Nausea found at 2021-03-04 00:00:00
found month 2021-04-30 00:00:00 for symptom 19 Cough, checking day


 54%|█████▍    | 19/35 [24:59<22:31, 84.49s/it]

signal 19 Cough found at 2021-04-26 00:00:00
found month 2021-03-31 00:00:00 for symptom 20 Oropharyngeal pain, checking day


 57%|█████▋    | 20/35 [26:35<22:01, 88.07s/it]

signal 20 Oropharyngeal pain found at 2021-03-07 00:00:00
found month 2021-06-30 00:00:00 for symptom 21 Sneezing, checking day


 60%|██████    | 21/35 [27:54<19:55, 85.38s/it]

signal 21 Sneezing found at 2021-06-14 00:00:00
found month 2021-03-31 00:00:00 for symptom 22 Diarrhoea, checking day


 63%|██████▎   | 22/35 [29:32<19:19, 89.17s/it]

signal 22 Diarrhoea found at 2021-03-07 00:00:00
found month 2021-03-31 00:00:00 for symptom 23 Vomiting, checking day


 66%|██████▌   | 23/35 [31:17<18:46, 93.87s/it]

signal 23 Vomiting found at 2021-03-06 00:00:00


 69%|██████▊   | 24/35 [32:18<15:22, 83.90s/it]

Saved Rash as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 25 Hyperhidrosis, checking day


 71%|███████▏  | 25/35 [33:56<14:41, 88.17s/it]

signal 25 Hyperhidrosis found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 26 Myalgia, checking day


 74%|███████▍  | 26/35 [35:47<14:14, 94.95s/it]

signal 26 Myalgia found at 2021-03-06 00:00:00
found month 2021-03-31 00:00:00 for symptom 27 Arthralgia, checking day


 77%|███████▋  | 27/35 [37:34<13:08, 98.61s/it]

signal 27 Arthralgia found at 2021-03-05 00:00:00
found month 2021-05-31 00:00:00 for symptom 28 Muscular weakness, checking day


 80%|████████  | 28/35 [39:13<11:30, 98.61s/it]

signal 28 Muscular weakness found at 2021-05-02 00:00:00
found month 2021-03-31 00:00:00 for symptom 29 Back pain, checking day


 83%|████████▎ | 29/35 [40:48<09:46, 97.75s/it]

signal 29 Back pain found at 2021-03-06 00:00:00
found month 2021-03-31 00:00:00 for symptom 30 Pain in extremity, checking day


 86%|████████▌ | 30/35 [42:43<08:34, 102.91s/it]

signal 30 Pain in extremity found at 2021-03-08 00:00:00
found month 2021-03-31 00:00:00 for symptom 31 Fatigue, checking day


 89%|████████▊ | 31/35 [44:52<07:22, 110.55s/it]

signal 31 Fatigue found at 2021-03-05 00:00:00


 91%|█████████▏| 32/35 [45:57<04:50, 96.96s/it] 

Saved Injection site pain as first_found = None because it was not detected by the end of an month


 94%|█████████▍| 33/35 [46:56<02:51, 85.73s/it]

Saved Injection site swelling as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 34 Chills, checking day


 97%|█████████▋| 34/35 [48:52<01:34, 94.79s/it]

signal 34 Chills found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 35 Pyrexia, checking day


100%|██████████| 35/35 [51:16<00:00, 87.91s/it] 

signal 35 Pyrexia found at 2021-03-05 00:00:00





In [23]:
# Write the Janssen first-detection list to its JSON file.
file_name_filtered = "Arrays/First_found_filtered_Signals_Janssen_IC_CI.json"
with open(file_name_filtered, mode='w') as out_file:
    json.dump(signals_dict_Janssen, out_file)

In [24]:
# Show how many entries signals_dict_Janssen holds.
len(signals_dict_Janssen)

35

### NOVAVAX

In [25]:
# Load the Novavax signal list from its JSON file.
file_name_filtered = "Arrays/Signals_Novavax.json"
with open(file_name_filtered, mode='r') as signals_file:
    Signals_Novavax = json.load(signals_file)

In [26]:
# Keep every Novavax signal whose status is anything other than "Not found".
filtered_signals_Novavax = list(
    filter(lambda candidate: candidate["status"] != "Not found", Signals_Novavax)
)

In [27]:
# Show how many Novavax signals remain after filtering.
len(filtered_signals_Novavax)

23

In [28]:
from datetime import datetime

# For every Novavax signal, find the earliest cutoff date at which the lower
# bound of the IC confidence interval is above zero.  The search runs in two
# passes: month ends first (coarse), then single days inside the earliest
# flagged month (fine).
def _ic_signal_at(reaction, cutoff):
    """Evaluate the IC criterion on all reports received up to ``cutoff``.

    Args:
        reaction: dict with the keys "manufacturer", "vaccine" and "symptom".
        cutoff: datetime; only documents with RECVDATE <= cutoff are counted.

    Returns:
        None when fewer than three reports combine the vaccine and the
        symptom (the criterion is not evaluated in that case); otherwise the
        truth value of ``IC_ci_lower > 0``.
    """
    received = {"RECVDATE": {"$lte": cutoff}}
    D_query = {
        **received,
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        **received,
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Count the drug+event cell first: below three reports nothing else is
    # needed, which saves the three remaining count queries.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    N = col.count_documents(received)
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    # 2x2 contingency table: rows = vaccine yes/no, columns = symptom yes/no.
    De = D - DE
    dE = E - DE
    de = N - (DE + De + dE)
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    # Only the IC lower bound decides here; the RRR/PRR/ROR/chi-square values
    # that used to be computed alongside were never read and are dropped.
    IC = disproportionaly_analysis.information_component(contingency_table)
    IC_ci_lower, _IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)
    return IC_ci_lower > 0


signals_dict_Novavax = []
i = 0
for i, reaction in enumerate(tqdm(filtered_signals_Novavax), start=1):
    symptom = reaction['symptom']

    # Pass 1: walk backwards over month ends.  The walk does not stop at the
    # first month without a signal, so the earliest flagged month end wins.
    # It ends once fewer than three reports remain or the cutoff would fall
    # before end_date.
    first_found = None
    start_date = datetime(2023, 12, 31)
    while True:
        signal = _ic_signal_at(reaction, start_date)
        if signal is None:
            break
        if signal:
            first_found = start_date
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Novavax.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    print(f"found month {first_found} for symptom {i} {symptom}, checking day")

    # Pass 2: walk backwards day by day inside the flagged month, down to the
    # 1st (or until fewer than three reports remain).  As in pass 1 the
    # earliest flagged day wins.
    start_date = first_found
    while True:
        signal = _ic_signal_at(reaction, start_date)
        if signal is None:
            break
        if signal:
            first_found = start_date
        if start_date.day == 1:
            break
        start_date = reduce_date_range_by_day(start_date)

    signals_dict_Novavax.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  0%|          | 0/23 [00:00<?, ?it/s]

found month 2022-10-31 00:00:00 for symptom 1 Headache, checking day


  4%|▍         | 1/23 [01:25<31:19, 85.43s/it]

signal 1 Headache found at 2022-10-13 00:00:00
found month 2023-01-31 00:00:00 for symptom 2 Nausea, checking day


  9%|▊         | 2/23 [02:35<26:47, 76.53s/it]

signal 2 Nausea found at 2023-01-19 00:00:00


 13%|█▎        | 3/23 [02:57<17:09, 51.46s/it]

Saved Vomiting as first_found = None because it was not detected by the end of an month
found month 2023-02-28 00:00:00 for symptom 4 Myalgia, checking day


 17%|█▋        | 4/23 [03:56<17:13, 54.39s/it]

signal 4 Myalgia found at 2023-02-04 00:00:00


 22%|██▏       | 5/23 [04:16<12:38, 42.12s/it]

Saved Arthralgia as first_found = None because it was not detected by the end of an month


 26%|██▌       | 6/23 [04:42<10:20, 36.48s/it]

Saved Injection site pain as first_found = None because it was not detected by the end of an month
found month 2022-09-30 00:00:00 for symptom 7 Fatigue, checking day


 30%|███       | 7/23 [05:59<13:16, 49.80s/it]

signal 7 Fatigue found at 2022-09-06 00:00:00
found month 2022-12-31 00:00:00 for symptom 8 Malaise, checking day


 35%|███▍      | 8/23 [06:58<13:10, 52.69s/it]

signal 8 Malaise found at 2022-12-02 00:00:00
found month 2022-10-31 00:00:00 for symptom 9 Injection site swelling, checking day


 39%|███▉      | 9/23 [07:57<12:48, 54.90s/it]

signal 9 Injection site swelling found at 2022-10-04 00:00:00


 43%|████▎     | 10/23 [08:31<10:28, 48.33s/it]

Saved Pyrexia as first_found = None because it was not detected by the end of an month


 48%|████▊     | 11/23 [08:54<08:06, 40.51s/it]

Saved Pain in extremity as first_found = None because it was not detected by the end of an month
found month 2023-04-30 00:00:00 for symptom 12 Injection site pruritus, checking day


 52%|█████▏    | 12/23 [09:42<07:52, 42.97s/it]

signal 12 Injection site pruritus found at 2023-04-14 00:00:00


 57%|█████▋    | 13/23 [10:07<06:13, 37.34s/it]

Saved Chills as first_found = None because it was not detected by the end of an month


 61%|██████    | 14/23 [10:18<04:25, 29.54s/it]

Saved Injection site warmth as first_found = None because it was not detected by the end of an month


 65%|██████▌   | 15/23 [10:34<03:23, 25.48s/it]

Saved Lymphadenopathy as first_found = None because it was not detected by the end of an month
found month 2022-08-31 00:00:00 for symptom 16 Paraesthesia, checking day


 70%|██████▉   | 16/23 [11:26<03:53, 33.35s/it]

signal 16 Paraesthesia found at 2022-08-07 00:00:00
found month 2022-08-31 00:00:00 for symptom 17 Hypoaesthesia, checking day


 74%|███████▍  | 17/23 [12:16<03:50, 38.46s/it]

signal 17 Hypoaesthesia found at 2022-08-07 00:00:00
found month 2022-08-31 00:00:00 for symptom 18 Pericarditis, checking day


 78%|███████▊  | 18/23 [12:37<02:45, 33.10s/it]

signal 18 Pericarditis found at 2022-08-31 00:00:00


 83%|████████▎ | 19/23 [12:45<01:42, 25.70s/it]

Saved Hypertension as first_found = None because it was not detected by the end of an month


 87%|████████▋ | 20/23 [13:01<01:07, 22.63s/it]

Saved Rash as first_found = None because it was not detected by the end of an month


 91%|█████████▏| 21/23 [13:07<00:35, 17.69s/it]

Saved Erythema as first_found = None because it was not detected by the end of an month


 96%|█████████▌| 22/23 [13:25<00:17, 17.60s/it]

Saved Pruritus as first_found = None because it was not detected by the end of an month


100%|██████████| 23/23 [13:36<00:00, 35.52s/it]

Saved Urticaria as first_found = None because it was not detected by the end of an month





In [29]:
# Write the Novavax first-detection list to its JSON file.
file_name_filtered = "Arrays/First_found_filtered_Signals_Novavax_IC_CI.json"
with open(file_name_filtered, mode='w') as out_file:
    json.dump(signals_dict_Novavax, out_file)

In [30]:
# Display the collected Novavax entries for inspection.
signals_dict_Novavax

[{'manufacturer': 'NOVAVAX',
  'symptom': 'Headache',
  'first_found': '2022-10-13'},
 {'manufacturer': 'NOVAVAX', 'symptom': 'Nausea', 'first_found': '2023-01-19'},
 {'manufacturer': 'NOVAVAX', 'symptom': 'Vomiting', 'first_found': 'NaN'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Myalgia',
  'first_found': '2023-02-04'},
 {'manufacturer': 'NOVAVAX', 'symptom': 'Arthralgia', 'first_found': 'NaN'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Injection site pain',
  'first_found': 'NaN'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Fatigue',
  'first_found': '2022-09-06'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Malaise',
  'first_found': '2022-12-02'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Injection site swelling',
  'first_found': '2022-10-04'},
 {'manufacturer': 'NOVAVAX', 'symptom': 'Pyrexia', 'first_found': 'NaN'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Pain in extremity',
  'first_found': 'NaN'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Injection site pruritus',
  'fi

### Combine all signals and save them as a dict keyed by symptom

In [31]:
# Group the per-manufacturer results by symptom:
#   {symptom: [{"manufacturer": ..., "first_found": ...}, ...]}
# The source lists are only read, never modified, so this cell can be re-run
# safely.  The previous version popped 'symptom' out of every entry in place,
# which stripped the key from the per-manufacturer lists and made a second
# run fail with KeyError.
signals_dict_combined = {}

# Merge order (Novavax, Janssen, Moderna, Pfizer) fixes the order of the
# entries inside each symptom's list.
for manufacturer_signals in (
    signals_dict_Novavax,
    signals_dict_Janssen,
    signals_dict_Moderna,
    signals_dict_Pfizer,
):
    for entry in manufacturer_signals:
        # Copy every field except the symptom, which becomes the dict key.
        details = {key: value for key, value in entry.items() if key != 'symptom'}
        signals_dict_combined.setdefault(entry['symptom'], []).append(details)

print(signals_dict_combined)

{'Headache': [{'manufacturer': 'NOVAVAX', 'first_found': '2022-10-13'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-04'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-22'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020-12-15'}], 'Nausea': [{'manufacturer': 'NOVAVAX', 'first_found': '2023-01-19'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-04'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-22'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020-12-16'}], 'Vomiting': [{'manufacturer': 'NOVAVAX', 'first_found': 'NaN'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-06'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-23'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2021-01-09'}], 'Myalgia': [{'manufacturer': 'NOVAVAX', 'first_found': '2023-02-04'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-06'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-23'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020

In [32]:
# Write the symptom-keyed dictionary of all manufacturers to its JSON file.
file_name_filtered = "Arrays/All_signals_filtered_first_found_IC_CI.json"
with open(file_name_filtered, mode='w') as out_file:
    json.dump(signals_dict_combined, out_file)