In [100]:
from pymongo import MongoClient
import sys
sys.path.append('../statistical_calculations')  # Adds the statistical_calculations directory to the path
import disproportionaly_analysis
from tqdm import tqdm
import json
from datetime import datetime, timedelta 
from calendar import monthrange

# Connect to the MongoDB server on localhost and select the `reports`
# collection of the `vaers` database; `col` is the handle the count
# queries in the later cells run against.
client = MongoClient('mongodb://localhost:27017/')
db = client['vaers']  
col = db['reports']

In [101]:
# Lower bound of the backwards month-by-month scan: the search loops stop
# once their cutoff date falls before this date.
end_date = datetime(2020, 11, 30)

# Define a function to reduce the date range by a month
def reduce_date_range_by_month(start_date):
    """Return midnight on the last day of the month preceding ``start_date``.

    Only the year and month of ``start_date`` are used, so every day of a
    given month maps to the same result (e.g. 2021-03-15 -> 2021-02-28).
    """
    # Count whole months since year 0; stepping back one month then rolls
    # January over into December of the previous year without a special case.
    months_elapsed = start_date.year * 12 + (start_date.month - 1)
    new_year, month_index = divmod(months_elapsed - 1, 12)
    new_month = month_index + 1

    # monthrange() returns (weekday of the 1st, number of days in the month).
    last_day = monthrange(new_year, new_month)[1]
    return datetime(new_year, new_month, last_day)

def reduce_date_range_by_day(start_date):
    """Return the moment exactly one day before ``start_date``."""
    return start_date - timedelta(days=1)

### PFIZER/BIONTECH

In [143]:
# Read the Pfizer/BioNTech signal list from its JSON file.
file_name_filtered = "Signals_Pfizer.json"
with open(file_name_filtered) as signals_file:  # text read mode is the default
    Signals_Pfizer = json.load(signals_file)

In [144]:
# Keep only the entries whose status is something other than "Not found".
filtered_signals_Pfizer = [
    entry
    for entry in Signals_Pfizer
    if entry["status"] != "Not found"
]

In [145]:
# Number of Pfizer/BioNTech signals left after dropping the "Not found" entries.
len(filtered_signals_Pfizer)

34

In [146]:
# Earliest-detection search for every Pfizer/BioNTech signal.
#
# All counts are cumulative (reports with RECVDATE <= cutoff). Per reaction:
#   1. month pass - walk month-ends backwards from 2023-12-31 until the cutoff
#      drops below `end_date`, remembering the earliest month-end at which the
#      signal criteria hold;
#   2. day pass   - walk backwards one day at a time from that month-end to the
#      1st of the month, remembering the earliest day at which they still hold.
# Either pass stops as soon as fewer than 3 vaccine+symptom reports remain.

# N (all reports up to a cutoff) depends only on the cutoff date, so it is
# counted once per date and shared by all reactions processed in this cell.
_total_reports_by_cutoff = {}


def signal_status_at(reaction, cutoff):
    """Evaluate the signal criteria for one reaction at one cutoff date.

    Reads the module-level ``col`` collection and the
    ``disproportionaly_analysis`` module.

    Parameters
    ----------
    reaction : dict
        Signal entry; its "manufacturer", "vaccine" and "symptom" values are
        used to build the queries.
    cutoff : datetime
        Latest RECVDATE included in the counts.

    Returns
    -------
    bool or None
        None when fewer than 3 reports combine the vaccine and the symptom
        (too few cases to evaluate); otherwise True/False for whether
        method 1 or method 2 flags a signal.
    """
    D_query = {
        "RECVDATE": {"$lte": cutoff},
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        "RECVDATE": {"$lte": cutoff},
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Count the joint cell first: when it is below the threshold none of the
    # other counts are needed.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    if cutoff not in _total_reports_by_cutoff:
        _total_reports_by_cutoff[cutoff] = col.count_documents({"RECVDATE": {"$lte": cutoff}})
    N = _total_reports_by_cutoff[cutoff]
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    # 2x2 table: upper-case = present, lower-case = absent (D = vaccine, E = symptom).
    De = D - DE                  # vaccine without the symptom
    dE = E - DE                  # symptom without the vaccine
    de = N - (DE + De + dE)      # neither
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    stats = disproportionaly_analysis
    rrr = stats.relative_reporting_ratio(contingency_table)
    sd_rrr = stats.sd_rrr(contingency_table)
    rrr_ci_lower, _ = stats.confidence_interval(rrr, sd_rrr)

    prr = stats.proportional_reporting_ratio(contingency_table)
    sd_prr = stats.sd_prr(contingency_table)
    prr_ci_lower, _ = stats.confidence_interval(prr, sd_prr)

    ror = stats.reporting_odds_ratio(contingency_table)
    sd_ror = stats.sd_ror(contingency_table)
    ror_ci_lower, _ = stats.confidence_interval(ror, sd_ror)

    chi_square = stats.chi_square_yates(contingency_table)
    IC = stats.information_component(contingency_table)
    IC_ci_lower, _ = stats.confidence_interval_information_component(contingency_table, IC)

    # Method 1: all point estimates above 2 and chi-square above 3.841
    # (the 5 % critical value for one degree of freedom).
    method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
    # Method 2: every lower confidence bound above its null value.
    method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1
    return bool(method_1 or method_2)


signals_dict_Pfizer = []
for i, reaction in enumerate(tqdm(filtered_signals_Pfizer), start=1):
    symptom = reaction['symptom']

    # --- month pass: earliest month-end at which the signal is present ---
    first_found = None
    cutoff = datetime(2023, 12, 31)
    while True:
        status = signal_status_at(reaction, cutoff)
        if status is None:          # fewer than 3 cases: earlier dates cannot have more
            break
        if status:
            first_found = cutoff
        cutoff = reduce_date_range_by_month(cutoff)
        if cutoff < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = NaN because it was not detected at the end of any month")
        signals_dict_Pfizer.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # --- day pass: earliest day inside that month with the signal present ---
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    cutoff = first_found
    while True:
        status = signal_status_at(reaction, cutoff)
        if status:
            first_found = cutoff
        # Stop when the cases run out or the first day of the month was checked.
        if status is None or cutoff.day == 1:
            break
        cutoff = reduce_date_range_by_day(cutoff)

    signals_dict_Pfizer.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  0%|          | 0/34 [00:00<?, ?it/s]

found month 2021-01-31 00:00:00 for symptom 1 Vomiting, checking day


  3%|▎         | 1/34 [07:31<4:08:22, 451.60s/it]

signal 1 Vomiting found at 2021-01-15 00:00:00
found month 2020-12-31 00:00:00 for symptom 2 Lymphadenopathy, checking day


  6%|▌         | 2/34 [13:36<3:33:41, 400.66s/it]

signal 2 Lymphadenopathy found at 2020-12-24 00:00:00


  9%|▉         | 3/34 [18:41<3:04:21, 356.81s/it]

Saved Rash as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 4 Pruritus, checking day


 12%|█▏        | 4/34 [25:55<3:13:37, 387.23s/it]

signal 4 Pruritus found at 2020-12-18 00:00:00


 15%|█▍        | 5/34 [30:51<2:51:18, 354.43s/it]

Saved Urticaria as first_found = None because it was not detected by the end of an month
found month 2021-01-31 00:00:00 for symptom 6 Angioedema, checking day


 18%|█▊        | 6/34 [35:04<2:29:14, 319.82s/it]

signal 6 Angioedema found at 2021-01-07 00:00:00
found month 2021-01-31 00:00:00 for symptom 7 Decreased appetite, checking day


 21%|██        | 7/34 [40:46<2:27:14, 327.21s/it]

signal 7 Decreased appetite found at 2021-01-07 00:00:00
found month 2021-03-31 00:00:00 for symptom 8 Insomnia, checking day


 24%|██▎       | 8/34 [48:36<2:41:32, 372.78s/it]

signal 8 Insomnia found at 2021-03-01 00:00:00
found month 2020-12-31 00:00:00 for symptom 9 Headache, checking day


 26%|██▋       | 9/34 [56:20<2:47:11, 401.28s/it]

signal 9 Headache found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 10 Dizziness, checking day


 29%|██▉       | 10/34 [1:03:41<2:45:25, 413.55s/it]

signal 10 Dizziness found at 2020-12-15 00:00:00
found month 2021-01-31 00:00:00 for symptom 11 Lethargy, checking day


 32%|███▏      | 11/34 [1:11:57<2:48:13, 438.85s/it]

signal 11 Lethargy found at 2021-01-27 00:00:00
found month 2020-12-31 00:00:00 for symptom 12 Facial paralysis, checking day


 35%|███▌      | 12/34 [1:16:43<2:23:48, 392.20s/it]

signal 12 Facial paralysis found at 2020-12-18 00:00:00
found month 2020-12-31 00:00:00 for symptom 13 Paraesthesia, checking day


 38%|███▊      | 13/34 [1:21:34<2:06:31, 361.52s/it]

signal 13 Paraesthesia found at 2020-12-15 00:00:00
found month 2020-12-31 00:00:00 for symptom 14 Hypoaesthesia, checking day


 41%|████      | 14/34 [1:28:22<2:05:14, 375.73s/it]

signal 14 Hypoaesthesia found at 2020-12-17 00:00:00
found month 2021-03-31 00:00:00 for symptom 15 Myocarditis, checking day


 44%|████▍     | 15/34 [1:32:31<1:46:49, 337.35s/it]

signal 15 Myocarditis found at 2021-03-25 00:00:00
found month 2021-03-31 00:00:00 for symptom 16 Pericarditis, checking day


 47%|████▋     | 16/34 [1:36:33<1:32:35, 308.64s/it]

signal 16 Pericarditis found at 2021-03-04 00:00:00
found month 2020-12-31 00:00:00 for symptom 17 Diarrhoea, checking day


 50%|█████     | 17/34 [1:41:16<1:25:17, 301.00s/it]

signal 17 Diarrhoea found at 2020-12-17 00:00:00
found month 2020-12-31 00:00:00 for symptom 18 Nausea, checking day


 53%|█████▎    | 18/34 [1:48:31<1:30:59, 341.20s/it]

signal 18 Nausea found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 19 Hyperhidrosis, checking day


 56%|█████▌    | 19/34 [1:55:29<1:31:03, 364.21s/it]

signal 19 Hyperhidrosis found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 20 Night sweats, checking day


 59%|█████▉    | 20/34 [2:00:49<1:21:54, 351.02s/it]

signal 20 Night sweats found at 2020-12-21 00:00:00


 62%|██████▏   | 21/34 [2:03:01<1:01:49, 285.31s/it]

Saved Erythema multiforme as first_found = None because it was not detected by the end of an month
found month 2020-12-31 00:00:00 for symptom 22 Arthralgia, checking day


 65%|██████▍   | 22/34 [2:07:45<56:59, 284.93s/it]  

signal 22 Arthralgia found at 2020-12-17 00:00:00
found month 2020-12-31 00:00:00 for symptom 23 Myalgia, checking day


 68%|██████▊   | 23/34 [2:14:51<1:00:01, 327.36s/it]

signal 23 Myalgia found at 2020-12-17 00:00:00
found month 2020-12-31 00:00:00 for symptom 24 Pain in extremity, checking day


 71%|███████   | 24/34 [2:22:05<59:51, 359.10s/it]  

signal 24 Pain in extremity found at 2020-12-17 00:00:00
found month 2021-01-31 00:00:00 for symptom 25 Heavy menstrual bleeding, checking day


 74%|███████▎  | 25/34 [2:25:22<46:34, 310.50s/it]

signal 25 Heavy menstrual bleeding found at 2021-01-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 26 Injection site pain, checking day


 76%|███████▋  | 26/34 [2:31:33<43:51, 328.88s/it]

signal 26 Injection site pain found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 27 Fatigue, checking day


 79%|███████▉  | 27/34 [2:38:58<42:24, 363.47s/it]

signal 27 Fatigue found at 2020-12-16 00:00:00
found month 2020-12-31 00:00:00 for symptom 28 Chills, checking day


 82%|████████▏ | 28/34 [2:46:15<38:34, 385.73s/it]

signal 28 Chills found at 2020-12-16 00:00:00
found month 2021-01-31 00:00:00 for symptom 29 Pyrexia, checking day


 85%|████████▌ | 29/34 [2:55:47<36:47, 441.54s/it]

signal 29 Pyrexia found at 2021-01-10 00:00:00


 88%|████████▊ | 30/34 [3:00:41<26:29, 397.27s/it]

Saved Injection site swelling as first_found = None because it was not detected by the end of an month
found month 2021-01-31 00:00:00 for symptom 31 Asthenia, checking day


 91%|█████████ | 31/34 [3:09:29<21:49, 436.35s/it]

signal 31 Asthenia found at 2021-01-07 00:00:00
found month 2020-12-31 00:00:00 for symptom 32 Malaise, checking day


 94%|█████████▍| 32/34 [3:16:29<14:22, 431.49s/it]

signal 32 Malaise found at 2020-12-16 00:00:00


 97%|█████████▋| 33/34 [3:21:15<06:27, 387.78s/it]

Saved Injection site pruritus as first_found = None because it was not detected by the end of an month


100%|██████████| 34/34 [3:23:22<00:00, 358.91s/it]

Saved Extensive swelling of vaccinated limb as first_found = None because it was not detected by the end of an month





In [147]:
# Write the Pfizer/BioNTech first-detection records to a JSON file.
file_name_filtered = "First_found_filtered_Signals_Pfizer.json"
with open(file_name_filtered, 'w') as results_file:
    json.dump(signals_dict_Pfizer, results_file)

In [106]:
# Show the Pfizer/BioNTech records (one dict per signal with the keys
# "manufacturer", "symptom" and "first_found").
signals_dict_Pfizer

[{'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Lymphadenopathy',
  'first_found': '2020-12-24'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Angioedema',
  'first_found': '2021-01-07'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Decreased appetite',
  'first_found': '2021-01-07'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Insomnia',
  'first_found': '2021-03-01'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Headache',
  'first_found': '2020-12-16'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Dizziness',
  'first_found': '2020-12-15'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Facial paralysis',
  'first_found': '2020-12-18'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Paraesthesia',
  'first_found': '2020-12-15'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Hypoaesthesia',
  'first_found': '2020-12-17'},
 {'manufacturer': 'PFIZER\\BIONTECH',
  'symptom': 'Myocarditis',
  'first_found': '2021-03-25'},
 {'manufac

### MODERNA

In [167]:
# Read the Moderna signal list from its JSON file.
file_name_filtered = "Signals_Moderna.json"
with open(file_name_filtered) as signals_file:  # text read mode is the default
    Signals_Moderna = json.load(signals_file)

In [168]:
# Keep only the entries whose status is something other than "Not found".
filtered_signals_Moderna = [
    entry
    for entry in Signals_Moderna
    if entry["status"] != "Not found"
]

In [169]:
# Number of Moderna signals left after dropping the "Not found" entries.
len(filtered_signals_Moderna)

35

In [170]:
# Earliest-detection search for every Moderna signal.
#
# All counts are cumulative (reports with RECVDATE <= cutoff). Per reaction:
#   1. month pass - walk month-ends backwards from 2023-12-31 until the cutoff
#      drops below `end_date`, remembering the earliest month-end at which the
#      signal criteria hold;
#   2. day pass   - walk backwards one day at a time from that month-end to the
#      1st of the month, remembering the earliest day at which they still hold.
# Either pass stops as soon as fewer than 3 vaccine+symptom reports remain.

# N (all reports up to a cutoff) depends only on the cutoff date, so it is
# counted once per date and shared by all reactions processed in this cell.
_total_reports_by_cutoff = {}


def signal_status_at(reaction, cutoff):
    """Evaluate the signal criteria for one reaction at one cutoff date.

    Reads the module-level ``col`` collection and the
    ``disproportionaly_analysis`` module.

    Parameters
    ----------
    reaction : dict
        Signal entry; its "manufacturer", "vaccine" and "symptom" values are
        used to build the queries.
    cutoff : datetime
        Latest RECVDATE included in the counts.

    Returns
    -------
    bool or None
        None when fewer than 3 reports combine the vaccine and the symptom
        (too few cases to evaluate); otherwise True/False for whether
        method 1 or method 2 flags a signal.
    """
    D_query = {
        "RECVDATE": {"$lte": cutoff},
        "vax_data": {
            "$elemMatch": {
                "VAX_MANU": reaction["manufacturer"],
                "VAX_TYPE": reaction["vaccine"]
            }
        }
    }
    E_query = {
        "RECVDATE": {"$lte": cutoff},
        "symptoms": {
            "$elemMatch": {
                "$in": [reaction["symptom"]]
            }
        }
    }

    # Count the joint cell first: when it is below the threshold none of the
    # other counts are needed.
    DE = col.count_documents({'$and': [D_query, E_query]})
    if DE < 3:
        return None

    if cutoff not in _total_reports_by_cutoff:
        _total_reports_by_cutoff[cutoff] = col.count_documents({"RECVDATE": {"$lte": cutoff}})
    N = _total_reports_by_cutoff[cutoff]
    D = col.count_documents(D_query)
    E = col.count_documents(E_query)

    # 2x2 table: upper-case = present, lower-case = absent (D = vaccine, E = symptom).
    De = D - DE                  # vaccine without the symptom
    dE = E - DE                  # symptom without the vaccine
    de = N - (DE + De + dE)      # neither
    contingency_table = [
        [DE, dE],
        [De, de]
    ]

    stats = disproportionaly_analysis
    rrr = stats.relative_reporting_ratio(contingency_table)
    sd_rrr = stats.sd_rrr(contingency_table)
    rrr_ci_lower, _ = stats.confidence_interval(rrr, sd_rrr)

    prr = stats.proportional_reporting_ratio(contingency_table)
    sd_prr = stats.sd_prr(contingency_table)
    prr_ci_lower, _ = stats.confidence_interval(prr, sd_prr)

    ror = stats.reporting_odds_ratio(contingency_table)
    sd_ror = stats.sd_ror(contingency_table)
    ror_ci_lower, _ = stats.confidence_interval(ror, sd_ror)

    chi_square = stats.chi_square_yates(contingency_table)
    IC = stats.information_component(contingency_table)
    IC_ci_lower, _ = stats.confidence_interval_information_component(contingency_table, IC)

    # Method 1: all point estimates above 2 and chi-square above 3.841
    # (the 5 % critical value for one degree of freedom).
    method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
    # Method 2: every lower confidence bound above its null value.
    method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1
    return bool(method_1 or method_2)


signals_dict_Moderna = []
for i, reaction in enumerate(tqdm(filtered_signals_Moderna), start=1):
    symptom = reaction['symptom']

    # --- month pass: earliest month-end at which the signal is present ---
    first_found = None
    cutoff = datetime(2023, 12, 31)
    while True:
        status = signal_status_at(reaction, cutoff)
        if status is None:          # fewer than 3 cases: earlier dates cannot have more
            break
        if status:
            first_found = cutoff
        cutoff = reduce_date_range_by_month(cutoff)
        if cutoff < end_date:
            break

    if first_found is None:
        print(f"Saved {symptom} as first_found = NaN because it was not detected at the end of any month")
        signals_dict_Moderna.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # --- day pass: earliest day inside that month with the signal present ---
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    cutoff = first_found
    while True:
        status = signal_status_at(reaction, cutoff)
        if status:
            first_found = cutoff
        # Stop when the cases run out or the first day of the month was checked.
        if status is None or cutoff.day == 1:
            break
        cutoff = reduce_date_range_by_day(cutoff)

    signals_dict_Moderna.append({
        "manufacturer": reaction["manufacturer"],
        "symptom": reaction["symptom"],
        "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
    })
    print(f"signal {i} {symptom} found at {first_found}")
    

  0%|          | 0/35 [00:00<?, ?it/s]

found month 2020-12-31 00:00:00 for symptom 1 Asthenia, checking day


  3%|▎         | 1/35 [03:45<2:08:03, 225.98s/it]

signal 1 Asthenia found at 2020-12-31 00:00:00
found month 2020-12-31 00:00:00 for symptom 2 Hyperhidrosis, checking day


  6%|▌         | 2/35 [08:49<2:29:30, 271.82s/it]

signal 2 Hyperhidrosis found at 2020-12-23 00:00:00
found month 2020-12-31 00:00:00 for symptom 3 Lymphadenopathy, checking day


  9%|▊         | 3/35 [13:40<2:29:36, 280.50s/it]

signal 3 Lymphadenopathy found at 2020-12-30 00:00:00
found month 2020-12-31 00:00:00 for symptom 4 Pruritus, checking day


  9%|▊         | 3/35 [18:01<3:12:16, 360.50s/it]


KeyboardInterrupt: 

In [None]:
# Write the Moderna first-detection records to a JSON file.
file_name_filtered = "First_found_filtered_Signals_Moderna.json"
with open(file_name_filtered, 'w') as results_file:
    json.dump(signals_dict_Moderna, results_file)

In [None]:
# Number of Moderna records collected by the search loop.
len(signals_dict_Moderna)

### JANSSEN

In [148]:
# Read the Janssen signal list from its JSON file.
file_name_filtered = "Signals_Janssen.json"
with open(file_name_filtered) as signals_file:  # text read mode is the default
    Signals_Janssen = json.load(signals_file)

In [149]:
# Keep only the entries whose status is something other than "Not found".
filtered_signals_Janssen = [
    entry
    for entry in Signals_Janssen
    if entry["status"] != "Not found"
]

In [150]:
# Find the earliest date on which each Janssen signal is detectable.
#
# For every reaction the cut-off date is moved backwards in two passes:
#   1. month-end by month-end, starting at 2023-12-31 and stopping once the
#      cut-off falls below `end_date`;
#   2. day by day, starting at the earliest month-end that showed a signal and
#      stopping at the first day of that month.
# Each cut-off is evaluated on the 2x2 contingency table built from the reports
# received on or before it.  Either pass stops early as soon as fewer than 3
# reports combine the vaccine and the symptom.
#
# Result: one dict per reaction in `signals_dict_Janssen`, with `first_found`
# as "YYYY-MM-DD", or the string "NaN" when no month-end showed a signal.
signals_dict_Janssen = []
for i, reaction in enumerate(tqdm(filtered_signals_Janssen), start=1):
    symptom = reaction['symptom']
    start_date = datetime(2023, 12, 31)
    first_found = None  # earliest cut-off at which a signal was seen so far

    # ---- Pass 1: month-end resolution -------------------------------------
    while True:
        # N: all reports received up to the cut-off.
        N_query = {"RECVDATE": {"$lte": start_date}}
        N = col.count_documents(N_query)

        # D: reports mentioning this manufacturer/vaccine.
        D_query = {
            "RECVDATE": {"$lte": start_date},
            "vax_data": {
                "$elemMatch": {
                    "VAX_MANU": reaction["manufacturer"],
                    "VAX_TYPE": reaction["vaccine"]
                }
            }
        }
        D = col.count_documents(D_query)

        # E: reports mentioning this symptom.
        E_query = {
            "RECVDATE": {"$lte": start_date},
            "symptoms": {
                "$elemMatch": {
                    "$in": [reaction["symptom"]]
                }
            }
        }
        E = col.count_documents(E_query)

        # DE: reports mentioning both the vaccine and the symptom.
        DE_query = {'$and': [D_query, E_query]}
        DE = col.count_documents(DE_query)

        if DE < 3:
            # Too few vaccine+symptom reports; stop going further back.
            break

        # Remaining cells of the 2x2 table.  count_documents() always returns
        # an int, so none of these can be None; the former `None` guard was
        # dead code whose `continue` would have retried the same date forever.
        De = D - DE                  # vaccine, without the symptom
        dE = E - DE                  # symptom, without the vaccine
        de = N - (DE + De + dE)      # neither

        contingency_table = [
            [DE, dE],
            [De, de]
        ]

        # Disproportionality measures and their confidence intervals.
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)

        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)

        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)

        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)

        # A signal is raised when either rule holds: point estimates above 2
        # with chi-square > 3.841, or all lower confidence bounds above their
        # null value.
        method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1

        if method_1 or method_2:
            first_found = start_date

        # Step to the previous month-end whether or not a signal was seen.
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        # No month-end showed a signal: store the "NaN" placeholder.
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Janssen.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # ---- Pass 2: day resolution inside the month found above ---------------
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    start_date = first_found
    while True:
        N_query = {"RECVDATE": {"$lte": start_date}}
        N = col.count_documents(N_query)

        D_query = {
            "RECVDATE": {"$lte": start_date},
            "vax_data": {
                "$elemMatch": {
                    "VAX_MANU": reaction["manufacturer"],
                    "VAX_TYPE": reaction["vaccine"]
                }
            }
        }
        D = col.count_documents(D_query)

        E_query = {
            "RECVDATE": {"$lte": start_date},
            "symptoms": {
                "$elemMatch": {
                    "$in": [reaction["symptom"]]
                }
            }
        }
        E = col.count_documents(E_query)

        DE_query = {'$and': [D_query, E_query]}
        DE = col.count_documents(DE_query)

        # With fewer than 3 vaccine+symptom reports the search ends here.
        keep_searching = DE >= 3
        if keep_searching:
            De = D - DE
            dE = E - DE
            de = N - (DE + De + dE)

            contingency_table = [
                [DE, dE],
                [De, de]
            ]

            rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
            rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)

            prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
            prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)

            ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
            ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)

            chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
            IC = disproportionaly_analysis.information_component(contingency_table)
            IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)

            method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
            method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1

            if method_1 or method_2:
                first_found = start_date

            # The first day of the month is the last day examined.
            keep_searching = start_date.day != 1

        if not keep_searching:
            # Single exit point: record the earliest date seen and move on.
            signal_info = {
                "manufacturer": reaction["manufacturer"],
                "symptom": reaction["symptom"],
                "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
            }
            signals_dict_Janssen.append(signal_info)
            print(f"signal {i} {symptom} found at {first_found}")
            break

        # Days without a signal do not stop the search; keep stepping back.
        start_date = reduce_date_range_by_day(start_date)
    

  3%|▎         | 1/35 [01:06<37:37, 66.39s/it]

Saved Lymphadenopathy as first_found = None because it was not detected by the end of an month
found month 2021-04-30 00:00:00 for symptom 2 Immune thrombocytopenia, checking day


  6%|▌         | 2/35 [02:19<38:46, 70.50s/it]

signal 2 Immune thrombocytopenia found at 2021-04-24 00:00:00
found month 2021-03-31 00:00:00 for symptom 3 Headache, checking day


  9%|▊         | 3/35 [04:55<58:22, 109.44s/it]

signal 3 Headache found at 2021-03-04 00:00:00
found month 2021-03-31 00:00:00 for symptom 4 Dizziness, checking day


 11%|█▏        | 4/35 [07:05<1:00:46, 117.62s/it]

signal 4 Dizziness found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 5 Tremor, checking day


 14%|█▍        | 5/35 [08:59<58:05, 116.17s/it]  

signal 5 Tremor found at 2021-03-05 00:00:00


 17%|█▋        | 6/35 [10:07<48:13, 99.77s/it] 

Saved Urticaria as first_found = None because it was not detected by the end of an month


 20%|██        | 7/35 [11:10<40:58, 87.81s/it]

Saved Hypersensitivity as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 8 Paraesthesia, checking day


 23%|██▎       | 8/35 [13:04<43:17, 96.20s/it]

signal 8 Paraesthesia found at 2021-03-12 00:00:00
found month 2021-03-31 00:00:00 for symptom 9 Hypoaesthesia, checking day


 26%|██▌       | 9/35 [14:59<44:10, 101.92s/it]

signal 9 Hypoaesthesia found at 2021-03-10 00:00:00
found month 2021-05-31 00:00:00 for symptom 10 Facial paralysis, checking day


 29%|██▊       | 10/35 [16:36<41:55, 100.60s/it]

signal 10 Facial paralysis found at 2021-05-11 00:00:00
found month 2021-03-31 00:00:00 for symptom 11 Tinnitus, checking day


 31%|███▏      | 11/35 [18:05<38:45, 96.92s/it] 

signal 11 Tinnitus found at 2021-03-09 00:00:00
found month 2021-08-31 00:00:00 for symptom 12 Guillain-Barre syndrome, checking day


 34%|███▍      | 12/35 [19:35<36:19, 94.78s/it]

signal 12 Guillain-Barre syndrome found at 2021-08-27 00:00:00


 37%|███▋      | 13/35 [20:15<28:44, 78.41s/it]

Saved Myelitis as first_found = None because it was not detected by the end of an month


 40%|████      | 14/35 [21:02<24:04, 68.79s/it]

Saved Myocarditis as first_found = None because it was not detected by the end of an month


 43%|████▎     | 15/35 [21:47<20:35, 61.77s/it]

Saved Pericarditis as first_found = None because it was not detected by the end of an month
found month 2021-07-31 00:00:00 for symptom 16 Capillary leak syndrome, checking day


 46%|████▌     | 16/35 [22:31<17:49, 56.29s/it]

signal 16 Capillary leak syndrome found at 2021-07-30 00:00:00
found month 2021-09-30 00:00:00 for symptom 17 Cutaneous vasculitis, checking day


 49%|████▊     | 17/35 [23:49<18:50, 62.78s/it]

signal 17 Cutaneous vasculitis found at 2021-09-01 00:00:00
found month 2021-03-31 00:00:00 for symptom 18 Nausea, checking day


 51%|█████▏    | 18/35 [26:02<23:46, 83.88s/it]

signal 18 Nausea found at 2021-03-04 00:00:00
found month 2021-08-31 00:00:00 for symptom 19 Cough, checking day


 54%|█████▍    | 19/35 [28:01<25:10, 94.41s/it]

signal 19 Cough found at 2021-08-30 00:00:00
found month 2021-03-31 00:00:00 for symptom 20 Oropharyngeal pain, checking day


 57%|█████▋    | 20/35 [29:49<24:38, 98.58s/it]

signal 20 Oropharyngeal pain found at 2021-03-18 00:00:00


 60%|██████    | 21/35 [30:38<19:32, 83.74s/it]

Saved Sneezing as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 22 Diarrhoea, checking day


 63%|██████▎   | 22/35 [32:29<19:55, 91.94s/it]

signal 22 Diarrhoea found at 2021-03-15 00:00:00
found month 2021-03-31 00:00:00 for symptom 23 Vomiting, checking day


 66%|██████▌   | 23/35 [34:28<19:59, 99.95s/it]

signal 23 Vomiting found at 2021-03-10 00:00:00


 69%|██████▊   | 24/35 [35:40<16:45, 91.44s/it]

Saved Rash as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 25 Hyperhidrosis, checking day


 71%|███████▏  | 25/35 [37:30<16:10, 97.05s/it]

signal 25 Hyperhidrosis found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 26 Myalgia, checking day


 74%|███████▍  | 26/35 [39:34<15:47, 105.23s/it]

signal 26 Myalgia found at 2021-03-06 00:00:00
found month 2021-03-31 00:00:00 for symptom 27 Arthralgia, checking day


 77%|███████▋  | 27/35 [41:35<14:39, 109.90s/it]

signal 27 Arthralgia found at 2021-03-07 00:00:00
found month 2021-07-31 00:00:00 for symptom 28 Muscular weakness, checking day


 80%|████████  | 28/35 [43:27<12:53, 110.47s/it]

signal 28 Muscular weakness found at 2021-07-14 00:00:00
found month 2021-03-31 00:00:00 for symptom 29 Back pain, checking day


 83%|████████▎ | 29/35 [45:15<10:58, 109.76s/it]

signal 29 Back pain found at 2021-03-07 00:00:00
found month 2021-03-31 00:00:00 for symptom 30 Pain in extremity, checking day


 86%|████████▌ | 30/35 [47:25<09:39, 115.95s/it]

signal 30 Pain in extremity found at 2021-03-27 00:00:00
found month 2021-03-31 00:00:00 for symptom 31 Fatigue, checking day


 89%|████████▊ | 31/35 [49:51<08:19, 124.79s/it]

signal 31 Fatigue found at 2021-03-05 00:00:00


 91%|█████████▏| 32/35 [51:08<05:31, 110.49s/it]

Saved Injection site pain as first_found = None because it was not detected by the end of an month


 94%|█████████▍| 33/35 [52:19<03:17, 98.68s/it] 

Saved Injection site swelling as first_found = None because it was not detected by the end of an month
found month 2021-03-31 00:00:00 for symptom 34 Chills, checking day


 97%|█████████▋| 34/35 [54:31<01:48, 108.68s/it]

signal 34 Chills found at 2021-03-05 00:00:00
found month 2021-03-31 00:00:00 for symptom 35 Pyrexia, checking day


100%|██████████| 35/35 [57:14<00:00, 98.12s/it] 

signal 35 Pyrexia found at 2021-03-06 00:00:00





In [151]:
# Persist the Janssen first-detection records as JSON.
# Mode 'w' overwrites any existing file of the same name.
file_name_filtered = "First_found_filtered_Signals_Janssen.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_dict_Janssen, f)

In [114]:
# Number of Janssen records currently held in signals_dict_Janssen.
len(signals_dict_Janssen)

23

### NOVAVAX

In [138]:
# Load the Novavax signal list from JSON; the cells below read the
# "status", "symptom", "manufacturer" and "vaccine" keys of each entry.
file_name_filtered = "Signals_Novavax.json"
with open(file_name_filtered, 'r') as f:
    Signals_Novavax = json.load(f)

In [139]:
# Keep only the Novavax entries whose status is anything other than
# "Not found"; only these are passed to the date search below.
filtered_signals_Novavax = [
    candidate
    for candidate in Signals_Novavax
    if candidate["status"] != "Not found"
]

In [140]:
# Number of Novavax signals that will be searched for a first-detection date.
len(filtered_signals_Novavax)

23

In [141]:
# Find the earliest date on which each Novavax signal is detectable.
#
# For every reaction the cut-off date is moved backwards in two passes:
#   1. month-end by month-end, starting at 2023-12-31 and stopping once the
#      cut-off falls below `end_date`;
#   2. day by day, starting at the earliest month-end that showed a signal and
#      stopping at the first day of that month.
# Each cut-off is evaluated on the 2x2 contingency table built from the reports
# received on or before it.  Either pass stops early as soon as fewer than 3
# reports combine the vaccine and the symptom.
#
# Result: one dict per reaction in `signals_dict_Novavax`, with `first_found`
# as "YYYY-MM-DD", or the string "NaN" when no month-end showed a signal.
signals_dict_Novavax = []
for i, reaction in enumerate(tqdm(filtered_signals_Novavax), start=1):
    symptom = reaction['symptom']
    start_date = datetime(2023, 12, 31)
    first_found = None  # earliest cut-off at which a signal was seen so far

    # ---- Pass 1: month-end resolution -------------------------------------
    while True:
        # N: all reports received up to the cut-off.
        N_query = {"RECVDATE": {"$lte": start_date}}
        N = col.count_documents(N_query)

        # D: reports mentioning this manufacturer/vaccine.
        D_query = {
            "RECVDATE": {"$lte": start_date},
            "vax_data": {
                "$elemMatch": {
                    "VAX_MANU": reaction["manufacturer"],
                    "VAX_TYPE": reaction["vaccine"]
                }
            }
        }
        D = col.count_documents(D_query)

        # E: reports mentioning this symptom.
        E_query = {
            "RECVDATE": {"$lte": start_date},
            "symptoms": {
                "$elemMatch": {
                    "$in": [reaction["symptom"]]
                }
            }
        }
        E = col.count_documents(E_query)

        # DE: reports mentioning both the vaccine and the symptom.
        DE_query = {'$and': [D_query, E_query]}
        DE = col.count_documents(DE_query)

        if DE < 3:
            # Too few vaccine+symptom reports; stop going further back.
            break

        # Remaining cells of the 2x2 table.  count_documents() always returns
        # an int, so none of these can be None; the former `None` guard was
        # dead code whose `continue` would have retried the same date forever.
        De = D - DE                  # vaccine, without the symptom
        dE = E - DE                  # symptom, without the vaccine
        de = N - (DE + De + dE)      # neither

        contingency_table = [
            [DE, dE],
            [De, de]
        ]

        # Disproportionality measures and their confidence intervals.
        rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
        rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)

        prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
        prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)

        ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
        ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)

        chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
        IC = disproportionaly_analysis.information_component(contingency_table)
        IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)

        # A signal is raised when either rule holds: point estimates above 2
        # with chi-square > 3.841, or all lower confidence bounds above their
        # null value.
        method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
        method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1

        if method_1 or method_2:
            first_found = start_date

        # Step to the previous month-end whether or not a signal was seen.
        start_date = reduce_date_range_by_month(start_date)
        if start_date < end_date:
            break

    if first_found is None:
        # No month-end showed a signal: store the "NaN" placeholder.
        print(f"Saved {symptom} as first_found = None because it was not detected by the end of an month")
        signals_dict_Novavax.append({"manufacturer": reaction["manufacturer"], "symptom": reaction["symptom"], "first_found": "NaN"})
        continue

    # ---- Pass 2: day resolution inside the month found above ---------------
    print(f"found month {first_found} for symptom {i} {symptom}, checking day")
    start_date = first_found
    while True:
        N_query = {"RECVDATE": {"$lte": start_date}}
        N = col.count_documents(N_query)

        D_query = {
            "RECVDATE": {"$lte": start_date},
            "vax_data": {
                "$elemMatch": {
                    "VAX_MANU": reaction["manufacturer"],
                    "VAX_TYPE": reaction["vaccine"]
                }
            }
        }
        D = col.count_documents(D_query)

        E_query = {
            "RECVDATE": {"$lte": start_date},
            "symptoms": {
                "$elemMatch": {
                    "$in": [reaction["symptom"]]
                }
            }
        }
        E = col.count_documents(E_query)

        DE_query = {'$and': [D_query, E_query]}
        DE = col.count_documents(DE_query)

        # With fewer than 3 vaccine+symptom reports the search ends here.
        keep_searching = DE >= 3
        if keep_searching:
            De = D - DE
            dE = E - DE
            de = N - (DE + De + dE)

            contingency_table = [
                [DE, dE],
                [De, de]
            ]

            rrr, sd_rrr = disproportionaly_analysis.relative_reporting_ratio(contingency_table), disproportionaly_analysis.sd_rrr(contingency_table)
            rrr_ci_lower, rrr_ci_upper = disproportionaly_analysis.confidence_interval(rrr, sd_rrr)

            prr, sd_prr = disproportionaly_analysis.proportional_reporting_ratio(contingency_table), disproportionaly_analysis.sd_prr(contingency_table)
            prr_ci_lower, prr_ci_upper = disproportionaly_analysis.confidence_interval(prr, sd_prr)

            ror, sd_ror = disproportionaly_analysis.reporting_odds_ratio(contingency_table), disproportionaly_analysis.sd_ror(contingency_table)
            ror_ci_lower, ror_ci_upper = disproportionaly_analysis.confidence_interval(ror, sd_ror)

            chi_square = disproportionaly_analysis.chi_square_yates(contingency_table)
            IC = disproportionaly_analysis.information_component(contingency_table)
            IC_ci_lower, IC_ci_upper = disproportionaly_analysis.confidence_interval_information_component(contingency_table, IC)

            method_1 = rrr > 2 and prr > 2 and ror > 2 and chi_square > 3.841
            method_2 = IC_ci_lower > 0 and ror_ci_lower > 1 and prr_ci_lower > 1 and rrr_ci_lower > 1

            if method_1 or method_2:
                first_found = start_date

            # The first day of the month is the last day examined.
            keep_searching = start_date.day != 1

        if not keep_searching:
            # Single exit point: record the earliest date seen and move on.
            signal_info = {
                "manufacturer": reaction["manufacturer"],
                "symptom": reaction["symptom"],
                "first_found": first_found.strftime("%Y-%m-%d")  # Convert datetime to string
            }
            signals_dict_Novavax.append(signal_info)
            print(f"signal {i} {symptom} found at {first_found}")
            break

        # Days without a signal do not stop the search; keep stepping back.
        start_date = reduce_date_range_by_day(start_date)
    

  4%|▍         | 1/23 [00:34<12:42, 34.66s/it]

Saved Headache as first_found = None because it was not detected by the end of an month


  9%|▊         | 2/23 [01:02<10:45, 30.72s/it]

Saved Nausea as first_found = None because it was not detected by the end of an month


 13%|█▎        | 3/23 [01:25<09:05, 27.29s/it]

Saved Vomiting as first_found = None because it was not detected by the end of an month


 17%|█▋        | 4/23 [01:48<08:06, 25.62s/it]

Saved Myalgia as first_found = None because it was not detected by the end of an month


 22%|██▏       | 5/23 [02:11<07:18, 24.35s/it]

Saved Arthralgia as first_found = None because it was not detected by the end of an month


 26%|██▌       | 6/23 [02:38<07:13, 25.52s/it]

Saved Injection site pain as first_found = None because it was not detected by the end of an month


 30%|███       | 7/23 [03:10<07:21, 27.61s/it]

Saved Fatigue as first_found = None because it was not detected by the end of an month


 35%|███▍      | 8/23 [03:33<06:29, 25.95s/it]

Saved Malaise as first_found = None because it was not detected by the end of an month


 39%|███▉      | 9/23 [03:55<05:49, 24.99s/it]

Saved Injection site swelling as first_found = None because it was not detected by the end of an month


 43%|████▎     | 10/23 [04:33<06:13, 28.73s/it]

Saved Pyrexia as first_found = None because it was not detected by the end of an month


 48%|████▊     | 11/23 [04:57<05:29, 27.44s/it]

Saved Pain in extremity as first_found = None because it was not detected by the end of an month


 52%|█████▏    | 12/23 [05:14<04:25, 24.10s/it]

Saved Injection site pruritus as first_found = None because it was not detected by the end of an month


 57%|█████▋    | 13/23 [05:40<04:06, 24.69s/it]

Saved Chills as first_found = None because it was not detected by the end of an month


 61%|██████    | 14/23 [05:52<03:08, 20.90s/it]

Saved Injection site warmth as first_found = None because it was not detected by the end of an month


 65%|██████▌   | 15/23 [06:09<02:37, 19.71s/it]

Saved Lymphadenopathy as first_found = None because it was not detected by the end of an month
found month 2022-08-31 00:00:00 for symptom 16 Paraesthesia, checking day


 70%|██████▉   | 16/23 [07:04<03:32, 30.36s/it]

signal 16 Paraesthesia found at 2022-08-07 00:00:00
found month 2022-08-31 00:00:00 for symptom 17 Hypoaesthesia, checking day


 74%|███████▍  | 17/23 [07:57<03:44, 37.34s/it]

signal 17 Hypoaesthesia found at 2022-08-07 00:00:00
found month 2022-08-31 00:00:00 for symptom 18 Pericarditis, checking day


 78%|███████▊  | 18/23 [08:19<02:43, 32.68s/it]

signal 18 Pericarditis found at 2022-08-31 00:00:00


 83%|████████▎ | 19/23 [08:28<01:42, 25.62s/it]

Saved Hypertension as first_found = None because it was not detected by the end of an month


 87%|████████▋ | 20/23 [08:45<01:09, 23.03s/it]

Saved Rash as first_found = None because it was not detected by the end of an month


 91%|█████████▏| 21/23 [08:52<00:36, 18.07s/it]

Saved Erythema as first_found = None because it was not detected by the end of an month


 96%|█████████▌| 22/23 [09:10<00:18, 18.21s/it]

Saved Pruritus as first_found = None because it was not detected by the end of an month


100%|██████████| 23/23 [09:23<00:00, 24.50s/it]

Saved Urticaria as first_found = None because it was not detected by the end of an month





In [142]:
# Persist the Novavax first-detection records as JSON.
# Mode 'w' overwrites any existing file of the same name.
file_name_filtered = "First_found_filtered_Signals_Novavax.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_dict_Novavax, f)

In [128]:
# Display the Novavax first-detection records held in the kernel.
signals_dict_Novavax

[{'manufacturer': 'NOVAVAX',
  'symptom': 'Paraesthesia',
  'first_found': '2022-08-07'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Hypoaesthesia',
  'first_found': '2022-08-07'},
 {'manufacturer': 'NOVAVAX',
  'symptom': 'Pericarditis',
  'first_found': '2022-08-31'}]

### Combine all signals and save them as a dict keyed by symptom

In [157]:
# Merge the per-manufacturer result lists into one dict keyed by symptom:
#   {symptom: [{"manufacturer": ..., "first_found": ...}, ...]}
signals_dict_combined = {}

# The tuple order fixes both the key order of the combined dict and the order
# of manufacturers within each symptom: Novavax, Janssen, Moderna, Pfizer.
for manufacturer_signals in (
    signals_dict_Novavax,
    signals_dict_Janssen,
    signals_dict_Moderna,
    signals_dict_Pfizer,
):
    for entry in manufacturer_signals:
        symptom = entry['symptom']
        # Copy every field except the symptom.  The earlier entry.pop('symptom')
        # mutated the source lists in place, so re-running this cell raised
        # KeyError and the per-manufacturer lists lost their symptom names.
        details = {key: value for key, value in entry.items() if key != 'symptom'}
        signals_dict_combined.setdefault(symptom, []).append(details)

print(signals_dict_combined)

{'Headache': [{'manufacturer': 'NOVAVAX', 'first_found': 'NaN'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-04'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-21'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020-12-16'}], 'Nausea': [{'manufacturer': 'NOVAVAX', 'first_found': 'NaN'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-04'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-23'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020-12-16'}], 'Vomiting': [{'manufacturer': 'NOVAVAX', 'first_found': 'NaN'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-10'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-24'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2021-01-15'}], 'Myalgia': [{'manufacturer': 'NOVAVAX', 'first_found': 'NaN'}, {'manufacturer': 'JANSSEN', 'first_found': '2021-03-06'}, {'manufacturer': 'MODERNA', 'first_found': '2020-12-23'}, {'manufacturer': 'PFIZER\\BIONTECH', 'first_found': '2020-12-17'}], 'Arthralgi

In [158]:
# Persist the combined symptom -> [manufacturer records] mapping as JSON.
# Mode 'w' overwrites any existing file of the same name.
file_name_filtered = "All_signals_filtered_first_found.json"
with open(file_name_filtered, 'w') as f:
    json.dump(signals_dict_combined, f)