# Projects for Analysis of Anti-Depressant Drugs’s Adverse Events by FDA Adverse Event Reporting System (FAERS) from January 2019 to December 2023

## 7. STAT ANALYSIS W PRR & ROR

In [1]:
# # re-load data if needed
import pandas as pd
data = pd.read_csv("FAERS_mapped_data.csv")

data['event_dt'] = pd.to_datetime(data['event_dt'])

print(data.info())
print(data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118577 entries, 0 to 118576
Data columns (total 14 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   primaryid      118577 non-null  int64         
 1   role_cod       118577 non-null  object        
 2   prod_ai        118577 non-null  object        
 3   indi_pt        118577 non-null  object        
 4   event_dt       118577 non-null  datetime64[ns]
 5   age            118577 non-null  float64       
 6   age_grp        118577 non-null  object        
 7   gender         118577 non-null  object        
 8   occr_country   118577 non-null  object        
 9   pt             118577 non-null  object        
 10  outc_cod       118577 non-null  object        
 11  prod_ai_group  118577 non-null  object        
 12  drug_cate      118577 non-null  object        
 13  ae_cate        118577 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(1

In [2]:
import pandas as pd
from scipy.stats import fisher_exact, chi2
from scipy.stats import chi2_contingency
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
pre_covid = data[data['event_dt'] < '2020-03-01']
during_covid = data[(data['event_dt'] >= '2020-03-01') & (data['event_dt'] < '2022-01-01')]
post_covid = data[data['event_dt'] >= '2022-01-01']

In [4]:
data[data['drug_cate'] == 'antidepressants']['prod_ai_group'].value_counts()

prod_ai_group
selective serotonin reuptake inhibitors             46931
serotonin and norepinephrine reuptake inhibitors    24518
atypical antidepressants                            16245
n-methyl d-aspartate antagonists                     8956
tricyclic and tetracyclic antidepressants            7312
benzodiazepines                                      3377
nonbenzodiazepine receptor modulator                  685
monoamine oxidase inhibitors                          343
acid-a receptor positive modulators                   173
azaspirodecanedione                                   161
monoamine oxidase inhibitors                           61
anticonvulsants                                         1
Name: count, dtype: int64

In [5]:
import pandas as pd
import numpy as np
from scipy.stats import fisher_exact, chi2

# Function to calculate PRR and ROR for antidepressants and others
def calculate_prr_ror(period_data, antidepressant_groups, event_column="ae_cate"):
    """
    Calculate PRR and ROR for each `ae_cate` and antidepressant type, with separate p-values for PRR and ROR.
    """
    results = []

    # Split the dataset for antidepressants and others
    all_data = period_data

    # Loop through antidepressant groups and calculate PRR and ROR
    for group in antidepressant_groups:
        # Data for the specific antidepressant group
        group_data = all_data[(all_data["prod_ai_group"] == group) & (all_data["drug_cate"] == 'antidepressants')]
        # All other data excluding the current group (can be other antidepressants and 'others' drug category)
        other_data = all_data[all_data["prod_ai_group"] != group]

        for event in period_data[event_column].unique():
            # Counts for the event in the group
            a = len(group_data[group_data[event_column] == event])
            b = len(other_data[other_data[event_column] == event])
            c = len(group_data) - a
            d = len(other_data) - b

            # Skip invalid contingency tables
            if any(x < 0 for x in [a, b, c, d]) or (a + b == 0) or (c + d == 0):
                continue

            prr = None
            ror = None
            p_value_prr = None
            p_value_ror = None

            try:
                # PRR Calculation
                if (a + b > 0) and (c + d > 0):
                    prr = (a / (a + b)) / (c / (c + d))

                    # Fisher's exact test for PRR significance
                    contingency_table_prr = [[a, b], [c, d]]
                    _, p_value_prr = fisher_exact(contingency_table_prr)

                # ROR Calculation
                if b > 0 and c > 0 and d > 0:
                    ror = (a * d) / (b * c)
                    se_log_ror = ((1 / a) + (1 / b) + (1 / c) + (1 / d)) ** 0.5
                    log_ror = np.log(ror)

                    # Wald test for ROR significance (calculating p-value)
                    z_score_ror = log_ror / se_log_ror
                    p_value_ror = 2 * (1 - chi2.cdf(abs(z_score_ror), df=1))

            except ZeroDivisionError:
                pass

            results.append({
                "Antidepressant_Type": group,
                "AE_Cate": event,
                "PRR": prr,
                "PRR_P_Value": p_value_prr,
                "ROR": ror,
                "ROR_P_Value": p_value_ror
            })

    return pd.DataFrame(results)

In [6]:
# Get unique antidepressant types
antidepressant_groups = [
    'selective serotonin reuptake inhibitors',
    'serotonin and norepinephrine reuptake inhibitors',
    'atypical antidepressants',
    'n-methyl d-aspartate antagonists',
    'tricyclic and tetracyclic antidepressants',
    'benzodiazepines',
    'nonbenzodiazepine receptor modulator',
    'monoamine oxidase inhibitors',
    'acid-a receptor positive modulators',
    'azaspirodecanedione',
    'monoamine oxidase inhibitors',
    'anticonvulsants'
]

# Calculate PRR and ROR for each period
pre_covid_results = calculate_prr_ror(pre_covid, antidepressant_groups)
during_covid_results = calculate_prr_ror(during_covid, antidepressant_groups)
post_covid_results = calculate_prr_ror(post_covid, antidepressant_groups)

In [7]:
# Summarize PRR, ROR, and P-values for each antidepressant type
def summarize_prr_ror(data, period_name):
    """
    Summarize PRR, ROR, and p-values for each antidepressant type.
    """
    summary = []
    for antidepressant in data["Antidepressant_Type"].unique():
        subset = data[data["Antidepressant_Type"] == antidepressant]

        # Calculate average PRR, ROR, and their p-values
        avg_prr = subset["PRR"].mean()
        avg_prr_p_value = subset["PRR_P_Value"].mean() if "PRR_P_Value" in subset.columns else None
        avg_ror = subset["ROR"].mean()
        avg_ror_p_value = subset["ROR_P_Value"].mean() if "ROR_P_Value" in subset.columns else None

        # Append the results to the summary list
        summary.append({
            "Antidepressant_Type": antidepressant,
            "Period": period_name,
            "Average_PRR": avg_prr,
            "Average_PRR_P_Value": avg_prr_p_value,
            "Average_ROR": avg_ror,
            "Average_ROR_P_Value": avg_ror_p_value
        })

    return pd.DataFrame(summary)

In [8]:
# Summarize results for each period and combine summaries into a single DataFrame
pre_covid_summary = summarize_prr_ror(pre_covid_results, "Pre-COVID")
during_covid_summary = summarize_prr_ror(during_covid_results, "During-COVID")
post_covid_summary = summarize_prr_ror(post_covid_results, "Post-COVID")

all_summaries = pd.concat([pre_covid_summary, during_covid_summary, post_covid_summary], ignore_index=True)

In [9]:
# Rename columns for final display consistency
all_summaries.rename(columns={
    "Average_PRR": "PRR",
    "Average_PRR_P_Value": "PRR_P_Value",
    "Average_ROR": "ROR",
    "Average_ROR_P_Value": "ROR_P_Value"
}, inplace=True)

# Create a final summary table with the desired format
final_summary = all_summaries[[
    "Period", "Antidepressant_Type", "PRR", "PRR_P_Value", 
    "ROR", "ROR_P_Value"
]]

# Display the summary table
display(final_summary)

Unnamed: 0,Period,Antidepressant_Type,PRR,PRR_P_Value,ROR,ROR_P_Value
0,Pre-COVID,selective serotonin reuptake inhibitors,1.014978,0.176801,1.079827,0.380823
1,Pre-COVID,serotonin and norepinephrine reuptake inhibitors,0.947986,0.21529,0.944527,0.451786
2,Pre-COVID,atypical antidepressants,0.962999,0.210572,0.969835,0.445149
3,Pre-COVID,n-methyl d-aspartate antagonists,0.799406,0.209066,0.80179,0.431821
4,Pre-COVID,tricyclic and tetracyclic antidepressants,1.067897,0.093621,1.083164,0.271422
5,Pre-COVID,benzodiazepines,1.069264,0.179921,1.07903,0.37887
6,Pre-COVID,nonbenzodiazepine receptor modulator,0.80909,0.216324,0.810829,0.520491
7,Pre-COVID,monoamine oxidase inhibitors,0.798163,0.585881,0.799085,0.719612
8,Pre-COVID,acid-a receptor positive modulators,0.599411,0.753063,0.599966,0.511127
9,Pre-COVID,azaspirodecanedione,0.939516,0.675011,0.941768,0.792541


> THE END!