In [None]:
%pip install requests pandas python-dotenv

In [7]:
import requests
import os
import pandas as pd
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv('fda_api_key')

def get_reaction_counts(search_query):
    """Fetch the top 50 effects and their counts for a specific search.
    """
    url = f"https://api.fda.gov/drug/event.json?api_key={api_key}&search={search_query}&count=patient.reaction.reactionmeddrapt.exact" 
    response = requests.get(url).json()
    return pd.DataFrame(response['results'])

In [8]:
target_drug = 'patient.drug.medicinalproduct: "OZEMPIC"'
market_background = 'patient.drug.drugcharacterization:1'

In [9]:
df_target = get_reaction_counts(target_drug)
df_target.columns = ['reaction', 'target_count']

df_background = get_reaction_counts(market_background)
df_background.columns = ['reaction', 'background_count']

df_signals = pd.merge(df_target, df_background, on='reaction', how='inner')

total_target_reports = 10000
total_background_reports = 5000000

In [10]:
def calculate_prr(row):
    """
    a: Target drug with reaction
    b: Target drug without reaction
    c: Background drugs with reaction
    d: Background drugs without reaction
    """
    a = row['target_count']
    b = total_target_reports - a
    c = row['background_count']
    d = total_background_reports - c

    prr = (a / (a + b)) / (c / (c + d))
    return prr

In [11]:
df_signals['prr_score'] = df_signals.apply(calculate_prr, axis=1)

signals = df_signals[df_signals['prr_score'] > 3].sort_values(by='prr_score', ascending=False)
signals.to_csv('../data/ozempic_safety_signals.csv', index=False)
print(signals.head(10))

                                            reaction  target_count  \
4                                 DECREASED APPETITE          3742   
5                                       CONSTIPATION          3416   
8                            BLOOD GLUCOSE INCREASED          2477   
9           WRONG TECHNIQUE IN PRODUCT USAGE PROCESS          2414   
6                                   WEIGHT DECREASED          3340   
13  INAPPROPRIATE SCHEDULE OF PRODUCT ADMINISTRATION          1942   
17                                       DEHYDRATION          1504   
2                                           VOMITING          4987   
7               PRODUCT USE IN UNAPPROVED INDICATION          2503   
12                              ABDOMINAL PAIN UPPER          2155   

    background_count  prr_score  
4             216713   8.633538  
5             200405   8.522741  
8             180509   6.861154  
9             189864   6.357182  
6             263725   6.332354  
13            157188   6.