In [133]:
import pandas as pd
import numpy as np
import os, requests
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this presumably also hides pandas warnings such as
# SettingWithCopyWarning raised by later cells — confirm that is intended.
warnings.filterwarnings("ignore")

In [9]:
# Fetch the organizations listed by the moalmanac API.
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() reports HTTP errors directly instead of
# letting them surface later as a KeyError on ['data'] or a JSON decode error.
organizations_response = requests.get('https://api.moalmanac.org/organizations', timeout=60)
organizations_response.raise_for_status()
organizations = organizations_response.json()['data']
organizations

[{'id': 'fda',
  'name': 'Food and Drug Administration',
  'description': 'Regulatory agency that approves drugs for use in the United States.',
  'url': 'https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm',
  'last_updated': '2025-04-03'},
 {'id': 'ema',
  'name': 'European Medicines Agency',
  'description': 'Regulatory agency that approves medicines for use in the European Union.',
  'url': 'https://www.ema.europa.eu/en/medicines',
  'last_updated': '2024-10-20'},
 {'id': 'hse',
  'name': 'Health Service Executive',
  'description': 'Regulatory agency that approves medicines for reimbursement by the public health system in the Republic of Ireland.',
  'url': 'https://www.hse.ie/eng/services/list/5/cancer/profinfo/chemoprotocols/',
  'last_updated': '2024-10-16'},
 {'id': 'hpra',
  'name': 'Health Products Regulatory Authority',
  'description': 'National register of authorized medicines in the Republic of Ireland.',
  'url': 'http://www.hpra.ie/homepage/medicines/medicines-in

Here, we are interested only in the organizations relevant to Ireland.

In [22]:
# Download the statements from the moalmanac API and flatten the nested JSON
# records into a table (nested keys become dotted column names).
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# reports HTTP errors directly instead of a later KeyError on ['data'].
statements_response = requests.get('https://api.moalmanac.org/statements', timeout=60)
statements_response.raise_for_status()
# `data` is bound once, directly to the DataFrame, rather than first to the raw list.
data = pd.json_normalize(statements_response.json()['data'])
data.head(1)

Unnamed: 0,id,type,description,contributions,reportedIn,direction,indication.id,indication.indication,indication.initial_approval_date,indication.initial_approval_url,...,proposition.objectTherapeutic.conceptType,proposition.objectTherapeutic.name,proposition.objectTherapeutic.primaryCoding.id,proposition.objectTherapeutic.primaryCoding.code,proposition.objectTherapeutic.primaryCoding.name,proposition.objectTherapeutic.primaryCoding.system,proposition.objectTherapeutic.primaryCoding.systemVersion,proposition.objectTherapeutic.primaryCoding.iris,proposition.objectTherapeutic.mappings,proposition.objectTherapeutic.extensions
0,0,Statement,The U.S. Food and Drug Administration (FDA) gr...,"[{'id': 0, 'type': 'Contribution', 'agent': {'...","[{'id': 'doc:fda.verzenio', 'type': 'Document'...",supports,ind:fda.verzenio:0,Verzenio is a kinase inhibitor indicated in co...,2023-03-03,https://www.accessdata.fda.gov/drugsatfda_docs...,...,,,,,,,,,,


In [23]:
# An indication id looks like 'ind:fda.verzenio:0'; the agency is the text
# between the first ':' and the '.' that follows it.
data['agency'] = [
    indication_id.split(':')[1].split('.')[0]
    for indication_id in data['indication.id']
]
data['agency'].value_counts()

agency
fda     631
ema     422
hse     350
hc      332
hpra     22
Name: count, dtype: int64

In [26]:
# Keep only the statements issued by EMA, HSE or HPRA, then write the filtered
# table to a CSV file in the working directory.
from_selected_agency = data['agency'].isin(['ema', 'hse', 'hpra'])
data = data.loc[from_selected_agency]
data.to_csv('moa_all_ireland_statements.csv', index=False)

In [178]:
# Source columns describing the therapeutic of each statement.
THERAPIES_COL = 'proposition.objectTherapeutic.therapies'
THERAPY_NAME_COL = 'proposition.objectTherapeutic.name'
THERAPY_EXTENSIONS_COL = 'proposition.objectTherapeutic.extensions'


def _is_combination(row):
    """Return True when the row carries a list of member therapies.

    Rows without member therapies hold a missing value (NaN) in that column,
    so an explicit list check replaces the fragile ``str(x) != 'nan'`` test.
    """
    return isinstance(row[THERAPIES_COL], list)


def _therapy_names(row):
    """Return the therapy names of a row as a list (one entry for monotherapy)."""
    if _is_combination(row):
        return [therapy['name'] for therapy in row[THERAPIES_COL]]
    return [row[THERAPY_NAME_COL]]


def _therapy_extension(row, extension_name, first_only=False):
    """Collect the values of the extension called ``extension_name`` for a row.

    Extensions are always matched by their ``name`` — for combination members
    as well as for a single therapy — so the result does not depend on the
    position of the extension inside the ``extensions`` list.

    Parameters
    ----------
    row : pandas.Series
        One row of the formatted table.
    extension_name : str
        Name of the extension to collect (e.g. ``'therapy_strategy'``).
    first_only : bool
        When True, keep only the first element of each matching value.

    Returns
    -------
    list
        One entry per matching extension, in therapy order.
    """
    if _is_combination(row):
        extension_lists = [therapy.get('extensions') for therapy in row[THERAPIES_COL]]
    else:
        extension_lists = [row[THERAPY_EXTENSIONS_COL]]
    values = [
        entry['value']
        for extensions in extension_lists
        # A missing extensions cell (NaN/None) contributes nothing.
        for entry in (extensions if isinstance(extensions, list) else [])
        if entry.get('name') == extension_name
    ]
    return [value[0] for value in values] if first_only else values


# .copy() makes the selection an independent frame, so the column assignments
# below never write into a view of `data`.
formatted_table = data[['indication.id', 'agency',
                        'indication.description',
                        'indication.indication',
                        'proposition.conditionQualifier.name',
                        'proposition.biomarkers',
                        THERAPIES_COL,
                        THERAPY_NAME_COL,
                        THERAPY_EXTENSIONS_COL,
                        'indication.document.url',
                        'indication.document.publication_date'
                        ]].copy()
formatted_table['therapy_name'] = formatted_table.apply(_therapy_names, axis=1)
formatted_table['therapy_approach'] = formatted_table.apply(
    lambda row: 'Combination Therapy' if _is_combination(row) else 'Monotherapy', axis=1)
# therapy_strategy keeps only the first element of each strategy value, while
# therapy_type keeps the value as stored.
formatted_table['therapy_strategy'] = formatted_table.apply(
    lambda row: _therapy_extension(row, 'therapy_strategy', first_only=True), axis=1)
formatted_table['therapy_type'] = formatted_table.apply(
    lambda row: _therapy_extension(row, 'therapy_type'), axis=1)
formatted_table.head(1)


Unnamed: 0,indication.id,agency,indication.description,indication.indication,proposition.conditionQualifier.name,proposition.biomarkers,proposition.objectTherapeutic.therapies,proposition.objectTherapeutic.name,proposition.objectTherapeutic.extensions,indication.document.url,indication.document.publication_date,therapy_name,therapy_approach,therapy_strategy,therapy_type
963,ind:ema.adcetris:0,ema,The European Medicines Agency (EMA) has author...,ADCETRIS is indicated for adult patients with ...,Hodgkin Lymphoma,"[{'id': 0, 'name': 'CD30 +', 'genes': [], 'typ...","[{'id': 3, 'conceptType': 'Drug', 'name': 'Vin...",,,https://www.ema.europa.eu/en/documents/product...,2023-12-01,"[Vinblastine, Dacarbazine, Brentuximab Vedotin...",Combination Therapy,"[Vinca alkaloid chemotherapy, Nonclassical alk...","[Targeted therapy, Chemotherapy, Targeted ther..."


In [179]:
def format_biomarker(biomarker_list):
    '''
    Format biomarker entries as display strings.

    Each biomarker is rendered as ``'<name> [<present>]'``, where ``<present>``
    is ``'present'`` when the biomarker's ``present`` extension equals ``True``,
    the raw extension value otherwise, and empty when the biomarker has no
    ``present`` extension. Biomarkers with an empty name are skipped.

    Parameters
    ----------
    biomarker_list : list of dict, or a missing value
        Entries with a ``'name'`` key and an optional ``'extensions'`` list of
        ``{'name': ..., 'value': ...}`` dicts. Anything that is not a list or
        tuple (e.g. ``None`` or NaN from an empty cell) yields ``[]``.

    Returns
    -------
    list of str
        One formatted string per named biomarker, in input order.
    '''
    # A missing cell is not iterable; treat it as "no biomarkers".
    if not isinstance(biomarker_list, (list, tuple)):
        return []
    biomarkers = []
    for biomarker in biomarker_list:
        name = biomarker['name']
        # Tolerate biomarkers without an 'extensions' key (or with a null one).
        extension_dict = {
            entry['name']: entry['value']
            for entry in (biomarker.get('extensions') or [])
        }
        present = extension_dict.get('present', '')
        if present == True:  # noqa: E712 - equality kept so bool-like values behave as before
            present = 'present'
        if name != '':
            biomarkers.append('{marker} [{present}]'.format(marker=name, present=present))
    return biomarkers
# Add the formatted biomarker strings, then drop the raw nested source columns.
raw_nested_columns = [
    'proposition.biomarkers',
    'proposition.objectTherapeutic.extensions',
    'proposition.objectTherapeutic.name',
    'proposition.objectTherapeutic.therapies',
]
formatted_table = (
    formatted_table
    .assign(biomarker=formatted_table['proposition.biomarkers'].apply(format_biomarker))
    .drop(columns=raw_nested_columns)
)

In [180]:
# Display the remaining column names and their order.
formatted_table.columns

Index(['indication.id', 'agency', 'indication.description',
       'indication.indication', 'proposition.conditionQualifier.name',
       'indication.document.url', 'indication.document.publication_date',
       'therapy_name', 'therapy_approach', 'therapy_strategy', 'therapy_type',
       'biomarker'],
      dtype='object')

In [183]:
# Rename with an explicit old -> new mapping rather than assigning a positional
# list, so a change in upstream column order or count cannot silently attach
# the wrong label to a column. Columns not listed here keep their names.
formatted_table = formatted_table.rename(columns={
    'indication.id': 'indication_id',
    'indication.description': 'description',
    'indication.indication': 'indication',
    'proposition.conditionQualifier.name': 'cancer_type',
    'indication.document.url': 'approval_url',
    'indication.document.publication_date': 'publication_date',
})
formatted_table.to_csv('moa_all_ireland_formatted.csv', index=False)
formatted_table.head(1)

Unnamed: 0,indication_id,agency,description,indication,cancer_type,approval_url,publication_date,therapy_name,therapy_approach,therapy_strategy,therapy_type,biomarker
963,ind:ema.adcetris:0,ema,The European Medicines Agency (EMA) has author...,ADCETRIS is indicated for adult patients with ...,Hodgkin Lymphoma,https://www.ema.europa.eu/en/documents/product...,2023-12-01,"[Vinblastine, Dacarbazine, Brentuximab Vedotin...",Combination Therapy,"[Vinca alkaloid chemotherapy, Nonclassical alk...","[Targeted therapy, Chemotherapy, Targeted ther...",[CD30 + [present]]


- approval status
- approval org : agency
- description : description
- indication : indication.indication
- cancer_type : proposition.conditionQualifier.name
- biomarker : proposition.biomarkers
- therapy_drug 
- therapy_approach
- therapy_strategy
- therapy_type
- approval_url : indication.document.url
- publication_date : indication.document.publication_date