In [30]:
import pandas as pd 
from openai import OpenAI
import requests

In [36]:
#Note: For Part 2, keep this dataset but keep only trials in Phase 1, 2 or 3.
'''At myTomorrows, we index clinical trials against conditions ourselves, to improve the performance of our Search engine. 
Now, for three sample patients, we want to see what trials might be eligible for them.
'''

patient_1 = pd.read_json("mds_case/patient_01.json")
patient_2 = pd.read_json("mds_case/patient_02.json")
patient_3 = pd.read_json("mds_case/patient_03.json")

patients = [patient_1,patient_2,patient_3]

patient_1.head()
#patient_1.describe()

Unnamed: 0,profile
age,53
condition,Amyotrophic Lateral Sclerosis
country,France
gender,Male
profile,Histology: None\nPrevious treatment: riluzole ...


In [6]:
patient_1['profile'].profile

'Histology: None\nPrevious treatment: riluzole since two months\nBiomarkers: SOD1 mutation\nTests: None\nComorbidities: rheumatoid arthritis\nOther: None'

In [59]:


def fetch_studies(query):
    url = "https://clinicaltrials.gov/api/v2/studies"
    headers = {
        "accept": "application/json"
    }
    params = {
        "query.cond": query,
        "filter.overallStatus": "RECRUITING|ENROLLING_BY_INVITATION|AVAILABLE"
    }
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()  # Return the JSON response if successful
    else:
        return response.status_code  # Return the HTTP status code if not successful



query = "Amyotrophic Lateral Sclerosis"
result = fetch_studies(query)
print(len(result['studies']))
print(result)


10
{'studies': [{'protocolSection': {'identificationModule': {'nctId': 'NCT05632757', 'orgStudyIdInfo': {'id': 'RCAPHM22_0204'}, 'secondaryIdInfos': [{'id': 'ID-RCB', 'type': 'OTHER', 'domain': '2022-A01748-35'}], 'organization': {'fullName': 'Assistance Publique Hopitaux De Marseille', 'class': 'OTHER'}, 'briefTitle': 'Anticipated Patient and Caregiver Burden', 'officialTitle': 'Anticipated Patient and Caregiver Burden: Impact in People With Amyotrophic Lateral Sclerosis', 'acronym': 'FARP'}, 'statusModule': {'statusVerifiedDate': '2024-01', 'overallStatus': 'RECRUITING', 'expandedAccessInfo': {'hasExpandedAccess': False}, 'startDateStruct': {'date': '2023-06-22', 'type': 'ACTUAL'}, 'primaryCompletionDateStruct': {'date': '2024-12-22', 'type': 'ESTIMATED'}, 'completionDateStruct': {'date': '2025-06-22', 'type': 'ESTIMATED'}, 'studyFirstSubmitDate': '2022-11-16', 'studyFirstSubmitQcDate': '2022-11-27', 'studyFirstPostDateStruct': {'date': '2022-12-01', 'type': 'ACTUAL'}, 'lastUpdateSub

In [60]:
#Within the disease “Duchenne Muscular Dystrophy”, what are common eligibility criteria?

query = "Duchenne Muscular Dystrophy"
result = fetch_studies(query)
print(len(result['studies']))
print(result['studies'])


10
[{'protocolSection': {'identificationModule': {'nctId': 'NCT05933057', 'orgStudyIdInfo': {'id': 'DSC/14/2357/50'}, 'secondaryIdInfos': [{'id': '2023-503521-19', 'type': 'EUDRACT_NUMBER'}, {'id': 'U1111-1295-1799', 'type': 'OTHER', 'domain': 'Universal Trial Number (UTN)'}, {'id': '1008441', 'type': 'OTHER', 'domain': 'IRAS ID'}, {'id': '277453', 'type': 'OTHER', 'domain': 'Health Canada'}], 'organization': {'fullName': 'Italfarmaco', 'class': 'INDUSTRY'}, 'briefTitle': 'Efficacy, Safety and Tolerability of Givinostat in Non-ambulant Patients With Duchenne Muscular Dystrophy', 'officialTitle': 'Randomised, Double-blind, Placebo-controlled, Multicentre Study to Evaluate the Efficacy, Safety and Tolerability of Givinostat in Non-ambulant Patients With Duchenne Muscular Dystrophy', 'acronym': 'ULYSSES'}, 'statusModule': {'statusVerifiedDate': '2024-02', 'overallStatus': 'RECRUITING', 'expandedAccessInfo': {'hasExpandedAccess': False}, 'startDateStruct': {'date': '2024-02-19', 'type': 'A

In [14]:

def find_eligable_studies(patient):
    '''
    Finds a list of studies for one patient based on his condition
    '''
    condition = patient['profile'].condition
    eligable_studies = fetch_studies(condition)
    amount_studies = len(eligable_studies['studies'])
    return amount_studies, eligable_studies

In [71]:
amount_studies, studies = find_eligable_studies(patient_1)
study1= studies['studies'][0]

In [68]:
study1[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

{'eligibilityCriteria': '* INCLUSION CRITERIA:\n\nThe study population will consist of several subgroups of patients. An individual must meet one of the following subgroup inclusion criteria to participate in this study.\n\nSBMA subgroup:\n\n1. Male\n2. Genetically confirmed SBMA\n3. Able to travel to the NIH\n4. Greater than 18 years old\n\nSBMA carriers:\n\n1. Female\n2. Genetically confirmed SBMA heterozygote\n3. Able to travel to the NIH\n4. Greater than 18 years old\n\nOther motor neuron disease patients:\n\n1. Diagnosis of motor neuron disease other than SBMA (e.g.. amyotrophic lateral sclerosis (ALS), spinal muscular atrophy)\n2. Able to travel to the NIH\n3. Greater than 18 years old\n4. . Male\n\nHealthy male control:\n\n1. Male\n2. No history or diagnosis of liver disease\n3. No history of SBMA or other motor neuron disease\n4. Greater than 18 years old\n5. No diagnosis of diabetes or insulin resistance\n6. No history of alcohol abuse within the last 1 year\n7. No history of 

In [75]:
'''
Based on one Patient and one Study, check the eligabilty of tha Patient:
'''

def get_study_info(study):
    title = study['protocolSection']['identificationModule']['officialTitle']
    info= f'''Title: {title}'''
    try:
        #description = study['protocolSection']['descriptionModule']['detailedDescription']
        eligibilityCriteria = f"eligibilityCriteria: {study['protocolSection']['eligibilityModule']['eligibilityCriteria']}"
        info += f'\n{eligibilityCriteria}'
    except:
        pass
    
    try:
        sex = f"required sex: {study['protocolSection']['eligibilityModule']['sex']}"
        info += f'\n{sex}'
    except:
        pass

    try:
        minimumAge = f"required minimum Age: {study['protocolSection']['eligibilityModule']['minimumAge']}"
        info += f'\n{minimumAge}'
    except:
        pass

    return info

def get_patient_info(patient):
    condition = patient['profile'].condition
    age = patient['profile'].age
    description = patient['profile'].profile
    gender = patient['profile'].gender
    country = patient['profile'].country
    info =  f'''condition: {condition} \nage: {age} \ndescription: {description} \ngender: {gender} \ncountry: {country}'''

    return info

def create_promt(patient, study):
    study_info= get_study_info(study)
    patient_info= get_patient_info(patient)
    promt = f'''You are an expert doctor and you need to determine the eligibility of the patient for the given trail based on the information of the paitient and the train:
Patient information
{patient_info}
Trial Information
{study_info}
Predict whether the patient is eligible/uneligible/uncertain
Only return the category
    '''
    return promt

In [77]:
study4= studies['studies'][4]

In [78]:
promt =create_promt(patient_1, study4)
print(promt)

You are an expert doctor and you need to determine the eligibility of the patient for the given trail based on the information of the paitient and the train:
Patient information
condition: Amyotrophic Lateral Sclerosis 
age: 53 
description: Histology: None
Previous treatment: riluzole since two months
Biomarkers: SOD1 mutation
Tests: None
Comorbidities: rheumatoid arthritis
Other: None 
gender: Male 
country: France
Trial Information
Title: A Phase 1, Multicenter, Open-Label, Dose Escalation Study to Assess the Safety, Tolerability, Pharmacokinetics, Pharmacodynamics, and Clinical Activity of Orally Administered FHD-286, as Monotherapy or Combination Therapy, in Subjects With Advanced Hematologic Malignancies
eligibilityCriteria: Key Inclusion Criteria:

1. Subject must be ≥16 years of age.
2. Subject must:

   • Have a confirmed diagnosis of R/R AML, R/R MDS, or R/R CMML not in blast crisis

   AND

   • Be an appropriate candidate for treatment with LDAC (Arm A) or decitabine (Arm B

In [92]:
#Call OpenAI Model
from env_file import OPENAI_API_KEY

client = OpenAI(api_key=OPENAI_API_KEY)

def get_openAI_response(client, promt, model='gpt-4'):
    
  response = client.chat.completions.create(
    model=model,
    messages=[
      {"role": "user", "content": promt},
    ]
  )
  return response.choices[0].message.content

In [93]:
output = get_openAI_response(client, promt, model='gpt-4')
output

'uneligible'

In [79]:
from collections import defaultdict

eligable_studies_per_patient = defaultdict(lambda:[])
for idx, patient in enumerate(patients):
    #find eligable studies:
    amount_studies, studies = find_eligable_studies(patient)
    #studies_per_patient[patient] = studies['studies']
    print(f'found {amount_studies} studies for {patient}')
    for study in studies['studies']:
        promt =create_promt(patient, study)
        output = get_openAI_response(client, promt, model='gpt-4')
        study_id = study['protocolSection']['identificationModule']['nctId']
        print(f'{study_id} is {output}')
        if output == 'eligible':
            patient_name = f'Patient_{idx+1}'
            eligable_studies_per_patient[patient_name].append(study)
            print(f'{study_id} is saved!')
        else:
            print(f'{study_id} is rejected!')

found 10 studies for                                                      profile
age                                                       53
condition                      Amyotrophic Lateral Sclerosis
country                                               France
gender                                                  Male
profile    Histology: None\nPrevious treatment: riluzole ...
NCT05632757 is uncertain
NCT05632757 is rejected!
NCT05306457 is uneligible
NCT05306457 is rejected!
NCT05137665 is eligible
NCT05137665 is saved!
NCT05474235 is eligible
NCT05474235 is saved!
NCT04885374 is uneligible
NCT04885374 is rejected!
NCT05716074 is uncertain
NCT05716074 is rejected!
NCT05819931 is uncertain
NCT05819931 is rejected!
NCT06325865 is uncertain
NCT06325865 is rejected!
NCT02916966 is eligible
NCT02916966 is saved!
NCT05340660 is eligible
NCT05340660 is saved!
found 10 studies for                                                      profile
age                                          

In [80]:
eligable_studies_per_patient

defaultdict(<function __main__.<lambda>()>,
            {'Patient_1': [{'protocolSection': {'identificationModule': {'nctId': 'NCT05137665',
                 'orgStudyIdInfo': {'id': '21-500-101-70-09'},
                 'organization': {'fullName': 'Target ALS Foundation, Inc.',
                  'class': 'OTHER'},
                 'briefTitle': 'Target ALS Biomarker Study; Longitudinal Biofluids, Clinical Measures, and At Home Measures',
                 'officialTitle': 'Target ALS Biomarker Study; Longitudinal Biofluids, Clinical Measures, and At - Home Measures',
                 'acronym': 'TALSLB'},
                'statusModule': {'statusVerifiedDate': '2023-11',
                 'overallStatus': 'RECRUITING',
                 'expandedAccessInfo': {'hasExpandedAccess': False},
                 'startDateStruct': {'date': '2021-01-01', 'type': 'ACTUAL'},
                 'primaryCompletionDateStruct': {'date': '2031-12-31',
                  'type': 'ESTIMATED'},
              

In [91]:
def ids_of_eligible_studies(eligable_studies_per_patient):
    for patient in eligable_studies_per_patient.keys():
        print(f'{patient}')
        for study in eligable_studies_per_patient[patient]:
            print(study['protocolSection']['identificationModule']['nctId'])

ids_of_eligible_studies(eligable_studies_per_patient)

Patient_1
NCT05137665
NCT05474235
NCT02916966
NCT05340660
Patient_2
NCT05673057
Patient_3
NCT04322357
NCT04626674
NCT03689660
