Figure out how to retrieve clinical trials from ClinicalTrials.gov

In [2]:
# Sample study record to inspect; a relative path, so it is resolved against
# the notebook's working directory.
# NOTE(review): presumably a single-study JSON export from ClinicalTrials.gov — confirm.
file_path = "NCT04414150.json"

In [11]:
import json

def load_json_file(file_path):
    """
    Load a JSON file from the specified file path and return the parsed content.

    The file is always decoded as UTF-8 rather than with the platform's
    locale-dependent default encoding, so non-ASCII text in the record
    (e.g. "≥") is read the same way on every operating system.

    :param file_path: str or path-like, path to the JSON file
    :return: the parsed JSON value (a dict when the file holds a JSON object)
    :raises OSError: if the file cannot be opened
    :raises json.JSONDecodeError: if the file content is not valid JSON
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def process_clinical_trial_data(data):
    """
    Process a JSON object of clinical trial data to extract relevant attributes.

    Every module under 'protocolSection' is treated as optional: when a module
    or field is absent (or null), the corresponding entry falls back to a
    placeholder ('Not provided' / 'Not specified', or an empty list for
    interventions) instead of raising KeyError. Records that contain all
    modules produce exactly the same result as before.

    :param data: dict, JSON object of clinical trial data
    :return: dict, structured summary of extracted information with the keys
        'NCT ID', 'Brief Title', 'Brief Summary', 'Official Title',
        'Conditions', 'Interventions Description' and 'Eligibility'
    """
    protocol = (data or {}).get('protocolSection') or {}

    def module(name):
        # A missing or null module is treated as an empty one so the
        # per-field defaults below apply uniformly.
        return protocol.get(name) or {}

    # Extract identification information
    identification_info = module('identificationModule')

    # Extract conditions
    conditions_info = module('conditionsModule').get('conditions')

    # Extract interventions
    interventions_list = module('armsInterventionsModule').get('interventions') or []
    interventions_descriptions = [
        intervention.get('description', 'No description provided')
        for intervention in interventions_list
    ]

    # Extract eligibility information
    eligibility_info = module('eligibilityModule')
    eligibility_criteria_text = eligibility_info.get('eligibilityCriteria', 'Not provided')

    # Extract summary information
    summary_info = module('descriptionModule').get('briefSummary')

    # Summarize the extracted information
    extracted_info = {
        'NCT ID': identification_info.get('nctId', 'Not provided'),
        'Brief Title': identification_info.get('briefTitle', 'Not provided'),
        'Brief Summary': summary_info if summary_info else 'Not provided',
        'Official Title': identification_info.get('officialTitle', 'Not provided'),
        'Conditions': conditions_info if conditions_info else 'Not provided',
        'Interventions Description': interventions_descriptions,
        'Eligibility': {
            'Healthy Volunteers': eligibility_info.get('healthyVolunteers', 'Not specified'),
            'Sex': eligibility_info.get('sex', 'Not specified'),
            'Minimum Age': eligibility_info.get('minimumAge', 'Not specified'),
            'Maximum Age': eligibility_info.get('maximumAge', 'Not specified'),
            'Standard Ages': eligibility_info.get('stdAges', 'Not specified'),
            # The trailing space in this key is kept on purpose: the summary
            # formatter looks the entry up under exactly this spelling.
            'Criteria Text ': eligibility_criteria_text
        }
    }

    return extracted_info

# Try both functions on the sample study: load the JSON at `file_path` (set in
# an earlier cell), extract the summary, and show it as the cell's output.
json_data = load_json_file(file_path)
clinical_trial_summary = process_clinical_trial_data(json_data)
clinical_trial_summary

{'NCT ID': 'NCT04414150',
 'Brief Title': 'A Trial of SHR-1802 in Patients With Failure of Standard Treatment for Advanced Malignant Tumours',
 'Brief Summary': 'This is the first study to test SHR-1802 in humans. The primary purpose of this study is to see if SHR-1802 is safe and tolerable for patients with locally advanced/unresectable or metastatic malignancies that are refractory to available therapy or for which no standard therapy is available.',
 'Official Title': 'Tolerability, Safety and Pharmacokinetic Characteristics of SHR-1802 in Patients With Advanced Malignancy: a Phase I Clinical Study',
 'Conditions': ['Malignant Tumours'],
 'Interventions Description': ['This study will evaluate the preliminary safety, tolerability, pharmacokinetic characteristics and initial efficacy of SHR-1802 The goal is to establish the maximum tolerated dose (MTD) and/or recommended Phase 2 dose (RP2D) of sequential escalating doses of SHR-1802 when administered to patients with locally advanced

In [1]:
# NOTE(review): despite the variable name, "ctg-studies.json" looks like a
# single file rather than a directory — confirm before passing it to os.listdir.
folder = "ctg-studies.json"

# Go through all the json files in the folder and process them
import os

# Draft, currently disabled: batch version of the single-file workflow that
# would call load_json_file and process_clinical_trial_data for every entry
# returned by os.listdir(folder).
# def process_clinical_trials_folder(folder):
#     """
#     Process a folder of JSON files containing clinical trial data.
    
#     :param folder: str, path to the folder containing the JSON files
#     :return: list, list of structured summaries of extracted information
#     """
#     clinical_trials_summaries = []
#     for file in os.listdir(folder):
#         file_path = os.path.join(folder, file)
#         json_data = load_json_file(file_path)
#         clinical_trial_summary = process_clinical_trial_data(json_data)
#         clinical_trials_summaries.append(clinical_trial_summary)
#     return clinical_trials_summaries

# TODO: unfinished note — the original comment stops at "Make a"; complete or remove it.

In [15]:
import textwrap

# Build a plain-text rendering of an extracted trial summary; the criteria text is appended as-is (format_criteria is not applied)
def format_clinical_trial_summary(summary):
    """
    Generate a formatted summary for a clinical trial data dictionary.

    The output contains a "Conditions" bullet list followed by the
    "Eligibility Criteria" section: one "key: value" line per eligibility
    field, with the free-text criteria appended verbatim after a blank line.

    :param summary: dict, structured summary of extracted clinical trial
        information; must contain 'Conditions' (a list of strings, or a single
        placeholder string such as 'Not provided') and 'Eligibility' (a dict)
    :return: str, formatted summary suitable for presentation to a doctor
    :raises KeyError: if 'Conditions' or 'Eligibility' is missing from summary
    """
    # Helper function to format a list with bullet points
    def format_list(items):
        # A bare string (e.g. the 'Not provided' placeholder) is a single
        # item; iterating over it would emit one bullet per character.
        if isinstance(items, str):
            items = [items]
        return "\n".join(f"• {item}" for item in items)

    # Helper function to wrap text for better readability
    def wrap_text(text, width=80):
        return "\n".join(textwrap.wrap(text, width=width))

    # Start building the formatted summary
    formatted_summary = "Clinical Trial Summary:\n\n"

    # # Add identification information
    # formatted_summary += f"NCT ID: {summary['NCT ID']}\n"
    # formatted_summary += f"Brief Title: {wrap_text(summary['Brief Title'])}\n"
    # formatted_summary += f"Brief Summary: {wrap_text(summary['Brief Summary'])}\n"
    # formatted_summary += f"Official Title: {wrap_text(summary['Official Title'])}\n\n"

    # Add conditions
    formatted_summary += "Conditions:\n"
    formatted_summary += f"{format_list(summary['Conditions'])}\n\n"

    # # Add interventions
    # formatted_summary += "Interventions Description:\n"
    # interventions_formatted = format_list(summary['Interventions Description'])
    # formatted_summary += f"{wrap_text(interventions_formatted, width=100)}\n\n"

    # Add eligibility information
    eligibility = summary['Eligibility']
    formatted_summary += "Eligibility Criteria:\n"
    # For each key in eligibility, check if it's the criteria text and format accordingly
    for key, value in eligibility.items():
        # The key is spelled with a trailing space to match the extraction step.
        if key == 'Criteria Text ':
            # formatted_criteria = format_criteria(value)
            # formatted_summary += wrap_text(formatted_criteria, width=100) + "\n"
            # Appended unwrapped so the numbered criteria keep their own line breaks.
            formatted_summary += '\n' + value
        else:
            formatted_summary += f"{key}: {wrap_text(str(value))}\n"

    return formatted_summary

# Render the summary extracted in the earlier cell (`clinical_trial_summary`)
# as plain text and print it.
formatted_summary = format_clinical_trial_summary(clinical_trial_summary)
print(formatted_summary)

Clinical Trial Summary:

Conditions:
• Malignant Tumours

Eligibility Criteria:
Healthy Volunteers: False
Sex: ALL
Minimum Age: 18 Years
Maximum Age: 75 Years
Standard Ages: ['ADULT', 'OLDER_ADULT']

Inclusion Criteria:

1. Voluntary participation and written informed consent;
2. Aged 18-75 years (inclusive), males and females;
3. Patient must have histologically or clinically confirmed advanced and/or metastatic malignancies for which failure of standard treatment or lack of effective standard treatment;
4. At least one measurable lesion according to RECIST v1.1;
5. ECOG score of 0-1;
6. Expected survival ≥ 12 weeks;
7. Adequate bone marrow reserve and organ function were confirmed by baseline examination
8. For female patients of childbearing potential or male patients with partners of childbearing potential who are not sterilized by surgical operations, they are required to use a medically approved contraceptive measure during the study treatment period and within 3 months after the