In [1]:
from Bio import Entrez
import pandas as pd
import time
Entrez.email = "john.saxon2002@gmail.com"

In [2]:
def search_pubmed(query, max_results=10):
    """Search PubMed and return article titles and IDs."""
    
    try:
        
        # Search PubMed
        handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
        record = Entrez.read(handle)
        handle.close()
        
        # Extract PubMed IDs
        pubmed_ids = record["IdList"]
        
        # Fetch article details
        handle = Entrez.efetch(db="pubmed", id=",".join(pubmed_ids), retmode="xml")
        records = Entrez.read(handle)
        handle.close()

        # Extract details
        articles = []
        for article in records["PubmedArticle"]:
            medline = article["MedlineCitation"]
            article_data = medline["Article"]

            # Extract fields
            pmid = medline["PMID"]
            title = article_data["ArticleTitle"]
            authors = ", ".join([author["LastName"] + " " + author["ForeName"]
                                for author in article_data.get("AuthorList", []) 
                                if "LastName" in author and "ForeName" in author])
            journal = article_data["Journal"]["Title"]
            pub_date = article_data["Journal"]["JournalIssue"]["PubDate"]
            abstract = article_data.get("Abstract", {}).get("AbstractText", ["No abstract available"])[0]
            pub_type = ", ".join(article_data.get("PublicationTypeList", []))
            mesh_terms = ", ".join([term["DescriptorName"] for term in medline.get("MeshHeadingList", [])])

            # Append to results
            articles.append({
                "PMID": pmid,
                "Title": title,
                "Authors": authors if authors else "No authors listed",
                "Journal": journal,
                "Publication Date": pub_date,
                "Abstract": abstract,
                "Publication Type": pub_type,
                "MeSH Terms": mesh_terms
            })
        
        return articles

    except Exception as e:
        print(f"Error searching '{query}': {e}")
        return []

In [3]:
trials_df = pd.read_csv("adult-ctg-studies.csv")

In [5]:
trials_df.head()

Unnamed: 0,NCT Number,Study Title,Study URL,Study Status,Brief Summary,Study Results,Conditions,Interventions,Primary Outcome Measures,Secondary Outcome Measures,Other Outcome Measures,Age,Phases,Study Type,Study Design,Start Date,Primary Completion Date,Completion Date,Locations
0,NCT06226571,A Study of SNDX-5613 in Combination With Inten...,https://clinicaltrials.gov/study/NCT06226571,RECRUITING,The purpose of this study is to evaluate the s...,NO,Acute Myeloid Leukemias,DRUG: SNDX-5613|DRUG: Chemotherapy Regimen|DRU...,Dose Escalation: Number of Participants with D...,Maximum Plasma Concentration (Cmax) of SNDX-56...,,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NA|Intervention Model: SEQUENTIAL|...,2024-05-21,2027-02,2027-02,"UCLA Medical Hematology, Burbank, California, ..."
1,NCT04629443,Phase I/II Trial of S64315 Plus Azacitidine in...,https://clinicaltrials.gov/study/NCT04629443,COMPLETED,The purpose of this study is to assess the saf...,YES,Acute Myeloid Leukaemia,DRUG: S 64315 (also referred as MIK665) and az...,Dose Limiting Toxicity (DLT) (Phase I - Dose E...,Assess Anti-leukemic Activity of S64315 in Com...,,"ADULT, OLDER_ADULT",PHASE1|PHASE2,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2021-02-17,2023-08-25,2023-08-25,University of Texas MD Anderson Cancer Center ...
2,NCT06345365,MA+AZA Regimen for the Treatment of Newly Diag...,https://clinicaltrials.gov/study/NCT06345365,RECRUITING,Investigator proposed to apply the new dosage ...,NO,Acute Myeloid Leukaemia,"DRUG: mitoxantrone liposome, Ara-Cytarabine an...","Complete remission rate, Bone marrow primitive...","Incidence of adverse events, Incidence of adve...",,"ADULT, OLDER_ADULT",PHASE3,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: PAR...,2024-01-18,2026-12-31,2028-12-31,The First Affiliated Hospital of Zhengzhou Uni...
3,NCT06326697,Bioequivalence of Azacitidine 300 Mg Film-Coat...,https://clinicaltrials.gov/study/NCT06326697,RECRUITING,"A Randomized, Single Oral Dose, Open Label, Tw...",NO,Acute Myeloid Leukaemia,DRUG: Azacitidine|DRUG: Onureg,"Cmax, Maximum measured plasma concentration ov...","AUC0-∞, The area under the plasma concentratio...",,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: CRO...,2024-05-02,2025-05,2025-05,King Faisal Specialist Hospital and Research C...
4,NCT04106076,Phase I Study of UCART123 in Patient With Adve...,https://clinicaltrials.gov/study/NCT04106076,WITHDRAWN,"This is a Phase I, open-label, dose escalation...",NO,Acute Myeloid Leukaemia,BIOLOGICAL: UCART123,Incidence of AE/SAE/DLT [Safety and Tolerabili...,,,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2019-07-11,2019-12-05,2019-12-05,


In [6]:
search_terms = trials_df["NCT Number"].tolist()

In [8]:
all_results = []
searches = []
for term in search_terms:
    print(f"Searching: {term}...")
    results = search_pubmed(term, max_results=10)
    searches.append({
        "term" : term,
        "results" : len(results)
    })
    all_results += results
    
    # Delay to prevent API abuse
    time.sleep(0.1)


Searching: NCT06226571...
Error searching 'NCT06226571': Supplied id parameter is empty.
Searching: NCT04629443...
Error searching 'NCT04629443': Supplied id parameter is empty.
Searching: NCT06345365...
Error searching 'NCT06345365': Supplied id parameter is empty.
Searching: NCT06326697...
Error searching 'NCT06326697': Supplied id parameter is empty.
Searching: NCT04106076...
Error searching 'NCT04106076': Supplied id parameter is empty.
Searching: NCT05712278...
Error searching 'NCT05712278': Supplied id parameter is empty.
Searching: NCT03796390...
Error searching 'NCT03796390': Supplied id parameter is empty.
Searching: NCT03672695...
Searching: NCT06105658...
Error searching 'NCT06105658': Supplied id parameter is empty.
Searching: NCT06764589...
Error searching 'NCT06764589': Supplied id parameter is empty.
Searching: NCT06783478...
Error searching 'NCT06783478': Supplied id parameter is empty.
Searching: NCT06788756...
Error searching 'NCT06788756': Supplied id parameter is em

In [34]:
articles_df = pd.DataFrame(all_results)
articles_df.to_csv("adult-pubmed-search-results.csv", index=False)


In [None]:
searches_df = pd.DataFrame(searches).set_index("term")
searches_df = searches_df[searches_df["results"] > 0]

In [20]:
searches_df = trials_df.set_index("NCT Number").join(searches_df, how="right")

In [22]:
searches_df.to_csv("adult-searches.csv", index=True)

In [23]:
searches_df.head()

Unnamed: 0_level_0,Study Title,Study URL,Study Status,Brief Summary,Study Results,Conditions,Interventions,Primary Outcome Measures,Secondary Outcome Measures,Other Outcome Measures,Age,Phases,Study Type,Study Design,Start Date,Primary Completion Date,Completion Date,Locations,results
term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
NCT03672695,Phase I Dose Escalation Study of Intravenously...,https://clinicaltrials.gov/study/NCT03672695,COMPLETED,The purpose of this study is to determine the ...,NO,Acute Myeloid Leukaemia,COMBINATION_PRODUCT: S 64315 (also referred as...,"Incidence of Dose Limiting Toxicity (DLTs), At...","Anti-leukemic activity, Using blood, bone marr...",,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NON_RANDOMIZED|Intervention Model:...,2018-11-28,2022-11-12,2023-05-30,"Smilow Cancer Hospital at Yale, New Haven, Con...",1
NCT04172844,"Pevonedistat, Azacitidine (or Decitabine), and...",https://clinicaltrials.gov/study/NCT04172844,TERMINATED,This is a phase Ib study with a 3 + 3 dose esc...,NO,Acute Myelogenous Leukemia,DRUG: Azacitidine|DRUG: Venetoclax|DRUG: Pevon...,Recommended phase 2 dose of pevonedistat when ...,"Complete Remission Rate, The number of subject...",,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NON_RANDOMIZED|Intervention Model:...,2020-01-13,2021-12-12,2022-02-27,"Mayo Clinic, Rochester, Minnesota, 55905, Unit...",1
NCT03904251,CPX-351 and Gemtuzumab Ozogamicin in Treating ...,https://clinicaltrials.gov/study/NCT03904251,TERMINATED,This phase Ib trial studies the best dose of g...,NO,Acute Myelogenous Leukemia,DRUG: Gemtuzumab Ozogamicin|DRUG: Liposome-enc...,"Maximum tolerated dose (MTD), MTD is defined a...","Objective response rate (ORR), Objective respo...",Genotype at CD33 splicing single nucleotide po...,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2019-07-18,2023-10-25,2023-10-25,"UCLA / Jonsson Comprehensive Cancer Center, Lo...",1
NCT03971799,Study of Anti-CD33 Chimeric Antigen Receptor-E...,https://clinicaltrials.gov/study/NCT03971799,RECRUITING,This phase 1/2 trial aims to determine the saf...,NO,Acute Myelogenous Leukemia,BIOLOGICAL: CD33CART autologous|BIOLOGICAL: CD...,"Maximum tolerated dose - Autologous Arm, To de...","Feasibility of CD33CART manufacture, To determ...",,"CHILD, ADULT",PHASE1|PHASE2,INTERVENTIONAL,Allocation: NON_RANDOMIZED|Intervention Model:...,2020-01-08,2029-12,2039-12,"Children's Hospital of Los Angeles, Los Angele...",1
NCT04257175,CAR-T CD19 for Acute Myelogenous Leukemia With...,https://clinicaltrials.gov/study/NCT04257175,RECRUITING,Chimeric antigen receptor (CAR-T) engineered T...,NO,Acute Myeloid Leukemia,BIOLOGICAL: CAR-T CD19,The change in the peripheral blood counts and ...,,,"ADULT, OLDER_ADULT",PHASE2|PHASE3,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2020-02-18,2024-12-01,2024-12-01,"Chaim Sheba Medical Center, Ramat Gan, 57261, ...",1


In [24]:
mrd_terms = [
    "MRD",
    "Measurable Residual Disease",
    "Minimal Residual Disease",
]

In [25]:
print("Primary Outcome Measures: " + str(trials_df["Primary Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum()))
print("Secondary Outcome Measures: " + str(trials_df["Secondary Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum()))
print("Other Outcome Measures: " + str(trials_df["Other Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum()))

Primary Outcome Measures: 88
Secondary Outcome Measures: 302
Other Outcome Measures: 35


In [None]:
print("Primary Outcome Measures: " + str(searches_df["Primary Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum()))
print("Secondary Outcome Measures: " + str(searches_df["Secondary Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum()))
print("Other Outcome Measures: " + str(searches_df["Other Outcome Measures"].str.contains("|".join(mrd_terms), case=False).sum())) 

Primary Outcome Measures: 7
Secondary Outcome Measures: 40
Other Outcome Measures: 9


In [42]:
mesh_terms = articles_df["MeSH Terms"].apply(lambda x: x.split(", ")).explode().value_counts()

In [45]:
mesh_terms = pd.DataFrame(mesh_terms).reset_index().rename(columns={"index": "MeSH Term", 0: "Count"})

In [46]:
mesh_terms

Unnamed: 0,MeSH Terms,count
0,Humans,179
1,Leukemia,139
2,Acute,121
3,Myeloid,120
4,Aged,104
...,...,...
464,Exercise,1
465,Naphthyridines,1
466,Adenoid Cystic,1
467,Protein-Arginine N-Methyltransferases,1


In [None]:
articles_df.head()

In [38]:
trials_df.head()

Unnamed: 0,NCT Number,Study Title,Study URL,Study Status,Brief Summary,Study Results,Conditions,Interventions,Primary Outcome Measures,Secondary Outcome Measures,Other Outcome Measures,Age,Phases,Study Type,Study Design,Start Date,Primary Completion Date,Completion Date,Locations
0,NCT06226571,A Study of SNDX-5613 in Combination With Inten...,https://clinicaltrials.gov/study/NCT06226571,RECRUITING,The purpose of this study is to evaluate the s...,NO,Acute Myeloid Leukemias,DRUG: SNDX-5613|DRUG: Chemotherapy Regimen|DRU...,Dose Escalation: Number of Participants with D...,Maximum Plasma Concentration (Cmax) of SNDX-56...,,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NA|Intervention Model: SEQUENTIAL|...,2024-05-21,2027-02,2027-02,"UCLA Medical Hematology, Burbank, California, ..."
1,NCT04629443,Phase I/II Trial of S64315 Plus Azacitidine in...,https://clinicaltrials.gov/study/NCT04629443,COMPLETED,The purpose of this study is to assess the saf...,YES,Acute Myeloid Leukaemia,DRUG: S 64315 (also referred as MIK665) and az...,Dose Limiting Toxicity (DLT) (Phase I - Dose E...,Assess Anti-leukemic Activity of S64315 in Com...,,"ADULT, OLDER_ADULT",PHASE1|PHASE2,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2021-02-17,2023-08-25,2023-08-25,University of Texas MD Anderson Cancer Center ...
2,NCT06345365,MA+AZA Regimen for the Treatment of Newly Diag...,https://clinicaltrials.gov/study/NCT06345365,RECRUITING,Investigator proposed to apply the new dosage ...,NO,Acute Myeloid Leukaemia,"DRUG: mitoxantrone liposome, Ara-Cytarabine an...","Complete remission rate, Bone marrow primitive...","Incidence of adverse events, Incidence of adve...",,"ADULT, OLDER_ADULT",PHASE3,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: PAR...,2024-01-18,2026-12-31,2028-12-31,The First Affiliated Hospital of Zhengzhou Uni...
3,NCT06326697,Bioequivalence of Azacitidine 300 Mg Film-Coat...,https://clinicaltrials.gov/study/NCT06326697,RECRUITING,"A Randomized, Single Oral Dose, Open Label, Tw...",NO,Acute Myeloid Leukaemia,DRUG: Azacitidine|DRUG: Onureg,"Cmax, Maximum measured plasma concentration ov...","AUC0-∞, The area under the plasma concentratio...",,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: CRO...,2024-05-02,2025-05,2025-05,King Faisal Specialist Hospital and Research C...
4,NCT04106076,Phase I Study of UCART123 in Patient With Adve...,https://clinicaltrials.gov/study/NCT04106076,WITHDRAWN,"This is a Phase I, open-label, dose escalation...",NO,Acute Myeloid Leukaemia,BIOLOGICAL: UCART123,Incidence of AE/SAE/DLT [Safety and Tolerabili...,,,"ADULT, OLDER_ADULT",PHASE1,INTERVENTIONAL,Allocation: NA|Intervention Model: SINGLE_GROU...,2019-07-11,2019-12-05,2019-12-05,
