In [21]:
from transformers import pipeline

# Set up the token classification pipeline.
# With aggregation_strategy='simple' the printed results are grouped entities:
# each dict carries 'entity_group', 'score', 'word', 'start' and 'end'.
pipe = pipeline("token-classification", model="Clinical-AI-Apollo/Medical-NER", aggregation_strategy='simple')

# Use the pipeline on a sample text.
# NOTE(review): the sample looks like a pasted repr. It keeps its own
# surrounding single quotes, and because this is a non-raw triple-quoted
# literal the \n sequences become real newlines while the doubled backslashes
# collapse to a single backslash before '*' and '^'. The leading newline and
# the opening quote are therefore part of the model input, which is why the
# first recorded entity ('DISEASE') starts at offset 2 rather than 0.
# There is also a physical line break in the middle of "during study /
# participation" that is part of the string.
text = """
'DISEASE CHARACTERISTICS:\n\n* Histologically confirmed chronic phase chronic myelogenous leukemia (CML)\n\n  * Presence of t(9;22)(q34;q11) with at least 20 cells examined in metaphase by cytogenetic examination of the bone marrow\n* Complete hematologic remission during prior therapy\\* as seen on 2 separate blood count analyses, defined by the following:\n\n  * WBC no greater than 10,000/mm\\^3 AND platelet count no greater than 450,000/mm\\^3\n  * Disappearance of all signs and symptoms of disease, including palpable splenomegaly\n  * Normal differential counts (i.e., absence of blasts, promyelocytes, myelocytes, and metamyelocytes) NOTE: \\*Continuation of therapy that led to complete hematologic remission is required during study participation\n* Persistent cytogenetic disease despite 12 months of prior imatinib mesylate therapy, which may have included a trial dose-escalation OR intolerant of imatinib mesylate at a dose greater than 400 mg/day\n* Not in complete cytogenetic remission within 30 days of study entry\n\n  * Persistent Philadelphia chromosome by bone marrow exam\n\nPATIENT CHARACTERISTICS:\n\nAge\n\n* 18 and over\n\nPerformance status\n\n* ECOG 0-2\n\nLife expectancy\n\n* More than 6 months\n\nHematopoietic\n\n* See Disease Characteristics\n\nHepatic\n\n* Not specified\n\nRenal\n\n* Not specified\n\nOther\n\n* Not pregnant or nursing\n* Fertile patients must use effective contraception\n* No uncontrolled active infective\n* No serious medical or psychiatric illness that would prevent giving informed consent or limit survival to less than 6 months\n* No other malignancy not in remission except curatively treated basal cell skin cancer or carcinoma in situ of the cervix\n\nPRIOR CONCURRENT THERAPY:\n\nBiologic therapy\n\n* Prior sargramostim (GM-CSF) allowed\n* Prior interferon alfa for CML allowed\n* No prior stem cell transplantation\n* Concurrent interferon alfa\\* for CML allowed NOTE: \\*No dose increase during study 
participation\n\nChemotherapy\n\n* At least 4 weeks since prior chemotherapy\n\nEndocrine therapy\n\n* Not specified\n\nRadiotherapy\n\n* At least 4 weeks since prior radiotherapy\n* No concurrent radiotherapy\n\nSurgery\n\n* At least 4 weeks since prior surgery\n\nOther\n\n* Prior imatinib mesylate for CML allowed\n* No other concurrent medication for CML\n* Concurrent imatinib mesylate\\* for CML allowed NOTE: \\*No dose increase during study participation'
"""
# `result` is kept as a notebook-level name; later cells index into it.
result = pipe(text)
# Prints the whole entity list as plain text (the recorded output is long).
print(result)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity_group': 'DISEASE_DISORDER', 'score': 0.41900337, 'word': 'DISEASE', 'start': 2, 'end': 9}, {'entity_group': 'DETAILED_DESCRIPTION', 'score': 0.43658236, 'word': 'Histological', 'start': 29, 'end': 42}, {'entity_group': 'DETAILED_DESCRIPTION', 'score': 0.6389923, 'word': 'chronic phase', 'start': 54, 'end': 68}, {'entity_group': 'DISEASE_DISORDER', 'score': 0.7695001, 'word': 'chronic myelogenous leukemia', 'start': 68, 'end': 97}, {'entity_group': 'DISEASE_DISORDER', 'score': 0.592165, 'word': 'CML', 'start': 99, 'end': 102}, {'entity_group': 'SIGN_SYMPTOM', 'score': 0.85616136, 'word': 't(9;22)(q34;q11', 'start': 120, 'end': 136}, {'entity_group': 'DETAILED_DESCRIPTION', 'score': 0.47536024, 'word': ')', 'start': 136, 'end': 137}, {'entity_group': 'LAB_VALUE', 'score': 0.68545294, 'word': 'at least 20 cells examined in', 'start': 142, 'end': 172}, {'entity_group': 'DETAILED_DESCRIPTION', 'score': 0.7111942, 'word': 'metaphase', 'start': 172, 'end': 182}, {'entity_group': 'DI

In [17]:
# Surface text ('word') of the first entity in `result`.
# NOTE(review): the recorded output ('48 year-old') does not match the entities
# printed by cell In [21], whose first word is 'DISEASE'. This cell has a lower
# execution count (17), so the output presumably comes from an earlier `result`
# built from a different text; re-run after In [21] to refresh it.
result[0].get('word')

'48 year-old'

In [18]:
# Entity label ('entity_group') of the first entity in `result`.
# NOTE(review): recorded output 'AGE' differs from the first entity printed by
# cell In [21] ('DISEASE_DISORDER'); execution count 18 suggests a stale output.
result[0].get('entity_group')

'AGE'

In [19]:
# Surface text ('word') of the second entity in `result`.
# NOTE(review): recorded output 'girl' differs from the second entity printed by
# cell In [21] ('Histological'); execution count 19 suggests a stale output.
result[1].get('word')

'girl'

In [20]:
# Entity label ('entity_group') of the second entity in `result`.
# NOTE(review): recorded output 'SEX' differs from the second entity printed by
# cell In [21] ('DETAILED_DESCRIPTION'); execution count 20 suggests a stale output.
result[1].get('entity_group')

'SEX'

In [24]:
from transformers import pipeline

# Build the NER pipeline used by the entity filter below; entities are
# returned grouped because an aggregation strategy is requested.
pipe = pipeline(
    task="token-classification",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy="simple",
)

# Define the function to filter specific entities
def extract_specific_entities(text, target_entities=('DISEASE_DISORDER', 'MEDICATION')):
    """Run the NER pipeline on ``text`` and keep only the requested entity groups.

    Args:
        text (str): Free text handed to the notebook-level ``pipe`` pipeline.
        target_entities: Entity-group label(s) to keep. May be any iterable of
            labels or a single label given as a plain string. ``None`` selects
            the default labels. Labels are compared by exact equality with
            each result's ``'entity_group'`` value.

    Returns:
        list: The pipeline's result dicts, in their original order, whose
        ``'entity_group'`` is one of the requested labels.
    """
    if target_entities is None:
        target_entities = ('DISEASE_DISORDER', 'MEDICATION')
    elif isinstance(target_entities, str):
        # A bare string would otherwise be searched by substring
        # (e.g. 'AGE' is found inside 'DOSAGE'); treat it as one label.
        target_entities = (target_entities,)

    # Materialise once: gives exact, constant-time membership tests and keeps
    # one-shot iterables (generators) from being consumed by the first entity.
    wanted = frozenset(target_entities)

    return [entity for entity in pipe(text) if entity['entity_group'] in wanted]

# Example patient record
patient_record = "45 year old woman diagnosed with CAD, prescribed with aspirin."

# Extract specific entities. No target_entities argument is passed, so the
# function's default labels (DISEASE_DISORDER and MEDICATION) are used.
# NOTE(review): in the recorded output 'CAD' is kept with a score of only
# ~0.21; the filter looks at 'entity_group' alone and applies no score
# threshold.
entities = extract_specific_entities(patient_record)
print(entities)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[{'entity_group': 'DISEASE_DISORDER', 'score': 0.21398772, 'word': 'CAD', 'start': 32, 'end': 36}, {'entity_group': 'MEDICATION', 'score': 0.5350736, 'word': 'aspirin', 'start': 53, 'end': 61}]


In [1]:
import sys
import os

# Ensure the 'models' directory (sibling of the current working directory)
# is in the Python path. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
models_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'models'))
if models_dir not in sys.path:
    sys.path.append(models_dir)

from entity_extractor import EntityExtractor

# `extractor` is a notebook-level name; later cells call methods on it.
extractor = EntityExtractor()

# Bare last expression so the notebook displays the return value; the recorded
# output is a dict with 'diseases' and 'medications' lists.
extractor.extract_specific_entities("45 year old woman diagnosed with CAD and hypertension, prescribed with aspirin and metformin.")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'diseases': ['CAD', 'hypertension'], 'medications': ['aspirin', 'metformin']}

In [4]:
# Re-run the extraction on the same sentence and keep the returned mapping in
# `results` so individual keys can be inspected (this cell and the next).
results = extractor.extract_specific_entities("45 year old woman diagnosed with CAD and hypertension, prescribed with aspirin and metformin.")
# NOTE(review): the 'Unknown' fallback is a str, whereas the recorded value for
# this key is a list; callers iterating over the result would see characters.
results.get('diseases','Unknown')

['CAD', 'hypertension']

In [5]:
# Medication names from `results`, which must already exist in the kernel.
# NOTE(review): the 'Unknown' fallback is a str while the recorded value is a
# list, so the two branches have different types.
results.get('medications','Unknown')

['aspirin', 'metformin']

In [None]:
def process_patient_records(patient_records, extractor=None):
    """
    Process patient records to extract specific entities from each one.

    Args:
        patient_records (iterable): Patient records (strings); processed in
            iteration order.
        extractor (optional): Object exposing
            ``extract_specific_entities(record)``. When omitted, a new
            ``EntityExtractor()`` is created, exactly as before. Pass an
            existing instance to avoid constructing another extractor on
            every call.

    Returns:
        list: One entry per record, in input order, holding whatever
        ``extract_specific_entities`` returned for that record.
    """
    if extractor is None:
        # Default kept for backward compatibility with existing callers.
        extractor = EntityExtractor()

    return [extractor.extract_specific_entities(record) for record in patient_records]

if __name__ == "__main__":
    # Sample eligibility criteria; replace with real data when available.
    eligibility_criteria = [
        "45 year old woman diagnosed with CAD and hypertension, prescribed with aspirin and metformin.",
        "50 year old man with diabetes, taking insulin and metformin.",
        # Add more eligibility criteria as needed
    ]

    # Run entity extraction over every criterion.
    study_data = process_patient_records(eligibility_criteria)

    # Emit one extraction result per line.
    for extracted in study_data:
        print(extracted)