In [1]:
import pandas as pd

In [6]:
df_161 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Colorectal_161_Summarized.csv')
df_201 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Colorectal_201_Summarized.csv')
df_309 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Colorectal_309_Summarized.csv')
df_213 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Esophagael_213_Summarized.csv')
df_218 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Lung_218_Summarized.csv')
df_261 = pd.read_csv('/home/alexandra/Downloads/datasets/halal/Lung_261_Summarized.csv')

In [4]:
from transformers import pipeline

# Load a pre-trained QA model
qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")


model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [8]:
def general_sanity_check(description, linearized_info):
    """
    Perform a general sanity check by querying the model to verify facts in the description.
    
    Args:
        description (str): The generated natural language description of the data.
        linearized_info (str): The linearized data as a reference for extracting expected values.
    
    Returns:
        dict: A dictionary with each feature and a tuple indicating the presence and accuracy of the value.
    """
    # Extract individual features and their values from the linearized info
    feature_values = {item.split(':')[0].strip(): item.split(':')[1].strip() for item in linearized_info.split(',')}
    results = {}
    
    for feature, value in feature_values.items():
        # Construct the questions based on feature type
        is_present_question = f"Is {feature} mentioned in the description?"
        value_question = f"What is the value of {feature}?"

        # Query for presence
        presence_response = qa_pipeline({
            'context': description,
            'question': is_present_question
        })
        is_present = "yes" in presence_response['answer'].strip().lower()
        
        # Query for value accuracy if applicable
        if is_present:
            value_response = qa_pipeline({
                'context': description,
                'question': value_question
            })
            # Normalize the response for comparison
            retrieved_value = value_response['answer'].strip().lower()
            is_correct = (retrieved_value == value.lower())
            results[feature] = (is_present, is_correct)
        else:
            results[feature] = (is_present, False)

    return results



In [11]:
def refined_sanity_check(description, feature_values):
    """
    Perform refined sanity checks by adjusting question phrasing and ensuring exact matches.

    Args:
        description (str): The natural language summary of the patient's data.
        feature_values (dict): Dictionary of feature names and their expected textual representation.

    Returns:
        dict: A dictionary with each feature and a tuple indicating the presence and accuracy of the value.
    """
    results = {}
    questions = {
        "age": "How old is the patient?",
        "sex": "What is the sex of the patient?",
        "bmi": "What is the patient's BMI?",
        "treatment arm": "What treatment is the patient receiving?",
        "chemotherapy cycles": "How many chemotherapy cycles has the patient completed?",
        "histology": "What is the histology of the cancer?",
        "performance status": "What is the patient's performance status?",
        "KRAS biomarker": "What is the KRAS biomarker status?"
    }
    
    for feature, question in questions.items():
        expected_value = feature_values.get(feature)
        answer = qa_pipeline({
            'context': description,
            'question': question
        })
        answer_text = answer['answer'].strip().lower()
        
        # Normalize expected value for better comparison
        expected_norm = expected_value.lower().replace('"', '').replace('-', ' ')
        
        is_correct = (answer_text == expected_norm)
        results[feature] = is_correct

    return results

# Use the function on your example
feature_values = {
    "age": "57",
    "sex": "female",
    "bmi": "20.97",
    "treatment arm": "FOLFOX",
    "chemotherapy cycles": "12",
    "histology": "well differentiated",
    "performance status": "fully active",
    "KRAS biomarker": "wild-type"
}

sanity_results = refined_sanity_check(df_161['summary'][0], feature_values)
print(sanity_results)


{'age': False, 'sex': False, 'bmi': True, 'treatment arm': False, 'chemotherapy cycles': True, 'histology': False, 'performance status': True, 'KRAS biomarker': False}


In [10]:
print(df_161['linearized'][0])
print(df_161['summary'][0])

race: other, sex: female, age: 57, bmi: 20.97, treatment arm: FOLFOX, chemotherapy cycles: 12, histology: well differentiated, performance status: fully active, KRAS biomarker: wild-type
The patient is a 57-year-old female of a race categorized as "other" with a BMI of 20.97. She is undergoing treatment with the FOLFOX chemotherapy regimen and has completed 12 cycles. Her cancer histology is well-differentiated, and she has a fully active performance status. Additionally, her KRAS biomarker status is wild-type.


In [9]:
# Sample data from your DataFrame
sample_description = df_161['summary'].iloc[0]
sample_linearized_info = df_161['linearized'].iloc[0]

# Assuming the QA pipeline is already defined and loaded
sanity_results = general_sanity_check(sample_description, sample_linearized_info)
print(sanity_results)

{'race': (False, False), 'sex': (False, False), 'age': (False, False), 'bmi': (False, False), 'treatment arm': (False, False), 'chemotherapy cycles': (False, False), 'histology': (False, False), 'performance status': (False, False), 'KRAS biomarker': (False, False)}
