# Evaluate UMLS

In [16]:
import pandas as pd
from pathlib import Path

In [122]:
annotations = pd.read_csv(f'../annotations/combined_pd.csv')
patients = annotations[annotations.group_name == 'patients']
# Only keep those where `umls` annotation is not empty
patients = patients[patients.umls.notnull()]
fields_keep = ['pmcid', 'group_name', 'subgroup_name', 'count', 'umls', 'diagnosis']
patients = patients[fields_keep]
outputs = Path('../outputs')

In [123]:
all_predictions = []
for pred in list(outputs.glob('*umls*')):
    md = pred.stem.split('_')
    if md[0] == 'chunked':
        chunk = True
        prompt = md[1]
        model_name = md[2]
    else:
        chunk = False
        source = md[1]
        prompt = md[2]
        model_name = md[3]
    pred_df = pd.read_csv(pred)

    pred_df['chunk'] = chunk    
    pred_df['source'] = source
    pred_df['prompt'] = prompt
    pred_df['model_name'] = model_name

    all_predictions.append(pred_df)



In [124]:
# For each PMCID, get the best match for each UMLS code, without replacement (i.e. only one match per UMLS code)
# If no match is found, then the UMLS code is not matched

def match_umls_codes(umls_predicitons, annot_patients):
    annot_patients = annot_patients.copy()


    for pmcid, group in annot_patients.groupby('pmcid'):
        group_preds = umls_predicitons[umls_predicitons.pmcid == pmcid]

        if len(group_preds) == 0:
            continue

        used_group_ixs = []
        for row in group.itertuples():
            if pd.isnull(row.umls):
                continue

            correct = [c.strip() for c in row.umls.split(', ')]
            
            matches = group_preds[(group_preds['umls_cui'].isin(correct)) & (group_preds['group_ix'].isin(group) == False)]
            if len(matches) > 0:
                m =  matches[matches['umls_prob'] == matches['umls_prob'].max()].iloc[0]
                used_group_ixs.append(m['group_ix'])
                
                annot_patients.at[row.Index, 'umls_match'] = 1
                annot_patients.at[row.Index, 'umls_prob'] = m['umls_prob']
                annot_patients.at[row.Index, 'diagnosis_pred'] = m['diagnosis']
            else:
                annot_patients.at[row.Index, 'umls_match'] = 0


    annot_patients['source'] = umls_predicitons['source'].iloc[0]
    annot_patients['prompt'] = umls_predicitons['prompt'].iloc[0]
    annot_patients['model_name'] = umls_predicitons['model_name'].iloc[0]
    annot_patients['chunk'] = umls_predicitons['chunk'].iloc[0]

    return annot_patients



In [141]:
# For each prediction file, get the best match for each UMLS code, without replacement (i.e. only one match per UMLS code)
matches = []
for pred_df in all_predictions:
    matches.append(match_umls_codes(pred_df, patients))

matches = pd.concat(matches)

summary = matches.groupby(['source', 'prompt', 'model_name', 'chunk']).(match_n=('umls_match','sum'), 
                                            match_mean= ('umls_match','mean'),
                                            Average=('Grade','mean'))
summary['null_prediction'] = matches.groupby(['source', 'prompt', 'model_name', 'chunk'])['diagnosis_pred'].apply(lambda x: x.isna().mean()).reset_index()['diagnosis_pred']

    

In [145]:
matches

Unnamed: 0,pmcid,group_name,subgroup_name,count,umls,diagnosis,umls_match,umls_prob,diagnosis_pred,source,prompt,model_name,chunk
0,4522562,patients,rpc,5,C4049190,thermo-coagulation lesions in the right parah...,0.0,,,md,demographics-zeroshot,gpt-4o-2024-05-13,False
1,3701149,patients,22q11.2DS,13,C2936346,22q11.2 Deletion Syndrome,1.0,0.868713,22q11.2 Deletion Syndrome,md,demographics-zeroshot,gpt-4o-2024-05-13,False
2,7275020,patients,_,53,C3508472,acute mild traumatic brain injury,1.0,1.000000,mild traumatic brain injury,md,demographics-zeroshot,gpt-4o-2024-05-13,False
3,7539836,patients,_,50,C3508472,acute mild traumatic brain injury,1.0,1.000000,mild traumatic brain injury,md,demographics-zeroshot,gpt-4o-2024-05-13,False
4,11024046,patients,_,16,C0002395,AD,1.0,1.000000,Alzheimer's disease,md,demographics-zeroshot,gpt-4o-2024-05-13,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,5416685,patients,primary study,20,C0041696,unipolar major depression,1.0,0.824893,unipolar major depression,html,demographics-zeroshot,gpt-4o-mini-2024-07-18,False
142,5416685,patients,validation study,19,C0041696,unipolar major depression,1.0,0.824893,unipolar major depression,html,demographics-zeroshot,gpt-4o-mini-2024-07-18,False
143,6350268,patients,_,31,C3805043,vascular mild cognitive impairment\n,,,,html,demographics-zeroshot,gpt-4o-mini-2024-07-18,False
144,4215530,patients,_,24,C3897192,Very preterm birth\n,,,,html,demographics-zeroshot,gpt-4o-mini-2024-07-18,False


#### Look at UMLS with no prediction at all

In [9]:
gpt_4_o = pd.read_csv('../outputs/full_md_demographics-zeroshot_gpt-4o-2024-05-13_clean.csv')
isna = patients[patients.umls_match.isna()].pmcid.unique()
gpt_4_diagna = gpt_4_o[gpt_4_o.pmcid.isin(isna)]


In [10]:
# Fully missing from GPT_4 predictions
set(isna) - set(gpt_4_diagna.pmcid.unique())

set()

In [11]:
for pmcid in isna:
    if pmcid not in gpt_4_diagna.pmcid.unique():
        continue
    print(pmcid)
    display(gpt_4_diagna[gpt_4_diagna.pmcid == pmcid])
    display(patients[patients.pmcid == pmcid])
    print()

5394595


Unnamed: 0,count,diagnosis,group_name,subgroup_name,male_count,female_count,age_mean,age_range,age_minimum,age_maximum,age_median,imaging_sample,pmcid
282,42,bronchial asthma,patients,asthmatic patients,,,,,,,,yes,5394595.0
283,60,,healthy,healthy controls,,,,,,,,yes,5394595.0
284,17,bronchial asthma,patients,GCBT-treated asthmatic patients,7.0,10.0,50.76,,,,,yes,5394595.0


Unnamed: 0.1,Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,umls,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,umls_match,umls_prob,diagnosis_pred
33,306,patients,_,participant_demographics,Jerome_Dockes,5394595,,bronchial asthma without acute attacks,42,,,,,,,,,



5371603


Unnamed: 0,count,diagnosis,group_name,subgroup_name,male_count,female_count,age_mean,age_range,age_minimum,age_maximum,age_median,imaging_sample,pmcid
349,14,congenitally profoundly deaf,patients,CS group,3.0,11.0,25.0,18-33,18.0,33.0,,yes,5371603.0
350,15,normally hearing,healthy,NH group,6.0,9.0,25.166667,20-37,20.0,37.0,,yes,5371603.0


Unnamed: 0.1,Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,umls,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,umls_match,umls_prob,diagnosis_pred
45,341,patients,_,participant_demographics,Jerome_Dockes,5371603,C0011053,deafness,14,3.0,25.0,11.0,18.0,33.0,,,,



6004505


Unnamed: 0,count,diagnosis,group_name,subgroup_name,male_count,female_count,age_mean,age_range,age_minimum,age_maximum,age_median,imaging_sample,pmcid
445,100,multi-problem young adults with varying levels...,patients,multi-problem young adults,100.0,0.0,,18-27,18.0,27.0,,yes,6004505.0
446,22,,healthy,healthy controls,22.0,0.0,,,,,,yes,6004505.0


Unnamed: 0.1,Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,umls,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,umls_match,umls_prob,diagnosis_pred
87,216,patients,_,participant_demographics,Jerome_Dockes,6004505,,Multi-Problem Young Adults,100,100.0,,,,,,,,



5104469


Unnamed: 0,count,diagnosis,group_name,subgroup_name,male_count,female_count,age_mean,age_range,age_minimum,age_maximum,age_median,imaging_sample,pmcid
387,21,history of childhood abuse,patients,young people with a history of childhood abuse,15.0,6.0,17.5,,,,,yes,5104469.0
388,19,psychiatric conditions,patients,psychiatric controls,9.0,10.0,16.9,,,,,yes,5104469.0
389,27,no history of psychiatric illness and childhoo...,healthy,healthy controls,21.0,6.0,17.5,,,,,yes,5104469.0


Unnamed: 0.1,Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,umls,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,umls_match,umls_prob,diagnosis_pred
125,126,patients,severe childhood abuse,participant_demographics,Jerome_Dockes,5104469,C5828513,Severe Childhood Abuse,21,15.0,17.5,6.0,,,,0.0,,
263,127,patients,psychiatric controls,participant_demographics,Jerome_Dockes,5104469,,,19,9.0,16.9,10.0,,,,,,



11033924


Unnamed: 0,count,diagnosis,group_name,subgroup_name,male_count,female_count,age_mean,age_range,age_minimum,age_maximum,age_median,imaging_sample,pmcid
479,122,relapsing-remitting multiple sclerosis,patients,MS patients,38.0,84.0,37.6,18-65,18.0,65.0,,yes,11033924.0
480,97,no diagnosis,healthy,HC subjects,41.0,56.0,38.1,18-65,18.0,65.0,,yes,11033924.0


Unnamed: 0.1,Unnamed: 0,group_name,subgroup_name,project_name,annotator_name,pmcid,umls,diagnosis,count,male count,age mean,female count,age minimum,age maximum,age median,umls_match,umls_prob,diagnosis_pred
442,464,patients,_,participant_demographics,ju-chi_yu,11033924,,,114,,,,,,,,,



