In [1]:
import pandas as pd
import os 
import json 

from IPython.display import display

# Performance Metrics

The best- and worst-performing configurations for each classification performance metric are shown here. The best results are selected with `get_best_result()` and the worst results with `get_worst_result()`.

In [2]:
def get_best_result(df: pd.DataFrame, key: str = None, youden_threshold: float = 0.5):
    """Return the row of ``df`` holding the largest value in column ``key``.

    When ``key`` is ``'sensitivity'`` or ``'specificity'``, only rows whose
    ``'youden'`` value is strictly greater than ``youden_threshold`` are
    considered. For any other ``key`` the whole frame is searched.

    Args:
        df: Frame containing the ``key`` column (and a ``'youden'`` column
            when ``key`` is ``'sensitivity'`` or ``'specificity'``).
        key: Name of the column to maximise. Required in practice; the
            ``None`` default is kept only for backward compatibility.
        youden_threshold: Lower bound (exclusive) on ``'youden'`` applied for
            the sensitivity/specificity selection. Defaults to ``0.5``.

    Returns:
        The result of ``.loc`` on the label returned by ``idxmax``.

    Raises:
        ValueError: If no row passes the Youden filter.
    """
    candidates = df
    if key in ('sensitivity', 'specificity'):
        candidates = df[df['youden'] > youden_threshold]
        if candidates.empty:
            # idxmax on an empty column also raises ValueError, but with a
            # message that does not mention the filter that emptied it.
            raise ValueError(
                f"No configuration has youden > {youden_threshold}; "
                f"cannot select the best {key}."
            )
    return candidates.loc[candidates[key].idxmax()]

def get_worst_result(df: pd.DataFrame, key: str):
    """Return the row of ``df`` holding the smallest value in column ``key``.

    No filtering is applied: the minimum is taken over every row of ``df``.
    """
    lowest_label = df[key].idxmin()
    return df.loc[lowest_label]

def extract_config(df_line: pd.DataFrame):
    """Keep only the fields that identify a configuration.

    The selection is a plain label lookup, so it works on whatever pandas
    object supports indexing with this list of labels. In this notebook it is
    called with the row returned by ``get_best_result`` / ``get_worst_result``.
    """
    config_fields = [
        'config_hash_code',
        'projection_method',
        'thickness',
        'overlap',
        'breast_skin_removal',
        'top',
        'op_shift',
    ]
    return df_line[config_fields]

## Best Configuration Results

### Load your performance metrics CSV file 

All performance metrics are saved in `performance_metrics-full.csv`. You can change the file path here. 

In [3]:
# All paths in this notebook are resolved against the current working directory.
cur_dir = os.getcwd()
metrics_dir = os.path.join(cur_dir, 'metrics')

# Load the performance metrics table used by every result cell below.
csv_filepath = os.path.join(metrics_dir, 'performance_metrics-full.csv')
df = pd.read_csv(csv_filepath)

### Save the list of SeriesInstanceUIDs and corresponding configurations

The prediction results are saved under `data/output/predictions` as JSON files. First, the file names are read to obtain the configuration hash codes. Second, the keys of each file are read as the SeriesInstanceUIDs of the DICOM files predicted with that configuration. 

In [4]:
def list_predictions(pred_dir):
    """Collect the SeriesInstanceUIDs predicted under each configuration.

    Only regular files located directly inside ``pred_dir`` and named
    ``predictions_<config_hash_code>.json`` are read. Sub-directories and
    any other files (editor backups, ``.DS_Store``, ...) are ignored.

    Args:
        pred_dir: Directory containing the prediction JSON files.

    Returns:
        A ``(preds, count)`` tuple. ``preds`` maps each config hash code to
        the set of top-level keys of its JSON object (the SeriesInstanceUIDs).
        ``count`` is the number of prediction files loaded. If ``pred_dir``
        does not exist, ``({}, 0)`` is returned.
    """
    # Used to recognise prediction files and to slice out the hash code.
    prefix = 'predictions_'
    extension = '.json'

    if not os.path.isdir(pred_dir):
        return {}, 0

    preds = {}
    # Sorted so the resulting dict order does not depend on the file system.
    for pred_file in sorted(os.listdir(pred_dir)):
        if not (pred_file.startswith(prefix) and pred_file.endswith(extension)):
            continue
        pred_filepath = os.path.join(pred_dir, pred_file)
        if not os.path.isfile(pred_filepath):
            continue

        config_hash_code = pred_file[len(prefix):-len(extension)]
        with open(pred_filepath, 'r', encoding='utf-8') as file:
            pred_dict = json.load(file)
        preds[config_hash_code] = set(pred_dict.keys())

    # File names are unique within a directory, so one entry per loaded file.
    return preds, len(preds)

Invert the predictions dictionary such that each key is the SeriesInstanceUID and the value is the list of config hash codes. 

In [5]:
def invert_dict(original_dict):
    """Swap keys and values of a dict whose values are iterables.

    Every element found in a value collection becomes a key of the result,
    mapped to the list of original keys whose collection contained it. Each
    original key is listed at most once per element, in iteration order.
    """
    inverted = {}
    for source_key, members in original_dict.items():
        for member in members:
            owners = inverted.setdefault(member, [])
            if source_key not in owners:
                owners.append(source_key)
    return inverted

In [6]:
# The prediction files cover every label: benign, cancer, normal, and actionable.
predictions_dir = os.path.join(cur_dir, 'data/output/predictions')
preds, config_num = list_predictions(pred_dir=predictions_dir)

# Re-key by SeriesInstanceUID: each UID now maps to the config hash codes that list it.
preds = invert_dict(preds)
series_instance_uids = list(preds)

In [7]:
from program.performance_metrics import PerformanceMetrics
import yaml 

# Load the dataset and settings configuration files.
with open('config/dataset.yaml', 'r') as dataset_file:
    dataset_config = yaml.load(dataset_file, Loader=yaml.FullLoader)

with open('config/settings.yaml', 'r') as settings_file:
    settings_config = yaml.load(settings_file, Loader=yaml.FullLoader)

# Substitute the $ROOT$ placeholder with the current working directory.
# Only top-level string values are rewritten; nested values are left as-is.
for config in (settings_config, dataset_config):
    for cfg in config:
        value = config[cfg]
        if isinstance(value, str) and '$ROOT$' in value:
            config[cfg] = value.replace('$ROOT$', cur_dir)

perf_metrics = PerformanceMetrics(dataset_config=dataset_config, settings_config=settings_config)
perf_metrics.init()

In [8]:
# Labels to keep; DBT volumes with the "actionable" label are filtered out.
labels = ['normal', 'benign', 'cancer']

def filter_preds(labels, series_instance_uids):
    """Restrict the notebook-level ``preds`` mapping to series with the given labels.

    Args:
        labels: Labels forwarded to ``perf_metrics.filter_series``.
        series_instance_uids: SeriesInstanceUIDs forwarded to
            ``perf_metrics.filter_series``.

    Returns:
        A dict with the entries of the global ``preds`` whose key appears in
        the ``'SeriesInstanceUID'`` column returned by ``filter_series``.
    """
    filtered_series = perf_metrics.filter_series(dataset='train',
                                                 labels=labels,
                                                 series_instance_uids=series_instance_uids)
    # A set makes each membership test constant-time instead of a list scan
    # repeated for every key of `preds`.
    kept_uids = set(filtered_series['SeriesInstanceUID'].to_list())
    return {key: value for key, value in preds.items() if key in kept_uids}
        
# Predictions restricted to the series carrying one of the labels in `labels`.
filtered_preds_all = filter_preds(labels, series_instance_uids)

In [9]:
# Summary of how many configurations and DBT series are being compared.
print('--------------Configurations--------------')
print('Number of configurations: ', config_num, '\n')

print('-----------------Dataset------------------')
# Number of series left after keeping only the labels listed in `labels`.
print('Number of DICOM files: ', len(filtered_preds_all))

# Per-label counts: the filter is re-run with one label at a time.
for label in labels:        
    print(f'Number of {label} DBT images: ', len(filter_preds([label], series_instance_uids)))

--------------Configurations--------------
Number of configurations:  24 

-----------------Dataset------------------
Number of DICOM files:  1880
Number of normal DBT images:  1680
Number of benign DBT images:  124
Number of cancer DBT images:  76


In [10]:
# Persist the filtered SeriesInstanceUID -> config hash codes mapping in the metrics directory.
performance_metrics_filepath = os.path.join(metrics_dir, 'performance_metrics_dicom_list.json')
with open(performance_metrics_filepath, 'w') as json_out:
    json.dump(filtered_preds_all, json_out, indent=4)

### 1. Sensitivity & Specificity
For sensitivity and specificity, we only consider the results whose Youden metric is larger than 0.5. The Youden metric is a compromise between sensitivity and specificity. 

$$J = \text{sensitivity} + \text{specificity} - 1$$

In [11]:
# print(df.head(5))

In [12]:
# Highest sensitivity among the rows with youden > 0.5 (the filter is applied inside get_best_result).
best_sensitivity = get_best_result(df, key='sensitivity')
display(extract_config(best_sensitivity))
print("Best sensitivity value: ", best_sensitivity['sensitivity'])

config_hash_code       723f00a764c4e6d58e49bb4c5860af3b
projection_method                              soft_mip
thickness                                            14
overlap                                               7
breast_skin_removal                                   1
top                                                   3
op_shift                                            0.1
Name: 279, dtype: object

Best sensitivity value:  0.921052632


In [13]:
# Highest specificity among the rows with youden > 0.5 (the filter is applied inside get_best_result).
best_specificity = get_best_result(df, key='specificity')
display(extract_config(best_specificity))
print("Best specificity value: ", best_specificity['specificity'])

config_hash_code       44ed11e82d9ff1a2fa57dd54b9abc3ca
projection_method                                   aip
thickness                                             8
overlap                                               4
breast_skin_removal                                   0
top                                                   3
op_shift                                          0.325
Name: 1034, dtype: object

Best specificity value:  0.969512195


### 2. Youden

In [14]:
# Highest youden value over all rows of the metrics table (no filter for this key).
best_youden = get_best_result(df, key='youden')
display(extract_config(best_youden))
print("Best youden value: ", best_youden['youden'])

config_hash_code       61222f22071d5999c98d26eea4638834
projection_method                              soft_mip
thickness                                             6
overlap                                               0
breast_skin_removal                                   0
top                                                   1
op_shift                                            0.3
Name: 783, dtype: object

Best youden value:  0.670702532


### 3. Accuracy Score

In [15]:
# Highest accuracy_score over all rows of the metrics table (no filter for this key).
best_accuracy_score = get_best_result(df, key='accuracy_score')
display(extract_config(best_accuracy_score))
print("Best accuracy value: ", best_accuracy_score['accuracy_score'])

config_hash_code       118fbb88b272fdcffc902dfdeef0b802
projection_method                                   mip
thickness                                            14
overlap                                               7
breast_skin_removal                                   0
top                                                   3
op_shift                                          0.375
Name: 1328, dtype: object

Best accuracy value:  0.963829787


### 4. Area under the ROC Curve


In [16]:
# Highest auc over all rows of the metrics table (no filter for this key).
best_auc = get_best_result(df, key='auc')
display(extract_config(best_auc))
print("Best AUC ROC value: ", best_auc['auc'])

config_hash_code       65c28bee8dfe5ba35701b66636682dd9
projection_method                                   aip
thickness                                             8
overlap                                               4
breast_skin_removal                                   1
top                                                   3
op_shift                                            0.0
Name: 122, dtype: object

Best AUC ROC value:  0.87839888


## Worst Configuration Results

### 1. Sensitivity & Specificity

In [17]:
# Lowest sensitivity over all rows; unlike the best-result lookup, no youden filter is applied.
worst_sensitivity = get_worst_result(df, key='sensitivity')
display(extract_config(worst_sensitivity))
print("Worst sensitivity value: ", worst_sensitivity['sensitivity'])

config_hash_code       495d15cd74891936bc1c8f70eae7e2cb
projection_method                                   aip
thickness                                            14
overlap                                               0
breast_skin_removal                                   0
top                                                   3
op_shift                                          0.375
Name: 1341, dtype: object

Worst sensitivity value:  0.0


In [18]:
# Lowest specificity over all rows; unlike the best-result lookup, no youden filter is applied.
worst_specificity = get_worst_result(df, key='specificity')
display(extract_config(worst_specificity))
print("Worst specificity value: ", worst_specificity['specificity'])

config_hash_code       01cf1a3db9ec88e8ef8f0893d9dd6fb3
projection_method                                   aip
thickness                                            12
overlap                                               6
breast_skin_removal                                   1
top                                                   1
op_shift                                           -0.2
Name: 1762, dtype: object

Worst specificity value:  0.0


### 2. Youden

In [19]:
# Lowest youden value over all rows of the metrics table.
worst_youden = get_worst_result(df, key='youden')
display(extract_config(worst_youden))
print("Worst youden value: ", worst_youden['youden'])

config_hash_code       f83058018babfef646d054c91fc4e2ce
projection_method                              soft_mip
thickness                                            10
overlap                                               0
breast_skin_removal                                   1
top                                                   3
op_shift                                            0.4
Name: 1527, dtype: object

Worst youden value:  -0.003880266


### 3. Accuracy Score

In [20]:
# Lowest accuracy_score over all rows of the metrics table.
worst_accuracy_score = get_worst_result(df, key='accuracy_score')
display(extract_config(worst_accuracy_score))
print("Worst accuracy value: ", worst_accuracy_score['accuracy_score'])

config_hash_code       01cf1a3db9ec88e8ef8f0893d9dd6fb3
projection_method                                   aip
thickness                                            12
overlap                                               6
breast_skin_removal                                   1
top                                                   1
op_shift                                           -0.2
Name: 1762, dtype: object

Worst accuracy value:  0.040425532


### 4. Area under the ROC Curve

In [21]:
# Lowest auc over all rows of the metrics table.
worst_auc = get_worst_result(df, key='auc')
display(extract_config(worst_auc))
print("Worst AUC ROC value: ", worst_auc['auc'])

config_hash_code       2a4f51daa7b0e83dfe49f85ecb26becf
projection_method                                   aip
thickness                                            14
overlap                                               0
breast_skin_removal                                   1
top                                                   1
op_shift                                            0.0
Name: 9, dtype: object

Worst AUC ROC value:  0.818604855
