# Late Fusion Aggregators

### Take Classifications and Try Multiple Aggregate Functions

Two models:
1. Average of Individual classifiers
2. Average ranking (ignores "model confidence")
3. (later) Weighted Average (weighted by AUC of each class, scaled between 0 and 1)
4. (later) Decision tree embedder (explainable - can also retrieve for each data point which values were important (for classifications, human explainable, for embeddings, perhaps add up % of image/notes/tab features used in decision)

Benefits of Late Fusion:
Good with missing data!!! (Huge for product side - multiple fusion models to handle different use cases)

In [6]:
import json

import numpy as np
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, hamming_loss, multilabel_confusion_matrix
from sklearn.preprocessing import LabelBinarizer

from sklearn import svm

from sklearn.metrics import roc_curve, auc, roc_auc_score

import seaborn as sn
import matplotlib.pyplot as plt

In [7]:
#load data
combined_train_df = pd.read_pickle(r'../data/s3/fusion_data/combined_train_df_all_embeddings_classifications.pkl')
combined_val_df = pd.read_pickle(r'../data/s3/fusion_data/combined_val_df_all_embeddings_classifications.pkl')
combined_test_df = pd.read_pickle(r'../data/s3/fusion_data/combined_test_df_all_embeddings_classifications.pkl')

In [8]:
combined_train_df.columns

Index(['index', 'patient_id', 'visit_id', 'study_id', 'temperature',
       'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain', 'acuity',
       'positive_label_total', 'finding_names', 'radiology_note',
       'discharge_note', 'chief_complaint',
       'major_surgical_or_invasive_procedure', 'history_of_present_illness',
       'past_medical_history', 'family_history', 'atelectasis', 'cardiomegaly',
       'lung_opacity', 'pleural_effusion', 'dataset_type', 'dicom_id',
       'notes_classifications', 'notes_embeddings', 'notes_hidden_embeddings',
       'img_classifications', 'img_embeddings', 'img_densefeatures',
       'tabular_classifications0', 'tabular_classifications1',
       'tabular_classifications2', 'tabular_classifications3',
       'tabular_classifications4', 'notes_classifications0',
       'notes_classifications1', 'notes_classifications2',
       'notes_classifications3', 'notes_classifications4',
       'img_classifications0', 'img_classifications1', 'img_classif

In [9]:
#create aggregated embeddings datasets
#loading datasets
train_classes_df = combined_train_df[['patient_id', 'finding_names', 'tabular_classifications0', 'tabular_classifications1',
                                      'tabular_classifications2', 'tabular_classifications3', 'tabular_classifications4', 
                                      'notes_classifications0', 'notes_classifications1', 'notes_classifications2', 
                                      'notes_classifications3', 'notes_classifications4', 'img_classifications0', 
                                      'img_classifications1', 'img_classifications2', 'img_classifications3', 
                                      'img_classifications4']]  

val_classes_df = combined_val_df[['patient_id', 'finding_names', 'tabular_classifications0', 'tabular_classifications1',
                                      'tabular_classifications2', 'tabular_classifications3', 'tabular_classifications4', 
                                      'notes_classifications0', 'notes_classifications1', 'notes_classifications2', 
                                      'notes_classifications3', 'notes_classifications4', 'img_classifications0', 
                                      'img_classifications1', 'img_classifications2', 'img_classifications3', 
                                      'img_classifications4']] 

test_classes_df = combined_test_df[['patient_id', 'finding_names', 'tabular_classifications0', 'tabular_classifications1',
                                      'tabular_classifications2', 'tabular_classifications3', 'tabular_classifications4', 
                                      'notes_classifications0', 'notes_classifications1', 'notes_classifications2', 
                                      'notes_classifications3', 'notes_classifications4', 'img_classifications0', 
                                      'img_classifications1', 'img_classifications2', 'img_classifications3', 
                                      'img_classifications4']]

In [10]:
train_classes_df.head()

Unnamed: 0,patient_id,finding_names,tabular_classifications0,tabular_classifications1,tabular_classifications2,tabular_classifications3,tabular_classifications4,notes_classifications0,notes_classifications1,notes_classifications2,notes_classifications3,notes_classifications4,img_classifications0,img_classifications1,img_classifications2,img_classifications3,img_classifications4
0,11388716,atelectasis,0.208681,0.158534,0.11088,0.248646,0.273259,0.122906,0.306664,0.04976,0.397685,0.122985,0.002679,0.986562,0.001635,0.008502,0.000622
1,11539363,atelectasis,0.312157,0.195341,0.1448,0.174402,0.1733,0.474254,0.192949,0.216597,0.081928,0.034271,0.085059,0.906197,0.004783,0.003748,0.000213
2,10833304,atelectasis,0.396899,0.157088,0.163143,0.136013,0.146858,0.461573,0.215725,0.161198,0.123959,0.037544,0.002391,0.997314,0.000177,0.000105,1.2e-05
3,19849119,atelectasis,0.290064,0.17608,0.165073,0.162788,0.205994,0.23269,0.496566,0.054956,0.093965,0.121823,0.000162,0.999696,2.2e-05,5.3e-05,6.7e-05
4,11749991,atelectasis,0.413125,0.163641,0.124322,0.176546,0.122366,0.271716,0.461655,0.045886,0.163782,0.056961,0.002136,0.997198,0.000164,0.00016,0.000342


In [41]:
#create average individual predictions
train_classes_df['avg_classifications0'] = train_classes_df[['tabular_classifications0', 'notes_classifications0', 'img_classifications0']].mean(axis=1,skipna=True)
train_classes_df['avg_classifications1'] = train_classes_df[['tabular_classifications1', 'notes_classifications1', 'img_classifications1']].mean(axis=1,skipna=True)
train_classes_df['avg_classifications2'] = train_classes_df[['tabular_classifications2', 'notes_classifications2', 'img_classifications2']].mean(axis=1,skipna=True)
train_classes_df['avg_classifications3'] = train_classes_df[['tabular_classifications3', 'notes_classifications3', 'img_classifications3']].mean(axis=1,skipna=True)
train_classes_df['avg_classifications4'] = train_classes_df[['tabular_classifications4', 'notes_classifications4', 'img_classifications4']].mean(axis=1,skipna=True)

val_classes_df['avg_classifications0'] = val_classes_df[['tabular_classifications0', 'notes_classifications0', 'img_classifications0']].mean(axis=1,skipna=True)
val_classes_df['avg_classifications1'] = val_classes_df[['tabular_classifications1', 'notes_classifications1', 'img_classifications1']].mean(axis=1,skipna=True)
val_classes_df['avg_classifications2'] = val_classes_df[['tabular_classifications2', 'notes_classifications2', 'img_classifications2']].mean(axis=1,skipna=True)
val_classes_df['avg_classifications3'] = val_classes_df[['tabular_classifications3', 'notes_classifications3', 'img_classifications3']].mean(axis=1,skipna=True)
val_classes_df['avg_classifications4'] = val_classes_df[['tabular_classifications4', 'notes_classifications4', 'img_classifications4']].mean(axis=1,skipna=True)

test_classes_df['avg_classifications0'] = test_classes_df[['tabular_classifications0', 'notes_classifications0', 'img_classifications0']].mean(axis=1,skipna=True)
test_classes_df['avg_classifications1'] = test_classes_df[['tabular_classifications1', 'notes_classifications1', 'img_classifications1']].mean(axis=1,skipna=True)
test_classes_df['avg_classifications2'] = test_classes_df[['tabular_classifications2', 'notes_classifications2', 'img_classifications2']].mean(axis=1,skipna=True)
test_classes_df['avg_classifications3'] = test_classes_df[['tabular_classifications3', 'notes_classifications3', 'img_classifications3']].mean(axis=1,skipna=True)
test_classes_df['avg_classifications4'] = test_classes_df[['tabular_classifications4', 'notes_classifications4', 'img_classifications4']].mean(axis=1,skipna=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_classes_df['avg_classifications0'] = train_classes_df[['tabular_classifications0', 'notes_classifications0', 'img_classifications0']].mean(axis=1,skipna=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_classes_df['avg_classifications1'] = train_classes_df[['tabular_classifications1', 'notes_classifications1', 'img_classifications1']].mean(axis=1,skipna=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats 

In [42]:
train_classes_df.head()

Unnamed: 0,patient_id,finding_names,tabular_classifications0,tabular_classifications1,tabular_classifications2,tabular_classifications3,tabular_classifications4,notes_classifications0,notes_classifications1,notes_classifications2,...,img_classifications0,img_classifications1,img_classifications2,img_classifications3,img_classifications4,avg_classifications0,avg_classifications1,avg_classifications2,avg_classifications3,avg_classifications4
0,11388716,atelectasis,0.208681,0.158534,0.11088,0.248646,0.273259,0.122906,0.306664,0.04976,...,0.002679,0.986562,0.001635,0.008502,0.000622,0.111422,0.48392,0.054092,0.218278,0.132289
1,11539363,atelectasis,0.312157,0.195341,0.1448,0.174402,0.1733,0.474254,0.192949,0.216597,...,0.085059,0.906197,0.004783,0.003748,0.000213,0.29049,0.431496,0.12206,0.086693,0.069261
2,10833304,atelectasis,0.396899,0.157088,0.163143,0.136013,0.146858,0.461573,0.215725,0.161198,...,0.002391,0.997314,0.000177,0.000105,1.2e-05,0.286955,0.456709,0.108173,0.086692,0.061471
3,19849119,atelectasis,0.290064,0.17608,0.165073,0.162788,0.205994,0.23269,0.496566,0.054956,...,0.000162,0.999696,2.2e-05,5.3e-05,6.7e-05,0.174305,0.557448,0.073351,0.085602,0.109295
4,11749991,atelectasis,0.413125,0.163641,0.124322,0.176546,0.122366,0.271716,0.461655,0.045886,...,0.002136,0.997198,0.000164,0.00016,0.000342,0.228992,0.540831,0.056791,0.113496,0.059889


In [43]:
#save files for Adam!

#as csv
train_classes_df.to_csv('../data/s3/fusion_data/classifications_late_fusion_avg_train.csv')
val_classes_df.to_csv('../data/s3/fusion_data/classifications_late_fusion_avg_val.csv')
test_classes_df.to_csv('../data/s3/fusion_data/classifications_late_fusion_avg_test.csv')

#as pkl
train_classes_df.to_pickle("../data/s3/fusion_data/classifications_late_fusion_avg_train.pkl")
val_classes_df.to_pickle("../data/s3/fusion_data/classifications_late_fusion_avg_val.pkl")
test_classes_df.to_pickle("../data/s3/fusion_data/classifications_late_fusion_avg_test.pkl")

## Average Classification function based on ONNX output

In [11]:
#function for taking in an array

In [25]:
#working locally - generate 3 random numpy arrays of length 5
tabular_output = np.random.dirichlet(np.ones(5),size=1)
notes_output = np.random.dirichlet(np.ones(5),size=1)
img_output = np.random.dirichlet(np.ones(5),size=1)

print(f'Tabular output is: {tabular_output}')
print(f'Notes output is: {notes_output}')
print(f'Images output is: {img_output}')

Tabular output is: [[0.04143802 0.42334627 0.48597758 0.03576375 0.01347438]]
Notes output is: [[0.25730603 0.35403407 0.09199061 0.09496215 0.20170715]]
Images output is: [[0.43851299 0.01778309 0.13387403 0.2871992  0.12263069]]


In [28]:
def late_fusion_avg(output1 = None, output2 = None, output3 = None):
    '''Aggregates findings by averaging the class level softmax scores'''
    if not isinstance(output1, np.ndarray) or not isinstance(output2, np.ndarray) or not isinstance(output3, np.ndarray):
        #only two data outputs given
        if not isinstance(output3, np.ndarray) and isinstance(output1, np.ndarray) and isinstance(output2, np.ndarray):
            outputs = np.concatenate((output1, output2))
            late_fusion_output = np.mean(outputs, axis = 0)
        #only one data output given
        #optional: change this to give the output of the individual model
        elif not isinstance(output3, np.ndarray) and not isinstance(output2, np.ndarray) and isinstance(output1, np.ndarray):
            late_fusion_output = "This is not a fusion model! Please see the individual model results corresponding to the data type you entered above."
        #no outputs given
        else:
            late_fusion_output = "No models were run! Please input multiple data types and run models to see a fusion result."
    else:
        outputs = np.concatenate((output1, output2, output3))
        late_fusion_output = np.mean(outputs, axis = 0)
            
    return late_fusion_output

In [29]:
#test cases
print(f'The full late fusion average output is: {late_fusion_avg(tabular_output, notes_output, img_output)}')
print(f'The late fusion tabular/notes average output is: {late_fusion_avg(tabular_output, notes_output)}')
print(f'The late fusion tabular/images average output is: {late_fusion_avg(tabular_output, img_output)}')
print(f'The late fusion notesimages average output is: {late_fusion_avg(notes_output, img_output)}')
print(f'The late fusion tabular average output is: {late_fusion_avg(tabular_output)}')
print(f'The late fusion notes average output is: {late_fusion_avg(notes_output)}')
print(f'The late fusion images average output is: {late_fusion_avg(img_output)}')
print(f'The late fusion no models average output is: {late_fusion_avg()}')

The full late fusion average output is: [0.24575235 0.26505448 0.23728074 0.13930837 0.11260407]
The late fusion tabular/notes average output is: [0.14937202 0.38869017 0.28898409 0.06536295 0.10759077]
The late fusion tabular/images average output is: [0.2399755  0.22056468 0.3099258  0.16148148 0.06805254]
The late fusion notesimages average output is: [0.34790951 0.18590858 0.11293232 0.19108067 0.16216892]
The late fusion tabular average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion notes average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion images average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion no models average output is: No models were run! Please input multiple data types and run 

## Check metrics for average aggregator

In [15]:
# print classification report 
#print(classification_report(y_val, grid_predictions)) 

In [16]:
#print(confusion_matrix(y_val.numpy(), grid_predictions))

In [17]:
#create_roc_auc_curve_others(clf, X_val, y_val)

# Average Ranking
Average the relative ranks together, rather than softmax values

In [31]:
#working locally - generate 3 random numpy arrays of length 5
tabular_output = np.random.dirichlet(np.ones(5),size=1)
notes_output = np.random.dirichlet(np.ones(5),size=1)
img_output = np.random.dirichlet(np.ones(5),size=1)

print(f'Tabular output is: {tabular_output}')
print(f'Notes output is: {notes_output}')
print(f'Images output is: {img_output}')

Tabular output is: [[0.26609705 0.33486235 0.1051147  0.1267885  0.1671374 ]]
Notes output is: [[0.03369514 0.45098697 0.00678653 0.12444272 0.38408864]]
Images output is: [[0.60515471 0.04393547 0.19451779 0.12841807 0.02797395]]


In [32]:
def late_fusion_avg_rank(output1 = None, output2 = None, output3 = None):
    '''Averages the relative rank of each class, should ignore "model confidence"'''
    
    if not isinstance(output1, np.ndarray) or not isinstance(output2, np.ndarray) or not isinstance(output3, np.ndarray):
        #only two data outputs given
        if not isinstance(output3, np.ndarray) and isinstance(output1, np.ndarray) and isinstance(output2, np.ndarray):
            rank1 = output1.argsort().argsort().reshape(output1.shape)
            rank2 = output2.argsort().argsort().reshape(output2.shape)
            
            outputs = np.concatenate((rank1, rank2))
            late_fusion_output = np.mean(outputs, axis = 0)
            
        #only one data output given
        elif not isinstance(output3, np.ndarray) and not isinstance(output2, np.ndarray) and isinstance(output1, np.ndarray):
            late_fusion_output = "This is not a fusion model! Please see the individual model results corresponding to the data type you entered above."
       
        #no outputs given
        else:
            late_fusion_output = "No models were run! Please input multiple data types and run models to see a fusion result."
    else:
        rank1 = output1.argsort().reshape(output1.shape)
        rank2 = output2.argsort().reshape(output2.shape)
        rank3 = output3.argsort().reshape(output3.shape)

        outputs = np.concatenate((rank1, rank2, rank3))
        late_fusion_output = np.mean(outputs, axis = 0)
            
    return late_fusion_output

In [33]:
#test cases
print(f'The full late fusion average output is: {late_fusion_avg_rank(tabular_output, notes_output, img_output)}')
print(f'The late fusion tabular/notes average output is: {late_fusion_avg_rank(tabular_output, notes_output)}')
print(f'The late fusion tabular/images average output is: {late_fusion_avg_rank(tabular_output, img_output)}')
print(f'The late fusion notesimages average output is: {late_fusion_avg_rank(notes_output, img_output)}')
print(f'The late fusion tabular average output is: {late_fusion_avg_rank(tabular_output)}')
print(f'The late fusion notes average output is: {late_fusion_avg_rank(notes_output)}')
print(f'The late fusion images average output is: {late_fusion_avg_rank(img_output)}')
print(f'The late fusion no models average output is: {late_fusion_avg_rank()}')

The full late fusion average output is: [2.66666667 1.33333333 3.33333333 2.         0.66666667]
The late fusion tabular/notes average output is: [2.  4.  0.  1.5 2.5]
The late fusion tabular/images average output is: [3.5 2.5 1.5 1.5 1. ]
The late fusion notesimages average output is: [2.5 2.5 1.5 2.  1.5]
The late fusion tabular average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion notes average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion images average output is: This is not a fusion model! Please see the individual model results corresponding to the data type you entered above.
The late fusion no models average output is: No models were run! Please input multiple data types and run models to see a fusion result.


# Weighted Average aggregator
Weighted based on class-level AUC

In [27]:
#working locally - generate 3 random numpy arrays of length 5
tabular_output = np.random.dirichlet(np.ones(5),size=1)
notes_output = np.random.dirichlet(np.ones(5),size=1)
img_output = np.random.dirichlet(np.ones(5),size=1)

print(f'Tabular output is: {tabular_output}')
print(f'Notes output is: {notes_output}')
print(f'Images output is: {img_output}')

Tabular output is: [[0.06120404 0.04781931 0.02780818 0.66310534 0.20006313]]
Notes output is: [[0.55782557 0.14232564 0.01039623 0.1069354  0.18251716]]
Images output is: [[0.12965855 0.04102065 0.15604203 0.37740312 0.29587565]]


In [30]:
tabular_output.argmax()

3

In [None]:
def late_fusion_weighted_avg(output1 = None, output2 = None, output3 = None):
    '''Weights each softmax value by the relative test-AUC for that class'''

    
    
    if not isinstance(output1, np.ndarray) or not isinstance(output2, np.ndarray) or not isinstance(output3, np.ndarray):
        #only two data outputs given
        if not isinstance(output3, np.ndarray) and isinstance(output1, np.ndarray) and isinstance(output2, np.ndarray):
            outputs = np.concatenate((output1, output2))
            late_fusion_output = np.mean(outputs, axis = 0)
        #only one data output given
        elif not isinstance(output3, np.ndarray) and not isinstance(output2, np.ndarray) and isinstance(output1, np.ndarray):
            late_fusion_output = "This is not a fusion model! Please see the individual model results corresponding to the data type you entered above."
        #no outputs given
        else:
            late_fusion_output = "No models were run! Please inpupt multiple data types and run models to see a fusion result."
    else:
        outcome1 = output1.argmax()
        outcome2 = output2.argmax()
        outcome3 = output3.argmax()
        
        outputs = np.concatenate((output1, output2, output3))
        late_fusion_output = np.mean(outputs, axis = 0)
            
    return late_fusion_output