In [None]:
## Import libraries
import os
import pickle
import time

import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import StratifiedShuffleSplit
from Utils import evaluate_model_skl, store_results, visualize_boxplots, visualize_boxplot_onemodel, compare_models, weighted_error

In [None]:
# Widen pandas' text output so wide result tables are not wrapped
pd.options.display.width = 1000

In [None]:
# Experiment-wide settings, read by the training loop and by the result file names.
N: int = 10  # experiment repetitions (number of stratified train/test splits; also part of the CSV file names)
k: int = 4  # k for k-fold cross-validation in hyperparameter tuning
seed: int = 42  # random_state given to the train/test splitter

In [None]:
## Selected models
# Both names are used to build the file names of the per-trial models loaded in the training loop.
cd_classifier: str = 'RF'  # clinical-data classifier: '<name>_<trial>.pkl' under Clinical_data_classifiers/Models/
img_classifier: str = 'multiInputCNN'  # image classifier: '<name>_<trial>.h5'; this exact value selects the list-of-views input format

# Load data

In [None]:
## Images

from PIL import Image


def _load_view(folder):
    """Load every image file found in `folder` into one stacked array.

    Files are read in sorted file-name order. `os.listdir` returns entries in
    arbitrary order, which would make the sample order differ between machines
    and between the three view folders.
    NOTE(review): this assumes the file names sort identically in the Front, L90
    and R90 folders and match the row order of the clinical-data CSVs - confirm.

    Parameters
    ----------
    folder : str
        Directory that contains only image files of one view and one class.

    Returns
    -------
    numpy.ndarray
        One entry per file, in sorted file-name order.
    """
    images = []
    for file_name in sorted(os.listdir(folder)):
        # The context manager closes the file handle once the pixels are copied out
        with Image.open(os.path.join(folder, file_name)) as image:
            images.append(np.array(image))
    return np.asarray(images)


# Frontal images
front_images_h = _load_view('Images/Healthy/Front')
front_images_s = _load_view('Images/Sick/Front')
front = np.concatenate((front_images_h, front_images_s))

# Left lateral (L90) images
L90_images_h = _load_view('Images/Healthy/L90')
L90_images_s = _load_view('Images/Sick/L90')
L90 = np.concatenate((L90_images_h, L90_images_s))

# Right lateral (R90) images
R90_images_h = _load_view('Images/Healthy/R90')
R90_images_s = _load_view('Images/Sick/R90')
R90 = np.concatenate((R90_images_h, R90_images_s))

## Shape of thermograms (unpacking requires `front` to be a 3-D array)
_,h,w = front.shape

In [None]:
## Clinical data
# One CSV per class; the healthy rows are placed first, the sick rows second.
clinical_data_h = pd.read_csv('Clinical_data/clinical_data_h.csv')
clinical_data_s = pd.read_csv('Clinical_data/clinical_data_s.csv')
clinical_data = pd.concat((clinical_data_h, clinical_data_s))

# Column names are taken from the healthy table
cd_colnames = clinical_data_h.columns

In [None]:
## Generate labels
# 0 for every healthy frontal image, 1 for every sick one, in the same healthy-then-sick order as the image arrays
labels_h = [0 for _ in range(len(front_images_h))]
labels_s = [1 for _ in range(len(front_images_s))]
labels = np.array(labels_h + labels_s)

# Pre-processing

In [None]:
## Min-max normalization

# Images: a single global range shared by the three views.
# The extrema are taken per view and then combined, which gives the same values as
# concatenating the three arrays without allocating a temporary copy of every image.
img_max = max(front.max(), L90.max(), R90.max())
img_min = min(front.min(), L90.min(), R90.min())

front = ((front - img_min) / (img_max - img_min)).astype('float32')
L90 = ((L90 - img_min) / (img_max - img_min)).astype('float32')
R90 = ((R90 - img_min) / (img_max - img_min)).astype('float32')

# Clinical data: per-column range, with every column maximum raised to at least 1.
# np.maximum builds a new array instead of writing into the buffer returned by
# `.values`, which pandas may hand out read-only when copy-on-write is active.
M = np.maximum(clinical_data.max().to_numpy(), 1)
m = clinical_data.min().to_numpy()

clinical_data = (clinical_data-m)/(M-m)

In [None]:
## Select columns of clinical data
# Positions (in the original CSV column order) of the columns that are discarded
id_columns_to_delete = [1, 3, 4, 6, 8, 9, 10, 12, 13, 14, 15, 16, 18, 22, 23, 25]
columns_to_delete = cd_colnames[id_columns_to_delete]

# Drop them from the table and from the list of remaining column names
clinical_data = clinical_data.drop(columns=columns_to_delete)
cd_colnames = [name for name in cd_colnames if name not in columns_to_delete]

## Convert to numpy array
clinical_data = clinical_data.to_numpy(dtype=np.float32)

# Function definitions

In [None]:
## Hyperparameter tuning
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.model_selection import GridSearchCV, cross_val_score, RandomizedSearchCV

def tune_hyperparameters(classifier_type, data, labels, k=4):
    """Grid-search the hyperparameters of one classifier family with k-fold cross-validation.

    The positive class (label 1) is weighted by n_healthy / n_sick to offset class
    imbalance, and candidates are ranked by ROC AUC.

    Parameters
    ----------
    classifier_type : str
        'SVC' or 'SVM' (support vector machine), 'DT' (decision tree),
        'SGD' (linear model trained with SGD) or 'NN' (single-layer perceptron).
    data : array-like
        Training inputs, passed unchanged to `GridSearchCV.fit`.
    labels : array-like
        Binary labels (0 = healthy, 1 = sick); both classes must be present.
    k : int, default 4
        Value passed as `cv` to `GridSearchCV`.

    Returns
    -------
    (estimator, dict)
        `best_estimator_` and `best_params_` of the fitted grid search.

    Raises
    ------
    ValueError
        If `labels` contains a single class (the class weight would be undefined)
        or if `classifier_type` is not one of the supported names. Callers unpack
        two return values, so failing loudly replaces the former print + `None`.
    """
    n_sick = np.asarray(labels).sum()
    n_healthy = len(labels) - n_sick
    if n_sick == 0 or n_healthy == 0:
        raise ValueError('labels must contain both healthy (0) and sick (1) samples')
    rate_train = n_healthy / n_sick
    class_weight = {0: 1, 1: rate_train}

    if classifier_type == 'SVC' or classifier_type == 'SVM':
        # Support Vector Machine (SVM) classifier
        estimator = SVC(class_weight=class_weight, probability=True)
        param_grid = {'C': [1,10,100,1000], # Regularization parameter. Default: C=1.0
                      'kernel': ['linear', 'rbf', 'sigmoid', 'poly'], # Default: kernel='rbf'
                      'gamma': ['scale', 'auto', 1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001] # Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. Default: gamma='scale'
                     }

    elif classifier_type == 'DT':
        # Decision Tree (DT) classifier
        estimator = DecisionTreeClassifier(class_weight=class_weight)
        param_grid = {'ccp_alpha' : np.arange(0, 0.1, 0.01), # Complexity parameter used for Minimal Cost-Complexity Pruning. Default: ccp_alpha = 0.0
                      'criterion': ['gini','entropy'], # The function to measure the quality of a split ('log_loss' is not searched). Default: criterion='gini'
                      'max_depth' : [None, 1, 5, 10, 15], # The maximum depth of the tree. Default: max_depth=None
                      'max_leaf_nodes': [None, 3, 6, 9], # Grow a tree with max_leaf_nodes in best-first fashion. Default: None
                      'min_samples_leaf': [1, 2, 3, 4], # The minimum number of samples required to be at a leaf node. Default: min_samples_leaf=1
                      'min_samples_split' : [2, 5, 10, 15], # The minimum number of samples required to split an internal node. Default: min_samples_split=2
                      'min_weight_fraction_leaf' : np.arange(0.0, 0.5, 0.05), # The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Default: min_weight_fraction_leaf=0
                      # 'max_features' and 'splitter' are deliberately left at their defaults
                     }

    elif classifier_type == 'SGD':
        # Linear classifiers (SVM, logistic regression, etc.) with Stochastic Gradient Descent (SGD) training
        estimator = SGDClassifier(class_weight=class_weight)
        param_grid = {'loss': ['log_loss', 'modified_huber'], # The loss function to be used. Default: loss='hinge'. Only these two losses are searched.
                      'penalty': [None, 'l2', 'l1', 'elasticnet'], # The penalty (aka regularization term) to be used. Default: penalty='l2'
                      'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1], # Constant that multiplies the regularization term. Default: alpha=0.0001
                      'l1_ratio': [0.05, 0.15, 0.3, 0.5, 0.7, 0.9],  # The Elastic Net mixing parameter, only used if penalty is 'elasticnet'. Default: l1_ratio=0.15
                      'max_iter': [500, 1000, 1500, 2000], # The maximum number of passes over the training data (aka epochs). Default: max_iter=1000
                      'tol': [None, 1e-2, 1e-3, 1e-4, 1e-5], # The stopping criterion. Default: tol=1e-3
                      'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'], # The learning rate schedule. Default: learning_rate='optimal'
                      'eta0':  [0.001, 0.01, 0.1, 1, 10]  # The initial learning rate for the 'constant', 'invscaling' or 'adaptive' schedules. Default: eta0=0.0
                    }

    elif classifier_type == 'NN':
        # Linear perceptron classifier (single-layer)
        estimator = Perceptron(class_weight=class_weight)
        param_grid = {'penalty': [None, 'l2', 'l1', 'elasticnet'], # The penalty (aka regularization term). Default: penalty=None
                      'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1], # Constant that multiplies the regularization term if regularization is used. Default: alpha=0.0001
                      'fit_intercept': [True, False], # Whether the intercept should be estimated or not. Default: fit_intercept=True
                      'max_iter': [500, 1000, 1500, 2000], # The maximum number of passes over the training data (aka epochs). Default: max_iter=1000
                      'tol': [None, 1e-2, 1e-3, 1e-4, 1e-5], # The stopping criterion. Default: tol=1e-3
                      'eta0': [10, 1, 0.1, 0.01, 0.001], # Constant by which the updates are multiplied. Default: eta0=1
                      # NOTE(review): scikit-learn documents validation_fraction as only used when
                      # early_stopping=True, which is not enabled here - kept so the stored parameter
                      # columns stay the same, but it looks like it multiplies the search for no gain.
                      'validation_fraction': [0.01, 0.1, 0.2, 0.3] # The proportion of training data to set aside as validation set for early stopping. Default: validation_fraction=0.1
                    }

    else:
        raise ValueError(f"Wrong classifier type: {classifier_type!r} (expected 'SVC', 'SVM', 'DT', 'SGD' or 'NN')")

    # Candidates are ranked by ROC AUC (a weighted_error-based scorer was considered as an alternative)
    cost_scorer = 'roc_auc'

    # Tune hyperparameters with k-fold cross-validation on training set
    classifier = GridSearchCV(estimator, param_grid, scoring=cost_scorer, cv=k)
    classifier.fit(data, labels)

    return classifier.best_estimator_, classifier.best_params_

In [None]:
## Function to train the weight for clinical data in the Weighted Voting (WV) classifier
from sklearn.metrics import roc_auc_score, accuracy_score, make_scorer

def tune_weight(pred_CD, pred_img, labels, metric=None, n_steps=10000):
    """Search the clinical-data weight of the weighted-voting ensemble.

    Every weight w = i / n_steps for i = 0..n_steps is tried (both end points
    included, so "clinical data only", w = 1, is also a candidate) and the
    combination `pred_CD * w + pred_img * (1 - w)` is scored against `labels`.
    On ties the smallest weight wins.

    Parameters
    ----------
    pred_CD, pred_img : array-like
        Scores of the clinical-data and image classifiers for the same samples.
    labels : array-like
        Ground-truth binary labels.
    metric : {'accuracy', 'roc_auc', 'we', None}
        Selection criterion. 'accuracy' and 'roc_auc' are maximised, 'we'
        (`weighted_error`) is minimised; the two label-based criteria are computed
        on `np.round` of the combined scores. With None the search is run and
        reported for all three criteria and no single weight is selected.
    n_steps : int, default 10000
        Number of equal steps the interval [0, 1] is divided into.

    Returns
    -------
    float
        The best weight for `metric`. In report mode (`metric=None`) 0 is
        returned, as before; do not use that value as a tuned weight.

    Raises
    ------
    ValueError
        If `metric` is not one of the supported names (a misspelled metric used
        to fall silently into report mode and return weight 0) or `n_steps` < 1.
    """
    supported = ("accuracy", "roc_auc", "we")

    if metric is None:
        # Report mode: each nested call prints its own result line
        for name in supported:
            tune_weight(pred_CD, pred_img, labels, name, n_steps)
        return 0

    if metric not in supported:
        raise ValueError(f"Unknown metric {metric!r}; expected one of {supported} or None")
    if n_steps < 1:
        raise ValueError("n_steps must be at least 1")

    lower_is_better = metric == "we"
    best_weight = 0
    best_metric = np.inf if lower_is_better else 0

    for step in range(n_steps + 1):
        weight = step / n_steps

        new_pred = pred_CD * weight + pred_img * (1 - weight)

        if metric == "accuracy":
            score = accuracy_score(labels, np.round(new_pred))
        elif metric == "roc_auc":
            score = roc_auc_score(labels, new_pred)
        else:
            score = weighted_error(labels, np.round(new_pred))

        # Strict comparison keeps the first (smallest) weight on ties
        improved = score < best_metric if lower_is_better else score > best_metric
        if improved:
            best_metric = score
            best_weight = weight

    print(f'Best {metric}: {best_metric} | Best weight: {best_weight}')
    return best_weight

def weighted_voting(pred_cd, pred_img, weight = 0.698):
    """Blend the two classifiers' scores: `weight` goes to the clinical-data
    prediction and the remainder, `1 - weight`, to the image prediction."""
    img_weight = 1 - weight
    return weight * pred_cd + img_weight * pred_img

In [None]:
## Number of parameters in a classifier

def num_parameters(classifier_type, classifier):
    """Return the parameter count reported for a fitted ensemble classifier.

    - 'WV': 1 (the voting weight); `classifier` is not inspected.
    - 'SVC' / 'SVM': number of support vectors plus the length of the first row
      of `dual_coef_`.
    - 'DT': number of nodes in the fitted tree.
    - 'SGD' / 'NN': number of coefficients plus number of intercepts.
    - anything else: None.
    """
    if classifier_type == 'WV':
        return 1

    if classifier_type in ('SVC', 'SVM'):
        return len(classifier.support_vectors_) + len(classifier.dual_coef_[0])

    if classifier_type == 'DT':
        return classifier.tree_.node_count

    if classifier_type in ('SGD', 'NN'):
        return classifier.coef_.size + classifier.intercept_.size

    return None

# Tune hyperparameters and train N times

In [None]:
trials_params, trials_results = [], []

#np.random.seed(seed)


def _print_results(title, results):
    """Print one block of evaluation metrics: numbers with 4 decimals, other values verbatim."""
    print(title)
    for metric, value in results.items():
        print(f'{metric}: {value:.4f}' if isinstance(value, (float, int)) else f'{metric}: {value}')
    print()


def _print_elapsed(label, elapsed):
    """Print `elapsed` seconds split into hours, minutes and seconds."""
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'{label} took {hours} hours, {minutes} minutes, and {seconds} seconds.')


# N stratified train/test splits; the test part holds 15 % of the samples
splitter = StratifiedShuffleSplit(n_splits=N, test_size=int(round(0.15*len(labels))), random_state = seed)

for trial, (train_index, test_index) in enumerate(splitter.split(clinical_data, labels)):

    print(f'Trial {trial + 1}')
    print()

    ### Split the dataset
    crossval_front = front[train_index]
    crossval_L90 = L90[train_index]
    crossval_R90 = R90[train_index]
    crossval_clinical_data = clinical_data[train_index]
    crossval_labels = labels[train_index]

    # The held-out images must be selected with test_index, like the clinical data and
    # the labels; indexing them with train_index evaluated the ensemble on training images.
    test_front = front[test_index]
    test_L90 = L90[test_index]
    test_R90 = R90[test_index]
    test_clinical_data = clinical_data[test_index]
    test_labels = labels[test_index]


    ## Load trained models and predict
    # Clinical data classifier
    # NOTE: pickle.load can execute arbitrary code - only load model files produced by this project.
    with open('Clinical_data_classifiers/Models/'+cd_classifier+'_'+str(trial+1)+'.pkl', 'rb') as f:
        cd_model = pickle.load(f)

    pred_cd_crossval = cd_model.predict_proba(crossval_clinical_data)[:,1]
    pred_cd_test = cd_model.predict_proba(test_clinical_data)[:,1]

    # Multi-view image classifier
    img_model = tf.keras.models.load_model('Image_classifiers/Models/'+img_classifier+'_'+str(trial+1)+'.h5', compile=False)
    if img_classifier == 'multiInputCNN':
        # Input: list of views
        crossval_inputs = [crossval_front, crossval_L90, crossval_R90]
        test_inputs = [test_front, test_L90, test_R90]
    else:
        # Input: concatenate views along the channel axis
        crossval_inputs = np.stack((crossval_front, crossval_L90, crossval_R90), axis=-1)
        test_inputs = np.stack((test_front, test_L90, test_R90), axis=-1)

    # Converted to NumPy so that the `.reshape` calls and the scikit-learn code below
    # receive plain arrays rather than whatever tensor type the model call returns.
    pred_img_crossval = np.asarray(img_model(crossval_inputs))[:,0]
    pred_img_test = np.asarray(img_model(test_inputs))[:,0]


    for classifier_type in ['WV','SVM','DT','SGD','NN']:

        print(f'Classifier: {classifier_type}')

        if classifier_type == 'WV':
            # Weighted Voting (WV) classifier does not require hyperparameter tuning
            ti = time.time()
            weight = tune_weight(pred_cd_crossval, pred_img_crossval, crossval_labels, metric='roc_auc')
            trials_params.append({'classifier':classifier_type, 'trial':trial+1, 'weight':weight})
            train_time = time.time() - ti

            _print_elapsed('Training', train_time)

            ## Predict
            predictions_train = weighted_voting(pred_cd_crossval, pred_img_crossval, weight)
            np.save('Ensemble_models/Predictions/'+classifier_type+'_train_'+str(trial+1)+'.npy',predictions_train)
            predictions_test = weighted_voting(pred_cd_test, pred_img_test, weight)
            np.save('Ensemble_models/Predictions/'+classifier_type+'_test_'+str(trial+1)+'.npy',predictions_test)

            ## Print the number of parameters in the model
            # There is no estimator object for WV; pass None instead of IPython's `_` history variable
            num_params = num_parameters(classifier_type, None)
            print(f'Classifier has {num_params} parameters.')
            print()

        else:

            ## Concatenate predictions to generate the input vector (column 0: image score, column 1: clinical-data score)
            inputs_crossval = np.concatenate((pred_img_crossval.reshape(-1, 1),pred_cd_crossval.reshape(-1, 1)), axis=1)
            inputs_test = np.concatenate((pred_img_test.reshape(-1, 1),pred_cd_test.reshape(-1, 1)), axis=1)

            ## Tune hyperparameters with k-fold cross-validation and then train on the entire training set with the tuned hyperparameters
            ti = time.time()
            classifier, parameters = tune_hyperparameters(classifier_type, inputs_crossval, crossval_labels, k)
            trials_params.append({**{'classifier':classifier_type}, **{'trial':trial+1}, **parameters})
            train_time = time.time() - ti

            _print_elapsed('Hyperparameter tuning', train_time)

            ## Save the model
            with open('Ensemble_models/Models/'+classifier_type+'_'+str(trial+1)+'.pkl','wb') as f:
                pickle.dump(classifier,f)

            ## Predict
            if not classifier_type == 'NN':
                predictions_train = classifier.predict_proba(inputs_crossval)[:,1]
                predictions_test = classifier.predict_proba(inputs_test)[:,1]
            else:
                # The perceptron is evaluated on its hard class predictions
                predictions_train = classifier.predict(inputs_crossval)
                predictions_test = classifier.predict(inputs_test)
            np.save('Ensemble_models/Predictions/'+classifier_type+'_train_'+str(trial+1)+'.npy',predictions_train)
            np.save('Ensemble_models/Predictions/'+classifier_type+'_test_'+str(trial+1)+'.npy',predictions_test)

            ## Print the number of parameters in the model
            num_params = num_parameters(classifier_type, classifier)
            print(f'Classifier has {num_params} parameters.')
            print()


        ## Evaluate the model
        results_train = evaluate_model_skl(predictions_train, crossval_labels)
        _print_results('TRAIN results:', results_train)

        results_test = evaluate_model_skl(predictions_test, test_labels)
        _print_results('TEST results:', results_test)

        ## Store results
        trials_results.append({**{'classifier':classifier_type}, **{'trial':trial+1},
                               **store_results(num_params, train_time, results_train, results_test)})

    print()
    print(100*'#')
    print()

pd.DataFrame(trials_params).to_csv('Ensemble_models/Parameters_'+str(N)+'trials.csv')
pd.DataFrame(trials_results).round(decimals=5).to_csv('Ensemble_models/Results_'+str(N)+'trials.csv')

# Compare models

In [None]:
## Read results
# Reload the per-trial results from disk; missing values are replaced by 1e-10
results_path = 'Ensemble_models/Results_'+str(N)+'trials.csv'
trials_results = pd.read_csv(results_path, index_col=0).fillna(1e-10)

In [None]:
## Print statistics
# One row per classifier, and a '<metric>_mean' / '<metric>_std' column pair for every
# test metric except the raw confusion-matrix counts.
confusion_counts = ['test_TP','test_FP','test_TN','test_FN']
models = trials_results.classifier.unique()
metrics = [c for c in trials_results.columns if 'test_' in c and c not in confusion_counts]

stat_columns = []
for metric in metrics:
    stat_columns.extend([metric+'_mean', metric+'_std'])

statistics = pd.DataFrame(index=models, columns=stat_columns)

for model in models:
    model_rows = trials_results[trials_results['classifier']==model]
    for metric in metrics:
        mn, st = metric+'_mean', metric+'_std'
        results = model_rows[metric].values
        statistics.at[model,mn] = results.mean()
        statistics.at[model,st] = results.std()

statistics

In [None]:
# Report the best classifier for every test metric: the lowest mean for
# loss/error-type metrics ('Loss' or 'WE' in the name), the highest mean otherwise.
test_metrics = [name for name in metrics if 'test' in name]

for metric in test_metrics:
    mn, st = metric+'_mean', metric+'_std'
    means = pd.to_numeric(statistics[metric+'_mean'])
    lower_is_better = ('Loss' in metric) or ('WE' in metric)
    model_best = means.idxmin() if lower_is_better else means.idxmax()
    if lower_is_better:
        print(f'Model with lowest {metric} is {model_best} with value {statistics.loc[model_best,mn]} and standard deviation {statistics.loc[model_best,st]}')
    else:
        print(f'Model with highest {metric} is {model_best} with value {statistics.loc[model_best,mn]} and standard deviation {statistics.loc[model_best,st]}')

In [None]:
## Print mean and std metrics for each model
# Metric base names; the result columns are 'train_<name>' and 'test_<name>'.
# Kept under its own name: assigning this list to `metrics` overwrote the list of
# test-column names that the statistics cells build and iterate over, so re-running
# those cells afterwards silently did nothing.
summary_metrics = ['BCELoss','Accuracy','Sensitivity','Specificity','ROC_AUC','Precision','F1','WE']

for classifier_type in trials_results.classifier.unique():
    print(f'Classifier: {classifier_type}')
    results = trials_results[trials_results['classifier'] == classifier_type]

    # Number of parameters
    parameters = results['Parameters'].values
    print(f'Mean number of parameters: {parameters.mean()} [{parameters.min()}, {parameters.max()}], std {parameters.std()}')

    # training time (the mean is split into hours / minutes / seconds, the std stays in seconds)
    trainTime = results['trainTime'].values
    hours, remainder = divmod(trainTime.mean(), 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'Mean training time: {hours} hours, {minutes} minutes, and {seconds} seconds, (std {trainTime.std()} sec)')
    print()

    # TRAIN results
    for metric in summary_metrics:
        values = results['train_' + metric].values
        print(f'Mean train {metric}: {values.mean()}, std {values.std()}')
    print()

    # TEST results
    for metric in summary_metrics:
        values = results['test_' + metric].values
        print(f'Mean test {metric}: {values.mean()}, std {values.std()}')
    print()

    print('-'*120)
    print()

In [None]:
## Show boxplots
# Test metrics drawn for every ensemble model; the figure path is handed to the helper
boxplot_metrics = ['test_BCELoss','test_Accuracy','test_F1','test_ROC_AUC','test_WE']
boxplot_path = 'Ensemble_models/Boxplots_allModels.png'
visualize_boxplots(trials_results, boxplot_metrics, True, boxplot_path)

In [None]:
## Statistical model comparison
# Delegates to the project helper from Utils, which receives the full per-trial results table.
# NOTE(review): the statistical test it applies is not visible from this notebook - see Utils.compare_models.
compare_models(trials_results)

# Model selection

In [None]:
# Ensemble model kept for the final report; must match a value of the 'classifier' column
# in trials_results ('WV' is the weighted-voting ensemble).
selected_model: str = 'WV'

In [None]:
# Boxplot of the selected model's test metrics across trials
selected_results = trials_results[trials_results['classifier']==selected_model]
selected_metrics = ['test_Accuracy','test_Sensitivity','test_Specificity','test_F1','test_ROC_AUC']
visualize_boxplot_onemodel(selected_results, selected_metrics, True,
                           'Ensemble_models/Boxplot_'+selected_model+'.png')