### Validation of the model

In [1]:
import glob
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from tensorflow import keras

In [10]:
#this function returns the confusion-matrix counts needed to calculate the ROC curve and the AUC score
def confusion_matrix(y_true,y_score, threshold):
    """Tally confusion-matrix counts for binary labels at one threshold.

    A sample counts as an actual positive when its label equals 1; every
    other label value counts as negative. A sample counts as a predicted
    positive when its score is greater than or equal to ``threshold``.

    Returns a dict with the keys "threshold", "t_p", "f_p", "f_n", "t_n",
    "qtd_p" (number of actual positives) and "qtd_n" (number of actual
    negatives).
    """
    tally = {"t_p": 0, "f_p": 0, "f_n": 0, "t_n": 0}

    for idx in range(len(y_true)):
        # Pick the (predicted positive, predicted negative) outcome names
        # that apply to this sample's actual class.
        if y_true[idx] == 1:
            outcomes = ("t_p", "f_n")
        else:
            outcomes = ("f_p", "t_n")

        hit = outcomes[0] if y_score[idx] >= threshold else outcomes[1]
        tally[hit] += 1

    return {
        "threshold": threshold,
        "t_p": tally["t_p"],
        "f_p": tally["f_p"],
        "f_n": tally["f_n"],
        "t_n": tally["t_n"],
        # Every actual positive is either a true positive or a false negative.
        "qtd_p": tally["t_p"] + tally["f_n"],
        # Every actual negative is either a false positive or a true negative.
        "qtd_n": tally["f_p"] + tally["t_n"],
    }

In [None]:
def y_true_y_score(model, class_names,path):
    """Collect the label and the model score for every file in each class folder.

    For each key of ``class_names`` the directory ``path/<class_names[key]>``
    is listed and each entry is loaded as an image, converted to an array,
    given a leading batch axis and passed to ``model.predict``.

    Returns a tuple ``(y_true, y_score)`` of two lists: ``y_true`` holds the
    ``class_names`` key of each file, and ``y_score`` holds
    ``predictions[0][0]`` for the same file.
    """
    labels = []
    scores = []

    for label in class_names:
        # Every entry of the directory is loaded, so it must contain only images.
        for file_name in os.listdir(path+"/"+class_names[label]):
            picture = keras.preprocessing.image.load_img(path+"/"+class_names[label]+"/"+file_name)
            # No rescaling is applied here (the original note suggests dividing
            # by 255 only if the images are not already scaled).
            pixels = tf.keras.preprocessing.image.img_to_array(picture)
            # Add a leading batch axis so that a single image forms a batch of one.
            batch = tf.expand_dims(pixels, 0)

            output = model.predict(batch)
            scores.append(output[0][0])
            labels.append(label)

    return labels, scores

In [68]:
def precision(t_p,f_p):
    """Return the share of predicted positives that are true positives.

    The division is not guarded: plain Python numbers raise
    ZeroDivisionError when ``t_p + f_p`` is zero.
    """
    predicted_positives = t_p + f_p
    return t_p / predicted_positives

#accuracy = fraction of all samples that were classified correctly
def accuracy(t_p, t_n, f_p, f_n):
    """Return the fraction of samples that were classified correctly.

    Accuracy is (true positives + true negatives) divided by the total
    number of samples. The previous version added ``t_p`` twice and never
    counted the true negatives.

    The division is not guarded: plain Python numbers raise
    ZeroDivisionError when all four counts are zero.
    """
    correct = t_p + t_n
    total = t_n + f_p + f_n + t_p
    return correct / total

#sensitivity = true positive rate
def recall(t_p, f_n):
    """Return the true positive rate: true positives over all actual positives.

    The division is not guarded: plain Python numbers raise
    ZeroDivisionError when ``t_p + f_n`` is zero.
    """
    actual_positives = t_p + f_n
    return t_p / actual_positives

#false positive rate = 1 - specificity
def f_p_rate(f_p, t_n):
    """Return the false positive rate: false positives over all actual negatives.

    The division is not guarded: plain Python numbers raise
    ZeroDivisionError when ``f_p + t_n`` is zero.
    """
    actual_negatives = f_p + t_n
    return f_p / actual_negatives

#true negative rate
def specificity(t_n, f_p):
    """Return the true negative rate: true negatives over all actual negatives.

    The division is not guarded: plain Python numbers raise
    ZeroDivisionError when ``t_n + f_p`` is zero.
    """
    actual_negatives = t_n + f_p
    return t_n / actual_negatives

def F_beta(precision, recall, beta=1):
    """Return the F-beta score for the given precision and recall.

    F_beta = (1 + beta^2) * precision * recall / (beta^2 * precision + recall)

    ``beta`` weights recall against precision; ``beta=1`` gives the
    harmonic mean of the two (the F1 score).

    The previous version had no parentheses around the denominator, so
    operator precedence turned it into
    ``((1 + beta^2) * precision * recall / precision) * recall * beta^2``.

    Returns 0.0 when the denominator is zero (precision and recall both
    zero), instead of dividing by zero.
    """
    beta_sq = beta ** 2
    denominator = beta_sq * precision + recall
    if denominator == 0:
        return 0.0
    return (1 + beta_sq) * precision * recall / denominator


def best_threshold_by_roc(y_true, y_score):
    """Return the confusion matrix of the threshold closest to the ROC optimum.

    Thresholds 0.000, 0.001, ..., 0.999 are evaluated. For each one the ROC
    point (false positive rate, true positive rate) is computed and its
    Euclidean distance to the ideal corner (0, 1) is measured. The
    confusion-matrix dict (as built by ``confusion_matrix``, including its
    "threshold" entry) of the closest point is returned; on ties the lowest
    threshold wins.

    Fixes relative to the previous version:
    - ``range`` does not accept a float step, so thresholds are now derived
      from an integer counter;
    - the false positive rate is computed from ``f_p`` and ``t_n`` (it was
      being passed ``t_p``);
    - the best matrix is actually stored and returned (the last evaluated
      matrix was returned before).

    Errors raised by ``recall`` / ``f_p_rate`` (for example a division by
    zero when ``y_true`` contains no positive or no negative samples) are
    not caught here.
    """
    n_steps = 1000  # resolution of the threshold sweep over [0, 1)

    best_d = None
    best_matrix = None

    for step in range(n_steps):
        # Derive each threshold from the integer counter to avoid
        # accumulating floating-point error.
        threshold = step / n_steps
        matrix_t = confusion_matrix(y_true, y_score, threshold)

        # Coordinates of this threshold on the ROC curve.
        y = recall(matrix_t['t_p'], matrix_t['f_n'])
        x = f_p_rate(matrix_t['f_p'], matrix_t['t_n'])

        # Distance between this ROC point and the optimum (x=0, y=1).
        di = np.sqrt(x**2 + (1 - y)**2)

        if best_d is None or di < best_d:
            best_d = di
            best_matrix = matrix_t

    return best_matrix

#area under the curve of roc
def AUC_score(x,y):
    """Approximate the area under a curve given its sample points.

    ``x`` and ``y`` are equally long sequences of coordinates, with ``x``
    expected in ascending order. The area is accumulated with the
    left-rectangle rule: each interval ``[x[i-1], x[i]]`` contributes its
    width times ``y[i-1]``.

    The previous version subtracted the slice ``x[i-1:i]`` instead of the
    element ``x[i-1]``, which raises TypeError for lists and produces a
    one-element array for NumPy inputs.

    Returns 0 when fewer than two points are given.
    """
    a = 0
    for i in range(1, len(x)):
        width = x[i] - x[i-1]
        a += width * y[i-1]

    return a

**ROC curve** True positive rate x False positive rate

In [None]:
# Maps the label stored in y_true to the name of the folder holding that class.
class_names = {
    0: 'benigno',
    1: 'maligno'
}

# Directory with the saved *.h5 models and root directory of the validation
# images (one sub-folder per class name); both must be filled in before running.
models_path = ""
images_path= ""

models_scores = dict() #model file name: array with one entry per score

#score every saved model on the validation images
for model_uri in glob.glob(models_path+"/*.h5"):
    model_i = tf.keras.models.load_model(model_uri)
    model_name = os.path.split(model_uri)[-1]

    # Evaluate the model that was just loaded (an undefined name `model`
    # was being passed here before).
    y_true, y_pred = y_true_y_score(model_i, class_names, images_path)

    # roc_auc_score works directly on the raw scores. accuracy_score and
    # f1_score need hard class labels, so they can only be appended to the
    # array below once the scores have been thresholded.
    s1 = roc_auc_score(y_true, y_pred)
    models_scores[model_name] = np.array([s1])

if not models_scores:
    # Fail with a clear message instead of an IndexError further down.
    raise FileNotFoundError("no .h5 models found in " + repr(models_path))

# One row per model, one column per score.
models_scores_df = pd.DataFrame(models_scores).transpose()

# Rank by the mean of all score columns that are present, so this keeps
# working whether one score or several are stored per model.
score_columns = list(models_scores_df.columns)
models_scores_df["mean_score"] = models_scores_df[score_columns].mean(axis=1)
models_scores_df = models_scores_df.sort_values(by="mean_score", axis=0, ascending=False)

best_model = tf.keras.models.load_model(models_path+"/"+models_scores_df.index[0])
y_true, y_pred = y_true_y_score(best_model, class_names, images_path)

#show the confusion matrix of the best model at its best threshold
best_threshold_by_roc(y_true, y_pred)