In [1]:
import tensorflow as tf 
%matplotlib inline 

  from ._conv import register_converters as _register_converters


# F_measure Scoring function 
### F_measure scoring function is the harmonic mean of specificity and sensitivity
The F-measure defined here is a function of sensitivity and specificity. Finding the right model can be difficult, especially when grid searching hyperparameters: the evaluation metric Keras offers out of the box for classification is `accuracy`, which does not necessarily select the best model in terms of specificity and sensitivity. I have therefore written a measure that can be used as a model evaluation metric during training to assess a model; when it is used with grid search, it selects the model with the best f_measure score. 
f_measure is the harmonic mean of sensitivity and specificity, whereas `f1_score` from sklearn is the harmonic mean of precision and recall. 
For now the function works only with binary classes; in the future it will be extended to multiclass classification as well. 

In [125]:
from keras import backend as K
def f_measure(y_true, y_pred):
    """
    f_measure: Harmonic mean of specificity and sensitivity (recall), computed
    batch wise so it can be used as a metric during training.
    **for binary classification only**

    Both inputs are clipped to [0, 1] and rounded to the nearest integer before
    any counting, so specificity and sensitivity are derived from the same hard
    0/1 labels even when ``y_pred`` holds probabilities. For inputs that are
    already 0/1 the result is the same as without the rounding.

    @param
    y_true: Tensor of actual labels (0 or 1)
    y_pred: Tensor of predicted labels, or predicted probabilities in [0, 1]
    @returns
    f_measure score for a batch (scalar tensor). ``K.epsilon()`` is added to
    every denominator, so a batch without negatives or without positives
    evaluates to 0 instead of dividing by zero.
    """
    # Binarize once; every count below is built from these two tensors.
    actual_pos = K.round(K.clip(y_true, 0, 1))
    predicted_pos = K.round(K.clip(y_pred, 0, 1))
    actual_neg = 1 - actual_pos
    predicted_neg = 1 - predicted_pos

    # Confusion-matrix counts summed over the whole batch.
    true_neg = K.sum(actual_neg * predicted_neg)
    false_pos = K.sum(actual_neg * predicted_pos)
    true_pos = K.sum(actual_pos * predicted_pos)
    possible_pos = K.sum(actual_pos)

    # specificity = TN / (TN + FP); sensitivity (recall) = TP / (TP + FN)
    specificity = true_neg / (true_neg + false_pos + K.epsilon())
    sensitivity = true_pos / (possible_pos + K.epsilon())

    return 2 * ((specificity * sensitivity) / (specificity + sensitivity + K.epsilon()))




def f1_score(y_true, y_pred):
    """Batch-wise F1 score: the harmonic mean of precision and recall.

    Every count is taken after clipping to [0, 1] and rounding, and is summed
    over all elements of the tensors, so the value describes a single batch.
    The original notebook also applies it to one-hot encoded multiclass labels.

    y_true: tensor of actual labels
    y_pred: tensor of predicted labels
    Returns a scalar tensor; ``K.epsilon()`` guards every denominator.
    """

    def _hard_count(tensor):
        # Clip to [0, 1], round, and count the resulting ones.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _hard_count(y_true * y_pred)      # true positives
    flagged = _hard_count(y_pred)            # predicted positives
    relevant = _hard_count(y_true)           # actual positives

    p = hits / (flagged + K.epsilon())       # precision
    r = hits / (relevant + K.epsilon())      # recall
    return 2 * (p * r) / (p + r + K.epsilon())

## Example for multiclass classification

In [126]:
import numpy as np

In [127]:
# Ground truth: ten samples, one-hot encoded over three classes.
y_true = [
    [0., 1., 0.],
    [1., 0., 0.],
    [1., 0., 0.],
    [0., 1., 0.],
    [0., 0., 1.],
    [0., 0., 1.],
    [1., 0., 0.],
    [0., 0., 1.],
    [0., 1., 0.],
    [1., 0., 0.],
]

# Hard one-hot predictions for the same ten samples (five rows match the truth).
y_predicted = [
    [0., 1., 0.],
    [1., 0., 0.],
    [0., 1., 0.],
    [0., 1., 0.],
    [0., 0., 1.],
    [0., 0., 1.],
    [0., 0., 1.],
    [1., 0., 0.],
    [1., 0., 0.],
    [0., 0., 1.],
]

In [128]:
# Wrap both nested lists as float32 backend tensors for the metric functions.
# Note: this rebinds `y_true` from the nested list to a tensor.
y_true, y_pred = (
    K.constant(values, dtype=tf.float32) for values in (y_true, y_predicted)
)

In [129]:
# Evaluate the F1 metric on the one-hot example tensors and show the value.
K.eval(f1_score(y_true, y_pred))

0.49999994

## Binary classification: f_measure

In [134]:

# Six binary samples: actual labels as a float32 tensor ...
y_true = K.constant([0., 1., 0., 1., 0., 0.], dtype=tf.float32)
# ... and the hard 0/1 predictions, kept as a list and as a tensor.
y_predicted = [0., 1., 0., 0., 0., 1.]
y_pred = K.constant(y_predicted, dtype=tf.float32)

In [135]:
# Evaluate the specificity/sensitivity harmonic mean on the binary example.
K.eval(f_measure(y_true, y_pred))

0.59999996