In [None]:
from sklearn import metrics
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as plt_colors
import numpy as np
import math
import itertools
from collections import OrderedDict
import os

import matplotlib

# Global text sizes for every figure drawn after this point. `font_size` is
# also read directly by _plot_confusion_matrix for the per-cell annotations.
font_size = 32
matplotlib.rcParams.update({
    'xtick.labelsize': font_size,
    'ytick.labelsize': font_size,
    'axes.labelsize': font_size + 4,
    'axes.titlesize': font_size + 4,
})

def encode(name):
    """Map a class name to its integer index in the fixed class ordering.

    Args:
        name: One of the ten class-name strings listed below.

    Returns:
        The zero-based position of ``name`` in the class list.

    Raises:
        ValueError: If ``name`` is not one of the known class names.
    """
    label_order = [
        'Akashiwo',
        'Ceratium falcatiforme or fusus',
        'Ceratium furca',
        'Chattonella',
        'Cochlodinium',
        'Gyrodinium',
        'Lingulodinium polyedra',
        'Other',
        'Prorocentrum micans',
        'Pseudo-nitzschia chain',
    ]
    position = label_order.index(name)
    return position

def compute_cm(gtruth, pred, plot=True, save=True, color='blue', cmap=None):
    """Build a row-normalised (percentage) confusion matrix over the 10 classes.

    Args:
        gtruth: Iterable of true class names; each must be accepted by ``encode``.
        pred: Iterable of predicted class names, paired element-wise with
            ``gtruth`` (pairs are formed with ``zip``).
        plot: If True, draw the matrix with ``_plot_confusion_matrix``.
        save: Forwarded to ``_plot_confusion_matrix``.
        color: Forwarded to ``_plot_confusion_matrix`` (bar colour).
        cmap: Optional colormap forwarded to ``_plot_confusion_matrix``.
            ``None`` leaves the plotting function's own default in place.

    Returns:
        Tuple ``(rate, diag)``. ``rate[i, j]`` is the percentage of class-``i``
        samples predicted as class ``j``, rounded to 4 decimals; ``diag`` is
        its diagonal (per-class accuracy). Rows for classes with no
        ground-truth samples are NaN.
    """
    num_classes = 10

    # Raw counts: rows index the true class, columns the predicted class.
    confusion_matrix_rawcount = np.zeros((num_classes, num_classes))
    for t, p in zip(gtruth, pred):
        confusion_matrix_rawcount[encode(t), encode(p)] += 1

    # Number of samples per true class is just the row total.
    class_count = confusion_matrix_rawcount.sum(axis=1, keepdims=True)

    # A class with no samples yields 0/0 = NaN. The NaN is kept on purpose
    # (the plotting code checks for it); only the numpy warning is silenced.
    with np.errstate(invalid='ignore'):
        confusion_matrix_rate = confusion_matrix_rawcount / class_count * 100

    confusion_matrix_rate = np.around(confusion_matrix_rate, decimals=4)

    if plot:
        plot_kwargs = {'save': save, 'color': color}
        if cmap is not None:
            plot_kwargs['cmap'] = cmap
        _plot_confusion_matrix(confusion_matrix_rate, **plot_kwargs)
    return confusion_matrix_rate, np.diag(confusion_matrix_rate)

def _plot_confusion_matrix(cm, cmap=plt.cm.Greens, save=False, color='blue',
                           fname='./cv_pier_confusion_matrix.png'):
    """Plot the confusion matrix and the diagonal class accuracies side by side.

    Args:
        cm: Square 2-D array indexed ``[true, predicted]``, one row/column per
            entry of the class list below. NaN entries are tolerated.
        cmap: Colormap for the heat map.
        save: If True, write the figure to ``fname`` before showing it.
        color: Bar colour for the per-class accuracy chart.
        fname: Output path used when ``save`` is True. The default keeps the
            previously hard-coded file name.
    """
    classes = ['Akashiwo', 'Ceratium falcatiforme or fusus', 'Ceratium furca', 'Chattonella', 'Cochlodinium', 'Gyrodinium', 'Lingulodinium polyedra', 'Other', 'Prorocentrum micans', 'Pseudo-nitzschia chain']
    fmt = '.2f'

    plt.figure(figsize=(40, 20))

    # Left panel: annotated heat map of the matrix.
    plt.subplot(1, 2, 1)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title('Confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, ha='right')
    plt.yticks(tick_marks, classes)

    # Cells above the threshold get white text. If the matrix contains NaN,
    # cm.max() is NaN and the fixed fallback of 50.0 is used instead.
    thresh = cm.max() / 2. if not math.isnan(cm.max()) else 50.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black", fontsize=font_size-7)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    # Right panel: per-class accuracy bars, sorted ascending. NaN diagonals
    # are drawn as zero-length bars, but the raw value is kept for the label
    # so that a genuine 0.00 is not reported as "nan".
    ax = plt.subplot(1, 2, 2)
    raw_diag = cm.diagonal()
    ranked = sorted(zip(classes, np.nan_to_num(raw_diag), raw_diag),
                    key=lambda item: item[1])
    bar_names = [name for name, _, _ in ranked]
    bar_values = [value for _, value, _ in ranked]
    bar_raw = [raw for _, _, raw in ranked]

    ax = pd.Series(bar_values).plot(kind='barh', color=color, ax=ax)
    ax.set_xlabel('Class Accuracy')
    ax.set_yticklabels(bar_names)
    for rect, raw in zip(ax.patches, bar_raw):
        ax.text(rect.get_width() + 5, rect.get_y() + rect.get_height() / 2,
                format(raw, fmt), ha='center', va='bottom')

    # Lay out first so the saved file matches what is shown on screen.
    plt.tight_layout()
    if save:
        plt.savefig(fname)
    plt.show()


# Evaluate the predictions stored in the pier test CSV.
df = pd.read_csv('data6/phytoplankton-db/predictions/cv_pier_test_predictions.csv')

gtruth = df['label'].tolist()
pred = df['ml_hab_prediction'].tolist()
classes = ['Akashiwo', 'Ceratium falcatiforme or fusus', 'Ceratium furca', 'Chattonella', 'Cochlodinium', 'Gyrodinium', 'Lingulodinium polyedra', 'Other', 'Prorocentrum micans', 'Pseudo-nitzschia chain']

# pier_diag (per-class accuracy) is reused later for the grouped bar plot.
cm, pier_diag = compute_cm(gtruth, pred, plot=True, save=True, color='Green')

# Pass labels explicitly so target_names always lines up with the ten classes,
# even when some class never occurs in gtruth/pred.
print( metrics.classification_report(gtruth, pred, labels=classes, target_names=classes) )

In [None]:
# Load the in-vitro predictions; this rebinds df, gtruth, pred and classes.
df = pd.read_csv('data6/phytoplankton-db/predictions/cv_hab_in_vitro_predictions.csv')

gtruth, pred = df['label'].tolist(), df['ml_hab_prediction'].tolist()
classes = [
    'Akashiwo',
    'Ceratium falcatiforme or fusus',
    'Ceratium furca',
    'Chattonella',
    'Cochlodinium',
    'Gyrodinium',
    'Lingulodinium polyedra',
    'Other',
    'Prorocentrum micans',
    'Pseudo-nitzschia chain',
]

def compute_cm(gtruth, pred, plot=True, save=True, color='blue', cmap=None):
    """Build a row-normalised (percentage) confusion matrix over the 10 classes.

    This definition replaces any earlier ``compute_cm`` in the session; it
    plots with the Oranges colormap unless ``cmap`` is given.

    Args:
        gtruth: Iterable of true class names; each must be accepted by ``encode``.
        pred: Iterable of predicted class names, paired element-wise with
            ``gtruth`` (pairs are formed with ``zip``).
        plot: If True, draw the matrix with ``_plot_confusion_matrix``.
        save: Forwarded to ``_plot_confusion_matrix``.
        color: Forwarded to ``_plot_confusion_matrix`` (bar colour).
        cmap: Optional colormap for the heat map; ``None`` selects
            ``plt.cm.Oranges``.

    Returns:
        Tuple ``(rate, diag)``. ``rate[i, j]`` is the percentage of class-``i``
        samples predicted as class ``j``, rounded to 4 decimals; ``diag`` is
        its diagonal (per-class accuracy). Rows for classes with no
        ground-truth samples are NaN.
    """
    num_classes = 10

    # Raw counts: rows index the true class, columns the predicted class.
    confusion_matrix_rawcount = np.zeros((num_classes, num_classes))
    for t, p in zip(gtruth, pred):
        confusion_matrix_rawcount[encode(t), encode(p)] += 1

    # Number of samples per true class is just the row total.
    class_count = confusion_matrix_rawcount.sum(axis=1, keepdims=True)

    # A class with no samples yields 0/0 = NaN. The NaN is kept on purpose
    # (the plotting code checks for it); only the numpy warning is silenced.
    with np.errstate(invalid='ignore'):
        confusion_matrix_rate = confusion_matrix_rawcount / class_count * 100

    confusion_matrix_rate = np.around(confusion_matrix_rate, decimals=4)

    if plot:
        heatmap_cmap = plt.cm.Oranges if cmap is None else cmap
        _plot_confusion_matrix(confusion_matrix_rate, cmap=heatmap_cmap, save=save, color=color)
    return confusion_matrix_rate, np.diag(confusion_matrix_rate)

# lab_diag (per-class accuracy) is reused later for the grouped bar plot.
# NOTE(review): with save=True the figure goes to the plotting helper's default
# output path, which is the same file the pier cell writes -- confirm that
# overwriting it is intended.
cm, lab_diag = compute_cm(gtruth, pred, plot=True, save=True, color='orange')

# Pass labels explicitly so target_names always lines up with the ten classes,
# even when some class never occurs in gtruth/pred.
print( metrics.classification_report(gtruth, pred, labels=classes, target_names=classes) )

In [None]:
# Start the comparison table with the lab per-class accuracies.
diag_df = pd.DataFrame(
    {
        'class': classes,
        'Class Accuracy': lab_diag,
        'Imaging System': ['SPC-Lab'] * 10,
    }
)

In [None]:
# Append the pier per-class accuracies. Any 'SPC-Pier' rows left over from a
# previous run of this cell are dropped first so re-running does not duplicate
# them, and the index is rebuilt so the combined frame has unique row labels.
diag_df = pd.concat(
    [
        diag_df[diag_df['Imaging System'] != 'SPC-Pier'],
        pd.DataFrame({'class': classes, 'Class Accuracy': pier_diag, 'Imaging System': ['SPC-Pier']*10}),
    ],
    ignore_index=True,
)

In [None]:
import seaborn as sns

In [None]:
# Order rows by imaging system, then by class accuracy, both descending.
diag_df = diag_df.sort_values(
    by=['Imaging System', 'Class Accuracy'],
    ascending=False,
)

In [None]:
import matplotlib

# Smaller text than the confusion-matrix figures; overrides the earlier sizes.
font_size = 20
matplotlib.rcParams.update({
    'xtick.labelsize': font_size,
    'ytick.labelsize': font_size,
    'axes.labelsize': font_size,
    'axes.titlesize': font_size,
})

# Take three "muted" colours and make the last two the default seaborn palette.
current_palette_7 = sns.color_palette("muted", 3)
sns.set_palette(current_palette_7[1:])

# Grouped bars: one group per class, one bar per imaging system.
plt.figure(figsize=(20, 5))
sns.barplot(data=diag_df, x='class', y='Class Accuracy', hue='Imaging System')
plt.xticks(rotation=30, ha='right')
# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.0)