This notebook shows different visualizations of the algorithms' performance.

In [None]:
import config

In [None]:
from fold import computedFold

In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from fold import readFold
import matplotlib.pyplot as plt
from pmlb import classification_dataset_names

In [None]:
import matplotlib
# Use a 12pt base font size for every figure drawn in this notebook
matplotlib.rcParams.update({'font.size': 12})

# Open all the scores

If the methods were not all computed on the same date, list every relevant date.

In [None]:
def fromDictToFrame(dictionary, keys = None):
    """
        Transforms a multidimensional dictionary in a flat one usable to build a dataframe

        dictionary -- Possibly nested dictionary to flatten
        keys -- Sequence of parent keys prepended to every produced key (None for no prefix)

        Returns a dictionary mapping the tuple of keys leading to each leaf to a one-element
        list holding that leaf; empty nested dictionaries produce no entry
    """
    # None instead of a mutable default list; list() also accepts tuples as prefix
    prefix = [] if keys is None else list(keys)
    df = {}
    for key, value in dictionary.items():
        path = prefix + [key]
        if isinstance(value, dict):
            # Recurse with the extended path and merge the flattened leaves
            df.update(fromDictToFrame(value, path))
        else:
            df[tuple(path)] = [value]
    return df

def openFiles(dates, subfolder_detection = False, join = "inner"):
    """
        Opens all files at the given dates and restructures it in several dataframes for clarity

        dates -- Names of the result folders (relative to config.result) to open
        subfolder_detection -- If True, each entry listed inside a date folder is opened as its own result folder
        join -- How the rows of the different folders are aligned (forwarded to pd.concat, "inner" or "outer")

        Returns the tuple (scores, assignation, constraints, info) of dataframes indexed by result file name
        Raises ValueError if no folder contains a file with a non-empty score
    """
    if subfolder_detection:
        dates = [os.path.join(date, method) for date in dates for method in os.listdir(os.path.join(config.result, date))]

    scores, assignation, constraints, info = [], [], [], {}
    for date in dates:
        print("Opening {}".format(date))
        path = os.path.join(config.result, date)
        scoresDate, assignationDate, constraintsDate = {}, {}, {}

        # Open each dataset
        for dname in tqdm(os.listdir(path)):
            # NOTE: pickle can execute arbitrary code, only open result files from a trusted source
            # The context manager closes the handle (a bare open() leaks one descriptor per file)
            with open(os.path.join(path, dname), 'rb') as resultFile:
                unpick = pickle.load(resultFile)

            # Open Score
            scoreDname = pd.DataFrame.from_dict(fromDictToFrame(unpick["Score"]))
            if not scoreDname.empty:
                # Files without any score are ignored in all four outputs
                scoresDate[dname] = scoreDname

                # Open Assignation
                assignationDate[dname] = pd.DataFrame.from_dict(fromDictToFrame(unpick["Assignation"]))

                # Open Constraints
                constraintsDate[dname] = pd.DataFrame.from_dict(fromDictToFrame(unpick["Percentage Constraint"]))

                # Open info
                info[dname] = pd.DataFrame.from_dict(fromDictToFrame(readFold(unpick["Name"])))

        if len(scoresDate) > 0:
            # Stack the files of this folder and keep only the file name as index
            scores.append(pd.concat(scoresDate, axis = "index").reset_index(level=[1], drop=True))
            assignation.append(pd.concat(assignationDate, axis = "index").reset_index(level=[1], drop=True))
            constraints.append(pd.concat(constraintsDate, axis = "index").reset_index(level=[1], drop=True))
        else:
            print(" -> Empty")

    if not scores:
        # pd.concat would raise the same exception type with a less helpful message
        raise ValueError("No score found in any of the folders: {}".format(dates))

    # Folders are placed side by side (columns), rows being aligned following `join`
    return pd.concat(scores, join = join, axis = 1),\
        pd.concat(assignation, join = join, axis = 1),\
        pd.concat(constraints, join = join, axis = 1),\
        pd.concat(info, axis = "index").reset_index(level=[1], drop=True)

In [None]:
# Result folder(s) to load; the trailing comment keeps other run dates at hand
dates = ['24 April 2019 15:28:13'] #05 April 2019 11:59:58 #03 April 2019 15:45:13 #19 April 2019 12:11:09 #24 April 2019 15:28:13

In [None]:
# True enables subfolder_detection: every entry inside a date folder is opened as its own result folder
scores, assignation, constraints, info = openFiles(dates, True)
# len(scores) is the number of rows of the score table (one per result file kept)
print("Open {} result files".format(len(scores)))

In [None]:
# Preview the first rows of the score table
scores.head()

In [None]:
# Preview the first rows of the assignation table
assignation.head()

In [None]:
# Preview the first rows of the constraint percentage table
constraints.head()

In [None]:
# Preview the first rows of the dataset information table
info.head()

# Comparison

In [None]:
# Metric and data split used to select the score columns in the comparison cells below
metric = "FScore" #FScore #Normalized Mutual Info #Adjusted Rand
category = "test" # 'all', 'train' or 'test'

## Global performance

Compute the mean over the different iterations

In [None]:
# Keep every column of the selected split and metric, whatever its first two column levels
folder_score = scores.loc[:, (slice(None), slice(None), category, metric)].copy()
# Drop the split and metric levels, which are constant after the selection above
folder_score = folder_score.T.reset_index(level=[2, 3], drop=True).T
# Average over column level 0; levels 1 to 3 are indexed below as (method, split, metric)
# NOTE(review): groupby(axis=1) is deprecated in recent pandas releases - confirm the version in use
scores_average = scores.groupby(axis=1, level=[1, 2, 3]).mean()

Compute the global performances of the different methods

In [None]:
# Select the chosen split and metric; after the transpose each row is one method
measure = scores_average.loc[:, (slice(None), category, metric)].copy()
measure = measure.T.reset_index(level=[1, 2], drop=True)
# Order the methods by decreasing median, then transpose back so that methods are columns
measure = measure.loc[measure.median(axis = 1).sort_values(ascending = False).index].T
# One horizontal box per method
sns.boxplot(data = measure, orient="h")
plt.ylabel("Methods")
plt.xlabel(metric)
# Export the figure as EPS, named after the metric and the split
plt.savefig('{}_{}.eps'.format(metric, category), format='eps', dpi=1000, bbox_inches = "tight")

In [None]:
# Same data as the box plot, drawn as one horizontal violin per method
sns.violinplot(data = measure, orient="h")
plt.ylabel("Methods")
plt.xlabel(metric)

# Analysis by method

In [None]:
# Method under study in this section
method = "Bayesian Optimization SoftKmeans"
# NOTE(review): `measure` and `folder_score` were built with the previous value of `metric` and
# are not recomputed after this assignment, so the rank figures below are saved under this
# metric's name while being computed from the earlier metric - confirm which one is intended
metric = "Adjusted Rand"

## First ranked

Compute and display the rank of the different methods

In [None]:
# Names excluded from the comparison below, except for the one stored in `method`
our_methods = ["Bayesian Optimization SoftKmeans", "Bayesian Optimization", "Bayesian mahalanobis", "Bayesian cosine", "Cross Validation", "Cross Validation SoftKmeans"]
# Every column of measure that is not listed above, followed by the method under study
var = [v for v in measure.columns if v not in our_methods] + [method]

In [None]:
# Count rank position
# NOTE(review): arange stops at len(var) - 1, so the last possible rank is left out - confirm this is intended
ranks = np.arange(1, len(var)).tolist()
# Rank the methods on each row (1 = highest score); tied methods share the smallest rank
rank = measure[var].rank(axis = 1, method = 'min', ascending = False).astype(int)
# Rows: methods, columns: rank values, cells: number of rows of measure where the method got that rank
count = rank.apply(lambda x: x.value_counts()).T
count = count.sort_values(by=ranks, ascending = False)[ranks] # Reorder
a = count.plot.barh(stacked=True)
a.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel("Number of datasets")
plt.savefig('{}_{}_rank.eps'.format(metric, category), format='eps', dpi=1000, bbox_inches = "tight")
plt.show()

# Second figure: only the first three ranks, with a hatched overlay counting significant placements
ranks = [1, 2, 3]
count = count.sort_values(by=ranks, ascending = False)[ranks] # Reorder
a = count.plot.barh(stacked=True)
for ir in ranks:
    significant = {v: 0 for v in var}
    for i, s in folder_score.iterrows():
        # Every method holding rank ir on this row (several in case of ties)
        for best in rank.loc[i].index[rank.loc[i] == ir]:
            # Mean minus 1.96 standard errors of the scores of `best`
            lower = s[(slice(None), best)].mean() - 1.96 * s[(slice(None), best)].std() / np.sqrt(len(s[(slice(None), best)]))
            # Adds 1 when that bound exceeds mean + 1.96 standard errors of every other method ranked ir or worse
            significant[best] += all([lower > (s[(slice(None),r)].mean() + 1.96 * s[(slice(None),r)].std() / np.sqrt(len(s[(slice(None),r)]))) for r in rank.loc[i].index if ((r != best) and (rank.loc[i][r] >= ir))])
    significant = pd.DataFrame.from_dict(significant, orient="index", columns=["Significant "]).loc[count.index]
    # Hatched bar starting where the segment of rank ir begins; legend drawn only for the last rank
    a = significant.plot.barh(ax = a, color = "k", alpha = 0.25, hatch='//', left = count[[c for c in ranks if c < ir]].sum(axis = 1), legend = (ir == ranks[-1]))

plt.xlabel("Number of datasets")
plt.ylabel("Methods")
plt.savefig('{}_{}_rank_sign.eps'.format(metric, category), format='eps', dpi=1000, bbox_inches = "tight")

In [None]:
# `count` only holds ranks 1 to 3 at this point (restricted in the previous cell)
a = count.plot.barh(stacked=True)
ranks = [2, 3]
for ir in ranks:
    previous = {v: 0 for v in var}
    for i, s in folder_score.iterrows():
        # Every method holding rank ir on this row (several in case of ties)
        for best in rank.loc[i].index[rank.loc[i] == ir]:
            # Mean plus 1.96 standard errors of the scores of `best`
            upper = s[(slice(None), best)].mean() + 1.96 * s[(slice(None), best)].std() / np.sqrt(len(s[(slice(None), best)]))
            # Adds 1 when that bound exceeds mean - 1.96 standard errors of every method ranked ir - 1
            previous[best] += all([upper > (s[(slice(None),r)].mean() - 1.96 * s[(slice(None),r)].std() / np.sqrt(len(s[(slice(None),r)]))) for r in rank.loc[i].index if (rank.loc[i][r] == ir - 1)])
    previous = pd.DataFrame.from_dict(previous, orient="index", columns=["Overlap Previous"]).loc[count.index]
    # Hatched white bar starting where the segment of rank ir begins; legend drawn only for the last rank
    previous.plot.barh(ax = a, color = "w", alpha = 0.25, hatch='//', edgecolor="w", left = count[[c for c in range(1, ir)]].sum(axis = 1), legend = (ir == ranks[-1]))
plt.xlabel("Number of datasets")
plt.ylabel("Methods")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# NOTE(review): same file name pattern as the global box plot export; both figures end up in the
# same file whenever `metric` and `category` hold the same values in both cells - confirm
plt.savefig('{}_{}.eps'.format(metric, category), format='eps', dpi=1000, bbox_inches = "tight")

## Versus all

In [None]:
# Compare `method` against every other selected method, so remove it from the list
var = [v for v in var if v != method]
# Number of panels on each row of the comparison grid
number_per_line = 4 
# Reversed cubehelix colormap used for the hue of the scatter plots
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True, reverse=True)

In [None]:
# Per-dataset descriptors used for the size and the hue of the scatter points
info["Number of points"] = info['Labels', None].apply(lambda x: len(x))
info["Number of clusters"] = info["N_Classes", None]
number_line = np.ceil(len(var) / number_per_line).astype(int)

# squeeze=False keeps `axes` two-dimensional even with a single row; without it the
# 2D indexing below raises as soon as len(var) <= number_per_line
fig, axes = plt.subplots(number_line, number_per_line, figsize = (4 * number_per_line, 4 * number_line), sharey=True, squeeze=False)
for i, v in enumerate(var):
    # Panels are filled column by column
    a = axes[i % number_line, i // number_line]
    # Diagonal of equal scores: points above it are rows where `method` scores higher than `v`
    a.plot([0, 1], [0, 1], ls = ':')
    # Only the last panel carries a legend
    a = sns.scatterplot(ax = a, x = v, y = method, data = measure, sizes=(10, 200), size=info["Number of points"], hue=info["Number of clusters"], alpha = 0.7, palette = cmap, legend = 'brief' if (v == var[-1]) else False)

# The loop has no break, so this is equivalent to the former for/else clause
a.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Hide the panels left unused at the end of the grid
for j in range(i + 1, number_line * number_per_line):
    axes[j  % number_line, j // number_line].axis('off')
plt.tight_layout()

plt.savefig('{}_{}_comparison.eps'.format(metric, category), format='eps', dpi=1000, bbox_inches = "tight")

## Train vs Test Performances

Compare performances between training and test. It is interesting to notice that our method does not overfit, whereas other methods perform well on training but poorly on test.

In [None]:
# Scores of `method` for the current metric on every split
measure = scores_average.loc[:, (method, slice(None), metric)].copy()
# Drop the method and metric levels so that columns are the split names
measure = measure.T.reset_index(level=[0, 2], drop=True).T

In [None]:
# Train score against test score of `method`, one point per row of measure
plt.title(method)
plt.grid(alpha = 0.1)
# Point size follows the number of points, hue the number of clusters
sns.scatterplot(x="train", y="test", sizes=(10, 200), data=measure, 
                size=info["Number of points"], hue=info["Number of clusters"], palette = cmap, alpha = 0.7)
# Same fixed range on both axes, with a small margin
plt.xlim(-0.1,1.1)
plt.ylim(-0.1,1.1)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

## Train vs Test Constraint

Compare the percentage of constraints respected on train and test

In [None]:
from constraint import verification_constraint

In [None]:
def percentageConstraint(gtLabels, label, trainIndices, test = False):
    """
        Compute the percentage of constraint respected
        gtLabels -- Ground truth label
        label -- Assignation compared to the constraints derived from the ground truth
        trainIndices -- Indices for train
        test -- Compute on test subset (indices not in trainIndices) if True

        Returns verified / (verified + notVerified), as counted by verification_constraint
    """
    if test:
        # Complement of the train indices, in increasing order
        # The set makes each membership test constant time instead of a scan of trainIndices
        trainSet = set(trainIndices)
        trainIndices = [i for i in range(len(gtLabels)) if i not in trainSet]
    # Pairwise matrix: 1 when both points share the ground truth label, -1 otherwise
    gtConstraint = 2*np.equal.outer(gtLabels[trainIndices], gtLabels[trainIndices]) - 1

    verified, notVerified = verification_constraint(gtConstraint, label[trainIndices])
    return  verified / (notVerified + verified)

In [None]:
# For each row of measure: mean, over the values i of the first level of the assignation columns,
# of the share of constraints respected by `method` on the train indices
measure["% train constraint respected"] = pd.DataFrame.from_dict({dname: np.mean([percentageConstraint(info.loc[dname]['Labels', pd.NaT],
                                                                    assignation.loc[dname][i, method],
                                                                    info.loc[dname]['Train', i, method])
                                                                for i in assignation.loc[dname].index.levels[0]])
                                                        for dname in measure.index}, orient = 'index')

# Same computation with test = True, i.e. on the indices that are not in the train set
measure["% test constraint respected"] = pd.DataFrame.from_dict({dname: np.mean([percentageConstraint(info.loc[dname]['Labels', pd.NaT],
                                                                    assignation.loc[dname][i, method],
                                                                    info.loc[dname]['Train', i, method], test = True)
                                                                for i in assignation.loc[dname].index.levels[0]])
                                                        for dname in measure.index}, orient = 'index')

In [None]:
# Share of constraints respected on train against test, one point per row of measure
plt.title(method)
plt.grid(alpha = 0.1)
# Point size follows the number of points, hue the number of clusters
sns.scatterplot(x="% train constraint respected", y="% test constraint respected", sizes=(10, 200), data=measure, 
                size=info["Number of points"], hue=info["Number of clusters"], palette = cmap, alpha = 0.7)
# Same fixed range on both axes, with a small margin
plt.xlim(-0.1,1.1)
plt.ylim(-0.1,1.1)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [None]:
# Share of train constraints respected against the test score, one point per row of measure
plt.title(method)
plt.grid(alpha = 0.1)
# Point size follows the number of points, hue the number of clusters
sns.scatterplot(x="% train constraint respected", y="test", sizes=(10, 200), data=measure, 
                size=info["Number of points"], hue=info["Number of clusters"], palette = cmap, alpha = 0.7)
# Same fixed range on both axes, with a small margin
plt.xlim(-0.1,1.1)
plt.ylim(-0.1,1.1)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

## On which datasets is it worse ?

In [None]:
# Best score reached on each row by the methods listed in `var`
difference = scores_average.loc[:, (var, category, metric)].max(axis="columns") # Max
difference = difference - scores_average.loc[:, (method, category, metric)] # Remove current score
# Keep only the rows where one of those methods scores strictly higher than `method`
difference = difference[difference > 0]

In [None]:
# Information on the rows where `method` is outperformed
# Explicit copy: the new column must not be written into (or warned about as) a slice of `info`
worse = info.loc[difference.index].copy()
worse["Difference"] = difference
# Largest gaps first
worse[["Name", "Difference", "Number of clusters", "Number of points"]].sort_values("Difference", ascending = False)

-----

# Evolution

In [None]:
# Result folder(s) used for the evolution study; the trailing comment keeps other run dates at hand
dates = ['24 April 2019 15:28:47']#, '04 April 2019 17:49:23', '02 April 2019 13:09:14', '02 April 2019 13:08:22', '02 April 2019 13:07:48', '07 April 2019 17:09:09', '19 April 2019 12:14:33',  '24 April 2019 15:28:47'] 

In [None]:
# Replaces the tables loaded earlier; the 'outer' join keeps rows that are missing from some folders
scores, assignation, constraints, info = openFiles(dates, True, 'outer')
# len(scores) is the number of rows of the score table
print("Open {} result files".format(len(scores)))

In [None]:
# Preview the first rows of the newly loaded score table
scores.head()

## Performances

In [None]:
# Metric and data split used to select the score columns in the evolution cells below
metric = "Adjusted Rand"
category = "test" # 'all', 'train' or 'test'

In [None]:
def _perMethod(aggregated):
    """
        Keeps the columns of the current category and metric and names them by method only
    """
    selected = aggregated.loc[:, (slice(None), category, metric)]
    return selected.T.reset_index(level=[1, 2], drop=True).T

# Column levels kept when aggregating (level 0 is aggregated away)
groupLevels = [1, 2, 3]

# Mean, standard deviation and number of available values for each method
measure = _perMethod(scores.copy().groupby(axis=1, level=groupLevels).mean())
deviation = _perMethod(scores.copy().groupby(axis=1, level=groupLevels).std())
length = _perMethod((~ pd.isna(scores.copy())).groupby(axis=1, level=groupLevels).sum())

# The number of constraints is parsed from the row name: the text after '_(True, ' up to the last ')'
measure['Number of constraints'] = [float(name[name.index('_(') + len('_(True, '): name.rindex(')')])
                                    for name in measure.index]
measure['Name'] = info[('Name', pd.NaT)]

In [None]:
# Mean plus / minus 1.96 standard errors (deviation divided by the square root of the number of values)
upper = measure + 1.96 * deviation / np.sqrt(length)
lower = measure - 1.96 * deviation / np.sqrt(length)

In [None]:
# Columns of measure that do not hold a method score
descriptors = ['Number of constraints', 'Name']
for i in info[('Name', pd.NaT)].unique():
    currentDataset = measure[info[('Name', pd.NaT)] == i].sort_values('Number of constraints')
    # Five methods with the highest mean score on this dataset. The descriptors are dropped
    # explicitly instead of skipping the first entry of the sorted means, which assumed that
    # 'Number of constraints' always had the largest mean
    topMethods = currentDataset.drop(columns = descriptors).mean().sort_values(ascending = False).head(5).index
    # Dedicated loop variable: the global `method` is no longer overwritten
    for topMethod in topMethods:
        # The label is required for the legend below to list the methods
        plt.plot(currentDataset['Number of constraints'], currentDataset[topMethod], label = topMethod)
        # Band of 1.96 standard errors around the mean
        plt.fill_between(currentDataset['Number of constraints'], lower.loc[currentDataset.index, topMethod], upper.loc[currentDataset.index, topMethod], alpha = 0.1)
    plt.title(i)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()

In [None]:
# One figure per dataset name, showing every column against the number of constraints
for i in info[('Name', pd.NaT)].unique():
    currentDataset = measure[info[('Name', pd.NaT)] == i].sort_values('Number of constraints')
    # Columns are ordered by decreasing mean before plotting
    currentDataset[currentDataset.mean().sort_values(ascending = False).index].plot(x = 'Number of constraints')
    plt.title(i)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

## Difference to the first 

In [None]:
# Method of ours kept in the comparison below
method = "Bayesian Optimization"

In [None]:
# Every column of measure not listed in our_methods (this includes 'Number of constraints' and 'Name'), plus `method`
var = [v for v in measure.columns if v not in our_methods] + [method]

In [None]:
# Mean score of each method for every (dataset name, number of constraints) pair
group = measure[var].groupby(['Name', 'Number of constraints']).mean()
# Rank the methods within each pair (1 = highest score); tied methods share the smallest rank
rank = group.rank(axis=1, method = 'min', ascending = False).astype(int)
# For each number of constraints, number of datasets on which each method is ranked first
count = (rank == 1).groupby('Number of constraints').sum()
a = count.plot.bar(stacked=True)
plt.ylabel("Number of Datasets")
plt.title("Ranked first")
plt.tight_layout()
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Number of ranked entries available for each number of constraints
rank.groupby('Number of constraints').count()