# Analysis of Different Class Selections and Different Number of Descendants: Recognition Gap, MIRC-percentage and MIRC sizes

This notebook creates the bar plot figures of the magnitudes and
standard deviations of recognition gaps, the sizes of the MIRCs and the
fractions of images that have a MIRC for different experiments. In the
manuscript, it is Figure 8.

# Your TODO

Please specify the path to the topmost directory of your recognition
gap experiments, i.e. the parent directory of the analysis folder.

In [None]:
import sys

import pandas as pd
import matplotlib.pyplot as plt

# path to the topmost directory of the recognition gap experiments
path_to_recognition_gap_folder = "/gpfs01/bethge/home/jborowski/CHAM_recognition_gap/JOV_publication_git_bethgelab/recognition_gap/"

# The project-local modules are imported only after the recognition gap
# folder has been put on the module search path, so that this cell also
# works on a fresh kernel.
# NOTE(review): assumes data_csv_utils and plot_utils are found via this
# folder - confirm.
sys.path.insert(1, path_to_recognition_gap_folder)

import data_csv_utils
import plot_utils

## Load libraries

In [None]:

# custom imports: add the recognition gap folder to the module search path
# (at position 1) so that modules stored there can be imported
sys.path.insert(1, path_to_recognition_gap_folder)

## Data

In [None]:
# empty containers for the metrics that get plotted; each of them is
# filled with one entry per experiment condition
# recognition gap: mean and standard deviation
recognitionGapsMachine, recognitionGapsMachineStd = {}, {}
# number of images with MIRCs and total number of images
number_of_MIRCS, total_number_individual_image_classes = {}, {}
# size of MIRCs: mean and standard deviation
MIRC_size_mean, MIRC_size_std = {}, {}

In [None]:
# iterate over each experiment with different conditions and store its
# summary metrics under the condition name
for exp_dir_condition, exp_dir_list in data_csv_utils.exp_dir_dict.items():
    # get all the data in one dataframe
    all_data_df = data_csv_utils.get_df_from_exp_dir_list(exp_dir_list)
    # clean the data such that only data from images which yielded MIRCs is
    # contained
    all_data_df_real_MIRCs = data_csv_utils.get_df_with_data_from_real_MIRCs_only(
        all_data_df)

    # calculate the metrics that are displayed in the appendix.
    # The column is selected *before* aggregating: reducing the whole
    # dataframe first computes statistics for every column and raises in
    # pandas >= 2.0 as soon as one column is not numeric.
    # ddof=0 yields the population standard deviation.

    # A. mean and standard deviation of rec_gap
    recognitionGapsMachine[exp_dir_condition] = (
        all_data_df_real_MIRCs["rec_gap"].mean())
    recognitionGapsMachineStd[exp_dir_condition] = (
        all_data_df_real_MIRCs["rec_gap"].std(ddof=0))

    # B. mean and std of MIRCs size
    MIRC_size_mean[exp_dir_condition] = (
        all_data_df_real_MIRCs["pix_size_MIRC"].mean())
    MIRC_size_std[exp_dir_condition] = (
        all_data_df_real_MIRCs["pix_size_MIRC"].std(ddof=0))

    # C. calculate number of total images and number of images with MIRCs.
    total_number_individual_image_classes[exp_dir_condition] = all_data_df.shape[0]
    number_of_MIRCS[exp_dir_condition] = all_data_df_real_MIRCs.shape[0]

In [None]:
# extend both recognition gap dictionaries with the entry for the data
# from Ullman et al. (2016)
recognitionGapsMachine.update({"human-selected patches": 0.14})
recognitionGapsMachineStd.update({"human-selected patches": 0.24})

## Set Parameters

In [None]:
# number of experiment conditions listed in data_csv_utils.exp_dir_dict;
# the plotting cells below use it to place x-ticks and set x-limits
n_experiment_conditions = len(data_csv_utils.exp_dir_dict)

# Plot it!

## A. Recognition gap

In [None]:
def plot_machine_rec_gaps_as_vertical_bars():
    """Plot the magnitude and standard deviation of the recognition gaps for
    the different machine experiments as vertical bars.

    One bar is drawn via ``plt.bar`` for every entry of
    ``recognitionGapsMachine``; the matching entry of
    ``recognitionGapsMachineStd`` is used as its error bar. The bar of the
    "human-selected patches" condition gets a black edge, all other bars are
    drawn without an edge. Each bar is labelled with its condition name so
    that it shows up in a legend.
    """

    for x_index, (key, value) in enumerate(recognitionGapsMachine.items()):
        # "none" is matplotlib's value for "draw no edge"; an empty string
        # is not a valid color specification in current matplotlib releases
        edgecolor = "k" if key == "human-selected patches" else "none"
        plt.bar(
            x_index,
            value,
            plot_utils.width,
            yerr=recognitionGapsMachineStd[key],
            color=plot_utils.colors[x_index],
            edgecolor=edgecolor,
            label=key,
        )

In [None]:
plt.figure(figsize=[4.5, 4.5])

# plot data
# plot human data as horizontal bar
plot_utils.plot_human_rec_gap_as_horizonal_bar(len(recognitionGapsMachine))
# plot machine data as vertical bar
plot_machine_rec_gaps_as_vertical_bars()

# axes
# plt.gca() returns the axes the bars were drawn on. plt.axes() must not be
# used here: in current matplotlib releases it adds a new, empty axes on top
# of the existing one instead of returning it.
ax = plt.gca()
x_labels = [
    f"{round(recognitionGapsMachine[key], 3)}"
    f"\u00B1"  # plus-minus sign
    f"{round(recognitionGapsMachineStd[key], 3)}"
    for key in recognitionGapsMachine.keys()]
# one tick per plotted bar, i.e. per entry of recognitionGapsMachine
plt.xticks(list(range(len(x_labels))), x_labels, rotation=20)
ax.set_xlim(-0.5, len(x_labels) - 0.5)
ax.set_ylabel("recognition gap")
ax.yaxis.label.set_color("red")

# legend, placed to the right of the axes
plt.legend(bbox_to_anchor=(1.3, 0.6765), frameon=False)

plot_utils.hide_right_and_top_spine(ax)

plt.title("Recognition gap: mean and std")

plt.savefig("JOV_appendix_rec_gap.svg", bbox_inches="tight")
plt.show()

## B. Size of MIRCs

In [None]:
plt.figure(figsize=[4, 4.5])

# plot size and standard deviation of MIRCs as vertical bars
for x_index, (key, value) in enumerate(MIRC_size_mean.items()):
    plt.bar(
        x_index,
        value,
        plot_utils.width,
        yerr=MIRC_size_std[key],
        color=plot_utils.colors[x_index],
    )

# axes
x_labels = [
    f"{round(MIRC_size_mean[key], 3)}"
    f"\u00B1"  # plus-minus sign
    f"{round(MIRC_size_std[key], 3)}"
    for key in MIRC_size_mean.keys()]
plt.xticks(list(range(n_experiment_conditions)), x_labels, rotation=20)
plt.xlim(-0.5, n_experiment_conditions - 1 + 0.5)
plt.ylabel("mean size of MIRCs [original px space]")

# plt.gca() returns the axes holding the bars; plt.axes() would add a new,
# empty axes on top of it in current matplotlib releases
plot_utils.hide_right_and_top_spine(plt.gca())

plt.title("Size of MIRCs: mean and std")

plt.savefig("JOV_appendix_MIRC_size.svg", bbox_inches="tight")
plt.show()

## C. MIRC%

In [None]:
plt.figure(figsize=[4, 4.5])

# plot fraction of MIRCs as vertical bars
for x_index, (key, value) in enumerate(number_of_MIRCS.items()):
    plt.bar(
        x_index,
        # images with MIRCs divided by all images of this condition
        value / total_number_individual_image_classes[key],
        plot_utils.width,
        color=plot_utils.colors[x_index]
    )

# axes
x_labels = [
    f"{number_of_MIRCS[key]}/{total_number_individual_image_classes[key]}"
    for key in number_of_MIRCS.keys()]
plt.xticks(list(range(n_experiment_conditions)), x_labels, rotation=20)
plt.xlim(-0.5, n_experiment_conditions - 1 + 0.5)
plt.xlabel("number of images with MIRCs / total number of images")
plt.ylabel("fraction of images that has MIRCs")
plt.ylim(0.0, 1.05)

# plt.gca() returns the axes holding the bars; plt.axes() would add a new,
# empty axes on top of it in current matplotlib releases
plot_utils.hide_right_and_top_spine(plt.gca())

plt.title("Fraction of images that has MIRCs")

plt.savefig("JOV_appendix_MIRC_fraction.svg", bbox_inches="tight")
plt.show()