# 1. Histogram from a single set of measurements

The code below reads the .csv file named `csv_file` from the folder given in `dir_measurements`, makes one histogram for each column whose name is listed in `measures`, and saves each plot as:

    histogram_{hist_basename}_{column_name}.png

where `hist_basename` is a value we provide and `{column_name}` is the name of the plotted column. The files are stored inside the folder given in `dir_histograms`.

## 1.1 Function definitions

In [None]:
import matplotlib.pyplot as plt
import os
import math
import numpy as np

## Function to round a number with n significant digits
def round_to_significant_digits(number, n_digits=1):
    """
    Round a number to the specified number of significant digits.

    Args:
        number (float): The number to be rounded.
        n_digits (int): The number of significant digits to keep.

    Returns:
        float: The number rounded to n significant digits. An input of zero
        is returned as the integer 0, and NaN / infinite inputs are returned
        unchanged (they have no order of magnitude to round against).
    """
    if number == 0:
        return 0  # Special case for 0 (log10 is undefined there)

    # NaN and +/-inf would make math.floor(math.log10(...)) raise, so they
    # are passed through untouched instead of crashing the caller.
    if not math.isfinite(number):
        return number

    # Calculate the order of magnitude of the number
    order_of_magnitude = int(math.floor(math.log10(abs(number))))

    # Factor that shifts the number into the form 0.d1d2d3..., i.e. a zero,
    # a decimal point and then all the significant digits of the number
    factor = 10 ** (-1 - order_of_magnitude)

    # Keep n_digits decimals of the shifted value (= n significant digits),
    # then undo the shift to restore the original order of magnitude
    rounded_number = np.round(number * factor, n_digits)
    rounded_number = rounded_number / factor

    # The division above can reintroduce floating-point noise; round once
    # more at the decimal position of the last significant digit
    precision_digits = (n_digits - 1) - order_of_magnitude
    rounded_number = np.round(rounded_number, precision_digits)

    return rounded_number


## Function to create and save a histogram
def MakeHistogram(data, path_to_save, hist_name, width=640, height="",
                  title="", xlabel="", ylabel="", bins=20, dpi=100,
                  x_min=None, x_max=None, y_min=None, y_max=None,
                  label_fontsize=12, title_fontsize=14, tick_fontsize=10,
                  tick_length=8, tick_width=2, grid=False, set_mean=True,
                  set_min=False, set_max=False, set_std=False):
    """
    Plot a histogram of "data" and save it inside "path_to_save" with the
    file name "hist_name" and .png extension. The folder is created when it
    does not exist yet.

    The figure measures width x height pixels at the given dpi; an empty
    "height" produces a square figure. Limits are applied to an axis only
    when at least one of its two bounds is given.

    Optional annotations (all text is drawn in black):
      set_mean: Draws a red dashed vertical line at the mean of the data and
                writes "Mean: <value>" to the right of it, just above the
                tallest bar.
      set_std:  Writes "StD.: <value>" (np.std of the data rounded to 2
                significant digits) to the left of the mean, just above the
                tallest bar.
      set_min:  Writes "Min.: <value>" near the bottom of the plot, at the
                smallest data value.
      set_max:  Writes "Max.: <value>" near the bottom of the plot, at the
                largest data value.

    Note: "data" needs a .mean() method when set_mean or set_std is used.
    """
    # An empty height means "same as the width"
    if not height:
        height = width

    # Figure size is requested in pixels, matplotlib wants inches
    fig, ax = plt.subplots(figsize=(width/dpi, height/dpi), dpi=dpi)
    counts, _edges, _bars = ax.hist(data, bins=bins)

    # Title and axis captions
    ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlabel(xlabel, fontsize=label_fontsize)
    ax.set_ylabel(ylabel, fontsize=label_fontsize)

    # Axis limits, only for the axes the caller constrained
    if not (x_min is None and x_max is None):
        ax.set_xlim(x_min, x_max)
    if not (y_min is None and y_max is None):
        ax.set_ylim(y_min, y_max)

    # Tick appearance and optional grid
    ax.tick_params(axis='both', labelsize=tick_fontsize,
                   length=tick_length, width=tick_width)
    if grid:
        ax.grid(True, linestyle='--', linewidth=tick_width*0.5)

    def annotate(x_pos, height_factor, text, h_align):
        # Text anchored at a fraction of the tallest bar's height
        ax.text(x_pos, max(counts)*height_factor, text,
                color='black', fontsize=label_fontsize,
                verticalalignment='bottom', horizontalalignment=h_align)

    # Small horizontal gap (1% of the data range) between a value and its text
    x_shift = (max(data) - min(data))*0.01

    # Mean: red dashed line plus caption to its right
    if set_mean:
        mean_value = data.mean()
        mean_caption = f'Mean: {mean_value:.2f}'
        ax.axvline(mean_value, color='red', linestyle='--',
                   linewidth=2*tick_width, label=mean_caption)
        annotate(mean_value + x_shift, 1.01, mean_caption, 'left')

    # Standard deviation: caption only, placed to the left of the mean
    if set_std:
        mean_value = data.mean()
        std_value = round_to_significant_digits(np.std(data), n_digits=2)
        annotate(mean_value - x_shift, 1.01, f'StD.: {std_value:}', 'right')

    # Extremes: captions close to the x-axis
    if set_min:
        annotate(min(data) + x_shift, 0.01, f'Min.: {min(data):.2f}', 'left')
    if set_max:
        annotate(max(data) + x_shift, 0.01, f'Max.: {max(data):.2f}', 'right')

    # Write the figure to disk and release it
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save, exist_ok=True)
    fig.savefig(os.path.join(path_to_save, hist_name + ".png"))
    plt.close(fig)

## 1.2. Make histogram

In [None]:
import pandas as pd
import os

# Folder layout, resolved from the current working directory
dir_base = os.getcwd()                                          # ./
dir_measurements = dir_base                                     # ./  (use os.path.join(dir_base, "Measurements") for ./Measurements)
dir_images = os.path.join(dir_base, "images")                   # ./images
dir_histograms = os.path.join(dir_images, "Histograms")         # ./images/Histograms
if not os.path.exists(dir_histograms):
    os.makedirs(dir_histograms, exist_ok=True)

# Input .csv file with the measurements
csv_file = "measures_pathomics_pretrained.csv"
csv_file_path = os.path.join(dir_measurements, csv_file)

# Pieces used to build the output file names and the plot titles
hist_basename = "pathomics_pretrained"
hist_basetitle = "pathomic images with pre-trained EDSR"

# Columns of the .csv file to plot (one histogram per column)
measures = ["MSE", "PSNR", "SSIM"]

# Figure appearance
hist_width = 4000
hist_bins = 20
title_fontsize = 72
label_fontsize = 60
tick_fontsize = 48
tick_length = 30
tick_width = 4

# Optional elements to draw
set_grid = True
set_mean = True
set_std = True
set_min = True
set_max = True

# Read the measurements into a dataframe df
df = pd.read_csv(csv_file_path)

# Options shared by every histogram of this cell
hist_options = dict(width=hist_width, height="", ylabel="Frequency",
                    bins=hist_bins, dpi=100,
                    label_fontsize=label_fontsize, title_fontsize=title_fontsize,
                    tick_fontsize=tick_fontsize, tick_length=tick_length,
                    tick_width=tick_width, grid=set_grid, set_mean=set_mean,
                    set_min=set_min, set_max=set_max, set_std=set_std)

# One exported histogram per measure
for measure in measures:
    data = df[measure]
    hist_name = f"histogram_{hist_basename}_{measure}"
    hist_title = f"{measure} for {len(data)} {hist_basetitle}"
    MakeHistogram(data=data, path_to_save=dir_histograms, hist_name=hist_name,
                  title=hist_title, xlabel=measure, **hist_options)

# 2. Compare histograms

The code below reads several .csv files, given as a list of paths in `csv_file_paths_list` (with their respective labels in the list `csv_files_labels`) and located in the folder given in `dir_measurements`. For each column whose name is listed in `measures`, it overlays the histograms of all the files in a single figure and saves the plot as:

    histogram_comparison_{hist_basename}_{column_name}.png

where `hist_basename` is a value we provide and `{column_name}` is the name of the plotted column. The files are stored inside the folder given in `dir_histograms`.

## 2.1 Function definitions

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patheffects import withStroke
import os
import math
import numpy as np

## Function to round a number with n significant digits
def round_to_significant_digits(number, n_digits=1):
    """
    Round a number to the specified number of significant digits.

    Args:
        number (float): The number to be rounded.
        n_digits (int): The number of significant digits to keep.

    Returns:
        float: The number rounded to n significant digits. An input of zero
        is returned as the integer 0, and NaN / infinite inputs are returned
        unchanged (they have no order of magnitude to round against).
    """
    if number == 0:
        return 0  # Special case for 0 (log10 is undefined there)

    # NaN and +/-inf would make math.floor(math.log10(...)) raise, so they
    # are passed through untouched instead of crashing the caller.
    if not math.isfinite(number):
        return number

    # Calculate the order of magnitude of the number
    order_of_magnitude = int(math.floor(math.log10(abs(number))))

    # Factor that shifts the number into the form 0.d1d2d3..., i.e. a zero,
    # a decimal point and then all the significant digits of the number
    factor = 10 ** (-1 - order_of_magnitude)

    # Keep n_digits decimals of the shifted value (= n significant digits),
    # then undo the shift to restore the original order of magnitude
    rounded_number = np.round(number * factor, n_digits)
    rounded_number = rounded_number / factor

    # The division above can reintroduce floating-point noise; round once
    # more at the decimal position of the last significant digit
    precision_digits = (n_digits - 1) - order_of_magnitude
    rounded_number = np.round(rounded_number, precision_digits)

    return rounded_number

## Function to compare 2 or more histograms in the same figure and save it
def CompareHistograms(data_list, data_labels, hist_name, path_to_save, width=640,
                      height="", title="", xlabel="", ylabel="", bins=20, alpha=0.5,
                      dpi=100, x_min=None, x_max=None, y_min=None, y_max=None,
                      label_fontsize=12, title_fontsize=14, tick_fontsize=10,
                      tick_length=8, tick_width=2, grid=False, set_mean=True,
                      set_min=False, set_max=False, set_std=False):
    """
    Take "data_list" as a list of data sets, draw one semi-transparent
    histogram per data set on the same figure and save it in "path_to_save"
    with the file name "hist_name" and .png extension.

    Note: data_list and data_labels must be of the same length.

    All histograms share the same bin edges, computed from the pooled finite
    values of every data set, so that no data set loses the values lying
    outside the range of the first one. A sequence of bin edges passed as
    "bins" is used as given.

    The figure measures width x height pixels at the given dpi; an empty
    "height" produces a square figure. Limits are applied to an axis only
    when at least one of its two bounds is given.

    Optional annotations, drawn for each data set in that data set's color
    and stacked vertically so that they do not overlap:
      set_mean: Draws a dashed vertical line at the mean and writes
                "Mean: <value>" to the right of it, near the top.
      set_std:  Writes "StD.: <value>" (np.std rounded to 2 significant
                digits) to the left of the mean, near the top.
      set_min:  Writes "Min.: <value>" near the bottom, at the smallest value.
      set_max:  Writes "Max.: <value>" near the bottom, at the largest value.
    The vertical position of the annotations is relative to "y_max" when it
    is given, otherwise to the tallest bar among all histograms.

    Note: each data set needs a .mean() method when set_mean or set_std is
    used.
    """
    # Histogram parameters
    if not height:
        height = width

    # The data sets are traversed more than once below
    data_list = list(data_list)

    # Create the figure
    fig, ax = plt.subplots(figsize=(width/dpi, height/dpi), dpi=dpi)

    # Set label and title
    ax.set_xlabel(xlabel, fontsize=label_fontsize)
    ax.set_ylabel(ylabel, fontsize=label_fontsize)
    ax.set_title(title, fontsize=title_fontsize, pad=title_fontsize*2)

    # Set axis limits
    if x_min is not None or x_max is not None:
        ax.set_xlim(x_min, x_max)
    if y_min is not None or y_max is not None:
        ax.set_ylim(y_min, y_max)

    # Set tick label font size on both axes
    ax.tick_params(axis='both', labelsize=tick_fontsize,
                   length=tick_length, width=tick_width)

    # Add grid if specified
    if grid:
        ax.grid(True, linestyle='--', linewidth=tick_width*0.5)

    # Common bin edges for every data set. Reusing the edges returned by the
    # first ax.hist call would bin the later data sets on the first one's
    # range and silently discard whatever falls outside of it.
    bin_edges = bins
    if data_list:
        pooled = np.concatenate([np.asarray(d, dtype=float).ravel() for d in data_list])
        pooled = pooled[np.isfinite(pooled)]
        bin_edges = np.histogram_bin_edges(pooled, bins=bins)

    # First pass: draw every histogram
    plotted = []
    for idx, data in enumerate(data_list):
        counts, _, patches = ax.hist(data, bins=bin_edges, label=data_labels[idx], alpha=alpha)
        plotted.append((data, counts, patches))

    # Height used to place the annotations: the requested y_max, or the
    # tallest bar of all the histograms when no y_max was given
    label_ref = y_max
    if label_ref is None and plotted:
        label_ref = max(max(counts) for _, counts, _ in plotted)

    # White outline that keeps the text readable on top of the bars
    text_outline = [withStroke(linewidth=tick_width*2, foreground='white')]

    # Second pass: annotate each histogram
    for idx, (data, counts, patches) in enumerate(plotted):
        hor_displace = (max(data) - min(data))*0.01
        # Each data set gets its own row of text, 5% of label_ref apart
        ver_displace_relative = 0.99 - idx * 0.05
        ver_displace_relative_bottom = 0.01 + idx * 0.05

        # Color that matplotlib assigned to this histogram
        current_color = patches[0].get_facecolor()

        # Mean: dashed line in the data set color plus caption
        if set_mean:
            mean_value = data.mean()
            mean_line = ax.axvline(mean_value, color=current_color, linestyle='--', linewidth=2*tick_width)
            mean_line.set_path_effects([withStroke(linewidth=tick_width, foreground='white')])

            mean_text = ax.text(mean_value + hor_displace, label_ref*ver_displace_relative,
                                f'Mean: {mean_value:.2f}', color=current_color, fontsize=label_fontsize,
                                verticalalignment='top', horizontalalignment='left', label=None, weight="bold")
            mean_text.set_path_effects(text_outline)

        # Standard deviation: caption only, to the left of the mean
        if set_std:
            mean_value = data.mean()
            std_value = np.std(data)
            std_value = round_to_significant_digits(std_value, n_digits=2)
            std_text = ax.text(mean_value - hor_displace, label_ref*ver_displace_relative,
                               f'StD.: {std_value:}', color=current_color, fontsize=label_fontsize,
                               verticalalignment='top', horizontalalignment='right', label=None, weight="bold")
            std_text.set_path_effects(text_outline)

        # Display minimum and maximum values
        if set_min:
            min_text = ax.text(min(data) + hor_displace, label_ref*ver_displace_relative_bottom,
                               f'Min.: {min(data):.2f}', color=current_color, fontsize=label_fontsize,
                               verticalalignment='bottom', horizontalalignment='left', label=None, weight="bold")
            min_text.set_path_effects(text_outline)
        if set_max:
            max_text = ax.text(max(data) + hor_displace, label_ref*ver_displace_relative_bottom,
                               f'Max.: {max(data):.2f}', color=current_color, fontsize=label_fontsize,
                               verticalalignment='bottom', horizontalalignment='right', label=None, weight="bold")
            max_text.set_path_effects(text_outline)

    # Make legend
    ax.legend(fontsize=label_fontsize)

    # Export the figure to a file
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save, exist_ok=True)
    filepath = os.path.join(path_to_save, hist_name + ".png")
    fig.savefig(filepath)
    plt.close(fig)

    return

## 2.2 Making the histogram comparison

In [None]:
import pandas as pd
import os

# Folder layout, resolved from the current working directory
dir_base = os.getcwd()                                          # ./
dir_measurements = dir_base                                     # ./  (use os.path.join(dir_base, "Measurements") for ./Measurements)
dir_images = os.path.join(dir_base, "images")                   # ./images
dir_histograms = os.path.join(dir_images, "Histograms")         # ./images/Histograms
if not os.path.exists(dir_histograms):
    os.makedirs(dir_histograms, exist_ok=True)

# .csv files to compare, with the legend label of each one (same order)
csv_file_paths_list = [
    os.path.join(dir_measurements, "measures_pathomics_trained_wholedataset_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_trained_batches_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_trained_pngs_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_pretrained_new.csv"),
]
csv_files_labels = [
    "Trained in \n dedicated dataset\n (Humanitas)\n whole dataset",
    "Trained in \n dedicated dataset\n (Humanitas)\n dataset in batches",
    "Trained in \n general dataset\n (TCIA)",
    "Pretrained EDSR",
]

# Pieces used to build the output file names and the plot titles
hist_basename = "pathomics_new"
hist_basetitle = "pathomic images with EDSR trained on different cases"

# Columns to plot and the axis limits to use for each of them
measures = ["MSE", "PSNR", "SSIM"]

axes_limits = {
    "MSE": {"x_min": 0, "x_max": 1000, "y_min": 0, "y_max": 200},
    "PSNR": {"x_min": 15, "x_max": 40, "y_min": 0, "y_max": 200},
    "SSIM": {"x_min": 0, "x_max": 1, "y_min": 0, "y_max": 300}
}

# Figure appearance
hist_width = 4000
hist_bins = 20
alpha = 0.6
title_fontsize = 72
label_fontsize = 60
tick_fontsize = 48
tick_length = 30
tick_width = 4

# Optional elements to draw
set_grid = True
set_mean = True
set_std = True
set_min = False
set_max = False

# Read every data file into its own dataframe
df_list = []
for data_file_path in csv_file_paths_list:
    df = pd.read_csv(data_file_path)
    df_list.append(df)

# Options shared by every comparison figure of this cell
compare_options = dict(width=hist_width, height="", ylabel="Frequency",
                       bins=hist_bins, alpha=alpha, dpi=100,
                       label_fontsize=label_fontsize, title_fontsize=title_fontsize,
                       tick_fontsize=tick_fontsize, tick_length=tick_length,
                       tick_width=tick_width, grid=set_grid, set_mean=set_mean,
                       set_min=set_min, set_max=set_max, set_std=set_std)

# One exported comparison figure per measure
for measure in measures:
    # Same column taken from every dataframe
    data_list = [df[measure] for df in df_list]

    # The title reports the size of the smallest data set
    data_points_number_list = [len(data) for data in data_list]
    min_data_points_number = min(data_points_number_list)

    # Axis limits configured for this measure
    x_min, x_max, y_min, y_max = (axes_limits[measure][key]
                                  for key in ("x_min", "x_max", "y_min", "y_max"))

    hist_name = f"histogram_comparison_{hist_basename}_{measure}"
    hist_title = f"{measure} for {min_data_points_number} {hist_basetitle}"

    CompareHistograms(data_list=data_list, data_labels=csv_files_labels,
                      hist_name=hist_name, path_to_save=dir_histograms,
                      title=hist_title, xlabel=measure,
                      x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max,
                      **compare_options)



# 3. Making box plots to compare data

The code below reads several .csv files, given as a list of paths in `csv_file_paths_list` (with their respective labels in the list `csv_files_labels`) and located in the folder given in `dir_measurements`. For each column whose name is listed in `measures`, it draws one box plot per file in a single figure and saves the plot as:

    boxplots_comparison_{fig_basename}_{column_name}.png

where `fig_basename` is a value we provide and `{column_name}` is the name of the plotted column. The files are stored inside the folder given in `dir_figures`.

## 3.1 Function definitions

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patheffects import withStroke
import os
import math
import numpy as np

## Function to round a number with n significant digits
def round_to_significant_digits(number, n_digits=1):
    """
    Round a number to the specified number of significant digits.

    Args:
        number (float): The number to be rounded.
        n_digits (int): The number of significant digits to keep.

    Returns:
        float: The number rounded to n significant digits. An input of zero
        is returned as the integer 0, and NaN / infinite inputs are returned
        unchanged (they have no order of magnitude to round against).
    """
    if number == 0:
        return 0  # Special case for 0 (log10 is undefined there)

    # NaN and +/-inf would make math.floor(math.log10(...)) raise, so they
    # are passed through untouched instead of crashing the caller.
    if not math.isfinite(number):
        return number

    # Calculate the order of magnitude of the number
    order_of_magnitude = int(math.floor(math.log10(abs(number))))

    # Factor that shifts the number into the form 0.d1d2d3..., i.e. a zero,
    # a decimal point and then all the significant digits of the number
    factor = 10 ** (-1 - order_of_magnitude)

    # Keep n_digits decimals of the shifted value (= n significant digits),
    # then undo the shift to restore the original order of magnitude
    rounded_number = np.round(number * factor, n_digits)
    rounded_number = rounded_number / factor

    # The division above can reintroduce floating-point noise; round once
    # more at the decimal position of the last significant digit
    precision_digits = (n_digits - 1) - order_of_magnitude
    rounded_number = np.round(rounded_number, precision_digits)

    return rounded_number

## Function to draw one box plot per data set in the same figure and save it
def MakeBoxPlots(data_list, data_labels, fig_name, path_to_save, width=640,
                 height="", title="", xlabel="", ylabel="", dpi=100,
                 x_min=None, x_max=None, y_min=None, y_max=None,
                 label_fontsize=12, title_fontsize=14, tick_fontsize=10, values_fontsize=12,
                 tick_length=8, tick_width=2, markersize=5, box_width=0.5, grid=False,
                 show_median=True, show_mean=True, show_min=False, show_max=False, show_std=False):
    """
    Take "data_list" as a list of data sets, draw one notched box plot (with
    its mean marker) per data set on the same figure and save it in
    "path_to_save" with the file name "fig_name" and .png extension. The
    folder is created when it does not exist yet.

    Note: data_list and data_labels must be of the same length. The box of
    data set number i is placed at x = i and labelled with data_labels[i].

    The figure measures width x height pixels at the given dpi; an empty
    "height" produces a square figure. Labels and title are only set when
    they are not empty, and limits are applied to an axis only when at least
    one of its two bounds is given.

    Optional text annotations, centered on each box:
      show_median: "Median: <value>" just above the median, in the color of
                   the median line.
      show_mean:   "Mean: <value>" in the color of the mean marker, placed
                   below the box end given by the first whisker when the
                   mean is smaller than the median, otherwise above the box
                   end given by the second whisker.
      show_std:    "StD.: <value>" (np.std rounded to 2 significant digits)
                   just below the median, in black.
      show_min:    "Min.: <value>" at the smallest data value, in black.
      show_max:    "Max.: <value>" at the largest data value, in black.

    The small vertical gap used by the annotations is 1% of the y range,
    taken from y_min / y_max, or from the first data set for a missing bound.

    Note: each data set needs .mean() and .median() methods when the
    corresponding annotations are used.
    """
    # An empty height means "same as the width"
    if not height:
        height = width

    # Figure size is requested in pixels, matplotlib wants inches
    fig, ax = plt.subplots(figsize=(width/dpi, height/dpi), dpi=dpi)

    # Captions (skipped when empty)
    if xlabel:
        ax.set_xlabel(xlabel, fontsize=label_fontsize)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=label_fontsize)
    if title:
        ax.set_title(title, fontsize=title_fontsize, pad=title_fontsize*2)

    # Axis limits, only for the axes the caller constrained
    if not (x_min is None and x_max is None):
        ax.set_xlim(x_min, x_max)
    if not (y_min is None and y_max is None):
        ax.set_ylim(y_min, y_max)

    # Tick appearance and optional grid
    ax.tick_params(axis='both', labelsize=tick_fontsize,
                   length=tick_length, width=tick_width)
    if grid:
        ax.grid(True, linestyle='--', linewidth=tick_width*0.5)

    def put_text(x_pos, y_pos, text, color, v_align):
        # Bold, horizontally centered text with a white outline for legibility
        text_artist = ax.text(x_pos, y_pos, text, color=color, fontsize=values_fontsize,
                              verticalalignment=v_align, horizontalalignment='center',
                              label=None, weight="bold")
        text_artist.set_path_effects([withStroke(linewidth=tick_width*2, foreground='white')])

    # Bounds of the y range used for the text gap; a missing bound is filled
    # in from the first data set and then kept for the remaining ones
    upper, lower = y_max, y_min

    for idx, data in enumerate(data_list):
        # Box plot of the current data set at position idx
        box_plot = ax.boxplot([data], positions=[idx], labels=[data_labels[idx]],
                              widths=box_width, patch_artist=True, notch=True, showmeans=True)

        # Thicker lines for every element of the box plot
        for part in ('boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps'):
            plt.setp(box_plot[part], linewidth=tick_width*2.5)

        # Marker sizes of the outlier points and of the mean marker
        for outlier in box_plot['fliers']:
            outlier.set_markersize(markersize)
        for mean_marker in box_plot['means']:
            mean_marker.set_markersize(markersize*2)

        # Vertical gap between a value and its text
        if upper is None:
            upper = max(data)
        if lower is None:
            lower = min(data)
        gap = (upper - lower) / 100

        # Colors for the annotations
        base_color = "black"
        mean_color = box_plot['means'][0].get_color()
        median_color = box_plot['medians'][0].get_color()

        # Median caption, just above the median line
        if show_median:
            median_value = data.median()
            put_text(idx, median_value + gap, f'Median: {median_value:.2f}',
                     median_color, 'bottom')

        # Mean caption, moved outside the box so it does not hide the median
        if show_mean:
            mean_value = data.mean()
            median_value = data.median()
            if mean_value < median_value:
                mean_alignment = "top"
                # First point of the first whisker (box end, lower quartile side)
                mean_position = box_plot["whiskers"][0].get_ydata()[0] - gap
            else:
                mean_alignment = "bottom"
                # First point of the second whisker (box end, upper quartile side)
                mean_position = box_plot["whiskers"][1].get_ydata()[0] + gap
            put_text(idx, mean_position, f'Mean: {mean_value:.2f}',
                     mean_color, mean_alignment)

        # Standard deviation caption, just below the median line
        if show_std:
            median_value = data.median()
            std_value = round_to_significant_digits(np.std(data), n_digits=2)
            put_text(idx, median_value - gap, f'StD.: {std_value:}',
                     base_color, 'top')

        # Captions at the extreme values
        if show_min:
            put_text(idx, min(data), f'Min.: {min(data):.2f}', base_color, 'bottom')
        if show_max:
            put_text(idx, max(data), f'Max.: {max(data):.2f}', base_color, 'top')

    # Write the figure to disk and release it
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save, exist_ok=True)
    fig.savefig(os.path.join(path_to_save, fig_name + ".png"))
    plt.close(fig)

## 3.2 Making the box plots

In [None]:
import pandas as pd
import os

# Folder layout, resolved from the current working directory
dir_base = os.getcwd()                                          # ./
dir_measurements = dir_base                                     # ./  (use os.path.join(dir_base, "Measurements") for ./Measurements)
dir_images = os.path.join(dir_base, "images")                   # ./images
dir_figures = os.path.join(dir_images, "Figures")               # ./images/Figures
if not os.path.exists(dir_figures):
    os.makedirs(dir_figures, exist_ok=True)

# .csv files to compare, with the tick label of each one (same order)
csv_file_paths_list = [
    os.path.join(dir_measurements, "measures_pathomics_trained_wholedataset_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_trained_batches_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_trained_pngs_new.csv"),
    os.path.join(dir_measurements, "measures_pathomics_pretrained_new.csv"),
]
csv_files_labels = [
    "Trained in \n dedicated dataset\n (Humanitas)\n whole dataset",
    "Trained in \n dedicated dataset\n (Humanitas)\n dataset in batches",
    "Trained in \n general dataset\n (TCIA)",
    "Pretrained EDSR",
]

# Pieces used to build the output file names and the plot titles
fig_basename = "pathomics_new"
fig_basetitle = "pathomic images with EDSR trained on different cases"

# Columns to plot and the axis limits to use for each of them
# (the boxes sit at x = 0..3; a y limit of None is left to matplotlib)
measures = ["MSE", "PSNR", "SSIM"]

axes_limits = {
    "MSE": {"x_min": -0.5, "x_max": 3.5, "y_min": None, "y_max": None},
    "PSNR": {"x_min": -0.5, "x_max": 3.5, "y_min": None, "y_max": None},
    "SSIM": {"x_min": -0.5, "x_max": 3.5, "y_min": None, "y_max": None}
}

# Figure appearance
fig_width = 4000
title_fontsize = 72
label_fontsize = 60
values_fontsize = 50
tick_fontsize = 48
tick_length = 30
tick_width = 4
markersize = 30
box_width = 0.75

# Optional elements to draw
set_grid = True
show_median = True
show_mean = True
show_std = True
show_min = False
show_max = False

# Read every data file into its own dataframe
df_list = []
for data_file_path in csv_file_paths_list:
    df = pd.read_csv(data_file_path)
    df_list.append(df)

# Options shared by every box plot figure of this cell
boxplot_options = dict(width=fig_width, height="", xlabel="", dpi=100,
                       values_fontsize=values_fontsize, box_width=box_width,
                       label_fontsize=label_fontsize, title_fontsize=title_fontsize,
                       tick_fontsize=tick_fontsize, tick_length=tick_length,
                       tick_width=tick_width, grid=set_grid, markersize=markersize,
                       show_median=show_median, show_mean=show_mean,
                       show_min=show_min, show_max=show_max, show_std=show_std)

# One exported box plot figure per measure
for measure in measures:
    # Same column taken from every dataframe
    data_list = [df[measure] for df in df_list]

    # The title reports the size of the smallest data set
    data_points_number_list = [len(data) for data in data_list]
    min_data_points_number = min(data_points_number_list)

    # Axis limits configured for this measure
    x_min, x_max, y_min, y_max = (axes_limits[measure][key]
                                  for key in ("x_min", "x_max", "y_min", "y_max"))

    fig_name = f"boxplots_comparison_{fig_basename}_{measure}"
    fig_title = f"{measure} for {min_data_points_number} {fig_basetitle}"

    MakeBoxPlots(data_list=data_list, data_labels=csv_files_labels,
                 fig_name=fig_name, path_to_save=dir_figures,
                 title=fig_title, ylabel=measure,
                 x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max,
                 **boxplot_options)



# 4. Making figures from Segmentation measures

The cells below are analogous to the sections "Histogram from a single set of measurements" and "Making box plots to compare data" above, but they use .csv files whose measurements are segmentation metrics (like the `mean_matched_score`).

First run the corresponding cells with the function definitions above, and then run the cells below.

## 4.1 Histograms

In [None]:
import pandas as pd
import os

# Folder layout, resolved from the current working directory
dir_base = os.getcwd()                                          # ./
dir_measurements = dir_base                                     # ./  (use os.path.join(dir_base, "Measurements") for ./Measurements)
dir_images = os.path.join(dir_base, "images")                   # ./images
dir_histograms = os.path.join(dir_images, "Histograms")         # ./images/Histograms
if not os.path.exists(dir_histograms):
    os.makedirs(dir_histograms, exist_ok=True)

# Input .csv file with the segmentation measurements
csv_file = "measures_pathomics_with_segmentation-pretrained.csv"
csv_file_path = os.path.join(dir_measurements, csv_file)

# Pieces used to build the output file names and the plot titles
hist_basename = "pathomics_segmentation_pretrained"
hist_basetitle = "segmentation on pathomic images\n with pre-trained EDSR"

# Columns of the .csv file to plot (one histogram per column)
metrics = ["mean_matched_score"]

# Figure appearance
hist_width = 4000
hist_bins = 20
title_fontsize = 72
label_fontsize = 60
tick_fontsize = 48
tick_length = 30
tick_width = 4

# Optional elements to draw
set_grid = True
set_mean = True
set_std = True
set_min = False
set_max = False

# Axis limits, the same for every metric
x_min = 0.0
x_max = 1.0
y_min = 0
y_max = 800

# Read the measurements into a dataframe df
df = pd.read_csv(csv_file_path)

# Options shared by every histogram of this cell
hist_options = dict(width=hist_width, height="", ylabel="Frequency",
                    bins=hist_bins, dpi=100,
                    x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max,
                    label_fontsize=label_fontsize, title_fontsize=title_fontsize,
                    tick_fontsize=tick_fontsize, tick_length=tick_length,
                    tick_width=tick_width, grid=set_grid, set_mean=set_mean,
                    set_min=set_min, set_max=set_max, set_std=set_std)

# One exported histogram per metric
for measure in metrics:
    data = df[measure]
    hist_name = f"histogram_{hist_basename}_{measure}"
    hist_title = f"{measure} for {len(data)} {hist_basetitle}"
    MakeHistogram(data=data, path_to_save=dir_histograms, hist_name=hist_name,
                  title=hist_title, xlabel=measure, **hist_options)

## 4.2 Boxplots

In [None]:
import pandas as pd
import os

# Folder layout, resolved from the current working directory
dir_base = os.getcwd()                                          # ./
dir_measurements = dir_base                                     # ./  (use os.path.join(dir_base, "Measurements") for ./Measurements)
dir_images = os.path.join(dir_base, "images")                   # ./images
dir_figures = os.path.join(dir_images, "Figures")               # ./images/Figures
if not os.path.exists(dir_figures):
    os.makedirs(dir_figures, exist_ok=True)

# .csv files to compare, with the tick label of each one (same order)
csv_file_paths_list = [
    os.path.join(dir_measurements, "measures_pathomics_with_segmentation-bicubic.csv"),
    os.path.join(dir_measurements, "measures_pathomics_with_segmentation-pretrained.csv"),
    os.path.join(dir_measurements, "measures_pathomics_with_segmentation-trained_pngs.csv"),
    os.path.join(dir_measurements, "measures_pathomics_with_segmentation-trained_wholedataset.csv"),
]
csv_files_labels = [
    "Bicubic",
    "Pretrained EDSR",
    "Trained in \n general dataset\n (TCIA)",
    "Trained in \n dedicated dataset\n (Humanitas)",
]

# Pieces used to build the output file names and the plot titles
fig_basename = "pathomics_final"
fig_basetitle = "pathomic images\n with EDSR trained on different cases"

# Columns to plot and the axis limits to use for each of them
# (the boxes sit at x = 0..3)
metrics = ["mean_matched_score"]

axes_limits = {
    "mean_matched_score": {"x_min": -0.5, "x_max": 3.5, "y_min": 0.45, "y_max": 0.95}
}

# Figure appearance
fig_width = 4000
title_fontsize = 72
label_fontsize = 60
values_fontsize = 50
tick_fontsize = 48
tick_length = 30
tick_width = 4
markersize = 30
box_width = 0.75

# Optional elements to draw
set_grid = True
show_median = True
show_mean = True
show_std = True
show_min = False
show_max = False

# Read every data file into its own dataframe
df_list = []
for data_file_path in csv_file_paths_list:
    df = pd.read_csv(data_file_path)
    df_list.append(df)

# Options shared by every box plot figure of this cell
boxplot_options = dict(width=fig_width, height="", xlabel="", dpi=100,
                       values_fontsize=values_fontsize, box_width=box_width,
                       label_fontsize=label_fontsize, title_fontsize=title_fontsize,
                       tick_fontsize=tick_fontsize, tick_length=tick_length,
                       tick_width=tick_width, grid=set_grid, markersize=markersize,
                       show_median=show_median, show_mean=show_mean,
                       show_min=show_min, show_max=show_max, show_std=show_std)

# One exported box plot figure per metric
for measure in metrics:
    # Same column taken from every dataframe
    data_list = [df[measure] for df in df_list]

    # The title reports the size of the smallest data set
    data_points_number_list = [len(data) for data in data_list]
    min_data_points_number = min(data_points_number_list)

    # Axis limits configured for this metric
    x_min, x_max, y_min, y_max = (axes_limits[measure][key]
                                  for key in ("x_min", "x_max", "y_min", "y_max"))

    fig_name = f"boxplots_comparison_{fig_basename}_{measure}"
    fig_title = f"{measure} on segmentation for {min_data_points_number} {fig_basetitle}"

    MakeBoxPlots(data_list=data_list, data_labels=csv_files_labels,
                 fig_name=fig_name, path_to_save=dir_figures,
                 title=fig_title, ylabel=measure,
                 x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max,
                 **boxplot_options)

