# **CoNIC 2022 Compute Stats**
---

<font size = 4> Notebook adapted from the code at https://github.com/TissueImageAnalytics/CoNIC</font>

In [3]:
#@markdown ##Play the cell to install python libraries

# Every install command below is commented out, so running this cell installs
# nothing and only prints the confirmation message. Uncomment the lines for
# the packages that are missing from the runtime.
# NOTE(review): the scikit-learn distribution is published on PyPI as
# "scikit-learn", not "sklearn" - confirm before uncommenting that line.

#!pip install docopt 
#!pip install numpy 
#!pip install pandas
#!pip install tqdm
#!pip install scipy
#!pip install sklearn

# Uncomment to force a session restart after installing packages
#exit(0)
print("Libraries installed")

Libraries installed


In [4]:
#@markdown ##Play the cell to connect your Google Drive to Colab

#@markdown * Click on the URL. 

#@markdown * Sign in to your Google Account. 

#@markdown * Copy the authorization code. 

#@markdown * Enter the authorization code. 

#@markdown * Click on the "Files" tab in the left sidebar and refresh it. Your Google Drive folder should now be available there as "gdrive". 

# Mount the user's Google Drive at /content/gdrive; the paths used in the
# following cells are all built on top of this mount point.
from google.colab import drive
drive.mount('/content/gdrive')





Mounted at /content/gdrive


In [5]:
#@markdown ## Play the cell to set path to MyDrive
%cd gdrive/MyDrive

/content/gdrive/MyDrive


In [6]:
#@markdown ## Play the cell to clone CoNIC2022 Compute Stat scripts from GitHub repository
! git clone https://github.com/TissueImageAnalytics/CoNIC.git
print("done")

Cloning into 'CoNIC'...
remote: Enumerating objects: 122, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 122 (delta 13), reused 19 (delta 3), pack-reused 86[K
Receiving objects: 100% (122/122), 8.10 MiB | 9.55 MiB/s, done.
Resolving deltas: 100% (52/52), done.
done


In [16]:
#@markdown ## Play the cell to set MyDrive path
# Absolute path, so the working directory ends up in MyDrive no matter where
# the session currently is.
%cd /content/gdrive/MyDrive

/content/gdrive/MyDrive


In [34]:
#@markdown ## Play the cell to append custom modules path
import sys

# Use the same "MyDrive" spelling as every other path in this notebook (the
# original used "My Drive" with a space).
CONIC_REPO_DIR = '/content/gdrive/MyDrive/CoNIC'

# Guard against duplicates so that re-running the cell does not keep growing
# sys.path.
if CONIC_REPO_DIR not in sys.path:
    sys.path.append(CONIC_REPO_DIR)
print("appending custom module path done")

appending custom module path done


In [35]:
#@markdown ##Play the cell to run the initial import

from docopt import docopt
import numpy as np
import os
import pandas as pd
from tqdm.auto import tqdm

from gdrive.MyDrive.CoNIC.misc.utils import remap_label, get_bounding_box
from gdrive.MyDrive.CoNIC.metrics.stats_utils import get_pq, get_multi_pq_info, get_multi_r2
print("import done")

import done


# **- Go to step 1 for Segmentation & classification: multi-class panoptic quality (mPQ+)**

# **- Go directly to step 2 if you only need Predicting cellular composition: multi-class coefficient of determination (R2)**

## **1- Segmentation & classification: multi-class panoptic quality (mPQ+)**

To appropriately calculate the metrics, ensure that your output is in the following format:
- .npy array of size Nx256x256x2, where N is the number of processed patches.
- First channel is the instance segmentation map containing values ranging from 0 (background) to n (number of nuclei).
- Second channel is the classification map containing values ranging from 0 (background) to 6 (number of classes in the dataset).

In [36]:
#@markdown ###Select path to prediction and ground truth: 
# Both files are read with np.load by the next cell.
Prediction_path = "/content/gdrive/MyDrive/CoNIC2022/Compute_statistics/Predictions/labels_0_100.npy" #@param {type:"string"}

GroundTruth_path = "/content/gdrive/MyDrive/CoNIC2022/Compute_statistics/Ground_Truth/labels_0_100.npy" #@param {type:"string"}

In [37]:
#@markdown ### Play the cell to run to calculate statistics

# Notebook version of the "seg_class" mode of compute_stats.py from
# https://github.com/TissueImageAnalytics/CoNIC
# The command-line equivalents are:
#   python compute_stats.py --mode="seg_class" --pred=<path_to_results> --true=<path_to_ground_truth>
#   python compute_stats.py --mode="regression" --pred=<path_to_results> --true=<path_to_ground_truth>

# Leftover from the command-line interface ("regression" or "seg_class");
# nothing in this cell reads it.
mode = ["--mode"]

# Added to the denominators so that a class without any instance evaluates to
# 0 instead of triggering a division by zero.
EPSILON = 1.0e-6

pred_array = np.load(Prediction_path)
true_array = np.load(GroundTruth_path)

seg_metrics_names = ["pq", "multi_pq+"]
# Metric names of the regression task (step 2)
reg_metrics_names = ["r2"]

nr_patches = pred_array.shape[0]
if nr_patches == 0:
    raise ValueError("The prediction array does not contain any patch!")
if true_array.shape[0] != nr_patches:
    # The loop below indexes both arrays with the same patch index, so a
    # mismatch would either crash midway or silently skip ground-truth patches.
    raise ValueError(
        "Prediction and ground truth hold a different number of patches: "
        "%d vs %d!" % (nr_patches, true_array.shape[0])
    )

all_metrics = {}
pq_list = []
mpq_info_list = []

for patch_idx in tqdm(range(nr_patches)):
    # get a single patch
    pred = pred_array[patch_idx]
    true = true_array[patch_idx]
    # instance segmentation map (first channel)
    pred_inst = pred[..., 0]
    true_inst = true[..., 0]

    for metric in seg_metrics_names:
        if metric == "pq":
            # get binary panoptic quality
            pq = get_pq(true_inst, pred_inst)[0][2]
            pq_list.append(pq)
        elif metric == "multi_pq+":
            # get the multiclass pq stats info from single image and keep
            # [tp, fp, fn, sum_iou] for every class
            mpq_info_single = get_multi_pq_info(true, pred)
            mpq_info_list.append(
                [[cls[0], cls[1], cls[2], cls[3]] for cls in mpq_info_single]
            )
        else:
            raise ValueError("%s is not supported!" % metric)

for metric in seg_metrics_names:
    if metric == "multi_pq+":
        mpq_info_metrics = np.array(mpq_info_list, dtype="float")
        # sum over all the images -> one [tp, fp, fn, sum_iou] row per class
        total_mpq_info_metrics = np.sum(mpq_info_metrics, axis=0)
        total_tp = total_mpq_info_metrics[:, 0]
        total_fp = total_mpq_info_metrics[:, 1]
        total_fn = total_mpq_info_metrics[:, 2]
        total_sum_iou = total_mpq_info_metrics[:, 3]

        # get the F1-score i.e DQ, for all classes at once
        dq = total_tp / (
            (total_tp + 0.5 * total_fp + 0.5 * total_fn) + EPSILON
        )
        # get the SQ, when not paired, it has 0 IoU so does not impact
        sq = total_sum_iou / (total_tp + EPSILON)
        mpq_metrics = dq * sq
        # multi_pq+ is the mean of the per-class PQ values
        all_metrics[metric] = [np.mean(mpq_metrics)]
    else:
        pq_metrics = np.array(pq_list)
        pq_metrics_avg = np.mean(pq_metrics, axis=-1)  # average over all images
        all_metrics[metric] = [pq_metrics_avg]

# Keep `df` a DataFrame and only format it for printing
df = pd.DataFrame(all_metrics)
print(df.to_string(index=False))

  0%|          | 0/100 [00:00<?, ?it/s]

   pq  multi_pq+
 0.97        1.0


## **2- Predicting cellular composition: multi-class coefficient of determination (R2)**
To appropriately calculate the metric, provide a single .csv file each for the predictions and the ground truth, where the column headers should be:
- neutrophil
- epithelial
- lymphocyte
- plasma
- eosinophil
- connective

To make sure the calculation is done correctly, ensure that the row ordering is the same for both the ground truth and prediction csv files.

In [38]:
#@markdown ###Select path to prediction and ground truth: 
# NOTE: these assignments reuse the variable names of the step 1 path form, so
# after running this cell the step 1 path cell has to be run again before
# recomputing the segmentation statistics.
Prediction_path = "/content/gdrive/MyDrive/CoNIC2022/Compute_statistics/Predictions/counts.csv" #@param {type:"string"}

GroundTruth_path = "/content/gdrive/MyDrive/CoNIC2022/Compute_statistics/Ground_Truth/counts.csv" #@param {type:"string"}

In [39]:
#@markdown ### Play the cell to run to calculate statistics
pred_csv = pd.read_csv(Prediction_path)
true_csv = pd.read_csv(GroundTruth_path)

# The rows of both files are compared position by position, so a different
# number of rows means the two files cannot describe the same patches.
if len(pred_csv) != len(true_csv):
    raise ValueError(
        "Prediction and ground truth hold a different number of rows: "
        "%d vs %d!" % (len(pred_csv), len(true_csv))
    )

# Defined here as well so that the cell works when step 1 was skipped, as the
# notebook instructions allow.
reg_metrics_names = ["r2"]
try:
    all_metrics  # reuse the results of step 1 when they exist
except NameError:
    all_metrics = {}

for metric in reg_metrics_names:
    if metric == "r2":
        # calculate multiclass coefficient of determination
        r2 = get_multi_r2(true_csv, pred_csv)
        all_metrics["multi_r2"] = [r2]
    else:
        raise ValueError("%s is not supported!" % metric)

# Keep `df` a DataFrame and only format it for printing
df = pd.DataFrame(all_metrics)
print(df.to_string(index=False))

   pq  multi_pq+  multi_r2
 0.97        1.0       1.0
