In [1]:
# header files
%matplotlib inline
import torch
import numpy as np
import csv
import glob
import math
import tensorboard
from PIL import Image
import skimage
from skimage import io, transform
import matplotlib.pyplot as plt

# Default matplotlib figure size for every plot in this notebook: 15 x 15.
plt.rcParams.update({'figure.figsize': [15, 15]})
print("Header files loaded!")

Header files loaded!


### Analysis on Test Dataset
This notebook checks the performance of the three models (Transformer, Baseline Deep Learning and Baseline CPA) on the test dataset. The first step was to run the models on the test dataset and generate the CSV files test_transformer.csv, test_baseline.csv and test_baseline_cpa.csv, which contain the predictions for each image of the test dataset. In this notebook, we analyse these files and go through the predictions of each model on the test dataset.

### Transformer Model
The following cells display the performance of the transformer model on the test dataset. The transformer model flags a total of 12643 bad images (blurry, empty and debris). We then display the maximum and minimum probabilities for each of the blurry, empty and debris classes.

In [2]:
# Read the transformer predictions and collect every image predicted as a bad class.
# Column indices as used below: 0 = image path, 1 = value kept in prob_good_list,
# 2 / 3 / 4 = probability kept for label 1 (blurry) / 2 (empty) / 3 (debris),
# 5 = predicted label.
files = glob.glob("../results/test_dataset/test_transformer.csv")
print(files)

file_list = []
prob_good_list = []
prob_blurry_list = []
prob_empty_list = []
prob_debris_list = []
file_label_list = []
t_prob_bad_list = []

# Bad-class label -> (probability column, per-class list of (probability, path)).
bad_class_targets = {
    1: (2, prob_blurry_list),
    2: (3, prob_empty_list),
    3: (4, prob_debris_list),
}

for file in files:
    with open(file, newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default makes this a no-op on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; skip it instead of
                # failing with IndexError on row[5].
                continue

            label = int(row[5])
            if label not in bad_class_targets:
                # Any other label is not one of the three bad classes.
                continue

            file_list.append(row[0])
            prob_good_list.append(row[1])
            file_label_list.append(row[5])

            prob_column, class_list = bad_class_targets[label]
            probability = float(row[prob_column])
            class_list.append((probability, row[0]))
            # The combined list also records the class as a string ("1", "2" or "3").
            t_prob_bad_list.append((probability, row[0], str(label)))

# Total bad images, then the blurry / empty / debris counts.
print(len(t_prob_bad_list))
print(len(prob_blurry_list))
print(len(prob_empty_list))
print(len(prob_debris_list))

['../results/test_dataset/test_transformer.csv']
12643
702
597
11344


In [3]:
# Rank the blurry predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
prob_blurry_list = sorted(
    prob_blurry_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(prob_blurry_list[0], prob_blurry_list[-1], len(prob_blurry_list), sep="\n")

(0.998, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120275__2021-02-20T14_16_02-Measurement1/Images/r14c21f03p01-ch2sk1fk1fl1.tiff')
(0.338, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120276__2021-02-20T10_34_48-Measurement1/Images/r10c17f02p01-ch2sk1fk1fl1.tiff')
702


In [4]:
# Rank the empty predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
prob_empty_list = sorted(
    prob_empty_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(prob_empty_list[0], prob_empty_list[-1], len(prob_empty_list), sep="\n")

(1.0, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120275__2021-02-20T14_16_02-Measurement1/Images/r08c04f05p01-ch5sk1fk1fl1.tiff')
(0.355, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120274__2021-02-20T17_58_18-Measurement1/Images/r14c21f08p01-ch2sk1fk1fl1.tiff')
597


In [5]:
# Rank the debris predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
prob_debris_list = sorted(
    prob_debris_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(prob_debris_list[0], prob_debris_list[-1], len(prob_debris_list), sep="\n")

(0.999, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120275__2021-02-20T14_16_02-Measurement1/Images/r04c19f04p01-ch3sk1fk1fl1.tiff')
(0.349, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120274__2021-02-20T17_58_18-Measurement1/Images/r08c21f08p01-ch1sk1fk1fl1.tiff')
11344


### Baseline - Deep Learning Model
The following cells display the performance of the baseline (deep learning) model on the test dataset. The baseline model flags a total of 30593 bad images (blurry, empty and debris). We then display the maximum and minimum probabilities for each of the blurry, empty and debris classes.

In [6]:
# Read the deep-learning baseline predictions and collect every image predicted
# as a bad class.
# Column indices as used below: 0 = image path, 1 = value kept in b_prob_good_list,
# 2 / 3 / 4 = probability kept for label 1 (blurry) / 2 (empty) / 3 (debris),
# 5 = predicted label.
k = 150
files = glob.glob("../results/test_dataset/test_baseline.csv")
print(files)

b_file_list = []
b_prob_good_list = []
b_prob_blurry_list = []
b_prob_empty_list = []
b_prob_debris_list = []
file_label_list = []
# NOTE: despite the `c_` prefix, this list holds the deep-learning baseline
# predictions read from test_baseline.csv.
c_prob_bad_list = []

# Bad-class label -> (probability column, per-class list of (probability, path)).
bad_class_targets = {
    1: (2, b_prob_blurry_list),
    2: (3, b_prob_empty_list),
    3: (4, b_prob_debris_list),
}

for file in files:
    with open(file, newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default makes this a no-op on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; skip it instead of
                # failing with IndexError on row[5].
                continue

            # Paths in this CSV are used exactly as written (no normalisation).
            label = int(row[5])
            if label not in bad_class_targets:
                # Any other label is not one of the three bad classes.
                continue

            b_file_list.append(row[0])
            b_prob_good_list.append(row[1])
            file_label_list.append(row[5])

            prob_column, class_list = bad_class_targets[label]
            probability = float(row[prob_column])
            class_list.append((probability, row[0]))
            # The combined list also records the class as a string ("1", "2" or "3").
            c_prob_bad_list.append((probability, row[0], str(label)))

# Total bad images, then the blurry / empty / debris counts.
print(len(c_prob_bad_list))
print(len(b_prob_blurry_list))
print(len(b_prob_empty_list))
print(len(b_prob_debris_list))
print("Done processing!")

['../results/test_dataset/test_baseline.csv']
30593
7280
486
22827
Done processing!


In [7]:
# Rank the blurry predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
b_prob_blurry_list = sorted(
    b_prob_blurry_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(b_prob_blurry_list[0], b_prob_blurry_list[-1], len(b_prob_blurry_list), sep="\n")

(0.98, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120276__2021-02-20T10_34_48-Measurement1/Images/r13c12f02p01-ch2sk1fk1fl1.tiff')
(0.337, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120276__2021-02-20T10_34_48-Measurement1/Images/r01c08f07p01-ch1sk1fk1fl1.tiff')
7280


In [8]:
# Rank the empty predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
b_prob_empty_list = sorted(
    b_prob_empty_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(b_prob_empty_list[0], b_prob_empty_list[-1], len(b_prob_empty_list), sep="\n")

(0.996, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120274__2021-02-20T17_58_18-Measurement1/Images/r08c04f07p01-ch5sk1fk1fl1.tiff')
(0.345, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120277__2021-02-20T07_02_46-Measurement1/Images/r16c19f01p01-ch1sk1fk1fl1.tiff')
486


In [9]:
# Rank the debris predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
b_prob_debris_list = sorted(
    b_prob_debris_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(b_prob_debris_list[0], b_prob_debris_list[-1], len(b_prob_debris_list), sep="\n")

(0.981, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120267__2021-02-20T21_37_27-Measurement1/Images/r15c19f08p01-ch5sk1fk1fl1.tiff')
(0.336, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120275__2021-02-20T14_16_02-Measurement1/Images/r16c21f04p01-ch1sk1fk1fl1.tiff')
22827


### Baseline - CPA Model
The following cells display the performance of the baseline (CPA) model on the test dataset. The CPA model flags a total of 30548 bad images (blurry, empty and debris). We then display the maximum and minimum probabilities over the combined list of bad images.

In [10]:
# Read the CPA baseline predictions and collect every image predicted as a bad class.
# Column indices as used below: 0 = image path, 1 = value kept in b_prob_good_list,
# 2 / 3 / 4 = probability kept for label 1 (blurry) / 2 (empty) / 3 (debris),
# 5 = predicted label.
# NOTE: the b_* lists are re-initialised here, so they replace the lists of the
# same names built from test_baseline.csv earlier in the notebook.
k = 150
files = glob.glob("../results/test_dataset/test_baseline_cpa.csv")
print(files)

b_file_list = []
b_prob_good_list = []
b_prob_blurry_list = []
b_prob_empty_list = []
b_prob_debris_list = []
file_label_list = []
b_prob_bad_list = []

# Bad-class label -> (probability column, per-class list of (probability, path)).
bad_class_targets = {
    1: (2, b_prob_blurry_list),
    2: (3, b_prob_empty_list),
    3: (4, b_prob_debris_list),
}

for file in files:
    with open(file, newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default makes this a no-op on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; skip it instead of
                # failing with IndexError on row[0].
                continue

            # Rewrite the dataset directory name to its underscore spelling.
            image_path = row[0].replace("2019_07_11_JUMP-CP-pilots", "2019_07_11_JUMP_CP_pilots")
            # Drop the final character of the path.
            # NOTE(review): confirm whether this trims a stray trailing character
            # or shortens the file extension.
            image_path = image_path[:-1]

            label = int(row[5])
            if label not in bad_class_targets:
                # Any other label is not one of the three bad classes.
                continue

            b_file_list.append(image_path)
            b_prob_good_list.append(row[1])
            file_label_list.append(row[5])

            prob_column, class_list = bad_class_targets[label]
            probability = float(row[prob_column])
            class_list.append((probability, image_path))
            # The combined list also records the class as a string ("1", "2" or "3").
            b_prob_bad_list.append((probability, image_path, str(label)))

# Total bad images, then the blurry / empty / debris counts.
print(len(b_prob_bad_list))
print(len(b_prob_blurry_list))
print(len(b_prob_empty_list))
print(len(b_prob_debris_list))
print("Done processing!")

['../results/test_dataset/test_baseline_cpa.csv']
30548
0
56
30492
Done processing!


In [11]:
# Rank all bad predictions from highest to lowest probability, then show
# the top entry, the bottom entry and the number of entries.
b_prob_bad_list = sorted(
    b_prob_bad_list,
    key=lambda entry: float(entry[0]),
    reverse=True,
)
print(b_prob_bad_list[0], b_prob_bad_list[-1], len(b_prob_bad_list), sep="\n")

(1.0, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120274__2021-02-20T17_58_18-Measurement1/Images/r01c17f03p01-ch4sk1fk1fl1.tif', '3')
(0.33, '/dgx1nas1/cellpainting-datasets/2019_07_11_JUMP_CP_pilots/2021_03_03_Stain5_CondC_PE_Standard/images/BR00120267__2021-02-20T21_37_27-Measurement1/Images/r16c17f05p01-ch2sk1fk1fl1.tif', '3')
30548


In [12]:
# Pick the bad-image sets to compare across models.
#
# Which list belongs to which model (set by the cells that fill them):
#   t_prob_bad_list <- test_transformer.csv   (transformer)
#   c_prob_bad_list <- test_baseline.csv      (deep-learning baseline)
#   b_prob_bad_list <- test_baseline_cpa.csv  (CPA baseline)
# The previous assignments had the last two swapped, and c_prob_bad_list was
# sliced without ever being sorted, so its "top" rows were just the first rows
# of the CSV. Both lists are now ranked by probability (highest first) before
# the k most confident entries are kept.
transformer_bad = t_prob_bad_list
baseline_bad = sorted(c_prob_bad_list, key=lambda entry: entry[0], reverse=True)[:k]
cpa_bad = sorted(b_prob_bad_list, key=lambda entry: entry[0], reverse=True)[:k]
print(len(transformer_bad))
print(len(baseline_bad))
print(len(cpa_bad))

12643
150
150
