# Import and install all necessary packages in Python and Octave

In [None]:
# Import Colab specific packages and mount to Google Drive folder
from google.colab.patches import cv2_imshow
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

!apt-get update
!apt install msttcorefonts -qq

# Import all necessary packages for code to run

import os
import pathlib
import numpy as np
import math
import pandas as pd
import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.patches import Circle
import seaborn as sns
import copy
from scipy import ndimage
from scipy.spatial import procrustes
import warnings
import pickle
import sklearn
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPool2D, UpSampling2D, Concatenate, GaussianNoise
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from tensorflow.python.client import device_lib
from random import randint
from tqdm import tqdm
import cv2
#from IPython.display import Image
import PIL
!pip3 install fpdf
from fpdf import FPDF
#import PyPDF2
!pip3 install PyPDF2
#from PyPDF2 import PdfFileMerger, PdfFileReader
from PyPDF2 import PdfMerger, PdfReader
import time # measure how long training takes
import skimage
from skimage.measure import label, find_contours
from skimage.metrics import hausdorff_distance #, hausdorff_pair
from skimage.morphology import skeletonize, convex_hull_image
from skimage import draw
import random

!pip3 install num2words
from num2words import num2words

# Packages and programs required to run Octave code in this notebook (uses oct2py package)
#print('ATTEMPTING TO INSTALL OCT2PY')
!pip3 install oct2py #--no-deps
#print('ATTEMPTING TO INSTALL OCTAVE')
!apt install octave # makes it possible to run matlab scripts
#print('ATTEMPTING TO INSTALL OCTAVE DEV TOOLS')
!apt install liboctave-dev
#!pip3 install --no-deps -e '/content/gdrive/My Drive/Colab Notebooks/Sector Project/oct2py-5.5.1'
#print('BELOW IS AN ERROR IN SETUP!')

import oct2py
from oct2py import octave
# The line below allows Octave to run alongside the notebook
%load_ext oct2py.ipython 


# Check configurations
# If you have a GPU connected, you should see specs pop up.
# NOTE(review): the two calls below are bare expressions in the middle of the
# cell, so their return values are discarded rather than displayed; the
# visible GPU report is the nvidia-smi text printed further down.
tf.config.list_physical_devices('GPU')
device_lib.list_local_devices()

# `!nvidia-smi` is IPython shell-capture syntax: the command output comes back
# as a list of lines, which is joined into one string before being searched.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The word 'failed' in the nvidia-smi output is used as the "no GPU" signal.
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)

# Check if you are using a high-ram runtime (comes from one of Google's tutorials)
from psutil import virtual_memory
# Total memory in decimal gigabytes (bytes / 1e9).
# NOTE(review): the value read is `.total`, although the message printed below
# calls it "available" RAM.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold used here to tell a high-RAM runtime from a standard one.
if ram_gb < 20:
    print('Not using a high-RAM runtime')
else:
    print('You are using a high-RAM runtime!')

# Start Octave and install its missing packages

In [None]:
%%octave # Start Octave
# Everything below this line is executed by Octave (through the oct2py
# IPython extension loaded in the first cell), not by Python.
#pwd
#dir("/content/gdrive/MyDrive")
#addpath("/content/gdrive/My Drive/Colab\ Notebooks/sector-counting-pipeline/Pipeline")
#addpath("/content/gdrive/Colab Notebooks/sector-counting-pipeline/Pipeline")
#pkg install qt_toolkit
#pkg install fltk

# Install the "image" package from Octave Forge and load it into this session.
# (The commented line is the alternative of installing from a local tarball.)
#pkg install "/content/gdrive/My Drive/Colab\ Notebooks/psi-sectored-classification/image-2.12.0.tar.gz"
pkg install -forge image
pkg load image

# Same for the "dataframe" package.
#pkg install "/content/gdrive/My Drive/Colab\ Notebooks/psi-sectored-classification/dataframe-1.2.0.tar.gz"
pkg install -forge dataframe
pkg load dataframe

# Set parameters for notebook flow

In [None]:
#-----------------------
# EXPECTED IMAGE INPUT
#-----------------------

# Input and output size of the U-Net.
# Images and masks are resized to H x W when they are read.
H = 1024
W = 1024
shape = (H, W, 3) # The size of the input image (assumes 3 colour channels)
num_classes = 3 # How many classes of pixels should the segmentation have?
# 2: Colony pixels versus background pixels
# 3: Red colony pixels, white colony pixels and background pixels (this is what we will be using from now on)

#---------------------
# TRAINING PARAMETERS
#---------------------

# Training settings: enable training and/or switch to the small test network
train_model = False # set to True if you wish to train a new model
use_test_network = True # set to True if you want to use a small model to test with instead of the implemented model

# Model settings
lr = 1e-4 # learning rate (initial)
min_lr = 1e-6 # lowest learning rate if using a scheduler or a reduce on plateau (default is 1e-6)
batch_size = 1 # how many images are fed in at one pass?
epochs = 100 # the maximum number of passes through the images

# Output segmentations of plates during training
print_test_segs = False

# Output segmentations of plates after training
get_training_segs = True # save segmentations for the training images
get_testing_segs = True # save segmentations for the testing images


#---------------------
# CLASSIFICATION PARAMETERS
#---------------------

# Should classification happen?
classify_training_colonies = True
classify_testing_colonies = True

# Save all colony images and annotations of colonies from testing images
save_all_annotations = True

# Padding for images being saved (zooming out a bit, not adding blank pixels)
image_padding = 5

# Including OTHERS' annotations of the images
use_expert_counts = False # quantifiable colonies from images (dots on the images)
use_quantifiable_counts_from_table = False # quantifiable colonies tabulated (assumes you have the tabulated data)
use_true_cured_colonies_from_table = False # cured colonies tabulated (assumes you have the tabulated data)
use_true_sector_counts = False # sector frequencies tabulated (assumes you have the tabulated data)

#---------------------
# OTHER THINGS TO LEAVE ALONE
#---------------------

# Seed the random number generators so that runs are repeatable.
# NOTE(review): only NumPy and TensorFlow are seeded here; Python's built-in
# `random` module (imported in the first cell) is not -- confirm whether any
# later cell relies on it and needs `random.seed(SEED)` as well.
SEED = 42
np.random.seed(SEED) # for numpy operations
tf.random.set_seed(SEED) # for tensorflow operations

# Get directories set up

In [None]:
#---------------------------------------
# THINGS TO CHANGE BEFORE RUNNING
#---------------------------------------

# 1. GET THE REPO DIRECTORY

# Change the lines below to correspond to the location of the repo in your Google Drive
repo_parent_folder = '/content/gdrive/My Drive/Colab Notebooks' # The parent directory of the repo folder
repo_body_folder = repo_parent_folder + '/psi-sectored-classification' # The repo folder itself
octave.addpath(repo_body_folder) # include this in Octave's path when reading functions.

# This is the subfolder of the repo containing the segmentation-classification pipeline.
# This is also the directory in which this notebook is found.
main_folder = repo_body_folder + '/Pipeline' # The location of the code for running [PSI]-CIC


# 2. GET THE FILE NAMES FOR THE WEIGHTS AND THE COLONY OUTPUT DOCUMENT

# Name of the file containing the trained weights
# Such file should be stored in the "Trained Models" subfolder of "Pipeline"
# Do not include the '.h5' file type at the end
# File names:
# 2021_07_01 (too big to add on Github)
# test_model_3 (included in Github)
weights_file = 'test_model_3'

# NOTE: If you want to use the weights file used in the paper, please contact
# me as Github's file size limits prevent me from including it in the repo.

# Name of the output file containing all the colony crops from the classification step
# This will be the name of the PDF file with colony crops in the "_Pipeline_test" folder.
colony_chart_doc = '2023_03_15'


# 3. GET DIRECTORIES OF TRAINING AND TESTING IMAGES

# Training and validation image locations
training_image_set = repo_body_folder + '/Image Generation/Synthetic_Images/train/images'
validation_image_set = repo_body_folder + '/Image Generation/Synthetic_Images/val/images'

# Directory of the testing images, given relative to the folder that holds all
# real images.
this_image_set = 'Test Plates/Set 1'
# Note: the folder-check cell further down resolves this path inside the
# 'Real_Images' folder of the repo (repo_body_folder + '/Real_Images').
# The same sub-path is reused for the output produced for this image set.

# If you would like to use one image to test how good the segmentation is
# during training, write the name of that image here.
# This must be in the set of testing images.
if print_test_segs == True:
    image_to_test_with = 'Plate_2.jpg'



# Initialize necessary folders for storing output (don't change this)

## Required folders
Refer to this cell if a directory-related error is thrown.

In [None]:
# Verify that the directories for the synthetic images, the weights and the
# test plates exist.
# A NameError IS RAISED FOR THE FIRST ONE THAT CANNOT BE FOUND.

def _require_directory(path):
    """Raise NameError naming `path` if nothing exists at that location."""
    if not os.path.exists(path):
        raise NameError('Directory ' + path + ' does not exist.')

# Directory where all TRAINING images and masks are found
image_folder = repo_body_folder + '/Image Generation/Synthetic_Images'
_require_directory(image_folder)

# Directory where model weights are stored or found
weights_folder = main_folder + '/Trained Models'
_require_directory(weights_folder)

# Directory containing all the test plates
all_real_images_folder = repo_body_folder + '/' + 'Real_Images'
_require_directory(all_real_images_folder)

# Subdirectory of the real-images folder holding the specific image set in use
real_image_folder = all_real_images_folder + '/' + this_image_set # leave this line alone
_require_directory(real_image_folder)

# Only needed when images carrying manual annotations are used: where that
# additional data is found
if use_expert_counts == True:
    additional_data_folder = main_folder + '/additional_data/' + this_image_set
    _require_directory(additional_data_folder)

# Only needed when U-Net is tested on one image during training: make sure
# that image is present in the selected image set
if print_test_segs == True:
    if not os.path.exists(real_image_folder + '/' + image_to_test_with):
        raise NameError('The image you want to test with cannot be found in this folder.')

## Folders to add based on options

In [None]:
#---------------------------------------------------
# Output directories.  Any folder that does not exist yet is created for you.

def _create_if_missing(folder, make=os.makedirs):
    """Create `folder` by calling `make(folder)` unless the path already exists."""
    if not os.path.exists(folder):
        make(folder)


#---------------------------------------------------
# Training output directories

# Primary directory where all output from TRAINING data will be stored
train_output_folder = main_folder + '/output_train/' + str(weights_file)
_create_if_missing(train_output_folder)

# TRAINING image segmentations
train_seg_folder = train_output_folder + '/segs'
_create_if_missing(train_seg_folder, make=os.mkdir)

# Circle detections for the corresponding TRAINING image segmentations
train_circle_folder = train_output_folder + '/CHT Circles'
_create_if_missing(train_circle_folder)

# Data for each circle detection in TRAINING images (including position and radius)
train_circle_data_folder = train_output_folder + '/CHT Data'
_create_if_missing(train_circle_data_folder)

# Classification data for each detection in TRAINING images:
# .pkl files of tables, one per plate, containing data on each colony.
train_output_table_folder = train_output_folder + '/Colony Tables'
_create_if_missing(train_output_table_folder)


#---------------------------------------------------
# Testing output directories

# Optional: plate segmentations written during the training process
# (only when print_test_segs is enabled)
if print_test_segs == True:
    test_per_epoch_folder = main_folder + '/segs_per_epoch/' + str(weights_file) + '/' + this_image_set
    _create_if_missing(test_per_epoch_folder)

# Primary directory where all output from TESTING data will be stored.
# It is model specific: the weights file name is an extra path level shared by
# all image sets tested with those weights.
test_output_folder = main_folder + '/output_test/' + str(weights_file) + '/' + this_image_set
_create_if_missing(test_output_folder)

# TESTING image segmentations
test_seg_folder = test_output_folder + '/segs'
_create_if_missing(test_seg_folder)

# Circle detections for the corresponding TESTING image segmentations
test_circle_folder = test_output_folder + '/CHT Circles'
_create_if_missing(test_circle_folder)

# Data for each circle detection in TESTING images (including position and radius)
test_circle_data_folder = test_output_folder + '/CHT Data'
_create_if_missing(test_circle_data_folder)

# Classification data for each detection in TESTING images:
# .pkl files of tables, one per plate, containing data on each colony.
test_output_table_folder = test_output_folder + '/Colony Tables'
_create_if_missing(test_output_table_folder)

# Bounding boxes for each colony detected in the TESTING images
test_boxes_folder = test_output_folder + '/detections'
_create_if_missing(test_boxes_folder)


#------------------------------------------------------
# FULL OUTPUT: ANNOTATION DETAILS AND PLOTS

# Everything is still stored in the subdirectory of output_test corresponding
# to the segmentation weights and image set used.

# PDF documents per plate showing the cropped colony, its segmentation, and
# annotations.
output_details_folder = test_output_folder + '/PDF Details'
_create_if_missing(output_details_folder)

# Plots visualizing data obtained from each plate
output_plots_folder = test_output_folder + '/Plots'
_create_if_missing(output_plots_folder)
    
#-------------------------------------------------------
# FULL OUTPUT: CROPPING DIRECTORIES

# This section is only active when save_all_annotations is True.
# Every listed sub-folder of "<test_output_folder>/crops" is created if it
# does not exist yet (existing folders are left untouched).

if save_all_annotations == True:

    output_crops_folder = test_output_folder + '/crops'

    _crop_subfolders = [
        'raw',                # where the original colonies are cropped and stored
    ]

    # Below used only if data on quantifiable colonies are available
    if use_expert_counts == True:
        _crop_subfolders.append('counted')  # where the original quantifiable colonies are cropped and stored

    _crop_subfolders += [
        'circles',            # same as 'raw', but a circle is overlaid on the colony
        'segs',               # U-Net segmentation such that only nonzero pixels in the circle are kept

        # Initial (uncorrected) annotations
        'init_regions',       # segmentation outlining the possible sector-like regions of the colony, both red and white
        'init_bounds',        # the raw segmentation containing only the boundary of the colony
        'init_partitions',    # raw segmentation with lines annotated to represent locations of sector borders
        'init_bad',           # sector-like regions that failed the consistency check

        # Corrected annotations (same layout as the init_* group)
        'cor_segs',           # presumably the corrected counterpart of 'segs'
        'cor_bounds',         # the corrected segmentation containing only the boundary of the colony
        'cor_regions',        # presumably the corrected counterpart of 'init_regions'
        # The original cell created 'cor_bounds' twice; the second entry carried
        # the 'init_partitions' description, so 'cor_partitions' was never made.
        'cor_partitions',     # presumably the corrected counterpart of 'init_partitions'
        'cor_bad',            # regions that failed the consistency check (should usually be blank after correction)

        'sectors',            # regions in the segmentation where a sector is predicted
        'sector_comps',       # same as 'sectors', but only red pixels in the segmentation are considered
    ]

    for _crop_subfolder in _crop_subfolders:
        # exist_ok=True makes this a no-op when the folder is already there
        os.makedirs(output_crops_folder + '/' + _crop_subfolder, exist_ok=True)

# Functions to Load

## Functions for U-Net and classification

### U-Net Architecture

In [None]:
# Functions for the U-Net Architecture

# U-Net architecture, more deep

def conv_block(inputs, channels, pool=True):
    """Apply two (3x3 Conv2D -> BatchNormalization -> ReLU) stages to `inputs`.

    Args:
        inputs: Keras tensor fed into the first convolution.
        channels: number of filters used by both convolutions.
        pool: when True (encoder path) a 2x2 max pooling is applied as well.

    Returns:
        `(features, pooled)` when `pool` is True, otherwise `features` alone,
        where `features` is the output of the second ReLU.
    """
    features = inputs
    for _ in range(2):
        features = Conv2D(channels, 3, padding="same")(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

    # Decoder / bottleneck blocks do not pool
    if not (pool == True):
        return features

    pooled = MaxPool2D((2,2))(features)
    return features, pooled

# Exactly one of the two build_unet definitions below is created, depending on
# use_test_network.  Both share the same signature, so later cells can call
# build_unet(shape, num_classes) without knowing which one is active.
# NOTE(review): if use_test_network is neither True nor False, neither branch
# runs and build_unet is left undefined.
if use_test_network == False:
    print('Running U-Net.')

    def build_unet(shape, num_classes):
        """Build the full U-Net: 4 encoder blocks, a bottleneck and 4 decoder blocks.

        Args:
            shape: shape passed to the Keras `Input` layer.
            num_classes: number of channels of the final softmax layer.

        Returns:
            A Keras `Model` mapping the input to a per-pixel softmax output.
        """
        inputs = Input(shape)

        # Determine whether to add a layer indicating adding noise
        # This should be done to the training images only.
        #inputs = GaussianNoise(inputs, stddev=0.5) # use this
        #inputs = GaussianNoise(inputs, stddev=0.5, training=True)

        # Encoder region
        # Each block returns the un-pooled features (xN, kept for the skip
        # connection) and the pooled features (pN, fed to the next block).

        x1, p1 = conv_block(inputs, 64, pool=True)
        x2, p2 = conv_block(p1, 128, pool=True)
        x3, p3 = conv_block(p2, 256, pool=True)
        x4, p4 = conv_block(p3, 512, pool=True)

        # Connecting the Encoder and Decoder regions (the deepest layer)

        b1 = conv_block(p4, 1024, pool=False)

        # Decoder Region
        # Each step: bilinear 2x upsampling, concatenation with the encoder
        # features of the same level, then a conv block without pooling.

        u1 = UpSampling2D((2,2), interpolation="bilinear")(b1)
        #print(u1.shape)
        #print(x4.shape)
        c1 = Concatenate()([u1, x4])
        x5 = conv_block(c1, 512, pool=False)

        u2 = UpSampling2D((2,2), interpolation="bilinear")(x5)
        c2 = Concatenate()([u2, x3])
        x6 = conv_block(c2, 256, pool=False)

        u3 = UpSampling2D((2,2), interpolation="bilinear")(x6)
        c3 = Concatenate()([u3, x2])
        x7 = conv_block(c3, 128, pool=False)

        u4 = UpSampling2D((2,2), interpolation="bilinear")(x7)
        c4 = Concatenate()([u4, x1])
        x8 = conv_block(c4, 64, pool=False)

        # Obtain output layer with number of desired classes
        # (1x1 convolution with a softmax over the class channels)

        output = Conv2D(num_classes, 1, padding="same", activation="softmax")(x8)

        return Model(inputs, output)

elif use_test_network == True:
    print('Running the testing network.')

    def build_unet(shape, num_classes):
        """Build the small test network: one encoder block and one decoder block.

        Same arguments and return value as the full network, but without skip
        connections and with far fewer filters (8 and 16).
        """
        inputs = Input(shape)

        # Encoder region
        # x1 (the un-pooled features) is not used afterwards; only p1 is.

        x1, p1 = conv_block(inputs, 8, pool=True)

        # Decoder Region

        u1 = UpSampling2D((2,2), interpolation="bilinear")(p1)
        x2 = conv_block(u1, 16, pool=False)

        # Obtain output layer with number of desired classes

        output = Conv2D(num_classes, 1, padding="same", activation="softmax")(x2)

        return Model(inputs, output)

### Pre-processing images

In [None]:
# function to crop image
def crop(img):
    """Crop `img` to the bounding box of its nonzero entries.

    Only the first two axes are cropped.  Returns the cropped array together
    with the box as `[row_start, row_stop, col_start, col_stop]`, where the
    stop values are exclusive.
    """
    rows, cols = img.nonzero()[:2]
    top, bottom = rows.min(), rows.max() + 1
    left, right = cols.min(), cols.max() + 1
    bounding_box = [top, bottom, left, right]
    return img[top:bottom, left:right], bounding_box

def trim_border(x):
    """Centre-crop the longer of the first two axes so that `x` becomes square.

    Works for colour images (H, W, C) as well as 2-D grayscale arrays (H, W);
    the earlier version indexed three axes explicitly and therefore failed on
    2-D input.

    When the size difference is odd, the extra pixel is removed from the
    start (top or left) of the longer axis.

    Args:
        x: array with at least two axes (rows, columns, ...).

    Returns:
        A view of `x` whose first two axes have equal length; `x` itself when
        it is already square.
    """
    height, width = x.shape[:2]
    excess = abs(height - width)
    if excess == 0:
        return x

    trail = excess // 2        # pixels removed at the end   (floor(excess / 2))
    lead = excess - trail      # pixels removed at the start (ceil(excess / 2))

    if height > width:
        return x[lead:height - trail]
    return x[:, lead:width - trail]

def read_image(x):
    """Load an image file and prepare it as network input.

    The image is read in colour with OpenCV, centre-cropped to a square by
    `trim_border`, resized to (W, H) and scaled from [0, 255] to [0, 1].

    Args:
        x: path of the image file.

    Returns:
        float32 array of shape (H, W, 3).

    Raises:
        OSError: if OpenCV cannot read the file.  `cv2.imread` signals this by
            returning None rather than raising, which previously surfaced as
            an unrelated AttributeError inside `trim_border`.
    """
    image = cv2.imread(x, cv2.IMREAD_COLOR)
    if image is None:
        raise OSError('Could not read image file: ' + str(x))
    image = trim_border(image) # make the image square before resizing
    image = cv2.resize(image, (W, H))
    image = image / 255.0 # normalize image
    return image.astype(np.float32)

def read_mask(x):
    """Load a segmentation mask file as an integer label image.

    The mask is read as a single-channel image and resized to (W, H) with
    nearest-neighbour interpolation, so that every output pixel keeps one of
    the label values present in the file.  (The default bilinear
    interpolation blends neighbouring labels, producing values that belong to
    a different class or to no class at all.)

    When `num_classes == 2` the mask is divided by 255 because binary masks
    are stored as 0 / 255.

    Args:
        x: path of the mask file.

    Returns:
        int32 array of shape (H, W).

    Raises:
        OSError: if OpenCV cannot read the file (`cv2.imread` returns None).
    """
    # NOTE(review): unlike read_image, the mask is not passed through
    # trim_border before resizing -- confirm that images and masks are always
    # square, otherwise they will not line up.
    mask = cv2.imread(x, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise OSError('Could not read mask file: ' + str(x))
    mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_NEAREST)
    if num_classes == 2:
        # Binary images are encoded as 0 and 255; map them to 0 and 1.
        mask = mask / 255.0
    return mask.astype(np.int32)

def tf_dataset(x,y, batch=1):
    """Build the input pipeline for a list of image paths and mask paths.

    The (image path, mask path) pairs are shuffled with a buffer of 500,
    loaded through `preprocess`, grouped into batches of `batch`, repeated
    indefinitely and prefetched two batches ahead.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x,y))
        .shuffle(buffer_size=500)
        .map(preprocess)
        .batch(batch)
        .repeat()
        .prefetch(2)
    )

def preprocess(x,y):
    """Turn one (image path, mask path) pair of string tensors into tensors.

    Returns the image as a float32 tensor of shape (H, W, 3) and the mask as
    a one-hot int32 tensor of shape (H, W, num_classes).
    """
    def _load_pair(image_bytes, mask_bytes):
        # Paths arrive as bytes from tf.numpy_function
        image_path = image_bytes.decode()
        mask_path = mask_bytes.decode()
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.int32])

    # Shapes are set explicitly after tf.numpy_function
    image.set_shape([H,W,3]) # does not change

    mask = tf.one_hot(mask, num_classes, dtype=tf.int32)
    mask.set_shape([H,W,num_classes]) # last dimension follows the number of segmentation classes

    return image, mask

## Helper functions
1. Cropping an image
2. Finding unique elements in a list
3. Bresenham's Line Algorithm (draw line in pixel space)

In [None]:
# Helper functions

# function to get unique values
# (https://www.geeksforgeeks.org/python-get-unique-values-list/)
def unique(list1):
    """Return the distinct elements of `list1` in order of first appearance.

    Elements are compared with `in`, so unhashable items (e.g. lists) are
    supported.
    """
    kept = []
    for candidate in list1:
        if candidate in kept:
            continue  # already recorded
        kept.append(candidate)
    return kept

# Create triangle starting from center, connecting the two ends of a connected component
# To get the boundaries of the triangle, use Bresenham's line algorithm

# Definition taken from http://www.roguebasin.com/index.php?title=Bresenham%27s_Line_Algorithm#Python
def get_line(start, end):
    """Bresenham's line algorithm.

    Returns the list of integer grid points on the line from `start` to
    `end` (both inclusive), ordered from `start` to `end`.

    >>> get_line((0, 0), (3, 4))
    [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4)]
    >>> get_line((3, 4), (0, 0))
    [(3, 4), (2, 3), (1, 2), (1, 1), (0, 0)]
    """
    (a_major, a_minor), (b_major, b_minor) = start, end

    # A steep line is walked along its second coordinate instead of its first
    steep = abs(b_minor - a_minor) > abs(b_major - a_major)
    if steep:
        a_major, a_minor, b_major, b_minor = a_minor, a_major, b_minor, b_major

    # Always walk in increasing direction; remember whether the ends were exchanged
    flipped = a_major > b_major
    if flipped:
        a_major, a_minor, b_major, b_minor = b_major, b_minor, a_major, a_minor

    run = b_major - a_major
    rise = abs(b_minor - a_minor)
    minor_step = 1 if a_minor < b_minor else -1
    err = int(run / 2.0)

    pixels = []
    minor = a_minor
    for major in range(a_major, b_major + 1):
        pixels.append((minor, major) if steep else (major, minor))
        err -= rise
        if err < 0:
            minor += minor_step
            err += run

    # Restore the start -> end ordering
    return pixels[::-1] if flipped else pixels

# Function to sort boundary pixels by theta value relative to center of colony image
    # 'seg' should be the mask of the colony boundary
    # 'start' is the center of the image, and 'end' is the boundary pixel
def sort_thetas(seg):
    """Order the nonzero pixels of `seg` by their angle around the image centre.

    The centre is (round(width / 2), round(height / 2)).  Angles are computed
    with atan2 using a flipped vertical axis, so they increase
    counter-clockwise as displayed on screen.

    Returns:
        (angles, pixels): the sorted angles as a NumPy array and the matching
        [row, col] pixels as a list.  The first entry is repeated at the end
        of both so that the sequence is periodic.
    """
    center_x = np.round(seg.shape[1]/2.0)
    center_y = np.round(seg.shape[0]/2.0)

    # [row, col] of every nonzero pixel
    boundary_pixels = np.transpose(np.nonzero(seg)).tolist()

    # Angle from the centre to each pixel (rows grow downwards, hence the minus)
    angles = [
        math.atan2(-(pixel[0] - center_y), pixel[1] - center_x)
        for pixel in boundary_pixels
    ]

    ordered_angles = np.sort(angles)
    ordered_pixels = [boundary_pixels[idx] for idx in np.argsort(angles)]

    # Close the loop by repeating the first element
    ordered_angles = np.append(ordered_angles, ordered_angles[0])
    ordered_pixels.append(ordered_pixels[0])

    return ordered_angles, ordered_pixels

# Function to create data for plotting 'intensity' between endpoints of a line
def get_intensity_map(seg, sorted_points):
    """Count, for each ray from the centre to a point, the pixels set in `seg`.

    For every point in `sorted_points` a Bresenham line (`get_line`) is drawn
    from the centre of `seg` to that point.

    Args:
        seg: 2-D mask indexed as [row, col] (presumably a boolean mask of the
            red pixels -- confirm against the caller).
        sorted_points: iterable of [row, col] points, e.g. the second return
            value of `sort_thetas`.

    Returns:
        (intensity_sum, full_seg_sum): two lists with one entry per point --
        the number of line pixels that are set in `seg`, and the total number
        of pixels on the line.
    """
    intensity_sum = []
    full_seg_sum = []

    # Centre in (row, col) order, matching both `sorted_points` and the way
    # `seg` is indexed below.  The earlier (col, row) order was only correct
    # for square masks and could index out of bounds otherwise.
    center_row = np.round(seg.shape[0]/2.0).astype(np.int32)
    center_col = np.round(seg.shape[1]/2.0).astype(np.int32)
    seg_center = (center_row, center_col)

    # get a line between the center and each endpoint iteratively
    for this_point in sorted_points:
        line_points = get_line(seg_center, tuple(this_point))

        # masks holding the line pixels: those set in seg, and all of them
        seg_line = np.zeros_like(seg)
        full_seg = np.zeros_like(seg)
        for row, col in line_points:
            full_seg[row, col] = True
            # keep only the line pixels that are set in the segmentation
            if seg[row, col] == True:
                seg_line[row, col] = True

        intensity_sum.append(np.sum(seg_line))
        full_seg_sum.append(np.sum(full_seg))

    return intensity_sum, full_seg_sum

def create_filled_ellipse_in_array(seg, padding = 0):
    """Return a boolean mask, shaped like `seg`, containing a filled ellipse.

    The ellipse is centred on the array and its radii are half the array
    height and half the array width, each reduced by `padding`.
    """
    n_rows, n_cols = seg.shape[0], seg.shape[1]
    mask = np.zeros_like(seg).astype(bool)

    center_row = (n_rows -1) / 2.0
    center_col = (n_cols-1) / 2.0
    radius_rows = (n_rows) / 2.0 - padding
    radius_cols = (n_cols) / 2.0 - padding

    inside_rows, inside_cols = draw.ellipse(center_row, center_col, radius_rows, radius_cols)
    mask[inside_rows, inside_cols] = 1
    return mask

def create_circle_boundary(seg, radius):
    """Return the boundary of a circle of `radius` centred in an array shaped like `seg`.

    A filled disk is drawn first; its outline is then extracted with
    `get_colony_boundary_binary`.
    """
    disk = np.zeros_like(seg).astype(bool)
    center_row = (seg.shape[0] -1) / 2.0
    center_col = (seg.shape[1]-1) / 2.0

    disk_rows, disk_cols = draw.ellipse(center_row, center_col, radius, radius)
    disk[disk_rows, disk_cols] = 1

    return get_colony_boundary_binary(disk)

def get_endpoint_locations(endpoints_list, seg, radius):
    """Project component endpoints onto a circle of `radius` around the centre of `seg`.

    For every [row, col] endpoint the angle from the array centre is
    computed; the endpoint is then moved along that direction to distance
    `radius` from the centre.

    Returns:
        angle_list: angle (radians, from atan2) of each endpoint.
        endpoint_locations: projected points as [x, y] pairs.
        endpoints_x: centre x followed by the x of every projected point.
        endpoints_y: centre y followed by the y of every projected point.
    """
    center_row = (seg.shape[0] -1) / 2.0
    center_col = (seg.shape[1] -1) / 2.0

    angle_list = [
        math.atan2((endpoint[0] - center_row), (endpoint[1] - center_col))
        for endpoint in endpoints_list
    ]

    # Both coordinate lists start with the centre itself
    endpoints_x = [center_col]
    endpoints_y = [center_row]
    endpoint_locations = []
    for theta in angle_list:
        projected_x = center_col + radius*math.cos(theta)
        projected_y = center_row + radius*math.sin(theta)
        endpoint_locations.append([projected_x, projected_y])
        endpoints_x.append(projected_x)
        endpoints_y.append(projected_y)

    return angle_list, endpoint_locations, endpoints_x, endpoints_y

def switch_red_white(seg):
    """Return a copy of `seg` with the labels 255 and 127 exchanged.

    All other values are left unchanged and `seg` itself is not modified.
    Both masks are taken from the original array before anything is
    replaced, so no temporary placeholder label is needed.  (The previous
    implementation used 111 as a placeholder, which silently turned any
    pixel already equal to 111 into 127.)
    """
    was_255 = seg == 255
    was_127 = seg == 127
    swapped = np.where(was_255, 127, seg)      # pixels labelled 255 become 127
    swapped = np.where(was_127, 255, swapped)  # pixels labelled 127 become 255
    return swapped

## Classification Code
Functions related to the colony boundary

1. Find the colony boundary and skeletonize it.
2. Same as 1, but with binary images.
3. Get the red and white components.
4. Find the connected components of the red boundary.
5. For each component, find the endpoints.
6. Draw the sector from the endpoints to the colony center.
7. Get the boundary and interior of the sector.
8. Check if the interior and boundary partitions of the sector are consistent
9. Decide if the boundary has been misclassified.  If yes, change the class of the colony boundary.  Rerun steps 4-9.


In [None]:
# Function to find colony boundary

def get_colony_boundary(colony_mask):
    padded_colony = np.pad(colony_mask[:,:,0] != 0, 1, 'constant', constant_values=0)
    check_boundary_array = np.zeros_like(padded_colony) + 4
    image_shape = check_boundary_array.shape

    for i in range(1,image_shape[0]-1):
        for j in range(1,image_shape[1]-1):
            check_boundary_array[i,j] = check_boundary_array[i,j] - padded_colony[i-1,j] # pixel above is 1
            check_boundary_array[i,j] = check_boundary_array[i,j] - padded_colony[i+1,j] # pixel below is 1
            check_boundary_array[i,j] = check_boundary_array[i,j] - padded_colony[i,j-1] # pixel to the left is 1
            check_boundary_array[i,j] = check_boundary_array[i,j] - padded_colony[i,j+1] # pixel to the right is 1

    background_mask = check_boundary_array > 0
    edge_mask = np.multiply(background_mask, padded_colony) # sanity check so that boundary is in the padded colony
    edge_mask_unpadded = edge_mask[1:-1, 1:-1] # remove the padding

    # Skeletonize red boundary pixels (https://scikit-image.org/docs/dev/auto_examples/edges/plot_skeleton.html)
    # This will turn any represenation of a cohesive red colony region on the boundary as a set of lines in pixels space, meant to correct for areas that have weird boundaries.
    edge_mask_unpadded = skeletonize(edge_mask_unpadded)
    return edge_mask_unpadded

# binary version of above code
def get_colony_boundary_binary(colony_mask):
    """Return the skeletonized boundary of a 2-D colony mask.

    A pixel belongs to the raw boundary when it is colony (non-zero) and at
    least one of its four edge neighbours (above, below, left, right) is not.
    The mask is zero-padded first, so everything outside the image counts as
    background.  The raw boundary is then thinned with ``skeletonize``
    (https://scikit-image.org/docs/dev/auto_examples/edges/plot_skeleton.html)
    so that thick or irregular stretches collapse to one-pixel-wide lines.

    Parameters
    ----------
    colony_mask : 2-D array; any non-zero entry is treated as colony.

    Returns
    -------
    The array returned by ``skeletonize`` for the raw boundary (same shape as
    ``colony_mask``).
    """
    padded_colony = np.pad(colony_mask != 0, 1, 'constant', constant_values=0)

    # True where all four edge neighbours are colony, evaluated for every
    # original (unpadded) pixel at once instead of looping pixel by pixel.
    fully_surrounded = (
        padded_colony[:-2, 1:-1]      # pixel above
        & padded_colony[2:, 1:-1]     # pixel below
        & padded_colony[1:-1, :-2]    # pixel to the left
        & padded_colony[1:-1, 2:]     # pixel to the right
    )

    # Boundary = colony pixels that touch background on at least one side.
    edge_mask_unpadded = padded_colony[1:-1, 1:-1] & ~fully_surrounded

    return skeletonize(edge_mask_unpadded)


# Function to find both the red and white partitions of colony boundary

def get_boundary_partitions(red_colony_mask, white_colony_mask, boundary):
    """Split a boundary mask into the parts lying on the red and white masks.

    Each class mask is multiplied element-wise with ``boundary``, so a pixel
    survives only where both the class mask and the boundary are non-zero.

    Returns
    -------
    (red_boundary_mask, white_boundary_mask, boundary_height, boundary_width)
    where the last two entries are ``boundary.shape``.
    """
    red_part, white_part = [
        np.multiply(class_mask, boundary)
        for class_mask in (red_colony_mask, white_colony_mask)
    ]
    n_rows, n_cols = boundary.shape
    return red_part, white_part, n_rows, n_cols


# Function to find endpoints of red connected component

def get_boundary_component_endpoints(colony_image, red_component):
    """Find the endpoints of one (skeletonized) boundary component.

    Two detectors are combined so that each can cover for the other's
    weaknesses:

    1. ``cv2.goodFeaturesToTrack`` with ``maxCorners=2``
       (https://docs.opencv.org/3.4/dd/d1a/group__imgproc__feature.html#ga1d6bb77486c8f92d79c8793ad995d541).
    2. A neighbour count: a component pixel with at most one of its eight
       neighbours set is treated as an endpoint.

    Parameters
    ----------
    colony_image : unused.  Previously only its shape/dtype seeded a counter
        array; the parameter is kept so existing callers keep working.
    red_component : 2-D mask of a single boundary component (assumed to hold
        0/1 or boolean values -- TODO confirm against callers).

    Returns
    -------
    list of ``[x, y]`` (i.e. ``[column, row]``) endpoint coordinates, with
    duplicates removed by ``unique``.  The colony centre is NOT included.

    Warns
    -----
    UserWarning when zero, one, or more than two endpoints are found.
    """
    red_component_padded = np.pad(red_component, 1, 'constant', constant_values=0).astype(np.int32)

    # Method 1: corner detector.
    red_endpoints = cv2.goodFeaturesToTrack(red_component.astype(np.uint8), maxCorners=2, qualityLevel=0.01, minDistance=0.1)
    # The previous test `type(red_endpoints == None)` was always truthy, so the
    # detector's output was silently thrown away on every call.
    if red_endpoints is None:
        red_endpoints_list = []
    else:
        # OpenCV returns corners as an (N, 1, 2) array of (x, y); flatten it to
        # N pairs so that both requested corners are kept, not just the first.
        red_endpoints_list = red_endpoints.reshape(-1, 2).astype(np.int32).tolist()

    # Method 2: look for the obvious endpoints that 'corner'.
    # Sum the eight neighbours of every original pixel in one vectorised pass.
    neighbour_sum = (
        red_component_padded[:-2, 1:-1]      # pixel above
        + red_component_padded[2:, 1:-1]     # pixel below
        + red_component_padded[1:-1, :-2]    # pixel to the left
        + red_component_padded[1:-1, 2:]     # pixel to the right
        + red_component_padded[:-2, :-2]     # pixel on top-left
        + red_component_padded[:-2, 2:]      # pixel on top-right
        + red_component_padded[2:, :-2]      # pixel on bottom-left
        + red_component_padded[2:, 2:]       # pixel on bottom-right
    )

    # Look for any 'cornering' pixels: at most one neighbour set.
    # There should be at most 2 if skeletonize works properly.
    endpoint_mask = neighbour_sum <= 1
    # Sanity check: endpoints must be part of the component itself.
    component_endpoint_mask_unpadded = np.multiply(endpoint_mask, red_component_padded[1:-1, 1:-1])
    endpoint_rows, endpoint_cols = np.nonzero(component_endpoint_mask_unpadded)

    # Express as [x, y] = [column, row] to match OpenCV, listed in reverse scan order.
    endpoint_locations = np.transpose(np.array((endpoint_cols, endpoint_rows)))[::-1].tolist()

    # combine lists and keep the unique elements
    full_endpoints_list = endpoint_locations + red_endpoints_list
    full_endpoints_list = unique(full_endpoints_list)

    if len(full_endpoints_list) == 1:
        warnings.warn('Algorithm found only 1 endpoint.  It\'s possible that you have a very small region, but double check the code again anyway.')
    elif len(full_endpoints_list) > 2:
        warnings.warn('Algorithm found more than 2 endpoints.  Possible bad boundary detected.  Will proceed in using the first two elements in the list.')
    elif len(full_endpoints_list) == 0:
        warnings.warn('Algorithm found no endpoints.  It\'s possible that you have a full cycle, but double check the code again anyway.')

    return full_endpoints_list # This does NOT include the center of the colony


# Function that returns the boundary and filled sector

def get_sector_masks(red_component, full_endpoints_list):
    """Build the outline, interior and filled mask of a simplified sector.

    A straight line (via ``get_line``) is drawn from the array centre to each
    of the first two endpoints, if present.  Those lines together with
    ``red_component`` form the sector outline; its enclosed holes are then
    filled.

    Parameters
    ----------
    red_component : 2-D mask of one boundary component.
    full_endpoints_list : list of ``[x, y]`` endpoints; only the first two are used.

    Returns
    -------
    (sector_boundary, sector_interior, sector_filled): boolean outline,
    boolean "filled but not outline" pixels, and the filled mask as int32.
    """
    sector_bounds = np.zeros_like(red_component)
    n_rows, n_cols = sector_bounds.shape
    centre = (np.round(n_rows/2.0).astype(np.int32), np.round(n_cols/2.0).astype(np.int32))

    # Hopefully there are EXACTLY two endpoints; any beyond the second are ignored.
    for endpoint in full_endpoints_list[:2]:
        # Endpoints are stored as [x, y]; get_line is given (row, column).
        for pixel in get_line(centre, (endpoint[1], endpoint[0])):
            point = list(pixel)
            sector_bounds[point[0], point[1]] = 1

    sector_boundary = np.logical_or(sector_bounds, red_component)

    # binary_fill_holes keeps the outline and adds everything it encloses.
    sector_filled = ndimage.binary_fill_holes(sector_boundary).astype(np.int32)

    sector_interior = np.logical_xor(sector_filled, sector_boundary)
    return sector_boundary, sector_interior, sector_filled


# Function to check for interior-exterior consistency

def check_for_consistency(sector_interior, sector_boundary, red_colony_mask):
    """Check whether a simplified sector is backed by the segmentation.

    With at least 3 interior pixels, the interior alone is compared against
    ``red_colony_mask``.  Otherwise interior and boundary are combined, and the
    check can only pass if that combined region has at least 5 pixels.

    Returns
    -------
    (confirm_check, prop_interior): ``prop_interior`` is the fraction of the
    compared region that is also set in ``red_colony_mask``; ``confirm_check``
    is True when the region is large enough and that fraction is not below 0.5
    (False signals a possibly misclassified, or too small, sector).
    """
    interior_pixels = np.sum(sector_interior)

    if interior_pixels >= 3:
        # Enough interior pixels: judge the interior on its own.
        region = sector_interior
        region_pixels = interior_pixels
        large_enough = True
    else:
        # Too little interior: fall back to interior plus boundary.
        region = np.logical_or(sector_interior, sector_boundary)
        region_pixels = np.sum(region)
        large_enough = region_pixels >= 5

    # Pixels of the region that the segmentation also marks, as a proportion.
    matched_pixels = np.sum(np.logical_and(region, red_colony_mask))
    prop_interior = matched_pixels.astype(np.float64) / region_pixels.astype(np.float64)

    confirm_check = bool(large_enough and not (prop_interior < 0.5))
    return confirm_check, prop_interior

# A redo of the above function, but considers the full region regardless of how many pixels there are (must be greater than 0)
def check_for_consistency_2(sector_filled, colony_mask):
    """Check whether a filled sector is backed by the colony segmentation.

    Unlike ``check_for_consistency`` the whole filled sector is used regardless
    of how many pixels it has.

    Parameters
    ----------
    sector_filled : mask of the filled sector (non-zero = sector).
    colony_mask : mask of the class being checked.

    Returns
    -------
    (confirm_check, prop_interior): ``prop_interior`` is the fraction of sector
    pixels also set in ``colony_mask``; ``confirm_check`` is True when that
    fraction is at least 0.5.  For an empty sector the result is
    ``(False, nan)``.
    """
    sector_sum = np.sum(sector_filled)

    # An empty sector used to evaluate 0/0 = nan; since `nan < 0.5` is False
    # the sector was then reported as confirmed.  Nothing can be confirmed from
    # zero pixels, so reject it explicitly (and skip the division warning).
    if sector_sum == 0:
        return False, np.float64(np.nan)

    check_sector_sum = np.sum(np.logical_and(sector_filled, colony_mask))
    prop_interior = check_sector_sum.astype(np.float64) / sector_sum.astype(np.float64)

    confirm_check = bool(prop_interior >= 0.5)
    return confirm_check, prop_interior


# Function to change pixels from red to white

def change_pixel_labels(red_boundary_skeleton, red_component, white_boundary_skeleton):
    """Move one boundary component from the red boundary to the white boundary.

    Intended for a potential sector that lacks interior red pixels and instead
    has a sufficient share of white pixels: its outline is relabelled from red
    to white.

    Returns
    -------
    (red boundary without the component, white boundary with the component),
    both boolean arrays.
    """
    return (
        # The component is a subset of the red boundary, so xor removes it.
        np.logical_xor(red_boundary_skeleton, red_component),
        # The component is disjoint from the white boundary, so or adds it.
        np.logical_or(white_boundary_skeleton, red_component),
    )

# Function doesn't work yet.  Use the two below this.
def pixel_class_swap_on_boundary(boundary_skeleton_to_change, red_boundary_skeleton_bad_components, white_boundary_skeleton_bad_components):
    """Relabel flagged boundary pixels in place (marked above as not working yet).

    Boundary pixels (value > 0) flagged as bad red are set to 1, then boundary
    pixels flagged as bad white are set to 2.  A pixel flagged in both masks
    therefore ends up as 2.  ``boundary_skeleton_to_change`` is modified in
    place and also returned.
    """
    # Both selections are taken from the labels as they are BEFORE any change.
    on_boundary = boundary_skeleton_to_change > 0
    relabelling = (
        (1, np.logical_and(on_boundary, red_boundary_skeleton_bad_components)),    # bad red -> white
        (2, np.logical_and(on_boundary, white_boundary_skeleton_bad_components)),  # bad white -> red
    )

    for new_label, selected_pixels in relabelling:
        boundary_skeleton_to_change[selected_pixels] = new_label

    return boundary_skeleton_to_change

# Functions for swapping boundary labels for components that fail the consistency check.
# Consider writing a single function that does both operations for both classes.
def grow_boundary(boundary_skeleton_to_grow, bad_components_to_take):
    """Return the boundary with the given components added (element-wise OR).

    Example: add the bad white boundary components to the red boundary.
    The inputs are left untouched; a new boolean array is returned.
    """
    return np.logical_or(boundary_skeleton_to_grow, bad_components_to_take)

def shrink_boundary(boundary_skeleton_to_shrink, bad_components_to_remove):
    """Return the boundary with the given components removed (element-wise XOR).

    XOR acts as a subtraction only while the components to remove are a subset
    of the boundary, which holds because the red and white boundaries are
    disjoint subsets of the full boundary.  Example: remove the bad white
    components so they can be handed to the red boundary.
    """
    return np.logical_xor(boundary_skeleton_to_shrink, bad_components_to_remove)

## Functions for shape properties

In [None]:
# Condition 1: Check that the colony segmentation is one connected component
def check_components_of_colony(seg):
    """Condition 1: is the colony segmentation (nearly) one connected component?

    Parameters
    ----------
    seg : segmentation array; entries > 0 are treated as colony.

    Returns
    -------
    (condition_1_strong, condition_1_weak)
        strong -- exactly one connected component.
        weak   -- one component, or several where the largest holds more than
                  90% of all colony pixels.
        An empty segmentation fails both.
    """
    binary_seg = seg > 0
    # return_num counts the components directly, so the result no longer
    # depends on a background label being present in the image.
    seg_labels, num_components = label(binary_seg, return_num=True)

    if num_components == 1:
        # The colony is one connected component: the condition is satisfied.
        return True, True

    if num_components == 0:
        # Nothing segmented; previously this divided by zero.
        return False, False

    # Two or more components: find the biggest and compare it to the total.
    # Sizes are pixel COUNTS; summing the label values (as before) scaled each
    # size by its label id and could push the ratio above 1.
    binary_seg_count = np.count_nonzero(binary_seg)
    max_comp_size = max(
        np.count_nonzero(seg_labels == this_label)
        for this_label in range(1, num_components + 1)
    )
    big_ratio = max_comp_size / float(binary_seg_count)

    condition_1_strong = False
    condition_1_weak = bool(big_ratio > 0.9)
    return condition_1_strong, condition_1_weak


# Condition 2: Check that the boundary of the colony segmentation is one connected component.
# This one should be more strict because all parts of the boundary are used explicitly in the classification step.
def check_components_of_boundary(seg_boundary):
    """Condition 2: is the colony boundary exactly one connected component?

    Parameters
    ----------
    seg_boundary : boundary array; entries > 0 are treated as boundary.

    Returns
    -------
    bool -- True only when there is exactly one connected component.
    """
    binary_seg = seg_boundary > 0
    # Ask label() for the component count instead of slicing np.unique(...)[1:],
    # which silently dropped a real component when no background was present.
    _, num_components = label(binary_seg, return_num=True)

    # Zero components, or two and more, both fail the condition.
    condition_2 = (num_components == 1)
    return condition_2


# Condition 3: Check for holes in the colony segmentation.
# Idea: Get the colony boundary and fill it.  If the fill matches the colony segmentation,
# then this is easily verified.  If there is more than one connected component,
# then this condition will automatically fail.
def check_for_holes(seg, seg_boundary):
    """Condition 3: does the segmentation fill everything inside its boundary?

    The boundary is flood-filled and compared with the segmentation: the
    condition holds when more than 99.9% of the filled region is also
    segmented.

    Parameters
    ----------
    seg : segmentation array; entries > 0 are colony.
    seg_boundary : boundary array; entries > 0 are boundary.

    Returns
    -------
    bool -- True when the segmentation (almost) exactly covers the filled
    boundary; False otherwise, including when the boundary is empty.
    """
    binary_seg = seg > 0
    binary_boundary = seg_boundary > 0

    # The boundary plus everything it encloses.
    filled_boundary = ndimage.binary_fill_holes(binary_boundary)
    filled_boundary_count = np.count_nonzero(filled_boundary)

    # No boundary means nothing to compare against (this used to divide by zero).
    if filled_boundary_count == 0:
        return False

    # Pixels of the filled boundary that are also in the segmentation.
    binary_seg_agreement_count = np.count_nonzero(np.logical_and(binary_seg, filled_boundary))

    condition_3 = (float(binary_seg_agreement_count) / float(filled_boundary_count)) > 0.999
    return condition_3


# Condition 4: Check that the colony boundary has only one cycle, and that the cycle uses every pixel of the boundary.
# This is equivalent to saying that the colony boundary has exactly one Hamiltonian cycle.
# Showing that there exists one is far easier than showing there is only one though.
def find_hamilton_cycle(seg_boundary):
    """Condition 4 (NOT IMPLEMENTED): does the boundary form a single cycle using every pixel?

    Placeholder: the contour analysis below runs, but no decision is derived
    from it yet and the function always returns False.

    Idea for a future implementation: find all cycles of the boundary image and
    look for one whose length equals the number of boundary pixels, which
    guarantees that every pixel is used.

    Parameters
    ----------
    seg_boundary : boundary array; entries > 0 are boundary.

    Returns
    -------
    bool -- currently always False.
    """
    # cv2.findContours expects an 8-bit single-channel image, not a boolean array.
    binary_boundary = (seg_boundary > 0).astype(np.uint8)

    # get contours
    found = cv2.findContours(binary_boundary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Depending on the OpenCV version the result is (contours, hierarchy) or
    # (image, contours, hierarchy); the last two entries are the same in both.
    contours, hierarchy = found[-2], found[-1]

    # count inner contours
    inner_contour_count = 0
    if hierarchy is not None:  # None when no contour was found
        # hierarchy has shape (1, n_contours, 4); iterate over the inner rows.
        # Each row holds the indices [next, previous, child, parent]; a
        # negative value means "none".
        for hier in hierarchy[0]:
            # discard outermost no-parent contours and keep innermost no-child contours
            if (hier[3] > -1) and (hier[2] < 0):
                inner_contour_count = inner_contour_count + 1

    # TODO: derive the answer from `contours` / `inner_contour_count`.
    has_cycle = False
    return has_cycle

    
    

# Condition 5: Check that the convex hull of the segmentation is close to a circle.
def compare_convex_hull(seg, seg_boundary):
    """Condition 5: is the segmentation approximately convex and approximately circular?

    Two reference shapes are built -- the convex hull of the boundary and the
    mask returned by ``create_filled_ellipse_in_array(seg, padding=0)``
    (presumably a filled circle/ellipse fitted to ``seg``; confirm against that
    helper).  For each, the fraction of reference pixels that are also
    segmented must exceed 0.95.

    Parameters
    ----------
    seg : segmentation array; entries > 0 are colony.
    seg_boundary : boundary array; entries > 0 are boundary.

    Returns
    -------
    (is_approximately_convex, is_approximately_circular) -- two bools.  An
    empty reference shape yields False instead of a division by zero.
    """
    binary_seg = seg > 0
    binary_boundary = seg_boundary > 0

    def covered_fraction(reference_mask):
        # Share of the reference shape's pixels that the segmentation also marks.
        reference_count = np.count_nonzero(reference_mask)
        if reference_count == 0:
            return 0.0
        shared_count = np.count_nonzero(np.logical_and(binary_seg, reference_mask))
        return float(shared_count) / float(reference_count)

    # Compare the segmentation with the convex hull of its boundary.
    chull_seg = convex_hull_image(binary_boundary)
    is_approximately_convex = covered_fraction(chull_seg) > 0.95

    # Compare the segmentation with the reference circle.
    filled_circle = create_filled_ellipse_in_array(seg, padding = 0)
    is_approximately_circular = covered_fraction(filled_circle) > 0.95

    return is_approximately_convex, is_approximately_circular

# Condition 6: Compute the Hausdorff distance between the boundary segmentation and the enclosing circle.
# Ideally, we want the shape of the colony segmentation to be very similar to a circle, because real colonies appear round.
def get_hausdorff_distance(seg, seg_boundary):
    """Condition 6: Hausdorff distances from the colony boundary to two reference outlines.

    The references are the outline of the boundary's convex hull and the
    outline of the mask from ``create_filled_ellipse_in_array(seg, padding=0)``
    (presumably a filled circle/ellipse fitted to ``seg``; confirm against that
    helper).  Both outlines come from ``get_colony_boundary_binary``.

    Returns
    -------
    (dist_to_chull, dist_to_circle)
    """
    boundary_pixels = seg_boundary > 0

    # Outline of the filled convex hull of the boundary.
    hull_outline = get_colony_boundary_binary(convex_hull_image(boundary_pixels))

    # Outline of the reference circle.
    circle_outline = get_colony_boundary_binary(create_filled_ellipse_in_array(seg, padding = 0))

    dist_to_circle = hausdorff_distance(boundary_pixels, circle_outline)
    dist_to_chull = hausdorff_distance(boundary_pixels, hull_outline)

    return dist_to_chull, dist_to_circle

## Functions for Plotting

In [None]:
def find_mode(my_array):
    """Return ``(value, count)`` for the most frequent value in ``my_array``.

    Ties go to the smallest value, because ``np.unique`` returns values sorted
    and ``argmax`` picks the first maximum.
    """
    distinct_values, occurrences = np.unique(my_array, return_counts = True)
    most_common = occurrences.argmax()
    return distinct_values[most_common], occurrences[most_common]

def get_count_breakdown(my_array):
    """Count how often each integer 0..max(my_array) occurs.

    Parameters
    ----------
    my_array : sequence or array of integers.

    Returns
    -------
    np.ndarray where entry ``i`` is the number of occurrences of ``i``.
    Integers in ``0..max`` that never occur get a count of 0 (previously any
    such gap raised ``ValueError``).  Empty input gives an empty array.
    """
    if len(my_array) == 0:
        return np.array([])

    vals, counts = np.unique(my_array, return_counts = True)
    # Map value -> count so that absent values can default to zero.
    count_of = dict(zip(vals.tolist(), counts.tolist()))
    return np.array([count_of.get(i, 0) for i in range(0, np.max(vals) + 1)])

def extend_array(my_array, length):
    """Pad ``my_array`` with trailing integer zeros up to ``length`` elements.

    An array that already has at least ``length`` elements is returned
    unchanged (the same object).  Otherwise a new, flattened array is returned.
    """
    missing = length - my_array.size
    if missing <= 0:
        return my_array
    return np.concatenate((np.ravel(my_array), np.zeros(missing, dtype=int)))

def get_sector_count_breakdown(my_array):
    """Count how often each sector count 0..max(my_array) occurs.

    Parameters
    ----------
    my_array : sequence or array of integers.

    Returns
    -------
    np.ndarray where entry ``i`` is the number of occurrences of ``i``.  Values
    in ``0..max`` that never occur get a count of 0 -- this covers values below
    the minimum as well as gaps in the middle (the latter used to raise
    ``ValueError``).  Empty input gives an empty array.
    """
    if len(my_array) == 0:
        return np.array([])

    vals, counts = np.unique(my_array, return_counts = True)
    # Map value -> count so that absent values can default to zero.
    count_of = dict(zip(vals.tolist(), counts.tolist()))
    return np.array([count_of.get(i, 0) for i in range(0, np.max(vals) + 1)])

def addlabels_initial(x,y,fs):
    """Write each ``y[i]`` as a centred serif label at ``(i - 0.25, y[i] + 5)``.

    Draws on the notebook-global ``ax``; ``x`` is only used for its length and
    ``fs`` is the font size.
    """
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        ax.text(position - 0.25, value + 5, value, **label_style)

def addlabels_prediction(x,y,fs):
    """Write each ``y[i]`` as a centred serif label at ``(i, y[i] + 5)``.

    Draws on the notebook-global ``ax``; ``x`` is only used for its length and
    ``fs`` is the font size.
    """
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        ax.text(position, value + 5, value, **label_style)

def addlabels_truemarks(x,y,fs):
    """Write each ``y[i]`` as a centred serif label at ``(i + 0.25, y[i] + 5)``.

    Draws on the notebook-global ``ax``; ``x`` is only used for its length and
    ``fs`` is the font size.
    """
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        ax.text(position + 0.25, value + 5, value, **label_style)

def addlabels_centered(x,y,fs):
    """Write each ``y[i]`` as a centred serif label at ``(x[i], y[i] + 5)``.

    Draws on the notebook-global ``ax``; ``fs`` is the font size.
    """
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        ax.text(x[position], value + 5, value, **label_style)


def addlabels_initial_ax(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(i - 0.25, y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; ``x`` is only used for its
    length and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(position - 0.25, value + 5, value, **label_style)

def addlabels_prediction_ax(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(i, y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; ``x`` is only used for its
    length and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(position, value + 5, value, **label_style)

def addlabels_truemarks_ax(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(i + 0.25, y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; ``x`` is only used for its
    length and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(position + 0.25, value + 5, value, **label_style)

def addlabels_centered_ax(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(x[i], y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(x[position], value + 5, value, **label_style)

def addlabels_prediction_ax_bytick(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(x[i], y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; labels are placed at the
    tick values in ``x`` and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(x[position], value + 5, value, **label_style)

def addlabels_truemarks_ax_bytick(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(x[i] + 0.25, y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; labels are placed relative
    to the tick values in ``x`` and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(x[position] + 0.25, value + 5, value, **label_style)

def addlabels_centered_ax_bytick(x,y,fs, this_axis):
    """Write each ``y[i]`` as a centred serif label at ``(x[i], y[i] + 5)`` on ``ax[this_axis]``.

    ``ax`` is the notebook-global axes collection; labels are placed at the
    tick values in ``x`` and ``fs`` is the font size.
    """
    target_axis = ax[this_axis]
    label_style = {'ha': 'center', 'fontfamily': "serif", 'fontsize': fs}
    for position in range(len(x)):
        value = y[position]
        target_axis.text(x[position], value + 5, value, **label_style)

# Load training and validation images, and save paths to those images
Only use this if you have the masks showing sector counts

In [None]:
# Images for Training and validation (variant WITH sector-count masks)

# Get paths of training and validation sets

train_path = image_folder + '/train'
val_path = image_folder + '/val'

image_path = train_path + '/images/'
mask_path = train_path + '/masks/'
sector_count_path = train_path + '/masks_sector_counts/'
val_image_path = val_path + '/images/'
val_mask_path = val_path + '/masks/'
val_sector_count_path = val_path + '/masks_sector_counts/'

# Get names of training and validation images and masks

my_images = glob.glob(image_path + '*.png')
my_masks = glob.glob(mask_path + '*.png')
my_sector_counts = glob.glob(sector_count_path + '*.png')
my_val_images = glob.glob(val_image_path + '*.png')
my_val_masks = glob.glob(val_mask_path + '*.png')
my_val_sector_counts = glob.glob(val_sector_count_path + '*.png')

# Check that list length is the same
if not (len(my_images) == len(my_masks) == len(my_sector_counts)):
    raise ValueError('The number of images in the train subdirectories are inconsistent.  Check that the number of images is the same in all train subdirectories.')

if not (len(my_val_images) == len(my_val_masks) == len(my_val_sector_counts)):
    raise ValueError('The number of images in the val subdirectories are inconsistent.  Check that the number of images is the same in all val subdirectories.')


# Check that all images used have the (image, mask, sector_count) tuple:
# every file found in one folder must exist, under the same file name, in the
# other two folders of the same split.

for split_label, split_sources in (
    ('Training', (
        ('image', image_path, my_images),
        ('mask', mask_path, my_masks),
        ('sector count mask', sector_count_path, my_sector_counts),
    )),
    ('Validation', (
        ('image', val_image_path, my_val_images),
        ('mask', val_mask_path, my_val_masks),
        ('sector count mask', val_sector_count_path, my_val_sector_counts),
    )),
):
    for checked_kind, checked_dir, checked_files in split_sources:
        for file in checked_files:
            x = os.path.basename(file)
            for partner_kind, partner_dir, partner_files in split_sources:
                if partner_kind == checked_kind:
                    continue
                if not os.path.exists(partner_dir + x):
                    raise NameError(split_label + ' ' + partner_kind + ' not found for ' + checked_kind + ' ' + checked_dir + x)

print('Image locations checked and grouped.')

# What the above does is this:
# my_images, my_masks and my_sector_counts are the training images and their
# corresponding masks and sector-count masks.
# my_val_images, my_val_masks and my_val_sector_counts are the same for validation.
# This checks that every image, mask and sector-count mask has its counterparts.
# If this part results in an error, do not continue until the error is resolved.


# Same as above, but no sector count masks are provided
Use this if you do not have sector counts masks

In [None]:
# Images for Training and validation

# Get paths of training and validation sets

train_path = image_folder + '/train'
val_path = image_folder + '/val'

image_path = train_path + '/images/'
mask_path = train_path + '/masks/'
#sector_count_path = train_path + '/masks_sector_counts/'
#mask_boundary_path = train_path + '/masks_bw_boundary/'
val_image_path = val_path + '/images/'
val_mask_path = val_path + '/masks/'
#val_sector_count_path = val_path + '/masks_sector_counts/'
#val_mask_boundary_path = val_path + '/masks_bw_boundary/'

print(image_path)

# Get names of training images and masks

# images and masks are png files with the same names

my_images = glob.glob(image_path + '*.png')
my_masks = glob.glob(mask_path + '*.png')
#my_sector_counts = glob.glob(sector_count_path + '*.png')
my_val_images = glob.glob(val_image_path + '*.png')
my_val_masks = glob.glob(val_mask_path + '*.png')
#my_val_sector_counts = glob.glob(val_sector_count_path + '*.png')

#print(len(my_images))
#print(len(my_masks))

# Check that list length is the same
if len(my_images) != len(my_masks):
    raise ValueError('The number of images in the train subdirectories are inconsistent.  Check that the numbe of images is the same in all train subdirectories.')

if len(my_val_images) != len(my_val_masks):
    raise ValueError('The number of images in the val subdirectories are inconsistent.  Check that the number of images is the same in all val subdirectories.')


# Check that all images used have the (image, mask, sector_count) tuple.

for file in my_images:
    x = os.path.splitext(os.path.basename(file))[0]
    #print(x)
    #print(mask_path + x)
    #print(mask_path + x)
    if os.path.exists(mask_path + x + '.png') == False:
        raise NameError('Training mask not found for image ' + image_path + x)
    # if os.path.exists(sector_count_path + x) == False:
    #     raise NameError('Training sector count mask not found for image ' + image_path + x)

for file in my_masks:
    x = os.path.splitext(os.path.basename(file))[0]
    #print(mask_path + x)
    if os.path.exists(image_path + x + '.png') == False:
        raise NameError('Training image not found for mask ' + mask_path + x)
    # if os.path.exists(sector_count_path + x) == False:
    #     raise NameError('Training sector count mask not found for mask ' + mask_path + x)

# for file in my_sector_counts:
#     x = os.path.basename(file)
#     #print(mask_path + x)
#     if os.path.exists(image_path + x) == False:
#         raise NameError('Training image not found for sector count mask ' + sector_count_path + x)
#     if os.path.exists(mask_path + x) == False:
#         raise NameError('Training mask not found for sector count mask ' + sector_count_path + x)

# Validation set: confirm that every image has a mask and every mask has an image.
# Files are matched on their stem (file name without directory or extension),
# and the partner file is expected to carry a '.png' extension.

for file in my_val_images:
    file_name = os.path.basename(file)
    stem = os.path.splitext(file_name)[0]
    if not os.path.exists(val_mask_path + stem + '.png'):
        raise NameError('Validation mask not found for image ' + val_image_path + stem)
    # Sector count check (currently disabled):
    # if os.path.exists(val_sector_count_path + stem) == False:
    #     raise NameError('Validation sector count mask not found for image ' + val_image_path + stem)

for file in my_val_masks:
    file_name = os.path.basename(file)
    stem = os.path.splitext(file_name)[0]
    if not os.path.exists(val_image_path + stem + '.png'):
        raise NameError('Validation image not found for mask ' + val_mask_path + stem)
    # Sector count check (currently disabled):
    # if os.path.exists(val_sector_count_path + stem) == False:
    #     raise NameError('Validation sector count mask not found for mask ' + val_mask_path + stem)

# Reverse check starting from the sector count masks (currently disabled):
# for file in my_val_sector_counts:
#     x = os.path.basename(file)
#     if os.path.exists(val_image_path + x) == False:
#         raise NameError('Validation image not found for sector count mask ' + val_sector_count_path + x)
#     if os.path.exists(val_mask_path + x) == False:
#         raise NameError('Validation mask not found for sector count mask ' + val_sector_count_path + x)

# Reaching this line means none of the checks in this section raised.
print('Image locations checked and grouped.')

  # What the above does is this:
  # my_images and my_masks are the training images and corresponding training masks respectively.
  # my_val_images and my_val_masks are the validation images and validation masks respectively.
  # This checks to see if all the images and masks have corresponding pairs.
  # If this part results in an error, do not continue until the error is resolved.


# Build U-Net

## Look at summary of U-Net layers and define callback functions

In [None]:
# See summary for a 1024x1024x3 image
# Builds a U-Net with input shape (1024, 1024, 3) and 3 as the second argument
# (presumably the number of classes -- confirm against build_unet's definition)
# purely so its layer summary can be printed.
# NOTE: this binds the global name `model`; the compile cell below rebinds it
# with build_unet(shape, num_classes) before any training happens.
model = build_unet((1024,1024,3),3)
model.summary()

# Step-decay learning-rate schedule.
# Not used for the paper's results; it is only installed (through
# LearningRateScheduler) when print_test_segs is enabled in the compile cell.
def my_schedule(epoch, lr):
    """Return the learning rate to use for `epoch`.

    Every 10th epoch (10, 20, 30, ...) the incoming rate is multiplied by 0.1
    and a message is printed; epoch 0 and all other epochs return `lr` unchanged.

    Parameters:
        epoch: index of the epoch about to be scheduled.
        lr: the current learning rate.

    Returns:
        `lr * 0.1` on a decay epoch, otherwise `lr`.
    """
    is_decay_epoch = epoch != 0 and epoch % 10 == 0
    if not is_decay_epoch:
        return lr
    print('Reducing learning rate by factor of 10.  New learning rate is', lr*0.1)
    return lr * 0.1

class MyCallback(tf.keras.callbacks.Callback):
    """Keras callback that saves a segmentation of one fixed test image after each epoch.

    The image is read from real_image_folder/image_to_test_with and the result
    is written to test_per_epoch_folder as 'a_<epoch>.png', so the progress of
    training can be inspected visually epoch by epoch.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Predict the test image with the model being trained and save the result.

        Parameters:
            epoch: index of the epoch that just finished (used in the file name).
            logs: metrics dictionary supplied by Keras; not used here.
        """
        print('Getting test segmentations.')
        # Load test images and predict with model

        # Image a (mostly cured + sectored example)
        x = read_image(real_image_folder + '/' + image_to_test_with)
        # Use the model Keras attached to this callback (the one being fitted)
        # instead of whatever the notebook-global name `model` currently points
        # to; this keeps the callback correct regardless of cell execution order.
        p = self.model.predict(np.expand_dims(x,axis=0))[0]
        # Index of the highest-scoring entry along the last axis for each pixel
        # (presumably the class axis -- confirm against build_unet's output layer)
        p = np.argmax(p,axis=-1)
        p = np.expand_dims(p,axis=-1)
        # Spread the class indices over the 0..255 grayscale range
        p = p * (255/(num_classes-1))
        p = p.astype(np.uint8)
        my_image = PIL.Image.fromarray(np.squeeze(p, axis=-1), "L")
        my_image.save(test_per_epoch_folder + '/a_' + str(epoch) + '.png')


## Build U-Net, set parameters for training and set which callbacks to use.

In [None]:
# Compile model
model = build_unet(shape, num_classes)

# Used in paper.
# NOTE: with the string metric 'accuracy', Keras records the training history
# under the keys 'accuracy' / 'val_accuracy'.
model.compile(loss="categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(lr), metrics=['accuracy'])

# Not used in paper (this variant records 'categorical_accuracy' / 'val_categorical_accuracy')
#model.compile(loss="categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(lr), metrics=[tf.keras.metrics.CategoricalAccuracy()])

# Set up function that gathers the images
train_dataset = tf_dataset(my_images, my_masks, batch = batch_size)
val_dataset = tf_dataset(my_val_images, my_val_masks, batch = batch_size)

# Estimate how many batches of data are needed to complete 1 epoch
train_steps = len(my_images)//batch_size
val_steps = len(my_val_images)//batch_size

# Set callbacks during the training process, in this order:
#   - ModelCheckpoint: saves the full model (not only the weights) to the .h5
#     file, keeping only the best one seen so far.  The option for this is
#     `save_best_only`; the previously used `save_best_model` is not a
#     ModelCheckpoint argument, so the "best only" behaviour was never enabled.
#   - (only if print_test_segs) MyCallback: saves a segmentation of a specific
#     test image at each epoch.
#   - (only if print_test_segs) LearningRateScheduler(my_schedule).
#     NOTE(review): this makes the learning-rate schedule depend on a
#     debug-output flag -- confirm that this coupling is intended.
#   - ReduceLROnPlateau: learning rate decreases upon reaching a plateau in validation loss.
#   - EarlyStopping: training stops automatically when validation loss does not
#     decrease by more than 0.001 over 5 epochs.
# Training will also stop after reaching the maximum number of epochs.
checkpoint_path = weights_folder + '/' + weights_file + '.h5'

callbacks = [
    ModelCheckpoint(checkpoint_path, verbose=1, save_best_only=True, save_weights_only=False),
]
if print_test_segs == True:
    callbacks += [
        MyCallback(),
        LearningRateScheduler(my_schedule),
    ]
callbacks += [
    ReduceLROnPlateau(monitor="val_loss", patience=3, factor=0.1, verbose=1, min_lr=min_lr),
    EarlyStopping(monitor="val_loss", min_delta=0.001, patience=5, verbose=1),
]


# For training up until the maximum number of epochs specified
# callbacks = [
#              ModelCheckpoint(weights_folder + '/' + weights_file + '.h5', verbose=1, save_best_only=True, save_weights_only=False),
#              ReduceLROnPlateau(monitor="val_loss", patience=3, factor=0.1, verbose=1, min_lr=1e-8)
# ]

# Training a new U-Net only!
Only run this block if you are trying to obtain a new set of weights for U-Net.  Otherwise previous training results may be lost in the output.

In [None]:
# If you have a trained model, load it first to continue training at this checkpoint
# Haven't set up to resume training yet.
#model = tf.keras.models.load_model("/content/gdrive/My Drive/Colab Notebooks/U-Net/data_30/model_classes_3_mixed.h5")


# IF YOU ENABLED TRAINING, THEN RUN THE CODE SNIPPET BELOW.  IF NOT, THIS BLOCK WILL BE SKIPPED.

if train_model == True:
    # Train the model.  The History object returned by fit() holds one list of
    # per-epoch values for each recorded metric.
    history = model.fit(train_dataset,
            steps_per_epoch=train_steps,
            validation_data=val_dataset,
            validation_steps=val_steps,
            epochs=epochs,
            callbacks=callbacks)
    print(history.history.keys())

    # The accuracy key depends on how the model was compiled:
    # metrics=['accuracy'] is stored as 'accuracy', whereas
    # tf.keras.metrics.CategoricalAccuracy() is stored as 'categorical_accuracy'.
    # Resolve the key here so neither variant ends in a KeyError after training.
    if 'categorical_accuracy' in history.history:
        acc_key = 'categorical_accuracy'
    else:
        acc_key = 'accuracy'
    val_acc_key = 'val_' + acc_key

    # Persist the curves before plotting, so a plotting problem cannot cost us
    # the training history.  The list order is:
    # [training accuracy, validation accuracy, training loss, validation loss]
    with open(weights_folder + '/accuracy_loss_' + str(num_classes) + '.pkl','wb') as file_handle:
        pickle.dump([history.history[acc_key], history.history[val_acc_key], history.history['loss'], history.history['val_loss']], file_handle)

    # Plot the results of training
    # This code was taken from https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/

    # summarize history for accuracy
    plt.plot(history.history[acc_key])
    plt.plot(history.history[val_acc_key])
    plt.title('Model Accuracy on Synthetic Images')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='center right')
    plt.show()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss on Synthetic Images')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='center right')
    plt.show()

In [None]:
if train_model == True:
    # Re-plot the training curves with the legend placed automatically.
    # Requires `history` from the training cell above.

    # The accuracy key depends on how the model was compiled:
    # metrics=['accuracy'] is stored as 'accuracy', whereas
    # tf.keras.metrics.CategoricalAccuracy() is stored as 'categorical_accuracy'.
    if 'categorical_accuracy' in history.history:
        history_acc_key = 'categorical_accuracy'
    else:
        history_acc_key = 'accuracy'

    # summarize history for accuracy
    plt.plot(history.history[history_acc_key])
    plt.plot(history.history['val_' + history_acc_key])
    plt.title('Model Accuracy on Synthetic Images')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='best')
    plt.show()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss on Synthetic Images')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='best')
    plt.show()

# Load a trained U-Net

In [None]:
# This uses the value of "weights_file" specified near the beginning of the script
# The path is the same .h5 file that the ModelCheckpoint callback writes during training.
# NOTE: this rebinds the global `model`, replacing any model still in memory from training.
model = tf.keras.models.load_model(weights_folder + '/' + weights_file + '.h5')

# TRAINING Image Segmentation and Classification

## Get locations of training and validation images
Always run this

In [None]:
# Every entry of the training-set folder and of the validation-set folder
training_images_train = sorted(glob.glob(training_image_set + '/*'))
training_images_val = sorted(glob.glob(validation_image_set + '/*'))
# Merge both lists and sort again on the full path string
training_images = sorted([*training_images_train, *training_images_val])
print(f'Number of images found: {len(training_images)}')

## Segment training images

In [None]:
# Code for image segmentation (involves Python and Octave code (requires oct2py module))
# 1. Python - Ready U-Net for input.
# 2. Python - Feed image to U-Net.
# 3. Python - Get output segmentation of image.

# 4. Octave - Use isolated colonies to estimate a range of radii to search for circular colonies.
# 5. Octave - Obtain centers and radii in image with circle Hough transform (imfindcircles).
# 6. Octave (and then Python) - Save csv files of circle locations and sizes in each image respectively.

# 7. Python (matplotlib) - Make mask of image showing where the circles are found.
# 8. Repeat steps 2-7 for each image

if get_training_segs == True:

    %matplotlib inline

     # For eahc training image
    for this_image in training_images:

        # Steps 1-3: get output segmentation and save it

        this_plate = pathlib.PurePath(this_image)
        plate_name = this_plate.name

        print('Reading plate: ' + str(plate_name))

        x = read_image(this_image)
        
        # Predict the class of each pixel, and partition output the same way
        p = model.predict(np.expand_dims(x,axis=0))[0]
        p = np.argmax(p,axis=-1)
        p = np.expand_dims(p,axis=-1)
        p = p * (255/(num_classes-1))
        p = p.astype(np.int32)


        p_full = tf.identity(p).numpy()
        in_class = tf.math.greater(tf.constant(p_full), tf.constant([0])).numpy()

        p = p.astype(np.uint8)
        # white pixels (should be 255)
        p_1 = tf.math.equal(tf.constant(p_full), tf.constant([255])).numpy().astype(np.uint8) * 255
        # red pixels (should be 127)
        p_2 = tf.math.equal(tf.constant(p_full), tf.constant([127])).numpy().astype(np.uint8) * 255
        p_full = 255 * in_class.astype(np.uint8)
        #print(p.shape)

        #cv2_imshow(p)
        
        # Show and/or save image
        #plt.imshow(p * 255/(num_classes-1))
        #cv2.cvtColor(p * 255/(num_classes-1), cv2.COLOR_BGR2RGB)
        #plt.imshow(cv2.cvtColor(p * 255/(num_classes-1), cv2.COLOR_BGR2GRAY))
        #plt.imshow(p, cmap='binary')
        #plt.show()
        #print(np.unique(p[20:40, 900:920]))
        my_image = PIL.Image.fromarray(np.squeeze(p, axis=-1), "L")
        #display(my_image.resize((256,256)))
        my_image.save(train_seg_folder + '/' + this_plate.stem + '.png')


        # Steps 4-6: Run Matlab code in Octave to use CHT, and store colony location data

        octave.feval('get_circular_data.m', this_image, train_seg_folder + '/' + this_plate.stem + '.png', train_circle_data_folder)
        try:
            radii_table = pd.read_csv(train_circle_data_folder + '/' + this_plate.stem + '.csv', header=None)
            radii_table.columns = ['Colony', 'Center (x)', 'Center (y)', 'Radius', 'Top Left (x)', 'Top Left (y)', 'Width', 'Height', 'Estimated Center (x)', 'Estimated Center (y)']
            radii_table.to_csv(train_circle_data_folder + '/' + this_plate.stem + '.csv')

            # Step 7: Plot the image with the circles overlayed, and save it
            #radii_table = pd.read_csv(test_circle_data_folder + '/' + this_plate.stem + '.csv')

            fig, ax = plt.subplots()
            ax.imshow(cv2.cvtColor(x, cv2.COLOR_BGR2RGB))
            fig.set_size_inches(1024/96, 1024/96)
            for index, row in radii_table.iterrows():
                full_circle = Circle((row['Estimated Center (x)'], row['Estimated Center (y)']), radius=row['Radius'], color='blue', fill=False, linewidth=1, alpha=0.9)
                ax.add_patch(full_circle)
            plt.axis('off')
            fig.savefig(train_circle_folder + '/' + pathlib.Path(this_image).stem + '.jpg', bbox_inches='tight', pad_inches=0)
            plt.close()


        except pd.errors.EmptyDataError:
            # If an error is about to be thrown due to an empty csv file, run these lines instead
            # An empty file means no colonies were detected
            # We still would like to ensure a file with this name exists for later.
            print('No colonies were detected.  Skipping this image.')
            my_table_columns = ['Colony', 'Center (x)', 'Center (y)', 'Radius', 'Top Left (x)', 'Top Left (y)', 'Width', 'Height', 'Estimated Center (x)', 'Estimated Center (y)']
            radii_table = pd.DataFrame(columns=my_table_columns)
            radii_table.to_csv(train_circle_data_folder + '/' + this_plate.stem + '.csv')

            # Step 7: Plot the image with the circles overlayed, and save it
            #radii_table = pd.read_csv(test_circle_data_folder + '/' + this_plate.stem + '.csv')

            #pylab.ioff()
            fig, ax = plt.subplots()
            ax.imshow(cv2.cvtColor(x, cv2.COLOR_BGR2RGB))
            fig.set_size_inches(1024/96, 1024/96) # set because the small screen pixel size is 96 dpi
            plt.axis('off')
            fig.savefig(train_circle_folder + '/' + pathlib.Path(this_image).stem + '.jpg', bbox_inches='tight', pad_inches=0)
            plt.close()
        


## Get the names of the images segmented and their circle detections

In [None]:
train_CHT_images = sorted(glob.glob(train_circle_folder + '/' + '*'))

print('Number of images found: ' + str(len(train_CHT_images)))

# Pair every training image with its circle-detection overlay by file stem.
# The segmentation step saves each overlay as '<image stem>.jpg', so the stem
# is the reliable link between the two.  Pairing by list position (zip) is not
# safe here: training_images is sorted on full paths coming from two folders,
# while the overlays are sorted by name inside a single folder, so the two
# orders can differ, and zip would also silently drop unmatched entries.
cht_by_stem = {pathlib.PurePath(cht_path).stem: cht_path for cht_path in train_CHT_images}
matched_pairs = []
for image_path in training_images:
    image_stem = pathlib.PurePath(image_path).stem
    if image_stem in cht_by_stem:
        matched_pairs.append((image_path, cht_by_stem[image_stem]))
    else:
        # Best effort: keep going, but make the gap visible
        print('WARNING: no circle detection image found for ' + str(image_path) + '; this image will be skipped.')
train_image_pairs = tuple(matched_pairs)
print(train_image_pairs)

# Create reference table mapping each plate's file name to 'Plate <n>'
# (n counts from 1 in the order of training_images).
# The table is only built and printed in this cell.
file_dict = {}
for this_plate_number, image_path in enumerate(training_images, start=1):
    this_plate = pathlib.PurePath(image_path)
    plate_name = this_plate.name
    plate_stem = os.path.splitext(plate_name)[0]
    file_dict[plate_name] = 'Plate ' + str(this_plate_number)
print(file_dict)
file_items = file_dict.items()
file_list = list(file_items)
file_df = pd.DataFrame(file_list, columns = ['Plate Name', 'Folder Name'])
print(file_df)

## Classify colonies in training images

In [None]:
# Code for colony classification (this should be all python code)
# 1. Read in image and corresponding segmentation.
# 2. Read in csv files containing circle locations.
# 3. For each row in the csv file, crop out the circular region, estimate size.
# 4. Restrict data collection to pixels within the circle detected, excluding all other pixels.
# 5. Split the components of the image into red, white and background.
# 6. Get boundary components of the colony, check for consistency.
# 7. Output predicted number of sectors and their sizes after the consistency check.
# 8. Save the cropping of the colony in a few ways:
#   - the raw colony
#   - the raw colony with circle overlayed
#   - the segmentation of the colony within the circular region
#   - the segmentation of the colony with lines drawn on the image to represent sector borders
#   - the predicted sector like regions, where each sector is a different shade of gray.
#   - similar to the previous, but keeping pixels classified only as red pixels.
#   - the segmentation that is corrected following the consistency check (possibly doing an additional check on the white pixels)
# 9. Save data on the colony itself, including the sector information, to a row in a table.
# 10. Save the table to a csv file.
# 11. Repeat all steps above for each image.

# Issues to work on:
# Verify that the purity metric is properly being utilized
# Figure out what to do with the holes inside colony segmentations.
#   -- A hole has its own boundary, so could look for the boundary of the hole.
#   -- The boundary of the hole MUST be smaller than the boundary of the entire colony
#   -- Find all connected components of the boundary, then exclude the LARGEST one.
#   -- For all other boundary components, these are expected to be the holes.  You need a procedure to fill them.
#   -- The procedure could be as simple as filling the hole with the class pertaining to the most common pixel on the boundary.

# Implementation already existing:



if classify_training_colonies == True:

    %matplotlib inline

    starting_image = True

    # if use_expert_counts == True:
    #     dot_quant_images = sorted(glob.glob(additional_data_folder + '/Quant/' + '*'))
    #     dot_state_images = sorted(glob.glob(additional_data_folder + '/State/' + '*'))


    # Run each plate through the classification pipeline

    for (train_image, CHT_image) in train_image_pairs:

        # Get plate name 
        this_plate = pathlib.PurePath(train_image)
        plate_name = this_plate.name
        plate_stem = os.path.splitext(plate_name)[0]

        print('Plate: ' + str(plate_name) + ':')
        # if save_all_annotations == True:
        #     print('Annotations will be saved within subfolders named ' + "\'" + file_dict[plate_name] + "\'")

        # Read images of the plate
        x = read_image(train_image)
        x_CHT = read_image(CHT_image)

        # initialize lists for storing values
        all_cropped_colonies = []

        # Sizes of regions in pixels
        white_region_sum = []
        red_region_sum = []
        colony_region_sum = []
        sector_region_sum = []

        corrected_white_region_sum = []
        corrected_red_region_sum = []
        corrected_sector_region_sum = []

        true_white_region_sum = []
        true_red_region_sum = []
        true_colony_region_sum = []
        true_sector_region_sum = []

        # Counting sectors
        initial_region_counts = []
        all_sector_counts = []
        true_sector_counts = []

        

        boundary_region_sum = []
        colony_prop_sum = []

        # Purity scores for regions and colonies
        average_sector_score = []
        average_sector_iou = []

        weighted_sector_score_before = []
        weighted_red_sector_score_before = []
        weighted_white_sector_score_before = []

        weighted_sector_score_after = []
        weighted_red_sector_score_after = []
        weighted_white_sector_score_after = []

        # Bounding box info for colonies in images
        sides_vert_top = [];
        sides_vert_bottom = [];
        sides_horz_left = [];
        sides_horz_right = [];

        # Test lists
        colony_is_connected = []
        colony_is_approx_connected = []
        boundary_is_connected = []
        colony_is_whole = []
        boundary_is_hamilton = []
        colony_is_approx_convex = []
        colony_is_approx_circular = []
        hausdorff_dist_convex = []
        hausdorff_dist_circle = []

        # Lists to store purity scores of each region and the color of the region
        region_purity_before = []
        region_color_before = []
        region_sizes_before = []

        weighted_purity_before = []
        weighted_purity_red_before = []
        weighted_purity_white_before = []

        region_purity_after = []
        region_color_after = []
        region_sizes_after = []

        weighted_purity_after = []
        weighted_purity_red_after = []
        weighted_purity_white_after = []

        cured_colony_before = []
        cured_colony_after = []

        stable_colony_before = []
        stable_colony_after = []

        # Load images here if using quantifable colony data/annotations
        # if use_expert_counts == True:
        #     x_quant = read_image(additional_data_folder + '/Quant/' + plate_stem + '.tif')
        #     x_state = read_image(additional_data_folder + '/State/' + plate_stem + '.tif')
        #     quantifiable_colony = []
        #     quantifiable_cured = []
        #     quantifiable_stable = []
        #     quantifiable_sectored = []

        #------------------------------
        # Read in plate and locate colonies
        #------------------------------
        
        # Read the segmentation of the plate, and keep track of which class each pixel belongs to
        p = read_mask(train_seg_folder + '/' + this_plate.stem + '.png')
        #p = read_mask(main_folder + '/Test Segs/' + specific_test_folder + '/Class_3/' + this_plate.stem + '.png')

        p_full = tf.identity(p).numpy()
        in_class = tf.math.greater(tf.constant(p_full), tf.constant([0])).numpy()

        p = p.astype(np.uint8)
        # white pixels
        p_1 = tf.math.equal(tf.constant(p_full), tf.constant([255])).numpy().astype(np.uint8) * 255
        # red pixels
        p_2 = tf.math.equal(tf.constant(p_full), tf.constant([127])).numpy().astype(np.uint8) * 255
        p_full = 255 * in_class.astype(np.uint8)

        # Gather the location and radii data from the colonies
        # If there are no colonies detected, or there is no table, skip this section at once

        colony_locations = pd.read_csv(train_circle_data_folder + '/' + pathlib.Path(train_image).stem + '.csv')
        #colony_locations = pd.read_csv(main_folder + '/Test Segs CHT Data/' + specific_test_folder + '/Class_3/' + pathlib.Path(test_image).stem + '.csv')

        # Output information from the imported csv
        print(str(len(colony_locations["Radius"])) + ' colonies found using circle Hough transform')
        plate_names = np.repeat(plate_name, len(colony_locations["Radius"]))
        colony_numbers = np.array(range(len(colony_locations["Radius"])))

        #---------------------------------------------------------------------

        # CLASSIFICATION PIPELINE START
        # Pre-processing step

        # Images to save:
        # - Cropping of the colony
        # - Cropping of the colony with the overalyed circle
        # - Original colony segmentation, such that only the pixels inside the overlayed circle are considered.

        for this_index in range(0,len(colony_numbers)):

            print('')
            print('Colony ' + str(this_index))
            # get example image using bounding indices
            #this_index = 2

            # Copy location data from colony image
            top_left_x = colony_locations["Top Left (x)"][this_index]
            top_left_y = colony_locations["Top Left (y)"][this_index]
            box_width = colony_locations["Width"][this_index]
            box_height = colony_locations["Height"][this_index]

            # Store the locations in another set of lists
            sides_vert_top.append(top_left_y)
            sides_vert_bottom.append(top_left_y + box_height - 1)
            sides_horz_left.append(top_left_x)
            sides_horz_right.append(top_left_x + box_width - 1)

            # Grab segmentation of colony using coordinates copied above
            # The colony image is NOT a boolean array
            colony_image = p[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)]
            ellipse_array = create_filled_ellipse_in_array(colony_image)
            colony_image = np.multiply(colony_image, ellipse_array) # unpadded segmentation with the pixels inside the overlayed circle

            # if use_expert_counts == True:
            #     quant_image = x_quant[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1), :]
            #     state_image = x_state[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1), :]

            # The colony mask IS a boolean array.  Keep all the pixels of each class.
            white_colony_mask = p_1[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)] > 0
            red_colony_mask = p_2[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)] > 0 
            colony_mask = np.logical_or(white_colony_mask, red_colony_mask) # sanity check to see of this is the same as colony image

            # Add segmentation of the pixels inside the circular region of detection, and apply the mask.  This ensures we only use the pixels inside the circle for analysis.
            # Booleans are inputs, and booleans are outputs
            # Force a circle in colonies detected in the circle detection step
            white_colony_mask = np.multiply(white_colony_mask, ellipse_array)
            red_colony_mask = np.multiply(red_colony_mask, ellipse_array)
            colony_mask = np.logical_or(white_colony_mask, red_colony_mask)

            # Get initial measure of the sizes of the red and white regions of the colony
            white_region_sum.append(np.sum(white_colony_mask))
            red_region_sum.append(np.sum(red_colony_mask))
            colony_region_sum.append(np.sum(colony_mask))
            sector_region_sum.append(np.sum(red_colony_mask) / np.sum(colony_mask))

            # Find colony boundaries, ensuring that the boundaries are ON the colony, not ADJACENT to it.
            edge_mask_unpadded = get_colony_boundary_binary(colony_image) # The function is above
            interior_mask_unpadded = np.logical_xor(colony_image > 0, edge_mask_unpadded) # Second mask containing only the interior pixels of the segmentation
            interior_colony = np.multiply(colony_image, interior_mask_unpadded) # This is NOT a boolean

            # #---------------------------------------------------------------
            # # Get quantifiable colony labels (if applicable)
            # #---------------------------------------------------------------

            # # If we have locations of quantifiable colonies, use this to gather the colonies.
            # if use_expert_counts == True:
            #     #----------------------------------
            #     # Determine where the quantifiable colonies are (they have black dots on them)
            #     # Set color boundaries for the markers in the counted images
            #     black_dot_boundaries = [([0, 0, 0], [5, 5, 5])]

            #     for (lower, upper) in black_dot_boundaries:
            #     # create NumPy arrays from the boundaries
            #         lower = np.array(lower, dtype = "uint8")
            #         upper = np.array(upper, dtype = "uint8")
            #         # find the colors within the specified boundaries and apply
            #         # the mask
            #         dot_mask = cv2.inRange((quant_image*255).astype(np.uint8), lower, upper)
            #         #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
            #         # Get connected components of the detected pixels
            #         black_labels = label(dot_mask)
            #         num_black_labels = len(np.unique(black_labels))
            #         if num_black_labels <= 1:
            #             # No dot was detected.  Thus the colony was considered non-quantifiable.
            #             colony_is_quantifiable = False
            #         else:
            #             # Loop through each component.  Find one component that is not too small and is directly on the colony
            #             colony_center_y = (quant_image.shape[0] - 1) / 2.0
            #             colony_center_x = (quant_image.shape[1] - 1) / 2.0
            #             for this_comp in range(1, num_black_labels):
            #                 this_dot_comp = black_labels == this_comp
            #                 # Get centroid of component
            #                 (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
            #                 dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
            #                 if dot_dist < colony_locations["Radius"][this_index]:
            #                     colony_is_quantifiable = True
            #                     break
            #                     # end the loop, as we found a dot on the colony
                            
            #                 if this_comp == (num_black_labels - 1):
            #                     # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
            #                     colony_is_quantifiable = False

            #     quantifiable_colony.append(colony_is_quantifiable)

            #     #print('Colony', this_index, ': Quantifiable:', colony_is_quantifiable)

            #     #----------------------------------------
            #     # Determine if colony is cured, stable, or sectored

            #     # RGB version
            #     # cured_dot_boundaries = [([34-5, 177-5, 76-5], [34+5, 177+5, 76+5])]
            #     # stable_dot_boundaries = [([237-5, 28-5, 36-5], [237+5, 28+5, 36+5])]
            #     # sectored_dot_boundaries = [([63-5, 72-5, 204-5], [63+5, 72+5, 204+5])]

            #     # BGR version (cv2 needs this)
            #     # Marker colors were manaully chosen, so info below is based on that.
            #     # Wes annotations
            #     cured_dot_boundaries = [([76-5, 177-5, 34-5], [76+5, 177+5, 34+5])]
            #     stable_dot_boundaries = [([36-5, 28-5, 237-5], [36+5, 28+5, 237+5])]
            #     sectored_dot_boundaries = [([204-5, 72-5, 63-5], [204+5, 72+5, 63+5])]

            #     # Nicole annotations
            #     # cured_dot_boundaries = [([0, 250, 0], [0, 255, 0])]
            #     # stable_dot_boundaries = [([0, 0, 250], [0, 0, 255])]
            #     # sectored_dot_boundaries = [([250, 250, 0], [255, 255, 0])]

            #     # cured_dot_boundaries = [([0, 250, 0], [0, 255, 0])]
            #     # stable_dot_boundaries = [([0, 0, 250], [0, 0, 255])]
            #     # sectored_dot_boundaries = [([250, 0, 0], [255, 0, 0])]

            #     for (lower, upper) in cured_dot_boundaries:
            #     # create NumPy arrays from the boundaries
            #         lower = np.array(lower, dtype = "uint8")
            #         upper = np.array(upper, dtype = "uint8")
            #         # find the colors within the specified boundaries and apply
            #         # the mask
            #         #print(np.unique((colony_image*255).astype(np.uint8)))
            #         dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
            #         #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
            #         # Get connected components of the detected dot pixels
            #         #print(np.unique(dot_mask))
            #         cured_labels = label(dot_mask)
            #         num_cured_labels = len(np.unique(cured_labels))
            #         if num_cured_labels <= 1:
            #             # No dot was detected.  Thus the colony was considered non-quantifiable.
            #             colony_is_cured = False
            #         else:
            #             # Loop through each component.  Find one component that is not too small and is directly on the colony
            #             colony_center_y = (state_image.shape[0] - 1) / 2.0
            #             colony_center_x = (state_image.shape[1] - 1) / 2.0
            #             for this_comp in range(1, num_cured_labels):
            #                 this_dot_comp = cured_labels == this_comp
            #                 # Get centroid of component
            #                 (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
            #                 dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
            #                 if dot_dist < colony_locations["Radius"][this_index]:
            #                     colony_is_cured = True
            #                     break
            #                     # end the loop, as we found a dot on the colony
                            
            #                 if this_comp == (num_cured_labels - 1):
            #                     # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
            #                     colony_is_cured = False

            #     quantifiable_cured.append(colony_is_cured)

            #     #print('Colony', this_index, ': Cured:', colony_is_cured)


            #     for (lower, upper) in stable_dot_boundaries:
            #     # create NumPy arrays from the boundaries
            #         lower = np.array(lower, dtype = "uint8")
            #         upper = np.array(upper, dtype = "uint8")
            #         # find the colors within the specified boundaries and apply
            #         # the mask
            #         #print(np.unique((colony_image*255).astype(np.uint8)))
            #         dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
            #         #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
            #         # Get connected components of the detected pixels
            #         #print(np.unique(dot_mask))
            #         stable_labels = label(dot_mask)
            #         num_stable_labels = len(np.unique(stable_labels))
            #         if num_stable_labels <= 1:
            #             # No dot was detected.  Thus the colony was considered non-quantifiable.
            #             colony_is_stable = False
            #         else:
            #             # Loop through each component.  Find one component that is not too small and is directly on the colony
            #             colony_center_y = (state_image.shape[0] - 1) / 2.0
            #             colony_center_x = (state_image.shape[1] - 1) / 2.0
            #             for this_comp in range(1, num_stable_labels):
            #                 this_dot_comp = stable_labels == this_comp
            #                 # Get centroid of component
            #                 (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
            #                 dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
            #                 if dot_dist < colony_locations["Radius"][this_index]:
            #                     colony_is_stable = True
            #                     break
            #                     # end the loop, as we found a dot on the colony
                            
            #                 if this_comp == (num_stable_labels - 1):
            #                     # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
            #                     colony_is_stable = False

            #     quantifiable_stable.append(colony_is_stable)

            #     #print('Colony', this_index, ': Stable:', colony_is_stable)


            #     for (lower, upper) in sectored_dot_boundaries:
            #     # create NumPy arrays from the boundaries
            #         lower = np.array(lower, dtype = "uint8")
            #         upper = np.array(upper, dtype = "uint8")
            #         # find the colors within the specified boundaries and apply
            #         # the mask
            #         #print(np.unique((colony_image*255).astype(np.uint8)))
            #         dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
            #         #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
            #         # Get connected components of the detected pixels
            #         #print(np.unique(dot_mask))
            #         sectored_labels = label(dot_mask)
            #         num_sectored_labels = len(np.unique(sectored_labels))
            #         if num_sectored_labels <= 1:
            #             # No dot was detected.  Thus the colony was considered non-quantifiable.
            #             colony_is_sectored = False
            #         else:
            #             # Loop through each component.  Find one component that is not too small and is directly on the colony
            #             colony_center_y = (state_image.shape[0] - 1) / 2.0
            #             colony_center_x = (state_image.shape[1] - 1) / 2.0
            #             for this_comp in range(1, num_sectored_labels):
            #                 this_dot_comp = sectored_labels == this_comp
            #                 # Get centroid of component
            #                 (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
            #                 dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
            #                 if dot_dist < colony_locations["Radius"][this_index]:
            #                     colony_is_sectored = True
            #                     break
            #                     # end the loop, as we found a dot on the colony
                            
            #                 if this_comp == (num_sectored_labels - 1):
            #                     # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
            #                     colony_is_sectored = False

            #     quantifiable_sectored.append(colony_is_sectored)

            #     #print('Colony', this_index, ': Sectored:', colony_is_sectored)


            #     #----------------------------------

            #-----------------------------------------------------
            # Get connectedness properties of the segmentation
            #-----------------------------------------------------

            # Use this information to test whether the segmentation meets the conditions

            # Condition 1 test: is the segmentation one connected component?
            condition_1_test_strong, condition_1_test_weak = check_components_of_colony(colony_mask)
            colony_is_connected.append(condition_1_test_strong)
            colony_is_approx_connected.append(condition_1_test_weak)
            #print('Condition 1: Seg is one component: ' + str(condition_1_test_strong))
            #print("Condition 1: Seg is \'approximately\' one component: " + str(condition_1_test_weak))

            # Condition 2 test: Is the boundary one connected component?
            condition_2_test = check_components_of_boundary(edge_mask_unpadded)
            boundary_is_connected.append(condition_2_test)
            #print('Condition 2: Boundary is one component: ' + str(condition_2_test))

            # Condition 3 test: Are there holes in the segmentation?
            condition_3_test = check_for_holes(colony_mask, edge_mask_unpadded)
            colony_is_whole.append(condition_3_test)
            #print('Condition 3: Segmentation has no holes: ' + str(condition_3_test))

            # Condition 4 test: Is the boundary a Hamiltonian cycle? (not ready yet)
            #condition_4_test = get_hamilton_cycle(colony_mask, edge_mask_unpadded)
            #print('Has Hamiltonian cycle: ' + str(condition_4_test))

            # Condition 5: Check circularity and convexity
            condition_5_convex, condition_5_circular = compare_convex_hull(colony_mask, edge_mask_unpadded)
            colony_is_approx_convex.append(condition_5_convex)
            colony_is_approx_circular.append(condition_5_circular)
            #print('Condition 5: Segmentation is approximately convex: ' + str(condition_5_convex))
            #print('Condition 5: Segmentation is approximately circular: ' + str(condition_5_circular))

            # Condition 6: Check hausdorff distance
            hausdorff_chull, hausdorff_circle = get_hausdorff_distance(colony_mask, edge_mask_unpadded)
            hausdorff_dist_convex.append(hausdorff_chull)
            hausdorff_dist_circle.append(hausdorff_circle)
            #print('Condition 6: Hausdorff distance between boundary and convex hull: ' + str(hausdorff_chull))
            #print('Condition 6: Hausdorff distance between boundary and circle: ' + str(hausdorff_circle))

            #----------------------------------------
            # Partition the boundaries into red and white components

            # Get 'ideal' boundary of the colony
            ideal_circle = create_circle_boundary(edge_mask_unpadded, colony_locations["Radius"][this_index])

            # Find connected components of the red and white pixels found on the boundary
            red_boundary_skeleton, white_boundary_skeleton, boundary_mask_h, boundary_mask_w = get_boundary_partitions(red_colony_mask, white_colony_mask, edge_mask_unpadded)

            #plt.imshow(red_boundary_skeleton)
            #plt.title('Red Boundary Skeleton')

            # Save the three images using this data
            #   - Original image padded
            #   - CHT image padded
            #   - Segmentation padded
            # Force a circle like previously
            # All three crops share one window: the colony bounding box grown by
            # image_padding on every side and clamped to the image extent.
            # NOTE(review): the upper clamps (H-1 / W-1) are used as *exclusive* slice ends,
            # so the last image row/column can never be part of a crop -- confirm intended.
            crop_row_span = slice(max((top_left_y - 1) - image_padding, 0),
                                  min((top_left_y + box_height - 1) + image_padding, H - 1))
            crop_col_span = slice(max((top_left_x - 1) - image_padding, 0),
                                  min((top_left_x + box_width - 1) + image_padding, W - 1))

            # Image crops are rescaled by 255; the segmentation crop is taken as-is.
            padded_x = 255 * x[crop_row_span, crop_col_span, :]
            padded_x_CHT = 255 * x_CHT[crop_row_span, crop_col_span, :]
            padded_mask = p[crop_row_span, crop_col_span]

            # Multiply the cropped segmentation element-wise by the array returned from
            # create_filled_ellipse_in_array (presumably a filled-ellipse indicator that
            # forces a round footprint -- confirm against the helper).
            ellipse_array_2 = create_filled_ellipse_in_array(padded_mask, padding = image_padding)
            padded_mask = np.multiply(padded_mask, ellipse_array_2)

            # Save the colony images as previously
            # if save_all_annotations == True:
            #     if not cv2.imwrite(output_crops_folder + '/raw/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.jpg', padded_x):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/circles/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.jpg', padded_x_CHT):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/segs/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', padded_mask):
            #         raise Exception('Could not write image.')

            # # Save the croppings for individual colonies which were annotated
            #     if use_expert_counts == True:
            #         padded_x_count = 255 * x_quant[max((top_left_y-1)-image_padding, 0):min((top_left_y + box_height - 1)+image_padding, H-1), max((top_left_x-1) - image_padding, 0):min((top_left_x + box_width - 1)+image_padding, W-1), :]
            #         if not cv2.imwrite(output_crops_folder + '/counted/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', padded_x_count):
            #             raise Exception('Could not write image.')
            #-------------------------------------------------------------

            # CLASSIFICATION STEP BEGINS
            # GET INITIAL REGIONAL BREAKDOWN AND RED REGION ANNOTATIONS OF THE COLONY

            # Should store information about the following:
            #   - Idealized red and white regions
            #   - The boundaries of each red and white region
            #   - The sizes of each region
            #   - The purity scores of each region
            #   - the states of each colony (cured, stable)

            # Images to save in this section
            # - Regional segmentation
            # - Boundary of the colony
            # - Red region boundary annotation
            # - Regions that fail consistency check

            # Plot padded version of the initial segmentation.  Annotations will be saved onto the image.
            # Pad with image_padding (not a hard-coded width) so the displayed image stays
            # aligned with the masks padded by image_padding further down and with the
            # annotation coordinates, which are offset by image_padding.
            colony_image_padded = np.pad(colony_image, image_padding)
            fig1, ax1 = plt.subplots()
            ax1.imshow(colony_image_padded, cmap='gray')


            # Start with the flag raised so the correction loop further down runs at least
            # once, which lets us look at how each step of the pipeline is performing.
            recheck_boundaries = True

            # Using boundary information, find and extract potential red and white regions
            # of the colony: label the connected components of each boundary skeleton.
            red_labels = label(red_boundary_skeleton)
            white_labels = label(white_boundary_skeleton)

            # Red accumulators: predicted regions, boundary pixels, and regions that fail
            # the consistency check.
            initial_red_region_mask, initial_red_boundary_mask, initial_bad_red_score_mask = (
                np.zeros_like(red_boundary_skeleton),
                np.zeros_like(red_boundary_skeleton),
                np.zeros_like(red_boundary_skeleton),
            )

            # White accumulators, in the same order.
            initial_white_region_mask, initial_white_boundary_mask, initial_bad_white_score_mask = (
                np.zeros_like(white_boundary_skeleton),
                np.zeros_like(white_boundary_skeleton),
                np.zeros_like(white_boundary_skeleton),
            )

            # Keep track of boundaries which fail the consistency check
            # NOTE(review): not written to anywhere in the visible part of this loop body.
            boundary_correction = np.zeros_like(red_boundary_skeleton)

            # Per-region bookkeeping, one entry per boundary component: the component's
            # endpoints, the purity score of the region, whether that score passed the
            # 50 percent threshold, and the region size.
            red_component_endpoints, red_component_scores, red_component_checks, red_component_sizes = [], [], [], []
            white_component_endpoints, white_component_scores, white_component_checks, white_component_sizes = [], [], [], []

            # How many boundaries of each color are there?
            # Count the boundary components of each colour.  The first unique label value
            # is dropped (it is the background label whenever background pixels exist).
            num_red_boundaries = len(np.unique(red_labels)[1:])
            num_white_boundaries = len(np.unique(white_labels)[1:])

            # The initial sector count is simply the number of red boundaries
            initial_region_counts.append(num_red_boundaries)

            # States can be initially predicted from the boundary counts alone:
            #   cured  <=> no white boundary at all
            #   stable <=> no red boundary at all
            # (An earlier, stricter variant additionally required exactly one boundary of
            # the other colour.)
            cured_colony_before.append(num_white_boundaries == 0)
            stable_colony_before.append(num_red_boundaries == 0)

            # Now, to analyze each of the regions to determine if they are sectored

            # Analyze the initial red regions of the colony
            # Analyze the initial red regions of the colony.
            # For every red boundary component: record its pixels, find its endpoints, build
            # the sector it spans, score that sector against the red segmentation, and store
            # the per-region results.
            for this_label in np.unique(red_labels)[1:]:
                # 0/1 mask of just this component.  The comparison already allocates a new
                # array, so no defensive copy of the label image is needed.
                red_component = (red_labels == this_label).astype(np.int32)

                # Append the red boundary pixels on this component to the red boundary mask
                initial_red_boundary_mask = np.logical_or(initial_red_boundary_mask, red_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], red_component)

                # If exactly two points are found, then everything's good.

                # Get the angle of the endpoints relative to the colony center
                # (these values are only consumed by the disabled annotation code below)
                [endpoint_angles, endpoint_locations, endpoints_x, endpoints_y] = get_endpoint_locations(full_endpoints_list, colony_mask, colony_locations["Radius"][this_index])

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                # Append the predicted filled region to the red region mask
                initial_red_region_mask = np.logical_or(initial_red_region_mask, sector_filled)

                # Apply consistency check to score the region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, red_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    recheck_boundaries = True
                    initial_bad_red_score_mask = np.logical_or(initial_bad_red_score_mask, sector_filled)

                # Append scores and info to lists
                red_component_endpoints.append(full_endpoints_list) # endpoints of the connected component on the boundary
                red_component_scores.append(prop_interior) # purity score of the region
                red_component_checks.append(confirm_check) # whether the purity score was at least 0.5
                # Size of THIS region only.  Summing the accumulated region mask instead
                # would store the running union of all regions seen so far, which inflates
                # every entry after the first and distorts the size-based weights.
                red_component_sizes.append(np.count_nonzero(sector_filled)) # the number of pixels in the region

                # # ---ANNOTATION PROCEDURE---

                # # Plot the lines of the sector (and the boundary line) onto the colony segmentation
                # length_points = len(endpoints_x)
                # #print(length_points)
                # #print(endpoints_x)
                # if len(np.unique(red_labels)[1:]) > 0: # only plots lines if there are divided regions
                #     plot_bounds_x = []
                #     plot_bounds_y = []
                #     plot_bounds_x.append(endpoints_x[0] + image_padding)
                #     plot_bounds_y.append(endpoints_y[0] + image_padding)
                #     # Get list of center and endpoints on the boundary
                #     for this_bound in range(0, length_points-1):
                #         plot_bounds_x.append(endpoints_x[this_bound+1] + image_padding)
                #         plot_bounds_y.append(endpoints_y[this_bound+1] + image_padding)
                #         #plt.plot(plot_points_y, plot_points_x, color='blue')
                #         #print(endpoints_x[0:2])
                #         #print(endpoints_y[0:2])
                #     plot_bounds_x = np.roll(np.array(plot_bounds_x), 1)
                #     plot_bounds_y = np.roll(np.array(plot_bounds_y), 1)
                #     #print(plot_bounds_x)
                #     #print(plot_bounds_y)
                #     line_style = ':' if (len(plot_bounds_x) == 2) else '-'
                #     ax1.plot(plot_bounds_y, plot_bounds_x, linewidth=5, linestyle=line_style, alpha=0.85)
                #     if len(plot_bounds_x) == 1:
                #         full_circle = Circle((plot_bounds_y, plot_bounds_x), radius=colony_locations["Radius"][this_index], color='blue', fill=False, linewidth=5, alpha=0.85)
                #         ax1.add_patch(full_circle)


            # Do the same for the white regions
            for this_label in np.unique(white_labels)[1:]:
                # 0/1 mask of just this component (fresh array, no copy of the labels needed)
                white_component = (white_labels == this_label).astype(np.int32)

                # Append the white boundary pixels on this component to the white boundary mask
                initial_white_boundary_mask = np.logical_or(initial_white_boundary_mask, white_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], white_component)

                # If exactly two points are found, then everything's good.

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                initial_white_region_mask = np.logical_or(initial_white_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, white_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    recheck_boundaries = True
                    initial_bad_white_score_mask = np.logical_or(initial_bad_white_score_mask, sector_filled)

                # Append scores and info to lists
                white_component_endpoints.append(full_endpoints_list)
                white_component_scores.append(prop_interior)
                white_component_checks.append(confirm_check)
                # Size of THIS region only (see the matching note in the red loop above)
                white_component_sizes.append(np.count_nonzero(sector_filled))

            # At this point, you should have two masks, one for the red and white regions respectively.
            # You should also have the endpoints of each component, stored as a collection of lists, one list per component
            # Finally, you should have a score for those components

            # -------------------------------------
            # Store the purity scores in a sublist, along with a second sublist indicating the color of each region
            # -------------------------------------

            all_component_scores = []
            all_region_colors = []
            all_region_sizes = []

            # Red entries first, then white.  A colour with no scored region still
            # contributes a single placeholder entry (NaN score, NaN size) so that both
            # colours are always represented in the per-colony lists.
            for region_color_name, region_score_list, region_size_list in (
                ('red', red_component_scores, red_component_sizes),
                ('white', white_component_scores, white_component_sizes),
            ):
                if region_score_list:
                    all_region_colors.extend([region_color_name] * len(region_score_list))
                    all_component_scores.extend(region_score_list)
                    all_region_sizes.extend(region_size_list)
                else:
                    all_region_colors.append(region_color_name)
                    all_component_scores.append(np.nan)
                    all_region_sizes.append(np.nan)

            # One sublist per colony in each of the running result lists
            region_purity_before.append(all_component_scores)
            region_color_before.append(all_region_colors)
            region_sizes_before.append(all_region_sizes)

            # -------------------------------------
            # Do the same for the weighted purity scores across the entire colony
            # -------------------------------------

            # Compute weighted purity scores over all regions, for white only, and for red only

            # Total size per colour; these are the normalisers for the size-based weights
            total_red_sum = np.nansum(red_component_sizes)
            total_white_sum = np.nansum(white_component_sizes)

            # Red: weight each region's purity score by its share of the total red size.
            # With no red region, fall back to a single zero weight / zero score.
            if red_component_scores:
                red_region_weights = np.array(red_component_sizes) / total_red_sum # normalised, so this vector should add to 1
                weighted_red_scores = np.array(red_component_scores) * red_region_weights
            else:
                red_region_weights = np.array([0])
                weighted_red_scores = np.array([0])

            # White: same scheme
            if white_component_scores:
                white_region_weights = np.array(white_component_sizes) / total_white_sum # normalised, so this vector should add to 1
                weighted_white_scores = np.array(white_component_scores) * white_region_weights
            else:
                white_region_weights = np.array([0])
                weighted_white_scores = np.array([0])

            # Both colours together: weights are normalised by the NaN-ignoring sum of
            # all_region_sizes, and the vectors follow the red-then-white ordering.
            all_region_sum = np.nansum(all_region_sizes)
            all_region_weights = np.array(red_component_sizes + white_component_sizes) / all_region_sum
            all_region_weighted_scores = np.array(red_component_scores + white_component_scores) * all_region_weights

            # Store the per-region weighted scores ...
            weighted_purity_red_before.append(list(weighted_red_scores))
            weighted_purity_white_before.append(list(weighted_white_scores))
            weighted_purity_before.append(list(all_region_weighted_scores))
            # ... and their NaN-ignoring totals, one value per colony
            weighted_red_sector_score_before.append(np.nansum(weighted_red_scores))
            weighted_white_sector_score_before.append(np.nansum(weighted_white_scores))
            weighted_sector_score_before.append(np.nansum(all_region_weighted_scores))

            # Now, create the masks containing the initial_regions
            initial_region_mask = np.maximum(initial_red_region_mask.astype(np.uint8), 2*initial_white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            initial_boundary_mask = np.maximum(initial_red_boundary_mask.astype(np.uint8), 2*initial_white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            initial_score_mask = np.maximum(initial_bad_red_score_mask.astype(np.uint8), 2*initial_bad_white_score_mask.astype(np.uint8))*(255/(num_classes-1))

            # Make sure to pad them in the same way as the output segmentation
            initial_region_mask = np.pad(initial_region_mask, image_padding)
            initial_boundary_mask = np.pad(initial_boundary_mask, image_padding)
            initial_score_mask = np.pad(initial_score_mask, image_padding)

            # # Save the initial region and boundary mask.  Also save image indicating regions which should be investigated further.
            # if save_all_annotations == True:
            #     if not cv2.imwrite(output_crops_folder + '/init_regions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', initial_region_mask):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/init_bounds/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', initial_boundary_mask):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/init_bad/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', initial_score_mask):
            #         raise Exception('Could not write image.')
            # Hide the axes through the figure's own Axes object instead of pyplot's
            # implicit "current axes", which would point at a different figure if any
            # code run since plt.subplots() had created one.
            ax1.axis('off')
            # if save_all_annotations == True:
            #     fig1.savefig(output_crops_folder + '/init_partitions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', bbox_inches='tight', pad_inches=0)
            # Release the figure; one is created per colony, so leaving them open would
            # accumulate memory over the loop.
            plt.close(fig1)




            #------------------------------------------------------------------

            # CLASS SWITCH/MERGING STEP

            # This is only applied to regions where the consistency check fails.
            # i.e. less than 50% of pixels in the predicted region are of the same class
            # as the outer boundary pixels

            # The process will repeat until all regions pass the consistency check

            # NOTE: recheck_boundaries is initialized to True above, so this section
            # always runs at least once.  If every region estimated above already
            # passed the consistency check, that single pass just repeats the same
            # computations as above and leaves the boundaries unchanged.

            # Therefore, this section is purposely redundant and helps us keep
            # track of which regions are being updated.



            repetition_counter = 0
            performing_correction = False # initialize at the beginning

            while recheck_boundaries == True:

                recheck_boundaries = False # reset.
                repetition_counter = repetition_counter + 1

                # The above should be switched back to True if there is a potentially misclassified boundary


                # Find the connected components of the skeleton.
                # The number of red connected components gives the initial number of sectors.
                # The number of white connected components are the regions separating the red sectors.
                # A score for sectoriness will be applied to both sets of regions.
                red_labels = label(red_boundary_skeleton)
                white_labels = label(white_boundary_skeleton)

                # Initialize masks separating potential red and white regions
                red_region_mask = np.zeros_like(red_boundary_skeleton)
                white_region_mask = np.zeros_like(white_boundary_skeleton)
                red_boundary_mask = np.zeros_like(red_boundary_skeleton)
                white_boundary_mask = np.zeros_like(white_boundary_skeleton)
                red_score_mask = np.zeros_like(red_boundary_skeleton)
                white_score_mask = np.zeros_like(white_boundary_skeleton)

                # Initialize array to change boundary.
                # This is only updated when there is a potentially misclassified boundary.
                boundary_correction_red = np.zeros_like(red_boundary_skeleton)
                boundary_correction_white = np.zeros_like(white_boundary_skeleton)

                # Generate regions directly from segmentation
                # Iterate through each component and collect some information
                # Collect info about component endpoints and purity scores
                red_component_endpoints = []
                red_component_scores = []
                red_component_checks = []

                white_component_endpoints = []
                white_component_scores = []
                white_component_checks = []

               # print('Number of red components: ' + str(max(np.unique(red_labels)[1:])))

                # Iterate through the red components
                for this_label in np.unique(red_labels)[1:]:
                    # Isolate this red boundary component as a 0/1 int32 mask
                    red_component = (red_labels == this_label).astype(np.int32)

                    # Record its pixels in the running red boundary mask
                    red_boundary_mask = np.logical_or(red_boundary_mask, red_component > 0)

                    # Endpoints of the component, then the sector masks derived from them.
                    # If exactly two points are found, then everything's good.
                    full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], red_component)
                    sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                    # Add the filled sector to the running red region mask
                    red_region_mask = np.logical_or(red_region_mask, sector_filled)

                    # Score the sector against the red segmentation
                    confirm_check, prop_interior = check_for_consistency_2(sector_filled, red_colony_mask)

                    # Keep the per-component results for this pass
                    red_component_endpoints.append(full_endpoints_list)
                    red_component_scores.append(prop_interior)
                    red_component_checks.append(confirm_check)

                    # A failed check requests another pass and marks this component's
                    # boundary pixels for correction
                    if confirm_check == False:
                        recheck_boundaries = True
                        boundary_correction_red = np.logical_or(boundary_correction_red, red_component)

                
                # Do the same for the white components
                for this_label in np.unique(white_labels)[1:]:
                    # Isolate this white boundary component as a 0/1 int32 mask
                    white_component = (white_labels == this_label).astype(np.int32)

                    # Record its pixels in the running white boundary mask
                    white_boundary_mask = np.logical_or(white_boundary_mask, white_component > 0)

                    # Endpoints of the component, then the sector masks derived from them.
                    # If exactly two points are found, then everything's good.
                    full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], white_component)
                    sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                    # Add the filled sector to the running white region mask
                    white_region_mask = np.logical_or(white_region_mask, sector_filled)

                    # Score the sector against the white segmentation
                    confirm_check, prop_interior = check_for_consistency_2(sector_filled, white_colony_mask)

                    # Keep the per-component results for this pass
                    white_component_endpoints.append(full_endpoints_list)
                    white_component_scores.append(prop_interior)
                    white_component_checks.append(confirm_check)

                    # A failed check requests another pass and marks this component's
                    # boundary pixels for correction
                    if confirm_check == False:
                        recheck_boundaries = True
                        boundary_correction_white = np.logical_or(boundary_correction_white, white_component)

                # If there were regions that failed the consistency check, swap the classes on the boundary
                if recheck_boundaries == True:

                    performing_correction = True # This signifies that boundary information will be different from the initial breakdown

                    # Run the swap functions
                    red_boundary_skeleton = grow_boundary(red_boundary_skeleton, boundary_correction_white) # takes the bad white boundaries and switches them to the red class
                    red_boundary_skeleton = shrink_boundary(red_boundary_skeleton, boundary_correction_red) # removes the bad red boundaries

                    white_boundary_skeleton = grow_boundary(white_boundary_skeleton, boundary_correction_red) # takes the bad red boundaries and switches them to the white class
                    white_boundary_skeleton = shrink_boundary(white_boundary_skeleton, boundary_correction_white) # removes the bad white boundaries

                # Only run the block below if this colony cannot be analyzed appropriately with this pipeline (may be an awful segmentation)
                if repetition_counter > 20:
                    warnings.warn('Corrections have been applied too many times.  The colony segmentation used here is likely unsuitable for this pipeline.')
                    break

                    # Once the swap is done, you will head back to the top of this while loop.

            # At this point, you should have two masks, one for the red and white regions respectively.
            # You should also have the endpoints of each component, stored as a collection of lists, one list per component
            # Finally, you should have a score for those components

            # Now, create the masks containing the regions that pass the consistency check
            corrected_region_mask = np.maximum(red_region_mask.astype(np.uint8), 2*white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            corrected_boundary_mask = np.maximum(red_boundary_mask.astype(np.uint8), 2*white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))

            red_labels = label(red_boundary_skeleton)
            white_labels = label(white_boundary_skeleton)

            #corrected_boundary_mask = np.maximum((white_labels > 0).astype(np.uint8), 2*((red_labels > 0).astype(np.uint8)))*(255/(num_classes-1))

            # Use the corrected boundary_mask to piece together the corrected colony segmentation
            corrected_colony_image = np.add(interior_colony, corrected_boundary_mask).astype(np.uint8)
            corrected_colony_image_padded = np.pad(corrected_colony_image, image_padding)

            # Re-partition the image following correction
            corrected_full = tf.identity(corrected_colony_image).numpy().astype(np.int32)
            corrected_white_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([255])).numpy().astype(np.uint8)
            corrected_red_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([127])).numpy().astype(np.uint8)
            corrected_colony_mask = np.logical_or(corrected_white_colony_mask, corrected_red_colony_mask) # sanity check to see of this is the same as colony image


            # if performing_correction == True:
            #     corrected_region_mask = np.maximum(white_region_mask.astype(np.uint8), 2*red_region_mask.astype(np.uint8))*(255/(num_classes-1))
            #     corrected_boundary_mask = np.maximum(white_boundary_mask.astype(np.uint8), 2*red_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            #     #score_mask = np.maximum(initial_bad_white_score_mask.astype(np.uint8), 2*initial_bad_red_score_mask.astype(np.uint8))*(255/(num_classes-1))
            # else:
            #     corrected_region_mask = np.maximum(initial_white_region_mask.astype(np.uint8), 2*initial_red_region_mask.astype(np.uint8))*(255/(num_classes-1))
            #     corrected_boundary_mask = np.maximum(initial_white_boundary_mask.astype(np.uint8), 2*initial_red_boundary_mask.astype(np.uint8))*(255/(num_classes-1))

            
            #score_mask = np.pad(score_mask, 5)

            #--------------------------------------------------
            # Get the corrected skeletons, regions, and annotations

            # Get boundary information from the boundary corrected/merged segmentation
            corrected_red_boundary_skeleton, corrected_white_boundary_skeleton, corrected_boundary_mask_h, corrected_boundary_mask_w = get_boundary_partitions(corrected_red_colony_mask, corrected_white_colony_mask, edge_mask_unpadded)

            red_labels = label(corrected_red_boundary_skeleton)
            white_labels = label(corrected_white_boundary_skeleton)

            corrected_boundary_mask = np.maximum((red_labels > 0).astype(np.uint8), 2*((white_labels > 0).astype(np.uint8)))*(255/(num_classes-1))

            # Use the corrected boundary_mask to piece together the corrected colony segmentation
            corrected_colony_image = np.add(interior_colony, corrected_boundary_mask).astype(np.uint8)
            corrected_colony_image_padded = np.pad(corrected_colony_image, image_padding)

            corrected_region_mask = np.maximum(red_region_mask.astype(np.uint8), 2*white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            corrected_boundary_mask = np.maximum(red_boundary_mask.astype(np.uint8), 2*white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            #score_mask = np.maximum(initial_bad_white_score_mask.astype(np.uint8), 2*initial_bad_red_score_mask.astype(np.uint8))*(255/(num_classes-1))

            # Re-partition the image following correction
            corrected_full = tf.identity(corrected_colony_image).numpy().astype(np.int32)
            corrected_white_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([255])).numpy().astype(np.uint8)
            corrected_red_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([127])).numpy().astype(np.uint8)
            corrected_colony_mask = np.logical_or(corrected_white_colony_mask, corrected_red_colony_mask) # sanity check to see of this is the same as colony image

            

            #-------------------------------------------------------------------

            # PROCESSING THE CORRECTED REGIONS
            # If you got to this point, then the boundaries should be consistent with the interior of the colony.

            # Images to save in this section
            # - Regional segmentation with the corrected boundary
            # - Red region boundary annotation with the corrected boundary
            # - Red regions remaining after correction applied

            

            # initialize masks containing the sector locations
            all_sector_bounds = np.zeros_like(colony_mask).astype(np.int32)
            all_sector_filled = np.zeros_like(colony_mask).astype(np.int32)
            all_sector_filled_labels = np.zeros_like(colony_mask).astype(np.int32)

            # Use the corrected boundary_mask to piece together the corrected colony segmentation
            # corrected_colony_image = np.add(interior_colony, corrected_boundary_mask).astype(np.uint8)
            # corrected_colony_image_padded = np.pad(corrected_colony_image, image_padding)

            # # Re-partition the image followng correction
            # corrected_full = tf.identity(corrected_colony_image).numpy().astype(np.int32)
            # corrected_white_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([127])).numpy().astype(np.uint8)
            # corrected_red_colony_mask = tf.math.equal(tf.constant(corrected_full), tf.constant([255])).numpy().astype(np.uint8)
            # corrected_colony_mask = np.logical_or(corrected_white_colony_mask, corrected_red_colony_mask) # sanity check to see of this is the same as colony image
            #print(np.unique(corrected_colony_image))
            #plt.imshow(corrected_colony_mask, cmap='gray')
            #raise NameError('Corrected colony mask')

            # Save the corrected segmentation
            # if save_all_annotations == True:
            #     if not cv2.imwrite(output_crops_folder + '/cor_segs/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', corrected_colony_image_padded):
            #         raise Exception('Could not write image.')

            # # Get boundary information from the corrected segmentation
            # corrected_red_boundary_skeleton, corrected_white_boundary_skeleton, corrected_boundary_mask_h, corrected_boundary_mask_w = get_boundary_partitions(corrected_red_colony_mask, corrected_white_colony_mask, edge_mask_unpadded)

            # Initialize masks separating potential red and white regions
            post_red_region_mask = np.zeros_like(corrected_red_boundary_skeleton)
            post_white_region_mask = np.zeros_like(corrected_white_boundary_skeleton)
            post_red_boundary_mask = np.zeros_like(corrected_red_boundary_skeleton)
            post_white_boundary_mask = np.zeros_like(corrected_white_boundary_skeleton)
            post_red_score_mask = np.zeros_like(corrected_red_boundary_skeleton)
            post_white_score_mask = np.zeros_like(corrected_white_boundary_skeleton)

            # red_labels = label(corrected_red_boundary_skeleton)
            # white_labels = label(corrected_white_boundary_skeleton)

            # initialize counter for the number of sectors in this colony
            total_sectors = 0

            # Make copy of colony mask and place sectors on top
            #colony_mask_faded = copy.deepcopy(colony_mask).astype(np.uint8)
            #colony_mask_faded[colony_mask_faded > 0] = 20

            corrected_colony_mask_faded = copy.deepcopy(corrected_colony_mask).astype(np.uint8)
            corrected_colony_mask_faded[corrected_colony_mask_faded > 0] = 20

            sector_scores = []
            sector_ious = []

            # Create figure for annotating the corrected colony segmentations.  Annotations will be saved onto the image.
            fig2, ax2 = plt.subplots()
            ax2.imshow(corrected_colony_image_padded, cmap='gray')

            # Compute the scores of the regions one more time.
            # All regions should pass the consistency check by this point.  If not, then something is wrong.
            red_component_endpoints = []
            red_component_scores = []
            red_component_checks = []
            red_component_sizes = []

            white_component_endpoints = []
            white_component_scores = []
            white_component_checks = []
            white_component_sizes = []

            num_red_boundaries = len(np.unique(red_labels)[1:]) # number of red boundaries present
            num_white_boundaries = len(np.unique(white_labels)[1:]) # number of white boundaries present

            #if ((num_red_boundaries == 1) & (num_white_boundaries == 0)):
            if (num_white_boundaries == 0):
                cured_colony_after.append(True)
            else:
                cured_colony_after.append(False)

            #if ((num_red_boundaries == 0) & (num_white_boundaries == 1)):
            if (num_red_boundaries == 0):
                stable_colony_after.append(True)
            else:
                stable_colony_after.append(False)

            for this_label in np.unique(red_labels)[1:]:
                red_component = copy.deepcopy(red_labels)
                red_component = red_component == this_label
                red_component = red_component.astype(np.int32)

                post_red_boundary_mask = np.logical_or(post_red_boundary_mask, red_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(corrected_colony_image[:,:], red_component)

                # If exactly two points are found, then everything's good.

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                post_red_region_mask = np.logical_or(post_red_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, corrected_red_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    print('Double check your code.  The red consistency check failed for this colony with score ' + str(prop_interior))
                    #raise IOError('Something is wrong with how regions are being scored')

                # Append scores and info to lists
                red_component_endpoints.append(full_endpoints_list)
                red_component_scores.append(prop_interior)
                red_component_checks.append(confirm_check)
                red_component_sizes.append(np.sum(post_red_region_mask))

                # Code for plotting the annotations

                # For the consistent sectors, get the angles of the endpoints relative to the center
                # colony mask, or any other array with the same size and shape, will work as input as it's only needed for size info
                [endpoint_angles, endpoint_locations, endpoints_x, endpoints_y] = get_endpoint_locations(full_endpoints_list, corrected_colony_mask, colony_locations["Radius"][this_index])
                #print(endpoints_x)

                # Add to mask containing sector locations
                #sector_filled = np.logical_or(sector_boundary, sector_interior)
                all_sector_bounds = np.logical_or(all_sector_bounds, sector_boundary)
                all_sector_filled = np.logical_or(all_sector_filled, sector_filled)
                all_sector_filled_labels[sector_filled.astype(bool)] = this_label
                total_sectors = total_sectors + 1
                corrected_colony_mask_faded[sector_filled.astype(bool)] = 255 / this_label

                # Get a score for sectoriness.  We want to be sure we are capturing the entire sector
                this_sector_mask = np.logical_and(sector_filled, red_colony_mask)
                this_union_mask = np.logical_or(sector_filled, red_colony_mask)
                this_sector_score = np.sum(this_sector_mask) / np.sum(sector_filled)
                this_sector_iou = np.sum(this_sector_mask) / np.sum(this_union_mask)
                sector_scores.append(this_sector_score)
                sector_ious.append(this_sector_iou)

                # # Plot the lines of the sector (and the boundary line) onto the colony segmentation
                # length_points = len(endpoints_x)
                # #print(length_points)
                # #print(endpoints_x)
                # if len(np.unique(red_labels)[1:]) > 0:
                #     plot_bounds_x = []
                #     plot_bounds_y = []
                #     plot_bounds_x.append(endpoints_x[0] + 5)
                #     plot_bounds_y.append(endpoints_y[0] + 5)
                #     # Get list of center and endpoints on the boundary
                #     for this_bound in range(0, length_points-1):
                #         plot_bounds_x.append(endpoints_x[this_bound+1] + 5)
                #         plot_bounds_y.append(endpoints_y[this_bound+1] + 5)
                #         #plt.plot(plot_points_y, plot_points_x, color='blue')
                #         #print(endpoints_x[0:2])
                #         #print(endpoints_y[0:2])
                #     plot_bounds_x = np.roll(np.array(plot_bounds_x), 1)
                #     plot_bounds_y = np.roll(np.array(plot_bounds_y), 1)
                #     #print(plot_bounds_x)
                #     #print(plot_bounds_y)
                #     line_style = ':' if (len(plot_bounds_x) == 2) else '-'
                #     ax2.plot(plot_bounds_y, plot_bounds_x, linewidth=5, linestyle=line_style, alpha=0.85)
                #     if len(plot_bounds_x) == 1:
                #         full_circle = Circle((plot_bounds_y, plot_bounds_x), radius=colony_locations["Radius"][this_index], color='blue', fill=False, linewidth=5, alpha=0.85)
                #         ax2.add_patch(full_circle)



            for this_label in np.unique(white_labels)[1:]:
                white_component = copy.deepcopy(white_labels)
                white_component = white_component == this_label
                white_component = white_component.astype(np.int32)

                post_white_boundary_mask = np.logical_or(post_white_boundary_mask, white_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(corrected_colony_image[:,:], white_component)

                # If exactly two points are found, then everything's good.

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                post_white_region_mask = np.logical_or(post_white_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, corrected_white_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    print('Double check your code.  The white consistency check failed for this colony with score ' + str(prop_interior))

                # Append scores and info to lists
                white_component_endpoints.append(full_endpoints_list)
                white_component_scores.append(prop_interior)
                white_component_checks.append(confirm_check)
                white_component_sizes.append(np.sum(post_white_region_mask))

            print('Scores for red regions: ' + str(red_component_scores))
            print('Scores for white regions: ' + str(white_component_scores))

            # Store the purity scores in a sublist, along with a second sublist indicating the color of each region

            all_component_scores = []
            all_region_colors = []
            all_region_sizes = []

            if not red_component_scores:
                all_region_colors = all_region_colors + ['red']
                all_component_scores = all_component_scores + [np.nan]
                all_region_sizes = all_region_sizes + [np.nan]
            else:
                all_region_colors = all_region_colors + (['red'] * len(red_component_scores))
                all_component_scores = all_component_scores + red_component_scores
                all_region_sizes = all_region_sizes + red_component_sizes

            if not white_component_scores:
                all_region_colors = all_region_colors + ['white']
                all_component_scores = all_component_scores + [np.nan]
                all_region_sizes = all_region_sizes + [np.nan]
            else:
                all_region_colors = all_region_colors + (['white'] * len(white_component_scores))
                all_component_scores = all_component_scores + white_component_scores
                all_region_sizes = all_region_sizes + white_component_sizes

            region_purity_after.append(all_component_scores)
            region_color_after.append(all_region_colors)
            region_sizes_after.append(all_region_sizes)

            # Compute weighted purity scores over all regions, for white only, and for red only

            total_red_sum = np.nansum(red_component_sizes)
            total_white_sum = np.nansum(white_component_sizes)

            if not red_component_scores:
                red_region_weights = np.array([0])
                weighted_red_scores = np.array([0])
            else:
                red_region_weights = np.divide(np.array(red_component_sizes), total_red_sum) # this vector should add to 1, as this is a normalization of the weights
                weighted_red_scores = np.multiply(np.array(red_component_scores), red_region_weights)

            if not white_component_scores:
                white_region_weights = np.array([0])
                weighted_white_scores = np.array([0])
            else:
                white_region_weights = np.divide(np.array(white_component_sizes), total_white_sum) # this vector should add to 1, as this is a normalization of the weights
                weighted_white_scores = np.multiply(np.array(white_component_scores), white_region_weights)

            # Get weighted average over both regions together
            all_region_sum = np.nansum(all_region_sizes)
            all_region_weights = np.divide(np.array(red_component_sizes + white_component_sizes), all_region_sum)
            all_region_weighted_scores = np.multiply(np.array(red_component_scores + white_component_scores), all_region_weights)

            weighted_purity_red_after.append(list(weighted_red_scores))
            weighted_purity_white_after.append(list(weighted_white_scores))
            weighted_purity_after.append(list(all_region_weighted_scores))
            weighted_red_sector_score_after.append(np.nansum(weighted_red_scores))
            weighted_white_sector_score_after.append(np.nansum(weighted_white_scores))
            weighted_sector_score_after.append(np.nansum(all_region_weighted_scores))
  
            # Now, create the masks containing the initial_regions
            post_region_mask = np.maximum(post_red_region_mask.astype(np.uint8), 2*post_white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            post_boundary_mask = np.maximum(post_red_boundary_mask.astype(np.uint8), 2*post_white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            post_score_mask = np.maximum(post_red_score_mask.astype(np.uint8), 2*post_white_score_mask.astype(np.uint8))*(255/(num_classes-1))

            post_region_mask = np.pad(post_region_mask, image_padding)
            post_boundary_mask = np.pad(post_boundary_mask, image_padding)
            post_score_mask = np.pad(post_score_mask, image_padding)

            #if not cv2.imwrite(output_crops_folder + '/Colony Corrected Regions/' + pathlib.Path(test_image).stem + '_Colony_' + str(this_index) + '.png', post_region_mask):
            #    raise Exception('Could not write image.')

            #post_region_mask = np.pad(post_region_mask, 5)
                
            # if save_all_annotations == True:
            #     if not cv2.imwrite(output_crops_folder + '/cor_bounds/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_boundary_mask):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/cor_regions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_region_mask):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/cor_bad/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_score_mask):
            #         raise Exception('Could not write image.')
            plt.axis('off')
            # if save_all_annotations == True:
            #     fig2.savefig(output_crops_folder + '/cor_partitions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', bbox_inches='tight', pad_inches=0)
            plt.close(fig2);
                    
                
            #------------------------------------------------------------------

            # PRINTING RESULTS OF COLONY

            if not sector_scores:
                sector_scores = 0
                sector_ious = 0
            average_sector_score.append(np.mean(sector_scores))
            average_sector_iou.append(np.mean(sector_ious))
            #print('Colony ' + str(this_index))
            print('Estimated number of sectors: ' + str(total_sectors))
            all_sector_counts.append(total_sectors)
            print('Average sector score: ' + str(average_sector_score[-1]))
            print('Average sector score (IoU): ' + str(average_sector_iou[-1]))

            # plt.axis('off')
            # plt.savefig(output_crops_folder + '/Colony Corrected Sector Bounds/' + pathlib.Path(test_image).stem + '_Colony_' + str(this_index) + '.png', bbox_inches='tight', pad_inches=0)
            # plt.close();
            #raise NameError('Text to read') 

            corrected_white_region_sum.append(np.sum(np.logical_xor(corrected_colony_mask, all_sector_filled)))
            corrected_red_region_sum.append(np.sum(all_sector_filled))
            corrected_sector_region_sum.append(np.sum(all_sector_filled) / np.sum(corrected_colony_mask))

            true_sector_count = 0
            true_sector_counts.append(true_sector_count)
            true_sector_region_sum.append(0)
            corrected_colony_mask_faded[corrected_colony_mask == 0] = 0

            corrected_colony_mask_faded = np.pad(corrected_colony_mask_faded, image_padding)
            corrected_red_colony_mask_padded = np.pad(corrected_red_colony_mask, image_padding)
            corrected_sector_comp_mask = np.multiply(corrected_red_colony_mask_padded, corrected_colony_mask_faded)

            #colony_image_padded = np.pad(colony_image, image_padding)
            #cv2_imshow(colony_mask_faded)
            # if save_all_annotations == True:
            #     if not cv2.imwrite(output_crops_folder + '/sectors/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', corrected_colony_mask_faded):
            #         raise Exception('Could not write image.')
            #     if not cv2.imwrite(output_crops_folder + '/sector_comps/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', corrected_sector_comp_mask):
            #         raise Exception('Could not write image.')
            
            # # Plot image and lines with matplotlib
            # Recall that endpoints_x[0], endpoints_y[0] is the center of the colony, and
            # endpoints_x[#], endpoints_y[#] is a point on the boundary representing the interfacial point of a sector
            

            # Output labellings of colonies to directory

            # 1. Get the sectors labellings you defined.
            # 2. Get intersection of this labelling with the colony mask.
            # 3. Create copy of colony mask that is faded.  (sectors will be overlayed on this in the next step)
            # 4. Save cropping to Colony Sectors folder 

            # colony_cropping = p[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)]

            # height, width = colony_cropping.shape[:2]
            # # Do some plotting to see the results in action, and to find potential problems
            # cv2_imshow(255*cv2.resize(x[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)], (4*width, 4*height)))
            # cv2_imshow(cv2.resize(colony_cropping, (4*width, 4*height)))
            # fig, ax = plt.subplots(1,4)
            # ax[0].imshow(np.squeeze(colony_image))
            # ax[1].imshow(all_sector_bounds)
            # ax[2].imshow(all_sector_filled)
            # ax[3].imshow(all_sector_filled_labels * (255 / (total_sectors+1)))
            # plt.show()

            # these_thetas_sorted, sorted_points = sort_thetas(edge_mask_unpadded)
            # intensity_sum_red, full_seg_sum_red = get_intensity_map(red_colony_mask, sorted_points)
            # intensity_sum_white, full_seg_sum_white = get_intensity_map(white_colony_mask, sorted_points)
            # intensity_sum_full, full_seg_sum_full = get_intensity_map(colony_mask, sorted_points)

            # fig, ax = plt.subplots(1,3,subplot_kw={'projection': 'polar'}, figsize=(15, 4))

            # ax[0].plot(these_thetas_sorted, intensity_sum_red)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[0].set_rmax(max(colony_image.shape)/2.0)
            # ax[0].grid(True)
            # ax[0].set_title("Red Pixels by Angle", va='bottom')

            # ax[1].plot(these_thetas_sorted, intensity_sum_white)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[1].set_rmax(max(colony_image.shape)/2.0)
            # ax[1].grid(True)
            # ax[1].set_title("White Pixels by Angle", va='bottom')

            # ax[2].plot(these_thetas_sorted, intensity_sum_full)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[2].set_rmax(max(colony_image.shape)/2.0)
            # ax[2].grid(True)
            # ax[2].set_title("Colony Pixels by Angle", va='bottom')

            # plt.show()



        # This ends the loop on the isolated colonies

        # Ensure that the number of sectors are integers
        
        
        # Construct a dataframe with the number of sectors and the proportion of red present

        all_sector_counts_array = np.array(all_sector_counts).astype(int)
        true_sector_counts = np.array(true_sector_counts).astype(int)
        #true_sector_counts = np.repeat(1,len(indiv_good))
        correct_sector_count = np.abs(true_sector_counts - all_sector_counts_array) == 0

        sides_vert_top_array = np.array(sides_vert_top)
        sides_vert_bottom_array = np.array(sides_vert_bottom)
        sides_horz_left_array = np.array(sides_horz_left)
        sides_horz_right_array = np.array(sides_horz_right)
        
        # Gather all data that can be created as a numpy array
        d = {'Plate Name': plate_names,
            'Colony Number': colony_numbers.astype(int),
            'True # Sectors': true_sector_counts,
            'Initial # Regions': np.array(initial_region_counts).astype(int),
            'Pred # Sectors': all_sector_counts_array,
            'Correct # Sectors?': correct_sector_count,
            'White Area (Seg)': white_region_sum,
            'Red Area (Seg)': red_region_sum,
            'Colony Area (Seg)': (np.array(white_region_sum) + np.array(red_region_sum)),
            'White Area (Corr)': corrected_white_region_sum,
            'Red Area (Corr)': corrected_red_region_sum,
            'Colony Area (Corr)': (np.array(corrected_white_region_sum) + np.array(corrected_red_region_sum)),
            'Avg Sector Score': average_sector_score,
            'Avg Sector Score (IoU)': average_sector_iou,
            'Side Top': sides_vert_top_array,
            'Side Bottom': sides_vert_bottom_array,
            'Side Left': sides_horz_left_array,
            'Side Right': sides_horz_right_array,
            '1 Comp': np.array(colony_is_connected),
            '1 Comp (Approx)': np.array(colony_is_approx_connected),
            'Bound Comp': np.array(boundary_is_connected),
            'No Holes': np.array(colony_is_whole),
            'Approx Convex': np.array(colony_is_approx_convex),
            'Approx Circle': np.array(colony_is_approx_circular),
            'Hausdorff Convex': np.array(hausdorff_dist_convex),
            'Hausdorff Circle': np.array(hausdorff_dist_circle)}

        # # Gather data based on what else we used as input
        # if use_expert_counts == True:
        #     d['Quantifiable'] = np.array(quantifiable_colony)
        #     d['Quantifiable Cured'] = np.array(quantifiable_cured)
        #     d['Quantifiable Stable'] = np.array(quantifiable_stable)
        #     d['Quantifiable Sectored'] = np.array(quantifiable_sectored)

        df = pd.DataFrame(data=d)

        # Gather data that could NOT be stored as a numpy array, such as nested lists

        df['(BC) Regional Color Classes'] = list(region_color_before)
        df['(BC) Regional Sizes'] = list(region_sizes_before)
        df['(BC) Regional Purity Scores'] = list(region_purity_before)
        df['(BC) Red Purity Scores Weighted'] = list(weighted_purity_red_before)
        df['(BC) White Purity Scores Weighted'] = list(weighted_purity_white_before)
        df['(BC) Weighted Red Average Score'] = weighted_red_sector_score_before
        df['(BC) Weighted White Average Score'] = weighted_white_sector_score_before
        df['(BC) Weighted Full Average Score'] = weighted_sector_score_before
        df['(BC) Cured'] = cured_colony_before
        df['(BC) Stable'] = stable_colony_before

        df['(AC) Regional Color Classes'] = list(region_color_after)
        df['(AC) Regional Sizes'] = list(region_sizes_after)
        df['(AC) Regional Purity Scores'] = list(region_purity_after)
        df['(AC) Red Purity Scores Weighted'] = list(weighted_purity_red_after)
        df['(AC) White Purity Scores Weighted'] = list(weighted_purity_white_after)
        df['(AC) Weighted Red Average Score'] = weighted_red_sector_score_after
        df['(AC) Weighted White Average Score'] = weighted_white_sector_score_after
        df['(AC) Weighted Full Average Score'] = weighted_sector_score_after
        df['(AC) Cured'] = cured_colony_after
        df['(AC) Stable'] = stable_colony_after

        df.to_pickle(train_output_table_folder + '/' + str(plate_stem) + '.pkl')

        # if starting_image == False:
        #     #print('This ran.')
        #     all_df = pd.concat([all_df, df], axis=0, ignore_index=True)
        #     #print('The dataframe was appended.')
        # else:
        #     starting_image = False
        #     all_df = copy.deepcopy(df)

    #all_df
    # all_df.to_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')
    # unpickled_all_df = pd.read_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')
    # unpickled_all_df.to_csv(test_output_folder + '/' + str(weights_file) + '_colony_data.csv')

    # unpickled_all_df

## Merge output tables into one

In [None]:
# Merge the per-plate tables (one pickle per plate, found in train_output_table_folder)
# into a single table and save it next to the other training outputs.
#
# Inputs (notebook globals): train_output_table_folder, train_output_folder, weights_file
# Output: all_table_data (DataFrame with a fresh 0..n-1 index), also written to
#         <train_output_folder>/<weights_file>_colony_data.pkl

# Sort the paths so the row order of the merged table is reproducible between runs
sorted_tables = sorted(glob.glob(train_output_table_folder + '/' + '*'))
print(sorted_tables)

# Fail early with a clear message rather than hitting an undefined all_table_data below
if not sorted_tables:
    raise FileNotFoundError('No per-plate tables were found in ' + str(train_output_table_folder))

# NOTE(review): pd.read_pickle can execute arbitrary code while loading; only point
# this cell at tables that were produced by this notebook.
# Read every table first and concatenate once: calling pd.concat inside the loop
# re-copies the accumulated table on each iteration (quadratic in the number of plates).
per_plate_tables = [pd.read_pickle(this_table) for this_table in sorted_tables]
all_table_data = pd.concat(per_plate_tables, axis=0, ignore_index=True)

all_table_data.to_pickle(train_output_folder + '/' + str(weights_file) + '_colony_data.pkl')

## Load table with training data

In [None]:
# Load the merged training table (<weights_file>_colony_data.pkl in
# train_output_folder), attach the optional hand-annotated columns, set the
# label columns that are fixed for training data, and write the result back
# out as both .pkl and .csv.
colony_data = pd.read_pickle(train_output_folder + '/' + str(weights_file) + '_colony_data.pkl')

all_plate_names = colony_data['Plate Name'].unique()
# Every integer from 0 up to the largest predicted sector count, inclusive
all_sector_values = list(range(0, int(np.max(colony_data['Pred # Sectors']))+1))

# NOTE(review): each column copied from a CSV below is assigned as a pandas
# Series, which aligns on the row index - this presumably expects one CSV row
# per colony in the same order as colony_data; confirm against the CSV files.

# Include true sector counts if available
if use_true_sector_counts == True:

    # Read in table with true sector counts
    true_sector_counts = pd.read_csv(train_output_folder + '/true_colony_data.csv') # load file containing true sector counts
    colony_data['True # Sectors'] = true_sector_counts['True # Sectors'] # insert the true sector counts in the data
    matching_sector_counts = colony_data['True # Sectors'] == colony_data['Pred # Sectors'] # compare the true and predicted sector counts
    colony_data['Correct # Sectors?'] = matching_sector_counts # mark where the counts match and insert this into the data

if use_quantifiable_counts_from_table == True:
    true_quant_colonies = pd.read_csv(train_output_folder + '/true_quantifiable_colonies.csv') # load file containing the 'Quantifiable' flag of each colony
    colony_data['Quantifiable'] = true_quant_colonies['Quantifiable'] # insert this data into the original table

if use_true_cured_colonies_from_table == True:
    # Read in table with true cured colonies
    true_cured_colonies = pd.read_csv(train_output_folder + '/true_cured_colonies.csv') # load file containing whether colony is cured
    colony_data['Is Cured?'] = true_cured_colonies['Is Cured?'] # insert this data into the original table

# The combined flags need all three annotation tables, so they are only built
# when every table above was loaded.
if (use_true_sector_counts == True) & (use_quantifiable_counts_from_table == True) & (use_true_cured_colonies_from_table == True):
    colony_data['Quantifiable Cured'] = (colony_data['Quantifiable'] == True) & (colony_data['Is Cured?'] == True)
    colony_data['Quantifiable Stable'] = (colony_data['Quantifiable'] == True) & (colony_data['True # Sectors'] == 0)
    colony_data['Quantifiable Sectored'] = (colony_data['Quantifiable'] == True) & (colony_data['True # Sectors'] > 0) & (colony_data['Is Cured?'] == False)

# Since this is training data, we know the number of sectors
# NOTE(review): these three assignments run unconditionally, so they replace
# any 'True # Sectors' / 'Is Cured?' values loaded from the CSVs above, while
# 'Correct # Sectors?' and the 'Quantifiable ...' columns computed from the
# loaded values are not recomputed. Confirm that the three use_* flags are
# meant to stay off for training data.
colony_data['True # Sectors'] = np.repeat(1, len(colony_data['Pred # Sectors']))
colony_data['Is Cured?'] = np.repeat(False, len(colony_data['Pred # Sectors']))
colony_data['Is Stable?'] = np.repeat(False, len(colony_data['Pred # Sectors']))

# Overwrites the pickle that was read at the top of this cell, now including
# the added columns.
colony_data.to_pickle(train_output_folder + '/' + str(weights_file) + '_colony_data.pkl')

colony_data.to_csv(train_output_folder + '/' + str(weights_file) + '_colony_data.csv')
colony_data


## Get colony labels ([PSI+], [psi-], and Sectored)

In [None]:
# Give every colony one state label from each of three sources: the initial
# prediction ("before"), the purity-corrected prediction ("after") and the
# true annotation ("true"). Each array starts as 'UNFILLED'; for every source
# the stable rows are marked first, then the cured rows, then the sectored
# rows, and a later assignment overwrites an earlier one.
num_colonies = len(colony_data)
colony_states_before = np.array(['UNFILLED'] * num_colonies)
colony_states_after = np.array(['UNFILLED'] * num_colonies)
colony_states_true = np.array(['UNFILLED'] * num_colonies)

# Largest region/sector count seen in any of the three count columns
max_sector_count_before = max(colony_data['Initial # Regions'])
max_sector_count_after = max(colony_data['Pred # Sectors'])
max_sector_count_true = max(colony_data['True # Sectors'])

max_sector_count_all = max([max_sector_count_before, max_sector_count_after, max_sector_count_true])

# --- Before correction: '(BC) ...' columns and 'Initial # Regions' ---
colony_states_before[colony_data['(BC) Stable'] == True] = '[PSI+]'
colony_states_before[colony_data['(BC) Cured'] == True] = '[psi-]'
sectored_before = (colony_data['(BC) Cured'] == False) & (colony_data['(BC) Stable'] == False)

# --- After correction: '(AC) ...' columns and 'Pred # Sectors' ---
colony_states_after[colony_data['(AC) Stable'] == True] = '[PSI+]'
colony_states_after[colony_data['(AC) Cured'] == True] = '[psi-]'
sectored_after = (colony_data['(AC) Cured'] == False) & (colony_data['(AC) Stable'] == False)

# --- True annotation: 'Is Stable?', 'Is Cured?' and 'True # Sectors' ---
colony_states_true[colony_data['Is Stable?'] == True] = '[PSI+]'
colony_states_true[colony_data['Is Cured?'] == True] = '[psi-]'
sectored_true = (colony_data['Is Cured?'] == False) & (colony_data['Is Stable?'] == False)

# Sx: rows that are neither cured nor stable and have exactly x regions.
# Rows that are neither cured nor stable with a count of 0 keep 'UNFILLED'.
for num_regions in range(1, max_sector_count_all+1):
    sector_label = 'S' + str(num_regions)
    colony_states_before[sectored_before & (colony_data['Initial # Regions'] == num_regions)] = sector_label
    colony_states_after[sectored_after & (colony_data['Pred # Sectors'] == num_regions)] = sector_label
    colony_states_true[sectored_true & (colony_data['True # Sectors'] == num_regions).astype(bool)] = sector_label

# Rows whose true label was never set, kept for inspection
unmarked_locations = np.where(colony_states_true == 'UNFILLED')
colony_row = colony_data.iloc[unmarked_locations]

# Store the three label arrays as columns of the merged table
colony_data['Label Before'] = colony_states_before
colony_data['Label After'] = colony_states_after
colony_data['Label True'] = colony_states_true

#print(quantifiable_colony_data['Label Before'])
# counter = 0

# for ind in colony_row.index:
#     colony_number = colony_row['Colony Number'].iloc[counter]
#     plate_name = colony_row['Plate Name'].iloc[counter]
#     set_number = colony_row['Set'].iloc[counter]

#     #if counter == 0:
#     #    merged_table['Quantifiable Stable'] = 

#     # Get image
#     if set_number == 2:
#         image_to_display = read_image(sector_project_folder + '/Real Images/Wes Plates/Set 2 Prepro/' + plate_name)*255
#     image_to_display = cv2.rectangle(image_to_display, (colony_row['Side Left'].iloc[counter], colony_row['Side Top'].iloc[counter]), (colony_row['Side Right'].iloc[counter], colony_row['Side Bottom'].iloc[counter]), (255, 0, 0), 2)
#     #cv2_imshow(image_to_display)

## Plots on Training Data

### Predictions Only

#### Colony States (no sector counts)

In [None]:

# Grouped bar chart of how many colonies received each predicted state, before
# and after purity correction, with all sectored labels pooled into a single
# 'Sectored' group. No true labels are used in this cell: despite the
# "correct" in the variable names, every count is a plain tally of predicted
# labels. The 'With True Annotations' cell further down adds the manual counts.

# Largest region/sector counts in the table (not used further in this cell)
max_initial = np.max(colony_data['Initial # Regions'])
max_pred = np.max(colony_data['Pred # Sectors'])
max_all = np.max([max_initial, max_pred])

# One entry per bar group, appended in the same order as label_names
initial_correct_counts = []
post_correct_counts = []
all_counts = []  # stays empty in this cell

label_names = ['[PSI+]', '[psi-]', 'Sectored']

# Gather [PSI+] counts
correct_white_labels_before = colony_data[(colony_data['Label Before'] == '[PSI+]')]
correct_white_labels_after = colony_data[(colony_data['Label After'] == '[PSI+]')]

initial_correct_counts.append(len(correct_white_labels_before))
post_correct_counts.append(len(correct_white_labels_after))

# Gather [psi-] counts
correct_red_labels_before = colony_data[(colony_data['Label Before'] == '[psi-]')]
correct_red_labels_after = colony_data[(colony_data['Label After'] == '[psi-]')]

initial_correct_counts.append(len(correct_red_labels_before))
post_correct_counts.append(len(correct_red_labels_after))


# Gather sectored counts: any label starting with 'S' ('S1', 'S2', ...).
# Rows still labelled 'UNFILLED' fall into none of the three groups.
correct_sector_labels_before = colony_data[(colony_data['Label Before'].str.startswith('S'))]
correct_sector_labels_after = colony_data[(colony_data['Label After'].str.startswith('S'))]

initial_correct_counts.append(len(correct_sector_labels_before))
post_correct_counts.append(len(correct_sector_labels_after))

# x position of each bar group
x = np.arange(len(label_names))



width = 0.25  # the width of the bars

# A single Axes is created, so sharey=True has nothing to share here
fig, ax = plt.subplots(figsize=(12,5), sharey=True)
# Leave 50 counts of headroom above the tallest bar
ax.set_ylim(bottom=0, top=max(initial_correct_counts + post_correct_counts)+50)
# The two bars of a group sit side by side, centred on the group's tick
rects1 = ax.bar(x - width/2, initial_correct_counts, width, label='Original Predictions', color='blue')
rects2 = ax.bar(x+width/2, post_correct_counts, width, label='With Purity Correction', color='red')

ax.set_xlabel('Colony States')
ax.set_ylabel('Frequency')
ax.set_title('Classified Colonies', fontsize=16)
ax.xaxis.label.set_fontsize(14)
ax.yaxis.label.set_fontsize(14)
# Three ticks, one per entry of label_names
ax.set_xticks(np.arange(0, 3, step=1))
ax.set_xticklabels(label_names)
ax.tick_params(axis='both', labelsize=12)
ax.legend(loc='best')

xtickslocs = ax.get_xticks()
print(xtickslocs)

# addlabels_centered is defined elsewhere in the notebook; presumably it
# writes each count at the given x positions and the last argument is a font
# size - TODO confirm against its definition.
addlabels_centered(xtickslocs-width/2, initial_correct_counts, 9)
addlabels_centered(xtickslocs+width/2, post_correct_counts, 9)

# Dashed separators between the three groups
ax.axvline(x = 0.5, color = 'k', linestyle = '--')
ax.axvline(x = 1.5, color = 'k', linestyle = '--')

fig.tight_layout()
plt.show()



#### Colony States (with sector counts)

In [None]:

# Grouped bar chart of how many colonies received each predicted state, before
# and after purity correction, with one group per sector count ('S1', 'S2',
# ...). No true labels are compared in this cell: despite the "correct" in the
# variable names, every count is a plain tally of predicted labels.

# Largest region/sector counts in the table (not used further in this cell)
max_initial = np.max(colony_data['Initial # Regions'])
max_pred = np.max(colony_data['Pred # Sectors'])
max_all = np.max([max_initial, max_pred])

# One entry per bar group, appended in the same order as label_names
initial_correct_counts = []
post_correct_counts = []
all_counts = []  # stays empty in this cell

# The 'S<n>' labels are appended to this list further down
label_names = ['[PSI+]', '[psi-]']

# Gather [PSI+] counts
correct_white_labels_before = colony_data[(colony_data['Label Before'] == '[PSI+]')]
correct_white_labels_after = colony_data[(colony_data['Label After'] == '[PSI+]')]

initial_correct_counts.append(len(correct_white_labels_before))
post_correct_counts.append(len(correct_white_labels_after))

# Gather [psi-] counts
correct_red_labels_before = colony_data[(colony_data['Label Before'] == '[psi-]')]
correct_red_labels_after = colony_data[(colony_data['Label After'] == '[psi-]')]

initial_correct_counts.append(len(correct_red_labels_before))
post_correct_counts.append(len(correct_red_labels_after))


# Gather sectored counts
# The number of 'S<n>' groups is the largest count found in any of the three
# count columns; 'True # Sectors' is included even though only predictions
# are plotted.
max_sector_counts = max([np.nanmax(colony_data['Initial # Regions'].astype(int)), np.nanmax(colony_data['Pred # Sectors'].astype(int)), np.nanmax(colony_data['True # Sectors'].astype(int))])

# One count per exact label 'S1' .. 'S<max_sector_counts>'
for this_num_sectors in range(1, max_sector_counts+1):
    correct_sector_labels_before = colony_data[(colony_data['Label Before'] == 'S'+str(this_num_sectors))]
    correct_sector_labels_after = colony_data[(colony_data['Label After'] == 'S'+str(this_num_sectors))]

    initial_correct_counts.append(len(correct_sector_labels_before))
    post_correct_counts.append(len(correct_sector_labels_after))

sector_labels = ['S'+str(i) for i in (range(1, max_sector_counts+1))]
label_names = label_names + sector_labels
# x position of each bar group
x = np.arange(len(label_names))



width = 0.25  # the width of the bars

# A single Axes is created, so sharey=True has nothing to share here
fig, ax = plt.subplots(figsize=(12,5), sharey=True)
# Leave 50 counts of headroom above the tallest bar
ax.set_ylim(bottom=0, top=max(initial_correct_counts + post_correct_counts)+50)
# The two bars of a group sit side by side, centred on the group's tick
rects1 = ax.bar(x - width/2, initial_correct_counts, width, label='Original Predictions', color='blue')
rects2 = ax.bar(x+width/2, post_correct_counts, width, label='With Purity Correction', color='red')

ax.set_xlabel('Colony States')
ax.set_ylabel('Frequency')
ax.set_title('Classified Colonies', fontsize=16)
ax.xaxis.label.set_fontsize(14)
ax.yaxis.label.set_fontsize(14)
# One tick per label: the two fixed states plus max_sector_counts 'S<n>' groups
ax.set_xticks(np.arange(0, max_sector_counts+2, step=1))
ax.set_xticklabels(label_names)
ax.tick_params(axis='both', labelsize=12)
ax.legend(loc='best')

xtickslocs = ax.get_xticks()
print(xtickslocs)

# addlabels_centered is defined elsewhere in the notebook; presumably it
# writes each count at the given x positions and the last argument is a font
# size - TODO confirm against its definition.
addlabels_centered(xtickslocs-width/2, initial_correct_counts, 9)
addlabels_centered(xtickslocs+width/2, post_correct_counts, 9)

# Dashed lines after the '[PSI+]' and '[psi-]' groups
ax.axvline(x = 0.5, color = 'k', linestyle = '--')
ax.axvline(x = 1.5, color = 'k', linestyle = '--')

fig.tight_layout()
plt.show()



### With True Annotations

#### Colony States

In [None]:

# Grouped bar chart comparing predictions against the true labels. For each
# state it shows three bars: the number of colonies whose initial prediction
# matches the true label, the number whose purity-corrected prediction matches
# it, and the number of colonies that truly have that label.

max_initial = np.max(colony_data['Initial # Regions'])
max_pred = np.max(colony_data['Pred # Sectors'])
max_true = np.max(colony_data['True # Sectors'])
max_all = np.max([max_initial, max_pred, max_true])
# Absolute error of the region/sector counts against the true counts
# (neither max_all nor the two differences are used further in this cell)
diff_count_before = np.abs(colony_data['Initial # Regions'] - colony_data['True # Sectors'])
diff_count_after = np.abs(colony_data['Pred # Sectors'] - colony_data['True # Sectors'])

# One entry per bar group, appended in the same order as label_names
initial_correct_counts = []
post_correct_counts = []
true_correct_counts = []
all_counts = []  # stays empty in this cell

# The 'S<n>' labels are appended to this list further down
label_names = ['[PSI+]', '[psi-]']

# Gather [PSI+] counts
true_white_labels = colony_data[(colony_data['Label True'] == '[PSI+]')]
correct_white_labels_before = colony_data[(colony_data['Label True'] == '[PSI+]') & (colony_data['Label Before'] == '[PSI+]')]
correct_white_labels_after = colony_data[(colony_data['Label True'] == '[PSI+]') & (colony_data['Label After'] == '[PSI+]')]

initial_correct_counts.append(len(correct_white_labels_before))
post_correct_counts.append(len(correct_white_labels_after))
true_correct_counts.append(len(true_white_labels))

# Gather [psi-] counts
true_red_labels = colony_data[(colony_data['Label True'] == '[psi-]')]
correct_red_labels_before = colony_data[(colony_data['Label True'] == '[psi-]') & (colony_data['Label Before'] == '[psi-]')]
correct_red_labels_after = colony_data[(colony_data['Label True'] == '[psi-]') & (colony_data['Label After'] == '[psi-]')]

initial_correct_counts.append(len(correct_red_labels_before))
post_correct_counts.append(len(correct_red_labels_after))
true_correct_counts.append(len(true_red_labels))


# Gather sectored counts
# The number of 'S<n>' groups is the largest count found in any of the three
# count columns.
max_sector_counts = max([np.nanmax(colony_data['Initial # Regions'].astype(int)), np.nanmax(colony_data['Pred # Sectors'].astype(int)), np.nanmax(colony_data['True # Sectors'].astype(int))])

# A prediction counts as correct only when it equals the true 'S<n>' label exactly
for this_num_sectors in range(1, max_sector_counts+1):
    true_sector_labels = colony_data[(colony_data['Label True'] == 'S'+str(this_num_sectors))]
    correct_sector_labels_before = colony_data[(colony_data['Label True'] == 'S'+str(this_num_sectors)) & (colony_data['Label Before'] == 'S'+str(this_num_sectors))]
    correct_sector_labels_after = colony_data[(colony_data['Label True'] == 'S'+str(this_num_sectors)) & (colony_data['Label After'] == 'S'+str(this_num_sectors))]

    initial_correct_counts.append(len(correct_sector_labels_before))
    post_correct_counts.append(len(correct_sector_labels_after))
    true_correct_counts.append(len(true_sector_labels))

sector_labels = ['S'+str(i) for i in (range(1, max_sector_counts+1))]
# label_names is left as a notebook global after this cell; the confusion
# matrix cell below passes it as its `labels` argument.
label_names = label_names + sector_labels
# x position of each bar group
x = np.arange(len(label_names))



width = 0.25  # the width of the bars

# A single Axes is created, so sharey=True has nothing to share here
fig, ax = plt.subplots(figsize=(12,5), sharey=True)
# Leave 50 counts of headroom above the tallest bar
ax.set_ylim(bottom=0, top=max(initial_correct_counts + post_correct_counts + true_correct_counts)+50)
# Three bars per group: left of, on, and right of the group's tick
rects1 = ax.bar(x - width, initial_correct_counts, width, label='Original Predictions', color='blue')
rects2 = ax.bar(x, post_correct_counts, width, label='With Purity Correction', color='red')
rects3 = ax.bar(x + width, true_correct_counts, width, label='Manual Counts', color='green')

ax.set_xlabel('Colony States')
ax.set_ylabel('Frequency')
ax.set_title('Correctly Classified Quantifiable Colonies', fontsize=16)
ax.xaxis.label.set_fontsize(14)
ax.yaxis.label.set_fontsize(14)
# One tick per label: the two fixed states plus max_sector_counts 'S<n>' groups
ax.set_xticks(np.arange(0, max_sector_counts+2, step=1))
ax.set_xticklabels(label_names)
ax.tick_params(axis='both', labelsize=12)
ax.legend(loc='best')

# The addlabels_* helpers are defined elsewhere in the notebook; presumably
# each writes the counts over its own bar series and the last argument is a
# font size - TODO confirm against their definitions.
addlabels_initial(x, initial_correct_counts, 9)
addlabels_prediction(x, post_correct_counts, 9)
addlabels_truemarks(x, true_correct_counts, 9)

# Dashed lines after the '[PSI+]' and '[psi-]' groups
ax.axvline(x = 0.5, color = 'k', linestyle = '--')
ax.axvline(x = 1.5, color = 'k', linestyle = '--')

fig.tight_layout()
plt.show()



#### Confusion Matrices

In [None]:
# Confusion matrices of the true colony states (rows) against the predicted
# states (columns), once for the original predictions and once for the
# purity-corrected ones.
# NOTE: label_names is not assigned in this cell. It is read from the notebook
# state, where the bar-chart cell under "With True Annotations" leaves it as
# ['[PSI+]', '[psi-]', 'S1', ...]; run that cell first.
conf_mat_before = sklearn.metrics.confusion_matrix(colony_data['Label True'], colony_data['Label Before'], labels=label_names)
conf_mat_after = sklearn.metrics.confusion_matrix(colony_data['Label True'], colony_data['Label After'], labels=label_names)

# Draw both matrices with identical axis labels. Each heatmap gets its own
# figure and Axes instead of whatever pyplot's current Axes happens to be, so
# the second plot can never be drawn on top of the first.
for conf_mat, subtitle in ((conf_mat_before, 'Original Predictions'), (conf_mat_after, 'With Purity Correction')):
    fig, ax = plt.subplots()
    sns.heatmap(conf_mat, annot=True, fmt='.5g', ax=ax)
    ax.set_xlabel('Predicted States')
    ax.set_ylabel('True States')
    ax.set_title('Confusion Matrix of Colony Predictions\n' + subtitle)
    ax.set_xticklabels(label_names, rotation=0)
    ax.set_yticklabels(label_names, rotation=0)
    plt.show()

In [None]:
# Collect the colonies whose true label is 'S1' but whose predicted label is
# anything else, before and after purity correction.
is_true_s1 = colony_data['Label True'] == 'S1'

incorrect_before = colony_data[is_true_s1 & (colony_data['Label Before'] != 'S1')]
incorrect_after = colony_data[is_true_s1 & (colony_data['Label After'] != 'S1')]

In [None]:
# For the true-'S1' colonies that were given a different label, plot the
# distribution of 'Red Area (Seg)' divided by 'Colony Area (Seg)', once for
# the labels before purity correction and once for the labels after.
for incorrect_rows, stage in ((incorrect_before, 'Before'), (incorrect_after, 'After')):
    fig, ax = plt.subplots()
    ax.hist(incorrect_rows['Red Area (Seg)'] / incorrect_rows['Colony Area (Seg)'], bins = 100)
    ax.set_title('Percentage of Colony Covered by its Sector\nIncorrect Classifications ' + stage + ' (N=' + str(len(incorrect_rows)) + ')')
    ax.set_xlabel('Proportion of Red')
    ax.set_ylabel('Frequency')
    # plt.show() (as used by the other plotting cells) renders the figure with
    # the notebook's inline backend; fig.show() only works with a GUI backend
    # and emits a warning otherwise.
    plt.show()

In [None]:
# Recompute the three colony state labels ("before", "after" and "true") and
# store them in the table again. This applies the same rules, to the same
# columns, as the cell under "Get colony labels". Each array starts as
# 'UNFILLED'; for every source the stable rows are marked first, then the
# cured rows, then the sectored rows, and a later assignment overwrites an
# earlier one.
num_colonies = len(colony_data)
colony_states_before = np.array(['UNFILLED'] * num_colonies)
colony_states_after = np.array(['UNFILLED'] * num_colonies)
colony_states_true = np.array(['UNFILLED'] * num_colonies)

# Largest region/sector count seen in any of the three count columns
max_sector_count_before = max(colony_data['Initial # Regions'])
max_sector_count_after = max(colony_data['Pred # Sectors'])
max_sector_count_true = max(colony_data['True # Sectors'])

max_sector_count_all = max([max_sector_count_before, max_sector_count_after, max_sector_count_true])

# --- Before correction: '(BC) ...' columns and 'Initial # Regions' ---
colony_states_before[colony_data['(BC) Stable'] == True] = '[PSI+]'
colony_states_before[colony_data['(BC) Cured'] == True] = '[psi-]'
sectored_before = (colony_data['(BC) Cured'] == False) & (colony_data['(BC) Stable'] == False)

# --- After correction: '(AC) ...' columns and 'Pred # Sectors' ---
colony_states_after[colony_data['(AC) Stable'] == True] = '[PSI+]'
colony_states_after[colony_data['(AC) Cured'] == True] = '[psi-]'
sectored_after = (colony_data['(AC) Cured'] == False) & (colony_data['(AC) Stable'] == False)

# --- True annotation: 'Is Stable?', 'Is Cured?' and 'True # Sectors' ---
colony_states_true[colony_data['Is Stable?'] == True] = '[PSI+]'
colony_states_true[colony_data['Is Cured?'] == True] = '[psi-]'
sectored_true = (colony_data['Is Cured?'] == False) & (colony_data['Is Stable?'] == False)

# Sx: rows that are neither cured nor stable and have exactly x regions.
# Rows that are neither cured nor stable with a count of 0 keep 'UNFILLED'.
for num_regions in range(1, max_sector_count_all+1):
    sector_label = 'S' + str(num_regions)
    colony_states_before[sectored_before & (colony_data['Initial # Regions'] == num_regions)] = sector_label
    colony_states_after[sectored_after & (colony_data['Pred # Sectors'] == num_regions)] = sector_label
    colony_states_true[sectored_true & (colony_data['True # Sectors'] == num_regions).astype(bool)] = sector_label

# Rows whose true label was never set, kept for inspection
unmarked_locations = np.where(colony_states_true == 'UNFILLED')
colony_row = colony_data.iloc[unmarked_locations]

# Store the three label arrays as columns of the merged table
colony_data['Label Before'] = colony_states_before
colony_data['Label After'] = colony_states_after
colony_data['Label True'] = colony_states_true

#print(quantifiable_colony_data['Label Before'])
# counter = 0

# for ind in colony_row.index:
#     colony_number = colony_row['Colony Number'].iloc[counter]
#     plate_name = colony_row['Plate Name'].iloc[counter]
#     set_number = colony_row['Set'].iloc[counter]

#     #if counter == 0:
#     #    merged_table['Quantifiable Stable'] = 

#     # Get image
#     if set_number == 2:
#         image_to_display = read_image(sector_project_folder + '/Real Images/Wes Plates/Set 2 Prepro/' + plate_name)*255
#     image_to_display = cv2.rectangle(image_to_display, (colony_row['Side Left'].iloc[counter], colony_row['Side Top'].iloc[counter]), (colony_row['Side Right'].iloc[counter], colony_row['Side Bottom'].iloc[counter]), (255, 0, 0), 2)
#     #cv2_imshow(image_to_display)

# TESTING Image Segmentation and Classification

## Get locations of testing images

In [None]:
# List every file in real_image_folder, sorted by path, and report how many were found
test_images = sorted(glob.glob(real_image_folder + '/*'))
print('Number of images found:', len(test_images))

## Segment testing images

In [None]:
# Code for image segmentation (involves Python and Octave code (requires oct2py module))
# 1. Python - Ready u_net for input.
# 2. Python - Feed image to U-Net.
# 3. Python - Get output segmentation of image.
# 4. Octave - Use isolated colonies to estimate a range of radii to search for circular colonies.
# 5. Octave - Obtain centers and radii in image with circle Hough transform.
# 6. Octave (or Python maybe pandas) - Save csv files of circle locations and sizes in each image respectively.
# 7. Octave (or Python maybe matplotlib) - Make mask of image showing where the circles are found.
# 8. Repeat steps 2-7 for each image
#
# Files written per image: the class mask as a PNG in test_seg_folder, the
# circle table as a CSV in test_circle_data_folder, and the plate image with
# the circles drawn on it as a JPG in test_circle_folder.
# The whole cell is skipped unless get_testing_segs is True.

if get_testing_segs == True:

    %matplotlib inline

    for this_image in test_images:

        # Steps 1-3: get output segmentation and save it

        this_plate = pathlib.PurePath(this_image)
        plate_name = this_plate.name

        print('Reading plate: ' + str(plate_name))

        # read_image is defined elsewhere in the notebook
        x = read_image(this_image)

        # Predict the class of each pixel, and partition output the same way.
        # The image is passed as a batch of one; the arg-max over the last axis
        # picks one class index per pixel, and the indices are then spread over
        # 0..255 so the mask can be stored as a greyscale image.
        p = model.predict(np.expand_dims(x,axis=0))[0]
        p = np.argmax(p,axis=-1)
        p = np.expand_dims(p,axis=-1)
        p = p * (255/(num_classes-1))
        print(p.shape)
        # The integer cast drops any fractional part left by the scaling
        p = p.astype(np.int32)

        # Binary masks derived from the scaled class map.
        # NOTE(review): in_class, p_1, p_2 and p_full are not read again inside
        # this loop - confirm whether a later cell depends on them.
        # NOTE(review): the literals 255 and 127 match the scaled class values
        # only when num_classes == 3 (2 * 127.5 = 255, 1 * 127.5 cast to 127).
        p_full = tf.identity(p).numpy()
        in_class = tf.math.greater(tf.constant(p_full), tf.constant([0])).numpy()

        p = p.astype(np.uint8)
        # white pixels (should be 255)
        p_1 = tf.math.equal(tf.constant(p_full), tf.constant([255])).numpy().astype(np.uint8) * 255
        # red pixels (should be 127)
        p_2 = tf.math.equal(tf.constant(p_full), tf.constant([127])).numpy().astype(np.uint8) * 255
        p_full = 255 * in_class.astype(np.uint8)

        cv2_imshow(p)

        # Save the class mask as an 8-bit greyscale PNG named after the plate
        my_image = PIL.Image.fromarray(np.squeeze(p, axis=-1), "L")
        my_image.save(test_seg_folder + '/' + this_plate.stem + '.png')

        # Steps 4-6: Run Matlab code in Octave to use CHT, and store colony location data
        # The script receives the original image path, the mask path and the
        # output folder; the CSV read below is expected to be its output.
        octave.feval('get_circular_data.m', this_image, test_seg_folder + '/' + this_plate.stem + '.png', test_circle_data_folder)
        try:
            # The CSV is read without a header row; the column names are
            # attached here and the file is rewritten with them (to_csv also
            # writes the DataFrame index as a first column).
            radii_table = pd.read_csv(test_circle_data_folder + '/' + this_plate.stem + '.csv', header=None)
            radii_table.columns = ['Colony', 'Center (x)', 'Center (y)', 'Radius', 'Top Left (x)', 'Top Left (y)', 'Width', 'Height', 'Estimated Center (x)', 'Estimated Center (y)']
            radii_table.to_csv(test_circle_data_folder + '/' + this_plate.stem + '.csv')

            # Step 7: Plot the image with the circles overlayed, and save it

            fig, ax = plt.subplots()
            plt.imshow(cv2.cvtColor(x, cv2.COLOR_BGR2RGB))
            fig.set_size_inches(1024/96, 1024/96)
            # One blue outline per detected colony, centred on the estimated center
            for index, row in radii_table.iterrows():
                full_circle = Circle((row['Estimated Center (x)'], row['Estimated Center (y)']), radius=row['Radius'], color='blue', fill=False, linewidth=1, alpha=0.9)
                ax.add_patch(full_circle)
            plt.axis('off')
            plt.savefig(test_circle_folder + '/' + pathlib.Path(this_image).stem + '.jpg', bbox_inches='tight', pad_inches=0)
            plt.close()

        except pd.errors.EmptyDataError: # If an error is about to be thrown due to an empty csv file, run these lines instead
            # An empty CSV is treated as "no colonies": write a header-only
            # table and save the plate image without any circles so that every
            # input image still has a CSV and a JPG.
            print('No colonies were detected.  Skipping this image.')
            my_table_columns = ['Colony', 'Center (x)', 'Center (y)', 'Radius', 'Top Left (x)', 'Top Left (y)', 'Width', 'Height', 'Estimated Center (x)', 'Estimated Center (y)']
            radii_table = pd.DataFrame(columns=my_table_columns)
            radii_table.to_csv(test_circle_data_folder + '/' + this_plate.stem + '.csv')

            # Step 7: Plot the image with the circles overlayed, and save it

            fig, ax = plt.subplots()
            plt.imshow(cv2.cvtColor(x, cv2.COLOR_BGR2RGB))
            fig.set_size_inches(1024/96, 1024/96) # set because the small screen pixel size is 96 dpi
            plt.axis('off')
            plt.savefig(test_circle_folder + '/' + pathlib.Path(this_image).stem + '.jpg', bbox_inches='tight', pad_inches=0)
            plt.close()
        


## Get names of segmented testing images

In [None]:
# List the raw test images and their circle-overlay counterparts. Both lists
# are sorted by path and then paired position by position.
test_images = sorted(glob.glob(real_image_folder + '/*'))
print('Number of images found:', len(test_images))

test_CHT_images = sorted(glob.glob(test_circle_folder + '/*'))
print('Number of images found:', len(test_CHT_images))

test_image_pairs = tuple(zip(test_images, test_CHT_images))
print(test_image_pairs)


# Create reference table for plate names: each image file name is mapped to
# 'Plate <n>', where n is the image's 1-based position in the sorted list
file_dict = {}
for this_plate_number, image_path in enumerate(test_images, start=1):
    this_plate = pathlib.PurePath(image_path)
    plate_name = this_plate.name
    plate_stem = os.path.splitext(plate_name)[0]
    file_dict[plate_name] = 'Plate ' + str(this_plate_number)
print(file_dict)

# Two-column table: original file name and the folder name assigned to it
file_items = file_dict.items()
file_list = list(file_items)
file_df = pd.DataFrame(file_items, columns = ['Plate Name', 'Folder Name'])
print(file_df)


# If you are trying to save the crops and annotations of each colony, the below will run as well.

if save_all_annotations == True:

    # Make sure the output root exists before the reference table is written into it
    os.makedirs(output_crops_folder, exist_ok=True)

    # Save this table as a csv file
    file_df.to_csv(output_crops_folder + '/Plate_References.csv')

    # One subdirectory per annotation class; each of them is organized by plate.
    annotation_subfolders = [
        'raw',              # where the original colonies are cropped and stored
        'counted',          # where the original quantifiable colonies are cropped and stored (expert counts only)
        'circles',          # same as 'raw', but a circle is overlayed on the colony
        'segs',             # the output from the U-Net segmentation such that only nonzero pixels in the circle are kept

        'init_regions',     # A segmentation outlining the possible sector-like regions of the colony, both red and white
        'init_bounds',      # The raw segmentation containing only the boundary of the colony
        'init_partitions',  # same as the raw segmentation, but with lines annotated to represent locations of sector borders
        'init_bad',         # A segmentation outlining the sector-like regions that failed the consistency check

        'cor_segs',         # NOTE(review): original comment was copied from 'segs'; presumably the corrected segmentation - confirm
        'cor_bounds',       # The corrected segmentation containing only the boundary of the colony
        'cor_regions',      # NOTE(review): original comment was copied from 'segs'; presumably the corrected counterpart of 'init_regions' - confirm
        'cor_partitions',   # same as the raw segmentation, but with lines annotated to represent locations of sector borders
        'cor_bad',          # A segmentation outlining the sector-like regions that failed the consistency check

        'sectors',          # the output containing the regions in the segmentation where a sector is predicted
        'sector_comps',     # same as 'sectors', but only red pixels in the segmentation are considered
    ]

    for folder_name in file_df['Folder Name']:

        # Make subdirectories for each annotation class, organized by plate
        for subfolder in annotation_subfolders:

            # The 'counted' crops are only produced when expert annotations are in use
            if subfolder == 'counted' and not (use_expert_counts == True):
                continue

            # exist_ok=True makes re-running the cell harmless when the folders are already there
            os.makedirs(output_crops_folder + '/' + subfolder + '/' + folder_name, exist_ok=True)

## Classify colonies in testing images

In [None]:
# Code for colony classification (this should be all python code)
# 1. Read in image and corresponding segmentation.
# 2. Read in csv files containing circle locations.
# 3. For each row in the csv file, crop out the circular region, estimate size.
# 4. Restrict data collection to pixels within the circle detected, excluding all other pixels.
# 5. Split the components of the image into red, white and background.
# 6. Get boundary components of the colony, check for consistency.
# 7. Output predicted number of sectors and their sizes after the consistency check.
# 8. Save the cropping of the colony in a few ways:
#   - the raw colony
#   - the raw colony with circle overlayed
#   - the segmentation of the colony within the circular region
#   - the segmentation of the colony with lines drawn on the image to represent sector borders
#   - the predicted sector-like regions, where each sector is a different shade of gray.
#   - similar to the previous, but keeping pixels classified only as red pixels.
#   - the segmentation that is corrected following the consistency check (possibly doing an additional check on the white pixels)
# 9. Save data on the colony itself, including the sector information, to a row in a table.
# 10. Save the table to a csv file.
# 11. Repeat all steps above for each image.

# Issues to work on:
# Verify that the purity metric is properly being utilized
# Figure out what to do with the holes inside colony segmentations.
#   -- A hole has its own boundary, so could look for the boundary of the hole.
#   -- The boundary of the hole MUST be smaller than the boundary of the entire colony
#   -- Find all connected components of the boundary, then exclude the LARGEST one.
#   -- For all other boundary components, these are expected to be the holes.  You need a procedure to fill them.
#   -- The procedure could be as simple as filling the hole with the class pertaining to the most common pixel on the boundary.

# Implementation already existing:



if classify_testing_colonies == True:

    %matplotlib inline

    starting_image = True

    if use_expert_counts == True:
        dot_quant_images = sorted(glob.glob(additional_data_folder + '/Quant/' + '*'))
        dot_state_images = sorted(glob.glob(additional_data_folder + '/State/' + '*'))


    # Run each plate through the classification pipeline

    for (test_image, CHT_image) in test_image_pairs:

        # Get plate name 
        this_plate = pathlib.PurePath(test_image)
        plate_name = this_plate.name
        plate_stem = os.path.splitext(plate_name)[0]

        print('Plate: ' + str(plate_name) + ':')
        if save_all_annotations == True:
            print('Annotations will be saved within subfolders named ' + "\'" + file_dict[plate_name] + "\'")

        # Read images of the plate
        x = read_image(test_image)
        x_CHT = read_image(CHT_image)

        # initialize lists for storing values
        all_cropped_colonies = []

        # Sizes of regions in pixels
        white_region_sum = []
        red_region_sum = []
        colony_region_sum = []
        sector_region_sum = []

        corrected_white_region_sum = []
        corrected_red_region_sum = []
        corrected_sector_region_sum = []

        true_white_region_sum = []
        true_red_region_sum = []
        true_colony_region_sum = []
        true_sector_region_sum = []

        # Counting sectors
        initial_region_counts = []
        all_sector_counts = []
        true_sector_counts = []

        

        boundary_region_sum = []
        colony_prop_sum = []

        # Purity scores for regions and colonies
        average_sector_score = []
        average_sector_iou = []

        weighted_sector_score_before = []
        weighted_red_sector_score_before = []
        weighted_white_sector_score_before = []

        weighted_sector_score_after = []
        weighted_red_sector_score_after = []
        weighted_white_sector_score_after = []

        # Bounding box info for colonies in images
        sides_vert_top = [];
        sides_vert_bottom = [];
        sides_horz_left = [];
        sides_horz_right = [];

        # Test lists
        colony_is_connected = []
        colony_is_approx_connected = []
        boundary_is_connected = []
        colony_is_whole = []
        boundary_is_hamilton = []
        colony_is_approx_convex = []
        colony_is_approx_circular = []
        hausdorff_dist_convex = []
        hausdorff_dist_circle = []

        # Lists to store purity scores of each region and the color of the region
        region_purity_before = []
        region_color_before = []
        region_sizes_before = []

        weighted_purity_before = []
        weighted_purity_red_before = []
        weighted_purity_white_before = []

        region_purity_after = []
        region_color_after = []
        region_sizes_after = []

        weighted_purity_after = []
        weighted_purity_red_after = []
        weighted_purity_white_after = []

        cured_colony_before = []
        cured_colony_after = []

        stable_colony_before = []
        stable_colony_after = []

        # Load images here if using quantifable colony data/annotations
        if use_expert_counts == True:
            x_quant = read_image(additional_data_folder + '/Quant/' + plate_stem + '.tif')
            x_state = read_image(additional_data_folder + '/State/' + plate_stem + '.tif')
            quantifiable_colony = []
            quantifiable_cured = []
            quantifiable_stable = []
            quantifiable_sectored = []

        #------------------------------
        # Read in plate and locate colonies
        #------------------------------
        
        # Read the segmentation of the plate, and keep track of which class each pixel belongs to
        p = read_mask(test_seg_folder + '/' + this_plate.stem + '.png')
        #p = read_mask(main_folder + '/Test Segs/' + specific_test_folder + '/Class_3/' + this_plate.stem + '.png')

        p_full = tf.identity(p).numpy()
        in_class = tf.math.greater(tf.constant(p_full), tf.constant([0])).numpy()

        p = p.astype(np.uint8)
        # white pixels
        p_1 = tf.math.equal(tf.constant(p_full), tf.constant([255])).numpy().astype(np.uint8) * 255
        # red pixels
        p_2 = tf.math.equal(tf.constant(p_full), tf.constant([127])).numpy().astype(np.uint8) * 255
        p_full = 255 * in_class.astype(np.uint8)

        # Gather the location and radii data from the colonies
        # If there are no colonies detected, or there is no table, skip this section at once

        colony_locations = pd.read_csv(test_circle_data_folder + '/' + pathlib.Path(test_image).stem + '.csv')
        #colony_locations = pd.read_csv(main_folder + '/Test Segs CHT Data/' + specific_test_folder + '/Class_3/' + pathlib.Path(test_image).stem + '.csv')

        # Output information from the imported csv
        print(str(len(colony_locations["Radius"])) + ' colonies found using circle Hough transform')
        plate_names = np.repeat(plate_name, len(colony_locations["Radius"]))
        colony_numbers = np.array(range(len(colony_locations["Radius"])))

        #---------------------------------------------------------------------

        # CLASSIFICATION PIPELINE START
        # Pre-processing step

        # Images to save:
        # - Cropping of the colony
        # - Cropping of the colony with the overalyed circle
        # - Original colony segmentation, such that only the pixels inside the overlayed circle are considered.

        for this_index in range(0,len(colony_numbers)):

            print('')
            print('Colony ' + str(this_index))
            # get example image using bounding indices
            #this_index = 2

            # Copy location data from colony image
            top_left_x = colony_locations["Top Left (x)"][this_index]
            top_left_y = colony_locations["Top Left (y)"][this_index]
            box_width = colony_locations["Width"][this_index]
            box_height = colony_locations["Height"][this_index]

            # Store the locations in another set of lists
            sides_vert_top.append(top_left_y)
            sides_vert_bottom.append(top_left_y + box_height - 1)
            sides_horz_left.append(top_left_x)
            sides_horz_right.append(top_left_x + box_width - 1)

            # Grab segmentation of colony using coordinates copied above
            # The colony image is NOT a boolean array
            colony_image = p[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)]
            ellipse_array = create_filled_ellipse_in_array(colony_image)
            colony_image = np.multiply(colony_image, ellipse_array) # unpadded segmentation with the pixels inside the overlayed circle

            if use_expert_counts == True:
                quant_image = x_quant[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1), :]
                state_image = x_state[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1), :]

            # The colony mask IS a boolean array.  Keep all the pixels of each class.
            white_colony_mask = p_1[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)] > 0
            red_colony_mask = p_2[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)] > 0 
            colony_mask = np.logical_or(white_colony_mask, red_colony_mask) # sanity check to see of this is the same as colony image

            # Add segmentation of the pixels inside the circular region of detection, and apply the mask.  This ensures we only use the pixels inside the circle for analysis.
            # Booleans are inputs, and booleans are outputs
            # Force a circle in colonies detected in the circle detection step
            white_colony_mask = np.multiply(white_colony_mask, ellipse_array)
            red_colony_mask = np.multiply(red_colony_mask, ellipse_array)
            colony_mask = np.logical_or(white_colony_mask, red_colony_mask)

            # Get initial measure of the sizes of the red and white regions of the colony
            white_region_sum.append(np.sum(white_colony_mask))
            red_region_sum.append(np.sum(red_colony_mask))
            colony_region_sum.append(np.sum(colony_mask))
            sector_region_sum.append(np.sum(red_colony_mask) / np.sum(colony_mask))

            # Find colony boundaries, ensuring that the boundaries are ON the colony, not ADJACENT to it.
            edge_mask_unpadded = get_colony_boundary_binary(colony_image) # The function is above
            interior_mask_unpadded = np.logical_xor(colony_image > 0, edge_mask_unpadded) # Second mask containing only the interior pixels of the segmentation
            interior_colony = np.multiply(colony_image, interior_mask_unpadded) # This is NOT a boolean

            #---------------------------------------------------------------
            # Get quantifiable colony labels (if applicable)
            #---------------------------------------------------------------

            # If we have locations of quantifiable colonies, use this to gather the colonies.
            if use_expert_counts == True:
                #----------------------------------
                # Determine where the quantifiable colonies are (they have black dots on them)
                # Set color boundaries for the markers in the counted images
                black_dot_boundaries = [([0, 0, 0], [5, 5, 5])]

                for (lower, upper) in black_dot_boundaries:
                # create NumPy arrays from the boundaries
                    lower = np.array(lower, dtype = "uint8")
                    upper = np.array(upper, dtype = "uint8")
                    # find the colors within the specified boundaries and apply
                    # the mask
                    dot_mask = cv2.inRange((quant_image*255).astype(np.uint8), lower, upper)
                    #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
                    # Get connected components of the detected pixels
                    black_labels = label(dot_mask)
                    num_black_labels = len(np.unique(black_labels))
                    if num_black_labels <= 1:
                        # No dot was detected.  Thus the colony was considered non-quantifiable.
                        colony_is_quantifiable = False
                    else:
                        # Loop through each component.  Find one component that is not too small and is directly on the colony
                        colony_center_y = (quant_image.shape[0] - 1) / 2.0
                        colony_center_x = (quant_image.shape[1] - 1) / 2.0
                        for this_comp in range(1, num_black_labels):
                            this_dot_comp = black_labels == this_comp
                            # Get centroid of component
                            (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
                            dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
                            if dot_dist < colony_locations["Radius"][this_index]:
                                colony_is_quantifiable = True
                                break
                                # end the loop, as we found a dot on the colony
                            
                            if this_comp == (num_black_labels - 1):
                                # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
                                colony_is_quantifiable = False

                quantifiable_colony.append(colony_is_quantifiable)

                #print('Colony', this_index, ': Quantifiable:', colony_is_quantifiable)

                #----------------------------------------
                # Determine if colony is cured, stable, or sectored

                # RGB version
                # cured_dot_boundaries = [([34-5, 177-5, 76-5], [34+5, 177+5, 76+5])]
                # stable_dot_boundaries = [([237-5, 28-5, 36-5], [237+5, 28+5, 36+5])]
                # sectored_dot_boundaries = [([63-5, 72-5, 204-5], [63+5, 72+5, 204+5])]

                # BGR version (cv2 needs this)
                # Marker colors were manaully chosen, so info below is based on that.
                # Wes annotations
                cured_dot_boundaries = [([76-5, 177-5, 34-5], [76+5, 177+5, 34+5])]
                stable_dot_boundaries = [([36-5, 28-5, 237-5], [36+5, 28+5, 237+5])]
                sectored_dot_boundaries = [([204-5, 72-5, 63-5], [204+5, 72+5, 63+5])]

                # Nicole annotations
                # cured_dot_boundaries = [([0, 250, 0], [0, 255, 0])]
                # stable_dot_boundaries = [([0, 0, 250], [0, 0, 255])]
                # sectored_dot_boundaries = [([250, 250, 0], [255, 255, 0])]

                # cured_dot_boundaries = [([0, 250, 0], [0, 255, 0])]
                # stable_dot_boundaries = [([0, 0, 250], [0, 0, 255])]
                # sectored_dot_boundaries = [([250, 0, 0], [255, 0, 0])]

                for (lower, upper) in cured_dot_boundaries:
                # create NumPy arrays from the boundaries
                    lower = np.array(lower, dtype = "uint8")
                    upper = np.array(upper, dtype = "uint8")
                    # find the colors within the specified boundaries and apply
                    # the mask
                    #print(np.unique((colony_image*255).astype(np.uint8)))
                    dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
                    #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
                    # Get connected components of the detected dot pixels
                    #print(np.unique(dot_mask))
                    cured_labels = label(dot_mask)
                    num_cured_labels = len(np.unique(cured_labels))
                    if num_cured_labels <= 1:
                        # No dot was detected.  Thus the colony was considered non-quantifiable.
                        colony_is_cured = False
                    else:
                        # Loop through each component.  Find one component that is not too small and is directly on the colony
                        colony_center_y = (state_image.shape[0] - 1) / 2.0
                        colony_center_x = (state_image.shape[1] - 1) / 2.0
                        for this_comp in range(1, num_cured_labels):
                            this_dot_comp = cured_labels == this_comp
                            # Get centroid of component
                            (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
                            dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
                            if dot_dist < colony_locations["Radius"][this_index]:
                                colony_is_cured = True
                                break
                                # end the loop, as we found a dot on the colony
                            
                            if this_comp == (num_cured_labels - 1):
                                # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
                                colony_is_cured = False

                quantifiable_cured.append(colony_is_cured)

                #print('Colony', this_index, ': Cured:', colony_is_cured)


                for (lower, upper) in stable_dot_boundaries:
                # create NumPy arrays from the boundaries
                    lower = np.array(lower, dtype = "uint8")
                    upper = np.array(upper, dtype = "uint8")
                    # find the colors within the specified boundaries and apply
                    # the mask
                    #print(np.unique((colony_image*255).astype(np.uint8)))
                    dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
                    #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
                    # Get connected components of the detected pixels
                    #print(np.unique(dot_mask))
                    stable_labels = label(dot_mask)
                    num_stable_labels = len(np.unique(stable_labels))
                    if num_stable_labels <= 1:
                        # No dot was detected.  Thus the colony was considered non-quantifiable.
                        colony_is_stable = False
                    else:
                        # Loop through each component.  Find one component that is not too small and is directly on the colony
                        colony_center_y = (state_image.shape[0] - 1) / 2.0
                        colony_center_x = (state_image.shape[1] - 1) / 2.0
                        for this_comp in range(1, num_stable_labels):
                            this_dot_comp = stable_labels == this_comp
                            # Get centroid of component
                            (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
                            dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
                            if dot_dist < colony_locations["Radius"][this_index]:
                                colony_is_stable = True
                                break
                                # end the loop, as we found a dot on the colony
                            
                            if this_comp == (num_stable_labels - 1):
                                # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
                                colony_is_stable = False

                quantifiable_stable.append(colony_is_stable)

                #print('Colony', this_index, ': Stable:', colony_is_stable)


                for (lower, upper) in sectored_dot_boundaries:
                # create NumPy arrays from the boundaries
                    lower = np.array(lower, dtype = "uint8")
                    upper = np.array(upper, dtype = "uint8")
                    # find the colors within the specified boundaries and apply
                    # the mask
                    #print(np.unique((colony_image*255).astype(np.uint8)))
                    dot_mask = cv2.inRange((state_image*255).astype(np.uint8), lower, upper)
                    #dot_output = cv2.bitwise_and((count_image*255).astype(np.uint8), dot_mask)
                    # Get connected components of the detected pixels
                    #print(np.unique(dot_mask))
                    sectored_labels = label(dot_mask)
                    num_sectored_labels = len(np.unique(sectored_labels))
                    if num_sectored_labels <= 1:
                        # No dot was detected.  Thus the colony was considered non-quantifiable.
                        colony_is_sectored = False
                    else:
                        # Loop through each component.  Find one component that is not too small and is directly on the colony
                        colony_center_y = (state_image.shape[0] - 1) / 2.0
                        colony_center_x = (state_image.shape[1] - 1) / 2.0
                        for this_comp in range(1, num_sectored_labels):
                            this_dot_comp = sectored_labels == this_comp
                            # Get centroid of component
                            (comp_centroid_y, comp_centroid_x) = ndimage.center_of_mass(this_dot_comp)
                            dot_dist = math.sqrt(((comp_centroid_y - colony_center_y) ** 2) + ((comp_centroid_x - colony_center_x) ** 2))
                            if dot_dist < colony_locations["Radius"][this_index]:
                                colony_is_sectored = True
                                break
                                # end the loop, as we found a dot on the colony
                            
                            if this_comp == (num_sectored_labels - 1):
                                # We looped through all the dots, but none of them were on the colony.  Don't analyze this colony.
                                colony_is_sectored = False

                quantifiable_sectored.append(colony_is_sectored)

                #print('Colony', this_index, ': Sectored:', colony_is_sectored)


                #----------------------------------

            #-----------------------------------------------------
            # Get connectedness properties of the segmentation
            #-----------------------------------------------------

            # Use this information to test whether the segmentation meets the conditions

            # Condition 1 test: is the segmentation one connected component?
            condition_1_test_strong, condition_1_test_weak = check_components_of_colony(colony_mask)
            colony_is_connected.append(condition_1_test_strong)
            colony_is_approx_connected.append(condition_1_test_weak)
            #print('Condition 1: Seg is one component: ' + str(condition_1_test_strong))
            #print("Condition 1: Seg is \'approximately\' one component: " + str(condition_1_test_weak))

            # Condition 2 test: Is the boundary one connected component?
            condition_2_test = check_components_of_boundary(edge_mask_unpadded)
            boundary_is_connected.append(condition_2_test)
            #print('Condition 2: Boundary is one component: ' + str(condition_2_test))

            # Condition 3 test: Are there holes in the segmentation?
            condition_3_test = check_for_holes(colony_mask, edge_mask_unpadded)
            colony_is_whole.append(condition_3_test)
            #print('Condition 3: Segmentation has no holes: ' + str(condition_3_test))

            # Condition 4 test: Is the boundary a Hamiltonian cycle? (no ready yet)
            #condition_4_test = get_hamilton_cycle(colony_mask, edge_mask_unpadded)
            #print('Has Hamiltonian cycle: ' + str(condition_4_test))

            # Condition 5: Check circularity and convexity
            condition_5_convex, condition_5_circular = compare_convex_hull(colony_mask, edge_mask_unpadded)
            colony_is_approx_convex.append(condition_5_convex)
            colony_is_approx_circular.append(condition_5_circular)
            #print('Condition 5: Segmentation is approximately convex: ' + str(condition_5_convex))
            #print('Condition 5: Segmentation is approximately circular: ' + str(condition_5_circular))

            # Condition 6: Check hausdorff distance
            hausdorff_chull, hausdorff_circle = get_hausdorff_distance(colony_mask, edge_mask_unpadded)
            hausdorff_dist_convex.append(hausdorff_chull)
            hausdorff_dist_circle.append(hausdorff_circle)
            #print('Condition 6: Hausdorff distance between boundary and convex hull: ' + str(hausdorff_chull))
            #print('Condition 6: Hausdorff distance between boundary and circle: ' + str(hausdorff_circle))

            #----------------------------------------
            # Partition the boundaries into red and white components

            # Get 'ideal' boundary of the colony
            ideal_circle = create_circle_boundary(edge_mask_unpadded, colony_locations["Radius"][this_index])

            # Find connected components of the red and white pixels found on the boundary
            red_boundary_skeleton, white_boundary_skeleton, boundary_mask_h, boundary_mask_w = get_boundary_partitions(red_colony_mask, white_colony_mask, edge_mask_unpadded)

            #plt.imshow(red_boundary_skeleton)
            #plt.title('Red Boundary Skeleton')

            # Save the three images using this data
            #   - Oringinal image padded
            #   - CHT image padded
            #   - Segmentation padded
            # Force a circle like previously
            # Crop window shared by every image of this colony: the bounding box grown by
            # image_padding on each side and clamped against 0 and H-1 / W-1.
            crop_row_window = slice(max((top_left_y - 1) - image_padding, 0),
                                    min((top_left_y + box_height - 1) + image_padding, H - 1))
            crop_col_window = slice(max((top_left_x - 1) - image_padding, 0),
                                    min((top_left_x + box_width - 1) + image_padding, W - 1))

            # Apply the same window to x, x_CHT and p (written to the 'raw', 'circles' and 'segs' folders below)
            padded_x = 255 * x[crop_row_window, crop_col_window, :]
            padded_x_CHT = 255 * x_CHT[crop_row_window, crop_col_window, :]
            padded_mask = p[crop_row_window, crop_col_window]

            # Multiply the cropped mask by the array returned by create_filled_ellipse_in_array
            ellipse_array_2 = create_filled_ellipse_in_array(padded_mask, padding = image_padding)
            padded_mask = np.multiply(padded_mask, ellipse_array_2)

            # Save the colony images as previously
            if save_all_annotations == True:
                # Every crop of this colony shares the same '<plate folder>/<image stem>_c_<index>' name
                crop_file_stem = file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index)
                for crop_subfolder, crop_extension, crop_pixels in (('/raw/', '.jpg', padded_x),
                                                                    ('/circles/', '.jpg', padded_x_CHT),
                                                                    ('/segs/', '.png', padded_mask)):
                    if not cv2.imwrite(output_crops_folder + crop_subfolder + crop_file_stem + crop_extension, crop_pixels):
                        raise Exception('Could not write image.')

                # Save the croppings for individual colonies which were annotated
                # (only reached when save_all_annotations is also enabled)
                if use_expert_counts == True:
                    padded_x_count = 255 * x_quant[crop_row_window, crop_col_window, :]
                    if not cv2.imwrite(output_crops_folder + '/counted/' + crop_file_stem + '.png', padded_x_count):
                        raise Exception('Could not write image.')
            #-------------------------------------------------------------

            # CLASSIFICATION STEP BEGINS
            # GET INITIAL REGIONAL BREAKDOWN AND RED REGION ANNOTATIONS OF THE COLONY

            # Should store information about the following:
            #   - Idealized red and white regions
            #   - The boundaries of each red and white region
            #   - The sizes of each region
            #   - The purity scores of each region
            #   - the states of each colony (cured, stable)

            # Images to save in this section
            # - Regional segmentation
            # - Boundary of the colony
            # - Red region boundary annotation
            # - Regions that fail consistency check

            # Plot padded version of the initial segmentation.  Annotations will be saved onto the image.
            # The image is padded by image_padding (rather than a hard-coded 5) so that it stays
            # aligned with the endpoint coordinates plotted on ax1, which are offset by image_padding.
            colony_image_padded = np.pad(colony_image, image_padding)
            fig1, ax1 = plt.subplots()
            ax1.imshow(colony_image_padded, cmap='gray')


            # Initialized to True so that we can look at how each step of the pipeline is performing
            recheck_boundaries = True

            # Label the connected components of the red and white boundary skeletons;
            # each component is a candidate red or white region of the colony
            red_labels = label(red_boundary_skeleton)
            white_labels = label(white_boundary_skeleton)

            # Empty masks for the red side: filled regions, boundary pixels,
            # and regions which fail the consistency check
            initial_red_region_mask = np.zeros_like(red_boundary_skeleton)
            initial_red_boundary_mask = np.zeros_like(red_boundary_skeleton)
            initial_bad_red_score_mask = np.zeros_like(red_boundary_skeleton)

            # Same three masks for the white side
            initial_white_region_mask = np.zeros_like(white_boundary_skeleton)
            initial_white_boundary_mask = np.zeros_like(white_boundary_skeleton)
            initial_bad_white_score_mask = np.zeros_like(white_boundary_skeleton)

            # Keep track of boundaries which fail the consistency check
            boundary_correction = np.zeros_like(red_boundary_skeleton)

            # Per-region records: endpoints of the component, purity score,
            # result of the consistency check, and size
            red_component_endpoints, red_component_scores = [], []
            red_component_checks, red_component_sizes = [], []

            white_component_endpoints, white_component_scores = [], []
            white_component_checks, white_component_sizes = [], []

            # Number of boundary components of each color (the first unique label is skipped)
            num_red_boundaries = np.unique(red_labels)[1:].size
            num_white_boundaries = np.unique(white_labels)[1:].size

            # Initial count of the number of sectors is the number of red boundaries
            initial_region_counts.append(num_red_boundaries)

            # States can be initially predicted from the boundary counts:
            # no white boundary -> recorded as cured, no red boundary -> recorded as stable
            cured_colony_before.append(num_white_boundaries == 0)
            stable_colony_before.append(num_red_boundaries == 0)

            # Now, to analyze each of the regions to determine if they are sectored.
            # For every boundary component: find its endpoints, build the filled sector it
            # spans, score the sector against the color mask, and record the results.

            # Analyze the initial red regions of the colony
            for this_label in np.unique(red_labels)[1:]:
                # Binary mask holding only this boundary component (the comparison already
                # allocates a new array, so no copy of red_labels is needed)
                red_component = (red_labels == this_label).astype(np.int32)

                # Append the red boundary pixels on this component to the red boundary mask
                initial_red_boundary_mask = np.logical_or(initial_red_boundary_mask, red_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], red_component)

                # Get the angle of the endpoints relative to the colony center
                [endpoint_angles, endpoint_locations, endpoints_x, endpoints_y] = get_endpoint_locations(full_endpoints_list, colony_mask, colony_locations["Radius"][this_index])

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                # Append the predicted filled region to the red region mask
                initial_red_region_mask = np.logical_or(initial_red_region_mask, sector_filled)

                # Apply consistency check to score the region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, red_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    recheck_boundaries = True
                    initial_bad_red_score_mask = np.logical_or(initial_bad_red_score_mask, sector_filled)

                # Append scores and info to lists
                red_component_endpoints.append(full_endpoints_list) # endpoints of the connected component on the boundary
                red_component_scores.append(prop_interior) # purity score of the region
                red_component_checks.append(confirm_check) # result of the consistency check
                # Size of THIS region only.  Summing initial_red_region_mask here would count the
                # running union of every region processed so far and inflate later regions.
                red_component_sizes.append(np.sum(np.asarray(sector_filled) > 0))

                # ---ANNOTATION PROCEDURE---

                # Plot the lines of the sector (and the boundary line) onto the colony segmentation
                length_points = len(endpoints_x)
                if length_points > 0: # nothing to draw when no endpoints were returned
                    # Shift the points by image_padding to match the padded image shown on ax1,
                    # then rotate the list by one position (same ordering as before)
                    plot_bounds_x = np.roll(np.array([point_x + image_padding for point_x in endpoints_x]), 1)
                    plot_bounds_y = np.roll(np.array([point_y + image_padding for point_y in endpoints_y]), 1)

                    line_style = ':' if (len(plot_bounds_x) == 2) else '-'
                    ax1.plot(plot_bounds_y, plot_bounds_x, linewidth=5, linestyle=line_style, alpha=0.85)
                    if len(plot_bounds_x) == 1:
                        # A single point: draw a circle of the colony radius around it.
                        # Pass scalar coordinates rather than one-element arrays as the center.
                        full_circle = Circle((plot_bounds_y[0], plot_bounds_x[0]), radius=colony_locations["Radius"][this_index], color='blue', fill=False, linewidth=5, alpha=0.85)
                        ax1.add_patch(full_circle)


            # Do the same for the white regions (no annotation is drawn for these)
            for this_label in np.unique(white_labels)[1:]:
                # Binary mask holding only this boundary component
                white_component = (white_labels == this_label).astype(np.int32)

                # Append the white boundary pixels on this component to the white boundary mask
                initial_white_boundary_mask = np.logical_or(initial_white_boundary_mask, white_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], white_component)

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                initial_white_region_mask = np.logical_or(initial_white_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, white_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    recheck_boundaries = True
                    initial_bad_white_score_mask = np.logical_or(initial_bad_white_score_mask, sector_filled)

                # Append scores and info to lists
                white_component_endpoints.append(full_endpoints_list)
                white_component_scores.append(prop_interior)
                white_component_checks.append(confirm_check)
                # Size of THIS region only (not the running union of all white regions)
                white_component_sizes.append(np.sum(np.asarray(sector_filled) > 0))

            # At this point the red and white region masks are complete, the endpoints of each
            # component are stored (one list per component), and every component has a score.

            # -------------------------------------
            # Gather the purity score, color and size of every region of this colony,
            # red regions first and white regions second
            # -------------------------------------

            all_component_scores = []
            all_region_colors = []
            all_region_sizes = []

            for region_color, color_scores, color_sizes in (('red', red_component_scores, red_component_sizes),
                                                            ('white', white_component_scores, white_component_sizes)):
                if color_scores:
                    all_region_colors.extend([region_color] * len(color_scores))
                    all_component_scores.extend(color_scores)
                    all_region_sizes.extend(color_sizes)
                else:
                    # No region of this color: keep one placeholder entry with NaN score and size
                    all_region_colors.append(region_color)
                    all_component_scores.append(np.nan)
                    all_region_sizes.append(np.nan)

            # One sublist per colony in each of the "before correction" records
            region_purity_before.append(all_component_scores)
            region_color_before.append(all_region_colors)
            region_sizes_before.append(all_region_sizes)

            # -------------------------------------
            # Size-weighted purity scores: red regions only, white regions only,
            # and all regions of the colony together
            # -------------------------------------

            total_red_sum = np.nansum(red_component_sizes)
            total_white_sum = np.nansum(white_component_sizes)

            # Red regions: weights are the region sizes normalized by their total
            if red_component_scores:
                red_region_weights = np.array(red_component_sizes) / total_red_sum
                weighted_red_scores = np.array(red_component_scores) * red_region_weights
            else:
                # No red region: a single zero weight and zero score
                red_region_weights = np.array([0])
                weighted_red_scores = np.array([0])

            # White regions, handled the same way
            if white_component_scores:
                white_region_weights = np.array(white_component_sizes) / total_white_sum
                weighted_white_scores = np.array(white_component_scores) * white_region_weights
            else:
                white_region_weights = np.array([0])
                weighted_white_scores = np.array([0])

            # All regions together, normalized by the total size over both colors
            all_region_sum = np.nansum(all_region_sizes)
            all_region_weights = np.array(red_component_sizes + white_component_sizes) / all_region_sum
            all_region_weighted_scores = np.array(red_component_scores + white_component_scores) * all_region_weights

            # Record the per-region weighted scores and their NaN-ignoring totals
            weighted_purity_red_before.append(list(weighted_red_scores))
            weighted_red_sector_score_before.append(np.nansum(weighted_red_scores))

            weighted_purity_white_before.append(list(weighted_white_scores))
            weighted_white_sector_score_before.append(np.nansum(weighted_white_scores))

            weighted_purity_before.append(list(all_region_weighted_scores))
            weighted_sector_score_before.append(np.nansum(all_region_weighted_scores))

            # Now, create the masks containing the initial regions.
            # Red pixels get class 1 and white pixels class 2 (white wins where both are set);
            # the class index is then scaled by 255/(num_classes-1).
            initial_region_mask = np.maximum(initial_red_region_mask.astype(np.uint8), 2*initial_white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            initial_boundary_mask = np.maximum(initial_red_boundary_mask.astype(np.uint8), 2*initial_white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            initial_score_mask = np.maximum(initial_bad_red_score_mask.astype(np.uint8), 2*initial_bad_white_score_mask.astype(np.uint8))*(255/(num_classes-1))

            # Make sure to pad them in the same way as the output segmentation
            initial_region_mask = np.pad(initial_region_mask, image_padding)
            initial_boundary_mask = np.pad(initial_boundary_mask, image_padding)
            initial_score_mask = np.pad(initial_score_mask, image_padding)

            # Hide the axes of the annotation figure.  ax1 is addressed explicitly so this
            # does not depend on which axes pyplot currently considers active.
            ax1.axis('off')

            # Save the initial region and boundary masks, the image indicating regions which
            # should be investigated further, and the annotated partition figure.
            if save_all_annotations == True:
                initial_file_stem = file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png'
                if not cv2.imwrite(output_crops_folder + '/init_regions/' + initial_file_stem, initial_region_mask):
                    raise Exception('Could not write image.')
                if not cv2.imwrite(output_crops_folder + '/init_bounds/' + initial_file_stem, initial_boundary_mask):
                    raise Exception('Could not write image.')
                if not cv2.imwrite(output_crops_folder + '/init_bad/' + initial_file_stem, initial_score_mask):
                    raise Exception('Could not write image.')
                fig1.savefig(output_crops_folder + '/init_partitions/' + initial_file_stem, bbox_inches='tight', pad_inches=0)

            # Always release the figure, whether or not it was saved
            plt.close(fig1)




            #------------------------------------------------------------------

            # CLASS SWITCH/MERGING STEP

            # Corrections are only applied to regions where the consistency check fails,
            # i.e. less than 50% of pixels in the predicted region are of the same class
            # as the outer boundary pixels.

            # The process repeats until all regions pass the consistency check, or until
            # the iteration cap at the bottom of the loop is hit.

            # NOTE: recheck_boundaries is initialized to True before this section and the
            # initial analysis never sets it to False, so the loop body below always runs at
            # least once, even when every initial region passed the consistency check.
            # In that case the single pass repeats the analysis done above on the same
            # skeletons and applies no swap.

            # This section is therefore purposely redundant; it helps us keep track of
            # which regions are being updated.  The region and boundary masks built in the
            # final pass are the ones used after the loop.



            repetition_counter = 0 # number of passes through the loop for this colony
            performing_correction = False # initialize at the beginning; set to True once any swap is applied

            while recheck_boundaries == True:

                recheck_boundaries = False # reset.
                repetition_counter = repetition_counter + 1

                # recheck_boundaries is switched back to True below if any region fails the consistency check


                # Find the connected components of the skeleton.
                # The number of red connected components gives the initial number of sectors.
                # The number of white connected components are the regions separating the red sectors.
                # A score for sectoriness will be applied to both sets of regions.
                red_labels = label(red_boundary_skeleton)
                white_labels = label(white_boundary_skeleton)

                # Initialize masks separating potential red and white regions.
                # They are rebuilt from scratch on every pass.
                red_region_mask = np.zeros_like(red_boundary_skeleton)
                white_region_mask = np.zeros_like(white_boundary_skeleton)
                red_boundary_mask = np.zeros_like(red_boundary_skeleton)
                white_boundary_mask = np.zeros_like(white_boundary_skeleton)
                # The two score masks are initialized here but not updated anywhere in this loop
                red_score_mask = np.zeros_like(red_boundary_skeleton)
                white_score_mask = np.zeros_like(white_boundary_skeleton)

                # Initialize arrays marking the boundary components to change.
                # These are only updated when there is a potentially misclassified boundary.
                boundary_correction_red = np.zeros_like(red_boundary_skeleton)
                boundary_correction_white = np.zeros_like(white_boundary_skeleton)

                # Generate regions directly from segmentation
                # Iterate through each component and collect some information
                # Collect info about component endpoints and purity scores
                red_component_endpoints = []
                red_component_scores = []
                red_component_checks = []

                white_component_endpoints = []
                white_component_scores = []
                white_component_checks = []

                # Iterate through the red components
                for this_label in np.unique(red_labels)[1:]:
                    # Binary mask holding only this boundary component
                    red_component = copy.deepcopy(red_labels)
                    red_component = red_component == this_label
                    red_component = red_component.astype(np.int32)

                    # Add this component's pixels to the red boundary mask
                    red_boundary_mask = np.logical_or(red_boundary_mask, red_component > 0)

                    # Function to get endpoints of connected component
                    full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], red_component)

                    # Function to get mask representing sector boundary
                    sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                    # Add the filled sector to the red region mask
                    red_region_mask = np.logical_or(red_region_mask, sector_filled)

                    # Apply consistency check to score region
                    confirm_check, prop_interior = check_for_consistency_2(sector_filled, red_colony_mask)

                    # On failure, request another pass and mark this component for the class swap
                    if confirm_check == False:
                        recheck_boundaries = True
                        boundary_correction_red = np.logical_or(boundary_correction_red, red_component)


                    # Append scores and info to lists
                    red_component_endpoints.append(full_endpoints_list)
                    red_component_scores.append(prop_interior)
                    red_component_checks.append(confirm_check)


                # Do the same for the white components
                for this_label in np.unique(white_labels)[1:]:

                    # Binary mask holding only this boundary component
                    white_component = copy.deepcopy(white_labels)
                    white_component = white_component == this_label
                    white_component = white_component.astype(np.int32)

                    # Add this component's pixels to the white boundary mask
                    white_boundary_mask = np.logical_or(white_boundary_mask, white_component > 0)

                    # Function to get endpoints of connected component
                    full_endpoints_list = get_boundary_component_endpoints(colony_image[:,:], white_component)

                    # Function to get mask representing sector boundary
                    sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                    # Add the filled sector to the white region mask
                    white_region_mask = np.logical_or(white_region_mask, sector_filled)

                    # Apply consistency check to score region
                    confirm_check, prop_interior = check_for_consistency_2(sector_filled, white_colony_mask)

                    # On failure, request another pass and mark this component for the class swap
                    if confirm_check == False:
                        recheck_boundaries = True
                        boundary_correction_white = np.logical_or(boundary_correction_white, white_component)

                    # Append scores and info to lists
                    white_component_endpoints.append(full_endpoints_list)
                    white_component_scores.append(prop_interior)
                    white_component_checks.append(confirm_check)

                # If there were regions that failed the consistency check, swap the classes on the boundary
                if recheck_boundaries == True:

                    performing_correction = True # This signifies that boundary information will be different from the initial breakdown

                    # Run the swap functions
                    red_boundary_skeleton = grow_boundary(red_boundary_skeleton, boundary_correction_white) # takes the bad white boundaries and switches them to the red class
                    red_boundary_skeleton = shrink_boundary(red_boundary_skeleton, boundary_correction_red) # removes the bad red boundaries

                    white_boundary_skeleton = grow_boundary(white_boundary_skeleton, boundary_correction_red) # takes the bad red boundaries and switches them to the white class
                    white_boundary_skeleton = shrink_boundary(white_boundary_skeleton, boundary_correction_white) # removes the bad white boundaries

                # Safety cap: give up once more than 20 passes have run (the colony may have an awful segmentation).
                # Note that on the pass that triggers the cap, any swap above has already been applied to the
                # skeletons, while the region/boundary masks still reflect the state before that swap.
                if repetition_counter > 20:
                    warnings.warn('Corrections have been applied too many times.  The colony segmentation used here is likely unsuitable for this pipeline.')
                    break

                # Otherwise, if a swap was done, execution heads back to the top of this while loop.

            # At this point, you should have two masks, one for the red and one for the white regions.
            # You should also have the endpoints of each component, stored as a collection of lists, one list per component.
            # Finally, you should have a score for each of those components.

            # Region and boundary masks from the last pass of the correction loop.
            # Red pixels get class 1 and white pixels class 2, scaled by 255/(num_classes-1).
            corrected_region_mask = np.maximum(red_region_mask.astype(np.uint8), 2*white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            corrected_boundary_mask = np.maximum(red_boundary_mask.astype(np.uint8), 2*white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))

            # First reconstruction: place the loop's boundary mask onto the colony interior and
            # split the result into its white (value 255) and red (value 127) pixels.
            # Plain NumPy comparisons are used; no TensorFlow round-trip is needed for this.
            corrected_colony_image = np.add(interior_colony, corrected_boundary_mask).astype(np.uint8)
            corrected_full = corrected_colony_image.astype(np.int32)
            corrected_white_colony_mask = (corrected_full == 255).astype(np.uint8)
            corrected_red_colony_mask = (corrected_full == 127).astype(np.uint8)

            #--------------------------------------------------
            # Get corrected segmentations and regions

            # Get boundary information from the boundary corrected/merged segmentation
            corrected_red_boundary_skeleton, corrected_white_boundary_skeleton, corrected_boundary_mask_h, corrected_boundary_mask_w = get_boundary_partitions(corrected_red_colony_mask, corrected_white_colony_mask, edge_mask_unpadded)

            # Connected components of the corrected skeletons; these labels are the ones used
            # by the processing of the corrected regions that follows
            red_labels = label(corrected_red_boundary_skeleton)
            white_labels = label(corrected_white_boundary_skeleton)

            # Second reconstruction: rebuild the segmentation from the corrected skeletons.
            # A separate name is used so corrected_boundary_mask keeps the loop-based mask,
            # which is the value it held at the end of this section before this rewrite.
            skeleton_boundary_mask = np.maximum((red_labels > 0).astype(np.uint8), 2*((white_labels > 0).astype(np.uint8)))*(255/(num_classes-1))
            corrected_colony_image = np.add(interior_colony, skeleton_boundary_mask).astype(np.uint8)
            corrected_colony_image_padded = np.pad(corrected_colony_image, image_padding)

            # Re-partition the image following correction
            corrected_full = corrected_colony_image.astype(np.int32)
            corrected_white_colony_mask = (corrected_full == 255).astype(np.uint8)
            corrected_red_colony_mask = (corrected_full == 127).astype(np.uint8)
            corrected_colony_mask = np.logical_or(corrected_white_colony_mask, corrected_red_colony_mask) # sanity check to see if this is the same as colony image

            

            #-------------------------------------------------------------------

            # PROCESSING THE CORRECTED REGIONS
            # If you got to this point, then the boundaries should be consistent with the interior of the colony.

            # Images to save in this section
            # - Regional segmentation with the corrected boundary
            # - Red region boundary annotation with the corrected boundary
            # - Red regions remaining after correction applied

            

            # Empty integer masks, shaped like colony_mask, for the sector locations
            all_sector_bounds, all_sector_filled, all_sector_filled_labels = (
                np.zeros_like(colony_mask).astype(np.int32) for _ in range(3)
            )

            # Save the corrected segmentation
            if save_all_annotations == True:
                corrected_seg_path = output_crops_folder + '/cor_segs/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png'
                if not cv2.imwrite(corrected_seg_path, corrected_colony_image_padded):
                    raise Exception('Could not write image.')

            # Empty masks for the post-correction red regions, boundaries and scores
            post_red_region_mask = np.zeros_like(corrected_red_boundary_skeleton)
            post_red_boundary_mask = np.zeros_like(corrected_red_boundary_skeleton)
            post_red_score_mask = np.zeros_like(corrected_red_boundary_skeleton)

            # ... and the same for the white side
            post_white_region_mask = np.zeros_like(corrected_white_boundary_skeleton)
            post_white_boundary_mask = np.zeros_like(corrected_white_boundary_skeleton)
            post_white_score_mask = np.zeros_like(corrected_white_boundary_skeleton)

            # Counter for the number of sectors in this colony
            total_sectors = 0

            # Faded copy of the corrected colony mask (colony pixels set to 20); sectors are placed on top of it.
            # astype() already returns a new array, so the original mask is left untouched.
            corrected_colony_mask_faded = corrected_colony_mask.astype(np.uint8)
            corrected_colony_mask_faded[corrected_colony_mask_faded > 0] = 20

            sector_scores = []
            sector_ious = []

            # Figure for annotating the corrected colony segmentation.  Annotations will be saved onto the image.
            fig2, ax2 = plt.subplots()
            ax2.imshow(corrected_colony_image_padded, cmap='gray')

            # Fresh per-region records for scoring the regions one more time.
            # All regions should pass the consistency check by this point.  If not, then something is wrong.
            red_component_endpoints, red_component_scores = [], []
            red_component_checks, red_component_sizes = [], []

            white_component_endpoints, white_component_scores = [], []
            white_component_checks, white_component_sizes = [], []

            # Number of boundary components of each color (the first unique label is skipped)
            num_red_boundaries = np.unique(red_labels)[1:].size
            num_white_boundaries = np.unique(white_labels)[1:].size

            # A colony with no white boundary is recorded as cured after correction
            cured_colony_after.append(num_white_boundaries == 0)

            #if ((num_red_boundaries == 0) & (num_white_boundaries == 1)):
            if (num_red_boundaries == 0):
                stable_colony_after.append(True)
            else:
                stable_colony_after.append(False)

            for this_label in np.unique(red_labels)[1:]:
                red_component = copy.deepcopy(red_labels)
                red_component = red_component == this_label
                red_component = red_component.astype(np.int32)

                post_red_boundary_mask = np.logical_or(post_red_boundary_mask, red_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(corrected_colony_image[:,:], red_component)

                # If exactly two points are found, then everything's good.

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(red_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                post_red_region_mask = np.logical_or(post_red_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, corrected_red_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    print('Double check your code.  The red consistency check failed for this colony with score ' + str(prop_interior))
                    #raise IOError('Something is wrong with how regions are being scored')

                # Append scores and info to lists
                red_component_endpoints.append(full_endpoints_list)
                red_component_scores.append(prop_interior)
                red_component_checks.append(confirm_check)
                red_component_sizes.append(np.sum(post_red_region_mask))

                # Code for plotting the annotations

                # For the consistent sectors, get the angles of the endpoints relative to the center
                # colony mask, or any other array with the same size and shape, will work as input as it's only needed for size info
                [endpoint_angles, endpoint_locations, endpoints_x, endpoints_y] = get_endpoint_locations(full_endpoints_list, corrected_colony_mask, colony_locations["Radius"][this_index])
                #print(endpoints_x)

                # Add to mask containing sector locations
                #sector_filled = np.logical_or(sector_boundary, sector_interior)
                all_sector_bounds = np.logical_or(all_sector_bounds, sector_boundary)
                all_sector_filled = np.logical_or(all_sector_filled, sector_filled)
                all_sector_filled_labels[sector_filled.astype(bool)] = this_label
                total_sectors = total_sectors + 1
                corrected_colony_mask_faded[sector_filled.astype(bool)] = 255 / this_label

                # Get a score for sectoriness.  We want to be sure we are capturing the entire sector
                this_sector_mask = np.logical_and(sector_filled, red_colony_mask)
                this_union_mask = np.logical_or(sector_filled, red_colony_mask)
                this_sector_score = np.sum(this_sector_mask) / np.sum(sector_filled)
                this_sector_iou = np.sum(this_sector_mask) / np.sum(this_union_mask)
                sector_scores.append(this_sector_score)
                sector_ious.append(this_sector_iou)

                # Plot the lines of the sector (and the boundary line) onto the colony segmentation
                length_points = len(endpoints_x)
                #print(length_points)
                #print(endpoints_x)
                if len(np.unique(red_labels)[1:]) > 0:
                    plot_bounds_x = []
                    plot_bounds_y = []
                    plot_bounds_x.append(endpoints_x[0] + 5)
                    plot_bounds_y.append(endpoints_y[0] + 5)
                    # Get list of center and endpoints on the boundary
                    for this_bound in range(0, length_points-1):
                        plot_bounds_x.append(endpoints_x[this_bound+1] + 5)
                        plot_bounds_y.append(endpoints_y[this_bound+1] + 5)
                        #plt.plot(plot_points_y, plot_points_x, color='blue')
                        #print(endpoints_x[0:2])
                        #print(endpoints_y[0:2])
                    plot_bounds_x = np.roll(np.array(plot_bounds_x), 1)
                    plot_bounds_y = np.roll(np.array(plot_bounds_y), 1)
                    #print(plot_bounds_x)
                    #print(plot_bounds_y)
                    line_style = ':' if (len(plot_bounds_x) == 2) else '-'
                    ax2.plot(plot_bounds_y, plot_bounds_x, linewidth=5, linestyle=line_style, alpha=0.85)
                    if len(plot_bounds_x) == 1:
                        full_circle = Circle((plot_bounds_y, plot_bounds_x), radius=colony_locations["Radius"][this_index], color='blue', fill=False, linewidth=5, alpha=0.85)
                        ax2.add_patch(full_circle)



            for this_label in np.unique(white_labels)[1:]:
                white_component = copy.deepcopy(white_labels)
                white_component = white_component == this_label
                white_component = white_component.astype(np.int32)

                post_white_boundary_mask = np.logical_or(post_white_boundary_mask, white_component > 0)

                # Function to get endpoints of connected component
                full_endpoints_list = get_boundary_component_endpoints(corrected_colony_image[:,:], white_component)

                # If exactly two points are found, then everything's good.

                # Function to get mask representing sector boundary
                sector_boundary, sector_interior, sector_filled = get_sector_masks(white_component, full_endpoints_list)

                # Fill initial region mask with the filled sector
                post_white_region_mask = np.logical_or(post_white_region_mask, sector_filled)

                # Apply consistency check to score region
                confirm_check, prop_interior = check_for_consistency_2(sector_filled, corrected_white_colony_mask)

                # Update score mask to denote where the consistency check failed
                if confirm_check == False:
                    print('Double check your code.  The white consistency check failed for this colony with score ' + str(prop_interior))

                # Append scores and info to lists
                white_component_endpoints.append(full_endpoints_list)
                white_component_scores.append(prop_interior)
                white_component_checks.append(confirm_check)
                white_component_sizes.append(np.sum(post_white_region_mask))

            print('Scores for red regions: ' + str(red_component_scores))
            print('Scores for white regions: ' + str(white_component_scores))

            # Store the purity scores in a sublist, along with a second sublist indicating the color of each region

            all_component_scores = []
            all_region_colors = []
            all_region_sizes = []

            if not red_component_scores:
                all_region_colors = all_region_colors + ['red']
                all_component_scores = all_component_scores + [np.nan]
                all_region_sizes = all_region_sizes + [np.nan]
            else:
                all_region_colors = all_region_colors + (['red'] * len(red_component_scores))
                all_component_scores = all_component_scores + red_component_scores
                all_region_sizes = all_region_sizes + red_component_sizes

            if not white_component_scores:
                all_region_colors = all_region_colors + ['white']
                all_component_scores = all_component_scores + [np.nan]
                all_region_sizes = all_region_sizes + [np.nan]
            else:
                all_region_colors = all_region_colors + (['white'] * len(white_component_scores))
                all_component_scores = all_component_scores + white_component_scores
                all_region_sizes = all_region_sizes + white_component_sizes

            region_purity_after.append(all_component_scores)
            region_color_after.append(all_region_colors)
            region_sizes_after.append(all_region_sizes)

            # Compute weighted purity scores over all regions, for white only, and for red only

            total_red_sum = np.nansum(red_component_sizes)
            total_white_sum = np.nansum(white_component_sizes)

            if not red_component_scores:
                red_region_weights = np.array([0])
                weighted_red_scores = np.array([0])
            else:
                red_region_weights = np.divide(np.array(red_component_sizes), total_red_sum) # this vector should add to 1, as this is a normalization of the weights
                weighted_red_scores = np.multiply(np.array(red_component_scores), red_region_weights)

            if not white_component_scores:
                white_region_weights = np.array([0])
                weighted_white_scores = np.array([0])
            else:
                white_region_weights = np.divide(np.array(white_component_sizes), total_white_sum) # this vector should add to 1, as this is a normalization of the weights
                weighted_white_scores = np.multiply(np.array(white_component_scores), white_region_weights)

            # Get weighted average over both regions together
            all_region_sum = np.nansum(all_region_sizes)
            all_region_weights = np.divide(np.array(red_component_sizes + white_component_sizes), all_region_sum)
            all_region_weighted_scores = np.multiply(np.array(red_component_scores + white_component_scores), all_region_weights)

            weighted_purity_red_after.append(list(weighted_red_scores))
            weighted_purity_white_after.append(list(weighted_white_scores))
            weighted_purity_after.append(list(all_region_weighted_scores))
            weighted_red_sector_score_after.append(np.nansum(weighted_red_scores))
            weighted_white_sector_score_after.append(np.nansum(weighted_white_scores))
            weighted_sector_score_after.append(np.nansum(all_region_weighted_scores))
  
            # Now, create the masks containing the initial_regions
            post_region_mask = np.maximum(post_red_region_mask.astype(np.uint8), 2*post_white_region_mask.astype(np.uint8))*(255/(num_classes-1))
            post_boundary_mask = np.maximum(post_red_boundary_mask.astype(np.uint8), 2*post_white_boundary_mask.astype(np.uint8))*(255/(num_classes-1))
            post_score_mask = np.maximum(post_red_score_mask.astype(np.uint8), 2*post_white_score_mask.astype(np.uint8))*(255/(num_classes-1))

            post_region_mask = np.pad(post_region_mask, image_padding)
            post_boundary_mask = np.pad(post_boundary_mask, image_padding)
            post_score_mask = np.pad(post_score_mask, image_padding)

            #if not cv2.imwrite(output_crops_folder + '/Colony Corrected Regions/' + pathlib.Path(test_image).stem + '_Colony_' + str(this_index) + '.png', post_region_mask):
            #    raise Exception('Could not write image.')

            #post_region_mask = np.pad(post_region_mask, 5)
                
            if save_all_annotations == True:
                if not cv2.imwrite(output_crops_folder + '/cor_bounds/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_boundary_mask):
                    raise Exception('Could not write image.')
                if not cv2.imwrite(output_crops_folder + '/cor_regions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_region_mask):
                    raise Exception('Could not write image.')
                if not cv2.imwrite(output_crops_folder + '/cor_bad/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', post_score_mask):
                    raise Exception('Could not write image.')
            plt.axis('off')
            if save_all_annotations == True:
                fig2.savefig(output_crops_folder + '/cor_partitions/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', bbox_inches='tight', pad_inches=0)
            plt.close(fig2);
                    
                
            #------------------------------------------------------------------

            # PRINTING RESULTS OF COLONY

            if not sector_scores:
                sector_scores = 0
                sector_ious = 0
            average_sector_score.append(np.mean(sector_scores))
            average_sector_iou.append(np.mean(sector_ious))
            #print('Colony ' + str(this_index))
            print('Estimated number of sectors: ' + str(total_sectors))
            all_sector_counts.append(total_sectors)
            print('Average sector score: ' + str(average_sector_score[-1]))
            print('Average sector score (IoU): ' + str(average_sector_iou[-1]))

            # plt.axis('off')
            # plt.savefig(output_crops_folder + '/Colony Corrected Sector Bounds/' + pathlib.Path(test_image).stem + '_Colony_' + str(this_index) + '.png', bbox_inches='tight', pad_inches=0)
            # plt.close();
            #raise NameError('Text to read') 

            corrected_white_region_sum.append(np.sum(np.logical_xor(corrected_colony_mask, all_sector_filled)))
            corrected_red_region_sum.append(np.sum(all_sector_filled))
            corrected_sector_region_sum.append(np.sum(all_sector_filled) / np.sum(corrected_colony_mask))

            true_sector_count = 0
            true_sector_counts.append(true_sector_count)
            true_sector_region_sum.append(0)
            corrected_colony_mask_faded[corrected_colony_mask == 0] = 0

            corrected_colony_mask_faded = np.pad(corrected_colony_mask_faded, image_padding)
            corrected_red_colony_mask_padded = np.pad(corrected_red_colony_mask, image_padding)
            corrected_sector_comp_mask = np.multiply(corrected_red_colony_mask_padded, corrected_colony_mask_faded)

            #colony_image_padded = np.pad(colony_image, image_padding)
            #cv2_imshow(colony_mask_faded)
            if save_all_annotations == True:
                if not cv2.imwrite(output_crops_folder + '/sectors/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', corrected_colony_mask_faded):
                    raise Exception('Could not write image.')
                if not cv2.imwrite(output_crops_folder + '/sector_comps/' + file_dict[plate_name] + '/' + pathlib.Path(test_image).stem + '_c_' + str(this_index) + '.png', corrected_sector_comp_mask):
                    raise Exception('Could not write image.')
            
            # # Plot image and lines with matplotlib
            # Recall that endpoints_x[0], endpoints_y[0] is the center of the colony, and
            # endpoints_x[#], endpoints_y[#] is a point on the boundary representing the interfacial point of a sector
            

            # Output labellings of colonies to directory

            # 1. Get the sectors labellings you defined.
            # 2. Get intersection of this labelling with the colony mask.
            # 3. Create copy of colony mask that is faded.  (sectors will be overlayed on this in the next step)
            # 4. Save cropping to Colony Sectors folder 

            # colony_cropping = p[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)]

            # height, width = colony_cropping.shape[:2]
            # # Do some plotting to see the results in action, and to find potential problems
            # cv2_imshow(255*cv2.resize(x[(top_left_y-1):(top_left_y + box_height - 1), (top_left_x-1):(top_left_x + box_width - 1)], (4*width, 4*height)))
            # cv2_imshow(cv2.resize(colony_cropping, (4*width, 4*height)))
            # fig, ax = plt.subplots(1,4)
            # ax[0].imshow(np.squeeze(colony_image))
            # ax[1].imshow(all_sector_bounds)
            # ax[2].imshow(all_sector_filled)
            # ax[3].imshow(all_sector_filled_labels * (255 / (total_sectors+1)))
            # plt.show()

            # these_thetas_sorted, sorted_points = sort_thetas(edge_mask_unpadded)
            # intensity_sum_red, full_seg_sum_red = get_intensity_map(red_colony_mask, sorted_points)
            # intensity_sum_white, full_seg_sum_white = get_intensity_map(white_colony_mask, sorted_points)
            # intensity_sum_full, full_seg_sum_full = get_intensity_map(colony_mask, sorted_points)

            # fig, ax = plt.subplots(1,3,subplot_kw={'projection': 'polar'}, figsize=(15, 4))

            # ax[0].plot(these_thetas_sorted, intensity_sum_red)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[0].set_rmax(max(colony_image.shape)/2.0)
            # ax[0].grid(True)
            # ax[0].set_title("Red Pixels by Angle", va='bottom')

            # ax[1].plot(these_thetas_sorted, intensity_sum_white)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[1].set_rmax(max(colony_image.shape)/2.0)
            # ax[1].grid(True)
            # ax[1].set_title("White Pixels by Angle", va='bottom')

            # ax[2].plot(these_thetas_sorted, intensity_sum_full)
            # #ax.plot(these_thetas_sorted, full_seg_sum)
            # ax[2].set_rmax(max(colony_image.shape)/2.0)
            # ax[2].grid(True)
            # ax[2].set_title("Colony Pixels by Angle", va='bottom')

            # plt.show()



        # This ends the loop on the isolated colonies

        # Ensure that the number of sectors are integers
        
        
        # Construct a dataframe with the number of sectors and the proportion of red present

        all_sector_counts_array = np.array(all_sector_counts).astype(int)
        true_sector_counts = np.array(true_sector_counts).astype(int)
        #true_sector_counts = np.repeat(1,len(indiv_good))
        correct_sector_count = np.abs(true_sector_counts - all_sector_counts_array) == 0

        sides_vert_top_array = np.array(sides_vert_top)
        sides_vert_bottom_array = np.array(sides_vert_bottom)
        sides_horz_left_array = np.array(sides_horz_left)
        sides_horz_right_array = np.array(sides_horz_right)
        
        # Gather all data that can be created as a numpy array
        d = {'Plate Name': plate_names,
            'Colony Number': colony_numbers.astype(int),
            'True # Sectors': true_sector_counts,
            'Initial # Regions': np.array(initial_region_counts).astype(int),
            'Pred # Sectors': all_sector_counts_array,
            'Correct # Sectors?': correct_sector_count,
            'White Area (Seg)': white_region_sum,
            'Red Area (Seg)': red_region_sum,
            'Colony Area (Seg)': (np.array(white_region_sum) + np.array(red_region_sum)),
            'White Area (Corr)': corrected_white_region_sum,
            'Red Area (Corr)': corrected_red_region_sum,
            'Colony Area (Corr)': (np.array(corrected_white_region_sum) + np.array(corrected_red_region_sum)),
            'Avg Sector Score': average_sector_score,
            'Avg Sector Score (IoU)': average_sector_iou,
            'Side Top': sides_vert_top_array,
            'Side Bottom': sides_vert_bottom_array,
            'Side Left': sides_horz_left_array,
            'Side Right': sides_horz_right_array,
            '1 Comp': np.array(colony_is_connected),
            '1 Comp (Approx)': np.array(colony_is_approx_connected),
            'Bound Comp': np.array(boundary_is_connected),
            'No Holes': np.array(colony_is_whole),
            'Approx Convex': np.array(colony_is_approx_convex),
            'Approx Circle': np.array(colony_is_approx_circular),
            'Hausdorff Convex': np.array(hausdorff_dist_convex),
            'Hausdorff Circle': np.array(hausdorff_dist_circle)}

        #Gather data based on what else we used as input
        if use_expert_counts == True:
            d['Quantifiable'] = np.array(quantifiable_colony)
            d['Quantifiable Cured'] = np.array(quantifiable_cured)
            d['Quantifiable Stable'] = np.array(quantifiable_stable)
            d['Quantifiable Sectored'] = np.array(quantifiable_sectored)

        df = pd.DataFrame(data=d)

        # Gather data that could NOT be stored as a numpy array, such as nested lists

        df['(BC) Regional Color Classes'] = list(region_color_before)
        df['(BC) Regional Sizes'] = list(region_sizes_before)
        df['(BC) Regional Purity Scores'] = list(region_purity_before)
        df['(BC) Red Purity Scores Weighted'] = list(weighted_purity_red_before)
        df['(BC) White Purity Scores Weighted'] = list(weighted_purity_white_before)
        df['(BC) Weighted Red Average Score'] = weighted_red_sector_score_before
        df['(BC) Weighted White Average Score'] = weighted_white_sector_score_before
        df['(BC) Weighted Full Average Score'] = weighted_sector_score_before
        df['(BC) Cured'] = cured_colony_before
        df['(BC) Stable'] = stable_colony_before

        df['(AC) Regional Color Classes'] = list(region_color_after)
        df['(AC) Regional Sizes'] = list(region_sizes_after)
        df['(AC) Regional Purity Scores'] = list(region_purity_after)
        df['(AC) Red Purity Scores Weighted'] = list(weighted_purity_red_after)
        df['(AC) White Purity Scores Weighted'] = list(weighted_purity_white_after)
        df['(AC) Weighted Red Average Score'] = weighted_red_sector_score_after
        df['(AC) Weighted White Average Score'] = weighted_white_sector_score_after
        df['(AC) Weighted Full Average Score'] = weighted_sector_score_after
        df['(AC) Cured'] = cured_colony_after
        df['(AC) Stable'] = stable_colony_after

        df.to_pickle(test_output_table_folder + '/' + str(plate_stem) + '.pkl')

        # if starting_image == False:
        #     #print('This ran.')
        #     all_df = pd.concat([all_df, df], axis=0, ignore_index=True)
        #     #print('The dataframe was appended.')
        # else:
        #     starting_image = False
        #     all_df = copy.deepcopy(df)

    #all_df
    # all_df.to_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')
    # unpickled_all_df = pd.read_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')
    # unpickled_all_df.to_csv(test_output_folder + '/' + str(weights_file) + '_colony_data.csv')

    # unpickled_all_df

## Merge output tables into one

In [None]:
# Merge the per-plate tables (each written as '<plate_stem>.pkl') into one table.
# NOTE(review): pd.read_pickle can execute arbitrary code embedded in the file;
# only point this at tables written by this notebook.
sorted_tables = sorted(glob.glob(test_output_table_folder + '/' + '*'))
print(sorted_tables)

# Fail early with a clear message; without this check an empty folder only
# surfaces later as a NameError on `all_table_data`.
if not sorted_tables:
    raise FileNotFoundError('No output tables found in ' + str(test_output_table_folder))

# Read every table first, then concatenate once. Concatenating inside the loop
# re-copies all previously accumulated rows on every iteration.
per_plate_tables = []
for this_table in sorted_tables:
    this_table_data = pd.read_pickle(this_table)
    per_plate_tables.append(this_table_data)

# ignore_index=True gives the merged table a fresh 0..n-1 row index.
all_table_data = pd.concat(per_plate_tables, axis=0, ignore_index=True)

all_table_data.to_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')

## Load the merged table on training data and display it

In [None]:
#colony_data = pd.read_csv(output_data_folder + '/' + str(weights_file) + '_colony_data_CHT_' + str(num_classes) + '_puritycheck_byregion.csv')
#colony_data = pd.read_csv(test_output_folder + '/' + str(weights_file) + '_colony_data_CHT_puritycheck_byregion.csv')
#colony_data = pd.read_csv(test_output_folder + '/' + str(weights_file) + '_colony_data_2.csv')
# Load the merged per-colony table.
# NOTE(review): pd.read_pickle can execute arbitrary code embedded in the file;
# only load tables written by this notebook.
colony_data = pd.read_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')

#print(colony_data)
all_plate_names = colony_data['Plate Name'].unique()
all_sector_values = list(range(0, int(np.max(colony_data['Pred # Sectors']))+1))

# The optional ground-truth columns below are assigned as pandas Series, so they are
# aligned on the row index -- presumably the CSV rows are in the same order as
# colony_data; verify against how those CSVs were produced.

# Include true sector counts if available
if use_true_sector_counts == True:

    # Read in table with true sector counts
    true_sector_counts = pd.read_csv(test_output_folder + '/true_colony_data.csv') # load file containing true sector counts
    colony_data['True # Sectors'] = true_sector_counts['True # Sectors'] # insert the true sector counts in the data
    matching_sector_counts = colony_data['True # Sectors'] == colony_data['Pred # Sectors'] # compare the true and predicted sector counts
    colony_data['Correct # Sectors?'] = matching_sector_counts # mark where the counts match and insert this into the data

if use_quantifiable_counts_from_table == True:
    true_quant_colonies = pd.read_csv(test_output_folder + '/true_quantifiable_colonies.csv') # load file containing whether colony is quantifiable
    colony_data['Quantifiable'] = true_quant_colonies['Quantifiable'] # insert this data into the original table

if use_true_cured_colonies_from_table == True:
    # Read in table with true cured colonies
    true_cured_colonies = pd.read_csv(test_output_folder + '/true_cured_colonies.csv') # load file containing whether colony is cured
    colony_data['Is Cured?'] = true_cured_colonies['Is Cured?'] # insert this data into the original table

# These are plain flags, so combine them with `and`; `&` is kept for the
# element-wise column comparisons inside the branch.
if (use_true_sector_counts == True) and (use_quantifiable_counts_from_table == True) and (use_true_cured_colonies_from_table == True):
    colony_data['Quantifiable Cured'] = (colony_data['Quantifiable'] == True) & (colony_data['Is Cured?'] == True)
    colony_data['Quantifiable Stable'] = (colony_data['Quantifiable'] == True) & (colony_data['True # Sectors'] == 0)
    colony_data['Quantifiable Sectored'] = (colony_data['Quantifiable'] == True) & (colony_data['True # Sectors'] > 0) & (colony_data['Is Cured?'] == False)

# Since this is training data, we know the number of sectors
# (scalar assignment broadcasts the value to every row)
colony_data['True # Sectors'] = 1
colony_data['Is Cured?'] = False
colony_data['Is Stable?'] = False

# 'True # Sectors' was just overwritten, so refresh the match flag; otherwise
# 'Correct # Sectors?' keeps describing a comparison against the old values.
matching_sector_counts = colony_data['True # Sectors'] == colony_data['Pred # Sectors']
colony_data['Correct # Sectors?'] = matching_sector_counts

colony_data.to_pickle(test_output_folder + '/' + str(weights_file) + '_colony_data.pkl')
#colony_data.to_pickle(output_data_folder + '/' + str(weights_file) + '_colony_data_CHT_' + str(num_classes) + '_puritycheck_byregion.pkl')
#colony_data.to_csv(output_data_folder + '/' + str(weights_file) + '_colony_data_CHT_' + str(num_classes) + '_puritycheck_byregion.csv')

colony_data.to_csv(test_output_folder + '/' + str(weights_file) + '_colony_data.csv')
colony_data


## Quick Calculations

### Colonies detected and average purity scores

In [None]:
# Per-plate summary: colonies detected, colonies counted as cured, and mean purity score.

# A colony is counted as cured here when at least this fraction of its segmented
# area is red.
CURED_RED_FRACTION_THRESHOLD = 0.95

for this_plate_name in all_plate_names:
    this_plate_data = colony_data[colony_data['Plate Name'] == this_plate_name]
    red_fraction = this_plate_data['Red Area (Seg)'] / this_plate_data['Colony Area (Seg)']
    print('')
    print('Plate: ' + str(this_plate_name))
    print('Number of colonies detected: ' + str(len(this_plate_data['Plate Name'])))
    print('Number of Cured Colonies: ' + str(np.sum(red_fraction >= CURED_RED_FRACTION_THRESHOLD)))
    print('Average purity scores across all colonies detected in the plate: ' + str(np.mean(this_plate_data['(AC) Weighted Full Average Score'])))

### Quantifiable colonies detected

In [None]:
# For every plate, report how many colonies were detected and how many of those
# are flagged as quantifiable. Only meaningful when expert counts were supplied.
if not (use_expert_counts == True):
    print('Quantifiable colonies not considered in this set')
else:
    for this_plate_name in all_plate_names:
        # Rows belonging to the current plate
        on_this_plate = colony_data['Plate Name'] == this_plate_name
        this_plate_data = colony_data[on_this_plate]

        # Subset of those rows flagged as quantifiable
        flagged_quantifiable = this_plate_data['Quantifiable'] == True
        good_plate_data = this_plate_data[flagged_quantifiable]

        num_detected = len(this_plate_data['Quantifiable'])
        num_quantifiable = len(good_plate_data['Quantifiable'])

        print('')
        print('Plate: ' + str(this_plate_name))
        print('Colonies detected: ' + str(num_detected))
        print('Quantifiable colonies detected: ' + str(num_quantifiable))

## Get colony labels ([PSI+], [psi-], and Sectored)

In [None]:
def _label_colony_states(is_stable, is_cured, region_counts, max_regions):
    """Build one state label per colony from its stable/cured flags and region count.

    Labels are written in the following order, with later writes overwriting
    earlier ones:
      1. '[PSI+]' where ``is_stable`` is True (colonies with no red regions)
      2. '[psi-]' where ``is_cured`` is True (colonies with no white regions)
      3. 'S<k>' for k = 1..max_regions where both flags are False and
         ``region_counts`` equals k
    Entries matched by none of these keep the placeholder 'UNFILLED'.
    """
    states = np.full(len(region_counts), 'UNFILLED')
    states[is_stable == True] = '[PSI+]'
    states[is_cured == True] = '[psi-]'
    neither_flag = (is_cured == False) & (is_stable == False)
    for num_regions in range(1, max_regions + 1):
        states[neither_flag & (region_counts == num_regions)] = 'S' + str(num_regions)
    return states


# The largest region/sector count seen in any of the three count columns bounds the 'S<k>' labels.
max_sector_count_before = max(colony_data['Initial # Regions'])
max_sector_count_after = max(colony_data['Pred # Sectors'])
max_sector_count_true = max(colony_data['True # Sectors'])

max_sector_count_all = max([max_sector_count_before, max_sector_count_after, max_sector_count_true])

# Labels before correction, after correction, and from the true annotations
colony_states_before = _label_colony_states(
    colony_data['(BC) Stable'],
    colony_data['(BC) Cured'],
    colony_data['Initial # Regions'],
    max_sector_count_all,
)
colony_states_after = _label_colony_states(
    colony_data['(AC) Stable'],
    colony_data['(AC) Cured'],
    colony_data['Pred # Sectors'],
    max_sector_count_all,
)
colony_states_true = _label_colony_states(
    colony_data['Is Stable?'],
    colony_data['Is Cured?'],
    colony_data['True # Sectors'],
    max_sector_count_all,
)

# Rows whose true label is still the placeholder, kept for inspection
unmarked_locations = np.where(colony_states_true == 'UNFILLED')
colony_row = colony_data.iloc[unmarked_locations]

# Attach the three label columns to the merged table.
# Note this happens whether or not any 'UNFILLED' entries remain.
colony_data['Label Before'] = colony_states_before
colony_data['Label After'] = colony_states_after
colony_data['Label True'] = colony_states_true

#print(quantifiable_colony_data['Label Before'])
# counter = 0

# for ind in colony_row.index:
#     colony_number = colony_row['Colony Number'].iloc[counter]
#     plate_name = colony_row['Plate Name'].iloc[counter]
#     set_number = colony_row['Set'].iloc[counter]

#     #if counter == 0:
#     #    merged_table['Quantifiable Stable'] = 

#     # Get image
#     if set_number == 2:
#         image_to_display = read_image(sector_project_folder + '/Real Images/Wes Plates/Set 2 Prepro/' + plate_name)*255
#     image_to_display = cv2.rectangle(image_to_display, (colony_row['Side Left'].iloc[counter], colony_row['Side Top'].iloc[counter]), (colony_row['Side Right'].iloc[counter], colony_row['Side Bottom'].iloc[counter]), (255, 0, 0), 2)
#     #cv2_imshow(image_to_display)

## Show Locations of Extracted Colonies in Each Image

In [None]:
def get_color_codes(p):
    """Return the box colour used to mark a colony of class code ``p``.

    Args:
        p: Class code. 0 = bad segmentation, 1 = pink, 2 = red,
            3 = variegating, 4 = white.

    Returns:
        A 3-tuple of 0-255 channel values. The tuples are written so that
        (0, 0, 255) is red and (255, 0, 0) marks variegating colonies, i.e.
        they read as blue-green-red triples (presumably because they are
        handed to OpenCV drawing calls -- confirm against the image loader).

    Raises:
        ValueError: If ``p`` is not one of the five known codes. Previously
            this case fell through every branch and surfaced as an
            UnboundLocalError on ``return``.
    """
    color_by_code = {
        0: (0, 255, 0),      # bad_seg
        1: (255, 0, 255),    # pink
        2: (0, 0, 255),      # red
        3: (255, 0, 0),      # variegating
        4: (255, 255, 255),  # white
    }
    try:
        return color_by_code[p]
    except (KeyError, TypeError):
        raise ValueError('Unknown colony class code: ' + repr(p)) from None

all_plate_names = colony_data['Plate Name'].unique()
all_sector_values = list(range(0, np.max(colony_data['Pred # Sectors'])+1))

# For every plate: classify each detected colony from its post-correction (AC)
# predictions, report the class totals, draw one colour-coded box per colony on
# the plate image, show the result and save it to test_boxes_folder.
for this_plate_name in all_plate_names:

    this_plate_stem = os.path.splitext(this_plate_name)[0]
    print('Looking at ' + str(this_plate_name) + ':')
    this_plate_data = colony_data[colony_data['Plate Name'] == this_plate_name]
    number_colonies = len(this_plate_data) # number of colonies found
    print(number_colonies)

    # Explicit "== True" keeps missing flags out of both classes.
    predicted_cured = (this_plate_data['(AC) Cured'] == True).to_numpy()
    predicted_stable = (this_plate_data['(AC) Stable'] == True).to_numpy()
    predicted_sectors = this_plate_data['Pred # Sectors'].to_numpy()

    # Class codes understood by get_color_codes.  np.select takes the first
    # matching condition, which reproduces an if/elif chain in this order:
    #   2 = red (cured) with exactly one sector
    #   4 = white (stable) with no sectors
    #   3 = variegating (neither cured nor stable) with at least one sector
    #   0 = anything else, i.e. a bad segmentation
    # Pink colonies are not considered, so 1 is never assigned.
    indicator_array = np.select(
        [
            predicted_cured & (predicted_sectors == 1),
            predicted_stable & (predicted_sectors == 0),
            ~(predicted_stable | predicted_cured) & (predicted_sectors > 0),
        ],
        [2, 4, 3],
        default=0,
    )

    # Display total number of colonies for each label
    print('Bad segementations: ' + str(np.sum(indicator_array == 0)))
    print('Fully red colonies: ' + str(np.sum(indicator_array == 2)))
    print('Fully white colonies: ' + str(np.sum(indicator_array == 4)))
    print('Sectored colonies: ' + str(np.sum(indicator_array == 3)))

    # Plot the boxes onto the image
    this_image = read_image(real_image_folder + '/' + this_plate_name)
    this_img_copy = copy.deepcopy(this_image)*255
    print(len(indicator_array))
    for this_row in range(0, number_colonies):
        this_pred = indicator_array[this_row]
        # Table values are NumPy scalars; cast to builtin int so the corner
        # tuples are always accepted as pixel coordinates by cv2.rectangle.
        box_left = int(this_plate_data['Side Left'].iloc[this_row])
        box_top = int(this_plate_data['Side Top'].iloc[this_row])
        box_right = int(this_plate_data['Side Right'].iloc[this_row])
        box_bottom = int(this_plate_data['Side Bottom'].iloc[this_row])
        cv2.rectangle(this_img_copy, (box_left, box_top), (box_right, box_bottom), get_color_codes(this_pred), 2)
    cv2_imshow(this_img_copy)

    # cv2.imwrite reports failure through its return value rather than an
    # exception, so surface it instead of silently losing the image.
    boxes_image_path = test_boxes_folder + '/' + this_plate_stem + '.jpg'
    if not cv2.imwrite(boxes_image_path, this_img_copy):
        print('WARNING: could not write ' + boxes_image_path)

## Plots of Testing Data

### Predictions Only

#### Colony States (no sector counts)

In [None]:

#print(sampled_colony_data.keys())
#print(sampled_colony_data['Is it a colony? '])


# Grouped bar chart of the predicted colony states ([PSI+], [psi-], Sectored),
# comparing the labels before and after the purity correction.  All 'S<n>'
# labels are pooled into a single 'Sectored' bar; manual counts are not plotted.

# Largest region/sector counts among the predictions (not used by the plot itself).
max_initial = np.max(colony_data['Initial # Regions'])
max_pred = np.max(colony_data['Pred # Sectors'])
max_all = np.max([max_initial, max_pred])

label_names = ['[PSI+]', '[psi-]', 'Sectored']
all_counts = []

labels_before = colony_data['Label Before']
labels_after = colony_data['Label After']

# Rows carrying each state, before and after the correction.
correct_white_labels_before = colony_data[labels_before == '[PSI+]']
correct_white_labels_after = colony_data[labels_after == '[PSI+]']
correct_red_labels_before = colony_data[labels_before == '[psi-]']
correct_red_labels_after = colony_data[labels_after == '[psi-]']
correct_sector_labels_before = colony_data[labels_before.str.startswith('S')]
correct_sector_labels_after = colony_data[labels_after.str.startswith('S')]

# Bar heights, in the same order as label_names.
initial_correct_counts = [
    len(state_rows)
    for state_rows in (correct_white_labels_before, correct_red_labels_before, correct_sector_labels_before)
]
post_correct_counts = [
    len(state_rows)
    for state_rows in (correct_white_labels_after, correct_red_labels_after, correct_sector_labels_after)
]

x = np.arange(len(label_names))
width = 0.25  # the width of the bars
half_width = width/2

fig, ax = plt.subplots(figsize=(12,5), sharey=True)

# Leave headroom above the tallest bar for the value labels.
tallest_bar = max(initial_correct_counts + post_correct_counts)
ax.set_ylim(bottom=0, top=tallest_bar+50)

rects1 = ax.bar(x - half_width, initial_correct_counts, width, label='Original Predictions', color='blue')
rects2 = ax.bar(x + half_width, post_correct_counts, width, label='With Purity Correction', color='red')

ax.set_xlabel('Colony States', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.set_title('Classified Colonies', fontsize=16)
ax.set_xticks(x)
ax.set_xticklabels(label_names)
ax.tick_params(axis='both', labelsize=12)
ax.legend(loc='best')

xtickslocs = ax.get_xticks()
print(xtickslocs)

# Value labels, one call per bar series.
addlabels_centered(xtickslocs - half_width, initial_correct_counts, 9)
addlabels_centered(xtickslocs + half_width, post_correct_counts, 9)

# Dashed dividers between the three state groups.
for divider_position in (0.5, 1.5):
    ax.axvline(x = divider_position, color = 'k', linestyle = '--')

fig.tight_layout()
plt.show()



#### Colony States (with sector counts)

In [None]:

#print(sampled_colony_data.keys())
#print(sampled_colony_data['Is it a colony? '])


# Grouped bar chart of the predicted colony states with one bar group per
# sector count: [PSI+], [psi-], S1, S2, ...  Compares the labels before and
# after the purity correction; manual counts are not plotted.

# Largest region/sector counts among the predictions (not used by the plot itself).
max_initial = np.max(colony_data['Initial # Regions'])
max_pred = np.max(colony_data['Pred # Sectors'])
max_all = np.max([max_initial, max_pred])

all_counts = []

labels_before = colony_data['Label Before']
labels_after = colony_data['Label After']

# [PSI+] and [psi-] rows, before and after the correction.
correct_white_labels_before = colony_data[labels_before == '[PSI+]']
correct_white_labels_after = colony_data[labels_after == '[PSI+]']
correct_red_labels_before = colony_data[labels_before == '[psi-]']
correct_red_labels_after = colony_data[labels_after == '[psi-]']

initial_correct_counts = [len(correct_white_labels_before), len(correct_red_labels_before)]
post_correct_counts = [len(correct_white_labels_after), len(correct_red_labels_after)]

# The number of 'S<n>' groups is set by the largest count in any of the three
# count columns, including the manual 'True # Sectors' column.
sector_count_columns = ['Initial # Regions', 'Pred # Sectors', 'True # Sectors']
max_sector_counts = max([np.nanmax(colony_data[count_column].astype(int)) for count_column in sector_count_columns])

sector_labels = ['S'+str(i) for i in (range(1, max_sector_counts+1))]

# One bar pair per sector count.
for this_num_sectors, sector_label in enumerate(sector_labels, start=1):
    correct_sector_labels_before = colony_data[labels_before == sector_label]
    correct_sector_labels_after = colony_data[labels_after == sector_label]

    initial_correct_counts.append(len(correct_sector_labels_before))
    post_correct_counts.append(len(correct_sector_labels_after))

label_names = ['[PSI+]', '[psi-]'] + sector_labels
x = np.arange(len(label_names))

width = 0.25  # the width of the bars
half_width = width/2

fig, ax = plt.subplots(figsize=(12,5), sharey=True)

# Leave headroom above the tallest bar for the value labels.
tallest_bar = max(initial_correct_counts + post_correct_counts)
ax.set_ylim(bottom=0, top=tallest_bar+50)

rects1 = ax.bar(x - half_width, initial_correct_counts, width, label='Original Predictions', color='blue')
rects2 = ax.bar(x + half_width, post_correct_counts, width, label='With Purity Correction', color='red')

ax.set_xlabel('Colony States', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.set_title('Classified Colonies', fontsize=16)
ax.set_xticks(x)
ax.set_xticklabels(label_names)
ax.tick_params(axis='both', labelsize=12)
ax.legend(loc='best')

xtickslocs = ax.get_xticks()
print(xtickslocs)

# Value labels, one call per bar series.
addlabels_centered(xtickslocs - half_width, initial_correct_counts, 9)
addlabels_centered(xtickslocs + half_width, post_correct_counts, 9)

# Dashed dividers after the [PSI+] group and after the [psi-] group.
for divider_position in (0.5, 1.5):
    ax.axvline(x = divider_position, color = 'k', linestyle = '--')

fig.tight_layout()
plt.show()



# Write colony output to file
Only works if colony annotations exist

In [None]:
# Code to print a pdf of the croppings
# 1. Sort data in table based on plate name, colony number and other elements.
# 2. Iterate through each row of the sorted table to get colony information.
# 3. Add information to pdf document.
# 4. Repeat for each colony and plate.
# 5. Output pdf.

# Do the same as above, but group based on the number of sectors detected



for this_plate in all_plate_names:

    # this_plate is the key to the corresponding subfolder in each annotation class
    # Initialize PDF writer
    pdf = FPDF()
    pdf = FPDF(unit = "pt", format = [850,1100])
    pdf.set_font('Arial', 'B', 16)
    target_height = float(40)
    left_margin = 10
    right_margin = 840
    bottom_margin = 1050
    number_spacing = 7
    uniform_spacing = 640


    row_number = 1 # initial row index
    col_position = 10 # initial column position
    col_margin = 40
    this_plate_stem = os.path.splitext(this_plate)[0]

    # all_cropped_images = sorted(glob.glob(output_crops_folder + '/Colony Cuts/' + this_plate_stem + '_' + '*.jpg'))
    # all_cropped_circles = sorted(glob.glob(output_crops_folder + '/Colony Circles/' + this_plate_stem + '_' + '*.jpg'))
    # all_cropped_masks = sorted(glob.glob(output_crops_folder + '/Colony Segs/' + this_plate_stem + '_' + '*.png'))

    # all_cropped_initial_regions = sorted(glob.glob(output_crops_folder + '/Colony Initial Regions/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_initial_boundaries = sorted(glob.glob(output_crops_folder + '/Colony Initial Boundary/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_initial_bad_regions = sorted(glob.glob(output_crops_folder + '/Colony Initial Bad Regions/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_initial_sector_bounds = sorted(glob.glob(output_crops_folder + '/Colony Initial Sector Bounds/' + this_plate_stem + '_' + '*.png'))

    # all_cropped_corrected_masks = sorted(glob.glob(output_crops_folder + '/Colony Corrected Segs/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_corrected_regions = sorted(glob.glob(output_crops_folder + '/Colony Corrected Regions/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_corrected_boundaries = sorted(glob.glob(output_crops_folder + '/Colony Corrected Boundary/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_corrected_bad_regions = sorted(glob.glob(output_crops_folder + '/Colony Corrected Bad Regions/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_corrected_sector_bounds = sorted(glob.glob(output_crops_folder + '/Colony Corrected Sector Bounds/' + this_plate_stem + '_' + '*.png'))

    # all_cropped_sectors = sorted(glob.glob(output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_' + '*.png'))
    # all_cropped_sector_comps = sorted(glob.glob(output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_' + '*.png'))

    #print(all_cropped_images)
    #print(all_cropped_circles)
    #print(all_cropped_masks)
    #print(all_cropped_sectors)
    this_plate_data = colony_data[colony_data['Plate Name'] == this_plate]
    #this_plate_data.set_index('Colony Number', inplace=True)
    max_num_sectors_in_plate = int(max(this_plate_data['Pred # Sectors']))
    pdf.add_page()

    pdf.text(col_position, target_height*row_number, 'Colonies detected in Plate ' + str(this_plate) + '.  ' + str(len(this_plate_data)) + ' colonies were detected.')
    pdf.text(col_position, target_height*row_number + 20, 'Group 1: Raw image data')
    pdf.text(col_position, target_height*row_number + 40, 'Group 2: Raw segmentation of whole colony and boundary')
    pdf.text(col_position, target_height*row_number + 60, 'Group 3: Regional breakdown and analysis before boundary corrections were made')
    pdf.text(col_position, target_height*row_number + 80, 'Group 4: Segmentation with corrected boundary')
    pdf.text(col_position, target_height*row_number + 100, 'Group 5: Regional breakdown and analysis after boundary corrections were made')
    pdf.text(col_position, target_height*row_number + 120, 'Group 6: Breakdown of red sectored regions and pixels after correction')

    row_number = row_number + 2

    # Start by collecting all of the [PSI+] predictions
    for this_num_sectors in range(0,1):
        row_number = row_number + 1
        col_position = 10
        if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
            pdf.add_page()
            row_number = 1 # initial row index
        #print(this_plate_data)
        this_plate_sector_data = this_plate_data[this_plate_data['Label After'] == '[PSI+]']
        #print(this_plate_sector_data)
        this_plate_sector_data.reset_index()
        #print(this_plate_sector_data)
        pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies labeled [PSI+]: ' + str(len(this_plate_sector_data)))
        row_number = row_number + 1

        # Sort this subtable by sector scores in descending order
        sorted_plate_sector_data = this_plate_sector_data.sort_values(by=['(AC) Weighted Full Average Score', 'Colony Number'], ascending=[False, True])
        sorted_plate_sector_data.reset_index()

        for index, row in sorted_plate_sector_data.iterrows():

            #print('Colony ' + str(int(row['Colony Number'])))
            
            cured_status = 'Cured' if ((row['Red Area (Seg)'] / row['Colony Area (Seg)']) >= 0.95) else ''

            cover_image_name = output_crops_folder + '/raw/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_circle_name = output_crops_folder + '/circles/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_mask_name = output_crops_folder + '/segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_initial_region_name = output_crops_folder + '/init_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_boundary_name = output_crops_folder + '/init_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_bad_region_name = output_crops_folder + '/init_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_sector_bounds_name = output_crops_folder + '/init_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_corrected_mask_name = output_crops_folder + '/cor_segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_region_name = output_crops_folder + '/cor_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_boundary_name = output_crops_folder + '/cor_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_bad_region_name = output_crops_folder + '/cor_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_sector_bounds_name = output_crops_folder + '/cor_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_sector_name = output_crops_folder + '/sectors/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_sector_comp_name = output_crops_folder + '/sector_comps/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            if use_expert_counts == True:
                cover_counts_name = output_crops_folder + '/counted/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            # cover_region_name = output_crops_folder + '/Colony Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_bad_region_name = output_crops_folder + '/Colony Bad Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_name = output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_comp_name = output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_bounds_name = output_crops_folder + '/Colony Sector Bounds/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'

            cover_image = PIL.Image.open(cover_image_name, mode='r')
            cover_circle = PIL.Image.open(cover_circle_name, mode='r')
            cover_mask = PIL.Image.open(cover_mask_name, mode='r')

            cover_initial_region = PIL.Image.open(cover_initial_region_name, mode='r')
            cover_initial_boundary = PIL.Image.open(cover_initial_boundary_name, mode='r')
            cover_initial_bad_region = PIL.Image.open(cover_initial_bad_region_name, mode='r')
            cover_initial_sector_bounds = PIL.Image.open(cover_initial_sector_bounds_name, mode='r')

            cover_corrected_region = PIL.Image.open(cover_corrected_region_name, mode='r')
            cover_corrected_boundary = PIL.Image.open(cover_corrected_boundary_name, mode='r')
            cover_corrected_bad_region = PIL.Image.open(cover_corrected_bad_region_name, mode='r')
            cover_corrected_sector_bounds = PIL.Image.open(cover_corrected_sector_bounds_name, mode='r')

            cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')

            if use_expert_counts == True:
                cover_counts = PIL.Image.open(cover_counts_name, mode='r')

            # cover_region = PIL.Image.open(cover_region_name, mode='r')
            # cover_bad_region = PIL.Image.open(cover_bad_region_name, mode='r')
            # cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            # cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')
            # cover_sector_bounds = PIL.Image.open(cover_sector_bounds_name, mode='r')
            #print('This Image: ')
            #print(pathlib.Path(all_cropped_images[index]).stem)
            #print(pathlib.Path(all_cropped_masks[index]).stem)
            #print(pathlib.Path(all_cropped_sectors[index]).stem)
            w_im,h_im = cover_image.size
            w_ma,h_ma = cover_mask.size
            w_annot,h_annot = cover_initial_sector_bounds.size
            scaling_factor_im = target_height / float(h_im)
            scaling_factor_ma = target_height / float(h_ma)
            scaling_factor_annot = target_height / float(h_annot)
            scaled_width_im = scaling_factor_im*w_im
            scaled_width_ma = scaling_factor_ma*w_ma
            scaled_width_annot = scaling_factor_annot*w_annot
            scaled_height_im = scaling_factor_im*h_im
            scaled_height_ma = scaling_factor_ma*h_ma
            scaled_height_annot = scaling_factor_annot*h_annot
            #image = all_cropped_images[this_colony]
            #mask = all_cropped_masks[this_colony]

            # Check that the image, mask, and number will fit inside the margins on the give row, and if not, move them to the next row
            if col_position + (3*scaled_width_im) + (10*scaled_width_ma) + (2*scaled_width_annot) + number_spacing > right_margin:
                row_number = row_number + 1
                col_position = 10
                current_col_position = copy.deepcopy(col_position)
                if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
                    pdf.add_page()
                    row_number = 1 # initial row index
            else:
                current_col_position = copy.deepcopy(col_position)
            #print(all_cropped_images[index])
            # Add images and masks to the defined position

            # Cropping of colony
            pdf.image(cover_image_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
            current_col_position = current_col_position + scaled_width_im

            # Cropping of colony from Wes's annotations
            if use_expert_counts == True:
                pdf.image(cover_counts_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
                current_col_position = current_col_position + scaled_width_im

            # Cropping of colony with circle
            pdf.image(cover_circle_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony with overlayed circle
            current_col_position = current_col_position + scaled_width_im

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Raw semgentation
            pdf.image(cover_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the raw segmentation
            pdf.image(cover_initial_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the raw segmentation
            pdf.image(cover_initial_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations of where the red regions are found in the regional breakdown
            pdf.image(cover_initial_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # The inconsistent regions located
            pdf.image(cover_initial_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The segmentation such that boundary pixels inconsistent with their assigned region were changed
            pdf.image(cover_corrected_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the corrected segmentation
            pdf.image(cover_corrected_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the corrected segmentation
            pdf.image(cover_corrected_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations on the corrected segmentations indicating where the red regions are (these should be the sectors)
            pdf.image(cover_corrected_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # Any inconsistent regions detected in the corrected segmentation (in theory, these should always be completely black)
            pdf.image(cover_corrected_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Partitioning of the red regions found in the corrected segmentation (each shaed of gray is a different sector)
            pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            current_col_position = current_col_position + scaled_width_ma
            
            # A subset of the previous image with only the red pixels preserved.
            pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            # current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            # current_col_position = current_col_position + scaled_width_ma

            # current_col_position = current_col_position + 10 # A margin to separate subsets of visualizations

            # pdf.image(cover_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            # current_col_position = current_col_position + scaled_width_annot
            
            # pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            # current_col_position = current_col_position + scaled_width_ma
            
            # pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            # current_col_position = current_col_position + scaled_width_ma
            #col_position = col_position + scaled_width_ma
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('# Sec: ' + str(int(row['Pred # Sectors']))))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('Col #: ' + str(row['Colony Number'])))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 36, str('Avg Sc: ' + str(round(row['(AC) Weighted Full Average Score'], 2))))
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 76, cured_status)

            # make space for the next set of images

            col_position = current_col_position + number_spacing + col_margin


    # Next, do the same thing for [psi-] predictions
    for this_num_sectors in range(0,1):
        row_number = row_number + 1
        col_position = 10
        if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
            pdf.add_page()
            row_number = 1 # initial row index
        #print(this_plate_data)
        this_plate_sector_data = this_plate_data[this_plate_data['Label After'] == '[psi-]']
        #print(this_plate_sector_data)
        this_plate_sector_data.reset_index()
        #print(this_plate_sector_data)
        pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies labeled [psi-]: ' + str(len(this_plate_sector_data)))
        row_number = row_number + 1

        # Sort this subtable by sector scores in descending order
        sorted_plate_sector_data = this_plate_sector_data.sort_values(by=['(AC) Weighted Full Average Score', 'Colony Number'], ascending=[False, True])
        sorted_plate_sector_data.reset_index()

        for index, row in sorted_plate_sector_data.iterrows():

            #print('Colony ' + str(int(row['Colony Number'])))
            
            cured_status = 'Cured' if ((row['Red Area (Seg)'] / row['Colony Area (Seg)']) >= 0.95) else ''

            cover_image_name = output_crops_folder + '/raw/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_circle_name = output_crops_folder + '/circles/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_mask_name = output_crops_folder + '/segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_initial_region_name = output_crops_folder + '/init_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_boundary_name = output_crops_folder + '/init_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_bad_region_name = output_crops_folder + '/init_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_sector_bounds_name = output_crops_folder + '/init_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_corrected_mask_name = output_crops_folder + '/cor_segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_region_name = output_crops_folder + '/cor_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_boundary_name = output_crops_folder + '/cor_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_bad_region_name = output_crops_folder + '/cor_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_sector_bounds_name = output_crops_folder + '/cor_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_sector_name = output_crops_folder + '/sectors/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_sector_comp_name = output_crops_folder + '/sector_comps/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            if use_expert_counts == True:
                cover_counts_name = output_crops_folder + '/counted/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            # cover_region_name = output_crops_folder + '/Colony Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_bad_region_name = output_crops_folder + '/Colony Bad Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_name = output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_comp_name = output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_bounds_name = output_crops_folder + '/Colony Sector Bounds/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'

            cover_image = PIL.Image.open(cover_image_name, mode='r')
            cover_circle = PIL.Image.open(cover_circle_name, mode='r')
            cover_mask = PIL.Image.open(cover_mask_name, mode='r')

            cover_initial_region = PIL.Image.open(cover_initial_region_name, mode='r')
            cover_initial_boundary = PIL.Image.open(cover_initial_boundary_name, mode='r')
            cover_initial_bad_region = PIL.Image.open(cover_initial_bad_region_name, mode='r')
            cover_initial_sector_bounds = PIL.Image.open(cover_initial_sector_bounds_name, mode='r')

            cover_corrected_region = PIL.Image.open(cover_corrected_region_name, mode='r')
            cover_corrected_boundary = PIL.Image.open(cover_corrected_boundary_name, mode='r')
            cover_corrected_bad_region = PIL.Image.open(cover_corrected_bad_region_name, mode='r')
            cover_corrected_sector_bounds = PIL.Image.open(cover_corrected_sector_bounds_name, mode='r')

            cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')

            if use_expert_counts == True:
                cover_counts = PIL.Image.open(cover_counts_name, mode='r')

            # cover_region = PIL.Image.open(cover_region_name, mode='r')
            # cover_bad_region = PIL.Image.open(cover_bad_region_name, mode='r')
            # cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            # cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')
            # cover_sector_bounds = PIL.Image.open(cover_sector_bounds_name, mode='r')
            #print('This Image: ')
            #print(pathlib.Path(all_cropped_images[index]).stem)
            #print(pathlib.Path(all_cropped_masks[index]).stem)
            #print(pathlib.Path(all_cropped_sectors[index]).stem)
            w_im,h_im = cover_image.size
            w_ma,h_ma = cover_mask.size
            w_annot,h_annot = cover_initial_sector_bounds.size
            scaling_factor_im = target_height / float(h_im)
            scaling_factor_ma = target_height / float(h_ma)
            scaling_factor_annot = target_height / float(h_annot)
            scaled_width_im = scaling_factor_im*w_im
            scaled_width_ma = scaling_factor_ma*w_ma
            scaled_width_annot = scaling_factor_annot*w_annot
            scaled_height_im = scaling_factor_im*h_im
            scaled_height_ma = scaling_factor_ma*h_ma
            scaled_height_annot = scaling_factor_annot*h_annot
            #image = all_cropped_images[this_colony]
            #mask = all_cropped_masks[this_colony]

            # Check that the image, mask, and number will fit inside the margins on the give row, and if not, move them to the next row
            if col_position + (3*scaled_width_im) + (10*scaled_width_ma) + (2*scaled_width_annot) + number_spacing > right_margin:
                row_number = row_number + 1
                col_position = 10
                current_col_position = copy.deepcopy(col_position)
                if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
                    pdf.add_page()
                    row_number = 1 # initial row index
            else:
                current_col_position = copy.deepcopy(col_position)
            #print(all_cropped_images[index])
            # Add images and masks to the defined position

            # Cropping of colony
            pdf.image(cover_image_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
            current_col_position = current_col_position + scaled_width_im

            # Cropping of colony from Wes's annotations
            if use_expert_counts == True:
                pdf.image(cover_counts_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
                current_col_position = current_col_position + scaled_width_im

            # Cropping of colony with circle
            pdf.image(cover_circle_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony with overlayed circle
            current_col_position = current_col_position + scaled_width_im

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Raw semgentation
            pdf.image(cover_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the raw segmentation
            pdf.image(cover_initial_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the raw segmentation
            pdf.image(cover_initial_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations of where the red regions are found in the regional breakdown
            pdf.image(cover_initial_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # The inconsistent regions located
            pdf.image(cover_initial_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The segmentation such that boundary pixels inconsistent with their assigned region were changed
            pdf.image(cover_corrected_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the corrected segmentation
            pdf.image(cover_corrected_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the corrected segmentation
            pdf.image(cover_corrected_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations on the corrected segmentations indicating where the red regions are (these should be the sectors)
            pdf.image(cover_corrected_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # Any inconsistent regions detected in the corrected segmentation (in theory, these should always be completely black)
            pdf.image(cover_corrected_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Partitioning of the red regions found in the corrected segmentation (each shaed of gray is a different sector)
            pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            current_col_position = current_col_position + scaled_width_ma
            
            # A subset of the previous image with only the red pixels preserved.
            pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            # current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            # current_col_position = current_col_position + scaled_width_ma

            # current_col_position = current_col_position + 10 # A margin to separate subsets of visualizations

            # pdf.image(cover_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            # current_col_position = current_col_position + scaled_width_annot
            
            # pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            # current_col_position = current_col_position + scaled_width_ma
            
            # pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            # current_col_position = current_col_position + scaled_width_ma
            #col_position = col_position + scaled_width_ma
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('# Sec: ' + str(int(row['Pred # Sectors']))))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('Col #: ' + str(row['Colony Number'])))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 36, str('Avg Sc: ' + str(round(row['(AC) Weighted Full Average Score'], 2))))
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 76, cured_status)

            # make space for the next set of images

            col_position = current_col_position + number_spacing + col_margin

    # Now, do this for sectored colonies with any number of sectors
    for this_num_sectors in range(1, max_num_sectors_in_plate + 1):
        row_number = row_number + 1
        col_position = 10
        if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
            pdf.add_page()
            row_number = 1 # initial row index
        #print(this_plate_data)
        this_plate_sector_data = this_plate_data[this_plate_data['Label After'] == 'S'+str(this_num_sectors)]
        #print(this_plate_sector_data)
        this_plate_sector_data.reset_index()
        #print(this_plate_sector_data)
        if this_num_sectors == 1:
            pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sector: ' +  str(len(this_plate_sector_data)))
        else:
            pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sectors: ' +  str(len(this_plate_sector_data)))
        row_number = row_number + 1

        # Sort this subtable by sector scores in descending order
        sorted_plate_sector_data = this_plate_sector_data.sort_values(by=['(AC) Weighted Full Average Score', 'Colony Number'], ascending=[False, True])
        sorted_plate_sector_data.reset_index()

        for index, row in sorted_plate_sector_data.iterrows():

            #print('Colony ' + str(int(row['Colony Number'])))
            
            cured_status = 'Cured' if ((row['Red Area (Seg)'] / row['Colony Area (Seg)']) >= 0.95) else ''

            cover_image_name = output_crops_folder + '/raw/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_circle_name = output_crops_folder + '/circles/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_mask_name = output_crops_folder + '/segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_initial_region_name = output_crops_folder + '/init_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_boundary_name = output_crops_folder + '/init_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_bad_region_name = output_crops_folder + '/init_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_sector_bounds_name = output_crops_folder + '/init_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_corrected_mask_name = output_crops_folder + '/cor_segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_region_name = output_crops_folder + '/cor_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_boundary_name = output_crops_folder + '/cor_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_bad_region_name = output_crops_folder + '/cor_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_sector_bounds_name = output_crops_folder + '/cor_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_sector_name = output_crops_folder + '/sectors/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_sector_comp_name = output_crops_folder + '/sector_comps/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            if use_expert_counts == True:
                cover_counts_name = output_crops_folder + '/counted/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            # cover_region_name = output_crops_folder + '/Colony Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_bad_region_name = output_crops_folder + '/Colony Bad Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_name = output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_comp_name = output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_bounds_name = output_crops_folder + '/Colony Sector Bounds/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'

            cover_image = PIL.Image.open(cover_image_name, mode='r')
            cover_circle = PIL.Image.open(cover_circle_name, mode='r')
            cover_mask = PIL.Image.open(cover_mask_name, mode='r')

            cover_initial_region = PIL.Image.open(cover_initial_region_name, mode='r')
            cover_initial_boundary = PIL.Image.open(cover_initial_boundary_name, mode='r')
            cover_initial_bad_region = PIL.Image.open(cover_initial_bad_region_name, mode='r')
            cover_initial_sector_bounds = PIL.Image.open(cover_initial_sector_bounds_name, mode='r')

            cover_corrected_region = PIL.Image.open(cover_corrected_region_name, mode='r')
            cover_corrected_boundary = PIL.Image.open(cover_corrected_boundary_name, mode='r')
            cover_corrected_bad_region = PIL.Image.open(cover_corrected_bad_region_name, mode='r')
            cover_corrected_sector_bounds = PIL.Image.open(cover_corrected_sector_bounds_name, mode='r')

            cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')

            if use_expert_counts == True:
                cover_counts = PIL.Image.open(cover_counts_name, mode='r')

            # cover_region = PIL.Image.open(cover_region_name, mode='r')
            # cover_bad_region = PIL.Image.open(cover_bad_region_name, mode='r')
            # cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            # cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')
            # cover_sector_bounds = PIL.Image.open(cover_sector_bounds_name, mode='r')
            #print('This Image: ')
            #print(pathlib.Path(all_cropped_images[index]).stem)
            #print(pathlib.Path(all_cropped_masks[index]).stem)
            #print(pathlib.Path(all_cropped_sectors[index]).stem)
            w_im,h_im = cover_image.size
            w_ma,h_ma = cover_mask.size
            w_annot,h_annot = cover_initial_sector_bounds.size
            scaling_factor_im = target_height / float(h_im)
            scaling_factor_ma = target_height / float(h_ma)
            scaling_factor_annot = target_height / float(h_annot)
            scaled_width_im = scaling_factor_im*w_im
            scaled_width_ma = scaling_factor_ma*w_ma
            scaled_width_annot = scaling_factor_annot*w_annot
            scaled_height_im = scaling_factor_im*h_im
            scaled_height_ma = scaling_factor_ma*h_ma
            scaled_height_annot = scaling_factor_annot*h_annot
            #image = all_cropped_images[this_colony]
            #mask = all_cropped_masks[this_colony]

            # Check that the image, mask, and number will fit inside the margins on the give row, and if not, move them to the next row
            if col_position + (3*scaled_width_im) + (10*scaled_width_ma) + (2*scaled_width_annot) + number_spacing > right_margin:
                row_number = row_number + 1
                col_position = 10
                current_col_position = copy.deepcopy(col_position)
                if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
                    pdf.add_page()
                    row_number = 1 # initial row index
            else:
                current_col_position = copy.deepcopy(col_position)
            #print(all_cropped_images[index])
            # Add images and masks to the defined position

            # Cropping of colony
            pdf.image(cover_image_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
            current_col_position = current_col_position + scaled_width_im

            # Cropping of colony from Wes's annotations
            if use_expert_counts == True:
                pdf.image(cover_counts_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
                current_col_position = current_col_position + scaled_width_im

            # Cropping of colony with circle
            pdf.image(cover_circle_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony with overlayed circle
            current_col_position = current_col_position + scaled_width_im

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Raw semgentation
            pdf.image(cover_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the raw segmentation
            pdf.image(cover_initial_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the raw segmentation
            pdf.image(cover_initial_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations of where the red regions are found in the regional breakdown
            pdf.image(cover_initial_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # The inconsistent regions located
            pdf.image(cover_initial_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The segmentation such that boundary pixels inconsistent with their assigned region were changed
            pdf.image(cover_corrected_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the corrected segmentation
            pdf.image(cover_corrected_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the corrected segmentation
            pdf.image(cover_corrected_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations on the corrected segmentations indicating where the red regions are (these should be the sectors)
            pdf.image(cover_corrected_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # Any inconsistent regions detected in the corrected segmentation (in theory, these should always be completely black)
            pdf.image(cover_corrected_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Partitioning of the red regions found in the corrected segmentation (each shaed of gray is a different sector)
            pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            current_col_position = current_col_position + scaled_width_ma
            
            # A subset of the previous image with only the red pixels preserved.
            pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            # current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            # current_col_position = current_col_position + scaled_width_ma

            # current_col_position = current_col_position + 10 # A margin to separate subsets of visualizations

            # pdf.image(cover_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            # current_col_position = current_col_position + scaled_width_annot
            
            # pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            # current_col_position = current_col_position + scaled_width_ma
            
            # pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            # current_col_position = current_col_position + scaled_width_ma
            #col_position = col_position + scaled_width_ma
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('# Sec: ' + str(int(row['Pred # Sectors']))))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('Col #: ' + str(row['Colony Number'])))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 36, str('Avg Sc: ' + str(round(row['(AC) Weighted Full Average Score'], 2))))
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 76, cured_status)

            # make space for the next set of images

            col_position = current_col_position + number_spacing + col_margin



    # for this_num_sectors in range(0,max_num_sectors_in_plate + 1):
    #     row_number = row_number + 1
    #     col_position = 10
    #     if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
    #         pdf.add_page()
    #         row_number = 1 # initial row index
    #     #print(this_plate_data)
    #     this_plate_sector_data = this_plate_data[this_plate_data['Pred # Sectors'] == this_num_sectors]
    #     #print(this_plate_sector_data)
    #     this_plate_sector_data.reset_index()
    #     #print(this_plate_sector_data)
    #     if this_num_sectors == 1:
    #         pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sector: ' +  str(len(this_plate_sector_data)))
    #     else:
    #         pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sectors: ' +  str(len(this_plate_sector_data)))
    #     row_number = row_number + 1

    #     # Sort this subtable by sector scores in descending order
    #     sorted_plate_sector_data = this_plate_sector_data.sort_values(by=['Avg Sector Score', 'Colony Number'], ascending=[False, True])
    #     sorted_plate_sector_data.reset_index()

    #     for index, row in sorted_plate_sector_data.iterrows():

    #         #print('Colony ' + str(int(row['Colony Number'])))
            
    #         cured_status = 'Cured' if ((row['Red Area (Seg)'] / row['Colony Area (Seg)']) >= 0.95) else ''

    #         cover_image_name = output_crops_folder + '/raw/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
    #         cover_circle_name = output_crops_folder + '/circles/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
    #         cover_mask_name = output_crops_folder + '/segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

    #         cover_initial_region_name = output_crops_folder + '/init_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_initial_boundary_name = output_crops_folder + '/init_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_initial_bad_region_name = output_crops_folder + '/init_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_initial_sector_bounds_name = output_crops_folder + '/init_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

    #         cover_corrected_mask_name = output_crops_folder + '/cor_segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_corrected_region_name = output_crops_folder + '/cor_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_corrected_boundary_name = output_crops_folder + '/cor_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_corrected_bad_region_name = output_crops_folder + '/cor_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_corrected_sector_bounds_name = output_crops_folder + '/cor_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

    #         cover_sector_name = output_crops_folder + '/sectors/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
    #         cover_sector_comp_name = output_crops_folder + '/sector_comps/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

    #         if use_expert_counts == True:
    #             cover_counts_name = output_crops_folder + '/counted/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

    #         # cover_region_name = output_crops_folder + '/Colony Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
    #         # cover_bad_region_name = output_crops_folder + '/Colony Bad Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
    #         # cover_sector_name = output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
    #         # cover_sector_comp_name = output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
    #         # cover_sector_bounds_name = output_crops_folder + '/Colony Sector Bounds/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'

    #         cover_image = PIL.Image.open(cover_image_name, mode='r')
    #         cover_circle = PIL.Image.open(cover_circle_name, mode='r')
    #         cover_mask = PIL.Image.open(cover_mask_name, mode='r')

    #         cover_initial_region = PIL.Image.open(cover_initial_region_name, mode='r')
    #         cover_initial_boundary = PIL.Image.open(cover_initial_boundary_name, mode='r')
    #         cover_initial_bad_region = PIL.Image.open(cover_initial_bad_region_name, mode='r')
    #         cover_initial_sector_bounds = PIL.Image.open(cover_initial_sector_bounds_name, mode='r')

    #         cover_corrected_region = PIL.Image.open(cover_corrected_region_name, mode='r')
    #         cover_corrected_boundary = PIL.Image.open(cover_corrected_boundary_name, mode='r')
    #         cover_corrected_bad_region = PIL.Image.open(cover_corrected_bad_region_name, mode='r')
    #         cover_corrected_sector_bounds = PIL.Image.open(cover_corrected_sector_bounds_name, mode='r')

    #         cover_sector = PIL.Image.open(cover_sector_name, mode='r')
    #         cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')

    #         if use_expert_counts == True:
    #             cover_counts = PIL.Image.open(cover_counts_name, mode='r')

    #         # cover_region = PIL.Image.open(cover_region_name, mode='r')
    #         # cover_bad_region = PIL.Image.open(cover_bad_region_name, mode='r')
    #         # cover_sector = PIL.Image.open(cover_sector_name, mode='r')
    #         # cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')
    #         # cover_sector_bounds = PIL.Image.open(cover_sector_bounds_name, mode='r')
    #         #print('This Image: ')
    #         #print(pathlib.Path(all_cropped_images[index]).stem)
    #         #print(pathlib.Path(all_cropped_masks[index]).stem)
    #         #print(pathlib.Path(all_cropped_sectors[index]).stem)
    #         w_im,h_im = cover_image.size
    #         w_ma,h_ma = cover_mask.size
    #         w_annot,h_annot = cover_initial_sector_bounds.size
    #         scaling_factor_im = target_height / float(h_im)
    #         scaling_factor_ma = target_height / float(h_ma)
    #         scaling_factor_annot = target_height / float(h_annot)
    #         scaled_width_im = scaling_factor_im*w_im
    #         scaled_width_ma = scaling_factor_ma*w_ma
    #         scaled_width_annot = scaling_factor_annot*w_annot
    #         scaled_height_im = scaling_factor_im*h_im
    #         scaled_height_ma = scaling_factor_ma*h_ma
    #         scaled_height_annot = scaling_factor_annot*h_annot
    #         #image = all_cropped_images[this_colony]
    #         #mask = all_cropped_masks[this_colony]

    #         # Check that the image, mask, and number will fit inside the margins on the give row, and if not, move them to the next row
    #         if col_position + (3*scaled_width_im) + (10*scaled_width_ma) + (2*scaled_width_annot) + number_spacing > right_margin:
    #             row_number = row_number + 1
    #             col_position = 10
    #             current_col_position = copy.deepcopy(col_position)
    #             if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
    #                 pdf.add_page()
    #                 row_number = 1 # initial row index
    #         else:
    #             current_col_position = copy.deepcopy(col_position)
    #         #print(all_cropped_images[index])
    #         # Add images and masks to the defined position

    #         # Cropping of colony
    #         pdf.image(cover_image_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
    #         current_col_position = current_col_position + scaled_width_im

    #         # Cropping of colony from Wes's annotations
    #         if use_expert_counts == True:
    #             pdf.image(cover_counts_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
    #             current_col_position = current_col_position + scaled_width_im

    #         # Cropping of colony with circle
    #         pdf.image(cover_circle_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony with overlayed circle
    #         current_col_position = current_col_position + scaled_width_im

    #         current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

    #         # Raw semgentation
    #         pdf.image(cover_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
    #         current_col_position = current_col_position + scaled_width_ma

    #         # The boundary of the raw segmentation
    #         pdf.image(cover_initial_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
    #         current_col_position = current_col_position + scaled_width_ma

    #         current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

    #         # The regional breakdown of the raw segmentation
    #         pdf.image(cover_initial_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
    #         current_col_position = current_col_position + scaled_width_ma

    #         # Annotations of where the red regions are found in the regional breakdown
    #         pdf.image(cover_initial_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
    #         current_col_position = current_col_position + scaled_width_annot

    #         # The inconsistent regions located
    #         pdf.image(cover_initial_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
    #         current_col_position = current_col_position + scaled_width_ma

    #         current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

    #         # The segmentation such that boundary pixels inconsistent with their assigned region were changed
    #         pdf.image(cover_corrected_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
    #         current_col_position = current_col_position + scaled_width_ma

    #         # The boundary of the corrected segmentation
    #         pdf.image(cover_corrected_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
    #         current_col_position = current_col_position + scaled_width_ma

    #         current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

    #         # The regional breakdown of the corrected segmentation
    #         pdf.image(cover_corrected_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
    #         current_col_position = current_col_position + scaled_width_ma

    #         # Annotations on the corrected segmentations indicating where the red regions are (these should be the sectors)
    #         pdf.image(cover_corrected_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
    #         current_col_position = current_col_position + scaled_width_annot

    #         # Any inconsistent regions detected in the corrected segmentation (in theory, these should always be completely black)
    #         pdf.image(cover_corrected_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
    #         current_col_position = current_col_position + scaled_width_ma

    #         current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

    #         # Partitioning of the red regions found in the corrected segmentation (each shaed of gray is a different sector)
    #         pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
    #         current_col_position = current_col_position + scaled_width_ma
            
    #         # A subset of the previous image with only the red pixels preserved.
    #         pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
    #         current_col_position = current_col_position + scaled_width_ma

    #         # pdf.image(cover_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
    #         # current_col_position = current_col_position + scaled_width_ma

    #         # pdf.image(cover_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
    #         # current_col_position = current_col_position + scaled_width_ma

    #         # current_col_position = current_col_position + 10 # A margin to separate subsets of visualizations

    #         # pdf.image(cover_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
    #         # current_col_position = current_col_position + scaled_width_annot
            
    #         # pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
    #         # current_col_position = current_col_position + scaled_width_ma
            
    #         # pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
    #         # current_col_position = current_col_position + scaled_width_ma
    #         #col_position = col_position + scaled_width_ma
    #         #pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('# Sec: ' + str(int(row['Pred # Sectors']))))
    #         pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('Col #: ' + str(row['Colony Number'])))
    #         pdf.text(current_col_position + number_spacing, target_height*row_number + 36, str('Avg Sc: ' + str(round(row['Avg Sector Score'], 2))))
    #         #pdf.text(current_col_position + number_spacing, target_height*row_number + 76, cured_status)

    #         # make space for the next set of images

    #         col_position = current_col_position + number_spacing + col_margin

    # Write pdf file with output of colony data generated for this plate.
    pdf.output(output_details_folder + '/' + this_plate_stem + '.pdf', "F")
    print('Colonies from ' + this_plate_stem + ' printed.')



In [None]:
# Merge all pdf files printed.  This will concatenate data from all plates.
# Files are appended in sorted filename order, so that is the order of the
# plates in the combined document.
all_files = sorted(glob.glob(output_details_folder + '/' + '*.pdf'))
merger = PdfMerger()
try:
    for this_file in all_files:
        # Hand the path to the merger directly instead of wrapping a bare
        # open() in a PdfReader: the original never closed those handles,
        # leaking one open file per plate.
        merger.append(this_file)

    # Write the combined document next to the other top-level outputs.
    merger.write(main_folder + '/' + colony_chart_doc + '.pdf')
finally:
    # Release everything the merger holds, even if a plate PDF fails to merge.
    merger.close()

# Write files containing only samples from each plate

In [None]:
# Number of colonies drawn at random (without replacement) from each plate
num_samples_per_plate = 40

# Make sure the destination folder for the sampled output exists
output_samples_folder = test_output_folder + '/PDF Samples'
if not os.path.exists(output_samples_folder):
    os.makedirs(output_samples_folder)

# Fix NumPy's global random seed so the sampled colonies are repeatable.
# Seeds used so far:
#   seed 1: done on 80 samples per plate (1/12/2023)
#   seed 2: done on 40 samples per plate (1/17/2023)
np.random.seed(2)

In [None]:
# Code to print a pdf of the croppings
# 1. Sort data in table based on plate name, colony number and other elements.
# 2. Iterate through each row of the sorted table to get colony information
# 3. Add information to pdf document.
# 4. Repeat for each colony and plate.
# 5. Output pdf.

# Do the same as above, but group based on the number of sectors detected



for this_plate in all_plate_names:

    # this_plate is the key to the correspnding subfolder in each annotation class
    # Initialize PDF writer
    pdf = FPDF()
    pdf = FPDF(unit = "pt", format = [850,1100])
    pdf.set_font('Arial', 'B', 16)
    target_height = float(40)
    left_margin = 10
    right_margin = 840
    bottom_margin = 1050
    number_spacing = 7
    uniform_spacing = 640


    row_number = 1 # initial row index
    col_position = 10 # initial column position
    col_margin = 40
    this_plate_stem = os.path.splitext(this_plate)[0]
    this_plate_data = colony_data[colony_data['Plate Name'] == this_plate]

    print('Now looking at: ' + str(this_plate))

    # Sample rows from the table
    num_rows_in_data = len(this_plate_data)
    colony_ids = sorted(np.random.choice(num_rows_in_data, num_samples_per_plate, replace=False))
    print('Sampled ' + str(len(colony_ids)) + ' colonies.')
    print(colony_ids)
    this_sampled_plate_data = this_plate_data.iloc[colony_ids]

    #this_plate_data.set_index('Colony Number', inplace=True)
    max_num_sectors_in_plate = int(max(this_sampled_plate_data['Pred # Sectors']))
    pdf.add_page()

    #pdf.text(col_position, target_height*row_number, 'Colonies detected in Plate ' + str(this_plate) + '.  ' + str(len(this_plate_data)) + ' colonies were detected.')
    pdf.text(col_position, target_height*row_number, str(len(this_sampled_plate_data)) + 'detected colonies sampled from Plate ' + str(this_plate) + '.')
    pdf.text(col_position, target_height*row_number + 20, 'Group 1: Raw image data')
    pdf.text(col_position, target_height*row_number + 40, 'Group 2: Raw segmentation of whole colony and boundary')
    pdf.text(col_position, target_height*row_number + 60, 'Group 3: Regional breakdown and analysis before boundary corrections were made')
    pdf.text(col_position, target_height*row_number + 80, 'Group 4: Segmentation with corrected boundary')
    pdf.text(col_position, target_height*row_number + 100, 'Group 5: Regional breakdown and analysis after boundary corrections were made')
    pdf.text(col_position, target_height*row_number + 120, 'Group 6: Breakdown of red sectored regions and pixels after correction')

    row_number = row_number + 2

    for this_num_sectors in range(0,max_num_sectors_in_plate + 1):
        row_number = row_number + 1
        col_position = 10
        if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
            pdf.add_page()
            row_number = 1 # initial row index
        #print(this_plate_data)
        this_plate_sector_data = this_sampled_plate_data[this_sampled_plate_data['Pred # Sectors'] == this_num_sectors]
        #print(this_plate_sector_data)
        this_plate_sector_data.reset_index()
        #print(this_plate_sector_data)
        if this_num_sectors == 1:
            pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sector: ' +  str(len(this_plate_sector_data)))
        else:
            pdf.text(col_position, target_height*row_number + (target_height/2), 'Colonies with ' + str(num2words(this_num_sectors)) + ' sectors: ' +  str(len(this_plate_sector_data)))
        row_number = row_number + 1

        # Sort this subtable by sector scores in descending order
        sorted_plate_sector_data = this_plate_sector_data.sort_values(by=['Avg Sector Score', 'Colony Number'], ascending=[False, True])
        sorted_plate_sector_data.reset_index()

        for index, row in sorted_plate_sector_data.iterrows():

            #print('Colony ' + str(int(row['Colony Number'])))
            
            cured_status = 'Cured' if ((row['Red Area (Seg)'] / row['Colony Area (Seg)']) >= 0.95) else ''

            cover_image_name = output_crops_folder + '/raw/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_circle_name = output_crops_folder + '/circles/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.jpg'
            cover_mask_name = output_crops_folder + '/segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_initial_region_name = output_crops_folder + '/init_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_boundary_name = output_crops_folder + '/init_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_bad_region_name = output_crops_folder + '/init_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_initial_sector_bounds_name = output_crops_folder + '/init_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_corrected_mask_name = output_crops_folder + '/cor_segs/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_region_name = output_crops_folder + '/cor_regions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_boundary_name = output_crops_folder + '/cor_bounds/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_bad_region_name = output_crops_folder + '/cor_bad/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_corrected_sector_bounds_name = output_crops_folder + '/cor_partitions/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            cover_sector_name = output_crops_folder + '/sectors/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'
            cover_sector_comp_name = output_crops_folder + '/sector_comps/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            if use_expert_counts == True:
                cover_counts_name = output_crops_folder + '/counted/' + file_dict[this_plate] + '/' + this_plate_stem + '_c_' + str(int(row['Colony Number'])) + '.png'

            # cover_region_name = output_crops_folder + '/Colony Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_bad_region_name = output_crops_folder + '/Colony Bad Regions/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_name = output_crops_folder + '/Colony Sectors/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_comp_name = output_crops_folder + '/Colony Sector Comps/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'
            # cover_sector_bounds_name = output_crops_folder + '/Colony Sector Bounds/' + this_plate_stem + '_Colony_' + str(int(row['Colony Number'])) + '.png'

            cover_image = PIL.Image.open(cover_image_name, mode='r')
            cover_circle = PIL.Image.open(cover_circle_name, mode='r')
            cover_mask = PIL.Image.open(cover_mask_name, mode='r')

            cover_initial_region = PIL.Image.open(cover_initial_region_name, mode='r')
            cover_initial_boundary = PIL.Image.open(cover_initial_boundary_name, mode='r')
            cover_initial_bad_region = PIL.Image.open(cover_initial_bad_region_name, mode='r')
            cover_initial_sector_bounds = PIL.Image.open(cover_initial_sector_bounds_name, mode='r')

            cover_corrected_region = PIL.Image.open(cover_corrected_region_name, mode='r')
            cover_corrected_boundary = PIL.Image.open(cover_corrected_boundary_name, mode='r')
            cover_corrected_bad_region = PIL.Image.open(cover_corrected_bad_region_name, mode='r')
            cover_corrected_sector_bounds = PIL.Image.open(cover_corrected_sector_bounds_name, mode='r')

            cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')

            if use_expert_counts == True:
                cover_counts = PIL.Image.open(cover_counts_name, mode='r')

            # cover_region = PIL.Image.open(cover_region_name, mode='r')
            # cover_bad_region = PIL.Image.open(cover_bad_region_name, mode='r')
            # cover_sector = PIL.Image.open(cover_sector_name, mode='r')
            # cover_sector_comp = PIL.Image.open(cover_sector_comp_name, mode='r')
            # cover_sector_bounds = PIL.Image.open(cover_sector_bounds_name, mode='r')
            #print('This Image: ')
            #print(pathlib.Path(all_cropped_images[index]).stem)
            #print(pathlib.Path(all_cropped_masks[index]).stem)
            #print(pathlib.Path(all_cropped_sectors[index]).stem)
            w_im,h_im = cover_image.size
            w_ma,h_ma = cover_mask.size
            w_annot,h_annot = cover_initial_sector_bounds.size
            scaling_factor_im = target_height / float(h_im)
            scaling_factor_ma = target_height / float(h_ma)
            scaling_factor_annot = target_height / float(h_annot)
            scaled_width_im = scaling_factor_im*w_im
            scaled_width_ma = scaling_factor_ma*w_ma
            scaled_width_annot = scaling_factor_annot*w_annot
            scaled_height_im = scaling_factor_im*h_im
            scaled_height_ma = scaling_factor_ma*h_ma
            scaled_height_annot = scaling_factor_annot*h_annot
            #image = all_cropped_images[this_colony]
            #mask = all_cropped_masks[this_colony]

            # Check that the image, mask, and number will fit inside the margins on the give row, and if not, move them to the next row
            if col_position + (3*scaled_width_im) + (10*scaled_width_ma) + (2*scaled_width_annot) + number_spacing > right_margin:
                row_number = row_number + 1
                col_position = 10
                current_col_position = copy.deepcopy(col_position)
                if (target_height*(row_number+1)) > bottom_margin: # If there is not enough room for rows, move the remaining images to a new page.
                    pdf.add_page()
                    row_number = 1 # initial row index
            else:
                current_col_position = copy.deepcopy(col_position)
            #print(all_cropped_images[index])
            # Add images and masks to the defined position

            # Cropping of colony
            pdf.image(cover_image_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
            current_col_position = current_col_position + scaled_width_im

            # Cropping of colony from Wes's annotations
            if use_expert_counts == True:
                pdf.image(cover_counts_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony
                current_col_position = current_col_position + scaled_width_im

            # Cropping of colony with circle
            pdf.image(cover_circle_name, current_col_position, target_height*row_number, scaled_width_im, scaled_height_im) # Insert cropping of colony with overlayed circle
            current_col_position = current_col_position + scaled_width_im

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Raw semgentation
            pdf.image(cover_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the raw segmentation
            pdf.image(cover_initial_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert colony segmentation within the circle
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the raw segmentation
            pdf.image(cover_initial_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations of where the red regions are found in the regional breakdown
            pdf.image(cover_initial_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # The inconsistent regions located
            pdf.image(cover_initial_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The segmentation such that boundary pixels inconsistent with their assigned region were changed
            pdf.image(cover_corrected_mask_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # The boundary of the corrected segmentation
            pdf.image(cover_corrected_boundary_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # The regional breakdown of the corrected segmentation
            pdf.image(cover_corrected_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            current_col_position = current_col_position + scaled_width_ma

            # Annotations on the corrected segmentations indicating where the red regions are (these should be the sectors)
            pdf.image(cover_corrected_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            current_col_position = current_col_position + scaled_width_annot

            # Any inconsistent regions detected in the corrected segmentation (in theory, these should always be completely black)
            pdf.image(cover_corrected_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            current_col_position = current_col_position + scaled_width_ma

            current_col_position = current_col_position + 5 # A margin to separate subsets of visualizations

            # Partitioning of the red regions found in the corrected segmentation (each shade of gray is a different sector)
            pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            current_col_position = current_col_position + scaled_width_ma
            
            # A subset of the previous image with only the red pixels preserved.
            pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation showing the different regions of the colony
            # current_col_position = current_col_position + scaled_width_ma

            # pdf.image(cover_bad_region_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert segmentation that shows the regions that failed the consistency check
            # current_col_position = current_col_position + scaled_width_ma

            # current_col_position = current_col_position + 10 # A margin to separate subsets of visualizations

            # pdf.image(cover_sector_bounds_name, current_col_position, target_height*row_number, scaled_width_annot, scaled_height_annot) # Insert segmentaiton with boundaries of sectors annotated
            # current_col_position = current_col_position + scaled_width_annot
            
            # pdf.image(cover_sector_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert predicted sector regions
            # current_col_position = current_col_position + scaled_width_ma
            
            # pdf.image(cover_sector_comp_name, current_col_position, target_height*row_number, scaled_width_ma, scaled_height_ma) # Insert red pixel segmentations within the predicted secto regions.
            # current_col_position = current_col_position + scaled_width_ma
            #col_position = col_position + scaled_width_ma
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('# Sec: ' + str(int(row['Pred # Sectors']))))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 16, str('Col #: ' + str(row['Colony Number'])))
            pdf.text(current_col_position + number_spacing, target_height*row_number + 36, str('Avg Sc: ' + str(round(row['Avg Sector Score'], 2))))
            #pdf.text(current_col_position + number_spacing, target_height*row_number + 76, cured_status)

            # make space for the next set of images

            col_position = current_col_position + number_spacing + col_margin

    # Write pdf file with output of colony data generated for this plate.
    pdf.output(output_samples_folder + '/' + this_plate_stem + '.pdf', "F")
    print('Colonies from ' + this_plate_stem + ' printed.')



In [None]:
# Merge all pdf files printed.  This will concatenate data from all plates.
# The per-plate PDFs found in output_samples_folder are appended in sorted
# filename order and written out as a single document in main_folder.
merged_pdf_path = main_folder + '/' + colony_chart_doc + '.pdf'
all_files = sorted(glob.glob(output_samples_folder + '/*.pdf'))
merger = PdfMerger()
try:
    for this_file in all_files:
        # Guard for re-runs: if the merged document happens to live in the same
        # folder as the per-plate PDFs, never feed a previous output back into itself.
        if os.path.abspath(this_file) == os.path.abspath(merged_pdf_path):
            continue
        # Pass the path (not an anonymous open() handle) so the merger owns the
        # underlying file and can release it in close() below.
        merger.append(this_file)

    merger.write(merged_pdf_path)
finally:
    # Release every input file handle held by the merger, even if a plate PDF
    # failed to parse or the write failed.
    merger.close()