Cropping and Rotating

In [2]:
from cmath import nan
from PIL import Image
import numpy as np
from numpy import asarray
import matplotlib.pyplot as plt
import os, glob
from PIL import Image
import cv2
import pandas as pd

In [7]:
def crop_rotate_dir(input_dir = 'input',output_dir='output', angle = -1.5, left = 135, upper =85, right = 600, lower = 390):
    # read every image file from the input folder
    for filename in glob.glob('220118_122237_869669.jpeg'):
        # print(filename)
        with Image.open(filename) as im:
            # (left, upper, right, lower) = (100, 60, 630, 400)
            rotated = im.rotate(angle, expand = 1)
            im_final = rotated.crop((left, upper, right, lower))            
            im_final.save(filename.replace(input_dir, output_dir))

In [8]:
# Best parameters
crop_rotate_dir(input_dir = '../test_img/',output_dir='output_test/',left = 135, upper =85, right = 600, lower = 390)

In [53]:
"""
input: filename of plate
output: pd df of the ground truth for that plate
"""
from typing import Tuple


def get_ground_truth(filename: str):
    id = filename.split('_')[-1][:-5]
    labels = pd.read_csv(f'labeled_dataset/{id}' + '/' + f'{id}_labels.csv', index_col=0)
    return labels

"""
gaussian_kernel_size: greater = blurring in larger neighborhood
gaussian_sigma: greater sigma = more blurring
adp_th_block_size needs to be odd: greater = looking at local intensities in a larger neighborhood
adp_th_const is a constant that is subtracted from the weighted mean; greater = effectively more noise reduction
"""
def load_and_preprocess_img(filename: str, gaussian_kernel_size: Tuple = (3,3), gaussian_sigma: float = 1.0, adp_th_block_size: int = 5, adp_th_const: int = 4):
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    img_blurred = cv2.GaussianBlur(img, gaussian_kernel_size, gaussian_sigma)
    img_th = cv2.adaptiveThreshold(img_blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, adp_th_block_size, adp_th_const)
    return img_th

"""
exclude wells with black background
"""
def exclude_wells(col_idx, row_idx):
    if ((col_idx == 2 or col_idx == 3) and row_idx == 12) or ((row_idx ==0 or row_idx == 15) and (col_idx==11 or col_idx ==12)) or (row_idx == 2 and col_idx==20):
        return True
    return False
"""
input: img or preprocessed img
output: conceptually a matrix of 16 x 24, each entry is the isolated well image
"""
def isolate_each_well(img_th):
    to_return = []
    y = img_th.shape[0] / 16
    x = img_th.shape[1] / 24
    for row in np.arange(0, img_th.shape[0], y):
        col_out = []
        for col in np.arange(0, img_th.shape[1], x):
            col_out.append(img_th[round(row):round(row+y),round(col):round(col+x)])
        to_return.append(col_out)
    return np.array(to_return, dtype=object)

"""
If one and only one particle is detected, report 1
If no particle detected, report 0
else: report -1 to indicate ambiguity
"""
def particle_detection_prediction(well_img):
    nb_components, arr = cv2.connectedComponents(well_img, connectivity=8)
    if nb_components - 1 == 1:
        pred = 1
    elif nb_components - 1 == 0:
        pred = 0
    else:
        pred = -1
    return pred

In [73]:
"""
Uses particle detection to predict whether or not bead is present.
Returns accuracy and a list of unsure well locations for each img (if particle_detection_prediction returns -1)
    which means that more than one particle is detected in the well
"""
from numpy import isnan


def melodys_pipeline(input_dir):
    accuracy_list = []
    unsure_dict = {}
    # read every image file from the input folder
    for filename in glob.glob(input_dir+'/*.jpeg'):
        right = 0
        wrong = 0
        unsure = []
        labels = get_ground_truth(filename)
        img_th = load_and_preprocess_img(filename, (3,3), 1.0, 5, 4)
        well_imgs = isolate_each_well(img_th)
        for row in range(well_imgs.shape[0]):
            for col in range(well_imgs.shape[1]):
                if exclude_wells(col, row):
                    continue
                well = np.array(well_imgs[row,col])
                pred = particle_detection_prediction(well)
                # pred = len(np.unique(well)) - 1
                if pred != -1:
                    truth = labels.loc[(labels['COLUMN_ID']==col + 1) & (labels['ROW_ID']==row + 1),'LABEL'].tolist()[0]
                    if np.isnan(truth):
                        continue
                    if pred == truth:
                        right += 1
                    else:
                        wrong += 1
                else:
                    unsure.append([row, col])
        accuracy_list.append(right / (right + wrong + len(unsure)))
        unsure_dict[filename] = len(unsure)
    return accuracy_list, unsure_dict

In [74]:
melodys_pipeline(os.getcwd())

([0.7933130699088146,
  0.9348441926345609,
  0.8246153846153846,
  0.46419098143236076],
 {'/Users/yiqingmelodywang/Desktop/CMU/Ginkgo/GinkgoCapstone/output_test220104_152235_862416.jpeg': 28,
  '/Users/yiqingmelodywang/Desktop/CMU/Ginkgo/GinkgoCapstone/output_test220322_134621_907015.jpeg': 14,
  '/Users/yiqingmelodywang/Desktop/CMU/Ginkgo/GinkgoCapstone/output_test220104_150753_862416.jpeg': 15,
  '/Users/yiqingmelodywang/Desktop/CMU/Ginkgo/GinkgoCapstone/220118_122237_869669.jpeg': 77})

In [5]:
df = pd.read_csv('small_val_labels.csv')
df.loc[(df['col_id']==1) & (df['row_id']==1),'label'].tolist()

[]

In [77]:
def melodys_toy_data_pipeline(img_dir):
    accuracy_list = []
    unsure_dict = {}
    # read every image file from the input folder
    
    right = 0
    wrong = 0
    unsure = []
    labels = pd.read_csv('small_val_labels.csv')
    # plt.figure(0)
    # plt.imshow(cv2.imread(img_dir,0))
    img_th = load_and_preprocess_img(img_dir, (3,3), 1.0, 5, 4)
    # plt.figure(999)
    # plt.imshow(img_th)
    well_imgs = isolate_each_well(img_th)
    for row in range(well_imgs.shape[0]):
        for col in range(well_imgs.shape[1]):
            truth = labels.loc[(labels['col_id']==col) & (labels['row_id']==row),'label'].tolist()
            if truth == [] or np.isnan(truth):
                continue
            truth = truth[0]
            if exclude_wells(col, row):
                continue
            well = np.array(well_imgs[row,col])
            # print(cv2.connectedComponents(well))
            pred = particle_detection_prediction(well)
            # pred = len(np.unique(well)) - 1
            # plt.figure((row+1)*(col+1))
            # plt.title(str(truth))
            # plt.imshow(well)
            if pred != -1:
                if pred == truth:
                    right += 1
                else:
                    wrong += 1
            else:
                unsure.append([row, col])
    accuracy_list.append(right / (right + wrong + len(unsure)))
    unsure_dict[img_dir] = len(unsure)
    return accuracy_list, unsure_dict

In [78]:
melodys_toy_data_pipeline('220118_122237_869669.jpeg')

([0.81], {'220118_122237_869669.jpeg': 18})