In [None]:
import cv2
import numpy as np
import os
from PIL import Image
from tqdm import tqdm


# Minimum bounding-rectangle area (w * h, in pixels) a contour must exceed
# for find_bboxes to keep it as an annotation.
MIN_AREA_THRESHOLD = 100_000

def rescale_image(img_path, rescale_factor):
    """Load an image and return it resized by *rescale_factor* as an array.

    Args:
        img_path: Path of the image file to open with Pillow.
        rescale_factor: Multiplier applied to both width and height. The
            scaled dimensions are truncated to integers.

    Returns:
        A ``numpy.ndarray`` built from the Lanczos-resampled image.

    Raises:
        ValueError: If *rescale_factor* is not positive.
    """
    if rescale_factor <= 0:
        raise ValueError(f"rescale_factor must be positive, got {rescale_factor!r}")

    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released even if resizing fails.
    with Image.open(img_path) as img:
        new_size = (int(img.width * rescale_factor), int(img.height * rescale_factor))
        img_rescaled = img.resize(new_size, Image.LANCZOS)

    # resize() returns an independent image, so it stays valid after the
    # source file has been closed.
    return np.array(img_rescaled)

def find_bboxes(raw_patch, bbox_patch):
    """Derive YOLO-style boxes from the difference between two patches.

    The absolute difference of the patches is converted to grayscale and
    thresholded; the bounding rectangle of every external contour whose
    area (w * h) exceeds ``MIN_AREA_THRESHOLD`` is kept.

    Args:
        raw_patch: Image array whose shape unpacks as (height, width, channels).
        bbox_patch: Array differenced against *raw_patch* with ``cv2.absdiff``.

    Returns:
        A 3-tuple ``(yolo_bboxes, output_image, raw_patch)`` where
        ``yolo_bboxes`` is a list of ``[0, x_center, y_center, width, height]``
        entries normalised by the patch dimensions, ``output_image`` is a copy
        of *raw_patch* with the kept rectangles drawn on it, and ``raw_patch``
        is the input returned unchanged.
    """
    difference = cv2.absdiff(raw_patch, bbox_patch)
    # NOTE(review): BGR2GRAY assumes BGR channel order -- confirm what callers pass.
    difference_gray = cv2.cvtColor(difference, cv2.COLOR_BGR2GRAY)
    # Pixels whose grayscale difference is above 25 become 255, the rest 0.
    mask = cv2.threshold(difference_gray, 25, 255, cv2.THRESH_BINARY)[1]

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    patch_h, patch_w, _ = raw_patch.shape
    annotated = raw_patch.copy()

    boxes = []
    for rect in map(cv2.boundingRect, contours):
        x, y, w, h = rect
        if w * h <= MIN_AREA_THRESHOLD:
            continue  # too small to count as an annotation

        # Class id 0, then centre and size as fractions of the patch.
        boxes.append([
            0,
            (x + w / 2) / patch_w,
            (y + h / 2) / patch_h,
            w / patch_w,
            h / patch_h,
        ])
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 0), 2)

    return boxes, annotated, raw_patch

def _write_rgb_png(path, rgb_array):
    """Write *rgb_array* to *path* with OpenCV, raising if the write fails."""
    # cv2.imwrite expects BGR channel order. The arrays here come from
    # rescale_image (Pillow), so R and B are swapped before writing.
    # NOTE(review): assumes the source images are RGB/RGBA mode -- confirm.
    if not cv2.imwrite(path, cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Could not write patch image: {path}")


def patch_and_process_images(raw_image_path, bbox_image_path, output_folder, raw_output_folder, yolo_output_folder, patch_size, rescale_factor):
    """Tile a raw/annotated image pair and export patches that contain boxes.

    Both images are loaded through ``rescale_image`` and cut into
    non-overlapping ``patch_size`` x ``patch_size`` tiles. Each tile pair is
    passed to ``find_bboxes``; tiles that yield at least one box are written
    out, the others are skipped.

    Args:
        raw_image_path: Path of the unannotated image. Its base name (without
            extension) prefixes every output file.
        bbox_image_path: Path of the annotated counterpart image.
        output_folder: Folder receiving patches with the rectangles drawn.
        raw_output_folder: Folder receiving the untouched raw patches.
        yolo_output_folder: Folder receiving one ``.txt`` label file per
            patch, one ``class x_center y_center width height`` line per box.
        patch_size: Edge length of a tile, in pixels of the rescaled image.
        rescale_factor: Scale factor forwarded to ``rescale_image``.

    Raises:
        OSError: If OpenCV reports that a patch image could not be written.
    """
    # Disables Pillow's decompression-bomb size limit for the whole process,
    # so very large images can be opened.
    Image.MAX_IMAGE_PIXELS = None

    raw_image_np = rescale_image(raw_image_path, rescale_factor)
    bbox_image_np = rescale_image(bbox_image_path, rescale_factor)

    original_image_name = os.path.splitext(os.path.basename(raw_image_path))[0]

    for folder in (output_folder, yolo_output_folder, raw_output_folder):
        os.makedirs(folder, exist_ok=True)

    # The grid must follow the array that is actually sliced. Using the
    # on-disk image size would tile the wrong area whenever
    # rescale_factor != 1.0.
    height, width = raw_image_np.shape[:2]
    nx = width // patch_size
    ny = height // patch_size

    for i in range(nx):
        left = i * patch_size
        right = left + patch_size
        for j in range(ny):
            upper = j * patch_size
            lower = upper + patch_size

            raw_patch = raw_image_np[upper:lower, left:right]
            bbox_patch = bbox_image_np[upper:lower, left:right]

            # An empty slice means the tile lies outside one of the images.
            if raw_patch.size == 0 or bbox_patch.size == 0:
                continue

            yolo_bboxes, processed_patch, new_raw_patch = find_bboxes(raw_patch, bbox_patch)
            if not yolo_bboxes:
                continue

            # The three outputs of a tile share one stem so they can be matched up.
            patch_stem = f"{original_image_name}_patch_{i}_{j}"
            _write_rgb_png(os.path.join(output_folder, patch_stem + ".png"), processed_patch)
            _write_rgb_png(os.path.join(raw_output_folder, patch_stem + ".png"), new_raw_patch)

            yolo_path = os.path.join(yolo_output_folder, patch_stem + ".txt")
            with open(yolo_path, 'w') as file:
                for bbox in yolo_bboxes:
                    file.write(" ".join(map(str, bbox)) + "\n")

# Tiling configuration: patch edge length (pixels) and image rescale factor.
patch_size = 640
rescale_factor = 1.0

# Destination folders for annotated patches, raw patches and YOLO label files.
output_folder = '/Users/tgautam/Documents/PUC/bbox_patches'
raw_output_folder = '/Users/tgautam/Documents/PUC/raw_patches'
yolo_output_folder = '/Users/tgautam/Documents/PUC/yolo_annotations'

# Scans to process, as (DSP set number, scan ids) in processing order.
_SCANS_BY_SET = (
    (2, (
        'MS015S1', 'MS016S1', 'MS017S1', 'MS018S1', 'MS019S1', 'MS020S1',
        'MS021S1', 'MS022S1', 'MS023S1', 'MS024S1', 'MS025S1', 'MS026S1',
        'MS028S1', 'MS029S1',
    )),
    (3, (
        'MOS035S1', 'MOS037S1', 'MOS038S1', 'MOS039S1', 'MOS040S1',
        'MOS043S1', 'MOS044S1',
    )),
    (5, ('MOS066S1', 'MOS068S1', 'MOS069S1')),
    (7, ('MOS079S1',)),
)


def _scan_pair(set_number, scan_id):
    """Return the (scan, ``_MU`` companion) PNG paths for one scan id."""
    scan_dir = f'/Volumes/Genomic Medicine/2014-0938/MOSAIC/DSP/MOSAIC DSP Set {set_number}/Images/DSP Scans'
    return (f'{scan_dir}/{scan_id}.png', f'{scan_dir}/{scan_id}_MU.png')


# Each entry is (raw image path, bbox image path).
paths = [
    _scan_pair(set_number, scan_id)
    for set_number, scan_ids in _SCANS_BY_SET
    for scan_id in scan_ids
]

for raw_image_path, bbox_image_path in tqdm(paths, desc='Processing images'):
    patch_and_process_images(
        raw_image_path,
        bbox_image_path,
        output_folder,
        raw_output_folder,
        yolo_output_folder,
        patch_size,
        rescale_factor,
    )

print("Completed")
