<a href="https://colab.research.google.com/github/arnav-meduri/PVO-segmentation/blob/main/Arnav_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install opencv-python
! pip install numpy



In [2]:
# Mount Google Drive at /content/drive so the Drive paths used by the cells
# below resolve; force_remount=True is passed so the mount is redone on re-run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
folder_path = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Annotated_PVO_OM_7-2-23/"
!ls -laF $folder_path

total 172031
-rw------- 1 root root 4939321 Jul  1 19:39 '216 OD- 1.png'
-rw------- 1 root root 4890773 Jul  1 19:39 '216 OD- 2.png'
-rw------- 1 root root 4948651 Jul  1 19:39 '216 OD- 3.png'
-rw------- 1 root root 4939316 Jul  1 19:39 '216 OD- 4.png'
-rw------- 1 root root 4820389 Jul  1 19:39 '216 OD- 5.png'
-rw------- 1 root root 3041556 Jul  1 15:50 '24 OS- 1.png'
-rw------- 1 root root 3056103 Jul  1 15:51 '24 OS- 2.png'
-rw------- 1 root root 3023277 Jul  1 15:52 '24 OS- 3.png'
-rw------- 1 root root 3021458 Jul  3 01:34 '24 OS- 4.png'
-rw------- 1 root root 3027693 Jul  1 15:53 '24 OS- 5.png'
-rw------- 1 root root 4826426 Jul  1 19:39 '298 OS-1.png'
-rw------- 1 root root 4778798 Jul  1 19:39 '298 OS-2.png'
-rw------- 1 root root 4835214 Jul  1 19:39 '298 OS-3.png'
-rw------- 1 root root 4807548 Jul  1 19:39 '298 OS-4.png'
-rw------- 1 root root 4768877 Jul  1 19:39 '298 OS-5.png'
-rw------- 1 root root 4869357 Jul  1 16:55 '357 OD-1.png'
-rw------- 1 root root 4729393 Jul  1 

In [4]:
import os

import cv2
import numpy as np

def is_grayscale(img_path):
    """Report whether the image stored at ``img_path`` carries no color information.

    The file is loaded with ``cv2.IMREAD_UNCHANGED`` so its channel layout is
    kept as stored. The image counts as grayscale when the loaded array is
    2-D, has a single channel, or has identical blue, green and red planes
    (a fourth/alpha plane is ignored).

    Args:
        img_path: Path of the image file to inspect.

    Returns:
        bool: True if the image is grayscale by the rules above, else False.

    Raises:
        OSError: If ``cv2.imread`` could not load the file (it signals failure
            by returning ``None`` rather than raising).
    """
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise OSError(f'Could not read image: {img_path}')

    # A 2-D array or a single trailing channel has no color planes to compare.
    if img.ndim < 3 or img.shape[2] == 1:
        return True

    # Only the first three planes are compared, so an alpha plane (if any)
    # never takes part and no explicit BGRA->BGR conversion is needed.
    b, g, r = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    return bool((b == g).all() and (b == r).all())

In [5]:
def findBoundingBox(img_path):
  """Find red regions in an image and return their bounding boxes.

  The image is converted to HSV, thresholded on two red hue bands, cleaned
  with an erode/dilate pass, and the outer contours of what remains are
  boxed. The image height and width are printed as a side effect.

  Args:
      img_path: Path of the image file to analyse.

  Returns:
      list: One ``((x, y), (x + w, y + h))`` tuple per detected contour,
      i.e. the top-left corner and the corner one past the bottom-right
      pixel, in pixel coordinates. Empty when nothing red is found or when
      the image has fewer than three channels.

  Raises:
      OSError: If ``cv2.imread`` could not load the file (it returns ``None``).
  """
  img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
  if img is None:
    raise OSError(f'Could not read image: {img_path}')
  print(f'image height = {img.shape[0]}, width = {img.shape[1]}')

  # A single-channel image cannot contain red, and BGR2HSV would reject it.
  if img.ndim < 3 or img.shape[2] < 3:
    return []

  # Convert the image to HSV
  hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

  # Red sits at both ends of OpenCV's hue axis, so two bands are needed.
  # Each triple is (hue, saturation, value).
  lower_red1 = np.array([0, 70, 50])
  upper_red1 = np.array([10, 255, 255])
  lower_red2 = np.array([170, 70, 50])
  upper_red2 = np.array([180, 255, 255])

  # Union of the two bands; bitwise_or avoids uint8 wrap-around that a plain
  # `+` would produce if the bands ever overlapped.
  mask = cv2.bitwise_or(
      cv2.inRange(hsv, lower_red1, upper_red1),
      cv2.inRange(hsv, lower_red2, upper_red2),
  )

  # Erode then dilate to drop isolated specks before looking for contours.
  mask = cv2.erode(mask, None, iterations=2)
  mask = cv2.dilate(mask, None, iterations=2)

  # findContours returns (contours, hierarchy) in OpenCV 4 and
  # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour list in both.
  cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

  coordinates = []
  for cnt in cnts:
    x, y, w, h = cv2.boundingRect(cnt)
    coordinates.append(((x, y), (x + w, y + h)))

  return coordinates


In [6]:
import cv2
import numpy as np

def padAndResize(img_path, upload_folder, to_size = 512):
  """Pad an image to a square with black pixels, resize it, and save it.

  The image keeps its position in the top-left corner: a tall image gets
  black columns on the right, a wide image gets black rows at the bottom.
  The square is then resized to ``to_size`` x ``to_size`` and written into
  ``upload_folder`` under a file name made from ``img_path`` with every
  ``/`` replaced by ``_``.

  Args:
      img_path: Path of the source image.
      upload_folder: Directory the result is written to.
      to_size: Edge length, in pixels, of the saved square image.

  Raises:
      OSError: If the source cannot be read or the result cannot be written
          (``cv2.imread`` / ``cv2.imwrite`` report failure through their
          return value instead of raising).
  """
  image = cv2.imread(img_path)
  if image is None:
    raise OSError(f'Could not read image: {img_path}')

  height, width = image.shape[:2]
  side = max(height, width)

  # Black square canvas with the image copied into its top-left corner;
  # whatever is left over on the right or at the bottom is the padding.
  square_image = np.zeros((side, side, 3), dtype=np.uint8)
  square_image[:height, :width] = image

  resized_image = cv2.resize(square_image, (to_size, to_size))

  # Flatten the source path into a single file name inside upload_folder.
  img_name = os.path.join(upload_folder, img_path.replace('/', '_'))
  if not cv2.imwrite(img_name, resized_image):
    raise OSError(f'Could not write image: {img_name}')


In [36]:
import cv2
import numpy as np

def gray_downsample(img_path, upload_folder, discard_prefix = "", to_size = 512):
  """Convert an image to grayscale, resize it, and save it under ``upload_folder``.

  The image is resized straight to ``to_size`` x ``to_size`` with
  nearest-neighbour interpolation (no padding, so a non-square source is
  stretched). The output keeps the source's path relative to
  ``discard_prefix``, and missing output directories are created.

  Args:
      img_path: Path of the source image.
      upload_folder: Root directory for the output.
      discard_prefix: Leading part of ``img_path`` to drop when building the
          output path. Ignored if ``img_path`` does not start with it.
      to_size: Edge length, in pixels, of the saved square image.

  Raises:
      OSError: If the source cannot be read, the output directory cannot be
          created, or the result cannot be written.
  """
  image = cv2.imread(img_path)
  if image is None:
    raise OSError(f'Could not read image: {img_path}')

  gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  resized_image = cv2.resize(gray_image, (to_size, to_size), interpolation=cv2.INTER_NEAREST)

  # Path of the source relative to discard_prefix.
  if discard_prefix and img_path.startswith(discard_prefix):
    relative_path = img_path[len(discard_prefix):]
  else:
    relative_path = img_path
  # os.path.join throws away upload_folder when the second part is absolute,
  # which would write the result over the source image; strip leading
  # separators so the output always lands inside upload_folder.
  relative_path = relative_path.lstrip(os.sep + (os.altsep or ''))
  img_name = os.path.join(upload_folder, relative_path)

  # Create the output directory only when it is actually missing, so the
  # success message is printed once per new directory rather than per file.
  out_dir = os.path.dirname(img_name)
  if out_dir != "" and not os.path.isdir(out_dir):
    try:
      os.makedirs(out_dir, exist_ok=True)
    except OSError as error:
      print(f'Creation of the directory {out_dir} failed with error: {error}')
      raise
    else:
      print(f'Successfully created the directory {out_dir}')

  if not cv2.imwrite(img_name, resized_image):
    raise OSError(f'Could not write image: {img_name}')

In [8]:
#Preprocessing - bounding box masks

import cv2
import numpy as np
from numpy import asarray

def maskPadAndResize(img_path, upload_folder, to_size = 512):
  """Build a bounding-box mask for an image, pad it square, resize it, and save it.

  The mask is black (0) everywhere except inside the boxes returned by
  ``findBoundingBox(img_path)``, which are filled with 255. It has the
  image's size padded to a square (extra columns on the right for a tall
  image, extra rows at the bottom for a wide one), three channels, and
  dtype uint8. The square mask is resized to ``to_size`` x ``to_size`` with
  nearest-neighbour interpolation so it stays strictly 0/255, then written
  into ``upload_folder`` under a file name made from ``img_path`` with
  every ``/`` replaced by ``_``.

  Args:
      img_path: Path of the annotated source image.
      upload_folder: Directory the mask is written to.
      to_size: Edge length, in pixels, of the saved square mask.

  Raises:
      OSError: If the source cannot be read or the mask cannot be written.
  """
  image = cv2.imread(img_path)
  if image is None:
    raise OSError(f'Could not read image: {img_path}')

  coordinates = findBoundingBox(img_path)

  height, width = image.shape[:2]
  side = max(height, width)

  # Allocate the mask already square and as uint8. Padding only ever extends
  # the right/bottom edge, so box coordinates are unchanged by it.
  square_mask = np.zeros((side, side, 3), dtype=np.uint8)

  # Fill the inside of every box with white.
  for (left, top), (right, bottom) in coordinates:
    square_mask[top:bottom, left:right] = 255

  # Nearest-neighbour keeps the mask binary; interpolating would create
  # intermediate gray values along the box edges.
  resized_mask = cv2.resize(square_mask, (to_size, to_size), interpolation=cv2.INTER_NEAREST)

  # Flatten the source path into a single file name inside upload_folder.
  img_name = os.path.join(upload_folder, img_path.replace('/', '_'))
  if not cv2.imwrite(img_name, resized_mask):
    raise OSError(f'Could not write image: {img_name}')


In [9]:
# List the files of one Keylabs export folder (a single "00001" directory).
# NOTE: `path` is a notebook-global name that a later cell reassigns.
path = "/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/24/24 OS- 1 Omar/24 OS- 1 Omar.png.images/00001/"
!ls -laF "$path"

total 2
-r-------- 1 root root 358 Jul 11 00:10 0.1.png
-r-------- 1 root root 346 Jul 11 00:10 0.2.png
-r-------- 1 root root 344 Jul 11 00:10 0.3.png
-r-------- 1 root root 398 Jul 11 00:10 all.png


In [10]:
# List the contents of the Resized_Images folder on Drive.
# NOTE: `p` is a notebook-global name that a later cell reassigns.
p = "/content/drive/MyDrive/RCompSci/Resized_Images"
!ls -laF "$p"

total 12
drwx------ 2 root root 4096 Jul 14 05:14 BoundingBox_Resized_512/
drwx------ 2 root root 4096 Jul 14 05:17 Keylabs_Resized_512/
drwx------ 2 root root 4096 Jul 14 05:14 Original_Resized_512/


In [11]:
import os
import fnmatch

def find_files(directory, pattern):
    """Yield the path of every file under ``directory`` whose name matches ``pattern``.

    The tree is walked recursively with ``os.walk``; only the file's base
    name is tested against the shell-style ``pattern`` (``fnmatch`` rules).

    Args:
        directory: Root of the directory tree to search.
        pattern: Shell-style wildcard pattern, e.g. ``'*.png'``.

    Yields:
        str: ``os.path.join`` of the containing directory and the matching
        file name, in the order ``os.walk`` visits them.
    """
    for folder, _subfolders, names in os.walk(directory):
        for matched in fnmatch.filter(names, pattern):
            yield os.path.join(folder, matched)

# Print the path of every file named 'all.png' anywhere under the Keylabs
# annotation folder.
p = "/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/"
for filename in find_files(p, 'all.png'):
    print(filename)

/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/690/690 OS-1 Omar/690 OS-1 Omar.png.images/00001/all.png
/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/690/690 OS-1 Luke/690 OS-1 Luke.png.images/00001/all.png
/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/690/690 OS-3 Omar/690 OS-3 Omar.png.images/00001/all.png
/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/794/794 OD-3 Luke/794 OD-3 Luke.png.images/00001/all.png
/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/794/794 OD-2 Omar/794 OD-2 Omar.png.images/00001/all.png
/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/794/794 OD-4 Omar/794 OD-4 Omar.png.images/00001/all.png
/content/drive/.

In [35]:
#Processing Keylabs annotated images

import os
import datetime
size = 512
path = "/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/"
!ls -laF "$path"
target_folder = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/Keylabs_Annotated/" + datetime.datetime.now().strftime("%d-%m-%Y-%H-%M") + '.' + str(size)
print(target_folder)
try:
    os.mkdir(target_folder)
except FileExistsError:
    print(f"The directory {target_folder} already exists")
except OSError as error:
    print(f'Creation of the directory {target_folder} failed with error: {error}')
else:
    print(f'Successfully created the directory {target_folder}')

for file in find_files(path, 'all.png'):
  print(f'Converting to grayscale and downsampling {file}')
  gray_downsample(file, target_folder, path, size)

total 40
dr-x------  2 root root 4096 Jul 10 19:39 216/
dr-x------ 10 root root 4096 Jul 10 19:39 24/
dr-x------  2 root root 4096 Jul 10 19:39 298/
dr-x------ 12 root root 4096 Jul 10 19:39 340/
dr-x------  8 root root 4096 Jul 10 19:39 357/
dr-x------ 12 root root 4096 Jul 10 19:39 45/
dr-x------  5 root root 4096 Jul 10 19:39 690/
dr-x------ 10 root root 4096 Jul 10 19:39 794/
dr-x------  3 root root 4096 Jul 10 19:39 962/
dr-x------  5 root root 4096 Jul 10 19:39 996/
/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/Keylabs_Annotated/17-07-2023-05-07.512
Successfully created the directory /content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/Keylabs_Annotated/17-07-2023-05-07.512
Converting to grayscale and downsampling /content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs Annotated PVO Grading- #1/690/690 OS-1 Omar/690 OS-1 Omar.

In [18]:
# List the output categories that exist under Processed_Images.
!ls -aF "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/"

BB_Annotated/  Keylabs_Annotated/  Orig_Annotated/


In [None]:
# List the Original_Images folder; `orig_path` is assigned again, to the same
# value, by the processing cell that follows.
orig_path = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Original_Images/"
!ls -laF "$orig_path"

total 196175
-rw------- 1 root root 4939321 Jul  9 05:25 '216 OD- 1.png'
-rw------- 1 root root 4890773 Jul  9 05:25 '216 OD- 2.png'
-rw------- 1 root root 4948651 Jul  9 05:25 '216 OD- 3.png'
-rw------- 1 root root 4939316 Jul  9 05:25 '216 OD- 4.png'
-rw------- 1 root root 4820389 Jul  9 05:25 '216 OD- 5.png'
-rw------- 1 root root 3009054 Jul  9 05:26 '24 OS- 1.png'
-rw------- 1 root root 3024549 Jul  9 05:26 '24 OS- 2.png'
-rw------- 1 root root 2981965 Jul  9 05:26 '24 OS- 3.png'
-rw------- 1 root root 2969068 Jul  9 05:26 '24 OS- 4.png'
-rw------- 1 root root 2987338 Jul  9 05:26 '24 OS- 5.png'
-rw------- 1 root root 4826426 Jul  9 05:26 '298 OS-1.png'
-rw------- 1 root root 4778798 Jul  9 05:26 '298 OS-2.png'
-rw------- 1 root root 4835214 Jul  9 05:26 '298 OS-3.png'
-rw------- 1 root root 4807548 Jul  9 05:26 '298 OS-4.png'
-rw------- 1 root root 4768877 Jul  9 05:27 '298 OS-5.png'
-rw------- 1 root root 4773012 Jul  9 05:27 '357 OD-1.png'
-rw------- 1 root root 4729393 Jul  9 

In [37]:
#Processing original annotated images

import os
import datetime
size = 512
orig_path = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Original_Images/"
!ls -laF "$orig_path"
target_folder = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/Orig_Annotated/" + datetime.datetime.now().strftime("%d-%m-%Y-%H-%M") + '.' + str(size)
print(target_folder)
try:
    os.mkdir(target_folder)
except FileExistsError:
    print(f"The directory {target_folder} already exists")
except OSError as error:
    print(f'Creation of the directory {target_folder} failed with error: {error}')
else:
    print(f'Successfully created the directory {target_folder}')

for file in find_files(orig_path, '*.png'):
  print(f'Converting to grayscale and downsampling {file}')
  gray_downsample(file, target_folder, orig_path, size)

total 196175
-rw------- 1 root root 4939321 Jul  9 05:25 '216 OD- 1.png'
-rw------- 1 root root 4890773 Jul  9 05:25 '216 OD- 2.png'
-rw------- 1 root root 4948651 Jul  9 05:25 '216 OD- 3.png'
-rw------- 1 root root 4939316 Jul  9 05:25 '216 OD- 4.png'
-rw------- 1 root root 4820389 Jul  9 05:25 '216 OD- 5.png'
-rw------- 1 root root 3009054 Jul  9 05:26 '24 OS- 1.png'
-rw------- 1 root root 3024549 Jul  9 05:26 '24 OS- 2.png'
-rw------- 1 root root 2981965 Jul  9 05:26 '24 OS- 3.png'
-rw------- 1 root root 2969068 Jul  9 05:26 '24 OS- 4.png'
-rw------- 1 root root 2987338 Jul  9 05:26 '24 OS- 5.png'
-rw------- 1 root root 4826426 Jul  9 05:26 '298 OS-1.png'
-rw------- 1 root root 4778798 Jul  9 05:26 '298 OS-2.png'
-rw------- 1 root root 4835214 Jul  9 05:26 '298 OS-3.png'
-rw------- 1 root root 4807548 Jul  9 05:26 '298 OS-4.png'
-rw------- 1 root root 4768877 Jul  9 05:27 '298 OS-5.png'
-rw------- 1 root root 4773012 Jul  9 05:27 '357 OD-1.png'
-rw------- 1 root root 4729393 Jul  9 

In [38]:
#Processing bounding box masks

import os
import datetime
size = 512
bb_path = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh//PVO_Segmentation_Project/Annotated_PVO_OM_7-2-23/"
!ls -laF "$bb_path"
target_folder = "/content/drive/.shortcut-targets-by-id/1Gd6Qvx67HvZr5HE48UMhRc2vhUfBl1Jh/PVO_Segmentation_Project/Processed_Images/BB_Annotated/" + datetime.datetime.now().strftime("%d-%m-%Y-%H-%M") + '.' + str(size)
print(target_folder)
try:
    os.mkdir(target_folder)
except FileExistsError:
    print(f"The directory {target_folder} already exists")
except OSError as error:
    print(f'Creation of the directory {target_folder} failed with error: {error}')
else:
    print(f'Successfully created the directory {target_folder}')

for file in find_files(bb_path, '*.png'):
  print(f'Converting to grayscale and downsampling {file}')
  gray_downsample(file, target_folder, bb_path, size)

total 172031
-rw------- 1 root root 4939321 Jul  1 19:39 '216 OD- 1.png'
-rw------- 1 root root 4890773 Jul  1 19:39 '216 OD- 2.png'
-rw------- 1 root root 4948651 Jul  1 19:39 '216 OD- 3.png'
-rw------- 1 root root 4939316 Jul  1 19:39 '216 OD- 4.png'
-rw------- 1 root root 4820389 Jul  1 19:39 '216 OD- 5.png'
-rw------- 1 root root 3041556 Jul  1 15:50 '24 OS- 1.png'
-rw------- 1 root root 3056103 Jul  1 15:51 '24 OS- 2.png'
-rw------- 1 root root 3023277 Jul  1 15:52 '24 OS- 3.png'
-rw------- 1 root root 3021458 Jul  3 01:34 '24 OS- 4.png'
-rw------- 1 root root 3027693 Jul  1 15:53 '24 OS- 5.png'
-rw------- 1 root root 4826426 Jul  1 19:39 '298 OS-1.png'
-rw------- 1 root root 4778798 Jul  1 19:39 '298 OS-2.png'
-rw------- 1 root root 4835214 Jul  1 19:39 '298 OS-3.png'
-rw------- 1 root root 4807548 Jul  1 19:39 '298 OS-4.png'
-rw------- 1 root root 4768877 Jul  1 19:39 '298 OS-5.png'
-rw------- 1 root root 4869357 Jul  1 16:55 '357 OD-1.png'
-rw------- 1 root root 4729393 Jul  1 

In [None]:
import os

# For every PNG directly inside `folder_path`, report whether it is grayscale
# and list the bounding boxes findBoundingBox detects in it.
for file in os.listdir(folder_path):
  if not file.endswith('.png'):
    continue

  full_path = os.path.join(folder_path, file)
  print(f'Examining file {file}')
  print(f'\tGrayScale? {is_grayscale(full_path)}')

  coordinates = findBoundingBox(full_path)
  print(f'\tNumber of bounding boxes={len(coordinates)}')
  for coord in coordinates:
    # Each entry is a (top-left, bottom-right) pair of (x, y) points.
    top_left, bottom_right = coord
    print(f'\ttop left: {top_left}, bottom right: {bottom_right}')


Examining file 24 OS- 1.png
	GrayScale? False
image height = 1100, width = 1654
	Number of bounding boxes=3
	top left: (766, 241), bottom right: (794, 276)
	top left: (1039, 91), bottom right: (1082, 135)
	top left: (668, 88), bottom right: (693, 117)
Examining file 24 OS- 2.png
	GrayScale? False
image height = 1102, width = 1652
	Number of bounding boxes=1
	top left: (779, 295), bottom right: (806, 331)
Examining file 24 OS- 3.png
	GrayScale? False
image height = 1098, width = 1656
	Number of bounding boxes=1
	top left: (1221, 84), bottom right: (1247, 114)
Examining file 24 OS- 5.png
	GrayScale? False
image height = 1100, width = 1648
	Number of bounding boxes=3
	top left: (976, 263), bottom right: (998, 290)
	top left: (940, 244), bottom right: (961, 272)
	top left: (166, 111), bottom right: (186, 134)
Examining file 45 OS-5.png
	GrayScale? False
image height = 1408, width = 2114
	Number of bounding boxes=14
	top left: (54, 365), bottom right: (83, 396)
	top left: (269, 351), bottom

In [None]:
import os
keylabs_path = "/content/drive/.shortcut-targets-by-id/1qLYsV6I4ZxygIqoBJi9uibX68ClOhCH3/Keylabs\ Annotated\ PVO\ Grading-\ #1/"
!ls -laF $keylabs_path
for patient in os.listdir(keylabs_path):
  print(patient)


total 40
dr-x------ 2 root root 4096 Jul 10 19:39 216/
dr-x------ 3 root root 4096 Jul 10 19:39 24/
dr-x------ 2 root root 4096 Jul 10 19:39 298/
dr-x------ 2 root root 4096 Jul 10 19:39 340/
dr-x------ 2 root root 4096 Jul 10 19:39 357/
dr-x------ 2 root root 4096 Jul 10 19:39 45/
dr-x------ 2 root root 4096 Jul 10 19:39 690/
dr-x------ 2 root root 4096 Jul 10 19:39 794/
dr-x------ 2 root root 4096 Jul 10 19:39 962/
dr-x------ 2 root root 4096 Jul 10 19:39 996/


FileNotFoundError: ignored