# Preprocessing V2

In [1]:
import sys
!{sys.executable} -m pip install opencv-python
import cv2
!{sys.executable} -m pip install matplotlib
from matplotlib import pyplot as plt
!{sys.executable} -m pip install numpy
import numpy as np
!{sys.executable} -m pip install imutils
import imutils
!{sys.executable} -m pip install sklearn
from sklearn.decomposition import PCA
from math import atan2, cos, sin, sqrt, pi
from glob import glob
import os



In [2]:
filepaths = glob("images/*/*.png", recursive=True)
print(filepaths[:10])

['images/Trimed/Screen Shot 2022-02-22 at 11.03.10 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.03.34 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.05.36 AM.png', 'images/Trimed/trimed va ex1 at 4.05.14 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.11.49 AM.png', 'images/Trimed/trimed va ex1 at 4.05.20 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.11.26 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.05.02 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.06.42 AM.png', 'images/Trimed/Screen Shot 2022-02-22 at 11.10.04 AM.png']


In [3]:
images = {}

for filepath in filepaths:
    image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)

    # set crop bounds for background color detection
    x_min = 0
    x_max = image.shape[1]
    y_min = int(image.shape[0] * 0.2)
    y_max = int(image.shape[0] * 0.8)

    # recolored image (if black on white x-ray)
    cropped_image = image[y_min:y_max, x_min:x_max]
    
    recolored_image = image


    # print("MEAN", np.mean(cropped_image))
    if np.mean(cropped_image) > 110:
        recolored_image = cv2.bitwise_not(image)


    edited = recolored_image
    
    min = np.min(edited)
    max = np.max(edited)
    
    # scale pixels to 0 - 255 for max contrast
    scaled = ((edited - min) / (max - min) * 255).astype(np.uint8)
    
    images[filepath] = (image, scaled)


    # f, axarr = plt.subplots(1,2)
    # axarr[0].axis('off')
    # axarr[0].imshow(image, cmap='gray')
    # axarr[1].axis('off')
    # axarr[1].imshow(edited, cmap='gray')
    # plt.show(block=True)

## Now we actually try to preprocess the images

In [4]:
thresholded = {}

for filepath, (original, edited) in images.items():
    
    print(filepath)

    ret, thresh = cv2.threshold(edited, 200, 255, cv2.THRESH_TOZERO)

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE , (3,3))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    thresholded[filepath] = morph
    
    # f, axarr = plt.subplots(1,2)
    # axarr[0].axis('off')
    # axarr[0].imshow(edited, cmap='gray')
    # axarr[1].axis('off')
    # axarr[1].imshow(morph, cmap='gray')
    # plt.show(block=True)

images/Trimed/Screen Shot 2022-02-22 at 11.03.10 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.03.34 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.05.36 AM.png
images/Trimed/trimed va ex1 at 4.05.14 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.11.49 AM.png
images/Trimed/trimed va ex1 at 4.05.20 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.11.26 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.05.02 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.06.42 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.10.04 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.13.56 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.07.27 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.13.11 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.07.03 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.04.20 AM.png
images/Trimed/trimed va ex1 at 4.07.05 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.06.37 AM.png
images/Trimed/Screen Shot 2022-02-22 at 11.06.54 AM.png
images/Trimed/trimed v

In [5]:
rotated = {}

for filepath, thresh in thresholded.items():

    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    areas = [cv2.contourArea(c) if cv2.boundingRect(c)[2] < (thresh.shape[1]) * 0.8 else -1 for c in contours]
    max_index = np.argmax(areas)
    cnt = contours[max_index]
    
    pts = cnt
    
    sz = len(pts)
    data_pts = np.empty((sz, 2), dtype=np.float64)
    for i in range(data_pts.shape[0]):
        data_pts[i,0] = pts[i,0,0]
        data_pts[i,1] = pts[i,0,1]
        
    mean = np.empty((0))
    mean, eigenvectors, eigenvalues = cv2.PCACompute2(data_pts, mean)
    
    eigenvector = eigenvectors[0]

    angle = atan2(*eigenvector) # orientation in radians
    degrees = angle / np.pi * 180 # orientation in degrees

    # f, axarr = plt.subplots(1,2)
    # axarr[0].axis('off')
    # axarr[0].imshow(images[filepath][1], cmap='gray')
    # axarr[1].axis('off')
    # axarr[1].imshow(imutils.rotate_bound(thresh, degrees), cmap='gray')
    # plt.show(block=True)

    original = images[filepath][0]
    edited = images[filepath][1]
    rotated[filepath] = (imutils.rotate_bound(original, degrees), imutils.rotate_bound(edited, degrees), imutils.rotate_bound(thresh, degrees))


In [6]:
for filepath, (original, edited, morph) in rotated.items():

    contours, hierarchy = cv2.findContours(morph, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    areas = [cv2.contourArea(c) if cv2.boundingRect(c)[2] < (edited.shape[1]) * 0.8 else -1 for c in contours]
    max_index = np.argmax(areas)
    cnt = contours[max_index]

    colored_image = cv2.cvtColor(edited, cv2.COLOR_GRAY2RGB)
    x, y, w, h = cv2.boundingRect(cnt)
    halfW, halfH = (w // 2, h // 2)
    
    cx, cy = (x + halfW, y + halfH)
    
    majorHalf = halfW if w > h else halfH
    
    majorHalf += 20 #padding
    
    crop_img = original[cy-majorHalf:cy+majorHalf, cx-majorHalf:cx+majorHalf]
    
    colored_crop = colored_image[cy-majorHalf:cy+majorHalf, cx-majorHalf:cx+majorHalf]

    
    
    filepath = filepath.replace('images', 'images_processed')

    try:
        os.makedirs('/'.join(filepath.split('/')[:-1]))
    except Exception as e:
        pass
    print('new filepath', filepath)
    try:
        cv2.imwrite(filepath, colored_crop)
    except Exception as e:
        print(e)
        



    # f, axarr = plt.subplots(1,3)
    # axarr[0].axis('off')
    # axarr[0].imshow(image, cmap='gray')
    # axarr[1].axis('off')
    # axarr[1].imshow(thresh, cmap='gray')
    # axarr[2].axis('off')
    # axarr[2].imshow(crop_img, cmap='gray')
    # plt.show(block=True)

new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.03.10 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.03.34 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.05.36 AM.png
OpenCV(4.5.5) /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp:801: error: (-215:Assertion failed) !_img.empty() in function 'imwrite'

new filepath images_processed/Trimed/trimed va ex1 at 4.05.14 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.11.49 AM.png
new filepath images_processed/Trimed/trimed va ex1 at 4.05.20 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.11.26 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.05.02 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.06.42 AM.png
new filepath images_processed/Trimed/Screen Shot 2022-02-22 at 11.10.04 AM.png
new filepath images_processed/Trime