In [15]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import cv2 as cv
import os

In [16]:
# Display the TensorFlow version of the running kernel so the environment is recorded with the notebook.
tf.__version__

'2.15.0'

In [17]:
"""Define the path to the training data""" 
# Trailing slashes are kept so the values also work where a file name is
# appended by plain string concatenation.
train_dict = '../data/train/train_images/'   # input: raw training images
snippets_dir = '../data/train/snippets/'     # output: per-contour snippets stored as .npy arrays
output_dir = '../data/train/outputs/'        # output: PNG previews and the description text file

In [18]:
"""Create an image array from the training data and smooth the images"""
# images[k] is the image as loaded from disk and gray_images[k] its blurred
# grayscale version; both lists always describe the same file at the same index.
images = []
gray_images = []

# os.listdir returns entries in arbitrary order. Sorting keeps the image index
# stable across runs and machines, and that index is baked into every output
# filename produced further down (thresh<i>, contour<i>, resized<i>_<j>).
for filename in sorted(os.listdir(train_dict)):
    img_path = os.path.join(train_dict, filename)
    img = cv.imread(img_path)

    # cv.imread returns None for entries it cannot decode; skip those.
    if img is None:
        continue

    # Convert the BGR image to a single grayscale channel
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    # Smooth with a 5x5 Gaussian kernel (sigma 0: derived from the kernel size by OpenCV)
    blurred = cv.GaussianBlur(gray, (5, 5), 0)

    images.append(img)
    gray_images.append(blurred)

In [19]:
"""Thresholding all the gray images"""
# THRESH_OTSU makes OpenCV choose the threshold for each image itself, so the
# fixed threshold argument (0) is only a placeholder. cv.threshold returns
# (threshold_used, mask); only the binary mask is kept.
otsu_binary = cv.THRESH_BINARY + cv.THRESH_OTSU
thresholded_images = [
    cv.threshold(blurred_gray, 0, 255, otsu_binary)[1]
    for blurred_gray in gray_images
]

In [20]:
"""Save all the thresholded images"""
# Write each binary mask to '<output_dir>thresh<i>.png'.
# cv.imwrite does not create missing directories, so make sure the target exists first.
os.makedirs(output_dir, exist_ok=True)
for i, thresh in enumerate(thresholded_images):
    thresh_path = os.path.join(output_dir, f'thresh{i}.png')
    # cv.imwrite signals a failed write by returning False rather than raising;
    # surface it instead of silently ending up with missing files.
    if not cv.imwrite(thresh_path, thresh):
        raise OSError(f'cv.imwrite could not write {thresh_path}')

In [21]:
"""Find the contours of the thresholded images"""
# Each entry is the complete return value of cv.findContours for one mask;
# the cells below read the contour sequence from element [0] of that entry.
# NOTE(review): element [0] being the contours matches the two-value return of
# OpenCV 4.x; OpenCV 3.x returned the image first. Confirm the installed version.
contours_list = [
    cv.findContours(mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
    for mask in thresholded_images
]

In [22]:
"""Print length of the contours list"""
# Sanity check: how many contours were found in the first image
# (element [0] of a findContours result is the contour sequence).
first_image_contours = contours_list[0][0]
print(len(first_image_contours))

9


In [23]:
"""Draw the contours on the original images"""
# NOTE: contour drawing is currently disabled, so nothing is drawn here:
# contour_images holds the very same arrays as `images`, in the same order.
# To re-enable, draw on a copy, because cv.drawContours modifies its input in place:
#     cv.drawContours(img.copy(), contours_list[i][0], -1, (0, 255, 0), 3)
contour_images = list(images)

In [24]:
"""Save the images with the drawn contour"""
# Write each entry of contour_images to '<output_dir>contour<i>.png'.
# cv.imwrite does not create missing directories, so make sure the target exists first.
os.makedirs(output_dir, exist_ok=True)
for i, contour_img in enumerate(contour_images):
    contour_path = os.path.join(output_dir, f'contour{i}.png')
    # cv.imwrite signals a failed write by returning False rather than raising;
    # surface it instead of silently ending up with missing files.
    if not cv.imwrite(contour_path, contour_img):
        raise OSError(f'cv.imwrite could not write {contour_path}')

In [25]:
"""Initialize a list to keep track of filenames and placeholders for classes"""
# Collects the .npy filename of every saved snippet; the placeholder class
# label is only attached when the list is written to the description file.
image_descriptions = list()

In [26]:
"""Define the desired size for all output images"""
# Every cropped snippet is rescaled to this fixed square size
# (handed to cv.resize as the target size).
SNIPPET_SIDE = 150
desired_dim = (SNIPPET_SIDE, SNIPPET_SIDE)

In [27]:
"""Loop through each contour in the contours list"""
# Cut one fixed-size snippet per contour out of the blurred grayscale images.
# Every snippet is stored twice: as a PNG preview in output_dir and as a .npy
# array in snippets_dir. The .npy filename is what ends up in image_descriptions.

# Neither cv.imwrite nor np.save creates missing directories.
os.makedirs(output_dir, exist_ok=True)
os.makedirs(snippets_dir, exist_ok=True)

# Start from an empty list so re-running this cell does not append duplicates.
image_descriptions = []

for i, contours in enumerate(contours_list):
    # contours[0] is the contour sequence of image i (first element of the
    # cv.findContours result stored for that image).
    for j, contour in enumerate(contours[0]):
        # Axis-aligned bounding box of the contour
        x, y, w, h = cv.boundingRect(contour)
        # Crop that box from the blurred grayscale image (rows = y, columns = x)
        cropped_img = gray_images[i][y:y+h, x:x+w]
        # Rescale to the common snippet size
        resized_img = cv.resize(cropped_img, desired_dim)

        # Build the shared filename stem once: resized<image index>_<contour index>
        snippet_stem = f'resized{i}_{j}'

        png_path = os.path.join(output_dir, snippet_stem + '.png')
        # cv.imwrite signals a failed write by returning False rather than raising.
        if not cv.imwrite(png_path, resized_img):
            raise OSError(f'cv.imwrite could not write {png_path}')

        npy_name = snippet_stem + '.npy'
        np.save(os.path.join(snippets_dir, npy_name), resized_img)

        # Record the snippet only after it has actually been written.
        image_descriptions.append(npy_name)

In [28]:
# Dump one '<snippet filename>,0' line per recorded snippet into
# image_descriptions.txt inside output_dir; the trailing 0 is the placeholder class.
description_filepath = os.path.join(output_dir, 'image_descriptions.txt')
with open(description_filepath, 'w') as descriptions_file:
    descriptions_file.writelines(
        snippet_name + ",0" + '\n' for snippet_name in image_descriptions
    )