In [5]:
import os
import cv2
import torch
import random
import numpy as np
from glob import glob
from pathlib import Path
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

def load_label(path:str, img_shape):
    with open(path, "r") as f:
        labels = f.readlines()
    
    res = []
    for row in labels:
        coords = []
        for i,num in enumerate(row.replace("\n", "").split()[1:]):
            coord = int(float(num)*img_shape[(i+1)%2])
            coords.append(coord)
        res.append(coords)
    if len(res)==0:
        return []
    return np.array(res).reshape(len(labels), -1, 2)

def crop_polygon(image:np.ndarray, poly_points:np.ndarray, empty_fill_color=(255,255,255)):
    mask = np.zeros_like(image)
    cv2.fillPoly(mask, [poly_points], empty_fill_color)

    # Apply the mask to the image
    masked_image = cv2.bitwise_and(image, mask)
    
    # Crop the image to the desired rectangle
    x, y, w, h = cv2.boundingRect(poly_points)
    cropped_image = masked_image[y:y+h, x:x+w]

    return cropped_image

def cut_words(image_paths:list, label_paths:list, output_dir="./Cuts"):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        
    for i, path in enumerate(tqdm(image_paths)):
        img = cv2.imread(path)
        polygons = load_label(label_paths[i], img.shape)
        for j,polygon in enumerate(polygons):
            crop = crop_polygon(img, polygon)
            cv2.imwrite(os.path.join(output_dir, f"{Path(path).stem}_{j}.jpeg"), crop)
            
def get_image_paths(root:str,
                    split:str,
                    extensions=[".jpeg", ".jpg", ".png"],
                    return_labels=True,
                    path_type=str):
    
    img_root = Path(os.path.join(root, split, "images"))
    image_paths = []
    for ext in extensions:
        image_paths += list(img_root.glob(f"*{ext}"))
    
    if return_labels:
        label_paths = [i.parent.parent/f"labels/{i.stem}.txt" for i in image_paths]
    
    if path_type is str:
        image_paths = [path_type(i) for i in image_paths]
        label_paths = [path_type(i) for i in label_paths]
    
    if return_labels:
        return image_paths, label_paths        
    
    return image_paths

def shuffle_lists(list1, list2):
    combined = list(zip(list1, list2))
    random.shuffle(combined)
    list1[:], list2[:] = zip(*combined)

In [6]:
root = "../TRAIN_DATA/Words_2/"
split = "train"

img_paths, label_paths = get_image_paths(root, split)
shuffle_lists(img_paths, label_paths)

In [7]:
img_paths[0], label_paths[0]

('..\\TRAIN_DATA\\Words_2\\train\\images\\20230327_234331.jpeg',
 '..\\TRAIN_DATA\\Words_2\\train\\labels\\20230327_234331.txt')

In [8]:
cut_words(img_paths, label_paths, output_dir="../Data/Word_cuts")

  0%|          | 0/189 [00:00<?, ?it/s]

  return np.array(res).reshape(len(labels), -1, 2)


ValueError: cannot reshape array of size 172 into shape (172,newaxis,2)