# In [None]:
import os
import numpy as np
from skimage.io import imread

def process_image_cpc(image_path, text_path, output_subdir, image_reader=None):
    """Convert one image's CPC annotation file into an ``x,y,label`` text file.

    The CPC file is read as text: fields 2 and 3 of its first line are taken
    as the dimensions the point coordinates were recorded in, line 6 holds
    the number of points, the following ``count`` lines hold ``x,y`` pairs
    and the ``count`` lines after those hold the labels (second field,
    quoted). Each point is rescaled to the pixel size of the image and each
    label code is mapped to an integer class id; codes missing from the
    table become 5.

    The result is written to ``<output_subdir>/<image stem>.txt`` as the
    image stem, an ``x,y,label`` header and one line per point.

    Args:
        image_path: Path of the image the annotations belong to.
        text_path: Path of the matching CPC file. If it does not exist a
            message is printed and nothing is written.
        output_subdir: Directory for the output file; created if missing.
        image_reader: Optional callable taking ``image_path`` and returning
            an object with a ``.shape`` of ``(rows, columns, ...)``.
            Defaults to the module's ``imread``.

    Returns:
        None. Any error while processing is printed and swallowed so that a
        batch run can continue with the next image.
    """
    # CPC label code -> class id. Unlisted codes fall back to `other_label`.
    label_ids = {'agalg': 0, 'aga': 0, 'agalf': 0, 'dcp': 1, 'roc': 2, 'cca': 3, 'ana': 4}
    other_label = 5

    try:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(output_subdir, exist_ok=True)

        if not os.path.exists(text_path):
            print(f"CPC file does not exist for {image_path}")
            return  # Skip processing this image

        # Only the image's pixel dimensions are needed for rescaling.
        reader = imread if image_reader is None else image_reader
        image = reader(image_path)
        true_length, true_width = image.shape[1], image.shape[0]

        with open(text_path) as textfile:
            lines = textfile.readlines()

        header = lines[0].split(',')
        original_len, original_width = int(header[2]), int(header[3])
        count_points = int(lines[5])

        point_rows = lines[6:6 + count_points]
        label_rows = lines[6 + count_points:6 + 2 * count_points]
        # A short file would otherwise yield made-up (0, 0) points or class-0
        # labels; refuse to write misleading output instead.
        if len(point_rows) != count_points or len(label_rows) != count_points:
            raise ValueError(
                f"expected {count_points} point rows and label rows, found "
                f"{len(point_rows)} and {len(label_rows)}"
            )

        records = []
        for point_row, label_row in zip(point_rows, label_rows):
            point_fields = point_row.split(',')
            # int() truncates toward zero, matching the historical output.
            x = int(int(point_fields[0]) * true_length / original_len)
            y = int(int(point_fields[1]) * true_width / original_width)
            code = label_row.split(',')[1].replace('"', '').strip().lower()
            records.append((x, y, label_ids.get(code, other_label)))

        # splitext keeps dots inside the stem ("a.1.jpg" -> "a.1"), so images
        # that differ only after the first dot no longer share an output file.
        name_img = os.path.splitext(os.path.basename(image_path))[0]
        file_path = os.path.join(output_subdir, name_img + '.txt')

        with open(file_path, 'w') as newtxtfile:
            newtxtfile.write(name_img + '\n')
            newtxtfile.write('x,y,label\n')
            newtxtfile.writelines(f'{x},{y},{label}\n' for x, y, label in records)

    except Exception as e:
        # Deliberate per-file boundary: report the failure and let the caller
        # move on to the next image.
        print(f"Error processing file {image_path}: {e}")

def process_subdirectories(main_dir, output_base_dir):
    """Run ``process_image_cpc`` on every JPG image found under ``main_dir``.

    The directory tree is walked recursively. For each file whose extension
    is ``.jpg`` (any letter case), the CPC file is expected next to it with
    the same stem and a ``.cpc`` extension. Output goes to the directory
    under ``output_base_dir`` that mirrors the image's location relative to
    ``main_dir``.

    Args:
        main_dir: Root directory to search for images.
        output_base_dir: Root directory under which results are written.
    """
    for root, _dirs, files in os.walk(main_dir):
        # Same for every image in this directory, so compute it once.
        relative_dir = os.path.relpath(root, main_dir)
        output_subdir = os.path.join(output_base_dir, relative_dir)

        for file_name in files:
            stem, extension = os.path.splitext(file_name)
            if extension.lower() != '.jpg':
                continue

            image_path = os.path.join(root, file_name)
            # Swap only the final extension; str.replace would also rewrite
            # any '.jpg' occurring earlier in the path.
            text_path = os.path.join(root, stem + '.cpc')

            process_image_cpc(image_path, text_path, output_subdir)

# Example usage: input tree of images with CPC files, and the output root.
main_directory = '/Volumes/Shaq Toshiba/Cuba CPCe Images'
output_base_directory = '/Users/shagundeepsingh/Documents/coral/text-new'

# Guarded so that importing this module does not start a batch run; running
# it as a script or in a notebook cell still does.
if __name__ == "__main__":
    process_subdirectories(main_directory, output_base_directory)
