In [70]:
import os, json, shutil

In [71]:
root_data_path = "../../data/plantdoc-DatasetNinja"
new_data_path = "../../data/plantdoc-Tomato-YOLO"

In [72]:
def standardize_file_name(filename, extension=".jpg"):
     # Extract base name and extensions
    name, ext = os.path.splitext(filename)
    extensions = []
    
    # Handle multiple extensions
    while ext:
        extensions.append(ext.lower())  # Convert to lowercase
        name, ext = os.path.splitext(name)
    
    # Reconstruct the filename
    base_name = name  # The name without any extensions
    # Choose your desired standard extension
    if extension == '.json': standard_extension = '.jpg.json'
    else: standard_extension = extension
    # Create the new filename
    new_filename = base_name + standard_extension
    if (filename != new_filename):
        print("Renamed", filename, "to", new_filename)
    return new_filename

In [73]:
def standardize_image_filenames(image_dir):
    for filename in os.listdir(image_dir):
        filepath = os.path.join(image_dir, filename)
        if os.path.isfile(filepath):
            new_filename = standardize_file_name(filename)
            new_filepath = os.path.join(image_dir, new_filename)
            # Rename the file if the name has changed
            if filename != new_filename:
                os.rename(filepath, new_filepath)

In [74]:
standardize_image_filenames(os.path.join(root_data_path, "train", "img"))
standardize_image_filenames(os.path.join(root_data_path, "test", "img"))

Renamed 6078710516023296.jpeg.jpg to 6078710516023296.jpg
Renamed TomatoBlight.JPG.jpg to TomatoBlight.jpg
Renamed Powdery+Mildew_00.JPG.jpg to Powdery+Mildew_00.jpg
Renamed IMG_0649.JPG.jpg to IMG_0649.jpg
Renamed tomato-late-blight-foliar-lesions_Scot-Nelson_CC-BY-SA-2.0.jpg to tomato-late-blight-foliar-lesions_Scot-Nelson_CC-BY-SA-2.jpg
Renamed IMG_9275.JPG.jpg to IMG_9275.jpg
Renamed IMG_2254.JPG.jpg to IMG_2254.jpg
Renamed IMG_2686.JPG.jpg to IMG_2686.jpg
Renamed disease.JPG.jpg to disease.jpg
Renamed Figure%202.JPG.jpg to Figure%202.jpg
Renamed 14456.img.jpg to 14456.jpg
Renamed 5516100468998144-600x272.jpeg.jpg to 5516100468998144-600x272.jpg
Renamed mold_web12.16.14.jpg to mold_web12.jpg
Renamed DSC02007.JPG.jpg to DSC02007.jpg
Renamed 020.JPG.jpg to 020.jpg
Renamed 5787ab74428a5fdc70e3defda7f00f0a4623876d.jpeg.jpg to 5787ab74428a5fdc70e3defda7f00f0a4623876d.jpg
Renamed Bell-pepper-plant.JPG.jpg to Bell-pepper-plant.jpg
Renamed tomato%2Bseptoria%2Bleaf%2Bspot2.JPG.jpg to tomato

In [75]:
classes_to_keep = [
    "Tomato Early blight leaf",
    "Tomato leaf",
    "Tomato leaf bacterial spot",
    "Tomato leaf late blight",
    "Tomato leaf mosaic virus",
    "Tomato leaf yellow virus", 
    "Tomato mold leaf", 
    "Tomato Septoria leaf spot", 
    "Tomato two spotted spider mites leaf", 
]

# Create a mapping from class titles to class IDs
class_title_to_id = {class_name: idx for idx, class_name in enumerate(classes_to_keep)}

In [76]:
new_train_path = os.path.join(new_data_path, "train")
new_test_path = os.path.join(new_data_path, "val")
for set in ["train", "test"]:
    ann_path = os.path.join(root_data_path, set, "ann")
    img_path = os.path.join(root_data_path, set, "img")
    new_label_path = os.path.join(new_data_path, "labels", set)
    new_img_path = os.path.join(new_data_path, "images", set)
    
    os.makedirs(new_img_path, exist_ok=True)
    os.makedirs(new_label_path, exist_ok=True)
    
    for file_name in os.listdir(ann_path):
        ann_file_path = os.path.join(ann_path, file_name)
        with open(ann_file_path, 'r') as f:
            file_content = json.load(f)
        
        image_width = file_content['size']['width']
        image_height = file_content['size']['height']
        
        annotations = []
        for obj in file_content["objects"]:
            class_title = obj["classTitle"]
            if class_title in classes_to_keep:
                class_id = class_title_to_id[class_title]
                # Get bounding box coordinates
                points = obj['points']['exterior']
                x_min = points[0][0]
                y_min = points[0][1]
                x_max = points[1][0]
                y_max = points[1][1]
                
                # Convert to YOLO format (normalized)
                x_center = ((x_min + x_max) / 2) / image_width
                y_center = ((y_min + y_max) / 2) / image_height
                width = (x_max - x_min) / image_width
                height = (y_max - y_min) / image_height
                
                annotations.append(f"{class_id} {x_center} {y_center} {width} {height}")
        
        if not annotations:
            continue  # Skip images without desired classes
        
        # Save the annotations to a .txt file
        label_file_name = standardize_file_name(file_name, extension='.txt')
        label_file_path = os.path.join(new_label_path, label_file_name)
        with open(label_file_path, 'w') as f:
            f.write('\n'.join(annotations))
        
        # Copy the corresponding image file
        std_filename = standardize_file_name(file_name, extension='.json')
        root, ext = os.path.splitext(std_filename)
        image_file = os.path.basename(root)
        image_file_path = os.path.join(img_path, image_file)
        if os.path.exists(image_file_path):
            new_image_file_path = os.path.join(new_img_path, image_file)
            shutil.copyfile(image_file_path, new_image_file_path)
            print("Copying", image_file_path, "to", new_image_file_path)
            image_found = True
        if not image_found:
            print(f"Image file not found for annotation {file_name}")
        

Renamed Tomato%20bacterial%20canker.JPG.jpg.json to Tomato%20bacterial%20canker.txt
Renamed Tomato%20bacterial%20canker.JPG.jpg.json to Tomato%20bacterial%20canker.jpg.json
Copying ../../data/plantdoc-DatasetNinja/train/img/Tomato%20bacterial%20canker.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/Tomato%20bacterial%20canker.jpg
Renamed droppedImage_3.jpg.json to droppedImage_3.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/droppedImage_3.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/droppedImage_3.jpg
Renamed tomato-mosaic-virus-disease-on-260nw-625499777.jpg.json to tomato-mosaic-virus-disease-on-260nw-625499777.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/tomato-mosaic-virus-disease-on-260nw-625499777.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/tomato-mosaic-virus-disease-on-260nw-625499777.jpg
Renamed leaf_mold_nightshade1x500.jpg.json to leaf_mold_nightshade1x500.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/leaf_mold_nightshade1x500.j

Renamed Septoria+Leaf+Spot.jpg.json to Septoria+Leaf+Spot.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/Septoria+Leaf+Spot.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/Septoria+Leaf+Spot.jpg
Renamed Figure-1-6.jpg.json to Figure-1-6.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/Figure-1-6.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/Figure-1-6.jpg
Renamed 5Tomato+Mosaic+Virus.jpg.json to 5Tomato+Mosaic+Virus.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/5Tomato+Mosaic+Virus.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/5Tomato+Mosaic+Virus.jpg
Renamed tylcv1-.jpg.json to tylcv1-.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/tylcv1-.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/tylcv1-.jpg
Renamed 5146.jpg.json to 5146.txt
Copying ../../data/plantdoc-DatasetNinja/train/img/5146.jpg to ../../data/plantdoc-Tomato-YOLO/images/train/5146.jpg
Renamed tomato-septoria-leaf-spot-.jpg.json to tomato-septoria-leaf-spot-.txt
Copying ../