In [10]:
import os
import shutil
import json
from tqdm import tqdm

In [11]:
# Ensure every class has its own sub-directory under the output root
def create_class_folders(output_path, classes):
    """Create one directory per class name beneath ``output_path``.

    Args:
        output_path: Root directory under which the class folders are created.
        classes: Iterable of class names; each one is used as a folder name.

    Folders that already exist are left as they are.
    """
    class_dirs = (os.path.join(output_path, name) for name in classes)
    for class_dir in class_dirs:
        os.makedirs(class_dir, exist_ok=True)

In [12]:
def create_new_json(original_json_file, transformed_json_file, filtered_boxes):
  """Write a copy of a JSON file with its ``'shapes'`` entry replaced.

  Args:
    original_json_file: Path of the JSON file to read.
    transformed_json_file: Path the modified JSON is written to.
    filtered_boxes: Value stored under the ``'shapes'`` key of the output.
  """
  with open(original_json_file, 'r') as source:
    annotation = json.loads(source.read())

  # Everything except 'shapes' is carried over from the original file.
  annotation['shapes'] = filtered_boxes

  with open(transformed_json_file, 'w') as target:
    json.dump(annotation, target, indent=2)

In [13]:
# Function to process each image and associated JSON
def process_image(image_path, json_path, output_path, target_class):
    """Relabel one image's annotations for a single task and export them.

    No shape is dropped; only each shape's ``label`` is collapsed:

    * ``target_class == "damage_detection"``: a label containing
      ``"undamaged"`` becomes ``"undamaged"``, any other label ``"damaged"``.
    * any other ``target_class``: a label containing ``"residentialbuilding"``
      becomes ``"residentialbuilding"``, any other ``"commercialbuilding"``.

    When the annotation has at least one shape, the image is copied into
    ``output_path`` and the relabelled annotation is written next to it as
    ``<image name without extension>.json``. When there are no shapes,
    nothing is created.

    Args:
        image_path: Path of the image file to copy.
        json_path: Path of the annotation JSON; must contain a ``'shapes'``
            list whose items have a ``'label'`` string.
        output_path: Destination directory (created if missing).
        target_class: Task name selecting the relabelling rule.
    """
    with open(json_path, 'r') as json_file:
        data = json.load(json_file)

    is_damage_task = target_class == "damage_detection"

    relabelled_boxes = []
    for box in data['shapes']:
        label = box['label']
        if is_damage_task:
            # Test for "undamaged" explicitly: "damaged" is a substring of it,
            # so checking for "damaged" would match both kinds of label.
            box['label'] = "undamaged" if "undamaged" in label else "damaged"
        else:
            box['label'] = (
                "residentialbuilding"
                if "residentialbuilding" in label
                else "commercialbuilding"
            )
        relabelled_boxes.append(box)

    if not relabelled_boxes:
        return

    # Without an existing directory shutil.copy would either fail or create a
    # *file* named like the folder, so make sure the destination exists.
    os.makedirs(output_path, exist_ok=True)
    shutil.copy(image_path, output_path)

    # splitext strips only the final extension, so "a.b.jpg" -> "a.b.json"
    # (splitting on the first dot would give "a.json").
    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    new_json_path = os.path.join(output_path, f"{image_stem}.json")

    # The annotation is already in memory; write it out directly instead of
    # parsing the same file a second time.
    data['shapes'] = relabelled_boxes
    with open(new_json_path, 'w') as out_file:
        json.dump(data, out_file, indent=2)

In [14]:
# Main function to organize images based on classes
def organize_images(input_folder, output_folder, classes, num_images=381):
  """Export every numbered image/annotation pair once per task.

  Looks for ``Image_<i>.jpg`` / ``Image_<i>.json`` in ``input_folder`` for
  ``i`` from 1 to ``num_images`` inclusive. Indices whose JSON file does not
  exist are skipped. Only the JSON's existence is checked here, not the
  image's. Each remaining pair is passed to ``process_image`` once per entry
  of ``classes``, with ``<output_folder>/<class>`` as the destination.

  Args:
    input_folder: Directory containing the numbered images and JSON files.
    output_folder: Root directory holding one sub-folder per class.
    classes: Iterable of task names, forwarded as ``target_class``.
    num_images: Highest image index to try (default 381, the size of the
      data set this notebook was written for).
  """
  for i in tqdm(range(1, num_images + 1)):
    image_filename = f"Image_{i}.jpg"
    json_filename = f"Image_{i}.json"

    image_path = os.path.join(input_folder, image_filename)
    json_path = os.path.join(input_folder, json_filename)

    # Numbering may have gaps; an index without annotations is skipped.
    if not os.path.exists(json_path):
      continue

    for target_class in classes:
      output_path = os.path.join(output_folder, target_class)
      process_image(image_path, json_path, output_path, target_class)

In [15]:
# Directory that is scanned for the Image_<i>.jpg / Image_<i>.json pairs.
input_folder = '/content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_6'
# Destination root; one sub-folder per entry of `classes` is created beneath it.
output_folder = '/content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling'
# Task names: each gets its own folder and its own relabelled copy of the data.
classes = ["damage_detection", "building_detection"]
# Create the per-task folders first, then copy images and write the relabelled JSON files.
create_class_folders(output_folder, classes)
organize_images(input_folder, output_folder, classes)

100%|██████████| 381/381 [06:12<00:00,  1.02it/s]


In [16]:
# !rm -rf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/undamagedresidentialbuilding

In [17]:
# !rm -rf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/undamagedcommercialbuilding

In [18]:
# !rm -rf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damagedresidentialbuilding

In [19]:
# !rm -rf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damagedcommercialbuilding

In [20]:
# !ls /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/undamagedresidentialbuilding/*json | wc -l

In [21]:
# !ls /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/undamagedcommercialbuilding/*json | wc -l

In [22]:
# !ls /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damagedresidentialbuilding/*json | wc -l

In [23]:
# !ls /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damagedcommercialbuilding/*json | wc -l

In [26]:
# List hidden (dot-prefixed) entries in the damage_detection output folder.
!ls /content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damage_detection/.*

ls: cannot access '/content/drive/MyDrive/EYOpenScienceDataChallenge/code/ensembling/damage_detection/.*': No such file or directory
