In [162]:
# !pip install -U albumentations

In [163]:
# !pip install labelme

In [164]:
# import the required libraries
import albumentations as A
import cv2
from matplotlib import pyplot as plt
import json
import random
from PIL import Image
import base64
import labelme
import os
from tqdm import tqdm
%matplotlib inline

In [165]:
# Augmentation pipeline 1: fixed 90-degree rotation.
# rotate=[90, 90] collapses the angle range to a single value and p=1 applies
# the transform on every call. fit_output=True lets the output canvas grow or
# shrink so the whole rotated image is kept. Boxes are supplied in pascal_voc
# form ([x_min, y_min, x_max, y_max]) with their class ids passed separately
# under the 'category_ids' keyword.
transform1 = A.Compose(
    [
        A.Affine(
            rotate=[90, 90],
            fit_output=True,
            mode=cv2.BORDER_CONSTANT,
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [166]:
# Augmentation pipeline 2: fixed 180-degree rotation.
# rotate=[180, 180] collapses the angle range to a single value and p=1
# applies the transform on every call. fit_output=True lets the output canvas
# adapt so the whole rotated image is kept. Boxes are supplied in pascal_voc
# form ([x_min, y_min, x_max, y_max]) with their class ids passed separately
# under the 'category_ids' keyword.
transform2 = A.Compose(
    [
        A.Affine(
            rotate=[180, 180],
            fit_output=True,
            mode=cv2.BORDER_CONSTANT,
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [167]:
# Augmentation pipeline 3: fixed 270-degree rotation.
# rotate=[270, 270] collapses the angle range to a single value and p=1
# applies the transform on every call. fit_output=True lets the output canvas
# adapt so the whole rotated image is kept. Boxes are supplied in pascal_voc
# form ([x_min, y_min, x_max, y_max]) with their class ids passed separately
# under the 'category_ids' keyword.
transform3 = A.Compose(
    [
        A.Affine(
            rotate=[270, 270],
            fit_output=True,
            mode=cv2.BORDER_CONSTANT,
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [168]:
# Every pipeline that is applied to each source image. The position of a
# pipeline in this list becomes the "_r<idx>" suffix of the image and JSON
# files written by process_json_indices.
transformations = [
    transform1,
    transform2,
    transform3,
]

In [169]:
# Annotation label -> integer class id. The id of a label is simply its
# position in the list below, so the order here must not change.
category_id_dict = {
    label: class_id
    for class_id, label in enumerate([
        "undamagedresidentialbuilding",
        "damagedresidentialbuilding",
        "undamagedcommercialbuilding",
        "damagedcommercialbuilding",
    ])
}
# Reverse lookup: integer class id -> annotation label.
category_id_to_name = dict(zip(category_id_dict.values(), category_id_dict))

In [170]:
def process_json_file(filename):
  """Read an annotation JSON file and extract its boxes and class ids.

  Args:
    filename: Path to a JSON file with a top-level "shapes" list. Every shape
      must carry a "label" (a key of ``category_id_dict``) and "points" (a
      list of ``[x, y]`` pairs).

  Returns:
    A ``(bboxes, category_ids)`` pair of equal-length lists. ``bboxes[i]`` is
    ``[x_min, y_min, x_max, y_max]`` and ``category_ids[i]`` is the integer id
    of the same shape's label.

  Raises:
    KeyError: If a label is not a key of ``category_id_dict``.
    ValueError: If a shape has fewer than two points, so no box can be built.
  """
  with open(filename) as json_file:
    data_json = json.load(json_file)

  bboxes = []
  category_ids = []
  for annotation in data_json["shapes"]:
    category_id = category_id_dict[annotation["label"]]

    points = annotation["points"]
    if len(points) < 2:
      raise ValueError(
          f"Shape {annotation['label']!r} in {filename} has {len(points)} "
          "point(s); at least two are needed to build a bounding box"
      )

    # Take the extremes over *all* vertices. For a two-corner rectangle this
    # is the same as comparing the two corners; for shapes with more vertices
    # it yields the enclosing box instead of a box built from only the first
    # two vertices.
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    bboxes.append([min(xs), min(ys), max(xs), max(ys)])
    category_ids.append(category_id)

  return bboxes, category_ids

In [171]:
def transform_annotations(original_json_file, transformed_json_file, new_bboxes, new_labels, new_img_filename, new_img_data):
    """Write a copy of an annotation file updated for a transformed image.

    The original JSON is loaded, its 'imagePath' and 'imageData' fields are
    replaced, and the i-th entry of 'shapes' receives the i-th new box and
    label. Everything else in the file is written back unchanged.

    NOTE(review): because all other fields are copied as-is, any image-size
    fields stored in the file (labelme writes imageHeight/imageWidth) are not
    updated here -- confirm this is acceptable when a transform changes the
    image dimensions.

    Args:
        original_json_file: Path of the annotation file to use as a template.
        transformed_json_file: Path the updated annotation file is written to.
        new_bboxes: One ``(x_min, y_min, x_max, y_max)`` sequence per shape,
            in the same order as the shapes of the original file.
        new_labels: One class id per shape (a key of ``category_id_to_name``).
        new_img_filename: Value stored under 'imagePath'.
        new_img_data: Value stored under 'imageData'.

    Raises:
        ValueError: If the number of boxes or labels differs from the number
            of shapes in the original file. Nothing is written in that case.
        KeyError: If a class id is not a key of ``category_id_to_name``.
    """
    with open(original_json_file, 'r') as f:
        data = json.load(f)

    shapes = data['shapes']
    # Boxes are matched to shapes purely by position, so a count mismatch
    # (e.g. a transform that dropped a box) cannot be paired up safely.
    if not (len(shapes) == len(new_bboxes) == len(new_labels)):
        raise ValueError(
            f"{original_json_file}: expected {len(shapes)} boxes and labels, "
            f"got {len(new_bboxes)} boxes and {len(new_labels)} labels"
        )

    data['imagePath'] = new_img_filename
    data['imageData'] = new_img_data

    for annotation, new_bbox, new_label in zip(shapes, new_bboxes, new_labels):
        # Coerce to builtin floats so json.dump never trips over non-builtin
        # numeric scalar types coming out of the augmentation library.
        x_min, y_min, x_max, y_max = (float(value) for value in new_bbox[:4])
        annotation['label'] = category_id_to_name[new_label]
        annotation['points'] = [[x_min, y_min], [x_max, y_max]]

    with open(transformed_json_file, 'w') as f:
        json.dump(data, f, indent=2)

In [175]:
def process_json_indices(source_dir, indices):
  """Apply every pipeline in ``transformations`` to the selected samples.

  For each index ``i`` the pair ``Post_Event_<i>.jpg`` / ``Post_Event_<i>.json``
  (index zero-padded to four digits) is read from ``source_dir``. For the
  pipeline at position ``idx`` the transformed image is saved as
  ``Post_Event_<i>_r<idx>.jpg`` and a matching ``Post_Event_<i>_r<idx>.json``
  is written next to it, both inside ``source_dir``.

  Args:
    source_dir: Directory that holds the inputs and receives the outputs.
    indices: Iterable of integer sample indices.

  Raises:
    FileNotFoundError: If a source image cannot be read by OpenCV.
  """
  for i in tqdm(indices):
    # Zero-pad to four digits. This matches the previous naming for 0-99 and
    # keeps the width fixed for larger indices.
    # NOTE(review): assumes files beyond index 99 also use four digits -- confirm.
    stem = f"Post_Event_{i:04d}"
    source_json = stem + ".json"
    source_img = stem + ".jpg"
    source_json_path = os.path.join(source_dir, source_json)
    source_img_path = os.path.join(source_dir, source_img)

    # Decode the source image once per sample; it is the same for every pipeline.
    in_image = cv2.imread(source_img_path)
    if in_image is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise FileNotFoundError(f"Could not read image: {source_img_path}")
    in_image = cv2.cvtColor(in_image, cv2.COLOR_BGR2RGB)

    bboxes, category_ids = process_json_file(source_json_path)

    for idx, transform in enumerate(transformations):
      transformed = transform(image=in_image, bboxes=bboxes, category_ids=category_ids)

      out_img_file = f"{stem}_r{idx}.jpg"
      out_json_file = f"{stem}_r{idx}.json"
      out_img_path = os.path.join(source_dir, out_img_file)

      Image.fromarray(transformed['image']).save(out_img_path)

      # Re-read the file that was just saved so the embedded image data is
      # exactly what is on disk, then base64-encode it for the JSON.
      out_image_data = labelme.LabelFile.load_image_file(out_img_path)
      out_image_data = base64.b64encode(out_image_data).decode('utf-8')

      # NOTE(review): the full output path (not just the file name) is stored
      # as the annotation's imagePath, as before -- confirm that is intended.
      transform_annotations(
          source_json_path,
          os.path.join(source_dir, out_json_file),
          transformed['bboxes'],
          transformed['category_ids'],
          out_img_path,
          out_image_data
          )

Class definitions are as follows:
1. Damaged Count = 0, Undamaged Count > 0
    - Residential > 0, Commercial = 0 (class 1)
    - Residential = 0, Commercial > 0 (class 2)
    - Residential > 0, Commercial > 0 (class 3)
2. Damaged Count > 0, Undamaged Count = 0
    - Residential > 0, Commercial = 0 (class 4)
    - Residential = 0, Commercial > 0 (class 5)
    - Residential > 0, Commercial > 0 (class 6)
3. Damaged Count > 0, Undamaged Count > 0
    - Residential > 0, Commercial = 0 (class 7)
    - Residential = 0, Commercial > 0 (class 8)
    - Residential > 0, Commercial > 0 (class 9)

Class counts for the original dataset were as follows:
1. class 1: 1
2. class 2: 3
3. class 3: 4
4. class 4: 0
5. class 5: 1
6. class 6: 0
7. class 7: 14
8. class 8: 21
9. class 9: 47

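We augment classes 2, 5 and 8, i.e. the images that contain only commercial buildings (3 + 1 + 21 = 25 images). The indices of the images belonging to these classes were obtained using the data_overview notebook.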

In [184]:
# Indices of the Post_Event_XXXX image/annotation pairs to augment.
indices = [
    0, 9, 12, 13, 20,
    21, 22, 24, 30, 33,
    38, 44, 46, 48, 52,
    55, 57, 58, 64, 66,
    75, 79, 80, 82, 90,
]

In [185]:
# Directory holding the Post_Event_*.jpg / .json pairs. The augmented copies
# are written back into this same directory, so re-running this cell
# overwrites any "_r<idx>" files produced by an earlier run.
source_dir = '/content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_3'
process_json_indices(source_dir=source_dir, indices=indices)

100%|██████████| 25/25 [00:06<00:00,  4.11it/s]


In [187]:
# !tar -cvf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_4.tar /content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_3