In [152]:
# !pip install -U albumentations

In [153]:
# !pip install labelme

In [154]:
# import the required libraries
import albumentations as A
import cv2
from matplotlib import pyplot as plt
import json
import random
from PIL import Image
import base64
import labelme
import os
from tqdm import tqdm
import numpy as np
%matplotlib inline

In [155]:
# augmentations that will be included
# small rotations
# crops by mike
# random contrast and brightness
# random gamma
# sharpen

In [261]:
# small rotations

# Pipeline 1: a single SafeRotate that fires on every call (p=1) with limit=5.
# Pixels exposed by the rotation are filled with a constant 0
# (border_mode=cv2.BORDER_CONSTANT, value=0).
# Boxes are given as pascal_voc [x_min, y_min, x_max, y_max]; their class ids
# travel alongside them in the `category_ids` label field.
transform1 = A.Compose(
    [
        A.SafeRotate(
            limit=5,
            p=1,
            border_mode=cv2.BORDER_CONSTANT,
            value=0,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [262]:
# random contrast and brightness

# Pipeline 2: brightness/contrast jitter applied on every call (p=1), with both
# limits set to 0.2. The image geometry is not touched by this pipeline, but the
# boxes still go through Compose so the output has the same structure as the others.
transform2 = A.Compose(
    [
        A.RandomBrightnessContrast(
            brightness_limit=0.2,
            contrast_limit=0.2,
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)


In [263]:
# random gamma

# Pipeline 3: gamma adjustment applied on every call (p=1).
# NOTE(review): the whole gamma_limit range (120-150) sits above 100; per the
# albumentations docs that presumably means gamma is always > 1, i.e. the image
# is only ever shifted in one direction - confirm this is intended.
transform3 = A.Compose(
    [
        A.RandomGamma(
            gamma_limit=(120,150),
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [264]:
# sharpen

# Pipeline 4: sharpening applied on every call (p=1) with alpha=1.0.
# Boxes are passed through in pascal_voc format with `category_ids` as labels.
transform4 = A.Compose(
    [
        A.Sharpen(
            alpha=1.0,
            p=1,
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [265]:
# crops by mike

# Pipeline 5: take a random 256x256 crop, then pad it with black (0, 0, 0)
# until it is at least 512x512. Boxes that fall outside the crop can be dropped
# by the pipeline, so this is the transform that may change the number of boxes.
# NOTE(review): RandomCrop presumably requires inputs of at least 256x256 -
# confirm every source image satisfies that.
transform5 = A.Compose(
    [
        A.RandomCrop(
            width=256,
            height=256,
        ),
        A.PadIfNeeded(
            min_height=512,
            min_width=512,
            border_mode=cv2.BORDER_CONSTANT,
            value=(0, 0, 0),
        ),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',
        label_fields=['category_ids'],
    ),
)

In [266]:
# Every pipeline in this list is applied to every source image by
# process_json_indices; the list order determines the order in which the
# numbered output files are produced.
transformations = [
    transform1,
    transform2,
    transform3,
    transform4,
    transform5,
]

In [267]:
# Class label (as written in the annotation JSON) -> integer category id.
category_id_dict = {
    "undamagedresidentialbuilding": 0,
    "damagedresidentialbuilding": 1,
    "undamagedcommercialbuilding": 2,
    "damagedcommercialbuilding": 3,
}

# Inverse lookup: integer category id -> class label.
category_id_to_name = {
    class_id: class_name
    for class_name, class_id in category_id_dict.items()
}

In [268]:
def shapes_element_maker(new_bboxes, new_labels, category_id_to_name):
    """Build a fresh list of rectangle shape dicts from boxes and category ids.

    Args:
        new_bboxes: Sequence of boxes, each indexable as
            [x_min, y_min, x_max, y_max].
        new_labels: Sequence of category ids, paired positionally with
            ``new_bboxes`` (extra items in the longer sequence are ignored).
        category_id_to_name: Mapping from category id to class label.

    Returns:
        A list with one dict per (box, id) pair. Each dict carries the label,
        the two corner points, and fixed default values for the remaining keys.
    """
    return [
        {
            "label": category_id_to_name[label_id],
            "points": [[box[0], box[1]], [box[2], box[3]]],
            "group_id": None,
            "description": "",
            "shape_type": "rectangle",
            "flags": {},
            "mask": None,
        }
        for box, label_id in zip(new_bboxes, new_labels)
    ]

In [269]:
def process_json_file(filename):
    """Read an annotation JSON file and return its boxes and category ids.

    Each entry of the file's ``"shapes"`` list contributes one box, built from
    its first two ``"points"``. The corners are normalised so the result is
    always ``[x_min, y_min, x_max, y_max]`` regardless of the order in which
    the two corners were stored.

    Args:
        filename: Path of the annotation JSON file.

    Returns:
        A ``(bboxes, category_ids)`` tuple of two parallel lists: the boxes and
        the integer id of each box's label, looked up in ``category_id_dict``.

    Raises:
        KeyError: if a shape's label is not a key of ``category_id_dict``.
    """
    with open(filename) as handle:
        annotations = json.load(handle)["shapes"]

    bboxes, category_ids = [], []
    for shape in annotations:
        category_ids.append(category_id_dict[shape["label"]])

        corners = shape["points"]
        first, second = corners[0], corners[1]
        xs = (first[0], second[0])
        ys = (first[1], second[1])
        bboxes.append([min(xs), min(ys), max(xs), max(ys)])

    return bboxes, category_ids

In [270]:
def transform_annotations(original_json_file, transformed_json_file, new_bboxes, new_labels, new_img_filename, new_img_data):
    """Write a copy of an annotation file updated for an augmented image.

    The original JSON is loaded, its ``imagePath`` / ``imageData`` are replaced,
    its shapes are updated from ``new_bboxes`` / ``new_labels``, and the result
    is written to ``transformed_json_file``. All other top-level keys are copied
    unchanged from the original file.

    NOTE(review): that includes any stored image dimensions - if a transform
    changes the image size those values would become stale; confirm all
    pipelines preserve the source size.

    Args:
        original_json_file: Path of the source annotation JSON.
        transformed_json_file: Path the updated JSON is written to.
        new_bboxes: Transformed boxes, each indexable as
            [x_min, y_min, x_max, y_max].
        new_labels: Category ids paired positionally with ``new_bboxes``.
        new_img_filename: Value stored under ``imagePath``.
        new_img_data: Value stored under ``imageData`` (base64 text).

    Raises:
        KeyError: if a category id is missing from ``category_id_to_name``.
    """
    with open(original_json_file, 'r') as f:
        data = json.load(f)

    data['imagePath'] = new_img_filename
    data['imageData'] = new_img_data

    # Normalise coordinates to plain Python floats so that numpy scalars coming
    # out of the augmentation pipeline can always be serialised by json.dump.
    new_bboxes = [[float(coord) for coord in bbox[:4]] for bbox in new_bboxes]

    if len(data['shapes']) == len(new_bboxes):
        # Box count preserved: update each shape in place so that its other
        # keys (everything except label and points) survive from the original.
        for annotation, bbox, category_id in zip(data['shapes'], new_bboxes, new_labels):
            annotation['label'] = category_id_to_name[category_id]
            annotation['points'] = [[bbox[0], bbox[1]], [bbox[2], bbox[3]]]
    else:
        # Box count changed (some boxes were dropped by the transform): the
        # positional match with the original shapes is lost, so rebuild the list.
        data['shapes'] = shapes_element_maker(new_bboxes, new_labels, category_id_to_name)

    with open(transformed_json_file, 'w') as f:
        json.dump(data, f, indent=2)

In [271]:
def process_json_indices(source_dir, indices, prefix, counter):
    """Apply every pipeline in ``transformations`` to each annotated source image.

    For each index ``i`` whose ``{prefix}_{i}.json`` exists in ``source_dir``,
    the matching ``{prefix}_{i}.jpg`` is read once and every pipeline of the
    module-level ``transformations`` list is applied to it. Each result is
    written back into ``source_dir`` as ``{prefix}_{counter}.jpg`` plus
    ``{prefix}_{counter}.json``, and ``counter`` is incremented after each pair.

    Args:
        source_dir: Directory holding the source pairs; outputs go here too.
        indices: Iterable of numeric suffixes to process. Indices without an
            annotation file are skipped silently.
        prefix: File-name prefix shared by images and annotations.
        counter: Suffix used for the first output pair. Existing files with the
            same name are overwritten, so it must not collide with a source
            index.

    Returns:
        The next unused counter value, so that a later call can continue the
        numbering (skipped indices make this impossible to predict up front).

    Raises:
        FileNotFoundError: if an annotation exists but its image cannot be read.
    """
    for i in tqdm(indices):
        json_path = os.path.join(source_dir, f"{prefix}_{i}.json")
        if not os.path.exists(json_path):
            continue

        # Decode the source image once per index rather than once per transform.
        img_path = os.path.join(source_dir, f"{prefix}_{i}.jpg")
        in_image = cv2.imread(img_path)
        if in_image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                f"Annotation {json_path} exists but image {img_path} could not be read"
            )
        in_image = cv2.cvtColor(in_image, cv2.COLOR_BGR2RGB)
        bboxes, category_ids = process_json_file(json_path)

        for transform in transformations:
            # Defensive copy: each pipeline starts from the pristine source pixels.
            transformed = transform(image=in_image.copy(), bboxes=bboxes, category_ids=category_ids)

            out_img_file = f"{prefix}_{counter}.jpg"
            out_img_path = os.path.join(source_dir, out_img_file)
            Image.fromarray(transformed['image']).save(out_img_path)

            # Re-read the saved file so the embedded data matches the bytes on disk.
            out_image_data = labelme.LabelFile.load_image_file(out_img_path)
            out_image_data = base64.b64encode(out_image_data).decode('utf-8')

            # NOTE(review): the full joined path is stored as imagePath, which ties
            # the JSON to this directory layout - confirm downstream tools accept it.
            transform_annotations(
                json_path,
                os.path.join(source_dir, f"{prefix}_{counter}.json"),
                transformed['bboxes'],
                transformed['category_ids'],
                out_img_path,
                out_image_data,
            )
            counter += 1

    return counter

In [272]:
BOX_COLOR = (255, 0, 0) # Red
TEXT_COLOR = (255, 255, 255) # White


def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
    """Draw one bounding box and its class label onto ``img``.

    The image is modified in place and also returned.

    Args:
        img: Image array to draw on.
        bbox: Box as (x_min, y_min, x_max, y_max); values are truncated to int.
        class_name: Text drawn in a filled strip just above the box's top edge.
        color: Colour of the box outline and of the label background.
        thickness: Line thickness of the box outline.

    Returns:
        The same ``img`` object, with the box and label drawn on it.
    """
    x_min, y_min, x_max, y_max = bbox
    x_min, x_max, y_min, y_max = int(x_min), int(x_max), int(y_min), int(y_max)

    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
    # Filled (thickness -1) label background, in the same colour as the outline
    # so that a caller-supplied `color` is honoured for the whole annotation.
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color, -1)
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img


def visualize(image, bboxes, category_ids, category_id_to_name):
    """Show ``image`` with every bounding box and class label drawn on it.

    Drawing happens on a copy, so the caller's ``image`` is left untouched.

    Args:
        image: Image array to display.
        bboxes: Boxes as (x_min, y_min, x_max, y_max).
        category_ids: Category ids paired positionally with ``bboxes``.
        category_id_to_name: Mapping from category id to the label text.
    """
    canvas = image.copy()
    for box, label_id in zip(bboxes, category_ids):
        canvas = visualize_bbox(canvas, box, category_id_to_name[label_id])

    plt.figure(figsize=(12, 12))
    plt.axis('off')
    plt.imshow(canvas)

In [274]:
# image = cv2.imread(os.path.join(source_dir, 'Image_1.jpg'))
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# bboxes, category_ids = process_json_file(os.path.join(source_dir, 'Image_1.json'))

In [275]:
# Directory holding the source image/annotation pairs; process_json_indices also
# writes the augmented outputs into this same directory.
# NOTE(review): hard-coded absolute path (looks like a mounted Google Drive
# folder) - adjust before running in any other environment.
source_dir = '/content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_6'

In [276]:
# Augment the source pairs Image_1 ... Image_381. Output numbering starts at 382,
# the first suffix after the source range, so no source file is overwritten.
# Re-running this cell writes to the same output names again.
process_json_indices(source_dir, list(range(1,382)), "Image", 382)

100%|██████████| 381/381 [01:41<00:00,  3.76it/s]


In [253]:
# visualize(image, bboxes, category_ids, category_id_to_name)

In [279]:
# !tar -cvf /content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_7.tar /content/drive/MyDrive/EYOpenScienceDataChallenge/code/data_augmentation/training_data_6/