# Project Preprocessing

- usage: call function `generate_new_train_img(train_path)`, where train_path is the directory of train images
- new train_annotation json file and new images will be generated to a directory 'new'
  - google drive download link
  - [version1](https://drive.google.com/file/d/1vKhKZugDBe11JA9ss6sJ9U1StgN681bw/view?usp=sharing) - generates 9 new augmented images for each train image, and updates train_annotation with labels and recalculated bounding boxes.
  - [version2](https://drive.google.com/file/d/14VIAkTLwMEs-f0MII-YHXAXDQWtIy1ig/view?usp=sharing) - in addition to version1, 50 images are moved to a new directory for validation, and no augmented images are generated from them
  - [version3](https://drive.google.com/file/d/1Vd9zstfV_TO1mPygXt3jhkfxmiZVo7LD/view?usp=sharing) - apart from version2, bbox data in generated annotations files are converted back to `[xmin, ymin, width, height]` format
  - [version4](https://drive.google.com/file/d/13d7a9JlNWWpmOvTplTG-gbraqB6IqwM9/view?usp=sharing) - apart from version3, images are moved to new directory
    - `new/penguin` contains original penguin images (excluding those images in `valid_penguin`) and augmentation images based on these original images
    - `new/turtle` contains original turtle images (excluding those images in `valid_turtle`) and augmentation images based on these original images
    - `new/valid_penguin` contains 25 randomly selected penguin images for validation purpose without any augmentation
    - `new/valid_turtle` contains 25 randomly selected turtle images for validation purpose without any augmentation

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from random import randint, seed
from PIL import Image, ImageChops
import torch
from torchvision.transforms import ToPILImage, ColorJitter
from torchvision.transforms.functional import invert, posterize, solarize, hflip, vflip
from torchvision.utils import draw_bounding_boxes
from torchvision.io import read_image

Annotation File
- id: integer, id #
- image_id: integer, image id #
- category_id: 0 for background, 1 for penguin, 2 for turtle
- bbox: list of integers representing the bounding box coordinates in COCO format [xmin, ymin, width, height] (e.g. `[119, 25, 205, 606]` has area 205 × 606 = 124230)
- area: integer representing area of bounding box.
- segmentation: empty list; add segmentation masks if you'd like!
- iscrowd: integer 0 or 1; whether the instance is a crowd or individual. Not relevant to this particular use case, but is a necessary key for some models.

In [2]:
def clean_annotations(annotations_path='train_annotations'):
    """Load the raw training annotations and keep only the columns used here.

    Args:
        annotations_path: Path of the annotation JSON file. Defaults to
            'train_annotations' (relative to the working directory), which is
            the location the parameterless call has always read from.

    Returns:
        pandas.DataFrame without the 'id', 'segmentation' and 'iscrowd'
        columns, with 'category_id' remapped to 0 = penguin / 1 = turtle and
        an extra 'label' column set to 'original' on every row.
    """
    train_annotations = pd.read_json(annotations_path)

    # remove redundant data
    train_annotations = train_annotations.drop(columns=['id', 'segmentation', 'iscrowd'])

    # raw category_id: background = 0, penguin = 1, turtle = 2
    num_background = int((train_annotations['category_id'] == 0).sum())
    print(f'number of category_id 0 (background) = {num_background}')

    # The remap below is only unambiguous when no background rows exist
    # (the count printed above is expected to be 0): penguin -> 0, turtle -> 1.
    train_annotations['category_id'] = train_annotations['category_id'].replace({1: 0, 2: 1})

    # tag every row so augmented rows added later can be told apart
    train_annotations['label'] = 'original'

    print(train_annotations.head())
    return train_annotations

# Run the cleaning step once to preview its printed output; the returned frame is discarded.
_ = clean_annotations()

number of category_id 0 (background) = 0
   image_id  category_id                  bbox    area     label
0         0            0   [119, 25, 205, 606]  124230  original
1         1            0   [131, 82, 327, 440]  143880  original
2         2            1  [225, 298, 198, 185]   36630  original
3         3            1  [468, 109, 172, 193]   33196  original
4         4            0   [14, 242, 611, 154]   94094  original


In [3]:
class GenerateImage:
    """Write augmented copies of one training image and build their annotation rows.

    Every public ``*_image`` method saves one augmented JPEG as
    ``<path>image_id_<index>.jpg`` and returns the matching annotation row
    ``[index, category_id, bbox, area, label]``. Bounding boxes use the
    ``[xmin, ymin, width, height]`` layout. The area is copied unchanged
    because none of the transforms here alters the size of the box.

    Args:
        img: PIL image to augment (must provide ``size``, ``rotate`` and ``save``).
        image_id: id of the source image; stored on the instance only.
        category_id: 0 selects the penguin output directory, any other value
            the turtle directory.
        bbox: box of the source image as [xmin, ymin, width, height].
        area: box area, copied into every generated row.
    """

    def __init__(self, img, image_id, category_id, bbox, area):
        self.img = img
        self.image_id = image_id
        self.category_id = category_id
        self.bbox = bbox
        # Read the real dimensions instead of assuming 640x640 so mirrored and
        # rotated boxes stay correct for other image sizes.
        self.width, self.height = img.size
        self.area = area
        self.color_jitter = ColorJitter(saturation=.5, hue=.3)  # random saturation / hue change
        self.path = ''
        self.which_path()

    def which_path(self):
        """Set ``self.path`` to the output directory for this image's category."""
        self.path = 'new/penguin/' if self.category_id == 0 else 'new/turtle/'

    # ---- bounding-box helpers (pure arithmetic, no I/O) ----

    def _xywh(self):
        """Return the source box as four plain ints (xmin, ymin, width, height)."""
        x, y, w, h = (int(v) for v in self.bbox)
        return x, y, w, h

    def _bbox_rotate90(self):
        """Box after a 90 degree counter-clockwise rotation (square image assumed)."""
        x, y, w, h = self._xywh()
        return [y, self.width - (x + w), h, w]

    def _bbox_rotate180(self):
        """Box after a 180 degree rotation."""
        x, y, w, h = self._xywh()
        return [self.width - (x + w), self.height - (y + h), w, h]

    def _bbox_rotate270(self):
        """Box after a 270 degree counter-clockwise rotation (square image assumed)."""
        x, y, w, h = self._xywh()
        # The new xmin comes from the old vertical axis, hence height.
        return [self.height - (y + h), x, h, w]

    def _bbox_hflip(self):
        """Box after mirroring the image left-to-right."""
        x, y, w, h = self._xywh()
        return [self.width - (x + w), y, w, h]

    def _bbox_vflip(self):
        """Box after mirroring the image top-to-bottom."""
        x, y, w, h = self._xywh()
        return [x, self.height - (y + h), w, h]

    def _emit(self, new_img, index, bbox, label):
        """Save ``new_img`` under ``index`` and return its annotation row."""
        # Create the target directory on first use so a fresh checkout works.
        os.makedirs(self.path, exist_ok=True)
        new_img.save(f'{self.path}image_id_{index}.jpg')
        return [index, self.category_id, bbox, self.area, label]

    # ---- colour-only augmentations: the box is unchanged ----

    def jitter_image(self, index):
        """Save a colour-jittered copy; returns the row labelled 'jitter'."""
        return self._emit(self.color_jitter(self.img), index, self.bbox, 'jitter')

    def inverted_image(self, index):
        """Save a colour-inverted copy; returns the row labelled 'invert'."""
        return self._emit(invert(self.img), index, self.bbox, 'invert')

    def posterized_image(self, index):
        """Save a copy posterized to 3 bits per channel; row labelled 'posterize'."""
        return self._emit(posterize(self.img, bits=3), index, self.bbox, 'posterize')

    def solarized_image(self, index):
        """Save a copy solarized at threshold 210; row labelled 'solarize'."""
        # not using this method
        return self._emit(solarize(self.img, threshold=210), index, self.bbox, 'solarize')

    # ---- geometric augmentations: the box is recomputed ----
    # rotate() is called without expand, so the canvas keeps its size; the
    # 90 / 270 degree box mappings therefore assume a square image.

    def rotate_90_degree_image(self, index):
        """Save a copy rotated by 90 degrees; row labelled 'rotate90'."""
        return self._emit(self.img.rotate(90), index, self._bbox_rotate90(), 'rotate90')

    def rotate_180_degree_image(self, index):
        """Save a copy rotated by 180 degrees; row labelled 'rotate180'."""
        return self._emit(self.img.rotate(180), index, self._bbox_rotate180(), 'rotate180')

    def rotate_270_degree_image(self, index):
        """Save a copy rotated by 270 degrees; row labelled 'rotate270'."""
        return self._emit(self.img.rotate(270), index, self._bbox_rotate270(), 'rotate270')

    def horizontally_flip_image(self, index):
        """Save a left-right mirrored copy; row labelled 'hflip'."""
        return self._emit(hflip(self.img), index, self._bbox_hflip(), 'hflip')

    def vertically_flip_image(self, index):
        """Save a top-bottom mirrored copy; row labelled 'vflip'."""
        return self._emit(vflip(self.img), index, self._bbox_vflip(), 'vflip')

    # ---- combined colour + geometric augmentations ----

    def jitter_vertically_flip_image(self, index):
        """Save a jittered, top-bottom mirrored copy; row labelled 'jitter_vflip'."""
        new_img = vflip(self.color_jitter(self.img))
        return self._emit(new_img, index, self._bbox_vflip(), 'jitter_vflip')

    def jitter_180_degree_image(self, index):
        """Save a jittered copy rotated by 180 degrees; row labelled 'jitter180'."""
        new_img = self.color_jitter(self.img).rotate(180)
        return self._emit(new_img, index, self._bbox_rotate180(), 'jitter180')


In [4]:
def _show_image_with_bbox(image_id, train_annotations, echo_box=False):
    """Draw the annotated box of ``image_id`` on its saved image and display it."""
    # Look the row up by image_id, not by position: once validation rows have
    # been dropped from the frame, row position and image_id no longer match.
    matches = train_annotations[train_annotations['image_id'] == image_id]
    if matches.empty:
        raise IndexError(f'no annotation row for image_id {image_id}')
    row = matches.iloc[0]

    # [xmin, ymin, width, height] -> [xmin, ymin, xmax, ymax] for drawing
    xmin, ymin, box_w, box_h = row['bbox']
    corners = torch.tensor([xmin, ymin, xmin + box_w, ymin + box_h], dtype=torch.int)
    corners = corners.unsqueeze(0)
    if echo_box:
        print(corners)

    # Images are stored per class: category 0 under new/penguin/, the rest under new/turtle/.
    directory = 'new/penguin/' if row['category_id'] == 0 else 'new/turtle/'
    picture = read_image(f'{directory}image_id_{image_id}.jpg')
    annotated = draw_bounding_boxes(picture, boxes=corners, width=3, colors=(255, 255, 0))
    ToPILImage()(annotated).show()


def test_bbox(test, original, train_annotations):
    """Visually compare the box of an augmented image with that of its source.

    Opens two image viewers: first the augmented image, then the source image,
    each with its annotated bounding box drawn in yellow. The source image's
    box tensor is also printed.

    Args:
        test: image_id of the augmented image to display.
        original: image_id of the image it was generated from.
        train_annotations: DataFrame with 'image_id', 'category_id' and 'bbox'
            ([xmin, ymin, width, height]) columns.

    Raises:
        IndexError: if either image_id has no row in ``train_annotations``.
    """
    _show_image_with_bbox(test, train_annotations)
    _show_image_with_bbox(original, train_annotations, echo_box=True)

# train_annotations = pd.read_json('new/train_annotations')
# test_bbox(test=1697, original=517, train_annotations=train_annotations)

references: [Pytorch ILLUSTRATION OF TRANSFORMS](https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py)

- The **ColorJitter** transform randomly changes the brightness, saturation, and other properties of an image.
- The **RandomInvert** transform randomly inverts the colors of the given image.
  - since it's random, it has a high chance of not modifying the original image, which results in a large number of duplicate images
  - use [invert](https://pytorch.org/vision/stable/generated/torchvision.transforms.functional.invert.html#torchvision.transforms.functional.invert) instead
- The RandomPosterize transform (see also posterize()) randomly posterizes the image by reducing the number of bits of each color channel.
  - use [posterize](https://pytorch.org/vision/stable/generated/torchvision.transforms.functional.posterize.html#torchvision.transforms.functional.posterize) instead
- The RandomSolarize transform (see also solarize()) randomly solarizes the image by inverting all pixel values above the threshold.
  - use [solarize](https://pytorch.org/vision/stable/generated/torchvision.transforms.functional.solarize.html#torchvision.transforms.functional.solarize) instead

In [5]:
def generate_new_train_img(train_path, num_train_files=500):
    """Build the augmented training set and a small validation split under 'new/'.

    For each source image ``<train_path>/image_id_NNN.jpg`` the image is either
    moved into the validation split (no augmentation) or copied into its class
    directory together with nine augmented variants. Annotation rows for the
    variants are appended to the training annotations.

    Args:
        train_path: Directory containing the source training images.
        num_train_files: Number of source images to process (ids 0..n-1).
            Defaults to 500.

    Returns:
        Tuple ``(train_annotations, valid_annotations)`` of DataFrames; both are
        also written as JSON records to 'new/train_annotations' and
        'new/valid_annotations'.
    """
    train_annotations = clean_annotations()
    valid_annotations = pd.DataFrame(columns=['image_id', 'category_id', 'bbox', 'area', 'label'])
    # sanity print kept from the original notebook
    print(train_annotations[train_annotations['image_id'] == 2])

    penguin_path = 'new/penguin/'
    turtle_path = 'new/turtle/'
    valid_path = 'new/valid_'
    class_names = {0: 'penguin', 1: 'turtle'}
    valid_quota = 25                      # validation images wanted per class
    valid_counts = {0: 0, 1: 0}

    # Image.save() does not create directories, so make sure they all exist.
    for directory in (penguin_path, turtle_path, f'{valid_path}penguin/', f'{valid_path}turtle/'):
        os.makedirs(directory, exist_ok=True)

    # New rows / image ids start after every existing row so nothing is overwritten.
    index = max(num_train_files, len(train_annotations))
    seed(5)  # fixed seed -> the same validation images are picked on every run

    for i in range(num_train_files):
        row = train_annotations.iloc[i]
        category_id = row['category_id']
        area = row['area']
        bbox = row['bbox']
        class_key = int(category_id)

        # The context manager closes the source file once all copies are written.
        with Image.open(f'{train_path}/image_id_{i:03}.jpg') as img:
            # Exactly one random draw per image keeps the seeded sequence stable;
            # a draw of 3 selects the image while its class quota is still open.
            if randint(1, 7) == 3 and valid_counts.get(class_key, valid_quota) < valid_quota:
                valid_counts[class_key] += 1
                img.save(f'{valid_path}{class_names[class_key]}/image_id_{i}.jpg')
                valid_annotations.loc[len(valid_annotations)] = [i, category_id, bbox, area, 'validation']
                continue

            class_dir = penguin_path if category_id == 0 else turtle_path
            img.save(f'{class_dir}image_id_{i}.jpg')

            generator = GenerateImage(img, i, category_id, bbox, area)
            augmentations = (
                generator.jitter_image,
                generator.inverted_image,
                generator.posterized_image,
                generator.rotate_90_degree_image,
                generator.rotate_180_degree_image,
                generator.rotate_270_degree_image,
                generator.horizontally_flip_image,
                generator.jitter_vertically_flip_image,
                generator.jitter_180_degree_image,
            )
            for augment in augmentations:
                train_annotations.loc[index] = augment(index)
                index += 1

    # The validation images must not appear in the training annotations.
    valid_ids = [int(v) for v in valid_annotations['image_id']]
    train_annotations = train_annotations[~train_annotations['image_id'].isin(valid_ids)]

    train_annotations.to_json('new/train_annotations', orient='records')
    valid_annotations.to_json('new/valid_annotations', orient='records')
    return train_annotations, valid_annotations

# train_annotations, valid_annotations = generate_new_train_img('train')
# test_bbox(test=512, original=2, train_annotations=train_annotations)
# print(valid_annotations.tail())

In [6]:
# Reload the generated annotation file and print its last rows as a quick check.
train_annotations = pd.read_json('new/train_annotations')
print(train_annotations.tail())

      image_id  category_id                  bbox    area         label
4046      4096            1  [150, 142, 477, 258]  123066     rotate180
4047      4097            1   [142, 13, 258, 477]  123066     rotate270
4048      4098            1  [150, 240, 477, 258]  123066         hflip
4049      4099            1   [13, 142, 477, 258]  123066  jitter_vflip
4050      4100            1  [150, 142, 477, 258]  123066     jitter180


In [7]:
# Report whether torch's MPS backend is usable; when it is, allocate a small
# tensor on that device and print it as proof.
if not torch.backends.mps.is_available():
    print ("MPS device not found.")
else:
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)

tensor([1.], device='mps:0')


In [8]:
# function written based on request after 2 weeks of the original code pushed to Github
# written in a way such that no modification to original code is needed
def preprocessing_demo(image, image_id):
    """Generate the nine augmented variants of one image and return them.

    Args:
        image: Source image in PIL.Image format.
        image_id: Row position of the image in the cleaned training
            annotations; its category, bbox and area are read from that row.

    Returns:
        List of the augmented images re-opened from disk as PIL images. A file
        that cannot be opened is reported with a printed message and skipped.
    """
    # Large file indices so the demo does not overwrite any existing
    # generated image.
    files = list(range(10001, 10010))

    train_annotations = clean_annotations()
    row = train_annotations.iloc[image_id]
    generator = GenerateImage(image, image_id, row['category_id'], row['bbox'], row['area'])

    # Read the results back from the directory the generator actually wrote
    # to (penguin or turtle), instead of always assuming the penguin folder.
    image_path = generator.path

    augmentations = (
        generator.jitter_image,
        generator.inverted_image,
        generator.posterized_image,
        generator.rotate_90_degree_image,
        generator.rotate_180_degree_image,
        generator.rotate_270_degree_image,
        generator.horizontally_flip_image,
        generator.jitter_vertically_flip_image,
        generator.jitter_180_degree_image,
    )
    # The annotation rows returned by the generator are not needed for the demo.
    for augment, file in zip(augmentations, files):
        augment(file)

    ret = []
    for file in files:
        try:
            ret.append(Image.open(f'{image_path}/image_id_{file}.jpg'))
        except IOError:
            print(f'Error: unable to open file {image_path}/image_id_{file}.jpg')

    return ret

# Demo run on the image stored as new/penguin/image_id_0.jpg; prints the returned list of augmented images.
image = Image.open(f'new/penguin/image_id_0.jpg')
print(preprocessing_demo(image, 0))

number of category_id 0 (background) = 0
   image_id  category_id                  bbox    area     label
0         0            0   [119, 25, 205, 606]  124230  original
1         1            0   [131, 82, 327, 440]  143880  original
2         2            1  [225, 298, 198, 185]   36630  original
3         3            1  [468, 109, 172, 193]   33196  original
4         4            0   [14, 242, 611, 154]   94094  original
[<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA77D30>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA75600>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA74970>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA77CA0>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA77160>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA77130>, <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x640 at 0x17BA771F0>, <PIL.Jpe