## IMAGES & AUGMENTATION

Antes de usar este Colab, debemos:

Seleccionar las imágenes de plantas. Redimensionarlas a 640x480 y dividirlas en 2 sets
(training/test). Etiquetar las imágenes en LabelImg (generando un .xml por cada .jpg) y subirlas a GitHub.


Este Colab agarra las imágenes y los .xml desde GitHub y genera archivos .record que son usados para el entrenamiento,
los cuales deben ser subidos manualmente (por el momento) a GitHub.

### Clonamos el repo que contiene las imágenes y los .xml

In [0]:
repo_url = 'https://github.com/mapo-lp/obj_det_plantas' #@param {type:"string"}

import os

# Work from /content so the clone is created there.
%cd /content

# Local checkout path: the current directory joined with the last path component of repo_url.
# NOTE(review): basename() assumes repo_url has no trailing '/' and no '.git' suffix.
repo_dir_path = os.path.abspath(os.path.join('.', os.path.basename(repo_url)))

# Clone the repository, then enter the checkout.
!git clone {repo_url}
%cd {repo_dir_path}

# Bring the checkout up to date (useful when the cell is re-run on an existing clone).
!git pull

In [0]:
%cd /content
# Fetch the TensorFlow models repository; later steps run from its research/ folder.
!git clone --quiet https://github.com/tensorflow/models.git

# System packages: the protobuf compiler plus the PIL, lxml and Tk bindings.
!apt-get install -qq protobuf-compiler python-pil python-lxml python-tk

# Python packages installed for the object detection tooling.
!pip install -q Cython contextlib2 pillow lxml matplotlib

!pip install -q pycocotools

# Compile the object_detection .proto definitions into Python modules, in place.
%cd /content/models/research
!protoc object_detection/protos/*.proto --python_out=.

import os

# Expose the research/ and research/slim/ folders through PYTHONPATH so that the
# `!python ...` subprocesses started from this notebook can import them.
# PYTHONPATH may be unset, in which case `os.environ[...] += ...` raises KeyError,
# so read it with a default. Empty values are skipped to avoid a leading ':'
# (an empty search-path entry).
_research_paths = ['/content/models/research/', '/content/models/research/slim/']
_current_pythonpath = os.environ.get('PYTHONPATH', '')
os.environ['PYTHONPATH'] = ':'.join(
    ([_current_pythonpath] if _current_pythonpath else []) + _research_paths
)

# Run the model builder test script as a check of the installation steps above.
!python object_detection/builders/model_builder_test.py

### Aumentamos imágenes. Generamos 10 imágenes aumentadas por cada imagen original

In [0]:
import imgaug as ia
ia.seed(1)
# imgaug uses matplotlib backend for displaying images
%matplotlib inline
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
# imageio library will be used for image input/output
import imageio
import pandas as pd
import numpy as np
import re
import os
import glob
# this library is needed to read XML files for converting it into CSV
import xml.etree.ElementTree as ET
import shutil

In [0]:
# Function that will extract column data for our CSV file as pandas DataFrame
def xml_to_csv(path):
    """Collect the bounding-box annotations of every XML file in a folder.

    Every ``<object>`` element found in ``path/*.xml`` becomes one row.
    Fields are looked up by tag name (``size/width``, ``object/name``,
    ``object/bndbox/xmin`` ...) rather than by child position, so files
    that omit or reorder optional tags are still read correctly.

    Args:
        path: Directory that contains the annotation ``.xml`` files.

    Returns:
        A pandas DataFrame with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``.
        It has no rows when no ``<object>`` element is found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # Sorted so that the row order does not depend on the directory listing order.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Nothing annotated in this file; file-level tags are not needed.
            continue
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in members:
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(bndbox.find('xmin').text),
                int(bndbox.find('ymin').text),
                int(bndbox.find('xmax').text),
                int(bndbox.find('ymax').text),
            ))
    return pd.DataFrame(xml_list, columns=column_name)
   
# Recreate the annotations folder from scratch, then dump the XML annotations
# of the train and test image folders into temporary CSV files.
annotations_dir = '/content/obj_det_plantas/models/object_detection/data/annotations'
if os.path.exists(annotations_dir):
    shutil.rmtree(annotations_dir)
os.mkdir(annotations_dir)

train_xml_dir = '/content/obj_det_plantas/models/object_detection/data/images/train'
temp_train_csv = '/content/obj_det_plantas/models/object_detection/data/annotations/temp_train_labels.csv'
train_labels_df = xml_to_csv(train_xml_dir)
train_labels_df.to_csv(temp_train_csv, index=None)

test_xml_dir = '/content/obj_det_plantas/models/object_detection/data/images/test'
temp_test_csv = '/content/obj_det_plantas/models/object_detection/data/annotations/temp_test_labels.csv'
test_labels_df = xml_to_csv(test_xml_dir)
test_labels_df.to_csv(temp_test_csv, index=None)

In [0]:
# function to convert BoundingBoxesOnImage object into DataFrame
def bbs_obj_to_df(bbs_object):
    """Turn a bounding-boxes object into a DataFrame of corner coordinates.

    ``bbs_object`` must provide ``to_xyxy_array()``; each row of the array it
    returns becomes one row with the columns ``xmin``, ``ymin``, ``xmax``, ``ymax``.
    """
    corner_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(bbs_object.to_xyxy_array(), columns=corner_columns)

In [0]:
def resize_imgaug(df, images_path, aug_images_path, image_prefix):
    """Return the annotation rows of ``df`` regrouped by file name.

    Despite its name, this function does not read, resize or write any image:
    it only reorders the rows so that all rows of one file are contiguous
    (files in order of first appearance) and renumbers the index from 0.

    Args:
        df: Annotation table with at least a ``filename`` column.
        images_path: Unused; kept so existing calls keep working.
        aug_images_path: Unused; kept so existing calls keep working.
        image_prefix: Unused; kept so existing calls keep working.

    Returns:
        A new DataFrame with the regrouped rows and a fresh 0..n-1 index.
    """
    # The empty, fully-labelled frame comes first so the result has the expected
    # columns even when ``df`` has no rows.
    frames = [
        pd.DataFrame(columns=['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
    ]
    grouped = df.groupby('filename')
    # unique() keeps first-appearance order, which fixes the order of the groups.
    frames.extend(grouped.get_group(filename) for filename in df['filename'].unique())
    # One concat for all groups instead of re-copying the accumulated frame per file.
    return pd.concat(frames).reset_index(drop=True)

In [0]:
# Annotation table for the original training images.
# NOTE(review): resize_imgaug only regroups the rows of train_labels_df; the
# path and prefix arguments are not used by it and no image is resized here.
resized_train_images_df = resize_imgaug(train_labels_df, 'models/object_detection/data/images/train/', 'models/object_detection/data/images/train/', '')

In [0]:
# Annotation table for the original test images.
# NOTE(review): resize_imgaug only regroups the rows of test_labels_df; the
# path and prefix arguments are not used by it and no image is resized here.
resized_test_images_df = resize_imgaug(test_labels_df, 'models/object_detection/data/images/test/', 'models/object_detection/data/images/test/', '')

In [0]:
# Augmentation pipeline: SomeOf(2, ...) applies two of the listed augmenters
# to each image it processes.
aug = iaa.SomeOf(2, [    
    # Zoom out/in to between 50% and 150% of the original size.
    iaa.Affine(scale=(0.5, 1.5)),
    # Rotate by an angle between -60 and +60 degrees.
    iaa.Affine(rotate=(-60, 60)),
    # Shift by up to 30% of the image size along each axis.
    iaa.Affine(translate_percent={"x":(-0.3, 0.3),"y":(-0.3, 0.3)}),
    # Horizontal flip, applied with probability 1 when selected.
    iaa.Fliplr(1),
    # Multiply pixel values by a factor in [0.5, 1.5] (darker/brighter).
    iaa.Multiply((0.5, 1.5)),
    # Gaussian blur with sigma between 1.0 and 3.0.
    iaa.GaussianBlur(sigma=(1.0, 3.0)),
    # Additive Gaussian noise whose scale is 3% to 5% of 255.
    iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255))
])

In [0]:
def image_aug(df, images_path, aug_images_path, image_prefix, augmentor):
    """Augment every image listed in ``df`` once and return the new annotations.

    For each distinct file name the image is read from ``images_path``, passed
    through ``augmentor`` together with its bounding boxes, and written to
    ``aug_images_path`` under ``image_prefix + filename``. Boxes that end up
    fully outside the augmented image are dropped and partially outside boxes
    are clipped. An image left without any box is neither written nor listed.

    Args:
        df: Annotation table with the columns ``filename``, ``width``,
            ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``.
        images_path: Prefix (folder path ending in a separator) of the source images.
        aug_images_path: Prefix (folder path ending in a separator) for the output images.
        image_prefix: String prepended to every output file name.
        augmentor: Callable accepting ``image=`` and ``bounding_boxes=`` and
            returning the augmented ``(image, bounding_boxes)`` pair.

    Returns:
        A DataFrame with one row per surviving bounding box: the prefixed
        file name, the augmented image width/height, the class of that box
        and its augmented corner coordinates. The index runs from 0.
    """
    coordinate_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    # The empty, fully-labelled frame comes first so the result has the expected
    # columns even when no image produces any row.
    frames = [
        pd.DataFrame(columns=['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
    ]
    grouped = df.groupby('filename')

    for filename in df['filename'].unique():
        # All annotation rows of this image, numbered 0..n-1.
        group_df = grouped.get_group(filename).reset_index(drop=True)
        image = imageio.imread(images_path + filename)

        bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
        bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        # Tag each box with the position of its row in group_df. Boxes can be
        # removed below, and the tag is what lets the surviving boxes be matched
        # back to their own class instead of to whichever row shares their position.
        for position, box in enumerate(bbs.bounding_boxes):
            box.label = position

        aug_image, bbs_aug = augmentor(image=image, bounding_boxes=bbs)
        # Drop boxes that fell completely outside the image, clip the rest to it.
        bbs_aug = bbs_aug.remove_out_of_image()
        bbs_aug = bbs_aug.clip_out_of_image()

        if not bbs_aug.bounding_boxes:
            # No box left: do not write the image nor record anything for it.
            continue

        imageio.imwrite(aug_images_path + image_prefix + filename, aug_image)

        # Metadata rows of the surviving boxes only, in the order of the boxes.
        kept_rows = [box.label for box in bbs_aug.bounding_boxes]
        info_df = group_df.drop(coordinate_columns, axis=1).loc[kept_rows].reset_index(drop=True)
        info_df['width'] = aug_image.shape[1]
        info_df['height'] = aug_image.shape[0]
        info_df['filename'] = image_prefix + info_df['filename']

        # Both frames now have one row per surviving box, in the same order.
        bbs_df = bbs_obj_to_df(bbs_aug)
        frames.append(pd.concat([info_df, bbs_df], axis=1))

    # One concat for all images instead of re-copying the accumulated frame per image.
    return pd.concat(frames).reset_index(drop=True)

In [0]:
# Start from an empty folder for the augmented training images.
train_aug_dir = '/content/obj_det_plantas/models/object_detection/data/images/train/aug_images'
if os.path.exists(train_aug_dir):
    shutil.rmtree(train_aug_dir)
os.mkdir(train_aug_dir)

In [0]:
# Start from an empty folder for the augmented test images.
test_aug_dir = '/content/obj_det_plantas/models/object_detection/data/images/test/aug_images'
if os.path.exists(test_aug_dir):
    shutil.rmtree(test_aug_dir)
os.mkdir(test_aug_dir)

In [0]:
# Source folder of the training images and destination folder for their augmented copies.
train_images_dir = '/content/obj_det_plantas/models/object_detection/data/images/train/'
train_aug_images_dir = '/content/obj_det_plantas/models/object_detection/data/images/train/aug_images/'

# Annotation table of the augmented training images, filled pass by pass.
augmented_images_train_df = pd.DataFrame(
    columns=['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
)

# Run the augmentation 10 times over the same set of images; every pass writes
# its files with its own 'aug<i>_' prefix.
for i in range(10):
    aug_df = image_aug(
        resized_train_images_df,
        train_images_dir,
        train_aug_images_dir,
        'aug' + str(i) + '_',
        aug,
    )
    augmented_images_train_df = pd.concat([augmented_images_train_df, aug_df])

In [0]:
# Source folder of the test images and destination folder for their augmented copies.
test_images_dir = '/content/obj_det_plantas/models/object_detection/data/images/test/'
test_aug_images_dir = '/content/obj_det_plantas/models/object_detection/data/images/test/aug_images/'

# Annotation table of the augmented test images, filled pass by pass.
augmented_images_test_df = pd.DataFrame(
    columns=['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
)

# Run the augmentation 10 times over the same set of images; every pass writes
# its files with its own 'aug<i>_' prefix.
for i in range(10):
    aug_df = image_aug(
        resized_test_images_df,
        test_images_dir,
        test_aug_images_dir,
        'aug' + str(i) + '_',
        aug,
    )
    augmented_images_test_df = pd.concat([augmented_images_test_df, aug_df])

In [0]:
# Final label files: original annotations followed by the augmented ones, per split.
final_train_csv = '/content/obj_det_plantas/models/object_detection/data/annotations/train_labels.csv'
final_test_csv = '/content/obj_det_plantas/models/object_detection/data/annotations/test_labels.csv'

all_labels_train_df = pd.concat([resized_train_images_df, augmented_images_train_df])
all_labels_train_df.to_csv(final_train_csv, index=False)

all_labels_test_df = pd.concat([resized_test_images_df, augmented_images_test_df])
all_labels_test_df.to_csv(final_test_csv, index=False)

In [0]:
# Copy every augmented training image into the training images folder itself.
train_aug_source = '/content/obj_det_plantas/models/object_detection/data/images/train/aug_images/'
train_copy_target = '/content/obj_det_plantas/models/object_detection/data/images/train/'
for image_name in os.listdir(train_aug_source):
    shutil.copy(train_aug_source + image_name, train_copy_target + image_name)

In [0]:
# Copy every augmented test image into the test images folder itself.
test_aug_source = '/content/obj_det_plantas/models/object_detection/data/images/test/aug_images/'
test_copy_target = '/content/obj_det_plantas/models/object_detection/data/images/test/'
for image_name in os.listdir(test_aug_source):
    shutil.copy(test_aug_source + image_name, test_copy_target + image_name)

### Generamos train.record y test.record a partir de los .csv

In [0]:
# Run the repository's helper scripts from its object_detection folder.
%cd {repo_dir_path}/models/object_detection

# Convert the train folder annotation xml files to a single csv file and
# generate the `label_map.pbtxt` file in `data/annotations` (the -l argument).
# NOTE(review): the csv is written to `trainn_labels.csv`, not to the
# `train_labels.csv` that the tfrecord step below reads; presumably this is on
# purpose so that existing file is not overwritten -- confirm.
!python code/xml_to_csv.py -i data/images/train -o data/annotations/trainn_labels.csv -l data/annotations

# Convert test folder annotation xml files to a single csv (disabled).
#!python code/xml_to_csv.py -i data/images/test -o data/annotations/test_labels.csv

# Generate `train.record` from train_labels.csv and the images in data/images/train.
!python code/generate_tfrecord.py --csv_input=data/annotations/train_labels.csv --output_path=data/annotations/train.record --img_path=data/images/train --label_map data/annotations/label_map.pbtxt

# Generate `test.record` from test_labels.csv and the images in data/images/test.
!python code/generate_tfrecord.py --csv_input=data/annotations/test_labels.csv --output_path=data/annotations/test.record --img_path=data/images/test --label_map data/annotations/label_map.pbtxt

### Listo. Bajar train.record y test.record, y subirlos a GitHub.