In [143]:
import pandas as pd
import numpy as np
import re
import os
import glob
import xml.etree.ElementTree as ET
import shutil
import imgaug as ia
ia.seed(1)
%matplotlib inline
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
import imageio

In [144]:
def xml_to_csv(path):
    """Collect the object annotations of every ``*.xml`` file in a folder.

    For each ``<object>`` element of each file one row is produced from:

    * the text of the root's ``filename`` element,
    * the first and second children of the root's ``size`` element
      (stored as ``width`` and ``height``),
    * the text of the object's first child (stored as ``class``),
    * the four children of the object's fifth child
      (stored as ``xmin``, ``ymin``, ``xmax``, ``ymax``).

    NOTE(review): this positional layout looks like Pascal VOC / LabelImg
    output -- confirm against the annotation tool actually used.

    Parameters
    ----------
    path : str
        Directory that directly contains the XML annotation files.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename, width, height, class, xmin, ymin, xmax, ymax``;
        empty (but with those columns) when nothing could be read.

    Notes
    -----
    Objects that do not follow the expected layout are skipped with a
    warning instead of being dropped silently. Errors raised while parsing
    a file (``ET.parse``) are not caught.
    """
    import warnings  # local import keeps this cell runnable on its own

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []

    # os.path.join instead of a hard-coded '\\' so the pattern also matches on
    # non-Windows systems; sorted() makes the row order reproducible because
    # glob returns files in arbitrary order.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            try:
                size = root.find('size')
                bndbox = member[4]
                value = (root.find('filename').text,
                         int(size[0].text),
                         int(size[1].text),
                         member[0].text,
                         int(bndbox[0].text),
                         int(bndbox[1].text),
                         int(bndbox[2].text),
                         int(bndbox[3].text)
                         )
            except (AttributeError, IndexError, TypeError, ValueError) as exc:
                # Only layout/conversion problems are tolerated; anything else
                # (e.g. KeyboardInterrupt) propagates.
                warnings.warn('Skipping malformed object in %s: %r' % (xml_file, exc))
                continue
            xml_list.append(value)

    return pd.DataFrame(xml_list, columns=column_name)

In [145]:
# Step 1: parse the annotation folder of each document type into a DataFrame.
uk_passport_labels_df = xml_to_csv('C:\\Users\\HP\\Desktop\\labels\\uk_passport')
singapore_passport_labels_df = xml_to_csv('C:\\Users\\HP\\Desktop\\labels\\singapore_passport')
pakistan_passport_labels_df = xml_to_csv('C:\\Users\\HP\\Desktop\\labels\\pakistani_passport')
pakistan_cnic_labels_df = xml_to_csv('C:\\Users\\HP\\Desktop\\labels\\pakistani_cnic')

# Step 2: persist every label table as a CSV (row index omitted) next to the
# annotation folders.
uk_passport_labels_df.to_csv(
    'C:\\Users\\HP\\Desktop\\labels\\uk_passport_labels.csv', index=None)
singapore_passport_labels_df.to_csv(
    'C:\\Users\\HP\\Desktop\\labels\\singapore_passport_labels.csv', index=None)
pakistan_passport_labels_df.to_csv(
    'C:\\Users\\HP\\Desktop\\labels\\pakistan_passport_labels.csv', index=None)
pakistan_cnic_labels_df.to_csv(
    'C:\\Users\\HP\\Desktop\\labels\\pakistan_cnic_labels.csv', index=None)

In [146]:
def bbs_obj_to_df(bbs_object):
    """Return the boxes of ``bbs_object`` as an ``xmin, ymin, xmax, ymax`` DataFrame.

    ``bbs_object`` must provide ``to_xyxy_array()``; each row of that array
    becomes one row of the result, in the same order.
    """
    corner_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(bbs_object.to_xyxy_array(), columns=corner_columns)

In [147]:
def resize_imgaug(df, images_path, aug_images_path, image_prefix):
    """Resize oversized images together with their boxes and return the labels.

    ``df`` is processed one image (``filename``) at a time, using the
    ``width``/``height`` recorded in its first row for that image:

    * ``height >= width`` and ``height > 600``: image and boxes are passed
      to ``height_resize``;
    * ``width > height`` and ``width > 600``: image and boxes are passed to
      ``width_resize``;
    * otherwise the rows are passed through unchanged and no image is
      read or written.

    ``height_resize`` / ``width_resize`` are not defined in this function;
    they are looked up at call time and invoked as
    ``f(image=..., bounding_boxes=...)`` returning ``(image, boxes)``
    (presumably imgaug resize augmenters -- confirm where they are defined).

    Parameters
    ----------
    df : pandas.DataFrame
        Label table with columns ``filename, width, height, class, xmin,
        ymin, xmax, ymax``.
    images_path : str
        Prefix prepended to ``filename`` when reading; it is concatenated
        as-is, so a directory must end with a path separator.
    aug_images_path : str
        Prefix for written images (same concatenation rule).
    image_prefix : str
        Text prepended to the file name of every resized image, both on
        disk and in the returned ``filename`` column.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df`` with a fresh 0..n-1 index; resized images
        carry the new ``width``/``height`` and transformed box corners.
    """
    label_columns = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    size_limit = 600  # longest side above which an image is resized

    # The empty seed frame fixes the column order and keeps the result
    # well-formed when df has no rows. Per-image frames are collected here and
    # concatenated once (concatenating inside the loop is quadratic).
    frames = [pd.DataFrame(columns=label_columns)]

    # sort=False keeps images in order of first appearance in df.
    for filename, group_df in df.groupby('filename', sort=False):
        group_df = group_df.reset_index(drop=True)
        height = group_df['height'].iloc[0]
        width = group_df['width'].iloc[0]

        if height >= width and height > size_limit:
            resizer = height_resize
        elif width > height and width > size_limit:
            resizer = width_resize
        else:
            # Small enough already: keep the labels untouched.
            frames.append(group_df)
            continue

        image = imageio.imread(images_path + filename)
        bbs = BoundingBoxesOnImage.from_xyxy_array(group_df[box_columns].values, shape=image.shape)
        resized_image, bbs_resized = resizer(image=image, bounding_boxes=bbs)
        imageio.imwrite(aug_images_path + image_prefix + filename, resized_image)

        # Non-box columns, updated to describe the image that was just written.
        info_df = group_df.drop(box_columns, axis=1)
        info_df['width'] = resized_image.shape[1]
        info_df['height'] = resized_image.shape[0]
        info_df['filename'] = image_prefix + info_df['filename']

        # Both frames are indexed 0..k-1 in the same row order, so a
        # column-wise concat re-attaches each box to its row.
        frames.append(pd.concat([info_df, bbs_obj_to_df(bbs_resized)], axis=1))

    return pd.concat(frames).reset_index(drop=True)

In [148]:
# Resize every dataset in place: images are read from and written back to the
# same folder, with no file-name prefix.
pakistan_CNIC_resized_images_df = resize_imgaug(
    df=pakistan_cnic_labels_df,
    images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_CNIC\\',
    aug_images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_CNIC\\',
    image_prefix='',
)
pakistan_passport_resized_images_df = resize_imgaug(
    df=pakistan_passport_labels_df,
    images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_Passport\\',
    aug_images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_Passport\\',
    image_prefix='',
)
singapore_passport_resized_images_df = resize_imgaug(
    df=singapore_passport_labels_df,
    images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\singapore_passport\\',
    aug_images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\singapore_passport\\',
    image_prefix='',
)
uk_passport_resized_images_df = resize_imgaug(
    df=uk_passport_labels_df,
    images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\uk_passport\\',
    aug_images_path='C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\uk_passport\\',
    image_prefix='',
)

In [150]:
# Augmentation pipeline: each call applies exactly two of the children below.
# The order of the children is kept as-is because the random selection is
# made over their positions.
aug = iaa.SomeOf(
    n=2,
    children=[
        # geometric transforms (these also move the bounding boxes)
        iaa.Affine(scale=(0.5, 1.5)),
        iaa.Affine(rotate=(-60, 60)),
        iaa.Affine(translate_percent={"x": (-0.3, 0.3), "y": (-0.3, 0.3)}),
        iaa.Fliplr(1),
        # photometric transforms
        iaa.Multiply((0.5, 1.5)),
        iaa.GaussianBlur(sigma=(1.0, 3.0)),
        iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255)),
    ],
)

In [151]:
  def image_aug(df, images_path, aug_images_path, image_prefix, augmentor):
    """Augment every image in ``df`` once and return the labels of the results.

    For each distinct ``filename`` the image is read from
    ``images_path + filename`` and passed, together with its boxes, to
    ``augmentor(image=..., bounding_boxes=...)``. Boxes are then filtered
    with ``remove_out_of_image()`` and the rest clipped with
    ``clip_out_of_image()``. If no box is left, nothing is written for that
    image; otherwise the augmented image is saved to
    ``aug_images_path + image_prefix + filename``.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table with columns ``filename, width, height, class, xmin,
        ymin, xmax, ymax``.
    images_path, aug_images_path : str
        Prefixes for reading / writing; concatenated as-is, so a directory
        must end with a path separator.
    image_prefix : str
        Text prepended to each written file name and to the returned
        ``filename`` values.
    augmentor : callable
        Called as ``augmentor(image=..., bounding_boxes=...)`` and expected
        to return ``(augmented_image, augmented_boxes)``.

    Returns
    -------
    pandas.DataFrame
        One row per surviving box, same columns as ``df``, with the
        augmented image's ``width``/``height`` and a fresh 0..n-1 index.
    """
    label_columns = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']

    # The empty seed frame fixes the column order and keeps the result
    # well-formed when nothing survives. Frames are concatenated once at the
    # end rather than inside the loop.
    frames = [pd.DataFrame(columns=label_columns)]

    # sort=False keeps images in order of first appearance in df.
    for filename, group_df in df.groupby('filename', sort=False):
        group_df = group_df.reset_index(drop=True)
        image = imageio.imread(images_path + filename)

        # Tag each box with its row position. When out-of-image boxes are
        # dropped later, the survivors can then be matched back to the right
        # rows; pairing them positionally would attach boxes to the wrong
        # class and leave NaN rows behind.
        boxes = [
            BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2, label=row_pos)
            for row_pos, (x1, y1, x2, y2) in enumerate(group_df[box_columns].values)
        ]
        bbs = BoundingBoxesOnImage(boxes, shape=image.shape)

        # Named augmented_image so the local does not shadow this function.
        augmented_image, bbs_aug = augmentor(image=image, bounding_boxes=bbs)
        bbs_aug = bbs_aug.remove_out_of_image()
        bbs_aug = bbs_aug.clip_out_of_image()

        # Direct emptiness test instead of pattern-matching the repr string.
        if not bbs_aug.bounding_boxes:
            continue

        imageio.imwrite(aug_images_path + image_prefix + filename, augmented_image)

        kept_rows = [bb.label for bb in bbs_aug.bounding_boxes]
        info_df = group_df.drop(box_columns, axis=1).iloc[kept_rows].reset_index(drop=True)
        info_df['width'] = augmented_image.shape[1]
        info_df['height'] = augmented_image.shape[0]
        info_df['filename'] = image_prefix + info_df['filename']

        # info_df and the box frame now have the same length and row order.
        frames.append(pd.concat([info_df, bbs_obj_to_df(bbs_aug)], axis=1))

    return pd.concat(frames).reset_index(drop=True)

In [152]:
# Run the augmentation pipeline 100 times over the Pakistani CNIC images and
# keep the labels of every pass. The frames are collected in a list and
# concatenated once: the previous code appended to a DataFrame that is never
# defined in this notebook and overwrote the result on each iteration, and
# DataFrame.append no longer exists in pandas 2.x.
p_cnic_frames = []
for i in range(0, 100):
    p_cnic_df = image_aug(pakistan_CNIC_resized_images_df, 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_CNIC\\', 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\p_cnic\\', 'aug'+str(i)+'_', aug)
    p_cnic_frames.append(p_cnic_df)
p_cnic_augmented_images_df = pd.concat(p_cnic_frames, ignore_index=True)

In [153]:
# Run the augmentation pipeline 100 times over the Pakistani passport images
# and keep the labels of every pass. The frames are collected in a list and
# concatenated once: the previous code appended to a DataFrame that is never
# defined in this notebook and overwrote the result on each iteration, and
# DataFrame.append no longer exists in pandas 2.x.
p_pass_frames = []
for i in range(0, 100):
    p_pass_df = image_aug(pakistan_passport_resized_images_df, 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\Pakistani_Passport\\', 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\p_pass\\', 'aug'+str(i)+'_', aug)
    p_pass_frames.append(p_pass_df)
p_pass_augmented_images_df = pd.concat(p_pass_frames, ignore_index=True)

In [154]:
# Run the augmentation pipeline 100 times over the UK passport images and keep
# the labels of every pass. The frames are collected in a list and
# concatenated once: the previous code appended to a DataFrame that is never
# defined in this notebook and overwrote the result on each iteration, and
# DataFrame.append no longer exists in pandas 2.x.
u_pass_frames = []
for i in range(0, 100):
    u_pass_df = image_aug(uk_passport_resized_images_df, 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\uk_passport\\', 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\u_pass\\', 'aug'+str(i)+'_', aug)
    u_pass_frames.append(u_pass_df)
u_pass_augmented_images_df = pd.concat(u_pass_frames, ignore_index=True)

In [155]:
# Run the augmentation pipeline 100 times over the Singapore passport images
# and keep the labels of every pass. The frames are collected in a list and
# concatenated once: the previous code appended to a DataFrame that is never
# defined in this notebook and overwrote the result on each iteration, and
# DataFrame.append no longer exists in pandas 2.x.
s_pass_frames = []
for i in range(0, 100):
    s_pass_df = image_aug(singapore_passport_resized_images_df, 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\singapore_passport\\', 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset\\s_pass\\', 'aug'+str(i)+'_', aug)
    s_pass_frames.append(s_pass_df)
s_pass_augmented_images_df = pd.concat(s_pass_frames, ignore_index=True)

In [156]:
# For every document type: stack the resized-original labels on top of the
# augmented labels and write the combined table to its CSV (no index column).
for resized_df, augmented_df, csv_path in (
    (uk_passport_resized_images_df, u_pass_augmented_images_df,
     'C:\\Users\\HP\\Desktop\\labels\\all_labels_uk_passport.csv'),
    (singapore_passport_resized_images_df, s_pass_augmented_images_df,
     'C:\\Users\\HP\\Desktop\\labels\\all_labels_singapore_passport.csv'),
    (pakistan_passport_resized_images_df, p_pass_augmented_images_df,
     'C:\\Users\\HP\\Desktop\\labels\\all_labels_pakistan_passport.csv'),
    (pakistan_CNIC_resized_images_df, p_cnic_augmented_images_df,
     'C:\\Users\\HP\\Desktop\\labels\\all_labels_pakistan_cnic.csv'),
):
    all_labels_df = pd.concat([resized_df, augmented_df])
    all_labels_df.to_csv(csv_path, index=False)

In [157]:
# Copy every augmented image back into the folder of the dataset it was
# generated from, so that each all_labels_*.csv and its images live together.
# The earlier version crossed the destinations (for example the Singapore
# augmentations in s_pass were copied into Pakistani_CNIC), which left each
# dataset folder holding another dataset's augmented images.
dataset_root = 'C:\\Users\\HP\\Desktop\\DS_Project_Dataset'
for aug_folder, target_folder in (
    ('s_pass', 'singapore_passport'),
    ('u_pass', 'uk_passport'),
    ('p_pass', 'Pakistani_Passport'),
    ('p_cnic', 'Pakistani_CNIC'),
):
    source_dir = os.path.join(dataset_root, aug_folder)
    target_dir = os.path.join(dataset_root, target_folder)
    for file_name in os.listdir(source_dir):
        shutil.copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))