In [26]:
# Image resize
# In this dataset, the resolution is 0.3m*0.3m per pixel.
# To bring the images to a 1m*1m resolution, they are shrunk by a ratio of 0.3/1.
# After shrinking, the images are expected to be smaller than 256*256, so each
# one is pasted into the top-left corner of a 256*256 canvas and the rest of
# the canvas is filled with gray.
# NOTE(review): the original comment stated that the source images are 300*300,
# while the file names in this dataset suggest 512*512 tiles -- confirm.
# Any resized image larger than 256*256 would be cropped by the paste below.

"""Resize images.

Every file found in ``path`` is opened, shrunk to 3/10 of its size (rounded
down, plus one pixel), pasted onto a gray 256*256 RGB canvas at (0, 0) and
saved under the same file name in ``new_path``.

Args:
    path (str): The folder containing the original images
    new_path (str): The folder to save resized images (created if missing)
    filelist (list): a list of original image file names
    image_path (str): Path of the image currently being processed
    im (PIL.Image.Image): Original image
    w (int): The width of the original image
    h (int): The height of the original image
    nw (int): Resized width of the image
    nh (int): Resized height of the image
    re_im (PIL.Image.Image): Resized image
    new_image (PIL.Image.Image): The 256*256 gray canvas holding the resized image
    save_path (str): Path the padded image is written to

Returns:
    Nothing; the resized images are written to ``new_path``.
"""

from PIL import Image
import numpy as np
import os

path = "./images/"
new_path = "./resize_images/"
filelist = os.listdir(path)

# Image.save() does not create missing folders, so make sure the target exists.
os.makedirs(new_path, exist_ok=True)

for file in filelist:
    image_path = path + file
    # The context manager releases the file handle once the pixels are resized.
    with Image.open(image_path) as im:
        w, h = im.size
        # The same formula is used to derive the annotation scale ratio,
        # so the two must stay in sync.
        nw = int(w*(3/10))+1
        nh = int(h*(3/10))+1
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        re_im = im.resize((nw, nh), Image.LANCZOS)
    new_image = Image.new('RGB', (256, 256), (128, 128, 128))
    new_image.paste(re_im, (0, 0))
    save_path = new_path + file
    new_image.save(save_path)

In [10]:
import shutil
from lxml.etree import Element,SubElement,tostring
from xml.dom.minidom import parseString
import xml.dom.minidom
import os
import sys
from PIL import Image

In [15]:
import pandas as pd
import numpy as np
import json
# Read the label file with the standard-library JSON parser.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory.
labels_json_path = "C:/Users/rapiduser/Box/EPA STAR 2019 (Community Resistance to Environmental Disasters)/Data/AST Datasets/Oil_Gas_Tank_Dataset/Oil Tanks/labels_coco.json"
with open(labels_json_path, 'r') as f:
    data = json.load(f)

# Flatten the 'annotations' and 'images' record lists into one DataFrame each.
df_anno = pd.json_normalize(data, record_path=['annotations'])
df_images = pd.json_normalize(data, record_path=['images'])


In [17]:
# Number of distinct image ids referenced by the annotation records.
np.unique(df_anno["image_id"]).size

1595

In [11]:
#read size of images. 

def readsize(path):
    """Read the pixel size of an image file.

    Args:
        path (str): The path of the image file to open

    Returns:
        height (int): The height of the image
        width (int): The width of the image

        Note the order: the tuple returned is ``(height, width)``, which is
        the reverse of PIL's ``Image.size`` (``(width, height)``).
    """
    # Use a context manager so the file handle is closed as soon as the
    # size has been read instead of being left open.
    with Image.open(path) as img:
        width, height = img.size
    return height, width

In [21]:
#After the images are resized,
#the annotations have to be rescaled to match.
#This function writes the annotations of one image to an XML file.

def writexml(path,filename,num,xmins,ymins,xmaxs,ymaxs,names,height, width):# Nwpu-vhr-10 < 1000*600
    """Write an annotation XML file for one image.

    The document has an ``annotation`` root holding ``folder``, ``filename``,
    ``path``, ``source/database`` and ``size`` (``width``, ``height``,
    ``depth``) nodes, followed by one ``object`` node per bounding box.

    Args:
        path (str): Where the XML file is written; the same string is also
            stored in the ``path`` node of the document
        filename (str): Text of the ``filename`` node
        num (int): Number of ``object`` nodes to write; the first ``num``
            entries of the lists below are used
        xmins, ymins, xmaxs, ymaxs (list): Bounding-box coordinates, one
            entry per object
        names (list): Object names, one entry per object
        height: Text of the ``size/height`` node
        width: Text of the ``size/width`` node

    Returns:
        None. The XML file is created (or overwritten) at ``path``.

    Raises:
        IndexError: If any of the lists holds fewer than ``num`` entries.
    """
    def _add(parent, tag, text=None):
        # Append a child node and, when given, set its text content.
        node = SubElement(parent, tag)
        if text is not None:
            node.text = '%s' % text
        return node

    node_root = Element('annotation')

    _add(node_root, 'folder', "Oil and Gas Tank Dataset")
    _add(node_root, 'filename', filename)
    _add(node_root, 'path', path)

    node_source = _add(node_root, 'source')
    _add(node_source, 'database', 'Unknown')

    node_size = _add(node_root, 'size')
    _add(node_size, 'width', width)
    _add(node_size, 'height', height)
    _add(node_size, 'depth', '3')

    for i in range(num):
        node_object = _add(node_root, 'object')
        _add(node_object, 'name', names[i])
        _add(node_object, 'pose', "unspecified")
        _add(node_object, 'truncated', "0")
        _add(node_object, 'difficult', '0')

        node_bndbox = _add(node_object, 'bndbox')
        _add(node_bndbox, 'xmin', xmins[i])
        _add(node_bndbox, 'ymin', ymins[i])
        _add(node_bndbox, 'xmax', xmaxs[i])
        _add(node_bndbox, 'ymax', ymaxs[i])

    # tostring() returns bytes, hence the binary write mode. The serialized
    # document is written directly; re-parsing it into an unused DOM object
    # was dead work and has been dropped.
    xml = tostring(node_root, pretty_print=True)
    with open(path, 'wb') as f:
        f.write(xml)
    return

In [28]:
#Annotation resize
#read original annotations and write new xml files. 

from __future__ import division
import os
from PIL import Image
import xml.dom.minidom
import numpy as np

"""Resize annotation.
    
Args: 
    ImgPath (str): The path of the folder containing original images
    Annopath (str): The path of the folder containing original annotation files
    ProcessedPath (str): The path of folder to save new annotation files
    imagelist (list): a list of original images
    image_pre (str): The file name of the image
    ext (str): The extension name of the image
    imgfile (str): The path of the image
    xmlfile (str): The path of the xml file of the image
    DomTree, annotation, filenamelist, filename, objectlist are nodes in the xml file
    xmins, xmaxs, ymins, ymaxs (int): they are the resized locations of bounding box of tanks
    names (str): image names
    ratio (float): resize ratio
    num (int): number of images
    filename_fill, filename_jpg (str): image name
    dealpath (str): path to save new xml file
    imagpath (str): path of the image
    height, width (str): new size 256*256
    
        
Returns: 
    create new xml files
"""


ImgPath = './images/' 
AnnoPath = './xml/'
ProcessedPath = './new_xml/'
 
if not os.path.exists(ProcessedPath):
    os.makedirs(ProcessedPath)

imagelist = os.listdir(ImgPath)
 
for image in imagelist:
    print('a new image:', image)
    image_pre, ext = os.path.splitext(image)
    imgfile = ImgPath + image 
    xmlfile = AnnoPath + image_pre + '.xml'
    
    DomTree = xml.dom.minidom.parse(xmlfile)
    annotation = DomTree.documentElement

    filenamelist = annotation.getElementsByTagName('filename') #[<DOM Element: filename at 0x381f788>]
    filename = filenamelist[0].childNodes[0].data
    objectlist = annotation.getElementsByTagName('object')
    
    count = 0
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    names = []
    for objects in objectlist:
        # print objects
        count = count + 1
        namelist = objects.getElementsByTagName('name')
        # print 'namelist:',namelist
        objectname = namelist[0].childNodes[0].data
        names.append(objectname)
        bndbox = objects.getElementsByTagName('bndbox')
        cropboxes = []
        for box in bndbox:
            try:
                x1_list = box.getElementsByTagName('xmin')
                x1 = int(x1_list[0].childNodes[0].data)
                y1_list = box.getElementsByTagName('ymin')
                y1 = int(y1_list[0].childNodes[0].data)
                x2_list = box.getElementsByTagName('xmax')
                x2 = int(x2_list[0].childNodes[0].data)
                y2_list = box.getElementsByTagName('ymax')
                y2 = int(y2_list[0].childNodes[0].data)
                
                ratio = (int(512*(3/10))+1)/512
                x1_1 = x1*ratio
                y1_1 = y1*ratio
                x2_1 = x2*ratio
                y2_1 = y2*ratio
 
                img = Image.open(imgfile)
                width,height = img.size
 
                xmins.append(x1_1)
                ymins.append(y1_1)
                xmaxs.append(x2_1)
                ymaxs.append(y2_1)
        
            except Exception as e:
                print(e)
    num = count
    print(num)
    print(names)
    filename_fill = image_pre
    filename_jpg = filename_fill + ".jpg"
    dealpath=ProcessedPath+ filename_fill +".xml"
    imagpath = './images/' + filename_fill + ".jpg"
    with open(dealpath, 'w') as f:
        height, width = (256, 256)
        writexml(dealpath,filename_jpg,num,xmins,ymins,xmaxs,ymaxs,names, height, width)

a new image: cold-lake_1-1-0_20041_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-0_20450_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-0_20859_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-0_22495_512_512_0_57088_32256.jpg
2
['tank', 'tank']
a new image: cold-lake_1-1-0_24949_512_512_0_57088_32256.jpg
2
['tank', 'tank']
a new image: cold-lake_1-1-0_26585_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-0_30266_512_512_0_57088_32256.jpg
2
['tank', 'tank']
a new image: cold-lake_1-1-0_32311_512_512_0_57088_32256.jpg
3
['tank', 'tank', 'tank']
a new image: cold-lake_1-1-0_48262_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-0_818_512_512_0_57088_32256.jpg
1
['tank']
a new image: cold-lake_1-1-10225_13906_512_512_0_57088_32256.jpg
2
['tank', 'tank']
a new image: cold-lake_1-1-10225_14315_512_512_0_57088_32256.jpg
7
['tank', 'tank', 'tank', 'tank', 'tank', 'tank', 'tank']
a new image: cold-lake_1-1-10225_3