In [3]:
import xml.etree.ElementTree as ET
import os

In [4]:
dataset_path = "./"
images_path = os.path.join(dataset_path, "Images")
annotations_path = os.path.join(dataset_path, "Annotations")

In [14]:
examples_list = [os.path.splitext(fname)[0] for fname in os.listdir(annotations_path) if not fname.startswith(".")]

In [15]:
examples_list[:10]

['img_2346_37854497175_o',
 'img_2390_37854456895_o',
 'img_2192_37854552055_o',
 'img_2389_37854482245_o',
 'img_2211_24868829648_o',
 '63',
 'IMG_4058',
 'IMG_3737',
 'img_2147_38710631922_o',
 'img_2183_37854460995_o']

In [17]:
from PIL import Image

scaled_images_path = os.path.join(dataset_path, "Images_scaled")
if not os.path.exists(scaled_images_path):
    os.mkdir(scaled_images_path)
    print("Created directory:", scaled_images_path)

size = (667, 375)
for img_name in examples_list:
    orig_img_path = os.path.join(images_path, img_name + ".jpg")
    outfile = os.path.join(scaled_images_path, img_name + ".jpg")
    im = Image.open(orig_img_path)
    im.thumbnail(size, Image.ANTIALIAS)
    im.save(outfile, "JPEG")
    print("Saved:", outfile)

In [51]:
scaled_annotations_path = os.path.join(dataset_path, "Annotations_scaled")
if not os.path.exists(scaled_annotations_path):
    os.mkdir(scaled_annotations_path)
    print("Created directory:", scaled_annotations_path)
    
for example in examples_list:
    orig_xml_path = os.path.join(annotations_path, example + ".xml")
    scaled_xml_path = os.path.join(scaled_annotations_path, example + ".xml")
    
    orig_img_path = os.path.join(images_path, example + ".jpg")
    orig_image = Image.open(orig_img_path)
    orig_height = orig_image.height
    orig_width = orig_image.width
    
    scaled_img_path = os.path.join(scaled_images_path, example + ".jpg")
    scaled_image = Image.open(scaled_img_path)
    scaled_height = scaled_image.height
    scaled_width = scaled_image.width
    
    orig_xml = ET.parse(orig_xml_path)
    root = orig_xml.getroot()
    for child in root:
        if child.tag == "size":
            fields = child.getchildren()
            
            width_field = fields[0]
            assert(width_field.tag == "width")
            width = int(width_field.text)
            assert(width == orig_width)            
            
            height_field = fields[1]
            assert(height_field.tag == "height")
            height = int(height_field.text)
            assert(height == orig_height)

            # make sure the original image has the same width and height
            im = Image.open(orig_img_path)
            assert(im.height == height)
            assert(im.width == width)
            
            # replace it with the scaled version
            width_field.text = str(scaled_width)
            height_field.text = str(scaled_height)

        if child.tag == "object":
            bndboxes = child.findall("bndbox")
            assert(len(bndboxes) == 1)
            fields = bndboxes[0].getchildren()
            assert(len(fields) == 4)
            
            xmin_field = fields[0]
            xmin_field.text = str(int(int(xmin_field.text) / orig_width * scaled_width))
            
            ymin_field = fields[1]
            ymin_field.text = str(int(int(ymin_field.text) / orig_height * scaled_height))
            
            xmax_field = fields[2]
            xmax_field.text = str(int(int(xmax_field.text) / orig_width * scaled_width))
            
            ymax_field = fields[3]
            ymax_field.text = str(int(int(ymax_field.text) / orig_height * scaled_height))
            
            
    orig_xml.write(scaled_xml_path)
    print(orig_xml_path, "=>", scaled_xml_path)

./Annotations/img_2346_37854497175_o.xml => ./Annotations_scaled/img_2346_37854497175_o.xml
./Annotations/img_2390_37854456895_o.xml => ./Annotations_scaled/img_2390_37854456895_o.xml
./Annotations/img_2192_37854552055_o.xml => ./Annotations_scaled/img_2192_37854552055_o.xml
./Annotations/img_2389_37854482245_o.xml => ./Annotations_scaled/img_2389_37854482245_o.xml
./Annotations/img_2211_24868829648_o.xml => ./Annotations_scaled/img_2211_24868829648_o.xml
./Annotations/63.xml => ./Annotations_scaled/63.xml
./Annotations/IMG_4058.xml => ./Annotations_scaled/IMG_4058.xml
./Annotations/IMG_3737.xml => ./Annotations_scaled/IMG_3737.xml
./Annotations/img_2147_38710631922_o.xml => ./Annotations_scaled/img_2147_38710631922_o.xml
./Annotations/img_2183_37854460995_o.xml => ./Annotations_scaled/img_2183_37854460995_o.xml
./Annotations/img_2428_26965729389_o.xml => ./Annotations_scaled/img_2428_26965729389_o.xml
./Annotations/img_2238_23877342157_o.xml => ./Annotations_scaled/img_2238_2387734215

./Annotations/img_2226_37854543645_o.xml => ./Annotations_scaled/img_2226_37854543645_o.xml
./Annotations/IMG_0167.xml => ./Annotations_scaled/IMG_0167.xml
./Annotations/IMG_0173.xml => ./Annotations_scaled/IMG_0173.xml
./Annotations/img_2397_37854479955_o.xml => ./Annotations_scaled/img_2397_37854479955_o.xml
./Annotations/IMG_3693.xml => ./Annotations_scaled/IMG_3693.xml
./Annotations/img_2321_38024999194_o.xml => ./Annotations_scaled/img_2321_38024999194_o.xml
./Annotations/IMG_3877.xml => ./Annotations_scaled/IMG_3877.xml
./Annotations/img_2352_38024982794_o.xml => ./Annotations_scaled/img_2352_38024982794_o.xml
./Annotations/img_2225_38025029424_o.xml => ./Annotations_scaled/img_2225_38025029424_o.xml
./Annotations/img_2300_24868808568_o.xml => ./Annotations_scaled/img_2300_24868808568_o.xml
./Annotations/img_2444_38024930844_o.xml => ./Annotations_scaled/img_2444_38024930844_o.xml
./Annotations/IMG_4063.xml => ./Annotations_scaled/IMG_4063.xml
./Annotations/img_2333_24868800188_o

./Annotations/IMG_0115.xml => ./Annotations_scaled/IMG_0115.xml
./Annotations/IMG_0129.xml => ./Annotations_scaled/IMG_0129.xml
./Annotations/img_2406_38710565632_o.xml => ./Annotations_scaled/img_2406_38710565632_o.xml
./Annotations/IMG_3811.xml => ./Annotations_scaled/IMG_3811.xml
./Annotations/img_2336_37854502095_o.xml => ./Annotations_scaled/img_2336_37854502095_o.xml
./Annotations/IMG_3959.xml => ./Annotations_scaled/IMG_3959.xml
./Annotations/img_2212_24868829218_o.xml => ./Annotations_scaled/img_2212_24868829218_o.xml
./Annotations/img_2305_38742019821_o.xml => ./Annotations_scaled/img_2305_38742019821_o.xml
./Annotations/IMG_0075.xml => ./Annotations_scaled/IMG_0075.xml
./Annotations/IMG_0065.xml => ./Annotations_scaled/IMG_0065.xml
./Annotations/IMG_0071.xml => ./Annotations_scaled/IMG_0071.xml
./Annotations/img_2361_23877288787_o.xml => ./Annotations_scaled/img_2361_23877288787_o.xml
./Annotations/img_2382_38741983671_o.xml => ./Annotations_scaled/img_2382_38741983671_o.xml


./Annotations/IMG_4027.xml => ./Annotations_scaled/IMG_4027.xml
./Annotations/img_2380_38741984351_o.xml => ./Annotations_scaled/img_2380_38741984351_o.xml
./Annotations/IMG_0080.xml => ./Annotations_scaled/IMG_0080.xml
./Annotations/img_2362_37854457235_o.xml => ./Annotations_scaled/img_2362_37854457235_o.xml
./Annotations/IMG_0094.xml => ./Annotations_scaled/IMG_0094.xml
./Annotations/img_2197_24868833388_o.xml => ./Annotations_scaled/img_2197_24868833388_o.xml
./Annotations/IMG_3576.xml => ./Annotations_scaled/IMG_3576.xml
./Annotations/IMG_0069.xml => ./Annotations_scaled/IMG_0069.xml
./Annotations/img_2101_23877392587_o.xml => ./Annotations_scaled/img_2101_23877392587_o.xml
./Annotations/img_2263_23877332157_o.xml => ./Annotations_scaled/img_2263_23877332157_o.xml
./Annotations/img_2344_37854497925_o.xml => ./Annotations_scaled/img_2344_37854497925_o.xml
./Annotations/img_2204_38742054921_o.xml => ./Annotations_scaled/img_2204_38742054921_o.xml
./Annotations/IMG_0082.xml => ./Anno

./Annotations/IMG_3413.xml => ./Annotations_scaled/IMG_3413.xml
./Annotations/IMG_3375.xml => ./Annotations_scaled/IMG_3375.xml
./Annotations/img_2299_38742021971_o.xml => ./Annotations_scaled/img_2299_38742021971_o.xml
./Annotations/IMG_0132.xml => ./Annotations_scaled/IMG_0132.xml
./Annotations/IMG_0126.xml => ./Annotations_scaled/IMG_0126.xml
./Annotations/img_2258_26965786139_o.xml => ./Annotations_scaled/img_2258_26965786139_o.xml
./Annotations/IMG_3611.xml => ./Annotations_scaled/IMG_3611.xml
./Annotations/img_2146_26965812179_o.xml => ./Annotations_scaled/img_2146_26965812179_o.xml
./Annotations/img_2114_37854574135_o.xml => ./Annotations_scaled/img_2114_37854574135_o.xml
./Annotations/img_2279_24868815438_o.xml => ./Annotations_scaled/img_2279_24868815438_o.xml
./Annotations/img_2281_38742027911_o.xml => ./Annotations_scaled/img_2281_38742027911_o.xml
./Annotations/img_2391_23877277447_o.xml => ./Annotations_scaled/img_2391_23877277447_o.xml
./Annotations/img_2426_26965730079_o

./Annotations/IMG_4047.xml => ./Annotations_scaled/IMG_4047.xml
./Annotations/40.xml => ./Annotations_scaled/40.xml
./Annotations/54.xml => ./Annotations_scaled/54.xml
./Annotations/img_2405_38710566302_o.xml => ./Annotations_scaled/img_2405_38710566302_o.xml
./Annotations/68.xml => ./Annotations_scaled/68.xml
./Annotations/img_2379_38741949781_o.xml => ./Annotations_scaled/img_2379_38741949781_o.xml
./Annotations/img_2133_38025063344_o.xml => ./Annotations_scaled/img_2133_38025063344_o.xml
./Annotations/img_2132_24868849628_o.xml => ./Annotations_scaled/img_2132_24868849628_o.xml
./Annotations/img_2441_38710550042_o.xml => ./Annotations_scaled/img_2441_38710550042_o.xml
./Annotations/img_2189_37854552845_o.xml => ./Annotations_scaled/img_2189_37854552845_o.xml
./Annotations/img_2100_37854579045_o.xml => ./Annotations_scaled/img_2100_37854579045_o.xml
./Annotations/img_2388_38741981401_o.xml => ./Annotations_scaled/img_2388_38741981401_o.xml
./Annotations/img_2283_38742027461_o.xml => 