# WHAT THIS DOES

1. Creates a folder called `ssd-imagenet`.
2. Converts the raw ImageNet data into the format expected by the SSD-7 model.

In [1]:

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from shutil import copyfile

from PIL import Image
import pickle

In [9]:
def _load_box_annotations(boxes_path):
    """Parse the localization CSV into a DataFrame with one row per box.

    Each data line is ``<image id>,<prediction string>`` where the prediction
    string is a whitespace-separated sequence of 5-token groups. Per the
    ImageNet Object Localization Challenge data description each group is
    ``label xmin ymin xmax ymax``.

    Args:
        boxes_path: Path to ``LOC_train_solution.csv``.

    Returns:
        DataFrame with columns ``image, xmin, xmax, ymin, ymax, class_id``
        (in that order); ``class_id`` still holds the raw label string.

    Raises:
        ValueError: If a prediction string is not a whole number of
            5-token groups or a coordinate is not an integer.
    """
    columns = ['image', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id']
    rows = []
    # Context manager so the annotation file is closed deterministically.
    with open(boxes_path) as handle:
        next(handle, None)  # skip the header row
        for line in handle:
            fields = line.rstrip('\n').split(',')
            if len(fields) < 2:
                continue  # blank line: nothing to parse
            image_name = fields[0] + ".JPEG"
            tokens = fields[1].split()
            for start in range(0, len(tokens), 5):
                group = tokens[start:start + 5]
                if len(group) != 5:
                    raise ValueError(
                        "Incomplete bounding box for %s: %r" % (image_name, group))
                label = group[0]
                # File order is xmin, ymin, xmax, ymax; the output column
                # order (xmin, xmax, ymin, ymax) is kept for CSV consumers.
                xmin, ymin, xmax, ymax = (int(tok) for tok in group[1:])
                rows.append((image_name, xmin, xmax, ymin, ymax, label))
    return pd.DataFrame(rows, columns=columns)


def _list_train_images(traindir, max_folders):
    """Return the set of ``.JPEG`` file names in up to *max_folders* class folders.

    Only directories whose name starts with ``'n'`` count towards the limit,
    so stray entries (e.g. ``.DS_Store``) do not use up the budget. Folders
    are visited in sorted order to make the selection reproducible.
    """
    images = set()
    taken = 0
    for folder in sorted(os.listdir(traindir)):
        if taken >= max_folders:
            break
        folder_path = os.path.join(traindir, folder)
        if not folder.startswith('n') or not os.path.isdir(folder_path):
            continue
        images.update(f for f in os.listdir(folder_path) if f.endswith('.JPEG'))
        taken += 1
    return images


def boundingBoxesSSD(max_folders=50):
    """Build the ``ssd-imagenet`` folder from the raw ImageNet data.

    Reads ``RealImageNet/LOC_train_solution.csv``, keeps the boxes whose image
    exists in the first *max_folders* class folders of
    ``RealImageNet/ImageNetSubsample/Data/CLS-LOC/train`` and writes into
    ``ssd-imagenet``:

    * ``class_mapping.pickle`` -- dict mapping each label string to an
      integer id (ids follow first appearance in the annotation file);
    * one 224x224 copy of every selected image;
    * ``labels_train.csv`` / ``labels_val.csv`` -- an 80/20 random split of
      the boxes, with coordinates rescaled to the 224x224 images.

    Args:
        max_folders: Maximum number of class folders to take images from.

    Raises:
        ValueError: If no annotated image is found in the selected folders
            (or the annotation file is malformed).
    """
    ssd_path = 'ssd-imagenet'
    path = 'RealImageNet/'
    size = 224
    os.makedirs(ssd_path, exist_ok=True)

    df = _load_box_annotations(os.path.join(path, "LOC_train_solution.csv"))

    # Integer class ids, assigned in order of first appearance.
    classes = df.class_id.unique()
    mapping = dict(zip(classes, range(len(classes))))
    with open(os.path.join(ssd_path, 'class_mapping.pickle'), 'wb') as handle:
        pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)
    df['class_id'] = df['class_id'].map(mapping)

    traindir = os.path.join(path, 'ImageNetSubsample', 'Data', 'CLS-LOC', 'train')
    valid_images = _list_train_images(traindir, max_folders)
    valid_df = df[df.image.isin(valid_images)].copy()
    if valid_df.empty:
        raise ValueError(
            "No annotated .JPEG images found under %r" % traindir)

    # Resize each image once (an image with several boxes appears several
    # times in valid_df) and remember its original size for box rescaling.
    widths = {}
    heights = {}
    for img in valid_df.image.unique():
        source = os.path.join(traindir, img.split('_')[0], img)
        with Image.open(source) as original:
            widths[img], heights[img] = original.size
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized = original.resize((size, size), Image.LANCZOS)
        resized.save(os.path.join(ssd_path, img))

    # The saved images are size x size, so the boxes must be scaled by the
    # same per-axis factors to keep pointing at the same image content.
    x_scale = size / valid_df.image.map(widths)
    y_scale = size / valid_df.image.map(heights)
    valid_df[['xmin', 'xmax']] = (
        valid_df[['xmin', 'xmax']].mul(x_scale, axis=0).round().astype(int))
    valid_df[['ymin', 'ymax']] = (
        valid_df[['ymin', 'ymax']].mul(y_scale, axis=0).round().astype(int))

    df_train, df_val = train_test_split(valid_df, test_size=0.2)
    df_train.to_csv(os.path.join(ssd_path, 'labels_train.csv'), sep=',', index=False)
    df_val.to_csv(os.path.join(ssd_path, 'labels_val.csv'), sep=',', index=False)

In [10]:
# Run the conversion, limiting it to max_folders=1.
boundingBoxesSSD(1)