In [None]:
import os
import random
from shutil import copyfile

import numpy as np
from google.colab import drive
from pycocotools.coco import COCO

# NOTE(review): getImage and visualizeGenerator also reference `io`, `cv2`,
# `plt` and `gridspec`, none of which are imported in this cell - confirm
# which packages they are meant to come from and import them here.

In [None]:
# Flush pending writes and unmount Google Drive; this runs before the mount cell below.
# NOTE(review): presumably harmless when nothing is mounted yet - confirm.
drive.flush_and_unmount()

In [None]:
# Mount Google Drive at /content/drive; the export cells further down write
# under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!wget http://images.cocodataset.org/zips/train2017.zip
!wget http://images.cocodataset.org/zips/val2017.zip
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!wget http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip

--2020-11-25 18:14:50--  http://images.cocodataset.org/zips/train2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.228.192
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.228.192|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘train2017.zip’


2020-11-25 18:40:45 (11.9 MB/s) - ‘train2017.zip’ saved [19336861798/19336861798]

--2020-11-25 18:40:45--  http://images.cocodataset.org/zips/val2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.99.164
Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.99.164|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: ‘val2017.zip’


2020-11-25 18:41:34 (16.1 MB/s) - ‘val2017.zip’ saved [815585330/815585330]

--2020-11-25 18:41:34--  http://images.cocodataset.org/annotations/annotations_trainval2017.zip
Resolving images.coco

In [None]:
%%capture
!unzip ./train2017.zip
!unzip ./val2017.zip
!unzip ./stuff_annotations_trainval2017.zip
!unzip ./annotations_trainval2017.zip

In [None]:
# Create the dataset layout that filterDataset and dataGeneratorCoco expect:
#   COCOdataset2017/images/<mode>/   and   COCOdataset2017/annotations/
# -p creates missing parents and does not fail if the directory already exists.
!mkdir -p /content/COCOdataset2017/images/train
!mkdir -p /content/COCOdataset2017/images/val
!mkdir -p /content/COCOdataset2017/annotations

In [None]:
!find ./train2017/ -name "*.jpg" -print0 | xargs -0 mv -t COCOdataset2017/images/train
!find ./val2017/ -name "*.jpg" -print0 | xargs -0 mv -t COCOdataset2017/images/val
!find ./annotations/ -name "*.json" -print0 | xargs -0 mv -t COCOdataset2017/annotations

In [None]:
def filterDataset(folder, classes=None, mode='train'):
    """Load a COCO instances annotation file and pick the images to use.

    Args:
        folder: Dataset root. The annotation file is read from
            ``{folder}/annotations/instances_{mode}2017.json``.
        classes: Category name or iterable of category names. An image is
            selected when it contains at least one of them. ``None`` selects
            every image listed in the annotation file.
        mode: Split name used in the annotation file name
            (``'train'`` or ``'val'`` in this notebook).

    Returns:
        Tuple ``(unique_images, dataset_size, coco)``: the selected image
        records in random order without duplicates, the number of records,
        and the ``COCO`` object built from the annotation file.
    """
    # initialize COCO api for instance annotations
    annFile = '{}/annotations/instances_{}2017.json'.format(folder, mode)
    coco = COCO(annFile)

    if classes is not None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(classes, str):
            classes = [classes]

        images = []
        for className in classes:
            catIds = coco.getCatIds(catNms=className)
            if not catIds:
                # Unknown category name: an empty category filter makes
                # getImgIds return every image id, so skip it instead.
                continue
            imgIds = coco.getImgIds(catIds=catIds)
            images += coco.loadImgs(imgIds)
    else:
        images = coco.loadImgs(coco.getImgIds())

    # An image containing several requested classes was collected once per
    # class. Keep the first occurrence of each image id; the set lookup keeps
    # this linear instead of comparing every record against every other one.
    seen_ids = set()
    unique_images = []
    for image in images:
        if image['id'] not in seen_ids:
            seen_ids.add(image['id'])
            unique_images.append(image)

    random.shuffle(unique_images)

    return unique_images, len(unique_images), coco

In [None]:
def visualizeGenerator(gen):
    """Draw the next batch from ``gen`` and show up to four of its images.

    The figure has two side-by-side panels, each a 2x2 grid of axes. Images
    are drawn in the left panel only; the right panel's axes are created but
    left empty.

    Args:
        gen: Iterator yielding either a batch of images or a tuple whose
            first element is the batch (dataGeneratorCoco yields
            ``(images, 0)``).

    NOTE(review): uses module-level ``plt`` and ``gridspec``, which the
    visible import cell does not import - presumably matplotlib; confirm.
    """
    batch = next(gen)
    # Unwrap (images, label) tuples so img[j] indexes images, not the tuple.
    img = batch[0] if isinstance(batch, tuple) else batch

    fig = plt.figure(figsize=(20, 10))
    outerGrid = gridspec.GridSpec(1, 2, wspace=0.1, hspace=0.1)

    for i in range(2):
        innerGrid = gridspec.GridSpecFromSubplotSpec(2, 2, subplot_spec=outerGrid[i], wspace=0.05, hspace=0.05)

        for j in range(4):
            ax = plt.Subplot(fig, innerGrid[j])
            # Only the left panel shows images; cells beyond the batch size
            # stay empty instead of raising an IndexError.
            if i == 0 and j < len(img):
                ax.imshow(img[j])

            ax.axis('off')
            fig.add_subplot(ax)
    plt.show()

In [None]:
def getClassName(classID, cats):
    """Look up a category name by id.

    Args:
        classID: Value compared against each entry's ``'id'``.
        cats: Sequence of category dicts with ``'id'`` and ``'name'`` keys.

    Returns:
        The ``'name'`` of the first entry whose ``'id'`` equals ``classID``,
        or ``None`` when no entry matches.
    """
    matching_names = (entry['name'] for entry in cats if entry['id'] == classID)
    return next(matching_names, None)

In [None]:
def getImage(imageObj, img_folder, input_image_size):
    """Read one image, scale it by 1/255, resize it and return 3 channels.

    Args:
        imageObj: Image record; only its ``'file_name'`` entry is read.
        img_folder: Directory that contains the image file.
        input_image_size: Target size as ``(height, width)``, the same order
            dataGeneratorCoco uses to allocate its batch array.

    Returns:
        Array of shape ``(height, width, 3)``. Images that come back with a
        single channel are repeated into three channels; any channels beyond
        the first three (e.g. alpha) are dropped.

    NOTE(review): uses module-level ``io`` and ``cv2``, which the visible
    import cell does not import - presumably skimage.io and OpenCV; confirm.
    """
    # Read and normalize an image
    train_img = io.imread(img_folder + '/' + imageObj['file_name'])/255.0

    # cv2.resize takes dsize as (width, height), the reverse of the
    # (height, width) order used for array shapes.
    height, width = input_image_size
    train_img = cv2.resize(train_img, (width, height))

    if train_img.ndim == 3:
        if train_img.shape[2] >= 3:
            # RGB stays as is; extra channels are discarded.
            return train_img[..., :3]
        # One or two channels: keep the first and treat it as grayscale.
        train_img = train_img[..., 0]

    # To handle a black and white image, increase dimensions to 3
    return np.stack((train_img,)*3, axis=-1)

In [None]:
def dataGeneratorCoco(images, classes, coco, folder,
                      input_image_size=(224,224), batch_size=4, mode='train'):
    """Endlessly yield batches of preprocessed images.

    Args:
        images: List of image records (dicts with a ``'file_name'`` entry).
            The list itself is not modified.
        classes: Unused; kept so existing calls keep working.
        coco: Unused; kept so existing calls keep working.
        folder: Dataset root; images are read from ``{folder}/images/{mode}``.
        input_image_size: ``(height, width)`` of every image in a batch.
        batch_size: Number of images per batch.
        mode: Sub-folder of ``{folder}/images`` to read from.

    Yields:
        Tuple ``(img, 0)`` where ``img`` is a float array of shape
        ``(batch_size, height, width, 3)`` and the second element is always 0.

    Images are consumed in consecutive slices of ``batch_size``. When fewer
    than ``batch_size`` images remain, the remainder is skipped, the order is
    reshuffled and iteration restarts from the beginning.

    Raises:
        IndexError: On the first ``next()`` if ``len(images) < batch_size``.
    """
    img_folder = '{}/images/{}'.format(folder, mode)
    # Private copy: reshuffling between passes must not reorder the caller's list.
    images = list(images)
    dataset_size = len(images)

    c = 0
    while True:
        img = np.zeros((batch_size, input_image_size[0], input_image_size[1], 3), dtype=float)

        for i in range(c, c + batch_size):  # initially from 0 to batch_size, when c = 0
            img[i - c] = getImage(images[i], img_folder, input_image_size)

        c += batch_size
        # The next batch covers [c, c + batch_size). Restart only when it would
        # run past the end; a batch ending exactly at the last image is valid.
        if c + batch_size > dataset_size:
            c = 0
            random.shuffle(images)
        yield img, 0

In [None]:
def copyImages(images, src_folder, dst_folder, mode='train',
                input_image_size=(224,224), class_name='person'):
    """Copy the listed image files into a per-class export folder.

    Files are copied from ``{src_folder}/images/{mode}`` to
    ``{dst_folder}/{mode}/{class_name}``. The destination folder is created
    if it does not exist yet.

    Args:
        images: Iterable of image records; only ``'file_name'`` is read.
        src_folder: Dataset root containing ``images/{mode}``.
        dst_folder: Export root.
        mode: Split sub-folder name, used on both the source and destination side.
        input_image_size: Unused; kept so existing calls keep working.
        class_name: Name of the class sub-folder under ``{dst_folder}/{mode}``.
    """
    src_img_folder = '{}/images/{}'.format(src_folder, mode)
    dst_img_folder = '{}/{}/{}'.format(dst_folder, mode, class_name)
    dataset_size = len(images)
    # %d: the count is an integer (the old %f printed e.g. "64115.000000").
    print('Copy %d files from %s to %s' %
          (dataset_size, src_img_folder, dst_img_folder))

    os.makedirs(dst_img_folder, exist_ok=True)

    for imageObj in images:
        file_name = imageObj['file_name']
        src = '{}/{}'.format(src_img_folder, file_name)
        dst = '{}/{}'.format(dst_img_folder, file_name)
        copyfile(src, dst)

In [None]:
# Dataset root (relative to the working directory) and the category names
# used to select images in filterDataset.
folder = './COCOdataset2017'
classes = ['person']

In [None]:
# List the working directory to check the downloaded archives and extracted folders.
!ls -l

total 21052948
drwxrwxr-x 3 root root        4096 Nov 25 18:51 annotations
-rw-r--r-- 1 root root   252907541 Jul 10  2018 annotations_trainval2017.zip
drwxr-xr-x 4 root root        4096 Nov 25 18:51 COCOdataset2017
drwx------ 6 root root        4096 Nov 25 18:13 drive
drwxr-xr-x 1 root root        4096 Nov 13 17:33 sample_data
-rw-r--r-- 1 root root  1148688564 Jul 10  2018 stuff_annotations_trainval2017.zip
drwxrwxr-x 2 root root     3969024 Nov 25 18:51 train2017
-rw-r--r-- 1 root root 19336861798 Jul 11  2018 train2017.zip
drwxrwxr-x 2 root root      163840 Nov 25 18:51 val2017
-rw-r--r-- 1 root root   815585330 Jul 11  2018 val2017.zip


In [None]:
# Build the training set: image records that contain any of `classes`,
# their count, and the COCO handle for the train annotations.
train_mode = 'train'
train_images, train_dataset_size, train_coco = filterDataset(folder, classes, train_mode)

loading annotations into memory...
Done (t=17.25s)
creating index...
index created!


In [None]:
# Build the validation set the same way, from the val annotations.
val_mode = 'val'
val_images, val_dataset_size, val_coco = filterDataset(folder, classes, val_mode)

loading annotations into memory...
Done (t=0.81s)
creating index...
index created!


In [None]:
# Settings passed to dataGeneratorCoco for the training generator.
batch_size = 4
input_image_size = (224,224)
# NOTE(review): mask_type is not referenced anywhere else in the visible notebook.
mask_type = 'normal'

In [None]:
# Create the training batch generator. dataGeneratorCoco is a generator
# function, so no image is read until next() is called on train_gen.
train_gen = dataGeneratorCoco(train_images, classes, train_coco, folder,
                              input_image_size, batch_size, train_mode)

In [None]:
# Create the validation generator; note the batch size is the literal 1 here,
# not `batch_size`.
val_gen = dataGeneratorCoco(val_images, classes, val_coco, folder,
                            input_image_size, 1, val_mode)

In [None]:
!rm -rf /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset')
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/val')
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/val/person')
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/train')
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/train/person')
os.chdir('/content/')

In [None]:
# Remove the train/person export folder and everything in it; the next cell recreates it.
# NOTE(review): this looks like a manual reset step for redoing the training copy - confirm it is still needed.
!rm -rf /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset/train/person

In [None]:
# Recreate the train/person export folder removed by the previous cell.
# os.mkdir raises if the folder already exists, so this depends on that removal having run.
os.mkdir('/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/train/person')

In [None]:
# Source dataset root on the runtime's local disk and export root on Google Drive,
# as passed to copyImages.
src_folder = '/content/COCOdataset2017'
dst_folder = '/content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset'

In [None]:
copyImages(train_images, src_folder, dst_folder, train_mode)
!ls -l /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset/train/person | wc -l

Copy 64115.000000 files from /content/COCOdataset2017/images/train to /content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/train/person
64116


In [None]:
copyImages(val_images, src_folder, dst_folder, val_mode)
!ls -l /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset/val/person | wc -l

Copy 2693.000000 files from /content/COCOdataset2017/images/val to /content/drive/MyDrive/Colab Notebooks/CS581/Project/COCOdataset/val/person
2694


In [None]:
!ls -l /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset/val/person | wc -l

2694


In [None]:
!ls -l /content/drive/MyDrive/Colab\ Notebooks/CS581/Project/COCOdataset/train/person | wc -l

64116


In [1]:
# Print the Python version of the runtime this notebook was executed on.
!python --version

Python 3.6.9
