In [None]:
import torch
from IPython.display import Image, clear_output 
# Report the installed PyTorch version together with the properties of CUDA
# device 0 when CUDA is available, or the literal 'CPU' otherwise.
if torch.cuda.is_available():
  device_summary = torch.cuda.get_device_properties(0)
else:
  device_summary = 'CPU'
print('PyTorch %s %s' % (torch.__version__, device_summary))

PyTorch 1.5.1+cu101 _CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)


Install Requirements

In [None]:
%%shell

# Cython is installed first; presumably it is needed to build pycocotools
# from source in the next step (TODO confirm).
pip install cython

# Install pycocotools from the upstream cocoapi repository: the version that
# ships with Colab by default has a bug that was fixed in
# https://github.com/cocodataset/cocoapi/pull/354
# NOTE(review): no commit or tag is pinned, so this installs whatever the
# repository's default branch currently contains.

pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-49dlrbbn
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-49dlrbbn
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... done
  Created wheel for pycocotools: filename=pycocotools-2.0-cp36-cp36m-linux_x86_64.whl size=266985 sha256=727d0fafd391523dfe1ad9bbc552b91ea3295cca74aa942cf424adc7aee02e2a
  Stored in directory: /tmp/pip-ephem-wheel-cache-fupudkqq/wheels/90/51/41/646daf401c3bc408ff10de34ec76587a9b3ebfac8d21ca5c3a
Successfully built pycocotools
Installing collected packages: pycocotools
  Found existing installation: pycocotools 2.0.1
    Uninstalling pycocotools-2.0.1:
      Successfully uninstalled pycocotools-2.0.1
Successfully installed pycocotools-2.0




Download Pascal VOC 2012 Dataset


In [None]:
# Download the Pascal VOC 2012 train/val archive, unpack it (the files land
# under ./VOCdevkit), then delete the archive to free disk space.
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# Extract without -v: listing every extracted file floods the cell output
# (the notebook front end truncates it to the last 5000 lines).
!tar -xf VOCtrainval_11-May-2012.tar
!rm VOCtrainval_11-May-2012.tar

Streaming output truncated to the last 5000 lines.
VOCdevkit/VOC2012/SegmentationClass/2008_001876.png
VOCdevkit/VOC2012/SegmentationClass/2008_001882.png
VOCdevkit/VOC2012/SegmentationClass/2008_001885.png
VOCdevkit/VOC2012/SegmentationClass/2008_001895.png
VOCdevkit/VOC2012/SegmentationClass/2008_001896.png
VOCdevkit/VOC2012/SegmentationClass/2008_001926.png
VOCdevkit/VOC2012/SegmentationClass/2008_001966.png
VOCdevkit/VOC2012/SegmentationClass/2008_001971.png
VOCdevkit/VOC2012/SegmentationClass/2008_001992.png
VOCdevkit/VOC2012/SegmentationClass/2008_001997.png
VOCdevkit/VOC2012/SegmentationClass/2008_002032.png
VOCdevkit/VOC2012/SegmentationClass/2008_002043.png
VOCdevkit/VOC2012/SegmentationClass/2008_002064.png
VOCdevkit/VOC2012/SegmentationClass/2008_002066.png
VOCdevkit/VOC2012/SegmentationClass/2008_002067.png
VOCdevkit/VOC2012/SegmentationClass/2008_002073.png
VOCdevkit/VOC2012/SegmentationClass/2008_002079.png
VOCdevkit/VOC2012/SegmentationClass/2008_002080.png

Convert the dataset format from Pascal VOC to COCO (the input format expected by the SSD implementation)

In [None]:
# Fetch the repository that contains the VOC -> COCO conversion script.
!git clone https://github.com/ganeshkulkarni98/PascalVOC2COCO
# Run the script from inside its own repository.
%cd PascalVOC2COCO
# Convert the VOC annotation folder into a single COCO-style JSON file
# (coco_dataset.json, written next to the VOC2012 data); the JPEG image folder
# is passed to the script as well.
!python voc2coco.py --annotation_folder_path /content/VOCdevkit/VOC2012/Annotations --json_file_path /content/VOCdevkit/VOC2012/coco_dataset.json --image_folder_path /content/VOCdevkit/VOC2012/JPEGImages
# Go back to the parent directory so later cells run from where this one started.
%cd ../

Cloning into 'PascalVOC2COCO'...
remote: Enumerating objects: 85, done.[K
remote: Counting objects:   1% (1/85)[Kremote: Counting objects:   2% (2/85)[Kremote: Counting objects:   3% (3/85)[Kremote: Counting objects:   4% (4/85)[Kremote: Counting objects:   5% (5/85)[Kremote: Counting objects:   7% (6/85)[Kremote: Counting objects:   8% (7/85)[Kremote: Counting objects:   9% (8/85)[Kremote: Counting objects:  10% (9/85)[Kremote: Counting objects:  11% (10/85)[Kremote: Counting objects:  12% (11/85)[Kremote: Counting objects:  14% (12/85)[Kremote: Counting objects:  15% (13/85)[Kremote: Counting objects:  16% (14/85)[Kremote: Counting objects:  17% (15/85)[Kremote: Counting objects:  18% (16/85)[Kremote: Counting objects:  20% (17/85)[Kremote: Counting objects:  21% (18/85)[Kremote: Counting objects:  22% (19/85)[Kremote: Counting objects:  23% (20/85)[Kremote: Counting objects:  24% (21/85)[Kremote: Counting objects:  25% (22/85)[Kremote: Co

Take a subset of the dataset and split it for training, validation and testing of the SSD model

In [None]:
import json
import os
import shutil
# split_factor is the fraction of the data used for training; the remainder is
# divided equally between validation and test,
# e.g. split_factor = 0.9 gives 90% train, 5% validation, 5% test

def create_dataset(coco_json_path, image_folder_path, split_factor, percentage_dataset, output_dir='/content/data'):
  """Split a COCO-style dataset into train / val / test subsets on disk.

  The first ``percentage_dataset`` percent of the annotations are kept and cut
  into three consecutive ranges: ``split_factor`` of them for training and the
  remainder divided equally between validation and test (the test range takes
  whatever is left after rounding). For every subset the function writes
  ``<output_dir>/<split>_coco_dataset.json`` and copies the referenced images
  into ``<output_dir>/<split>_images``.

  Note that the split is made over annotations, not images: an image whose
  annotations straddle a range boundary is copied into both neighbouring
  subsets.

  Args:
    coco_json_path: path to a JSON file with 'images', 'annotations' and
      'categories' keys.
    image_folder_path: folder containing the files named by each image's
      'file_name'.
    split_factor: fraction (0..1) of the selected annotations used for training.
    percentage_dataset: percentage (0..100) of all annotations to consider.
    output_dir: folder that receives the subsets; created if missing. Existing
      files in it are overwritten but never removed.

  Raises:
    OSError: if the JSON file or an image file cannot be read, or the output
      cannot be written.
    KeyError: if the JSON file lacks one of the expected keys.
  """
  # Close the input handle deterministically instead of relying on the GC.
  with open(coco_json_path) as json_in:
    data = json.load(json_in)

  # exist_ok makes the cell safe to re-run without deleting the folder first.
  os.makedirs(output_dir, exist_ok=True)

  annotations = data['annotations']

  # Number of annotations to consider for train, val and test together.
  length_dataset = int(len(annotations) * percentage_dataset / 100)
  end_train = int(length_dataset * split_factor)
  # Size of the validation range: half of what is not used for training.
  holdout_size = int((100 - split_factor * 100) / 200 * length_dataset)

  # Half-open [start, stop) ranges, computed up front so that no split depends
  # on loop state left behind by the previous one.
  bounds = {
    'train': (0, end_train),
    'val': (end_train, end_train + holdout_size),
    'test': (end_train + holdout_size, length_dataset),
  }

  # id -> image record; the first record wins when an id is duplicated.
  images_by_id = {}
  for image in data['images']:
    images_by_id.setdefault(image['id'], image)

  for process in ('train', 'val', 'test'):
    start, stop = bounds[process]

    # Report the inclusive index range covered by this split.
    print(process, start, stop - 1)

    # The slice bound is exclusive, so `stop` (not `stop - 1`) keeps the last
    # annotation of the range.
    split_annotations = annotations[start:stop]

    # Unique image records in order of first appearance among the annotations;
    # ids with no matching image record are skipped.
    seen_ids = set()
    images_details = []
    for annotation in split_annotations:
      image_id = annotation['image_id']
      if image_id in seen_ids:
        continue
      seen_ids.add(image_id)
      if image_id in images_by_id:
        images_details.append(images_by_id[image_id])

    outputs = {
      'images': images_details,
      'annotations': split_annotations,
      'categories': data['categories'],
    }

    images_folder = os.path.join(output_dir, process + '_images')
    os.makedirs(images_folder, exist_ok=True)

    for image in images_details:
      shutil.copyfile(os.path.join(image_folder_path, image['file_name']),
                      os.path.join(images_folder, image['file_name']))

    # Write the subset's JSON file; the context manager guarantees it is
    # flushed and closed before the function returns.
    json_file = os.path.join(output_dir, process + '_coco_dataset.json')
    with open(json_file, 'w') as json_out:
      json.dump(outputs, json_out)

In [None]:
# Split the COCO-converted Pascal VOC dataset into train, validation and test sets

!rm -rf /content/data                        # remove any existing data folder
coco_json_path = '/content/VOCdevkit/VOC2012/coco_dataset.json'
image_folder_path = '/content/VOCdevkit/VOC2012/JPEGImages'
split_factor = 0.7                # 70% train, 15% validation and 15% test
percentage_dataset = 10      # percentage of the annotations to use before splitting into train, val and test

create_dataset(coco_json_path, image_folder_path, split_factor, percentage_dataset)

train 0 2808
val 2809 3409
test 3410 4012


Clone the repository of the SSD implementation

In [None]:
# Fetch the SSD implementation; the train.py, eval.py and detect.py scripts run
# by later cells are expected to come from this repository.
# NOTE(review): no commit is pinned, and the clone fails if ./ssd already exists.
!git clone https://github.com/ganeshkulkarni98/ssd

Cloning into 'ssd'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects:   1% (1/77)[Kremote: Counting objects:   2% (2/77)[Kremote: Counting objects:   3% (3/77)[Kremote: Counting objects:   5% (4/77)[Kremote: Counting objects:   6% (5/77)[Kremote: Counting objects:   7% (6/77)[Kremote: Counting objects:   9% (7/77)[Kremote: Counting objects:  10% (8/77)[Kremote: Counting objects:  11% (9/77)[Kremote: Counting objects:  12% (10/77)[Kremote: Counting objects:  14% (11/77)[Kremote: Counting objects:  15% (12/77)[Kremote: Counting objects:  16% (13/77)[Kremote: Counting objects:  18% (14/77)[Kremote: Counting objects:  19% (15/77)[Kremote: Counting objects:  20% (16/77)[Kremote: Counting objects:  22% (17/77)[Kremote: Counting objects:  23% (18/77)[Kremote: Counting objects:  24% (19/77)[Kremote: Counting objects:  25% (20/77)[Kremote: Counting objects:  27% (21/77)[Kremote: Counting objects:  28% (22/77)[Kremote: Counting obje

In [None]:
# Work from inside the cloned repository so the scripts can be run by name.
%cd ssd

/content/ssd


SSD300 model pretrained weight (Trained on Pascal VOC dataset) : https://drive.google.com/file/d/14erG4YgA1EepHYjWiE3GGGbPT_y_9bb6/view?usp=sharing

In [None]:
# Copy the SSD300 pretrained weight file from Google Drive into the repository.
# NOTE(review): this needs Drive mounted at /content/drive and the file already
# uploaded there; no mount cell is visible in this part of the notebook — confirm.
!cp /content/drive/My\ Drive/SSD300_pretrained_weight.pth /content/ssd/SSD300_pretrained_weight.pth

VGG16 (Backbone) pretrained weight (Trained on ImageNet Dataset) : https://download.pytorch.org/models/vgg16-397923af.pth

In [None]:
# Download the VGG16 backbone weights into the current directory (/content/ssd).
!wget https://download.pytorch.org/models/vgg16-397923af.pth 

--2020-06-19 18:11:33--  https://download.pytorch.org/models/vgg16-397923af.pth
Resolving download.pytorch.org (download.pytorch.org)... 13.224.226.112, 13.224.226.28, 13.224.226.4, ...
Connecting to download.pytorch.org (download.pytorch.org)|13.224.226.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 553433881 (528M) [binary/octet-stream]
Saving to: ‘vgg16-397923af.pth’


2020-06-19 18:12:00 (19.4 MB/s) - ‘vgg16-397923af.pth’ saved [553433881/553433881]



# **Run Train function**

Train Case 1

The model is trained starting from the pretrained weight file:

    weight_file_path = '/content/ssd/SSD300_pretrained_weight.pth'

In [None]:
# Launch training. No arguments are passed on the command line.
# NOTE(review): presumably weight_file_path and the dataset paths are set inside
# the repository's scripts/config — confirm they match the values described above.
!python train.py

Loading model
Loading trained weight file...
model initialized
Epoch: [0][0/39]	Batch Time 11.334 (11.334)	Data Time 4.857 (4.857)	Training Loss 2.2250 Avg Training Loss (2.2250)	
Epoch: [0][8/39]	Batch Time 0.676 (1.831)	Data Time 0.001 (0.540)	Training Loss 2.1418 Avg Training Loss (2.0378)	
Epoch: [0][16/39]	Batch Time 1.357 (1.469)	Data Time 0.672 (0.469)	Training Loss 2.4026 Avg Training Loss (2.1496)	
Epoch: [0][24/39]	Batch Time 0.892 (1.404)	Data Time 0.216 (0.511)	Training Loss 1.9875 Avg Training Loss (2.1485)	
Epoch: [0][32/39]	Batch Time 1.804 (1.370)	Data Time 1.094 (0.528)	Training Loss 2.3781 Avg Training Loss (2.1287)	
creating index...
index created!
Evaluating: 100% 9/9 [00:05<00:00,  1.66it/s]
Total Average Validation Loss (7.339)	
{'aeroplane': 0.27012988924980164,
 'bicycle': 0.383838415145874,
 'bird': 0.11577311903238297,
 'boat': 0.23636363446712494,
 'bottle': 0.0,
 'bus': 0.3636363744735718,
 'car': 0.304915189743042,
 'cat': 0.553128719329834,
 'chair': 0.045

# **Run Test function**

Test Case 1

    weight_file_path = '/content/ssd/SSD300_pretrained_weight.pth'  (path to pretrained weights)

In [None]:
# Evaluate with the SSD300 pretrained weights (Test Case 1).
# NOTE(review): the command is identical for every test case; presumably
# weight_file_path is edited inside the repository before each run — confirm.
!python eval.py

Loading model
Loading trained weight file...
model initialized
creating index...
index created!
Evaluating: 100% 16/16 [00:04<00:00,  3.55it/s]
Total Average Validation Loss (8.443)	
{'aeroplane': 0.4318181872367859,
 'bicycle': 0.40909093618392944,
 'bird': 0.06060606241226196,
 'boat': 0.07575757801532745,
 'bottle': 0.3636363744735718,
 'bus': 0.379021018743515,
 'car': 0.10303030908107758,
 'cat': 0.5979798436164856,
 'chair': 0.09090909361839294,
 'cow': 0.0,
 'diningtable': 0.5909091234207153,
 'dog': 0.1666666716337204,
 'horse': 0.11255411803722382,
 'motorbike': 0.05194805562496185,
 'person': 0.11816374212503433,
 'pottedplant': 0.0,
 'sheep': 0.09090909361839294,
 'sofa': 0.06198347359895706,
 'train': 0.9090909361839294,
 'tvmonitor': 0.12337663024663925}

Mean Average Precision (mAP): 0.237
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.234
 Average Precision  (AP) @[ IoU=0.50 

Test Case 2

    weight_file_path = '/content/ssd/best_model.pth'  (path to the weight file being evaluated)

In [None]:
# Evaluate with best_model.pth (Test Case 2).
# NOTE(review): the command is identical for every test case; presumably
# weight_file_path is edited inside the repository before each run — confirm.
!python eval.py

Loading model
Loading trained weight file...
model initialized
creating index...
index created!
Evaluating: 100% 16/16 [00:04<00:00,  3.52it/s]
Total Average Validation Loss (7.360)	
{'aeroplane': 0.5227273106575012,
 'bicycle': 0.4393939673900604,
 'bird': 0.06060606241226196,
 'boat': 0.12121212482452393,
 'bottle': 0.3636363744735718,
 'bus': 0.4292929470539093,
 'car': 0.09469697624444962,
 'cat': 0.614973247051239,
 'chair': 0.04545454680919647,
 'cow': 0.0,
 'diningtable': 0.5909091234207153,
 'dog': 0.25,
 'horse': 0.04545454680919647,
 'motorbike': 0.0727272778749466,
 'person': 0.18359063565731049,
 'pottedplant': 0.0,
 'sheep': 0.09090909361839294,
 'sofa': 0.09090909361839294,
 'train': 0.9090909361839294,
 'tvmonitor': 0.1428571492433548}

Mean Average Precision (mAP): 0.253
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.225
 Average Precision  (AP) @[ IoU=0.50      | area=   al

Test Case 3

    weight_file_path = '/content/ssd/CP_epoch10.pth'  (path to the weight file being evaluated)

In [None]:
# Evaluate with CP_epoch10.pth (Test Case 3).
# NOTE(review): the command is identical for every test case; presumably
# weight_file_path is edited inside the repository before each run — confirm.
!python eval.py

Loading model
Loading trained weight file...
model initialized
creating index...
index created!
Evaluating: 100% 16/16 [00:04<00:00,  3.51it/s]
Total Average Validation Loss (7.473)	
{'aeroplane': 0.4318181872367859,
 'bicycle': 0.4285714626312256,
 'bird': 0.06060606241226196,
 'boat': 0.04545454680919647,
 'bottle': 0.3636363744735718,
 'bus': 0.43030306696891785,
 'car': 0.125,
 'cat': 0.6393939852714539,
 'chair': 0.09090909361839294,
 'cow': 0.0,
 'diningtable': 0.6363636255264282,
 'dog': 0.20649351179599762,
 'horse': 0.03305785357952118,
 'motorbike': 0.0727272778749466,
 'person': 0.14409999549388885,
 'pottedplant': 0.0,
 'sheep': 0.13636364042758942,
 'sofa': 0.09090909361839294,
 'train': 0.9090909361839294,
 'tvmonitor': 0.11742424964904785}

Mean Average Precision (mAP): 0.248
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.203
 Average Precision  (AP) @[ IoU=0.50      | area= 

# **Run Detect function**

Detect Case

    weight_file_path = '/content/ssd/SSD300_pretrained_weight.pth'  (path to pretrained weights)

In [None]:
# Run detection with the configured weight file.
# NOTE(review): the captured output below consists of raw uint8 image arrays and
# was truncated to 5000 lines; the script appears to print its images — consider
# silencing that output before sharing the notebook.
!python detect.py

Streaming output truncated to the last 5000 lines.
       [[ 33,  57,  23],
        [ 36,  61,  22],
        [ 44,  69,  30],
        ...,
        [ 63,  81,  41],
        [ 25,  39,   4],
        [ 22,  31,   4]],

       [[ 27,  46,  18],
        [ 41,  60,  28],
        [ 53,  72,  40],
        ...,
        [ 74,  91,  49],
        [ 39,  49,  14],
        [ 40,  44,  17]],

       [[ 42,  56,  33],
        [ 47,  61,  35],
        [ 43,  58,  29],
        ...,
        [ 80,  96,  51],
        [ 62,  70,  33],
        [ 76,  75,  47]]], dtype=uint8), array([[[112, 137, 193],
        [118, 147, 213],
        [155, 176, 219],
        ...,
        [  5,   3,   4],
        [  4,   0,   1],
        [  7,   3,   4]],

       [[123, 144, 197],
        [116, 140, 200],
        [188, 205, 248],
        ...,
        [  3,   0,   0],
        [  6,   2,   3],
        [ 10,   6,   7]],

       [[124, 137, 190],
        [117, 135, 199],
        [208, 221, 253],
        ...,
        