In [1]:
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2

# Short alias for the TF-Slim module that ships under tf.contrib (TensorFlow 1.x).
slim = tf.contrib.slim

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [3]:
import sys
# Put the parent directory on the import path -- presumably the repository root,
# so that the project packages imported in the next cell can be found.
sys.path.append('../')

In [4]:
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

In [5]:
# TensorFlow session configuration: allow_growth=True lets GPU memory usage grow
# on demand instead of reserving it all up front.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
# Interactive session shared by the rest of the notebook (used by process_image).
isess = tf.InteractiveSession(config=config)

## SSD 300 Model

The SSD 300 network takes 300x300 image inputs. In order to feed it an arbitrary image, the image is first resized to this input shape (i.e. `Resize.WARP_RESIZE`). Note that even though this may change the width / height ratio, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation).

SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offsets to the coordinates and dimensions of these anchors.

In [6]:
# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
# One uint8 image with 3 channels; height and width are left unspecified.
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
# No labels or bounding boxes are supplied (both None); of the four outputs,
# only image_pre and bbox_img are used further down in this notebook.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
# Add a leading batch dimension of size 1.
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
# When this cell is re-run in the same kernel ('ssd_net' already exists),
# reuse=True is passed to the network -- presumably so the existing variables
# are reused instead of being created a second time. This check must stay
# above the assignment to ssd_net.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    # Only the first two outputs (predictions, localisations) are kept.
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
# Initialise all variables first, then load the checkpoint values over them.
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)

INFO:tensorflow:Restoring parameters from ../checkpoints/ssd_300_vgg.ckpt


## Post-processing pipeline

The SSD outputs need to be post-processed to provide proper detections. Namely, we follow these common steps:

* Select boxes above a classification threshold;
* Clip boxes to the image shape;
* Apply the Non-Maximum Suppression (NMS) algorithm: among boxes whose Jaccard overlap is above the threshold, keep only the highest-scoring one;
* If necessary, resize bounding boxes to original image shape.

In [7]:
# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
    """Run the SSD network on one image and post-process the detections.

    Relies on notebook-level state: the session ``isess``, the graph tensors
    ``img_input``, ``predictions``, ``localisations`` and ``bbox_img``, and
    the anchor boxes ``ssd_anchors``.

    Args:
        img: Image array fed to the ``img_input`` placeholder (declared as
            uint8 with 3 channels).
        select_threshold: Score threshold forwarded to
            ``np_methods.ssd_bboxes_select``.
        nms_threshold: Overlap threshold forwarded to
            ``np_methods.bboxes_nms``.
        net_shape: Network input shape, forwarded as ``img_shape`` when the
            boxes are decoded.

    Returns:
        Tuple ``(rclasses, rscores, rbboxes)`` describing the detections left
        after sorting and NMS. Elsewhere in this notebook each box row is
        read as (ymin, xmin, ymax, xmax) and multiplied by the image size,
        i.e. the coordinates are treated as relative to the image.
    """
    # Run SSD network. Only the tensors needed for post-processing are
    # fetched; the pre-processed image itself is not used here.
    rpredictions, rlocalisations, rbbox_img = isess.run(
        [predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

    # Clip to the image, keep the 400 best-scoring boxes, then apply NMS.
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes

In [None]:
# Test on some demo image and visualize output.
path = 'bddogtrain\\train\\1\\'
image_names = sorted(os.listdir(path))

for name in image_names:
    # Only show images whose file name starts with 'a'. startswith() cannot
    # raise IndexError, unlike indexing into the first '_'-separated token,
    # which is empty when the name begins with '_'.
    if not name.startswith('a'):
        continue
    print(name)
    # os.path.join keeps the file inside `path` whatever the separator is.
    img = mpimg.imread(os.path.join(path, name))
    rclasses, rscores, rbboxes = process_image(img)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)

In [8]:
import shutil,path
import pandas as pd
import numpy as np
from PIL import Image
import os
import os.path
    
# Class id counted as "dog" when deciding whether an image is cropped.
DOG_CLASS_ID = 12


def expand_box_to_square(x1, y1, x2, y2, img_width, img_height):
    """Grow the shorter side of a pixel box so that it becomes roughly square.

    Square-crop strategy: the shorter side is extended symmetrically by half
    of the difference between the two sides (integer division). The extended
    coordinates are then clamped: a lower bound <= 0 becomes 1, and an upper
    bound >= the image size becomes ``size - 1``. The coordinates of the
    longer side are returned unchanged.

    Args:
        x1, y1, x2, y2: Integer box corners (left, top, right, bottom).
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of the adjusted box.
    """
    box_height = y2 - y1
    box_width = x2 - x1
    if box_height >= box_width:
        # Taller than wide: widen the box.
        pad = (box_height - box_width) // 2
        x1 -= pad
        x2 += pad
        if x1 <= 0:
            x1 = 1
        if x2 >= img_width:
            x2 = img_width - 1
    else:
        # Wider than tall: heighten the box.
        pad = (box_width - box_height) // 2
        y1 -= pad
        y2 += pad
        if y1 <= 0:
            y1 = 1
        if y2 >= img_height:
            y2 = img_height - 1
    return x1, y1, x2, y2


# For every numbered source folder: crop images that contain exactly one
# detected dog to a square around it, and re-save all other images unchanged.
for i in range(97):
    rootdir = 'bddogtrain\\datax2old\\' + str(i)
    desdir = 'bddogtrain\\datax2crop\\' + str(i)
    print(i)
    if not os.path.isdir(rootdir):
        # Nothing to process for this folder.
        continue
    if not os.path.isdir(desdir):
        # Saving fails if the destination folder is missing.
        os.makedirs(desdir)
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # os.walk also descends into sub-folders, so the file has to be
            # opened relative to `parent`, not `rootdir`.
            img = mpimg.imread(os.path.join(parent, filename))
            rclasses, rscores, rbboxes = process_image(img)
            dog_indices = np.flatnonzero(np.asarray(rclasses) == DOG_CLASS_ID)
            pil_img = Image.fromarray(img)
            out_path = os.path.join(desdir, filename)
            if len(dog_indices) == 1:
                idx = dog_indices[0]
                # Box rows are read as (ymin, xmin, ymax, xmax) in relative
                # coordinates and scaled to pixels here.
                y1, x1, y2, x2 = rbboxes[idx, 0], rbboxes[idx, 1], rbboxes[idx, 2], rbboxes[idx, 3]
                img_width, img_height = pil_img.size
                x1, x2 = int(x1 * img_width), int(x2 * img_width)
                y1, y2 = int(y1 * img_height), int(y2 * img_height)
                x1, y1, x2, y2 = expand_box_to_square(x1, y1, x2, y2, img_width, img_height)
                pil_img.crop((x1, y1, x2, y2)).save(out_path)
            else:
                # Zero or several dogs detected: keep the full image.
                pil_img.save(out_path)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96


In [9]:
# Play a beep -- presumably to signal that the long-running cell above has
# finished. winsound is a Windows-only standard-library module.
import winsound
Freq = 2500  # beep frequency in hertz
Dur = 1200  # beep duration in milliseconds
winsound.Beep(Freq,Dur)