In [1]:
import numpy as np
import os
import sys
import scipy
import cv2
import gc

#解析使用
import xml
from xml.etree import ElementTree as ET

from glob import glob

import keras.backend as K
from keras.applications import VGG19
from keras.models import Model
from keras.utils import to_categorical

import imageio
from skimage import transform

from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.svm import SVC #类别分类使用
from sklearn.linear_model import Ridge #bounding-box回归
from sklearn.externals import joblib

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import tensorflow as tf

from tensorflow.contrib import slim

import pandas as pd

import selectivesearch as ss #候选框产生使用

from ImageNet_classes import class_names #验证alexnet使用

In [3]:
# Notebook-wide configuration.
# BATCH_SIZE = 1  -> one image per step; do not change.

# Number of proposals used per fine-tuning step.
PROPOSAL_SIZE_POSITIVE = 32  # foreground samples
PROPOSAL_SIZE_NEGATIVE = 96  # background samples
PROPOSAL_SIZE = sum((PROPOSAL_SIZE_POSITIVE, PROPOSAL_SIZE_NEGATIVE))  # 128

# The network input is expected to be 224 x 224 x 3
# (original note: pretrained AlexNet 58% or VGG16 66%).
# HEIGHT = 224
# WIDTH = 224
# CHANNEL = 3
# IMG_SHAPE = (HEIGHT , WIDTH , CHANNEL)

# Image directories.
TRAIN_DATA_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/'
TEST_DATA_PATH = '../../tensorflow2/dataset/VOC2012test/JPEGImages/'

# Annotation (XML) directories.
TRAIN_XML_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/Annotations/'
TEST_XML_PATH = '../../tensorflow2/dataset/VOC2012test/Annotations/'

# Positive / negative sample lists intended for training the 20 per-class SVMs.
OBJECT_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/ImageSets/Main/'

# Class names; the list position is the integer label (0 is background).
STR = [
    'background',
    'person',
    'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
    'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
    'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor',
]

# 20 Pascal VOC object classes plus one background class.
CLASSES_NUM = len(STR)

# Lookup tables in both directions.
LABEL2STR = dict(enumerate(STR))
STR2LABEL = {name: idx for idx, name in enumerate(STR)}

# Object "part" annotations are not used for now (plain object detection only),
# so unknown names map to the sentinel string 'none'.
STR2LABEL.update({'none': 'none'})

# Foreground / background split for detection training.
IoU_THRESHOLD = 0.5

# SVM stage.
SVM_IoU_THRESHOLD = 0.3

# Non-maximum suppression (original note: or ~0.5).
NMS_IoU_THRESHOLD = 0.3

# Bounding-box regression stage.
BBOX_REGRESS_IoU_THRESHOLD = 0.6

# Side length of the RoI-pooled feature map.
ROI_BINS = 6

In [4]:
# Full paths of every entry found directly under TRAIN_XML_PATH.
# NOTE(review): glob() is not documented to return a stable order, so
# positional indexing into this list may pick different files on another
# machine -- confirm before relying on a specific index.
xml_file_names_train = glob(TRAIN_XML_PATH + '*')

#从xml文件中读出图片相关的信息

def xml_parse(xml_file):
    '''
    Parse one annotation XML file.

    Parameters
    ----------
    xml_file : path (or file object) accepted by xml.dom.minidom.parse.

    Returns
    -------
    filename : str
        Text of the first <filename> element.
    shape : tuple
        (width, height, depth) read from the <size> element. Note the order:
        width comes first, unlike an image array's (height, width, channel).
    name_boxes : list of str
        Text of the first <name> element found inside each <object>.
    crop_boxes : list of [x1, x2, y1, y2]
        Rounded integer corners of the first <bndbox> inside each <object>,
        in the same order as name_boxes.

    Raises
    ------
    ValueError
        If the document has no <size> element.
    '''
    # A bare `import xml` does not load the xml.dom.minidom submodule, so
    # import it explicitly here instead of relying on some other library
    # having imported it first.
    import xml.dom.minidom

    def _first_text(node, tag):
        # Text content of the first <tag> element found beneath `node`.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    def _first_coord(node, tag):
        # Coordinates may be written as floats; round to the nearest integer.
        return int(round(float(_first_text(node, tag))))

    root = xml.dom.minidom.parse(xml_file).documentElement

    # Only the first <filename> element is used.
    filename = _first_text(root, 'filename')

    # Image size. If several <size> elements exist the last one wins.
    size_nodes = root.getElementsByTagName('size')
    if not size_nodes:
        raise ValueError('no <size> element found in %s' % (xml_file,))
    size_node = size_nodes[-1]
    shape = (
        int(_first_text(size_node, 'width')),
        int(_first_text(size_node, 'height')),
        int(_first_text(size_node, 'depth')),
    )

    # One entry per <object>; the two lists stay index-aligned.
    name_boxes = []
    crop_boxes = []

    for obj in root.getElementsByTagName('object'):
        name_boxes.append(_first_text(obj, 'name'))

        bndbox = obj.getElementsByTagName('bndbox')[0]
        x1 = _first_coord(bndbox, 'xmin')
        y1 = _first_coord(bndbox, 'ymin')
        x2 = _first_coord(bndbox, 'xmax')
        y2 = _first_coord(bndbox, 'ymax')

        # Stored as [x1, x2, y1, y2], not the more common [x1, y1, x2, y2].
        crop_boxes.append([x1, x2, y1, y2])

    return filename , shape , name_boxes , crop_boxes

#xml_parse(xml_file_names_train[10])

In [5]:
xml_parse(xml_file_names_train[897])

('2008_000281.jpg',
 (500, 455, 3),
 ['car', 'car', 'person'],
 [[106, 186, 377, 419], [194, 283, 396, 444], [413, 429, 399, 444]])

In [6]:
class Image(object):
    '''
    Loads a training image together with its ground-truth annotation.
    '''
    def __init__(self):
        # Full paths of every entry under TRAIN_DATA_PATH.
        self.img_file_names_train = glob(TRAIN_DATA_PATH+'*')

    def load(self , img_path_name = None):
        '''
        Read one image and the annotation that belongs to it.

        Parameters
        ----------
        img_path_name : str or None
            Full path of the image. When omitted (or empty) a random training
            image is chosen.

        Returns
        -------
        img_arr : array as returned by cv2.imread (BGR, height x width x channel;
            note this differs from the (width, height, depth) order of the XML)
        ground_truth_data : list of image crops, one per annotated object
        labels : list with STR2LABEL[name] for every object ('none' when the
            name is not in STR2LABEL)
        crop_boxes : list of [x1, x2, y1, y2] boxes from the annotation
        img_id : file name of the image without directory and extension

        Raises
        ------
        IOError
            If cv2.imread cannot read the image.
        '''
        if not img_path_name:
            # No path given: pick a random training image.
            img_path_name = np.random.choice(self.img_file_names_train)

        # An explicit path is passed by the SVM / bbox-regression stages.

        # imageio.imread (RGB) broke the transfer from the pretrained weights
        # according to the original author, hence cv2 (BGR).
        img_arr = cv2.imread(img_path_name)
        if img_arr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('cv2.imread could not read image: %s' % img_path_name)

        # Derive the id from the path itself instead of a fixed [-15:-4] slice,
        # which only worked for 11-character names with a 3-character extension.
        img_id = os.path.splitext(os.path.basename(img_path_name))[0]

        xml_file_name = TRAIN_XML_PATH + img_id + '.xml'

        _ , _ , name_boxes , crop_boxes = xml_parse(xml_file_name)

        ground_truth_data = [] # pixel data of every annotated box
        labels = []            # label matching each annotated box

        for name , (x1 , x2 , y1 , y2) in zip(name_boxes , crop_boxes):
            ground_truth_data.append(img_arr[y1:y2 , x1:x2 , :])
            labels.append(STR2LABEL.get(name , 'none'))

        return img_arr , ground_truth_data , labels , crop_boxes , img_id
    

In [7]:
#候选区域的产生
class Clip(object):
    '''
    Region-proposal generator backed by the selectivesearch package.
    '''
    def __init__(self):
        # Regions whose reported 'size' is below this value are discarded.
        self.size_threshold = 220

    def clip(self , img_arr):
        '''
        Run selective search on img_arr and return the surviving boxes.

        Each returned box is [x1, x2, y1, y2]. A region is dropped when its
        rectangle was already emitted, its size is under self.size_threshold,
        its width or height is zero, or the corresponding crop of img_arr is
        empty.
        '''
        # Search parameters can be tuned; the first return value is ignored.
        _ , regions = ss.selective_search(img_arr , 500 , 0.9 , 50)

        seen = set()
        boxes = []

        for region in regions:
            rect = region['rect']

            # Skip duplicates first, then regions that are too small.
            if rect in seen or region['size'] < self.size_threshold:
                continue

            left , top , width , height = rect
            if width == 0 or height == 0:
                continue

            # img_arr is indexed as (row, column, channel).
            rows , cols , chans = img_arr[top:top+height , left:left+width , :].shape
            if 0 in (rows , cols , chans):
                continue

            seen.add(rect)
            boxes.append([left , left+width , top , top+height])

        return boxes
  
    

In [8]:
def roi_coord(rect , feature_size=13):
    '''
    Map an RoI from resized-image coordinates onto the conv5 feature map.

    Parameters
    ----------
    rect : array-like [idx, x1, x2, y1, y2]
        idx is passed through untouched; the four coordinates are mapped.
    feature_size : int
        Side length of the feature map; mapped coordinates are clipped to
        [0, feature_size-1]. The default 13 matches the original hard-coded
        limit of 12 (a 224x224 input ends up 13x13 according to the original
        comment).

    Returns
    -------
    numpy array [idx, x1, x2, y1, y2] in feature-map coordinates.

    The input is no longer modified in place: the original wrote the unclipped
    intermediate values back into the caller's array while returning a
    different, clipped array.
    '''
    rect = np.array(rect) # private copy, leaves the caller's data untouched

    coords = rect[1:]
    # One step per down-sampling stage: subtract (kernel-1)//2, divide by the stride.
    coords = (coords - (11-1)//2 ) // 4
    coords = (coords - (3-1)//2 ) // 2
    coords = (coords - (3-1)//2 ) // 2

    return np.concatenate( (rect[0:1] , np.clip(coords , a_min=0 , a_max=feature_size-1) ) , axis=0)

class Img_generator(object):
    '''
    Turns one image into the arrays fed to the network: the resized and
    normalised image, the RoIs in conv5 feature-map coordinates and, for
    training, one label row per RoI.

    All boxes handled here use the [x1, x2, y1, y2] layout.
    '''

    INPUT_SIZE = 224      # images are resized to INPUT_SIZE x INPUT_SIZE
    BG_IOU_MIN = 0.1      # pairs with a lower IoU produce no sample at all
    NEG_DROP_RATE = 0.8   # a background sample is kept only if a uniform draw exceeds this
    POSITIVE_REPEAT = 4   # every foreground sample is appended this many times

    def __init__(self):

        self.pr_generator = Clip()
        self.img_loader = Image()

    def bbox_area(self , bbox):
        '''Area of a [x1, x2, y1, y2] box.'''
        w = bbox[1] - bbox[0]
        h = bbox[3] - bbox[2]

        return w*h

    def IoU(self , bbox_a , bbox_b):
        '''
        Intersection over union of two [x1, x2, y1, y2] boxes.

        Returns 0.0 when the boxes do not overlap or only touch.

        The previous corner-containment test returned 0.0 for cross-shaped
        overlaps (neither box has a corner inside the other); the overlap is
        now computed directly from the interval intersection on each axis.
        '''
        inter_w = min(bbox_a[1] , bbox_b[1]) - max(bbox_a[0] , bbox_b[0])
        inter_h = min(bbox_a[3] , bbox_b[3]) - max(bbox_a[2] , bbox_b[2])

        if inter_w <= 0 or inter_h <= 0:
            return 0.0

        area_inter = inter_w * inter_h

        union_area = self.bbox_area(bbox_a) + self.bbox_area(bbox_b) - area_inter

        return area_inter/union_area

    def __to_t(self , G_box , P_box):
        '''
        Bounding-box regression targets (t_x, t_y, t_w, t_h) that move the
        proposal P_box onto the ground-truth box G_box.
        '''
        def to(rect):
            # corners -> (centre x, centre y, width, height); centres use floor division
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]

            w = x2-x1
            h = y2-y1

            x_c = (x1+x2)//2
            y_c = (y1+y2)//2

            return x_c , y_c , w , h

        G_x , G_y , G_w , G_h = to(G_box)
        P_x , P_y , P_w , P_h = to(P_box)

        t_x = (G_x-P_x)/P_w
        t_y = (G_y-P_y)/P_h
        t_w = np.log(G_w/P_w)
        t_h = np.log(G_h/P_h)

        return t_x , t_y , t_w , t_h

    def _proposal_to_roi(self , box , img_h , img_w):
        '''
        Convert one proposal from original-image coordinates into a
        [0, x1, x2, y1, y2] RoI on the conv5 feature map.
        '''
        size = self.INPUT_SIZE

        # The leading 0 is the image index within the batch (one image per
        # batch, see tf.image.crop_and_resize). The coordinates are rescaled
        # because the image itself is resized to size x size.
        roi = np.array([
            0 ,
            int(box[0]*size / img_w) ,
            int(box[1]*size / img_w) ,
            int(box[2]*size / img_h) ,
            int(box[3]*size / img_h) ,
        ])

        return roi_coord(roi) # project onto the conv5 feature map

    def get_train_proposal(self , img_arr , labels , ground_truth_coord):
        '''
        Build training RoIs and labels for one image.

        Parameters
        ----------
        img_arr : original image (not resized, not normalised)
        labels : integer class label of every ground-truth box
        ground_truth_coord : ground-truth boxes, index-aligned with labels

        Returns
        -------
        rois : array of [0, x1, x2, y1, y2] rows in feature-map coordinates
        y : array with CLASSES_NUM one-hot entries followed by the 4
            regression targets for every RoI

        NOTE(review): a proposal is compared against the ground-truth boxes
        one by one, so it can be emitted as background for one box and then as
        foreground for a later one. This sampling scheme is kept as it was;
        confirm whether matching on the best IoU was intended.
        '''
        # Selective search must see the original image.
        proposals_coord = self.pr_generator.clip(img_arr)

        h = img_arr.shape[0]
        w = img_arr.shape[1]

        rois = []
        y = []

        for proposal in proposals_coord:
            # Depends on the proposal only, so compute it once per proposal.
            roi = self._proposal_to_roi(proposal , h , w)

            for i in range(len(ground_truth_coord)):
                gt_box = ground_truth_coord[i]

                iou = self.IoU(gt_box , proposal)

                if self.BG_IOU_MIN <= iou < IoU_THRESHOLD:
                    # Background: keep only a random fraction of these samples.
                    if np.random.random() > self.NEG_DROP_RATE:
                        label = np.zeros(shape=CLASSES_NUM + 4)
                        label[0] = 1

                        rois.append(roi)
                        y.append(label)

                elif iou >= IoU_THRESHOLD:
                    # Foreground: one-hot class followed by the regression targets.
                    label = np.zeros(shape=CLASSES_NUM + 4)
                    label[labels[i]] = 1
                    label[CLASSES_NUM : CLASSES_NUM + 4] = self.__to_t(gt_box , proposal)

                    # Repeated to raise the share of positive samples.
                    for _ in range(self.POSITIVE_REPEAT):
                        rois.append(roi)
                        y.append(label)

                    # The first matching ground-truth box wins.
                    break

                # iou < BG_IOU_MIN: no sample is produced.

        return np.array(rois) , np.array(y)

    def get_test_proposal(self , img_arr):
        '''
        Build the RoIs for one test image.

        Returns (rois, proposals_coord): the feature-map RoIs as a list and
        the matching proposals in original-image coordinates.
        '''
        proposals_coord = self.pr_generator.clip(img_arr)

        h = img_arr.shape[0]
        w = img_arr.shape[1]

        rois = [self._proposal_to_roi(proposal , h , w) for proposal in proposals_coord]

        return rois , proposals_coord

    def load(self , img_path_name):
        '''
        Load one training sample.

        img_path_name : path handed to Image.load.

        Returns (image batch of shape (1, INPUT_SIZE, INPUT_SIZE, channels)
        scaled to [-1, 1], rois, y).
        '''
        img_arr , _ , labels , ground_truth_coord , _ = self.img_loader.load(img_path_name)

        rois , y = self.get_train_proposal(img_arr , labels , ground_truth_coord)

        img_arr = cv2.resize(img_arr , (self.INPUT_SIZE , self.INPUT_SIZE))
        img_arr = img_arr/127.5-1.0

        # Add the leading batch dimension.
        return np.expand_dims(img_arr , axis=0) , rois , y

    def load_test(self , img_path_name):
        '''
        Load one test sample.

        Returns (resized and normalised image batch, rois, original image
        array, proposals in original-image coordinates).
        '''
        img_arr = cv2.imread(img_path_name)

        rois , proposals_coord = self.get_test_proposal(img_arr)

        img_arr_resize = cv2.resize(img_arr , (self.INPUT_SIZE , self.INPUT_SIZE))
        img_arr_resize_norm = img_arr_resize / 127.5 - 1.0

        return np.expand_dims(img_arr_resize_norm , axis=0) , rois , img_arr , proposals_coord
    

In [9]:
class Dataset(object):
    '''
    Supplies training / test samples and converts predicted regression
    targets back into image-space boxes.
    '''
    def __init__(self):
        self.img_generator = Img_generator()

        self.img_loader = Image()

        self.img_file_names_train = glob(TRAIN_DATA_PATH + '*')
        self.img_file_names_test = glob(TEST_DATA_PATH + '*')

    def get_batch(self):
        '''Return (x, rois, y) for a randomly chosen training image.'''
        chosen = np.random.choice(self.img_file_names_train)

        batch_x , batch_rois , batch_y = self.img_generator.load(chosen)

        return batch_x , batch_rois , batch_y

    def get_batch_test(self , path):
        '''
        Return (normalised image batch, rois, original image, proposals).

        The original image is neither resized nor normalised; cv2 reads it
        in BGR channel order. A falsy path selects a random test image.
        '''
        if not path:
            path = np.random.choice(self.img_file_names_test)

        net_input , rois , raw_img , proposals = self.img_generator.load_test(path)

        return net_input , rois , raw_img , proposals

    def target2coord(self , bbox_pred , img_arr , proposals_coord):
        '''
        Apply predicted (t_x, t_y, t_w, t_h) offsets to their proposals.

        bbox_pred[i] is applied to proposals_coord[i]. Every result is a
        [x1, x2, y1, y2] list of ints; x1 / y1 are raised to 0 and x2 / y2 are
        lowered to the image width / height when they fall outside.
        '''
        max_y = img_arr.shape[0]
        max_x = img_arr.shape[1]

        def decode(offsets , anchor):
            # Proposal corners -> centre / size (centres use floor division).
            left , right , top , bottom = anchor[0] , anchor[1] , anchor[2] , anchor[3]
            anchor_w = right-left
            anchor_h = bottom-top
            anchor_cx = (left+right)//2
            anchor_cy = (top+bottom)//2

            # Invert the regression-target encoding.
            cx = anchor_w*offsets[0]+anchor_cx
            cy = anchor_h*offsets[1]+anchor_cy
            box_w = anchor_w*np.exp(offsets[2])
            box_h = anchor_h*np.exp(offsets[3])

            # Centre / size -> corners, rounded to whole pixels.
            x1 = 0.5*(2*cx-box_w)
            y1 = 0.5*(2*cy-box_h)
            x2 = x1+box_w
            y2 = y1+box_h
            x1 , y1 , x2 , y2 = [int(round(v)) for v in (x1 , y1 , x2 , y2)]

            # Keep the box inside the image.
            x1 = max(x1 , 0)
            x2 = min(x2 , max_x)
            y1 = max(y1 , 0)
            y2 = min(y2 , max_y)

            return [x1 , x2 , y1 , y2]

        return [decode(bbox_pred[i] , proposals_coord[i]) for i in range(len(bbox_pred))]

In [10]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860
#alexnet原始模型以及预训练参数导入

def roi_pooling(conv5 , rois , pool_height , pool_width):
    '''
    RoI pooling built from crop_and_resize followed by a 2x2 max pool.

    Parameters
    ----------
    conv5 : feature map, [batch, height, width, channel]
    rois : rows of [image index, x1, x2, y1, y2] in feature-map coordinates
    pool_height , pool_width : spatial size of the pooled output

    Returns
    -------
    Pooled features of spatial size pool_height x pool_width per RoI.

    The crop size used to be hard-coded to 12x12, which silently ignored
    pool_height / pool_width. It is now twice the requested output size, so
    the stride-2 max pool yields exactly pool_height x pool_width (unchanged
    for the value 6 used by the model).
    '''

    # Spatial size of conv5 assumed by the coordinate normalisation.
    conv5_height = 13
    conv5_width = 13

    # With a single image per batch every index is 0.
    rois_ind = tf.cast(rois[: , 0] , tf.int32)

    rois = tf.cast(rois , tf.float32)

    rois_coord = rois[: , 1:] # [x1 x2 y1 y2]

    # NOTE(review): crop_and_resize documents normalised coordinates as mapping
    # to value * (size - 1); dividing by the size itself (13) instead of 12 may
    # shift the crops slightly -- confirm before changing, since it would alter
    # the behaviour of already trained models.
    normalization = tf.cast(tf.stack([ conv5_width , conv5_width , conv5_height , conv5_height ],axis=-1) , dtype=tf.float32)
    rois_coord = tf.div(rois_coord , normalization)

    # crop_and_resize expects boxes as [y1, x1, y2, x2].
    rois_coord = tf.stack([rois_coord[: , 2] , rois_coord[: , 0] , rois_coord[: , 3] , rois_coord[: , 1] ] , axis=1)

    # box_ind selects the image of the batch each box is cropped from.
    rois_conv5_feature = tf.image.crop_and_resize(conv5 , boxes=rois_coord , box_ind=rois_ind , crop_size=[2*pool_height , 2*pool_width] )

    rois_pooling_feature = slim.max_pool2d(rois_conv5_feature , kernel_size=[2 , 2 ] , stride=[2 , 2 ] , padding='SAME')

    return rois_pooling_feature
    
    
class AlexNet_model(object):
    '''
    AlexNet convolutional trunk (frozen, pretrained weights) followed by RoI
    pooling and two heads: class probabilities and bounding-box offsets.

    Attributes set by the constructor: cls_pred, bbox_pred and, when
    is_training is true, total_loss.
    '''
    def __init__(self , x , rois , y , is_training=True):
        '''
        x : image batch tensor
        rois : RoI tensor with rows [image index, x1, x2, y1, y2]
        y : label tensor (CLASSES_NUM one-hot columns followed by 4 regression
            targets); only used when is_training is true
        '''
        self.x = x
        self.rois = rois
        self.y = y

        self.load_paramter()

        self.cls_pred , self.bbox_pred = self.model(is_training)

        if is_training:
            self.loss_layer(self.cls_pred , self.bbox_pred , self.y)

            # Sum of the classification and bbox losses registered by loss_layer.
            self.total_loss = tf.losses.get_total_loss(add_regularization_losses=False)

    def group_conv(self , x , kernel , strides):
        '''
        Grouped convolution with two groups (the original two-GPU AlexNet
        layout): input and kernel are both split in half along axis 3, the
        halves are convolved separately and the results concatenated.
        '''
        group_x = tf.split(x , num_or_size_splits=2 , axis=3)
        group_kernel = tf.split(kernel , num_or_size_splits=2 , axis=3)

        group_conv0 = tf.nn.conv2d(group_x[0] , group_kernel[0] , strides=strides , padding='SAME')
        group_conv1 = tf.nn.conv2d(group_x[1] , group_kernel[1] , strides=strides , padding='SAME')

        group_conv = tf.concat((group_conv0 , group_conv1) , axis=3)

        return group_conv

    def load_paramter(self):
        '''
        Load the pretrained conv1..conv5 weights and biases from
        'bvlc_alexnet.npy' into non-trainable variables.
        '''
        # The file stores a pickled dict, which current NumPy only loads with
        # allow_pickle=True. Unpickling executes arbitrary code, so load this
        # file from a trusted source only.
        # encoding='bytes' is required (the original author reports a hang without it).
        net_data = np.load('bvlc_alexnet.npy' , allow_pickle=True , encoding='bytes').item()

        self.conv1w = tf.Variable(net_data["conv1"][0] , trainable=False)
        self.conv1b = tf.Variable(net_data["conv1"][1] , trainable=False)

        self.conv2w = tf.Variable(net_data["conv2"][0] , trainable=False)
        self.conv2b = tf.Variable(net_data["conv2"][1] , trainable=False)

        self.conv3w = tf.Variable(net_data["conv3"][0] , trainable=False)
        self.conv3b = tf.Variable(net_data["conv3"][1] , trainable=False)

        self.conv4w = tf.Variable(net_data["conv4"][0] , trainable=False)
        self.conv4b = tf.Variable(net_data["conv4"][1] , trainable=False)

        self.conv5w = tf.Variable(net_data["conv5"][0] , trainable=False)
        self.conv5b = tf.Variable(net_data["conv5"][1] , trainable=False)

    def model(self , is_training=True , keep_prob=0.5):
        '''
        Build the forward graph.

        Returns (cls, bbox): softmax class probabilities with CLASSES_NUM
        columns and 4 linear bounding-box outputs per RoI.
        '''
        conv1 = tf.nn.conv2d(self.x , self.conv1w , strides=(1,4,4,1) , padding='SAME')
        conv1 = tf.nn.bias_add(conv1 , self.conv1b)
        conv1 = tf.nn.relu(conv1)
        lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool1 = tf.nn.max_pool(lrn1 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv2 = self.group_conv(maxpool1 , self.conv2w , strides=(1,1,1,1))
        conv2 = tf.nn.bias_add(conv2 , self.conv2b)
        conv2 = tf.nn.relu(conv2)
        lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool2 = tf.nn.max_pool(lrn2 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv3 = tf.nn.conv2d(maxpool2 , self.conv3w , strides=(1,1,1,1) , padding='SAME')
        conv3 = tf.nn.bias_add(conv3 , self.conv3b)
        conv3 = tf.nn.relu(conv3)

        conv4 = self.group_conv(conv3 , self.conv4w , strides=(1,1,1,1))
        conv4 = tf.nn.bias_add(conv4 , self.conv4b)
        conv4 = tf.nn.relu(conv4)

        conv5 = self.group_conv(conv4 , self.conv5w , strides=(1,1,1,1))
        conv5 = tf.nn.bias_add(conv5 , self.conv5b)
        conv5 = tf.nn.relu(conv5)

        # One fixed-size feature block per RoI.
        roi_pool5 = roi_pooling(conv5 , self.rois , pool_height = ROI_BINS , pool_width = ROI_BINS)

        flatten = tf.layers.flatten(roi_pool5)

        fc6 = slim.fully_connected(flatten , num_outputs=1024)
        fc6 = slim.dropout(fc6 , keep_prob=keep_prob , is_training=is_training)

        fc7 = slim.fully_connected(fc6 , num_outputs=1024)
        fc7 = slim.dropout(fc7 , keep_prob=keep_prob , is_training=is_training)

        cls = slim.fully_connected(fc7 , num_outputs=CLASSES_NUM , activation_fn=tf.nn.softmax)
        bbox = slim.fully_connected(fc7 , num_outputs=4 , activation_fn=None , weights_initializer=tf.initializers.truncated_normal(mean=0.0 , stddev=0.001))

        return cls , bbox

    def loss_layer(self , cls_pred , bbox_pred , labels):
        '''
        Register the classification and bounding-box losses with tf.losses.

        labels holds CLASSES_NUM one-hot columns followed by the 4 regression
        targets. Both losses are summed over all RoIs. Rows whose background
        column (index 0) is 1 are masked out of the bounding-box loss.
        '''
        cls_true = labels[: , : CLASSES_NUM]
        bbox_true = labels[: , CLASSES_NUM :]

        # Clip before the log: a softmax output that underflows to 0 would
        # otherwise give log(0) = -inf and a NaN loss.
        safe_pred = tf.clip_by_value(cls_pred , 1e-10 , 1.0)
        cross_entropy = - tf.reduce_sum( cls_true * tf.log(safe_pred) )
        cls_loss = tf.reduce_mean(cross_entropy)

        # Repeat the background flag across the 4 box columns (tile counts must be >= 1).
        mask = tf.tile( tf.reshape(cls_true[ : , 0] , [-1 , 1]) , multiples=[1 , 4])
        bbox_loss = tf.reduce_mean( tf.reduce_sum( tf.square( (1-mask) * (bbox_pred - bbox_true) ) ) )

        tf.losses.add_loss(cls_loss)
        tf.losses.add_loss(bbox_loss)
        

In [11]:
class Display(object):
    '''
    Draws labelled boxes on an image and saves the result to disk.
    '''
    def __init__(self):
        pass

    def display(self , img_arr , labels , bbox , name):
        '''
        Draw every box with its label and save the image as 'result/<name>.jpg'.

        Parameters
        ----------
        img_arr : image array in BGR channel order; it is passed to the cv2
            drawing calls, so the caller's array may be drawn on
        labels : text for each box
        bbox : [x1, x2, y1, y2] box for each label
        name : file name (without extension) of the saved image

        The output directory is created when it does not exist yet; previously
        saving failed in that case.
        '''
        for i , text in enumerate(labels):

            x1 = bbox[i][0]
            x2 = bbox[i][1]
            y1 = bbox[i][2]
            y2 = bbox[i][3]

            img_arr = cv2.rectangle(img_arr , (x1 , y1) , (x2 , y2) , (255,255,255))

            # The label is placed just inside the top-left corner of the box.
            img_arr = cv2.putText(img_arr , text , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)

        out_path = 'result/%s.jpg' % name

        out_dir = os.path.dirname(out_path)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # Reverse the channel order (BGR -> RGB) before saving with matplotlib.
        plt.imsave(arr=img_arr[: , : ,[2,1,0]] , fname = out_path)
        

In [12]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860

class FRCN(object):
    '''
    Complete detector: builds the graph, trains it and runs detection with
    score filtering and non-maximum suppression.
    '''

    def __init__(self , is_training = True):
        '''
        Build the graph and open a session.

        is_training : when True the label placeholder, optimizer, moving
                      average and TensorBoard summary ops are created as well;
                      when False only the inference part of the graph is built.
        '''
        self.dataset = Dataset()
        self.display = Display()
        self.img_generator = Img_generator()

        self.filewriter_path = 'save/logs' # TensorBoard event files
        self.checkpoint_path = 'save/model/' # checkpoint directory

        self.x = tf.placeholder(tf.float32 , shape=[None , 224 , 224 , 3])
        # NOTE(review): presumably one row per RoI (image index + 4 coordinates);
        # confirm against AlexNet_model.
        self.rois = tf.placeholder(tf.int32 , shape=[None , 5])

        if is_training:
            # class target (CLASSES_NUM columns) followed by 4 box targets
            self.y = tf.placeholder(tf.float32 , shape=[None , CLASSES_NUM + 4])
        else:
            self.y = None

        self.model = AlexNet_model(self.x , self.rois , self.y , is_training)

        self.sess = tf.Session()

        if is_training:
            # --- training hyper-parameters ---
            self.epoch = 100000 # number of iterations run by train()

            self.global_step = tf.Variable(initial_value=0 , trainable=False)

            # starts at 1e-5 and is multiplied by 0.8 every 900 steps
            self.learning_rate = tf.train.exponential_decay(learning_rate=0.00001 , global_step=self.global_step,
                                                            decay_steps=900 , decay_rate=0.8 , staircase=True)

            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.model.total_loss , global_step=self.global_step)

            # --- exponential moving average over all trainable variables ---
            # NOTE(review): predict() restores variables with a default Saver,
            # so the averaged values do not appear to be used at inference time
            # - confirm whether that is intended.
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9)
            self.average_op = self.ema.apply(tf.trainable_variables())

            # train_op = optimizer step first, then the moving-average update
            with tf.control_dependencies([self.optimizer]):
                self.train_op = tf.group(self.average_op)

        self.sess.run(tf.global_variables_initializer())

        if is_training:
            tf.summary.scalar('total_loss' , self.model.total_loss)

            self.merged_summary = tf.summary.merge_all() # every summary in the default graph
            self.writer = tf.summary.FileWriter(self.filewriter_path , self.sess.graph)

        # max_to_keep=2: only the two most recent checkpoints stay on disk
        self.saver = tf.train.Saver(max_to_keep=2)

    def train(self):
        '''
        Run self.epoch training iterations.

        Iterations whose batch contains no RoIs are skipped entirely. On every
        10th iteration that is not skipped, a checkpoint is saved, the loss is
        written to TensorBoard and "<iteration> <total_loss>" is printed.
        Only usable on an instance built with is_training=True.

        TODO: support resuming training from a saved checkpoint.
        '''
        # Variables are re-initialised here, so every call starts from scratch.
        self.sess.run(tf.global_variables_initializer())

        # Saver.save raises when the parent directory does not exist.
        os.makedirs(self.checkpoint_path , exist_ok=True)

        for i in range(self.epoch):
            x , rois , y = self.dataset.get_batch()

            if len(rois) == 0:
                continue

            feed = {self.x : x , self.rois : rois , self.y : y}

            self.sess.run(self.train_op , feed_dict=feed)

            if i % 10 == 0:
                self.saver.save(self.sess , self.checkpoint_path + 'model.ckpt' , global_step = i)

                # the loss is evaluated after the update, on the same batch
                total_loss , summary = self.sess.run([self.model.total_loss , self.merged_summary] , feed_dict=feed)

                self.writer.add_summary(summary , global_step = i)

                print(i , total_loss)

        self.writer.close() # flush the events to disk and close the file

    def predict(self , path=None , scores_threshold = 0.1 , nms_iou_threshold = 0.7):
        '''
        Restore the latest checkpoint and run detection on one image.

        path              : passed through to Dataset.get_batch_test.
        scores_threshold  : minimum class score for a proposal to be kept.
        nms_iou_threshold : boxes overlapping a better one by more than this
                            IoU are suppressed.

        Returns the tuple produced by _predict, or None (after printing a
        message) when no checkpoint exists in self.checkpoint_path.
        '''
        if not os.path.exists(self.checkpoint_path + 'checkpoint'):
            print('no model!!!')
            return

        self.saver.restore(self.sess , tf.train.latest_checkpoint(self.checkpoint_path) )

        return self._predict(path , scores_threshold , nms_iou_threshold)

    def _predict(self , path , scores_threshold , nms_iou_threshold):
        '''
        Run the network on one image, filter and suppress the detections,
        draw them (saved by Display under the name 'first') and return

            (cls_pred , bbox_coord_pred , labels_pred_f , bbox_coord_pred_f)

        where the first two cover every proposal and the last two only the
        detections that survived score filtering and NMS.
        '''
        x , rois , img_arr , proposals_coord = self.dataset.get_batch_test(path)

        cls_pred , bbox_pred = self.sess.run([self.model.cls_pred , self.model.bbox_pred] , feed_dict={self.x : x , self.rois : rois})

        # convert the regression targets to coordinates in the original image
        bbox_coord_pred = self.dataset.target2coord(bbox_pred , img_arr , proposals_coord)

        scores_pred_f = [] # scores of the proposals that pass the filter
        bbox_coord_pred_f = [] # their boxes
        labels_pred_f = [] # their class names

        for i in range(len(cls_pred)):
            best_cls = np.argmax(cls_pred[i])
            best_score = np.max(cls_pred[i])

            # keep non-background proposals that are confident enough
            if best_cls != 0 and best_score > scores_threshold:
                scores_pred_f.append(best_score)
                bbox_coord_pred_f.append(bbox_coord_pred[i])
                labels_pred_f.append(LABEL2STR[best_cls])

        scores_pred_f = np.array(scores_pred_f)
        bbox_coord_pred_f = np.array(bbox_coord_pred_f)
        labels_pred_f = np.array(labels_pred_f)

        # sort by descending score; _nms relies on this order
        sort_idx = np.argsort(- scores_pred_f)

        scores_pred_f = scores_pred_f[sort_idx]
        bbox_coord_pred_f = bbox_coord_pred_f[sort_idx]
        labels_pred_f = labels_pred_f[sort_idx]

        final_idx = self._nms(scores_pred_f , bbox_coord_pred_f , nms_iou_threshold)

        bbox_coord_pred_f = bbox_coord_pred_f[final_idx]
        labels_pred_f = labels_pred_f[final_idx]

        # draw and save
        self.display.display(img_arr , labels_pred_f , bbox_coord_pred_f , 'first')

        return cls_pred , bbox_coord_pred , labels_pred_f , bbox_coord_pred_f

    def _nms(self , probability_hat , rects_hat , nms_iou_threshold):
        '''
        Greedy non-maximum suppression.

        probability_hat   : scores, already sorted in descending order (only
                            the length is used).
        rects_hat         : boxes in the same order.
        nms_iou_threshold : a box is dropped when its IoU with an earlier kept
                            box is strictly greater than this value.

        Returns the list of kept indices in ascending order. Class labels are
        not taken into account, so boxes of different classes can suppress
        each other.
        '''
        length = len(probability_hat)
        keep = [True] * length

        for current in range(length):
            if not keep[current]:
                continue

            # a kept box suppresses every later box that overlaps it too much
            for other in range(current + 1 , length):
                if keep[other] and (self.img_generator.IoU( rects_hat[current] , rects_hat[other] ) > nms_iou_threshold):
                    keep[other] = False

        return [i for i in range(length) if keep[i]]

In [13]:
# Build the full training graph (loss, optimizer, moving average, summaries) and open a session.
frcn = FRCN(is_training=True)

In [14]:
# Run the training loop. Each output line below is "<iteration> <total_loss>", printed on every
# 10th iteration; iterations whose batch has no RoIs are skipped, which is why some multiples of 10 are missing.
frcn.train()

0 62.875347
10 19.762892
30 139.14847
40 2.8885696
50 45.632683
60 20.211267
70 35.841427
80 4.3190784
90 21.220499
100 124.274506
110 49.967445
120 126.15239
130 92.001785
140 39.174496
150 120.70168
160 37.915867
170 9.078591
180 72.711006
190 56.124756
200 97.45361
210 179.13608
220 76.65666
230 34.40466
240 11.216581
250 46.91525
260 8.514629
270 6.941144
280 109.85525
290 139.04457
300 112.40413
310 24.024101
320 21.600712
330 166.5533
340 80.1755
350 51.472496
360 33.23967
370 41.36718
380 1.6602086
390 7.4222403
400 44.898487
410 41.053055
420 64.402985
430 24.197208
440 58.65966
450 95.05671
470 4.3910413
480 2.0455923
490 57.04611
500 35.097366
510 57.61841
520 77.82683
530 30.847776
540 110.19743
550 81.95177
560 104.57734
570 23.56056
580 119.47887
590 39.353474
600 25.599161
610 29.42296
620 12.826466
630 1.314438
640 209.05391
650 46.99232
660 77.271866
670 70.87059
680 27.206333
690 30.609653
700 81.75041
710 19.691303
720 120.90669
730 180.65279
740 21.197985
750 24.9107

5810 76.37651
5820 6.6650724
5830 7.105751
5840 46.95953
5850 36.36407
5860 1.5926404
5870 1.6610415
5880 88.36638
5890 11.405785
5910 14.4659815
5920 33.193115
5930 46.56262
5940 22.4198
5950 38.95291
5960 20.128212
5970 135.51248
5980 52.22898
5990 62.236557
6000 54.692173
6010 114.6627
6020 13.586322
6030 81.24308
6040 23.970932
6050 31.922665
6070 24.02981
6080 65.32936
6090 21.816685
6100 305.2882
6110 26.17394
6130 18.677843
6140 28.861208
6150 24.32585
6160 29.640863
6170 18.67803
6180 64.978676
6190 53.58938
6200 17.099678
6210 44.082497
6220 23.554462
6230 26.841373
6240 4.572399
6250 7.4564877
6260 8.97731
6280 3.0164325
6290 44.29642
6300 10.945057
6310 42.825264
6320 185.83876
6330 116.06426
6340 22.619759
6350 24.816238
6360 51.78724
6370 56.49795
6380 18.74129
6390 146.82404
6400 76.780815
6410 127.48109
6420 14.991672
6430 24.244566
6440 18.499863
6450 39.12593
6460 12.976469
6470 9.434934
6480 193.68893
6490 54.281727
6500 24.575453
6510 12.179005
6520 22.23102
6530 42.

11460 58.50263
11470 34.164738
11480 34.308113
11490 46.865543
11500 16.73211
11510 64.20895
11520 33.9298
11530 14.235338
11540 38.693733
11550 15.909128
11560 30.65404
11570 29.310669
11580 2.475072
11590 43.36702
11600 233.54672
11610 22.226515
11620 19.309559
11630 55.71998
11640 59.35133
11650 13.777907
11660 4.5492826
11670 112.54774
11680 98.39718
11690 40.053745
11700 87.91933
11710 50.663258
11720 8.544536
11730 25.124193
11740 59.449852
11750 35.97519
11760 3.752442
11770 42.751953
11780 57.54215
11790 54.05491
11800 20.422968
11820 9.972973
11830 43.51495
11840 83.39759
11850 62.94016
11860 75.89767
11870 142.8883
11880 135.6713
11890 102.4347
11900 6.6250715
11910 4.8681226
11920 54.259125
11930 2.2422853
11940 1.0564557
11950 110.4041
11960 22.555542
11970 2.3261719
11980 14.780515
11990 44.113674
12000 38.4024
12010 1.6542454
12020 8.503094
12030 24.47762
12040 13.760037
12050 45.37477
12060 54.26597
12070 59.979572
12080 5.018731
12090 14.3750105
12100 88.84126
12110 85.

16800 103.94904
16810 90.45651
16820 14.959606
16830 30.549706
16840 10.188615
16850 43.71139
16860 5.846475
16870 0.8444666
16880 117.443535
16890 9.845479
16900 59.44602
16910 4.2157965
16920 64.23681
16930 38.90212
16940 166.01947
16950 54.059418
16960 15.335943
16970 2.547872
16980 31.501883
16990 53.58395
17000 54.87873
17010 43.42681
17020 46.454247
17030 0.8134246
17040 23.702003
17050 126.170334
17060 21.314552
17070 43.707645
17080 56.003563
17090 88.05811
17100 8.053702
17110 102.90577
17120 55.419796
17130 73.84284
17140 6.989917
17150 67.27832
17160 41.925285
17170 37.842426
17180 10.036174
17190 34.88641
17200 83.0538
17210 19.082638
17220 8.426602
17230 1.4677069
17240 5.7905655
17250 54.479717
17260 119.46836
17270 41.1234
17280 4.778741
17290 61.58363
17300 49.149685
17310 17.290098
17320 33.86966
17330 3.2883255
17340 96.18222
17350 41.25483
17360 4.6014447
17370 79.14205
17380 2.0799928
17390 15.165223
17400 31.986694
17410 7.9151335
17420 109.45737
17430 52.22452
174

22130 52.168518
22140 43.04872
22150 8.462435
22160 5.0532293
22170 70.724266
22180 4.462144
22190 7.586648
22200 15.808231
22210 18.34178
22220 11.379419
22230 44.502647
22240 28.631073
22250 7.8364477
22260 45.299606
22270 46.818993
22280 89.7382
22290 149.76389
22300 17.01646
22310 15.2713995
22320 322.98694
22330 29.549063
22340 18.382038
22350 60.671085
22360 24.848513
22370 137.13892
22380 40.243965
22390 61.500565
22400 0.94049114
22410 3.5962262
22420 33.84661
22430 28.011673
22440 1.9159597
22450 8.159294
22460 13.526126
22470 87.2097
22480 69.75689
22490 21.687891
22500 32.62886
22510 84.85822
22520 83.39213
22530 40.285366
22540 20.418156
22550 45.336517
22570 176.61731
22580 67.359245
22590 89.57894
22600 52.14402
22610 31.206184
22620 30.115631
22630 145.17244
22640 15.970413
22650 20.943157
22660 38.58022
22670 78.170525
22680 1.5302192
22690 99.84219
22700 23.096039
22710 39.317665
22720 7.405143
22730 69.64832
22740 5.5437794
22750 100.79233
22760 56.1502
22770 47.22210

27510 1.5615973
27520 37.480476
27530 51.721302
27540 72.00131
27550 86.561775
27570 28.295658
27580 135.64235
27590 84.09813
27600 68.782616
27610 46.478943
27620 15.548185
27630 20.581942
27640 25.331003
27650 27.313772
27660 9.624429
27670 93.24943
27680 16.981148
27690 29.5808
27700 94.810165
27710 146.42511
27720 76.153534
27730 8.378143
27740 2.7030513
27750 17.175613
27760 92.41834
27770 71.55337
27780 54.73048
27790 10.766584
27800 117.725044
27810 9.335521
27820 111.64474
27830 8.777975
27840 32.70445
27850 54.63096
27860 25.83745
27870 64.01965
27880 33.922436
27890 1.3870065
27900 78.01462
27910 45.836113
27920 33.57463
27930 18.68509
27940 15.983967
27950 18.281385
27960 30.972948
27970 11.041817
27980 12.63651
27990 11.719971
28000 16.112677
28010 50.49462
28020 26.2473
28030 123.487564
28040 80.56704
28050 116.66015
28060 130.61592
28070 38.164043
28080 90.82438
28090 15.760768
28100 20.817364
28110 94.90967
28120 70.44839
28130 66.43077
28140 22.253008
28150 4.276838
281

32840 16.751219
32850 14.223515
32860 81.97023
32870 30.417957
32880 21.828789
32890 98.05985
32900 21.958073
32910 12.523122
32920 34.19979
32930 71.68265
32940 19.574125
32950 1.6953548
32960 72.77384
32970 115.54754
32980 19.051481
32990 43.803486
33000 47.281418
33010 8.488521
33020 120.44826
33030 21.428076
33040 41.197117
33050 222.03096
33060 21.705019
33070 19.484024
33080 28.462769
33090 144.51738
33100 121.6532
33110 14.325623
33120 29.55006
33130 16.175348
33140 5.506958
33150 27.525434
33160 27.071774
33170 14.030752
33180 26.735788
33190 16.039122
33200 52.21341
33210 6.638318
33220 4.232175
33230 110.78468
33240 8.015515
33250 16.972702
33260 53.087547
33270 44.408745
33280 139.99124
33290 61.88716
33300 195.74446
33310 50.802
33320 40.329
33330 3.7274857
33340 69.20417
33350 147.17218
33360 50.20457
33370 17.121124
33380 113.917114
33390 29.08743
33400 123.25842
33410 17.60957
33420 91.86623
33430 19.85288
33440 50.25072
33450 49.68992
33460 30.904179
33470 63.131138
334

38140 76.006096
38150 5.1409
38160 47.105167
38170 0.98043954
38180 11.1218815
38190 22.303236
38200 56.307865
38210 19.003304
38230 18.99938
38240 44.745113
38250 288.43292
38260 11.03166
38270 8.605629
38280 13.24004
38290 116.79942
38300 30.80787
38310 22.095797
38320 107.061226
38330 8.342405
38340 16.245409
38350 32.153854
38360 16.691181
38370 113.93859
38380 70.69201
38390 34.96458
38400 34.350018
38410 47.533577
38420 29.405735
38430 70.09992
38440 60.13626
38450 159.98354
38460 10.609905
38470 15.603647
38480 124.99922
38490 9.24389
38500 15.091566
38510 26.30192
38520 21.009136
38530 27.990417
38540 18.525831
38550 84.91133
38560 18.727182
38570 122.174324
38580 92.45377
38600 23.285795
38610 14.203949
38620 16.816555
38630 19.027876
38640 15.973884
38650 175.27727
38660 25.711088
38670 71.93263
38680 15.283182
38690 59.880627
38700 50.00185
38710 18.728104
38720 122.22452
38730 71.57094
38740 68.57845
38750 16.352451
38760 24.981443
38770 38.439728
38780 123.86744
38790 16.4

43440 5.196908
43450 36.34993
43460 45.225655
43470 44.73386
43480 111.760155
43490 11.445717
43500 35.480312
43510 10.461838
43530 22.191002
43540 74.66063
43550 86.97203
43560 19.248705
43570 134.07161
43580 39.541084
43590 8.109191
43620 18.562653
43630 74.94388
43640 10.326491
43650 101.29072
43660 76.447624
43670 71.94234
43680 14.565446
43690 44.529102
43700 21.06038
43710 132.01338
43720 23.688288
43730 77.26282
43740 67.82742
43750 10.555912
43760 2.2965243
43770 29.13543
43780 50.320335
43790 17.98189
43800 13.562873
43810 136.88707
43820 130.18536
43830 112.37775
43840 18.502728
43850 136.07448
43860 63.54091
43870 128.4685
43880 52.719414
43890 94.98037
43900 468.1904
43910 265.97202
43920 7.712493
43930 2.5054903
43940 3.2857542
43950 51.394806
43960 19.676342
43970 4.342022
43980 33.061863
43990 15.533126
44000 17.13414
44010 78.730965
44020 137.56702
44030 172.78772
44040 65.68307
44050 133.3591
44060 100.2635
44070 113.929375
44080 160.82938
44090 116.71579
44100 83.5161

48740 82.436066
48750 10.981432
48760 0.96833426
48770 36.42801
48780 196.77344
48790 20.927341
48800 23.963493
48810 88.37907
48820 20.816153
48830 7.8672624
48840 40.932133
48850 43.10626
48860 26.461218
48870 19.662474
48880 8.871665
48890 38.089314
48900 18.620468
48910 28.523085
48920 64.69111
48930 106.03195
48940 57.173576
48950 113.991356
48960 4.727941
48970 36.928604
48980 1.3675599
48990 18.076214
49000 72.56952
49010 27.743576
49020 47.15335
49030 128.17316
49040 147.4198
49050 102.64922
49060 11.262915
49070 26.415903
49080 44.90491
49090 45.956917
49100 21.026457
49110 69.19788
49120 98.73403
49130 9.413221
49140 107.95512
49150 46.921837
49160 79.469826
49170 23.993322
49190 28.512653
49200 20.57964
49210 71.9992
49220 47.42498
49230 130.43727
49240 62.071835
49250 101.05375
49260 13.458391
49270 8.333357
49280 27.065413
49290 143.65685
49300 22.888714
49310 20.570414
49320 75.64365
49330 6.258973
49340 60.214672
49350 89.6763
49360 79.07933
49370 77.57939
49380 66.58225

54100 14.559249
54110 37.591442
54120 105.636055
54130 85.19821
54140 73.75851
54150 13.089876
54160 51.91562
54170 61.12066
54180 27.837002
54190 5.4873943
54200 14.396305
54210 69.41031
54220 19.218056
54230 38.589138
54240 22.423388
54250 21.340155
54260 95.2643
54270 6.111094
54280 3.2351415
54290 3.60671
54300 2.5405679
54310 35.036205
54320 63.341904
54330 25.373926
54340 39.145645
54350 49.694744
54360 86.09716
54370 58.78186
54380 92.39989
54390 121.57199
54400 45.94103
54410 54.570705
54420 47.24737
54430 14.963051
54440 53.458218
54450 44.27898
54460 22.872467
54470 30.393854
54480 17.91609
54490 79.90339
54500 86.51879
54510 143.20053
54520 35.169884
54530 8.066242
54540 137.39171
54550 46.820744
54560 24.029587
54570 185.23434
54580 2.6221457
54590 13.522898
54600 172.83255
54610 43.777576
54620 44.51295
54630 54.166264
54640 7.7112656
54650 1.7750437
54660 159.44615
54670 77.85002
54680 19.394466
54690 70.98919
54700 3.694007
54710 75.960205
54720 62.193428
54730 10.675047

59400 6.14884
59410 24.779835
59420 47.26151
59430 76.210815
59440 24.483839
59450 5.756653
59460 34.888706
59470 9.550003
59480 185.29657
59490 39.426857
59500 77.984634
59510 30.43849
59520 30.436523
59530 12.16384
59540 142.45439
59550 44.81558
59560 54.417686
59570 34.872356
59580 62.358643
59590 18.134655
59600 12.184572
59610 62.701214
59630 51.0155
59640 47.29681
59650 16.174967
59660 90.655716
59670 12.64023
59680 2.9690495
59690 9.310275
59700 16.035173
59710 60.65583
59720 28.74181
59730 42.579777
59740 363.77716
59750 44.77846
59760 25.108522
59770 42.945415
59780 37.28106
59790 43.185455
59800 30.158363
59810 15.363738
59820 87.749596
59830 56.96451
59840 1.1642159
59850 110.652985
59860 30.586102
59870 16.888979
59880 29.93932
59890 12.068942
59900 57.095524
59910 103.40579
59920 21.863838
59930 15.298113
59940 164.64163
59950 56.594078
59960 5.0953116
59970 12.668278
59980 299.7595
59990 2.558051
60000 72.478096
60010 9.839413
60020 43.275566
60030 38.939552
60040 74.8634

64760 57.61533
64770 29.867115
64780 26.326288
64790 25.574518
64800 36.88706
64810 77.64066
64820 20.748173
64830 52.7995
64840 26.841833
64850 205.71448
64860 12.575736
64870 80.15092
64880 18.213785
64890 16.064133
64900 45.34975
64910 31.280788
64920 130.62332
64930 85.97689
64940 50.855354
64950 51.04869
64960 16.061934
64970 16.984158
64980 80.65646
64990 10.0918665
65000 38.16665
65010 3.9101992
65020 145.02324
65030 25.485504
65040 60.75953
65050 49.360523
65060 55.176018
65070 48.619667
65080 43.45032
65090 30.071411
65100 74.99505
65110 2.7054338
65120 33.13064
65130 35.98259
65140 43.392223
65150 4.8069816
65160 17.198252
65170 42.303688
65180 138.7427
65190 25.37218
65200 34.333076
65210 33.940548
65220 52.807926
65230 46.660812
65240 18.953455
65250 70.05029
65260 34.811256
65280 8.632896
65290 42.95658
65300 41.350998
65310 41.82039
65320 6.0439982
65330 114.43276
65340 12.270643
65350 25.857536
65360 27.052937
65370 36.24778
65380 55.041225
65390 22.537487
65400 4.977563

70090 50.796314
70100 22.884047
70110 58.883915
70120 47.19848
70130 9.11893
70140 41.66831
70150 9.145163
70160 99.49171
70170 11.648399
70180 8.935679
70190 45.931538
70200 18.392534
70210 54.962856
70220 20.892157
70230 116.437065
70240 79.30849
70250 75.9541
70270 182.75516
70280 96.44841
70290 89.086845
70300 53.557796
70310 64.39512
70320 14.461917
70330 82.052345
70350 83.928566
70360 98.914635
70370 7.6417246
70380 38.83792
70400 100.22657
70410 5.7856636
70420 91.95869
70430 3.4521358
70440 9.186256
70450 91.52678
70460 57.381855
70470 32.594086
70480 112.48004
70490 45.254295
70500 16.275436
70530 25.464947
70540 120.60839
70550 67.22312
70560 15.697821
70570 38.467735
70580 28.169563
70590 7.9930153
70600 72.19144
70610 50.46162
70620 25.005224
70630 118.29653
70640 33.09575
70650 154.76582
70660 5.5895987
70670 110.70056
70680 37.44573
70690 79.096405
70700 164.9474
70710 95.9307
70720 55.399097
70730 41.06771
70740 1.9362582
70750 50.99125
70760 44.788307
70770 56.664158
7

75440 28.388285
75450 12.8000765
75460 16.822271
75470 9.8061
75480 7.1016746
75490 67.81221
75500 33.786156
75510 4.1798697
75520 5.7335553
75530 26.90818
75540 50.596684
75550 37.347305
75560 49.510124
75570 94.24538
75580 10.567078
75590 92.658554
75600 4.567635
75610 40.230526
75620 25.645159
75630 80.18092
75640 61.16819
75660 19.316494
75670 23.301317
75680 41.019165
75690 1.6541057
75700 13.533878
75710 23.553997
75720 55.618706
75730 74.68415
75740 117.81585
75750 2.4685469
75760 133.38527
75770 21.917791
75780 0.8110613
75790 49.68679
75800 39.725285
75810 7.901916
75820 78.55814
75830 24.656729
75840 39.15458
75850 41.185047
75860 8.915005
75870 50.201424
75880 72.92069
75890 59.96831
75900 8.827487
75910 37.56521
75920 45.16019
75930 122.751366
75940 10.1044
75950 81.6751
75960 64.231895
75970 29.68771
75980 129.70674
75990 4.4122505
76000 27.576605
76010 25.242022
76020 25.388786
76030 27.538912
76040 38.08947
76050 24.28578
76060 7.4630933
76070 5.2969112
76090 44.388313
7

80820 87.021416
80830 65.26407
80840 25.957262
80850 23.605415
80860 19.330553
80870 48.853878
80880 13.25593
80890 66.31975
80900 37.556187
80910 51.375496
80920 55.419384
80930 40.22891
80950 21.437393
80960 14.774228
80970 43.677715
80980 22.571682
80990 26.531134
81000 87.80525
81010 184.6198
81020 101.95835
81030 49.501198
81040 10.770316
81050 93.39927
81060 7.489533
81070 17.890915
81080 61.02756
81090 18.584587
81100 37.063484
81110 5.036972
81120 37.513775
81130 23.112364
81140 13.465673
81150 94.97501
81160 5.5874376
81170 95.19586
81180 5.5088964
81190 31.076485
81200 29.97141
81210 51.92791
81220 34.29558
81230 21.815655
81240 73.61526
81250 4.3004146
81260 35.600346
81270 17.944124
81280 11.576683
81290 33.939167
81300 4.1842184
81310 11.138713
81320 47.676632
81330 10.005321
81340 9.260271
81350 13.675535
81360 6.225289
81370 44.50608
81380 94.64681
81390 52.36046
81400 30.99549
81410 30.855076
81420 20.542332
81430 61.119797
81440 12.006462
81450 15.824832
81460 43.76735

86190 66.48513
86200 57.713833
86210 110.32352
86220 11.524958
86230 23.666206
86240 85.8907
86250 38.79001
86260 57.290108
86270 28.859428
86290 82.380844
86300 86.03595
86310 129.76651
86320 10.797743
86330 43.584076
86340 10.549815
86350 112.84148
86360 23.306278
86370 47.589256
86380 22.067999
86390 114.591
86400 49.65221
86410 47.953606
86420 17.236172
86430 49.956623
86440 30.01381
86450 97.49715
86460 124.33122
86470 49.215855
86480 92.7149
86490 114.277985
86500 175.08035
86510 32.732353
86520 15.898086
86530 2.9021626
86540 84.845085
86550 117.2842
86560 2.2159238
86570 1.2897443
86580 24.384861
86590 65.22618
86600 68.72887
86610 137.91237
86620 124.586945
86630 117.56998
86640 53.595364
86650 47.10589
86660 208.64052
86670 154.20631
86680 89.84245
86690 76.63655
86700 38.914375
86710 101.43741
86720 13.260759
86730 51.395367
86740 15.204874
86750 26.22541
86760 92.630905
86770 10.008929
86780 27.675543
86790 58.394108
86800 0.84291065
86810 17.126282
86820 19.091526
86830 7.

91500 27.219028
91510 41.25614
91520 18.057665
91530 8.671475
91540 68.68666
91550 91.99519
91560 59.341286
91570 133.06503
91580 16.68237
91590 39.7275
91600 135.04063
91610 59.937496
91620 15.269613
91630 30.710968
91640 79.231705
91650 157.11246
91660 34.0174
91670 24.598707
91680 30.622868
91690 3.7529118
91700 23.212345
91710 93.273735
91720 55.02202
91730 14.614138
91740 43.73636
91750 99.89993
91760 24.055372
91770 16.308779
91780 276.71378
91790 5.5022607
91800 33.840313
91810 18.593126
91820 43.532513
91830 27.756016
91840 131.01505
91850 3.1968691
91860 120.002235
91870 9.484987
91880 78.371544
91890 13.038758
91900 9.058362
91910 40.146603
91920 59.001118
91930 39.83566
91940 26.331646
91950 30.966122
91960 27.12084
91970 87.31502
91980 110.500885
91990 14.290845
92000 32.390717
92010 48.645443
92020 76.18705
92030 78.16939
92040 65.2162
92050 13.011639
92060 19.701672
92070 44.912415
92080 58.63294
92090 78.14267
92100 26.54339
92110 126.78159
92120 29.08323
92130 41.468006

96890 84.60801
96900 94.655205
96910 50.46215
96920 65.94092
96930 37.946846
96940 55.300163
96950 17.709139
96960 12.559445
96970 15.281064
96980 46.790966
96990 33.564068
97000 28.91194
97010 57.67721
97020 151.53381
97030 65.4969
97040 39.619408
97050 113.72807
97060 18.794287
97070 22.096872
97080 61.12635
97090 2.1846628
97100 113.781784
97110 3.75283
97120 1.6559973
97130 42.676804
97140 23.006699
97150 41.813564
97160 49.511772
97170 43.011826
97180 175.33723
97190 33.15562
97200 5.4145985
97210 31.22929
97220 69.492874
97230 22.556793
97240 67.94378
97250 40.144035
97260 19.219929
97270 45.500523
97280 96.3571
97290 22.052868
97300 53.404953
97310 20.256485
97320 45.23809
97330 50.86056
97340 95.55583
97350 10.270584
97360 5.6340857
97370 30.172478
97380 41.736706
97390 54.780052
97400 67.527084
97410 37.69821
97420 26.874033
97430 68.83352
97440 19.309727
97450 54.128304
97460 4.3514833
97470 141.42139
97480 4.3531656
97490 22.928864
97500 110.95875
97510 10.000735
97520 19.75

In [13]:
# Build the model in inference mode: no label placeholder, optimizer or summary ops are created.
frcn_test = FRCN(is_training=False)

In [24]:
# predict() restores the latest checkpoint and returns, in this order:
#   l - class predictions for every proposal
#   b - decoded box coordinates for every proposal
#   c - class names of the detections kept after score filtering and NMS
#   v - boxes of those kept detections
l , b , c , v = frcn_test.predict('9.jpg' , scores_threshold=0.4)

print(c)

INFO:tensorflow:Restoring parameters from save/model/model.ckpt-99990
['person' 'person' 'person' 'person' 'person' 'person' 'person' 'person'
 'person' 'person' 'person' 'person' 'person' 'person' 'person' 'person']


In [21]:
# Stand-alone helper instances for inspecting the predictions by hand.
d=Display()
gg = Img_generator()

In [49]:
# Draw every proposal box in b (each tagged '=') on the image and save it as result/cc.jpg.
# NOTE(review): this file name differs from the '9.jpg' passed to predict() - confirm it is the same picture.
d.display(cv2.imread('../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2007_006560.jpg') , labels=['=']*l.shape[0] , bbox=b , name='cc')

In [24]:
# Highest-scoring class index for each proposal (index 0 is 'background' in STR).
l.argmax(axis=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0], dtype=int64)

In [27]:
# Predicted class index of the 4th-from-last proposal (index 1 is 'person' in STR).
l.argmax(axis=1)[-4]

1

In [28]:
# Predicted class index of the 2nd-from-last proposal (index 1 is 'person' in STR).
l.argmax(axis=1)[-2]

1

In [29]:
# Decoded box of the 2nd-from-last proposal (Display.display reads boxes as x1, x2, y1, y2).
b[-2]

[11, 326, 0, 375]

In [30]:
# Decoded box of the 4th-from-last proposal (Display.display reads boxes as x1, x2, y1, y2).
b[-4]

[59, 326, 0, 375]