In [126]:
import numpy as np
import os
import sys
import scipy
import cv2
import gc

#解析使用
import xml
from xml.etree import ElementTree as ET

from glob import glob

import keras.backend as K
from keras.applications import VGG19
from keras.models import Model
from keras.utils import to_categorical

from skimage import transform

from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.externals import joblib

In [127]:
import tensorflow as tf

from tensorflow.contrib import slim

# import selectivesearch as ss #候选框产生使用 RPN不使用此函数

from ImageNet_classes import class_names #验证alexnet使用

In [546]:
import warnings

warnings.filterwarnings('error')

In [547]:
TRAIN_DATA_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/'
TEST_DATA_PATH = '../../tensorflow2/dataset/VOC2012test/JPEGImages/'

TRAIN_XML_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/Annotations/'
TEST_XML_PATH = '../../tensorflow2/dataset/VOC2012test/Annotations/'

OBJECT_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/ImageSets/Main/' #SVM需要使用的训练数据（正负样本） 训练20个svm

#pascal VOC数据集目标数量
#目标的数目 还有一个背景
CLASSES_NUM = 20+1

STR = [
    'background', #label=0
    'person',
    'bird','cat','cow','dog','horse','sheep',
    'aeroplane','bicycle','boat','bus','car','motorbike','train',
    'bottle','chair','diningtable','pottedplant','sofa','tvmonitor'
]

LABEL2STR = {idx:value for idx , value in enumerate(STR)}
STR2LABEL = {value:key for key,value in LABEL2STR.items()}
#STR2LABEL = {value:idx for idx , value in enumerate(STR)}


STR2LABEL['none'] = 'none' #先不使用part部分 只进行naive目标检测

#目标检测相关
IoU_THRESHOLD = 0.5

#NMS相关
NMS_IoU_THRESHOLD = 0.3 #or ~0.5


In [130]:
xml_file_names_train = glob(TRAIN_XML_PATH + '*') #所有的xml文件 完整路径

#从xml文件中读出图片相关的信息

def xml_parse(xml_file):
    '''
    return filename , shape , name_boxes , crop_boxes
    xml文件中的shape格式为 (width height 3)
    '''
    xml_file = xml.dom.minidom.parse(xml_file)
    xml_file_docu_ele = xml_file.documentElement

    filename_list = xml_file_docu_ele.getElementsByTagName('filename')
    
    #filename_list可能有多个filename的 所以要索引0(此数据集中filename只有一个)
    filename = filename_list[0].childNodes[0].data #filename_list.firstChild.data

    #图像的尺寸信息
    size_list = xml_file_docu_ele.getElementsByTagName('size')

    for size in size_list:
        width_list = size.getElementsByTagName('width')
        width = int(width_list[0].childNodes[0].data)

        height_list = size.getElementsByTagName('height')
        height = int(height_list[0].childNodes[0].data)

        channel_list = size.getElementsByTagName('depth')
        channel = int(channel_list[0].childNodes[0].data)

    shape = (width , height , channel)

    #一个文件中有多个object
    object_list = xml_file_docu_ele.getElementsByTagName('object')

    #多个object与多个object对应的详细信息
    name_boxes = [] #一个元素就是一个object
    crop_boxes = []

    for objects in object_list:
        #一次循环处理一个object信息
        #一个xml文件（即一个图像中）有多个object

        #name
        name_list = objects.getElementsByTagName('name')

        name_box = name_list[0].childNodes[0].data

        #bounding box points
        bndbox = objects.getElementsByTagName('bndbox')

        x1_list = bndbox[0].getElementsByTagName('xmin')
        x1 = int( round( float(x1_list[0].childNodes[0].data) ) )

        y1_list = bndbox[0].getElementsByTagName('ymin')
        y1 = int(round(float( y1_list[0].childNodes[0].data )))

        x2_list = bndbox[0].getElementsByTagName('xmax')
        x2 = int(round(float( x2_list[0].childNodes[0].data )))

        y2_list = bndbox[0].getElementsByTagName('ymax')
        y2 = int(round(float( y2_list[0].childNodes[0].data )))

        crop_box = [x1,x2,y1,y2]

        name_boxes.append(name_box)
        crop_boxes.append(crop_box)

    #shape:[width height channel]
    #crop_box:[x1 x2 y1 y2]
    return filename , shape , name_boxes , crop_boxes

#xml_parse(xml_file_names_train[10])

In [5]:
#xml_parse(xml_file_names_train[897])

In [131]:
#不需要修改
class Image(object):
    '''
    图片的真实信息
    '''
    def __init__(self):
        self.img_file_names_train = glob(TRAIN_DATA_PATH+'*') #训练全路径信息
                
    def load(self , img_path_name = None):
        '''
        如果传入 传入完整路径信息
        return img_arr , ground_truth_data , labels , crop_boxes , img_path_name[-15:-4]
        img_arr的shape为 (height width 3) 与xml文件中区分
        '''
        if not img_path_name:
            #没有指定文件名
            img_path_name = np.random.choice(self.img_file_names_train) #随机选择一张图片
            #img_path_idx = np.random.randint(0 , high = len(self.img_file_names_train)) #随机索引
      
        img_arr = cv2.imread(img_path_name) #BGR height*width*chanel
        
        xml_file_name = TRAIN_XML_PATH + img_path_name[-15:-4] +  '.xml'
        
        _ , _ , name_boxes , crop_boxes = xml_parse(xml_file_name)
        
        ground_truth_data = [] #存储bndbox的图像 数据信息
        labels = [] #存储与bndbox对应的 label信息

        for i in range(len(crop_boxes)): #多个object
            x1 = crop_boxes[i][0]
            x2 = crop_boxes[i][1]
            y1 = crop_boxes[i][2]
            y2 = crop_boxes[i][3]
            
            ground_truth_data.append(img_arr[y1:y2 , x1:x2 , :])
            
            labels.append(STR2LABEL.get(name_boxes[i] , 'none'))
        
        #图片数据 ground truth具体数据 bndbox对应label bndbox坐标信息 图片文件名
        
        return img_arr , ground_truth_data , labels , crop_boxes , img_path_name[-15:-4]
    

In [457]:
class Img_generator(object):
    def __init__(self):
        self.img_loader = Image()

    #计算bbox面积
    def bbox_area(self , bbox):
        w = bbox[1] - bbox[0]
        h = bbox[3] - bbox[2]
        
        return w*h
    
    #计算交并比
    def IoU(self , bbox_a , bbox_b):
        xmin_a = bbox_a[0]
        xmax_a = bbox_a[1]
        ymin_a = bbox_a[2]
        ymax_a = bbox_a[3]
        
        xmin_b = bbox_b[0]
        xmax_b = bbox_b[1]
        ymin_b = bbox_b[2]
        ymax_b = bbox_b[3]
        
        if   xmin_a < xmax_b <= xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_a <= xmin_b < xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_b < xmax_a <= xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        elif xmin_b <= xmin_a < xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        else:
            flag = False
        
        if flag:
            x_sorted_list = sorted([xmin_a, xmax_a, xmin_b, xmax_b])
            y_sorted_list = sorted([ymin_a, ymax_a, ymin_b, ymax_b])
            
            x_intersect_w = x_sorted_list[2] - x_sorted_list[1] #0 1 2 3
            y_intersect_h = y_sorted_list[2] - y_sorted_list[1] #0 1 2 3
            
            area_inter = x_intersect_w * y_intersect_h #计算重合面积
            
            if area_inter <= 0.0:
                return 0.0
            
            if self.bbox_area(bbox_a) <= 0.0 or self.bbox_area(bbox_b) <= 0.0:
                return 0.0
            
            union_area = self.bbox_area(bbox_a) + self.bbox_area(bbox_b) - area_inter
            
            if union_area <= 0.0:
                return 0.0            
            
            return area_inter/union_area
        else:
            return 0.0
    
    #转换为 位置参数
    def __to_t(self , G_box , P_box):
        #print(G_box , P_box) #debug
        
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        G_x , G_y , G_w , G_h = to(G_box)
        P_x , P_y , P_w , P_h = to(P_box)
        
        t_x = (G_x-P_x)/P_w
        t_y = (G_y-P_y)/P_h
        t_w = np.log(G_w/P_w)
        t_h = np.log(G_h/P_h)
        
        return t_x , t_y , t_w , t_h
    
    def get_train_proposal(self , img_arr , labels , ground_truth_coord):
        
        anchors = np.zeros(shape=[61,36,9,7] , dtype=float) #保存非位置参数 直观的的坐标 所有的计算在此进行
        anchors_aux = np.zeros(shape=[61,36,9,7] , dtype=float) #保存位置参数 不参与计算 只进行保存
        
        feature_map_height = 61
        feature_map_width = 36
        
        scales = [128 , 256 , 512]
        ratios = [[1,2] , [1,1] , [2,1]] #用scale除以即可 [height_ratio width_ratio]
        
        '''
        跑出图片边界的anchor丢弃
        '''
        
        #x_0 y_0 为 feature map中的坐标
        for x_0 in range(61): #width
            for y_0 in range(36): #height
                #x_0_coord y_0_coord 为原图中的坐标（中点坐标）
                x_0_coord = x_0 * 16
                y_0_coord = y_0 * 16
                
                for scale_idx , scale in enumerate(scales):
                    for ratio_idx , ratio in enumerate(ratios):
                        scale_height = int(scale / ratio[0])
                        scale_width = int(scale / ratio[1])
                    
                        x_1_coord = int(x_0_coord - scale_width/2)
                        y_1_coord = int(y_0_coord - scale_height/2)

                        if x_1_coord < 0 or y_1_coord < 0:
                            anchors[x_0 , y_0 , scale_idx*3+ratio_idx][0] = -1 #不参与训练的样本 丢弃 标记为-1
                            anchors_aux[x_0 , y_0 , scale_idx*3+ratio_idx][0] = -1
                            continue

                        x_2_coord = int(x_0_coord + scale_width/2)
                        y_2_coord = int(y_0_coord + scale_height/2)

                        if x_2_coord > 600 or y_2_coord > 1000:
                            anchors[x_0 , y_0 , scale_idx*3+ratio_idx][0] = -1 #不参与训练的样本 丢弃 标记为-1
                            anchors_aux[x_0 , y_0 , scale_idx*3+ratio_idx][0] = -1
                            continue
                        
                        #基本的anchors 不出边界 图片内部
                        anchors[x_0 , y_0 , scale_idx*3+ratio_idx][2 : 6] = [x_1_coord , x_2_coord , y_1_coord , y_2_coord]

        '''
        对ground truth进行放缩
        '''
        img_h = img_arr.shape[0]
        img_w = img_arr.shape[1]
        
        def bbox_trans(_rect):
            rect = [-1,-1,-1,-1]
            
            rect[0] = int(_rect[0]*600 / img_w)
            rect[1] = int(_rect[1]*600 / img_w)
            rect[2] = int(_rect[2]*1000 / img_h)
            rect[3] = int(_rect[3]*1000 / img_h)
        
            return rect
                
        '''
        每个gt iou最大的anchor作为正样本
        '''
        
        for j in range(len(ground_truth_coord)):
            max_iou = 0.0
            max_iou_idx = [0,0,0]
            
            gt_coord_trans = bbox_trans( ground_truth_coord[j] )
            
            for w in range(61): #width
                for h in range(36): #height
                    for anchor_idx in range(9):
                        if anchors[w , h , anchor_idx][0] != -1:
                            #不等于-1参与训练
                            iou = self.IoU(gt_coord_trans , anchors[w,h,anchor_idx][2:6])
                            
                            if iou > max_iou:
                                max_iou = iou
                                max_iou_idx = [w,h,anchor_idx]
              
            if anchors[max_iou_idx[0] , max_iou_idx[1] , max_iou_idx[2]][0] != -1:
                #判断 max_iou_idx没有被更新过 一直是[0 0 0]
                #如果[0 0 0]对应的标识不是-1 即参与训练
                anchors_aux[max_iou_idx[0] , max_iou_idx[1] , max_iou_idx[2]][1] = 1
                anchors_aux[max_iou_idx[0] , max_iou_idx[1] , max_iou_idx[2]][2:6] = self.__to_t(gt_coord_trans , anchors[max_iou_idx[0] , max_iou_idx[1] , max_iou_idx[2]][2:6])
                anchors_aux[max_iou_idx[0] , max_iou_idx[1] , max_iou_idx[2]][6] = labels[j]
            #else:
            #    continue
        
        '''
        寻找剩余的正负样本(训练集)
        '''
        for w in range(61): #width
            for h in range(36): #height
                for anchor_idx in range(9):
                    if anchors[w,h,anchor_idx][0] != -1:
                        #不等于-1参与训练
                        negative_count = 0 #产生负样本计数器

                        for j in range(len(ground_truth_coord)):

                            gt_coord_trans = bbox_trans( ground_truth_coord[j] )

                            iou = self.IoU(gt_coord_trans , anchors[w,h,anchor_idx][2:6])

                            if iou > 0.7: 
                                #正样本
                                #每一个正样本anchor只可能对应一个ground truth
                                #print(iou , gt_coord_trans , anchors[w,h,anchor_idx][2:6])
                                anchors_aux[w,h,anchor_idx][1] = 1
                                anchors_aux[w,h,anchor_idx][2:6] = self.__to_t(gt_coord_trans , anchors[w,h,anchor_idx][2:6])
                                anchors_aux[w,h,anchor_idx][6] = labels[j]
                                
                                break #为当前的正样本anchor找到了ground truth

                            elif iou < 0.3:
                                negative_count = negative_count + 1
                            else:
                                #此处的样本不参与训练
                                anchors_aux[w,h,anchor_idx][0] = -1

                            if negative_count == len(ground_truth_coord):
                                #与所有的ground truth的iou均小于0.3 则为负样本
                                anchors_aux[w,h,anchor_idx][0] = 1
                                
                                break
        
        anchors_aux = np.reshape(anchors_aux , newshape=[-1 , 7])
        
        return anchors_aux
    
    
    def load(self , img_path_name):
        '''
        img_path_name:绝对路径
        '''
        
        #图片数据 ground truth具体数据 ground truth对应label ground truth坐标信息 图片文件名
        img_arr , _ , labels , ground_truth_coord , _ = self.img_loader.load(img_path_name)
        
        anchors = self.get_train_proposal(img_arr , labels , ground_truth_coord)
        
        '''
        resize 并 归一化像素值
        img_arr 为 BGR形式
        '''
        img_arr = cv2.resize(img_arr , (600 , 1000))
        '''[R G B] [123.68 116.779 103.939]
        减去每个通道的像素平均值 归一化'''
        #img_arr[:,:,0] = img_arr[:,:,0] - 103.939
        #img_arr[:,:,1] = img_arr[:,:,1] - 116.779
        #img_arr[:,:,2] = img_arr[:,:,2] - 123.680
        
        img_arr = img_arr / 127.5 - 1
        
        #'''增加一维 batch_size维'''
        return np.expand_dims(img_arr , axis=0) , anchors
    
    def get_test_proposal(self , img_arr):
        '''
        return:rois
        proposals_coord
        '''
        
        img_h = img_arr.shape[0]
        img_w = img_arr.shape[1]
        
        def bbox_trans(_rect):
            rect = [-1,-1,-1,-1]
            
            rect[0] = int(_rect[0]*600 / w)
            rect[1] = int(_rect[1]*600 / w)
            rect[2] = int(_rect[2]*1000 / h)
            rect[3] = int(_rect[3]*1000 / h)
        
            return rect
        
        anchors = np.zeros(shape=[61,36,9,4] , dtype=float)
        
        feature_map_height = 61
        feature_map_width = 36
        
        scales = [128 , 256 , 512]
        ratios = [[1,2] , [1,1] , [2,1]] #用scale除以即可 [height_ratio width_ratio]
        
        #x_0 y_0 为 feature map中的坐标
        for x_0 in range(61): #height
            for y_0 in range(36): #width
                #x_0_coord y_0_coord 为原图中的坐标（中点坐标）
                x_0_coord = x_0 * 16
                y_0_coord = y_0 * 16
                
                for scale_idx , scale in enumerate(scales):
                    for ratio_idx , ratio in enumerate(ratios):
                        
                        scale_height = int(scale / ratio[0])
                        scale_width = int(scale / ratio[1])
                    
                        x_1_coord = int(x_0_coord - scale_width/2)
                        y_1_coord = int(y_0_coord - scale_height/2)
                        #跨越边界的anchor 进行截断
                        if x_1_coord < 0:
                            x_1_coord = 0
                            
                        if y_1_coord < 0:
                            y_1_coord = 0

                        x_2_coord = int(x_0_coord + scale_width/2)
                        y_2_coord = int(y_0_coord + scale_height/2)

                        if x_2_coord > 600:
                            x_2_coord = 600
                            
                        if y_2_coord > 1000:
                            y_2_coord = 1000
                        
                        anchors[x_0 , y_0 , scale_idx*3+ratio_idx] = [x_1_coord , x_2_coord , y_1_coord , y_2_coord]

        return np.array(anchors)
    
    def load_test(self , img_path_name):
        img_arr = cv2.imread(img_path_name)
        
        anchors = self.get_test_proposal(img_arr)
                
        img_arr = cv2.resize(img_arr , (600 , 1000))
        img_arr = img_arr / 127.5 - 1.0
        
        return np.expand_dims(img_arr , axis=0) , anchors

# class Img_generator

In [458]:
class Dataset(object):
    def __init__(self):
        self.img_generator = Img_generator()
        
        self.img_loader = Image()
        
        self.img_file_names_train = glob(TRAIN_DATA_PATH + '*')
        self.img_file_names_test = glob(TEST_DATA_PATH + '*')
    
    def get_batch(self):
        path = np.random.choice(self.img_file_names_train)
        
        x , anchors = self.img_generator.load(path)
        
        return x , anchors
    
    def get_batch_test(self , path):
        '''
        返回图片的真实img_arr 未resize 未归一化
        注意cv2打开图片通道为BGR
        '''
        if not path:
            #未指定path 从测试目录中随机选一张图片测试
            path = np.random.choice(self.img_file_names_test)
        
        '''resize & norm/anchors/原图 '''
        x , anchors = self.img_generator.load_test(path)
        
        return x , anchors
    
    
    def target2coord(self , bbox_pred , img_arr , anchors):
        img_height = img_arr.shape[0]
        img_width = img_arr.shape[1]
        
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        def ot(target):
            x_c = target[0]
            y_c = target[1]
            w = target[2]
            h = target[3]
            
            x1 = 0.5*(2*x_c-w)
            y1 = 0.5*(2*y_c-h)
            x2 = x1+w
            y2 = y1+h
            
            x1=int(round(x1))
            y1=int(round(y1))
            x2=int(round(x2))
            y2=int(round(y2))
            
            if x1<0:
                x1 = 0
            if x2>img_width:
                x2 = img_width
            if y1<0:
                y1 = 0
            if y2>img_height:
                y2 = img_height
                            
            return [x1 , x2 , y1 , y2]
        
        def target2rect(target_hat , P_box):
            t_x = target_hat[0]
            t_y = target_hat[1]
            t_w = target_hat[2]
            t_h = target_hat[3]
            
            P_x , P_y , P_w , P_h = to(P_box) #将P框转换为 中点坐标 宽 高 形式
            
            G_x_hat = P_w*t_x+P_x
            G_y_hat = P_h*t_y+P_y
            G_w_hat = P_w*np.exp(t_w)
            G_h_hat = P_h*np.exp(t_h)
            
            return ot([G_x_hat , G_y_hat , G_w_hat , G_h_hat]) #ot还需要转化为(x1,x2,y1,y2)形式
        
        bbox_coord_pred = np.zeros(shape=[61,36,9,4] , dtype=float)
        
        for w in range(61):
            for h in range(36):
                for anchor_idx in range(9):
                    bbox_coord_pred[w,h,anchor_idx] = target2rect(bbox_pred[w,h,anchor_idx] , anchors[w,h,anchor_idx])
                
        return bbox_coord_pred

In [459]:
class AlexNet_model_RPN(object):
    '''
    构建RPN
    替代selective search算法
    这个网络输出proposals
    并入FRCN中
    '''
    def __init__(self , is_training=True):
        '''
        x:[batch 1000 600 channel] #or batch 600 1000 channel
        anchors:2 class score  4 rois coord 规定2 class score 第0位表示背景 第1位表示前景 (一个基本元素)
        '''
        self.x = tf.placeholder(tf.float32 , shape=[1 , 1000 , 600 , 3])
        self.anchors = tf.placeholder(tf.float32 , shape=[19764 , 2 + 4 + 1])
                
        self.load_parameter()
    
        self.build()
        
        if is_training:
            self.loss_layer()
    
    def load_parameter(self , trainable = False):
        '''
        卷积结构不进行训练
        '''
        parameter = np.load('bvlc_alexnet.npy' , encoding='bytes').item()
        
        self.conv1_w = tf.Variable(parameter['conv1'][0] , trainable = trainable)
        self.conv1_b = tf.Variable(parameter['conv1'][1] , trainable = trainable)
        
        self.conv2_w = tf.Variable(parameter['conv2'][0] , trainable = trainable)
        self.conv2_b = tf.Variable(parameter['conv2'][1] , trainable = trainable)
        
        self.conv3_w = tf.Variable(parameter['conv3'][0] , trainable = trainable)
        self.conv3_b = tf.Variable(parameter['conv3'][1] , trainable = trainable)
        
        self.conv4_w = tf.Variable(parameter['conv4'][0] , trainable = trainable)
        self.conv4_b = tf.Variable(parameter['conv4'][1] , trainable = trainable)
        
        self.conv5_w = tf.Variable(parameter['conv5'][0] , trainable = trainable)
        self.conv5_b = tf.Variable(parameter['conv5'][1] , trainable = trainable)
    
    def group_conv(self , x , kernel, strides, padding='SAME'):
        x_splits = tf.split(x , num_or_size_splits=2 , axis=3)
        kernel_splits = tf.split(kernel , num_or_size_splits=2 , axis=3)
        
        conv_splits_1 = tf.nn.conv2d(x_splits[0] , kernel_splits[0] , strides , padding)
        conv_splits_2 = tf.nn.conv2d(x_splits[1] , kernel_splits[1] , strides , padding)
        
        return tf.concat([conv_splits_1 , conv_splits_2] , axis=3)

    def build(self):
        conv1 = tf.nn.conv2d(self.x , self.conv1_w , strides=[1,4,4,1] , padding='SAME')
        conv1 = tf.nn.bias_add(conv1 , self.conv1_b)
        conv1 = tf.nn.relu(conv1)
        lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        pool1 = tf.nn.max_pool(lrn1 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')
        
        conv2 = self.group_conv(pool1 , self.conv2_w , strides=[1,1,1,1] , padding='SAME')
        conv2 = tf.nn.bias_add(conv2 , self.conv2_b)
        conv2 = tf.nn.relu(conv2)
        lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        pool2 = tf.nn.max_pool(lrn2 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')
        
        conv3 = tf.nn.conv2d(pool2 , self.conv3_w , strides=[1,1,1,1] , padding='SAME')
        conv3 = tf.nn.bias_add(conv3 , self.conv3_b)
        conv3 = tf.nn.relu(conv3)
        
        conv4 = self.group_conv(conv3 , self.conv4_w , strides=[1,1,1,1] , padding='SAME')
        conv4 = tf.nn.bias_add(conv4 , self.conv4_b)
        conv4 = tf.nn.relu(conv4)
        
        conv5 = self.group_conv(conv4 , self.conv5_w , strides=[1,1,1,1] , padding='SAME')
        conv5 = tf.nn.bias_add(conv5 , self.conv5_b)
        conv5 = tf.nn.relu(conv5)
        
        '''
        上面为迁移alexnet(zf/vgg16)
        '''
        
        '''
        RPN 增加的3*3 conv
        '''  
        conv6 = slim.conv2d(conv5 , num_outputs=conv5.get_shape().as_list()[-1] , kernel_size=[3 , 3] , stride=[1,1] , padding='SAME')
        
        '''
        ZF channel 256-d
        VGG16 channel 512-d
        '''
        '''
        self.conv6's shape [1 61 36 256]
        '''
        # pool6 = slim.max_pool2d(conv6 , kernel_size=[3 , 3] , stride=[1 , 1] , padding='SAME')
        '''
        cls layer
        '''
        cls = slim.conv2d(conv6 , num_outputs=9*2 , kernel_size=[1,1] , stride=[1,1] , padding='VALID' , activation_fn=None) #[1 61 36 9*2]
        cls = tf.reshape(cls , shape=(-1 , 2))  #[19764 2]
        
        self.cls = tf.nn.softmax(cls)
        
        '''
        reg layer
        '''
        reg = slim.conv2d(conv6 , num_outputs=9*4 , kernel_size=[1,1] , stride=[1,1] , padding='VALID' , activation_fn=None) #[1 61 36 9*4]
        reg = tf.reshape(reg , shape=(-1 , 4)) #[19764 4]
        
        self.reg = reg
            
    def loss_layer(self):
        '''
        self.anchors: [19764 2+4+1] [softmax_score coord label]
        '''
        '''
        正负样本的不平衡 需要使用损失权重 loss_weight
        因为正样本的数量少 较大权重值
        负样本较小权重值
        '''
        #为-1的 不参与训练
        
        '''
        分类损失
        '''     
        cls_true = self.anchors[: , :2] #whether object probs [19764 2]
        bbox_true = self.anchors[: , 2:6] #rois coord [19764 4]
        
        mask_cls = tf.tile( tf.cast( tf.not_equal( tf.reshape(cls_true[: , 0] , shape=[-1,1]) , -1*np.ones(shape=[cls_true.shape[0] , 1])) , dtype=tf.float32) , multiples=[1,2])
        
        cross_entropy = - tf.reduce_sum( cls_true * tf.log(self.cls) * mask_cls , axis=1)
        cls_loss = tf.reduce_mean(cross_entropy)
                
        '''
        回归损失
        '''
        #回归损失中 标记为-1 和 背景 均不参与训练
        mask_back = tf.tile( tf.reshape( cls_true[: , 1] , shape=[-1 , 1] ) , multiples=[1 , 4]) #重复4次 是否为前景
        mask_box  = tf.tile( tf.cast( tf.not_equal( tf.reshape(cls_true[: , 0] , shape=[-1,1]) , -1*np.ones(shape=[cls_true.shape[0] , 1])) , dtype=tf.float32) , multiples=[1,4])
        
        reg_loss = tf.reduce_sum( tf.square( mask_back * mask_box * (bbox_true - self.reg) ) )
        
        # reg_loss = tf.div(reg_loss , tf.reduce_sum(cls_true[: , 1]) ) #计算平均损失
        
        self.RPN_loss = cls_loss + reg_loss
    

In [466]:
class RPN(object):    
    def __init__(self , is_training = True):      
        self.dataset = Dataset()
        self.img_generator = Img_generator()
        
        self.filewriter_path = 'save/RPN/logs' #模型可视化
        self.checkpoint_path = 'save/RPN/model/' #模型持久化
                                        
        self.model = AlexNet_model_RPN(is_training)

        self.sess = tf.Session()
        self.saver = tf.train.Saver(max_to_keep=2) #max_to_keep 最大保存5次模型  之后继续保存则会覆盖前面的模型

        if is_training:
            '''训练参数'''
            self.epoch = 100000
            
            self.global_step = tf.Variable(initial_value=0 , trainable=False)
            
            self.learning_rate = tf.train.exponential_decay(learning_rate=0.00001 , global_step=self.global_step,
                                                            decay_steps=900 , decay_rate=0.8 , staircase=True)
            
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.model.RPN_loss , global_step=self.global_step)
        
            #引入滑动平均
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9) #滑动平均
            self.average_op = self.ema.apply(tf.trainable_variables()) #给所有的可训练变量应用滑动平均
            
            with tf.control_dependencies([self.optimizer]):
                self.train_op = tf.group(self.average_op)
            
            '''可视化'''
            self.sess.run(tf.global_variables_initializer())
            
            tf.summary.scalar('RPN_loss' , self.model.RPN_loss)
            self.merged_summary = tf.summary.merge_all() #merge all summaries in the default graph
            self.writer = tf.summary.FileWriter(self.filewriter_path , self.sess.graph) #可视化

    def train(self):
        if os.path.exists(self.checkpoint_path + 'checkpoint'):
            self.saver.restore(self.sess , tf.train.latest_checkpoint(self.checkpoint_path))
        else:
            self.sess.run(tf.global_variables_initializer())
       
        for i in range(100):
            x , anchors = self.dataset.get_batch()
                        
            self.sess.run(self.train_op , feed_dict={self.model.x : x , self.model.anchors : anchors} )

            if i % 10 == 0:
                self.saver.save(self.sess , self.checkpoint_path + 'model.ckpt' , global_step = i)
                
                RPN_loss , summary = self.sess.run([self.model.RPN_loss , self.merged_summary] , feed_dict={self.model.x : x , self.model.anchors : anchors})
                
                self.writer.add_summary(summary , global_step = i)
                
                print(i , RPN_loss)
        
        self.writer.close() #event to disk and close the file

    def predict(self , path=None , scores_threshold = 0.5 , nms_iou_threshold = 0.7 , top_N = 2000):
        '''返回的坐标是在(1000 600)坐标系中'''
        if os.path.exists(self.checkpoint_path + 'checkpoint'):
            self.saver.restore(self.sess , tf.train.latest_checkpoint(self.checkpoint_path) )
            
            return self._predict(path , scores_threshold , nms_iou_threshold)
            
        else:
            print('no model!!!')
            return 
    
    def _predict(self , path , scores_threshold , nms_iou_threshold , top_N):
        '''返回的坐标是在(1000 600)坐标系中'''
        x , anchors = self.dataset.get_batch_test(path)
                   #bbox_pred
        cls_pred , reg_pred = self.sess.run([self.model.cls , self.model.reg] , feed_dict={self.model.x : x})
        
        cls_pred = np.reshape(cls_pred , newshape=[61 , 36 , 9 , 2])
        reg_pred = np.reshape(reg_pred , newshape=[61 , 36 , 9 , 4])
        
        '''转换为原始图片中的坐标'''
        '''
        超出边界的进行截断
        '''
        bbox_coord_pred = self.dataset.target2coord(reg_pred , x[0] , anchors)
                
        scores_pred_f = [] #符合条件的概率值
        bbox_coord_pred_f = [] #符合条件的框子坐标
        
        for w in range(61):
            for h in range(36):
                for anchor_idx in range(9):
                    if cls_pred[w,h,anchor_idx][0] < cls_pred[w,h,anchor_idx][1] and cls_pred[w,h,anchor_idx][1] > scores_threshold:
                        scores_pred_f.append(cls_pred[w,h,anchor_idx][1]) #保存 score
                        
                        bbox_coord_pred_f.append(bbox_coord_pred[w,h,anchor_idx]) #保存位置参数
        
        scores_pred_f = np.array(scores_pred_f)
        bbox_coord_pred_f = np.array(bbox_coord_pred_f)
        
        #降序scores
        sort_idx = np.argsort(- np.array(scores_pred_f) )
        
        scores_pred_f = scores_pred_f[sort_idx]
        bbox_coord_pred_f = bbox_coord_pred_f[sort_idx]
                
        final_idx = self._nms(scores_pred_f , bbox_coord_pred_f , nms_iou_threshold)
                
        bbox_coord_pred_f = bbox_coord_pred_f[final_idx]
        
        return bbox_coord_pred_f
    
        #返回 top-N 的proposals(根据scores)
        #if len(bbox_coord_pred_f) <= top_N: #小于或等于top_N 全部返回即可
        #    return bbox_coord_pred_f
       
        #scores_pred_f = scores_pred_f[final_idx]
        #top_N_idx = np.argsort(-1*scores_pred_f)
        #
        #return bbox_coord_pred_f[top_N_idx]
        
        
        
    def _nms(self , probability_hat , rects_hat , nms_iou_threshold):
        idx = []
        
        length = len(probability_hat)
        lost_flag = [1]*length #标记丢弃的框 0表示丢弃
        
        max_score_idx = 0 #记录当前最大score的idx
        
        while max_score_idx < length:
            max_score_rect = rects_hat[max_score_idx]
            
            for i in range(max_score_idx+1 , length):
                if lost_flag[i] == 1 and (self.img_generator.IoU( max_score_rect , rects_hat[i] ) > nms_iou_threshold): #大于阈值 丢弃
                    lost_flag[i] = 0

            max_score_idx_bak = max_score_idx #后续使用
            
            #让max_score_idx指向下一个没被丢弃的最大值
            for i in range(max_score_idx+1 , length):
                if lost_flag[i] == 1:
                    max_score_idx = i
                    break
            
            #说明max_score_idx没有移动过 即后续的都被丢弃了 终止循环
            if max_score_idx == max_score_idx_bak:
                break
        
        for i in range(length):
            if lost_flag[i] == 1:
                idx.append(i)
                
        return idx

In [467]:
tf.reset_default_graph()

In [437]:
rpn = RPN()

In [438]:
rpn.train()

0 0.20317003
10 4.509024
20 0.19229999
30 0.20175572
40 0.20087923
50 0.18708783
60 1.8425518
70 3.2151296
80 0.18926746
90 0.18095013


In [447]:
tf.reset_default_graph()

In [468]:
rpnn = RPN(is_training=False)

In [None]:
a=rpnn.predict(path='4.jpg')

In [522]:
def roi_coord(rect):
    '''
    由原图中的roi坐标向conv5的feature map映射
    feature map中的坐标
    ''' 
    rect[1:] = (rect[1:] - (11-1)//2 ) // 4
    rect[1:] = (rect[1:] - (3-1)//2 ) // 2
    rect[1:] = (rect[1:] - (3-1)//2 ) // 2
    
    '''-1修正'''
    #rect[2] = rect[2] - 1
    #rect[3] = rect[3] - 1
    
    #224*224 经过conv之后 变为13*13
    return np.concatenate( (rect[0:1] , np.clip(rect[1:] , a_min=0 , a_max=12) ) , axis=0)

In [610]:
class RPN_SS(object):
    def __init__(self):
        self.img_loader = Image()
        self.rpn = RPN()

    #计算bbox面积
    def bbox_area(self , bbox):
        w = bbox[1] - bbox[0]
        h = bbox[3] - bbox[2]
        
        return w*h
    
    #计算交并比
    def IoU(self , bbox_a , bbox_b):
        xmin_a = bbox_a[0]
        xmax_a = bbox_a[1]
        ymin_a = bbox_a[2]
        ymax_a = bbox_a[3]
        
        xmin_b = bbox_b[0]
        xmax_b = bbox_b[1]
        ymin_b = bbox_b[2]
        ymax_b = bbox_b[3]
        
        if   xmin_a < xmax_b <= xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_a <= xmin_b < xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_b < xmax_a <= xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        elif xmin_b <= xmin_a < xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        else:
            flag = False
        
        if flag:
            x_sorted_list = sorted([xmin_a, xmax_a, xmin_b, xmax_b])
            y_sorted_list = sorted([ymin_a, ymax_a, ymin_b, ymax_b])
            
            x_intersect_w = x_sorted_list[2] - x_sorted_list[1] #0 1 2 3
            y_intersect_h = y_sorted_list[2] - y_sorted_list[1] #0 1 2 3
            
            area_inter = x_intersect_w * y_intersect_h #计算重合面积
            
            union_area = self.bbox_area(bbox_a) + self.bbox_area(bbox_b) - area_inter
            
            return area_inter/union_area
        else:
            return 0.0
    
    #ground truth coord and proposal coord计算bb回归使用的标签
    def __to_t(self , G_box , P_box):
        
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        G_x , G_y , G_w , G_h = to(G_box)
        P_x , P_y , P_w , P_h = to(P_box)
        
        t_x = (G_x-P_x)/P_w
        t_y = (G_y-P_y)/P_h
        t_w = np.log(G_w/P_w)
        t_h = np.log(G_h/P_h)
        
        return t_x , t_y , t_w , t_h
    
    def clip(self , img_arr , img_path_name):
        proposals = self.rpn.predict(img_path_name) #1000*600坐标系中
                
        '''
        转换上面到img_arr同坐标系中坐标
        '''
        h = img_arr.shape[0]
        w = img_arr.shape[1]
        
        def bbox_trans(_rect):
            rect = [-1 , -1 , -1 , -1]
            rect[0] = int(_rect[0]*w / 600)
            rect[1] = int(_rect[1]*w / 600)
            rect[2] = int(_rect[2]*h / 1000)
            rect[3] = int(_rect[3]*h / 1000)
        
            return rect
        
        for i in range(len(proposals)):
            proposals[i] = bbox_trans(proposals[i])
        
        return np.array(proposals)
    
    def get_train_proposal(self , img_arr , labels , ground_truth_coord , img_path_name):
        #下面使用的img_arr必须是原始的图 没有resize 也没有归一化到-1 1
        proposals_coord = self.clip(img_arr , img_path_name) #RPN产生的bbox
        
        h = img_arr.shape[0]
        w = img_arr.shape[1]
                
        def bbox_trans_roi(_rect):
            rect = [_rect[0] , -1 , -1 , -1 , -1]
            rect[1] = int(_rect[1]*224 / w)
            rect[2] = int(_rect[2]*224 / w)
            rect[3] = int(_rect[3]*224 / h)
            rect[4] = int(_rect[4]*224 / h)
        
            return np.array(rect)
                        
        rois = []
        y = []
                        
        for j in range(len(proposals_coord)):
            for i in range(len(ground_truth_coord)):
            
        #for i in range(len(ground_truth_coord)):
            #for j in range(len(proposals_coord)):
                
                label = np.zeros(shape=CLASSES_NUM + 4 ) #one-hot + 4 coords #21+4 elements
                
                #第一个元素为0 因为一次一张图片 见tf.crop_and_resize函数
                roi = [0 , proposals_coord[j][0] , proposals_coord[j][1] , proposals_coord[j][2] ,  proposals_coord[j][3]]
                
                roi = np.array(roi)
                
                roi = bbox_trans_roi(roi) #转换为resize之后的图中的坐标
                
                roi = roi_coord(roi) #向conv5 feature map中映射
                
                iou = self.IoU(ground_truth_coord[i] , proposals_coord[j])
                if iou < IoU_THRESHOLD and iou >= 0.1 : #0.5
                    #背景
                    label[0] = 1
                    
                    #==============
                    if np.random.random() > 0.8:
                        '''
                        概率性增加负样本
                        '''
                        rois.append(roi)
                        y.append(label)
                    #==============
                                                            
                elif iou >= 0.5 :
                    #前景
                    label[labels[i]] = 1
                    
                    target = self.__to_t(ground_truth_coord[i] , proposals_coord[j])
                    
                    label[CLASSES_NUM + 0] = target[0]
                    label[CLASSES_NUM + 1] = target[1]
                    label[CLASSES_NUM + 2] = target[2]
                    label[CLASSES_NUM + 3] = target[3]
                    
                    rois.append(roi)
                    y.append(label)
                    
                    #========
                    '''
                    增加正样本数量
                    '''
                    rois.append(roi)
                    y.append(label)
                    
                    rois.append(roi)
                    y.append(label)
                    
                    rois.append(roi)
                    y.append(label)
                    #========
                    
                    '''
                    两种写法 效果一样 正样本相同 负样本有差异 
                    '''
                    break
                
                #else:
                    #ios<0.1 情况
                      
        return np.array(rois) , np.array(y)
        
          
    def load(self , img_path_name):
        #图片数据 ground truth具体数据 ground truth对应label ground truth坐标信息 图片文件名
        img_arr , _ , labels , ground_truth_coord , _ = self.img_loader.load(img_path_name)
        
        rois , y = self.get_train_proposal(img_arr , labels , ground_truth_coord , img_path_name)
        
        img_arr = cv2.resize(img_arr , (224 , 224))
        img_arr = img_arr/127.5-1.0

        #'''增加一维 batch_size维'''
        return np.expand_dims(img_arr , axis=0) , rois , y
    
    
    def get_test_proposal(self , img_arr):
        '''
        return:rois
        proposals_coord
        '''
        proposals_coord = self.pr_generator.clip(img_arr)
        
        h = img_arr.shape[0]
        w = img_arr.shape[1]
        
        def bbox_trans_roi(_rect):
            '''0:idx'''
            rect = [_rect[0] , -1 , -1 , -1 , -1]
            rect[1] = int(_rect[1]*224 / w)
            rect[2] = int(_rect[2]*224 / w)
            rect[3] = int(_rect[3]*224 / h)
            rect[4] = int(_rect[4]*224 / h)
        
            return rect
        
        rois = []
        
        for i in range(len(proposals_coord)):
            roi = [0 , proposals_coord[i][0] , proposals_coord[i][1] , proposals_coord[i][2] , proposals_coord[i][3]]
            
            roi = np.array(roi)
            
            roi = bbox_trans_roi(roi) #转换到224*224坐标系中
            
            roi = roi_coord(roi) #向conv5 feature map中映射
            
            rois.append(roi)
            
        return rois , proposals_coord
    
    
    def load_test(self , img_path_name):
        img_arr = cv2.imread(img_path_name)
        
        rois , proposals_coord = self.get_test_proposal(img_arr)
        
        img_arr_resize = cv2.resize(img_arr , (224 , 224))
        img_arr_resize_norm = img_arr_resize / 127.5 - 1.0
        
        return np.expand_dims(img_arr_resize_norm , axis=0) , rois , img_arr , proposals_coord
    

In [595]:
#不需要修改
class RPN_Dataset(object):
    def __init__(self):
        self.img_generator = RPN_SS()
        
        self.img_loader = Image()
        
        self.img_file_names_train = glob(TRAIN_DATA_PATH + '*')
        self.img_file_names_test = glob(TEST_DATA_PATH + '*')
        
    def get_batch(self):
        path = np.random.choice(self.img_file_names_train)
        
        x , rois , y = self.img_generator.load(path)
        
        return x , rois , y
    
    def get_batch_test(self , path):
        '''
        返回图片的真实img_arr 未resize 未归一化
        注意cv2打开图片通道为BGR
        '''
        if not path:
            #未指定path 从测试目录中随机选一张图片测试
            path = np.random.choice(self.img_file_names_test)
        
        img_arr_resize_norm , rois , img_arr , porposals_coord = self.img_generator.load_test(path)
        
        return img_arr_resize_norm , rois , img_arr , porposals_coord
    
    
    def target2coord(self , bbox_pred , img_arr , proposals_coord):
        img_height = img_arr.shape[0]
        img_width = img_arr.shape[1]
        
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        def ot(target):
            x_c = target[0]
            y_c = target[1]
            w = target[2]
            h = target[3]
            
            x1 = 0.5*(2*x_c-w)
            y1 = 0.5*(2*y_c-h)
            x2 = x1+w
            y2 = y1+h
            
            x1=int(round(x1))
            y1=int(round(y1))
            x2=int(round(x2))
            y2=int(round(y2))
            
            if x1<0:
                x1 = 0
            if x2>img_width:
                x2 = img_width
            if y1<0:
                y1 = 0
            if y2>img_height:
                y2 = img_height
            
            return [x1 , x2 , y1 , y2]
        
        def target2rect(target_hat , P_box):
            t_x = target_hat[0]
            t_y = target_hat[1]
            t_w = target_hat[2]
            t_h = target_hat[3]
            
            P_x , P_y , P_w , P_h = to(P_box) #将P框转换为 中点坐标 宽 高 形式
            
            G_x_hat = P_w*t_x+P_x
            G_y_hat = P_h*t_y+P_y
            G_w_hat = P_w*np.exp(t_w)
            G_h_hat = P_h*np.exp(t_h)
            
            return ot([G_x_hat , G_y_hat , G_w_hat , G_h_hat]) #ot还需要转化为(x1,x2,y1,y2)形式
        
        bbox_coord_pred = []
        
        for i in range(len(bbox_pred)):
            bbox_coord_pred.append( target2rect(bbox_pred[i] , proposals_coord[i]) )
        
        return bbox_coord_pred

In [596]:
def roi_pooling(conv5 , rois , pool_height , pool_width):
        '''
        conv5:[batch height width channel]
        roi-idx upper-left bottom-right
        rois中的坐标是在feature map中的坐标
        '''

        conv5_height = 13
        conv5_width = 13
        
        rois_ind = tf.cast(rois[: , 0] , tf.int32) #如果只有一张图片 则rois_ind都为0
        
        rois = tf.cast(rois , tf.float32)

        rois_coord = rois[: , 1:] #[x1 x2 y1 y2]

        normalization = tf.cast(tf.stack([ conv5_width , conv5_width , conv5_height , conv5_height ],axis=-1) , dtype=tf.float32)
        rois_coord = tf.div(rois_coord , normalization)

        rois_coord = tf.stack([rois_coord[: , 2] , rois_coord[: , 0] , rois_coord[: , 3] , rois_coord[: , 1] ] , axis=1)
        #box_ind参数为图片的索引 对第几张图片进行crop and resize
        #只有一张图片 则box_ind中全为0
        rois_conv5_feature = tf.image.crop_and_resize(conv5 , boxes=rois_coord , box_ind=rois_ind , crop_size=[12 , 12] )

        rois_pooling_feature = slim.max_pool2d(rois_conv5_feature , kernel_size=[2 , 2 ] , stride=[2 , 2 ] , padding='SAME')

        return rois_pooling_feature
    

In [597]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860
#alexnet原始模型以及预训练参数导入
class AlexNet_model(object):
    def __init__(self , is_training=True):
        
        self.x = tf.placeholder(tf.float32 , shape=[None , 224 , 224 , 3])
        self.rois = tf.placeholder(tf.int32 , shape=[None , 5])
                     
        self.load_parameter()
        
        self.build(is_training)
        
        if is_training:
            self.y = tf.placeholder(tf.float32 , shape=[None , CLASSES_NUM + 4])
            
            self.loss_layer()
            
            
    def group_conv(self , x , kernel , strides):
        #2 GPUs
        #原始alexnet配置
        group_x = tf.split(x , num_or_size_splits=2 , axis=3)
        group_kernel = tf.split(kernel , num_or_size_splits=2 , axis=3)

        group_conv0 = tf.nn.conv2d(group_x[0] , group_kernel[0] , strides=strides , padding='SAME')
        group_conv1 = tf.nn.conv2d(group_x[1] , group_kernel[1] , strides=strides , padding='SAME')

        group_conv = tf.concat((group_conv0 , group_conv1) , axis=3)

        return group_conv
    
    def load_parameter(self):
        #=======
        #加载预训练权重
        #获取预训练参数
        net_data = np.load('bvlc_alexnet.npy' , encoding='bytes').item() #不加encoding='bytes' 死机
        
        self.conv1w = tf.Variable(net_data["conv1"][0] , trainable=False)
        self.conv1b = tf.Variable(net_data["conv1"][1] , trainable=False)

        self.conv2w = tf.Variable(net_data["conv2"][0] , trainable=False)
        self.conv2b = tf.Variable(net_data["conv2"][1] , trainable=False)

        self.conv3w = tf.Variable(net_data["conv3"][0] , trainable=False)
        self.conv3b = tf.Variable(net_data["conv3"][1] , trainable=False)

        self.conv4w = tf.Variable(net_data["conv4"][0] , trainable=False)
        self.conv4b = tf.Variable(net_data["conv4"][1] , trainable=False)

        self.conv5w = tf.Variable(net_data["conv5"][0] , trainable=False)
        self.conv5b = tf.Variable(net_data["conv5"][1] , trainable=False)
    
    
    def build(self , is_training=True , keep_prob=0.5):
        conv1 = tf.nn.conv2d(self.x , self.conv1w , strides=(1,4,4,1) , padding='SAME')
        conv1 = tf.nn.bias_add(conv1 , self.conv1b)
        conv1 = tf.nn.relu(conv1)
        lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool1 = tf.nn.max_pool(lrn1 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv2 = self.group_conv(maxpool1 , self.conv2w , strides=(1,1,1,1))
        conv2 = tf.nn.bias_add(conv2 , self.conv2b)
        conv2 = tf.nn.relu(conv2)
        lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool2 = tf.nn.max_pool(lrn2 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv3 = tf.nn.conv2d(maxpool2 , self.conv3w , strides=(1,1,1,1) , padding='SAME')
        conv3 = tf.nn.bias_add(conv3 , self.conv3b)
        conv3 = tf.nn.relu(conv3)

        conv4 = self.group_conv(conv3 , self.conv4w , strides=(1,1,1,1))
        conv4 = tf.nn.bias_add(conv4 , self.conv4b)
        conv4 = tf.nn.relu(conv4)

        conv5 = self.group_conv(conv4 , self.conv5w , strides=(1,1,1,1))
        conv5 = tf.nn.bias_add(conv5 , self.conv5b)
        conv5 = tf.nn.relu(conv5)
        
        roi_pool5 = roi_pooling(conv5 , self.rois , pool_height = 6 , pool_width = 6)
   
        flatten = tf.layers.flatten(roi_pool5)
        
        fc6 = slim.fully_connected(flatten , num_outputs=1024)
        fc6 = slim.dropout(fc6 , keep_prob=keep_prob , is_training=is_training)
        
        fc7 = slim.fully_connected(fc6 , num_outputs=1024)
        fc7 = slim.dropout(fc7 , keep_prob=keep_prob , is_training=is_training)

        self.cls_pred = slim.fully_connected(fc7 , num_outputs=CLASSES_NUM , activation_fn=tf.nn.softmax) #batch 21
        self.bbox_pred = slim.fully_connected(fc7 , num_outputs=4 , activation_fn=None , weights_initializer=tf.initializers.truncated_normal(mean=0.0 , stddev=0.001))
        
        
    def loss_layer(self):
        cls_true = self.y[: , : CLASSES_NUM]
        bbox_true = self.y[: , CLASSES_NUM :]
        
        cross_entropy = - tf.reduce_sum( cls_true * tf.log(self.cls_pred) )
        cls_loss = tf.reduce_mean(cross_entropy)
    
        mask = tf.tile( tf.reshape(cls_true[ : , 0] , [-1 , 1]) , multiples=[1 , 4]) #1 和 4 分别是在相应的维度重复的次数 不能是0次 
        bbox_loss = tf.reduce_mean( tf.reduce_sum( tf.square( (1-mask) * (bbox_true - self.bbox_pred) ) ) )
    
        self.total_loss = cls_loss + bbox_loss

In [598]:

def display(img_arr , labels , bbox , name):    
    for i in range(len(labels)):

        x1 = bbox[i][0]
        x2 = bbox[i][1]
        y1 = bbox[i][2]
        y2 = bbox[i][3]

        img_arr = cv2.rectangle(img_arr , (x1 , y1) , (x2 , y2) , (255,255,255))

        img_arr = cv2.putText(img_arr , labels[i] , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)

    #plt.imshow(meta_img) #图像查看

    plt.imsave(arr=img_arr[: , : ,[2,1,0]] , fname = 'result/%s.jpg' % name) #保存图像


In [606]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860

class FRCN(object):
    '''
    完整模型
    '''
    
    def __init__(self , is_training = True):      
        self.dataset = RPN_Dataset()
        self.img_generator = Img_generator()
        
        self.filewriter_path = 'save/FRCN/logs' #模型可视化
        self.checkpoint_path = 'save/FRCN/model/' #模型持久化
                                        
        self.model = AlexNet_model(is_training)
        
        self.sess = tf.Session()

        self.saver = tf.train.Saver(max_to_keep=2) #max_to_keep 最大保存5次模型  之后继续保存则会覆盖前面的模型
        
        if is_training:
            '''训练参数'''
            self.epoch = 100000
            
            self.global_step = tf.Variable(initial_value=0 , trainable=False)
            
            self.learning_rate = tf.train.exponential_decay(learning_rate=0.00001 , global_step=self.global_step,
                                                            decay_steps=900 , decay_rate=0.8 , staircase=True)
            
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.model.total_loss , global_step=self.global_step)
        
            #引入滑动平均
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9) #滑动平均
            self.average_op = self.ema.apply(tf.trainable_variables()) #给所有的可训练变量应用滑动平均
            
            with tf.control_dependencies([self.optimizer]):
                self.train_op = tf.group(self.average_op)
            
            '''可视化'''
            self.sess.run(tf.global_variables_initializer())
            
            tf.summary.scalar('total_loss' , self.model.total_loss)
            self.merged_summary = tf.summary.merge_all() #merge all summaries in the default graph
            self.writer = tf.summary.FileWriter(self.filewriter_path , self.sess.graph) #可视化
            
            
    def train(self):
        if os.path.exists(self.checkpoint_path + 'checkpoint'):
            self.saver.restore(self.sess , tf.train.latest_checkpoint(self.checkpoint_path))
        else:
            self.sess.run(tf.global_variables_initializer())

        for i in range(30):
            x , rois , y = self.dataset.get_batch()
                        
            if len(rois) == 0:
                continue
            
            feed_dict={self.model.x : x , self.model.rois : rois , self.model.y : y}
            
            self.sess.run(self.train_op , feed_dict=feed_dict)

            if i % 10 == 0:
                self.saver.save(self.sess , self.checkpoint_path + 'model.ckpt' , global_step = i)
                
                total_loss , summary = self.sess.run([self.model.total_loss , self.merged_summary] , feed_dict=feed_dict)
                
                self.writer.add_summary(summary , global_step = i)
                
                print(i , total_loss)
            
        self.writer.close() #event to disk and close the file

    def predict(self , path=None , scores_threshold = 0.5 , nms_iou_threshold = 0.7):
        if os.path.exists(self.checkpoint_path + 'checkpoint'):
            self.saver.restore(self.sess , tf.train.latest_checkpoint(self.checkpoint_path) )
            
            self._predict(path , scores_threshold , nms_iou_threshold)
            
        else:
            print('no model!!!')
            return 
    
    def _predict(self , path , scores_threshold , nms_iou_threshold):
        #proposals_coord 为由rpn产生的rois在10000*600(h*w)坐标系中的坐标
        x , rois , img_arr , proposals_coord = self.dataset.get_batch_test(path)
        
        feed_dict = {self.model.x : x , self.model.rois : rois}
        cls_pred , bbox_pred = self.sess.run([self.model.cls_pred , self.model.bbox_pred] , feed_dict=feed_dict)
        
        #转换为原始图片中的坐标
        bbox_coord_pred = self.dataset.target2coord(bbox_pred , img_arr , proposals_coord)
        
        '''
        由target到原始坐标 在进行nms
        '''
        scores_pred_f = [] #符合条件的概率值
        bbox_coord_pred_f = [] #符合条件的框子坐标
        
        labels_pred_f = [] #label名字
        
        for i in range(len(cls_pred)):
            if np.argmax(cls_pred[i]) != 0 and (np.max(cls_pred[i]) > scores_threshold):
                scores_pred_f.append(np.max(cls_pred[i]))
                
                bbox_coord_pred_f.append(bbox_coord_pred[i])
                
                labels_pred_f.append(LABEL2STR[np.argmax(cls_pred[i])])
        
        scores_pred_f = np.array(scores_pred_f)
        bbox_coord_pred_f = np.array(bbox_coord_pred_f)
        labels_pred_f = np.array(labels_pred_f)
        
        #降序scores
        sort_idx = np.argsort(- np.array(scores_pred_f) )
        
        scores_pred_f = scores_pred_f[sort_idx]
        bbox_coord_pred_f = bbox_coord_pred_f[sort_idx]
        labels_pred_f = labels_pred_f[sort_idx]
                
        final_idx = self._nms(scores_pred_f , bbox_coord_pred_f , nms_iou_threshold)
                
        #scores_pred_f = scores_pred_f[final_idx] #用不上
        bbox_coord_pred_f = bbox_coord_pred_f[final_idx]
        labels_pred_f = labels_pred_f[final_idx]
                
        # 绘制并保存
        display(img_arr , labels_pred_f , bbox_coord_pred_f , 'first')
                
        
    def _nms(self , probability_hat , rects_hat , nms_iou_threshold):
        idx = []
        
        length = len(probability_hat)
        lost_flag = [1]*length #标记丢弃的框 0表示丢弃
        
        max_score_idx = 0 #记录当前最大score的idx
        
        while max_score_idx < length:
            max_score_rect = rects_hat[max_score_idx]
            
            for i in range(max_score_idx+1 , length):
                if lost_flag[i] == 1 and self.img_generator.IoU( max_score_rect , rects_hat[i] ) > nms_iou_threshold: #大于阈值 丢弃
                    lost_flag[i] = 0

            max_score_idx_bak = max_score_idx #后续使用
            
            #让max_score_idx指向下一个没被丢弃的最大值
            for i in range(max_score_idx+1 , length):
                if lost_flag[i] == 1:
                    max_score_idx = i
                    break
            
            #说明max_score_idx没有移动过 即后续的都被丢弃了 终止循环
            if max_score_idx == max_score_idx_bak:
                break
        
        for i in range(length):
            if lost_flag[i] == 1:
                idx.append(i)
                
        return idx

In [608]:
frcn = FRCN(is_training=True)

In [609]:
frcn.train()

INFO:tensorflow:Restoring parameters from save/RPN/model/model.ckpt-90


InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_1' with dtype float and shape [19764,7]
	 [[Node: Placeholder_1 = Placeholder[dtype=DT_FLOAT, shape=[19764,7], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: Reshape/_151 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_281_Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'Placeholder_1', defined at:
  File "C:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-608-fe4845503517>", line 1, in <module>
    frcn = FRCN(is_training=True)
  File "<ipython-input-606-cb177f995579>", line 9, in __init__
    self.dataset = RPN_Dataset()
  File "<ipython-input-595-8c21b653dcec>", line 4, in __init__
    self.img_generator = RPN_SS()
  File "<ipython-input-594-08fb231bc7a3>", line 4, in __init__
    self.rpn = RPN()
  File "<ipython-input-466-03583547254c>", line 9, in __init__
    self.model = AlexNet_model_RPN(is_training)
  File "<ipython-input-459-76405fb525cf>", line 14, in __init__
    self.anchors = tf.placeholder(tf.float32 , shape=[19764 , 2 + 4 + 1])
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1735, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 5928, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3155, in create_op
    op_def=op_def)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_1' with dtype float and shape [19764,7]
	 [[Node: Placeholder_1 = Placeholder[dtype=DT_FLOAT, shape=[19764,7], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: Reshape/_151 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_281_Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [607]:
tf.reset_default_graph()