In [1]:
import numpy as np
import os
import sys
import scipy
import cv2
import gc

#解析使用
import xml
from xml.etree import ElementTree as ET

from glob import glob

import keras.backend as K
from keras.applications import VGG19
from keras.models import Model
from keras.utils import to_categorical

import imageio
from skimage import transform

from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.svm import SVC #类别分类使用
from sklearn.linear_model import Ridge #bounding-box回归
from sklearn.externals import joblib

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import tensorflow as tf

from tensorflow.contrib import slim

import pandas as pd

import selectivesearch as ss #候选框产生使用

from ImageNet_classes import class_names #验证alexnet使用

In [3]:
#BATCH_SIZE = 1 #一次一张图像 切勿修改

PROPOSAL_SIZE_POSITIVE = 32 #finetune 正样本
PROPOSAL_SIZE_NEGATIVE = 96 #finetune 负样本
PROPOSAL_SIZE = PROPOSAL_SIZE_POSITIVE+PROPOSAL_SIZE_NEGATIVE #128

#应该是224 224 3
#使用预训练的alexnet 58% 或者vgg16 66%
HEIGHT = 224
WIDTH = 224
CHANNEL = 3

IMG_SHAPE = (HEIGHT , WIDTH , CHANNEL)

TRAIN_DATA_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/'
TEST_DATA_PATH = '../../tensorflow2/dataset/VOC2012test/JPEGImages/'

TRAIN_XML_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/Annotations/'
TEST_XML_PATH = '../../tensorflow2/dataset/VOC2012test/Annotations/'

OBJECT_PATH = '../../tensorflow2/dataset/VOCtrainval_11-May-2012/ImageSets/Main/' #SVM需要使用的训练数据（正负样本） 训练20个svm

#pascal VOC数据集目标数量
#目标的数目 还有一个背景
CLASSES_NUM = 20+1

STR = [
    'background', #label=0
    'person',
    'bird','cat','cow','dog','horse','sheep',
    'aeroplane','bicycle','boat','bus','car','motorbike','train',
    'bottle','chair','diningtable','pottedplant','sofa','tvmonitor'
]

LABEL2STR = {idx:value for idx , value in enumerate(STR)}
STR2LABEL = {value:key for key,value in LABEL2STR.items()}

STR2LABEL['none'] = 'none' #先不使用part部分 只进行naive目标检测

#目标检测相关
IoU_THRESHOLD = 0.5

#SVM相关
SVM_IoU_THRESHOLD = 0.3

#NMS相关
NMS_IoU_THRESHOLD = 0.3 #or ~0.5

#bbox回归
BBOX_REGRESS_IoU_THRESHOLD = 0.6

#roi尺寸为6
ROI_BINS = 6

In [4]:


def spatial_pyramid_pool(conv5 , pyramid_bins): #[8 6 4]
    '''
    spp已经拉成向量
    一共batch_size个向量
    '''
    batch_size = conv5.get_shape().as_list()[0] #batch_size
    conv5_height = conv5.get_shape().as_list()[1] #feature map height
    conv5_width = conv5.get_shape().as_list()[2] #feature map width
    
    #channel = conv5.get_shape().as_list()[3]

    for i in range(len(pyramid_bins)):
        pooling_height = np.ceil(conv5_height / pyramid_bins[i])
        stride_height = np.ceil(conv5_height / pyramid_bins[i]) #floor
        
        pooling_width = np.ceil(conv5_width / pyramid_bins[i])
        stride_width = np.ceil(conv5_width / pyramid_bins[i]) #floor
        
        padding_height = int(pyramid_bins[i] * pooling_height - conv5_height)
        padding_width = int(pyramid_bins[i] * pooling_width - conv5_width)
        
        conv5_padding = tf.pad(conv5 , tf.constant([[0,0] , [0,padding_height] , [0,padding_width] ,[0,0]]))
        
        #max_pooling = tf.layers.max_pooling2d(conv5_padding , [pooling_height , pooling_width] , [stride_height , stride_width] , padding='same')
        max_pooling = tf.nn.max_pool(conv5_padding , ksize=[1,pooling_height,pooling_width,1] , strides=[1,stride_height,stride_width,1] , padding='SAME')
        
        if i==0:
            spp = tf.reshape(max_pooling , shape=(batch_size , -1))
        else:
            spp = tf.concat(values=[spp , tf.reshape(max_pooling , shape=(batch_size , -1)) ] , axis=-1)
            
    return spp

In [5]:
xml_file_names_train = glob(TRAIN_XML_PATH + '*') #所有的xml文件 完整路径

#从xml文件中读出图片相关的信息

def xml_parse(xml_file):
    '''
    return filename , shape , name_boxes , crop_boxes
    xml文件中的shape格式为 (width height 3)
    '''
    xml_file = xml.dom.minidom.parse(xml_file)
    xml_file_docu_ele = xml_file.documentElement

    filename_list = xml_file_docu_ele.getElementsByTagName('filename')
    
    #filename_list可能有多个filename的 所以要索引0(此数据集中filename只有一个)
    filename = filename_list[0].childNodes[0].data #filename_list.firstChild.data

    #图像的尺寸信息
    size_list = xml_file_docu_ele.getElementsByTagName('size')

    for size in size_list:
        width_list = size.getElementsByTagName('width')
        width = int(width_list[0].childNodes[0].data)

        height_list = size.getElementsByTagName('height')
        height = int(height_list[0].childNodes[0].data)

        channel_list = size.getElementsByTagName('depth')
        channel = int(channel_list[0].childNodes[0].data)

    shape = (width , height , channel)

    #一个文件中有多个object
    object_list = xml_file_docu_ele.getElementsByTagName('object')

    #多个object与多个object对应的详细信息
    name_boxes = [] #一个元素就是一个object
    crop_boxes = []

    for objects in object_list:
        #一次循环处理一个object信息
        #一个xml文件（即一个图像中）有多个object

        #name
        name_list = objects.getElementsByTagName('name')

        name_box = name_list[0].childNodes[0].data

        #bounding box points
        bndbox = objects.getElementsByTagName('bndbox')

        x1_list = bndbox[0].getElementsByTagName('xmin')
        x1 = int( round( float(x1_list[0].childNodes[0].data) ) )

        y1_list = bndbox[0].getElementsByTagName('ymin')
        y1 = int(round(float( y1_list[0].childNodes[0].data )))

        x2_list = bndbox[0].getElementsByTagName('xmax')
        x2 = int(round(float( x2_list[0].childNodes[0].data )))

        y2_list = bndbox[0].getElementsByTagName('ymax')
        y2 = int(round(float( y2_list[0].childNodes[0].data )))

        crop_box = [x1,x2,y1,y2]

        name_boxes.append(name_box)
        crop_boxes.append(crop_box)

    #shape:[width height channel]
    #crop_box:[x1 x2 y1 y2]
    return filename , shape , name_boxes , crop_boxes

#xml_parse(xml_file_names_train[10])

In [6]:
xml_parse(xml_file_names_train[897])

('2008_000281.jpg',
 (500, 455, 3),
 ['car', 'car', 'person'],
 [[106, 186, 377, 419], [194, 283, 396, 444], [413, 429, 399, 444]])

In [7]:
class Image(object):
    '''
    图片的真实信息
    '''
    def __init__(self):
        self.img_file_names_train = glob(TRAIN_DATA_PATH+'*') #训练全路径信息
                
    def load(self , img_path_name = None):
        '''
        如果传入 传入完整路径信息
        return img_arr , ground_truth_data , labels , crop_boxes , img_path_name[-15:-4]
        img_arr的shape为 (height width 3) 与xml文件中区分
        '''
        if not img_path_name:
            #没有指定文件名
            img_path_name = np.random.choice(self.img_file_names_train) #随机选择一张图片
            #img_path_idx = np.random.randint(0 , high = len(self.img_file_names_train)) #随机索引
        
        #else
            #svm和bndbox回归时使用 需要指定图片的路径信息
            #完整路径信息
        
        #img_arr = imageio.imread(img_name) #使用此函数打开 导致迁移失败 RGB height*width*channel
        img_arr = cv2.imread(img_path_name) #BGR height*width*chanel
        
        xml_file_name = TRAIN_XML_PATH + img_path_name[-15:-4] +  '.xml'
        
        _ , _ , name_boxes , crop_boxes = xml_parse(xml_file_name)
        
        ground_truth_data = [] #存储bndbox的图像 数据信息
        labels = [] #存储与bndbox对应的 label信息

        for i in range(len(crop_boxes)): #多个object
            x1 = crop_boxes[i][0]
            x2 = crop_boxes[i][1]
            y1 = crop_boxes[i][2]
            y2 = crop_boxes[i][3]
            
            ground_truth_data.append(img_arr[y1:y2 , x1:x2 , :])
            
            labels.append(STR2LABEL.get(name_boxes[i] , 'none'))
        
        #图片数据 ground truth具体数据 bndbox对应label bndbox坐标信息 图片文件名
        
        return img_arr , ground_truth_data , labels , crop_boxes , img_path_name[-15:-4]
    

In [8]:
#候选区域的产生
class Clip(object):
    '''
    selectivesearch 产生与图片相关的信息 候选信息
    '''
    def __init__(self):
        self.size_threshold = 220 #ss算法产生过小的bbox略去

    #单张图像裁剪函数 产生~2k的候选区域
    #返回值：候选区域的位置坐标
    def clip(self , img_arr):
        '''
        由ss算法对img_arr产生~2K proposals
        '''
        #传入图片张量
        #关键函数
        #return rects
        _ , regions = ss.selective_search(img_arr , 500 , 0.9 , 50) #可以调整ss算法的参数 _为ss算法产生的labels
        
        rects = []
        
        no_repeat = set() #保证候选框不重复出现
        
        for r in regions:
            if r['rect'] in no_repeat:
                # 已经存在了
                continue
            
            if r['size'] < self.size_threshold:
                # 小于指定size
                continue
            
            x , y , w , h = r['rect']
            
            if w == 0 or h == 0:
                continue
            
            #img_arr的shape为 height width channel
            d0 , d1 , d2 = img_arr[y:y+h , x:x+w , :].shape
            
            if d0 == 0 or d1 == 0 or d2 == 0:
                continue
                
            no_repeat.add(r['rect'])
            
            rects.append([x , x+w , y , y+h]) #x1 x2 y1 y2形式
        
        return rects
    
    #def _preprocess(self , img_arr):
    #    
    #    #TODO
    #    #对ss产生的候选区域进行预处理
    #    #例如 resize
    #    
    #    img_arr = cv2.resize(img_arr , (HEIGHT , WIDTH))
    #    
    #    img_arr = img_arr/127.5-1.0
    #    
    #    return img_arr
    #
    ##返回数组表示候选区域的具体数据信息
    #def clip_region(self , img_arr , rects = None):
    #    '''
    #    return rects_region 具体数据信息
    #    rects_region的shape为(height width 3)
    #    '''
    #    if not rects:
    #        rects = self.clip(img_arr)
    #    
    #    rects_region = []
    #    
    #    for x1 , x2 , y1 , y2 in rects:
    #        rects_region.append( self._preprocess(img_arr[y1:y2 , x1:x2 , :]) ) #正确的切片格式
    #        
    #        #预处理函数可以进行数据增强 故换为+
    #        #rects_region += ( self._preprocess(img_arr[y1:y2 , x1:x2 , :]) )
    #
    #    return rects_region
    

In [9]:
def roi_coord(rect):
    '''
    由原图中的roi坐标向conv5的feature map映射
    feature map中的坐标
    '''    
    rect[1:] = (rect[1:] - (11-1)//2 ) // 4
    rect[1:] = (rect[1:] - (3-1)//2 ) // 2
    rect[1:] = (rect[1:] - (3-1)//2 ) // 2
    
    '''-1修正'''
    #rect[2] = rect[2] - 1
    #rect[3] = rect[3] - 1
    
    #224*224 经过conv之后 变为13*13
    return np.concatenate( (rect[0:1] , np.clip(rect[1:] , a_min=0 , a_max=12) ) , axis=0)

class Img_generator(object):
    def __init__(self):
        
        self.pr_generator = Clip()
        self.img_loader = Image()

    #计算bbox面积
    def bbox_area(self , bbox):
        w = bbox[1] - bbox[0]
        h = bbox[3] - bbox[2]
        
        return w*h
    
    #计算交并比
    def IoU(self , bbox_a , bbox_b):
        xmin_a = bbox_a[0]
        xmax_a = bbox_a[1]
        ymin_a = bbox_a[2]
        ymax_a = bbox_a[3]
        
        xmin_b = bbox_b[0]
        xmax_b = bbox_b[1]
        ymin_b = bbox_b[2]
        ymax_b = bbox_b[3]
        
        if   xmin_a < xmax_b <= xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_a <= xmin_b < xmax_a and (ymin_a < ymax_b <= ymax_a or ymin_a <= ymin_b < ymax_a):
            flag = True
        elif xmin_b < xmax_a <= xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        elif xmin_b <= xmin_a < xmax_b and (ymin_b < ymax_a <= ymax_b or ymin_b <= ymin_a < ymax_b):
            flag = True
        else:
            flag = False
        
        if flag:
            x_sorted_list = sorted([xmin_a, xmax_a, xmin_b, xmax_b])
            y_sorted_list = sorted([ymin_a, ymax_a, ymin_b, ymax_b])
            
            x_intersect_w = x_sorted_list[2] - x_sorted_list[1] #0 1 2 3
            y_intersect_h = y_sorted_list[2] - y_sorted_list[1] #0 1 2 3
            
            area_inter = x_intersect_w * y_intersect_h #计算重合面积
            
            union_area = self.bbox_area(bbox_a) + self.bbox_area(bbox_b) - area_inter
            
            return area_inter/union_area
        else:
            return 0.0
    
    #ground truth coord and proposal coord计算bb回归使用的标签
    def __to_t(self , G_box , P_box):
        #print(G_box , P_box)
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        G_x , G_y , G_w , G_h = to(G_box)
        P_x , P_y , P_w , P_h = to(P_box)
        
        t_x = (G_x-P_x)/P_w
        t_y = (G_y-P_y)/P_h
        t_w = np.log(G_w/P_w)
        t_h = np.log(G_h/P_h)
        
        return t_x , t_y , t_w , t_h
    
    def get_train_proposal(self , img_arr , labels , ground_truth_coord):
        '''
        labels与ground_truth_coord相对应
        一张图片中所有可能的labels
        '''
        #下面使用的img_arr必须是原始的图 没有resize 也没有归一化到-1 1
        proposals_coord = self.pr_generator.clip(img_arr) #ss算法产生的bbox
        
        '''
        需要对ss算法产生的框子进行修正
        因为对原图进行了resize ss算法产生的框子也要发生变化
        
        对ground truth也要进行修正 修正后才可以与proposals计算iou
        '''
        h = img_arr.shape[0]
        w = img_arr.shape[1]
        
        trans_h = 224
        trans_w = 224
        
        def bbox_trans(rect):
            '''0:idx'''
            rect[1] = int(rect[1]*trans_w / w)
            rect[2] = int(rect[2]*trans_w / w)
            rect[3] = int(rect[3]*trans_h / h)
            rect[4] = int(rect[4]*trans_h / h)
        
            return rect
        
        #def bbox_retrans(rect):
        #推理中使用 再转换为原图中的坐标
        #    pass
        
        rois = []
        y = []
                        
        for j in range(len(proposals_coord)):
            for i in range(len(ground_truth_coord)):
            
        #for i in range(len(ground_truth_coord)):
            #for j in range(len(proposals_coord)):
                
                label = np.zeros(shape=CLASSES_NUM + 4 ) #one-hot + 20 coords #21+80 elements
                
                '''proposal coord'''
                roi = [0 , proposals_coord[j][0] , proposals_coord[j][1] , proposals_coord[j][2] ,  proposals_coord[j][3]]
                
                roi = np.array(roi)
                
                roi = bbox_trans(roi) #转换为resize之后的图中的坐标
                
                roi = roi_coord(roi) #向conv5 feature map中映射
                
                iou = self.IoU(ground_truth_coord[i] , proposals_coord[j])
                if iou < IoU_THRESHOLD and iou >= 0.0 : #0.5
                    #背景
                    label[0] = 1
                    
                    rois.append(roi)
                    y.append(label)
                    
                                        
                elif iou >= 0.5 :
                    #前景
                    label[labels[i]] = 1
                    
                    target = self.__to_t(ground_truth_coord[i] , proposals_coord[j])
                    
                    label[CLASSES_NUM + 0] = target[0]
                    label[CLASSES_NUM + 1] = target[1]
                    label[CLASSES_NUM + 2] = target[2]
                    label[CLASSES_NUM + 3] = target[3]
                    
                    rois.append(roi)
                    y.append(label)
                    
                    
                    '''
                    两种写法 效果一样 正样本相同 负样本有差异 
                    '''
                    break
                      
        return np.array(rois) , np.array(y)
                
    def load(self , img_path_name = None):
        '''
        img_path_name:绝对路径
        '''
        
        #图片数据 ground truth具体数据 ground truth对应label ground truth坐标信息 图片文件名
        img_arr , _ , labels , ground_truth_coord , _ = self.img_loader.load(img_path_name)
        
        rois , y = self.get_train_proposal(img_arr , labels , ground_truth_coord)
        
        img_arr = cv2.resize(img_arr , (224 , 224))
        img_arr = img_arr/127.5-1.0

        #'''增加一维 batch_size维'''
        return img_arr , rois , y
    
    #============
    #============
    '''推理阶段使用'''
    
    #Alexnet_finetune demo 和 SVM_set中使用
    def one_img_rect_region(self , path): 
        '''
        返回指定的图像由ss算法产生的proposals信息
        '''
        img_arr = cv2.imread(path) #BGR height*width*chanel
        
        proposals = self.pr_generator.clip(img_arr) #proposal*4(x1,x2,y1,y2) selectivesearch算法产生
        proposals_region = self.pr_generator.clip_region(img_arr , proposals) #proposal*224*224*3
        
        return img_arr , proposals , proposals_region
    
    #svm使用
    def one_img_rect_region_label_svm(self , path , label):
        '''
        返回ground truth信息 给svm使用
        一个类别训练一个svm使用 ovr形式
        返回与参数label相同的数据供训练svm
        '''
        #proposals
        img_arr , ground_truth_data , _labels , crop_boxes , _ = self.img_loader.load(path)
        #rects rects_region
        _ , proposals , proposals_region = self.one_img_rect_region(path)
        
        crop_boxes_x = [] #人工标定的框与参数中label对应的
        
        #训练一个类别的svm使用的数据集
        proposals_region_x = [] #候选区域数据
        labels = [] #标记
        
        #寻找与指定label相同的ground truth
        for i in range(len(_labels)):
            if _labels[i] == label:
                crop_boxes_x.append(crop_boxes[i])
        
        #svm训练负样本
        #与人工标定框iou小于0.3的为负样本
        for i in range(len(crop_boxes_x)): #一幅图的与label相对应的人工标定bbox proposal
            for j in range(len(proposals)): #一幅图的ss产生的bbox
                
                if self.IoU(crop_boxes_x[i] , proposals[j]) < SVM_IoU_THRESHOLD: #0.3
                    labels.append(0)
                
                    proposals_region_x.append(proposals_region[j])
        
        #svm训练正样本
        #与label对应的人工标定框的图像数据
        crop_boxes_x_region = self.pr_generator.clip_region(img_arr , crop_boxes_x) #proposal*227*227*3
        
        for i in range(len(crop_boxes_x)):
            labels.append(label)
            
            proposals_region_x.append(crop_boxes_x_region[i])
                
        return np.array(proposals_region_x) , labels
    
    #bbox回归使用
    
    '''
    bbox回归使用
    '''
    def one_img_rect_region_label_bnd(self , path , label):
        '''
        bbox回归使用 每一个类别一个bbox回归器
        '''
        
        img_arr , ground_truth_data , _labels , crop_boxes , _ = self.img_loader.load(path)
        
        _ , proposals , proposals_region = self.one_img_rect_region(path)
        
        crop_boxes_x = [] #人工标定的框
        
        proposals_region_x = [] #候选区域数据
        targets = [] #bbox回归使用的训练数据中的y
        
        for i in range(len(_labels)):
            if _labels[i] == label:
                crop_boxes_x.append(crop_boxes[i])
            
        for i in range(len(crop_boxes_x)): #一幅图的人工标定bbox proposal
            for j in range(len(proposals)): #一幅图的ss产生的bbox
                
                if self.IoU(crop_boxes_x[i] , proposals[j]) > BBOX_REGRESS_IoU_THRESHOLD: #0.6
                    target = self.__to_t(crop_boxes_x[i] , proposals[j])
                    
                    targets.append(target)
                    proposals_region_x.append(proposals_region[j])
                
        return np.array(proposals_region_x) , targets #区域具体数据和t


In [79]:
gg=Img_generator()

In [95]:
img_arr , a , b = gg.load('../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000284.jpg')

In [96]:
b.shape

(180, 101)

In [42]:
c , d = gg.one_img_rect_region_label_bnd('../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000278.jpg' , 1)

In [10]:
class Dataset(object):
    def __init__(self):
        self.img_generator = Img_generator()
        
        self.img_loader = Image()
        
        self.img_file_names_train = glob(TRAIN_DATA_PATH + '*')
        self.img_file_names_test = glob(TEST_DATA_PATH + '*')
        
        self._train_data()
        #self._test_data()
    
    def train_data(self):
        return self.iterator_train
    
    def test_data(self):
        return self.iterator_test
        
    def _train_data(self):
        dataset = tf.data.Dataset.from_tensor_slices((self.img_file_names_train))
        dataset = dataset.map(lambda filename : tuple( tf.py_func(self._map_train , [filename] , [tf.float32 , tf.int16 , tf.float32]) ) )
        dataset = dataset.shuffle(buffer_size = 10).batch(1).repeat(1) #一次一张图片 重复1次
        
        self.iterator_train = dataset.make_initializable_iterator()
        
    def _test_data(self):
        '''测试集没有y'''
        dataset = tf.data.Dataset.from_tensor_slices((self.img_file_names_test))
        dataset = dataset.map(lambda filename : tuple( tf.py_func(self._map_test , [filename] , [tf.float32 , tf.int16]) ))
        dataset = dataset.shuffle(buffer_size = 10).batch(1).repeat(1)
        
        self.iterator_test = dataset.make_initializable_iterator()
            
        #iterator = tf.data.Iterator.from_structure(dataset.output_types)
        #init_op = iterator.make_initializer(dataset)
        
        #x , rois = iterator.get_next() #使用之前 对init_op进行sess.run
        
        #return x , rois , init_op
    
    def _map_train(self , filename):
        '''
        x:img_arr
        rois:rois [idx coord coord]
        y:one-hot label targets
        '''
        x , rois , y = self.img_generator.load(filename)
        
        return x , rois , y
    
    def _map_test(self , filename):
        x , rois = self.img_generator.xxx(filename)
        
        return x , rois

In [11]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860
#alexnet原始模型以及预训练参数导入

def roi_pooling(conv5 , rois , pool_height , pool_width):
    '''
    conv5:[batch height width channel]
    roi-idx upper-left bottom-right
    rois中的坐标是在feature map中的坐标
    '''
    
    #conv5_height = conv5.get_shape().as_list()[1]
    #conv5_width = conv5.get_shape().as_list()[2]
    
    conv5_height = 13
    conv5_width = 13
    
    rois_idx = tf.cast(rois[: , 0] , tf.int32)
                    
    rois = tf.cast(rois , tf.float32)
        
    rois_coord = rois[: , 1:] #[x1 x2 y1 y2]
    
    normalization = tf.cast(tf.stack([ conv5_width , conv5_width , conv5_height , conv5_height ],axis=-1) , dtype=tf.float32)
    rois_coord = tf.div(rois_coord , normalization)
    
    rois_coord = tf.stack([rois_coord[: , 2] , rois_coord[: , 0] , rois_coord[: , 3] , rois_coord[: , 1] ] , axis=1)
    
    rois_conv5_feature = tf.image.crop_and_resize(conv5 , boxes=rois_coord , box_ind=rois_idx , crop_size=[12 , 12] )
    
    rois_pooling_feature = slim.max_pool2d(rois_conv5_feature , kernel_size=[2 , 2 ] , stride=[2 , 2 ] , padding='SAME')
    
    return rois_pooling_feature
    
    #def _roi_pooling(_rois):
    #    for (i , roi) in enumerate(rois):
    #        roi_new = roi_coord(roi[1:])

    #        roi_new_height = roi_new[3] - roi_new[2]
    #        roi_new_width = roi_new[1] - roi_new[0]

    #        roi_new_pool_height = int( np.ceil(roi_new_height / pool_height) )
    #        roi_new_pool_width = int( np.ceil(roi_new_width / pool_width) )

    #        conv5_roi = tf.slice(conv5 , begin=[0 , roi_new[2] , roi_new[0] , 0] , size=[1 , roi_new[3] - roi_new[2] , roi_new[1] - roi_new[0] , -1])

    #        print(conv5_roi.get_shape())

    #        if i == 0:
    #            '''第一次运行'''
    #            pool5_roi = slim.max_pool2d(conv5_roi , kernel_size=[roi_new_pool_height , roi_new_pool_width] , stride=[roi_new_pool_height , roi_new_pool_width] , padding='SAME')
    #        else:
    #            pool5_roi = tf.concat(values=[pool5_roi , slim.max_pool2d(conv5_roi , kernel_size=[roi_new_pool_height , roi_new_pool_width] , stride=[roi_new_pool_height , roi_new_pool_width] , padding='SAME')] , axis=0)

    #    return pool5_roi
    '''tf.map_fn is a good function'''



class AlexNet_model(object):
    def __init__(self ,x , rois , y , is_training=True):
        
        self.x = x
        self.rois = rois
        self.y = y
                        
        self.load_paramter()
        
        self.cls_pred , self.bbox_pred = self.model(is_training)
        
        if is_training:
            self.loss_layer(self.cls_pred , self.bbox_pred , self.y)
            self.total_loss = tf.losses.get_total_loss(add_regularization_losses=False) #cls_loss bbox_loss
        
    def group_conv(self , x , kernel , strides):
        #2 GPUs
        #原始alexnet配置
        group_x = tf.split(x , num_or_size_splits=2 , axis=3)
        group_kernel = tf.split(kernel , num_or_size_splits=2 , axis=3)

        group_conv0 = tf.nn.conv2d(group_x[0] , group_kernel[0] , strides=strides , padding='SAME')
        group_conv1 = tf.nn.conv2d(group_x[1] , group_kernel[1] , strides=strides , padding='SAME')

        group_conv = tf.concat((group_conv0 , group_conv1) , axis=3)

        return group_conv
    
    def load_paramter(self):
        #=======
        #加载预训练权重
        #获取预训练参数
        net_data = np.load('bvlc_alexnet.npy' , encoding='bytes').item() #不加encoding='bytes' 死机
        
        self.conv1w = tf.Variable(net_data["conv1"][0] , trainable=False)
        self.conv1b = tf.Variable(net_data["conv1"][1] , trainable=False)

        self.conv2w = tf.Variable(net_data["conv2"][0] , trainable=False)
        self.conv2b = tf.Variable(net_data["conv2"][1] , trainable=False)

        self.conv3w = tf.Variable(net_data["conv3"][0] , trainable=False)
        self.conv3b = tf.Variable(net_data["conv3"][1] , trainable=False)

        self.conv4w = tf.Variable(net_data["conv4"][0] , trainable=False)
        self.conv4b = tf.Variable(net_data["conv4"][1] , trainable=False)

        self.conv5w = tf.Variable(net_data["conv5"][0] , trainable=False)
        self.conv5b = tf.Variable(net_data["conv5"][1] , trainable=False)
    
    #构建alexnet模型
    def model(self , is_training=True , keep_prob=0.5):
        conv1 = tf.nn.conv2d(self.x , self.conv1w , strides=(1,4,4,1) , padding='SAME')
        conv1 = tf.nn.bias_add(conv1 , self.conv1b)
        conv1 = tf.nn.relu(conv1)
        lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool1 = tf.nn.max_pool(lrn1 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv2 = self.group_conv(maxpool1 , self.conv2w , strides=(1,1,1,1))
        conv2 = tf.nn.bias_add(conv2 , self.conv2b)
        conv2 = tf.nn.relu(conv2)
        lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
        maxpool2 = tf.nn.max_pool(lrn2 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')

        conv3 = tf.nn.conv2d(maxpool2 , self.conv3w , strides=(1,1,1,1) , padding='SAME')
        conv3 = tf.nn.bias_add(conv3 , self.conv3b)
        conv3 = tf.nn.relu(conv3)

        conv4 = self.group_conv(conv3 , self.conv4w , strides=(1,1,1,1))
        conv4 = tf.nn.bias_add(conv4 , self.conv4b)
        conv4 = tf.nn.relu(conv4)

        conv5 = self.group_conv(conv4 , self.conv5w , strides=(1,1,1,1))
        conv5 = tf.nn.bias_add(conv5 , self.conv5b)
        conv5 = tf.nn.relu(conv5)
        
        roi_pool5 = roi_pooling(conv5 , self.rois , pool_height = ROI_BINS , pool_width = ROI_BINS)
        
        flatten = slim.flatten(roi_pool5)    
        
        fc6 = slim.fully_connected(flatten , num_outputs=1024)
        fc6 = slim.dropout(fc6 , keep_prob=keep_prob , is_training=is_training)
        
        fc7 = slim.fully_connected(fc6 , num_outputs=1024)
        fc7 = slim.dropout(fc7 , keep_prob=keep_prob , is_training=is_training)

        cls = slim.fully_connected(fc7 , num_outputs=CLASSES_NUM , activation_fn=tf.nn.softmax) #batch 21
        '''batch 80 20个坐标信息'''
        '''线性激活y=x'''
        bbox = slim.fully_connected(fc7 , num_outputs=4 , activation_fn=None , weights_initializer=tf.initializers.truncated_normal(mean=0.0 , stddev=0.001))
        
        return cls , bbox
        
    def loss_layer(self , cls_pred , bbox_pred , labels):
        cls_true = labels[: , : CLASSES_NUM]
        bbox_true = labels[: , CLASSES_NUM :]
        
        cross_entropy = - tf.reduce_sum( cls_true * tf.log(cls_pred) )
        cls_loss = tf.reduce_mean(cross_entropy)
        
        mask = tf.tile( tf.reshape(cls_true[ : , 0] , [-1 , 1]) , multiples=[1 , 4]) #先扩展第一列
                
        bbox_loss = tf.reduce_mean( tf.reduce_sum( tf.square( (1-mask) * (bbox_pred - bbox_true) ) ) )
        
        tf.losses.add_loss(cls_loss)
        tf.losses.add_loss(bbox_loss)
        

In [14]:
#refer:https://blog.csdn.net/two_vv/article/details/76769860

class FRCN(object):
    '''
    完整模型
    '''
    
    def __init__(self , is_training = True):      
        self.dataset = Dataset()
        
        #self.filewriter_path = 'qp/image' #tensorboard
        #self.checkpoint_path = 'qp/finetune_alexnet' #模型持久化
        
        if is_training:
            '''构造训练集'''
            self.iterator_train = self.dataset.train_data()
            
            self.x = self.iterator_train.get_next()[0]
            self.rois = self.iterator_train.get_next()[1]
            self.y = self.iterator_train.get_next()[2]
        else:
            '''构造测试集'''
            self.iterator_test = self.dataset.test_data()
            
            self.x = self.iterator_test.get_next()[0]
            self.rois = self.iterator_test.get_next()[1]
            self.y = None
                        
        self.model = AlexNet_model(self.x , self.rois , self.y , is_training)

        if is_training:
            '''训练参数'''
            self.epoch = 100000
            
            self.global_step = tf.Variable(initial_value=0 , trainable=False)
            
            self.learning_rate = tf.train.exponential_decay(learning_rate=0.00001 , global_step=self.global_step,
                                                            decay_steps=900 , decay_rate=0.8 , staircase=True)
            
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.model.total_loss , global_step=self.global_step)
        
            '''引入滑动平均'''
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9) #滑动平均
            self.average_op = self.ema.apply(tf.trainable_variables()) #给所有的可训练变量应用滑动平均
            
            with tf.control_dependencies([self.optimizer]):
                self.train_op = tf.group(self.average_op)
        
        '''new'''
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
                
        #self.merged_summary = tf.summary.merge_all()
        #self.writer = tf.summary.FileWriter(logdir = self.filewriter_path ) #qp/image
        
        #self.saver = tf.train.Saver()

    def train(self):
        count = 0
        
        self.sess.run(self.iterator_train.initializer)
        
        for i in range(1):
            print(i , end=',')
            
            try:
                while True:                    
                    _ , total_loss = self.sess.run([self.train_op , self.model.total_loss] )
                    
                    print(count , total_loss)
                    count += 1
                    
            except tf.errors.OutOfRangeError:
                #重置迭代器
                self.sess.run(self.iterator_train.initializer)
                count = 0
            
    def predict(self , path):
        self.sess.run(self.iterator_test.initializer)
        pass
    
    
    '''
    def train(self):
        with tf.Session() as sess:
            
            if os.path.exists('qp/finetune_alexnet/checkpoint'):
                sess.run(tf.global_variables_initializer())
                self.saver.restore(sess , tf.train.latest_checkpoint('qp/finetune_alexnet/')) #读取模型
               
                #计算图放到tensorboard中
                self.writer.add_graph(sess.graph)
            
                merge_summary = sess.run(self.merged_summary , {self.x:train_images_batch , self.y:train_labels_batch , self.keep_prob:1.0 })
                self.writer.add_summary(merge_summary , i) #写入tenorboard

                checkpoint_name = os.path.join(self.checkpoint_path , 'model_epoch.ckpt')
                save_path = self.saver.save(sess , checkpoint_name)
                
        #self.writer.close()
    '''



In [15]:
frcn = FRCN(is_training=True)

In [16]:
frcn.train()

0,

FailedPreconditionError: GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>, <unknown>], output_types=[DT_FLOAT, DT_INT16, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator)]]
	 [[Node: fully_connected_6/Softmax/_47 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_629_fully_connected_6/Softmax", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'IteratorGetNext', defined at:
  File "C:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-fe4845503517>", line 1, in <module>
    frcn = FRCN(is_training=True)
  File "<ipython-input-12-c8c039c02137>", line 18, in __init__
    self.x = self.iterator_train.get_next()[0]
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py", line 370, in get_next
    name=name)), self._output_types,
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_dataset_ops.py", line 1495, in iterator_get_next
    output_shapes=output_shapes, name=name)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
    op_def=op_def)
  File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

FailedPreconditionError (see above for traceback): GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>, <unknown>, <unknown>], output_types=[DT_FLOAT, DT_INT16, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator)]]
	 [[Node: fully_connected_6/Softmax/_47 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_629_fully_connected_6/Softmax", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


In [11]:
#训练svm与bounding回归使用
class Object_load(object):
    def __init__(self):
        '''
        object_path:训练svm的每一个类别使用
        '''
        self.object_path = [OBJECT_PATH+LABEL2STR[i]+'_train.txt' for i in range(1 , len(LABEL2STR))]
        self.img_generator = Img_generator()
    
    
    def load(self , label): #label为load的数据类型 1-20
        '''
        label先减1
        object_path从0开始
        '''
        imgs_path = []
        
        for line in open(self.object_path[label-1]).readlines(): #需要减1 list下标从0开始
            line = line.rstrip('\n') #去掉末尾的\n
            
            line_split = line.split(' ')
            
            if line_split[-1] == '1':
                #正样本
                imgs_path.append(TRAIN_DATA_PATH+line_split[0]+'.jpg')
    
        return imgs_path
    

In [19]:
ol = Object_load()

In [20]:
ol.load(1)

['../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000008.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000023.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000036.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000041.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000096.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000109.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000128.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000132.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000141.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000142.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000143.jpg',
 '../../tensorflow2/dataset/VOCtrainval_11-May-2012/JPEGImages/2008_000144.jpg',
 '../../tensorflow2/dataset/

In [12]:
#20个SVM分类
class SVM_set(object):
    def __init__(self):
        self.classes_num = CLASSES_NUM #21
        
        self.img_generator = Img_generator() #IoU
        self.object_load = Object_load()
        self.alexnet = Alexnet_finetune()
        
        self.svms = [] #svm集合 20个类别 就有20个svm
        
        #防止显存溢出添加
        self.resource_threshold_shape_0 = 800 #候选框超过800 将其拆分为两块处理 #1000还是有点大
        
        #self.train_all_svm() #自调
        
    def train_svm_with_label(self , label):
        if os.path.exists('qp/svm_model/svm_%d.m' % label):
            #模型已经存在 载入即可
            print('exist,loading......')
            self.svms.append(joblib.load('qp/svm_model/svm_%d.m' % label))
            
            print('finish loading')
            return 
        
        imgs_path = self.object_load.load(label)
        
        '''
        自平衡训练数据
        概率项设为True 概率值供后续nms使用
        '''
        svm = SVC(probability = True , class_weight='balanced') #为True能输出类别的概率值 自动平衡训练数据
        
        #没有model的保存数据就先训练
        if not os.path.exists('qp/finetune_alexnet/checkpoint'):
            self.alexnet.train() 
            
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self.alexnet.saver.restore(sess , tf.train.latest_checkpoint('qp/finetune_alexnet/')) #读取模型整个model的模型参数
            
            scores = 0.0 #debug
            count = 0 #debug
            
            images_count = len(imgs_path) #debug
            step = 0 #debug
            
            for path in imgs_path:
                print('No.%d/%d' % (step , images_count)) #debug
                step = step+1 #debug
                
                #每次使用label类型的一张图片进行训练
                #将所有的label对应的图片全部训练完
                regions , labels = self.img_generator.one_img_rect_region_label_svm(path , label)
                
                if np.sum(labels) == label*len(labels) or np.sum(labels) == 0:
                    #说明所有的label都一样 那就不训练了 而且fit函数报错
                    continue
                
                #显存太小 不得不增加判断逻辑 这会增加运行时间
                
                '''
                if regions.shape[0] > self.resource_threshold_shape_0:
                    #拆开运行训练
                    for idx in range(regions.shape[0] // self.resource_threshold_shape_0):
                        labels_sum = np.sum(labels[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0])
                        
                        if labels_sum == label*self.resource_threshold_shape_0 or labels_sum == 0:
                            #所有的label全部一样 sklearn中svm报错
                            continue
                        
                        regions_cnn_features_block = sess.run(self.alexnet.features , feed_dict={self.alexnet.x : regions[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0] , self.alexnet.keep_prob:1.0})
                        svm.fit(regions_cnn_features_block , labels[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0])

                        scores = scores + svm.score(regions_cnn_features_block , labels[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0])
                        count = count+1
                    
                    #余下的部分（不是整的）
                    labels_sum = np.sum(labels[(idx+1)*self.resource_threshold_shape_0:])
                    if labels_sum == label*len(labels[(idx+1)*self.resource_threshold_shape_0:]) or labels_sum == 0:
                        continue
                            
                    regions_cnn_features_extra = sess.run(self.alexnet.features , feed_dict={self.alexnet.x : regions[(idx+1)*self.resource_threshold_shape_0:] , self.alexnet.keep_prob:1.0})
                    svm.fit(regions_cnn_features_extra , labels[(idx+1)*self.resource_threshold_shape_0:])
                    
                    scores = scores + svm.score(regions_cnn_features_extra , labels[(idx+1)*self.resource_threshold_shape_0:])
                    count = count+1
                
                else:
                    regions_cnn_features = sess.run(self.alexnet.features , feed_dict={self.alexnet.x : regions , self.alexnet.keep_prob:1.0})
                    svm.fit(regions_cnn_features , labels)

                    scores = scores + svm.score(regions_cnn_features , labels)
                    count = count+1
                '''
                
                '''batch数量 先限制在128'''
                if regions.shape[0] >= 128:
                    '''
                    svm要求数据集中类别不能一样
                    '''
                    if np.sum(labels[: 128]) == 128*label or np.sum(labels[: 128]) == 0:
                        continue
                    
                    regions_cnn_features = sess.run(self.alexnet.features , feed_dict={self.alexnet.x : regions[ : 128] , self.alexnet.keep_prob:1.0})
                    
                    svm.fit(regions_cnn_features , labels[: 128])

                    count = count + 1
                else:
                    '''
                    svm要求数据集中类别不能一样
                    '''
                    
                    if np.sum(labels) == 128*label or np.sum(labels) == 0:
                        continue
                    
                    
                    regions_bak = np.copy(regions)
                    labels_bak = np.copy(labels)
                    
                    _add_count = int( np.floor(128 / regions.shape[0]) )
                    
                    for i in range(_add_count):
                        regions = np.concatenate((regions , regions))
                        
                        labels = np.concatenate((labels , labels))
                    
                    
                    regions_cnn_features = sess.run(self.alexnet.features , feed_dict={self.alexnet.x : regions[ : 128] , self.alexnet.keep_prob:1.0})

                    svm.fit(regions_cnn_features , labels[: 128])

                    count = count + 1
                    
                
                #np.copy() #不需要重新拷贝回去 因为重新赋值了
                
        try:
            print('label:%s average_score:%f' % (LABEL2STR[label] , scores/count)) #debug
        except ZeroDivisionError:
            pass #这里不会被执行到

        #先存到变量中 再保存至磁盘
        self.svms.append(svm)
        joblib.dump(svm , 'qp/svm_model/svm_%d.m' % label) #将svm保存起来

    
    def train_all_svm(self):
        svms_path = glob('qp/svm_model/*.m')
        
        if len(svms_path) == len(LABEL2STR) - 1: #==20
            #存在已经训练好的模型 就不再训练了 直接读取训练好的模型即可
            print('loading all model......')
            
            for label in range(1 , len(LABEL2STR)):
                path = 'qp/svm_model\svm_%d.m' % label
                print('loading %s model ......' % path)
                self.svms.append(joblib.load(path))
                
            print('finish loading')
            return 
        
        for label in range(1 , len(LABEL2STR)):
            self.train_svm_with_label(label)
    
    '''
    推理阶段使用
    '''
    def nms(self , rects_hat , probability_hat): #已降序处理
        #非极大值抑制 #在同一个label之间进行抑制
        final_rects_hat = []
        final_probability_hat = [] #应该用不上了
        
        length = len(probability_hat)
        lost_flag = [1]*length #标记丢弃的框
        
        max_score_idx = 0 #记录当前最大score的idx
        
        while max_score_idx < length:
            max_score_rect = rects_hat[max_score_idx]
            
            for i in range(max_score_idx+1 , length): #rects_hat[max_score_idx:]:
                if lost_flag[i] == 1 and self.img_generator.IoU( max_score_rect , rects_hat[i] ) > NMS_IoU_THRESHOLD: #大于阈值 丢弃
                    lost_flag[i] = 0

            max_score_idx_bak = max_score_idx #后续使用
            
            #让max_score_idx指向下一个没被丢弃的最大值
            for i in range(max_score_idx+1 , length):
                if lost_flag[i] == 1:
                    max_score_idx = i
                    break
            
            #说明max_score_idx没有移动过 即后续的都被丢弃了
            if max_score_idx == max_score_idx_bak:
                break
        
        for i in range(length):
            if lost_flag[i] == 1:
                final_rects_hat.append(rects_hat[i])
                final_probability_hat.append(probability_hat[i])
                
        return np.array(final_rects_hat) , np.array(final_probability_hat)
    
    def __meta_predict(self , regions_cnn_features , svm , rects): #proposal*4096
        labels = svm.predict(regions_cnn_features) #预言的labels 含有背景和object
        
        #print(labels) #debug
        
        probability = svm.predict_proba(regions_cnn_features) #概率信息 为NMS做准备
        
        #print(probability) #debug
        
        rects_hat = [] #label对应不是背景的标记框 才予以显示和bbox回归 label是背景的候选框不需要进行bbox回归
        probability_hat = [] #label对应不是背景的概率值 NMS score需要
        
        for i in range(len(labels)):
            if labels[i] != 0: #不是背景
                rects_hat.append(rects[i])
                probability_hat.append(np.max(probability[i]))
        
        rects_hat = np.array(rects_hat)
        probability_hat = np.array(probability_hat)
        
        if len(probability_hat) != 0:
            sorted_idx = np.argsort(probability_hat) #对概率进行升序排列 NMS score使用
            #框坐标和概率score都升序
            rects_hat = rects_hat[sorted_idx][::-1] #降序
            probability_hat = probability_hat[sorted_idx][::-1] #降序
        
        return rects_hat , probability_hat #返回非背景的候选框 每个候选框的概率值

    def predict(self , path):
        #rects regions
        img_arr , proposals , proposals_region = self.img_generator.one_img_rect_region(path) #原图像数据 候选框坐标 候选框数据
        regions_cnn_features = self.alexnet.extract_feature(proposals_region)
        
        all_rects_hat_and_proba_and_labels = [] #框坐标 标记 概率值
        
        #使用所有的svm
        for label in range(1 , len(LABEL2STR)): #20
            rects_hat , probability_hat = self.__meta_predict(regions_cnn_features , self.svms[label-1] , proposals)
            
            nms_rects_hat , nms_probability_hat = self.nms(rects_hat , probability_hat)
            '''
            nms_probability_hat暂无用
            可以显示在预测的框子角 显示概率值
            '''
            
            #每一个元素是label对用的 一堆框子坐标 一堆框子对应的概率 一个label
            all_rects_hat_and_proba_and_labels.append( (nms_rects_hat , nms_probability_hat , label ) )
            
        return all_rects_hat_and_proba_and_labels , img_arr

In [13]:
s_set = SVM_set()
#display_demo = Display()

In [None]:
s_set.train_all_svm()

INFO:tensorflow:Restoring parameters from qp/finetune_alexnet/model_epoch.ckpt
No.0/1994
No.1/1994
No.2/1994
No.3/1994
No.4/1994
No.5/1994
No.6/1994
No.7/1994
No.8/1994
No.9/1994
No.10/1994
No.11/1994
No.12/1994
No.13/1994
No.14/1994
No.15/1994
No.16/1994
No.17/1994
No.18/1994
No.19/1994
No.20/1994
No.21/1994
No.22/1994
No.23/1994
No.24/1994
No.25/1994
No.26/1994
No.27/1994
No.28/1994
No.29/1994
No.30/1994
No.31/1994
No.32/1994
No.33/1994
No.34/1994
No.35/1994
No.36/1994
No.37/1994
No.38/1994
No.39/1994
No.40/1994
No.41/1994
No.42/1994


In [14]:
all_rects_hat_and_proba_and_labels , img_arr = s_set.predict('person.jpg')



INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt


In [15]:
all_rects_hat_and_proba_and_labels

[(array([], dtype=float64), array([], dtype=float64), 1),
 (array([], dtype=float64), array([], dtype=float64), 2),
 (array([], dtype=float64), array([], dtype=float64), 3),
 (array([], dtype=float64), array([], dtype=float64), 4),
 (array([], dtype=float64), array([], dtype=float64), 5),
 (array([], dtype=float64), array([], dtype=float64), 6),
 (array([], dtype=float64), array([], dtype=float64), 7),
 (array([], dtype=float64), array([], dtype=float64), 8),
 (array([], dtype=float64), array([], dtype=float64), 9),
 (array([], dtype=float64), array([], dtype=float64), 10),
 (array([], dtype=float64), array([], dtype=float64), 11),
 (array([], dtype=float64), array([], dtype=float64), 12),
 (array([], dtype=float64), array([], dtype=float64), 13),
 (array([], dtype=float64), array([], dtype=float64), 14),
 (array([], dtype=float64), array([], dtype=float64), 15),
 (array([], dtype=float64), array([], dtype=float64), 16),
 (array([], dtype=float64), array([], dtype=float64), 17),
 (arra

In [17]:
display_demo = Display()

In [28]:
display_demo.display_svm(img_arr , all_rects_hat_and_proba_and_labels)

In [21]:
#bounding-box回归
class Bbox_regression(object):
    def __init__(self):
        self.ridges = [] #pascal voc 20类别就使用20个线性回归
        
        self.object_load = Object_load()
        self.img_generator = Img_generator()
        self.svm_set = SVM_set()
        #self.alexnet = Alexnet_finetune()
        self.pr_generator = Clip()
        
        self.resource_threshold_shape_0 = 800 #同svm类
        
        self.train_all_ridge() #自调
        
    def train_ridge_with_label(self , label):
        ##if os.path.exists('../RCNN/qp/linear_model/ridge_%d.m' % label):
        ##    #模型已经存在 载入即可
        ##    print('exist,loading......')
        ##    self.ridges.append(joblib.load('../RCNN/qp/linear_model/ridge_%d.m' % label))
        ##    
        ##    print('finish loading')
        ##    return 
        
        imgs_path = self.object_load.load(label)
        
        ridge = Ridge(alpha=1000) #正则参数为1000
        
        #没有model的保存数据就先训练
        if not os.path.exists('qp/finetune_alexnet/checkpoint'):
            self.svm_set.alexnet.train() 
            
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self.svm_set.alexnet.saver.restore(sess , tf.train.latest_checkpoint('qp/finetune_alexnet/')) #读取模型整个model的模型参数

            scores = 0.0 #debug
            count = 0 #debug
            
            images_count = len(imgs_path) #debug
            step = 0 #debug
            
            for path in imgs_path:
                print('No.%d/%d' % (step , images_count)) #debug
                step = step+1 #debug
                #regions
                proposals_region , targets = self.img_generator.one_img_rect_region_label_bnd(path , label)
                
                if len(proposals_region) == 0:
                    #没有数据
                    continue
                
                #显存太小 不得不增加判断逻辑 这会增加运行时间
                if proposals_region.shape[0] > self.resource_threshold_shape_0:
                    #拆开运行训练
                    for idx in range(proposals_region.shape[0] // self.resource_threshold_shape_0):
                        regions_cnn_features_block = sess.run(self.svm_set.alexnet.features , feed_dict={self.svm_set.alexnet.x : proposals_region[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0] , self.svm_set.alexnet.keep_prob:1.0})
                        
                        ridge.fit(regions_cnn_features_block , targets[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0])
                        scores = scores + ridge.score(regions_cnn_features_block , targets[idx*self.resource_threshold_shape_0 : (idx+1)*self.resource_threshold_shape_0])
                        
                        count = count+1
                    
                    #余下的部分（不是整的） 
                    regions_cnn_features_extra = sess.run(self.svm_set.alexnet.features , feed_dict={self.svm_set.alexnet.x : proposals_region[(idx+1)*self.resource_threshold_shape_0:] , self.svm_set.alexnet.keep_prob:1.0})
                    ridge.fit(regions_cnn_features_extra , targets[(idx+1)*self.resource_threshold_shape_0:])
                    scores = scores + ridge.score(regions_cnn_features_extra , targets[(idx+1)*self.resource_threshold_shape_0:])
                    
                    count = count+1
                
                else:
                    regions_cnn_features = sess.run(self.svm_set.alexnet.features , feed_dict={self.svm_set.alexnet.x : proposals_region , self.svm_set.alexnet.keep_prob:1.0})
                    ridge.fit(regions_cnn_features , targets)
                    scores = scores + ridge.score(regions_cnn_features , targets)

                    count = count+1
                    
        try:
            print('label:%s average_score:%f' % (LABEL2STR[label] , scores/count)) #debug
        except ZeroDivisionError:
            pass #这里不会被执行到    
        
        self.ridges.append(ridge)
        
        joblib.dump(ridge , 'qp/linear_model/ridge_%d.m' % label) #将ridge保存起来
    
    def train_all_ridge(self):
        ridges_path = glob('qp/linear_model/*.m')
        
        if len(ridges_path) == len(LABEL2STR) - 1:
            #存在已经训练好的模型 就不再训练了 直接读取训练好的模型即可
            print('loading all model......')
            
            for label in range(1 , len(LABEL2STR)):
                path = 'qp/linear_model/ridge_%d.m' % label
                print('loading %s model ......' % path)
                self.ridges.append(joblib.load(path))
                
            print('finish loading')
            return 
        
        for label in range(1 , len(LABEL2STR)):
            self.train_ridge_with_label(label)
    
    '''
    推理阶段使用
    '''
    def __meta_predict(self , regions_cnn_features , rects , ridge):
        def to(rect):
            x1 = rect[0]
            x2 = rect[1]
            y1 = rect[2]
            y2 = rect[3]
            
            w = x2-x1
            h = y2-y1
            
            x_c = (x1+x2)//2
            y_c = (y1+y2)//2
            
            return x_c , y_c , w , h
        
        def ot(target):
            x_c = target[0]
            y_c = target[1]
            w = target[2]
            h = target[3]
            
            x1 = 0.5*(2*x_c-w)
            y1 = 0.5*(2*y_c-h)
            x2 = x1+w
            y2 = y1+h
            
            x1=int(round(x1))
            y1=int(round(y1))
            x2=int(round(x2))
            y2=int(round(y2))
            
            if x1<0:
                x1 = 0
            if x2>WIDTH:
                x2 = WIDTH
            if y1<0:
                y1 = 0
            if y2>HEIGHT:
                y2 = HEIGHT
            
        
            return [x1 , x2 , y1 , y2]
        
        def target2rect(target_hat , P_box):
            t_x = target_hat[0]
            t_y = target_hat[1]
            t_w = target_hat[2]
            t_h = target_hat[3]
            
            P_x , P_y , P_w , P_h = to(P_box)
            
            G_x_hat = P_w*t_x+P_x
            G_y_hat = P_h*t_y+P_y
            G_w_hat = P_w*np.exp(t_w)
            G_h_hat = P_h*np.exp(t_h)
            
            return ot([G_x_hat , G_y_hat , G_w_hat , G_h_hat]) #ot还需要转化为(x1,x2,y1,y2)形式
        
        #由每一个 与label对应的ridge 预测的结果
        target_hat = ridge.predict(regions_cnn_features)
        rect_hat = []
        
        #使用预测结果对框子进行调整
        for i in range(len(target_hat)):
            rect_hat.append(target2rect(target_hat[i] , rects[i]))
            
        return np.array(rect_hat)
    
    
    def predict(self , path):
        '''
        svm的nms之后进行bounding box回归
        '''
        
        #预测的框 概率 label
        all_rects_hat_and_proba_and_labels , img_arr=self.svm_set.predict(path)
        
        print('finish svm') #debug
        bbox_r_rects_hat_and_label = []
        
        for (nms_rects_hat , _ , label) in all_rects_hat_and_proba_and_labels:
            
            if len(nms_rects_hat) == 0:
                #说明此图像不存在此label
                bbox_r_rects_hat_and_label.append((np.array([]) , label))
                continue 
            
            nms_rects_hat_region = self.pr_generator.clip_region(img_arr , nms_rects_hat)
            
            nms_rects_hat_region_cnn_features = self.svm_set.alexnet.extract_feature(nms_rects_hat_region)
    
            nms_rects_hat_bb_r = self.__meta_predict(nms_rects_hat_region_cnn_features , nms_rects_hat , self.ridges[label-1])
        
            '''
            最终的预测框子和label信息
            '''
            #一个元素为一个label对应的 一堆经过调整的框子 一个label
            bbox_r_rects_hat_and_label.append((nms_rects_hat_bb_r , label))
    
        return bbox_r_rects_hat_and_label , img_arr #最终的框

In [14]:
bbox_r = Bbox_regression()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

INFO:tensorflow:Summary name fc7/weights:0/gradient is illegal; using fc7/weights_0/gradient instead.
INFO:tensorflow:Summary name fc7/biases:0/gradient is illegal; using fc7/biases_0/gradient instead.
INFO:tensorflow:Summary name fc8/weights:0/gradient is illegal; using fc8/weights_0/gradient instead.
INFO:tensorflow:Summary name fc8/biases:0/gradient is illegal; using fc8/biases_0/gradient instead.
INFO:tensorflow:Summary name fc7/weights:0 is illegal; using fc7/weights_0 instead.
INFO:tensorflow:Summary name fc7/biases:0 is illegal; using fc7/biases_0 instead.
INFO:tensorflow:Summary name fc8/weights:0 is illegal; using fc8/weights_0 instead.
INFO:tensorflow:Summary name fc8/biases:0 is illegal; using fc8/biases_0 instead.
loading all model......
loading ../RCNN/qp/svm_model\svm_1.m model .....

In [29]:
bbox_r_rects_hat_and_label , img_arr = bbox_r.predict('person.jpg')

INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
finish svm
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt
INFO:tensorflow:Restoring parameters from ../RCNN/qp/finetune_alexnet/model_epoch.ckpt


In [30]:
display = Display()

In [31]:
display.display_final(img_arr , bbox_r_rects_hat_and_label , name='xxx')

In [16]:
#效果展示
class Display(object):
    def __init__(self):
        self.name_generator = self.__name_generator()
    
    def __name_generator(self):
        for i in range(100000):
            yield i
    
    def __meta_display(self , meta_img , labels , G_box , img_name):
        #一幅图像中显示proposal个G框与label
        
        for i in range(len(labels)):
            x1 = G_box[i][0][0]
            x2 = G_box[i][0][1]
            y1 = G_box[i][0][2]
            y2 = G_box[i][0][3]
            #绘制G_box
            meta_img = cv2.rectangle(meta_img , (x1 , y1) , (x2 , y2) , (255,255,255))
            #显示label字符串
            meta_img = cv2.putText(meta_img , LABEL2STR[labels[i][0]] , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)
        
        #plt.imshow(meta_img) #图像查看
        
        plt.imsave(arr=meta_img , fname = 'result/%s.jpg' % img_name) #保存图像
        
    #demo
    def display(self , img , labels , G_box , img_names):
        for i in range(img.shape[0]):
            self.__meta_display(img[i] , labels[i] , G_box[i] , img_names[i])
        
    #demo
    def display_svm(self , img , all_rects_hat_and_proba_and_labels , name='xxx'):
        original_img = np.copy(img)
        
        for (nms_rects_hat , _ , label) in all_rects_hat_and_proba_and_labels:
            img = np.copy(original_img)
            
            for (x1,x2,y1,y2) in nms_rects_hat:
                img = cv2.rectangle(img , (x1 , y1) , (x2 , y2) , (255,255,255))
                img = cv2.putText(img , LABEL2STR[label] , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)
            
            plt.imsave(arr=img[:,:,[2,1,0]] , fname = 'result/%s_%s.jpg' % (name , LABEL2STR[label])) #保存图像
            
    
    #绘制20张图片 即20个类别中对应的类别中加框
    def display_final(self , img_arr , bbox_r_rects_hat_and_label , name='xxx'):
        label_i=1
        original_img = np.copy(img_arr)
        
        for (rects_hat , label) in bbox_r_rects_hat_and_label:
            img_arr = np.copy(original_img)
            
            for (x1,x2,y1,y2) in rects_hat:
                img_arr = cv2.rectangle(img_arr , (x1 , y1) , (x2 , y2) , (255,255,255))
                img_arr = cv2.putText(img_arr , LABEL2STR[label] , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)
            
            plt.imsave(arr=img_arr[:,:,[2,1,0]] , fname = 'result/%s_%s.jpg' % (name , LABEL2STR[label_i])) #保存图像
            
            label_i = label_i + 1
        
    def display_final_one(self , img_arr , bbox_r_rects_hat_and_label , name='xxx'):
        label_i=1
        img = np.copy(img_arr)
        
        for (rects_hat , label) in bbox_r_rects_hat_and_label:
            for (x1,x2,y1,y2) in rects_hat:
                img = cv2.rectangle(img , (x1 , y1) , (x2 , y2) , (255,255,255))
                img = cv2.putText(img , LABEL2STR[label] , org=(x1 , y1+10) , fontFace = cv2.FONT_HERSHEY_PLAIN , fontScale=1 , color = (255,255,255), thickness = 1)
            
        plt.imsave(arr=img[:,:,[2,1,0]] , fname = 'result/%s.jpg' % name) #保存图像
