In [1]:
import tensorflow as tf
import numpy as np
import time
import cv2
from tool import read_multi_tfrecords
from MTCNN_model import Pnet_model



def image_color_distort(inputs):
    """Run ``inputs`` through a fixed chain of random colour augmentations.

    The ops are applied in this order: contrast (factor in [0.5, 1.5]),
    brightness (max delta 0.2), hue (max delta 0.2) and saturation
    (factor in [0.5, 1.5]).

    Returns:
        The tensor produced by the last augmentation op.
    """
    augmentations = (
        lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5),
        lambda img: tf.image.random_brightness(img, max_delta=0.2),
        lambda img: tf.image.random_hue(img, max_delta=0.2),
        lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5),
    )

    distorted = inputs
    for augment in augmentations:
        distorted = augment(distorted)
    return distorted

def label_los(pre_label,act_label):
    """Classification loss over the hardest 70% of the labelled rows.

    Args:
        pre_label: per-row class scores. The flat indexing below
            (``row * 2 + label``) requires exactly two scores per row.
            Presumably these are softmax probabilities, since they are fed
            straight into ``log`` -- verify against the model.
        act_label: int32 labels, one per row. Rows with a negative label are
            excluded from the loss.

    Returns:
        Scalar tensor: mean of the ``k`` largest per-row ``-log(p)`` values,
        where ``k = int(0.7 * number_of_non_negative_labels)``, clamped to at
        least 1 so that a batch with 0 or 1 usable rows yields a finite value
        instead of the NaN produced by averaging an empty tensor.
    """
    keep_ratio = tf.constant(0.7)
    is_ignored = tf.less(act_label, 0)

    # Ignored rows are pointed at class 0 only to keep the gather index in
    # range; their loss is zeroed by the mask further down.
    safe_label = tf.where(is_ignored,
                          tf.zeros_like(act_label, dtype=tf.int32),
                          act_label)

    row_count = tf.shape(pre_label, out_type=tf.int32)[0]
    flat_scores = tf.reshape(pre_label, [-1])
    # Flat position of the score belonging to each row's true class.
    true_class_index = tf.range(0, row_count) * 2 + safe_label

    # No squeeze here: it would turn a one-row batch into a scalar, which
    # top_k cannot handle.
    true_class_score = tf.gather(flat_scores, true_class_index)
    loss = -tf.log(true_class_score + 1e-10)  # epsilon avoids log(0)

    valid_mask = tf.where(is_ignored,
                          tf.zeros_like(act_label, dtype=tf.float32),
                          tf.ones_like(act_label, dtype=tf.float32))

    valid_count = tf.reduce_sum(valid_mask)
    keep_count = tf.cast(valid_count * keep_ratio, dtype=tf.int32)
    # Guard against k == 0 (0 or 1 valid rows), which would give NaN.
    keep_count = tf.maximum(keep_count, 1)

    loss = tf.multiply(loss, valid_mask, 'label_los')
    hardest, _ = tf.nn.top_k(loss, keep_count)

    return tf.reduce_mean(hardest)
    
def roi_los(label,pre_box,act_box) :
    """Masked squared-error loss between predicted and target boxes.

    Only rows whose label is +1 or -1 contribute; every other row is
    multiplied by zero. The returned scalar is the mean over ALL rows
    (masked rows count as zeros), not over the contributing rows only.
    """
    # 1.0 where |label| == 1, 0.0 elsewhere.
    box_mask = tf.where(
        tf.equal(abs(label), 1),
        tf.ones_like(label, dtype=tf.float32),
        tf.zeros_like(label, dtype=tf.float32),
    )
    per_row_error = tf.reduce_sum(tf.square(act_box - pre_box), axis=1)
    masked_error = tf.multiply(per_row_error, box_mask, 'roi_los')
    return tf.reduce_mean(masked_error)
    
def landmark_los(label,pre_landmark,act_landmark):
    """Masked squared-error loss between predicted and target landmarks.

    Only rows whose label equals -2 contribute; every other row is
    multiplied by zero. The returned scalar is the mean over ALL rows
    (masked rows count as zeros), not over the contributing rows only.
    """
    # 1.0 where label == -2, 0.0 elsewhere.
    landmark_mask = tf.where(
        tf.equal(label, -2),
        tf.ones_like(label, dtype=tf.float32),
        tf.zeros_like(label, dtype=tf.float32),
    )
    per_row_error = tf.reduce_sum(tf.square(act_landmark - pre_landmark), axis=1)
    masked_error = tf.multiply(per_row_error, landmark_mask, 'landmark_los')
    return tf.reduce_mean(masked_error)
    
def cal_accuracy(cls_prob,label):
    """Fraction of rows with ``label >= 0`` whose argmax prediction is correct.

    Args:
        cls_prob: per-row class scores; the prediction is the argmax on axis 1.
        label: per-row labels; rows with a negative label are left out.

    Returns:
        Scalar float32 tensor with the accuracy over the selected rows.
    """
    predicted = tf.argmax(cls_prob, axis=1)
    truth = tf.cast(label, tf.int64)  # argmax yields int64, so compare in int64

    # Indices of the rows that take part in the accuracy computation.
    keep_idx = tf.squeeze(tf.where(tf.greater_equal(truth, 0)))

    hits = tf.equal(tf.gather(truth, keep_idx), tf.gather(predicted, keep_idx))
    return tf.reduce_mean(tf.cast(hits, tf.float32))

def train(image,label,roi,landmark,model,model_name):
    """Build the loss/optimizer graph around ``model`` and run the training loop.

    Args:
        image, label, roi, landmark: placeholders that are fed with every
            fetched batch.
        model: callable; ``model(image)`` must return the raw
            ``(label, roi, landmark)`` predictions.
        model_name: file stem used for the saved checkpoints.

    Also reads the module-level names ``addr``, ``batch_size``, ``img_size``
    and ``begin`` that the ``__main__`` block defines.

    Every 100 steps a summary is written, every 1000 steps accuracy/loss/time
    are printed, and every 10000 steps a checkpoint is saved.
    """
    import os  # function-scope import: this script's import block has no os

    # Saver.save does not create a missing parent directory, so make sure it
    # exists before spending 10000 steps to find out.
    checkpoint_dir = "E:\\friedhelm\\object\\face_detection_MTCNN\\model"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    _label, _roi, _landmark = model(image)

    with tf.name_scope('output'):
        _label = tf.squeeze(_label, name='label')
        _roi = tf.squeeze(_roi, name='roi')
        _landmark = tf.squeeze(_landmark, name='landmark')

    _label_los = label_los(_label, label)
    _box_los = roi_los(label, _roi, roi)
    _landmark_los = landmark_los(label, _landmark, landmark)

    function_loss = _label_los + _box_los*0.5 + _landmark_los*0.5

    # The total loss is the sum of everything registered under 'loss';
    # presumably the model adds its own terms to that collection -- verify.
    tf.add_to_collection("loss", function_loss)
    loss_all = tf.get_collection('loss')

    with tf.name_scope('loss'):
        loss = tf.reduce_sum(loss_all)
        tf.summary.scalar('loss', loss)

    opt = tf.train.AdamOptimizer(0.001).minimize(loss)

    with tf.name_scope('accuracy'):
        train_accuracy = cal_accuracy(_label, label)
        tf.summary.scalar('accuracy', train_accuracy)

    saver = tf.train.Saver()
    merged = tf.summary.merge_all()

    images, labels, rois, landmarks = read_multi_tfrecords(addr, batch_size, img_size)
    images = image_color_distort(images)

    with tf.Session() as sess:
        sess.run((tf.global_variables_initializer(),
                  tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        writer_train = tf.summary.FileWriter('C:\\Users\\312\\Desktop\\', sess.graph)

        try:
            # NOTE(review): the original fetched and discarded one batch before
            # the loop; kept so the data sequence is unchanged -- confirm
            # whether it is actually needed.
            sess.run([images, labels, rois, landmarks])

            for i in range(1, 100001):
                image_batch, label_batch, roi_batch, landmark_batch = sess.run(
                    [images, labels, rois, landmarks])
                feed = {image: image_batch, label: label_batch,
                        roi: roi_batch, landmark: landmark_batch}

                sess.run(opt, feed_dict=feed)

                if i % 100 == 0:
                    writer_train.add_summary(sess.run(merged, feed_dict=feed), i)

                if i % 1000 == 0:
                    # One run for both metrics instead of two forward passes.
                    accuracy_value, loss_value = sess.run(
                        [train_accuracy, loss], feed_dict=feed)
                    print('次数', i)
                    print('train_accuracy', accuracy_value)
                    print('loss', loss_value)
                    print('time', time.time() - begin)
                    if i % 10000 == 0:
                        saver.save(sess, "E:\\friedhelm\\object\\face_detection_MTCNN\\model\\%s.ckpt" % (model_name), global_step=i)
        except tf.errors.OutOfRangeError:
            # Input queues ran dry before the step budget was used up.
            print("finished")
        finally:
            # Always flush the event file and shut the queue-runner threads
            # down before the session is closed.
            writer_train.close()
            coord.request_stop()
            coord.join(threads)
    
def main(model,model_name):
    """Create the input placeholders and start training.

    The four placeholders are created without a static shape under the
    'input' name scope and are handed to ``train`` together with ``model``
    and ``model_name``.
    """
    with tf.name_scope('input'):
        image_ph = tf.placeholder(dtype=tf.float32, name='image')
        label_ph = tf.placeholder(dtype=tf.int32, name='label')
        roi_ph = tf.placeholder(dtype=tf.float32, name='roi')
        landmark_ph = tf.placeholder(dtype=tf.float32, name='landmark')

    train(image_ph, label_ph, roi_ph, landmark_ph, model, model_name)

if __name__=='__main__':

    # These names are module-level on purpose: train() reads addr,
    # batch_size, img_size and begin as globals.
    img_size=12
    batch_size=[192,64,64,128]
    batch=sum(batch_size)  # 448

    # One tfrecords file per sample kind, listed in the same order as
    # batch_size (presumably one batch share per file -- verify against
    # read_multi_tfrecords).
    addr=["E:\\friedhelm\\object\\face_detection_MTCNN\\DATA\\%d\\%s_%d_train.tfrecords"%(img_size,kind,img_size)
          for kind in ("neg","pos","par","land")]

    model_name="Pnet_model"
    model=Pnet_model

    # Reference point for the elapsed-time printout inside train().
    begin=time.time()

    main(model,model_name)
# tensorboard --logdir=C:\\Users\\312\\Desktop\\

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


次数 1000
train_accuracy 0.78515625
loss 0.6878749
time 100.79413723945618
次数 2000
train_accuracy 0.8515625
loss 0.5616866
time 174.0264813899994
次数 3000
train_accuracy 0.8671875
loss 0.52327466
time 248.54219555854797
次数 4000
train_accuracy 0.890625
loss 0.45095062
time 318.6285996437073
次数 5000
train_accuracy 0.85546875
loss 0.54340607
time 388.4394247531891
次数 6000
train_accuracy 0.90625
loss 0.41419792
time 458.4488773345947
次数 7000
train_accuracy 0.92578125
loss 0.33988434
time 529.0986154079437
次数 8000
train_accuracy 0.9140625
loss 0.3682877
time 599.3664338588715
次数 9000
train_accuracy 0.91015625
loss 0.3749094
time 668.6277329921722
次数 10000
train_accuracy 0.875
loss 0.46819583
time 738.911292552948
次数 11000
train_accuracy 0.89453125
loss 0.43227974
time 842.3431050777435
次数 12000
train_accuracy 0.9140625
loss 0.36128223
time 917.1106638908386
次数 13000
train_accuracy 0.921875
loss 0.30620977
time 1006.3175184726715
次数 14000
train_accuracy 0.94921875
loss 0.2814188
time 1096.85954

In [1]:
import tensorflow as tf
import time
from tool import read_multi_tfrecords
from MTCNN_model import Pnet_model,Rnet_model,Onet_model
import os

def image_color_distort(inputs):
    """Run ``inputs`` through a fixed chain of random colour augmentations.

    The ops are applied in this order: contrast (factor in [0.5, 1.5]),
    brightness (max delta 0.2), hue (max delta 0.2) and saturation
    (factor in [0.5, 1.5]).

    Returns:
        The tensor produced by the last augmentation op.
    """
    augmentations = (
        lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5),
        lambda img: tf.image.random_brightness(img, max_delta=0.2),
        lambda img: tf.image.random_hue(img, max_delta=0.2),
        lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5),
    )

    distorted = inputs
    for augment in augmentations:
        distorted = augment(distorted)
    return distorted

def label_los(pre_label,act_label):
    """Classification loss over the hardest 70% of the labelled rows.

    Args:
        pre_label: per-row class scores. The flat indexing below
            (``row * 2 + label``) requires exactly two scores per row.
            Presumably these are softmax probabilities, since they are fed
            straight into ``log`` -- verify against the model.
        act_label: int32 labels, one per row. Rows with a negative label are
            excluded from the loss.

    Returns:
        Scalar tensor: mean of the ``k`` largest per-row ``-log(p)`` values,
        where ``k = int(0.7 * number_of_non_negative_labels)``, clamped to at
        least 1 so that a batch with 0 or 1 usable rows yields a finite value
        instead of the NaN produced by averaging an empty tensor.
    """
    keep_ratio = tf.constant(0.7)
    is_ignored = tf.less(act_label, 0)

    # Ignored rows are pointed at class 0 only to keep the gather index in
    # range; their loss is zeroed by the mask further down.
    safe_label = tf.where(is_ignored,
                          tf.zeros_like(act_label, dtype=tf.int32),
                          act_label)

    row_count = tf.shape(pre_label, out_type=tf.int32)[0]
    flat_scores = tf.reshape(pre_label, [-1])
    # Flat position of the score belonging to each row's true class.
    true_class_index = tf.range(0, row_count) * 2 + safe_label

    # No squeeze here: it would turn a one-row batch into a scalar, which
    # top_k cannot handle.
    true_class_score = tf.gather(flat_scores, true_class_index)
    loss = -tf.log(true_class_score + 1e-10)  # epsilon avoids log(0)

    valid_mask = tf.where(is_ignored,
                          tf.zeros_like(act_label, dtype=tf.float32),
                          tf.ones_like(act_label, dtype=tf.float32))

    valid_count = tf.reduce_sum(valid_mask)
    keep_count = tf.cast(valid_count * keep_ratio, dtype=tf.int32)
    # Guard against k == 0 (0 or 1 valid rows), which would give NaN.
    keep_count = tf.maximum(keep_count, 1)

    loss = tf.multiply(loss, valid_mask, 'label_los')
    hardest, _ = tf.nn.top_k(loss, keep_count)

    return tf.reduce_mean(hardest)
    
def roi_los(label,pre_box,act_box) :
    """Masked squared-error loss between predicted and target boxes.

    Only rows whose label is +1 or -1 contribute; every other row is
    multiplied by zero. The returned scalar is the mean over ALL rows
    (masked rows count as zeros), not over the contributing rows only.
    """
    # 1.0 where |label| == 1, 0.0 elsewhere.
    box_mask = tf.where(
        tf.equal(abs(label), 1),
        tf.ones_like(label, dtype=tf.float32),
        tf.zeros_like(label, dtype=tf.float32),
    )
    per_row_error = tf.reduce_sum(tf.square(act_box - pre_box), axis=1)
    masked_error = tf.multiply(per_row_error, box_mask, 'roi_los')
    return tf.reduce_mean(masked_error)
    
def landmark_los(label,pre_landmark,act_landmark):
    """Masked squared-error loss between predicted and target landmarks.

    Only rows whose label equals -2 contribute; every other row is
    multiplied by zero. The returned scalar is the mean over ALL rows
    (masked rows count as zeros), not over the contributing rows only.
    """
    # 1.0 where label == -2, 0.0 elsewhere.
    landmark_mask = tf.where(
        tf.equal(label, -2),
        tf.ones_like(label, dtype=tf.float32),
        tf.zeros_like(label, dtype=tf.float32),
    )
    per_row_error = tf.reduce_sum(tf.square(act_landmark - pre_landmark), axis=1)
    masked_error = tf.multiply(per_row_error, landmark_mask, 'landmark_los')
    return tf.reduce_mean(masked_error)
    
def cal_accuracy(cls_prob,label):
    """Fraction of rows with ``label >= 0`` whose argmax prediction is correct.

    Args:
        cls_prob: per-row class scores; the prediction is the argmax on axis 1.
        label: per-row labels; rows with a negative label are left out.

    Returns:
        Scalar float32 tensor with the accuracy over the selected rows.
    """
    predicted = tf.argmax(cls_prob, axis=1)
    truth = tf.cast(label, tf.int64)  # argmax yields int64, so compare in int64

    # Indices of the rows that take part in the accuracy computation.
    keep_idx = tf.squeeze(tf.where(tf.greater_equal(truth, 0)))

    hits = tf.equal(tf.gather(truth, keep_idx), tf.gather(predicted, keep_idx))
    return tf.reduce_mean(tf.cast(hits, tf.float32))

def train(image,label,roi,landmark,model,model_name):
    """Build the loss/optimizer graph around ``model`` and run the training loop.

    Args:
        image, label, roi, landmark: placeholders that are fed with every
            fetched batch.
        model: callable; ``model(image, batch)`` must return the raw
            ``(label, roi, landmark)`` predictions.
        model_name: used for both the checkpoint directory and file stem.

    Also reads the module-level names ``batch``, ``ratio``, ``learning_rate``,
    ``train_step``, ``addr``, ``batch_size``, ``img_size`` and ``begin`` that
    the ``__main__`` block defines.

    Every 100 steps a summary is written, every 1000 steps accuracy/loss/time
    are printed, and every 10000 steps a checkpoint is saved.
    """
    _label, _roi, _landmark = model(image, batch)

    with tf.name_scope('output'):
        _label = tf.squeeze(_label, name='label')
        _roi = tf.squeeze(_roi, name='roi')
        _landmark = tf.squeeze(_landmark, name='landmark')

    _label_los = label_los(_label, label)
    _box_los = roi_los(label, _roi, roi)
    _landmark_los = landmark_los(label, _landmark, landmark)

    # ratio holds the weights of the (classification, box, landmark) terms.
    function_loss = _label_los*ratio[0] + _box_los*ratio[1] + _landmark_los*ratio[2]

    # The total loss is the sum of everything registered under 'loss';
    # presumably the model adds its own terms to that collection -- verify.
    tf.add_to_collection("loss", function_loss)
    loss_all = tf.get_collection('loss')

    with tf.name_scope('loss'):
        loss = tf.reduce_sum(loss_all)
        tf.summary.scalar('loss', loss)

    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    with tf.name_scope('accuracy'):
        train_accuracy = cal_accuracy(_label, label)
        tf.summary.scalar('accuracy', train_accuracy)

    saver = tf.train.Saver(max_to_keep=10)
    merged = tf.summary.merge_all()

    images, labels, rois, landmarks = read_multi_tfrecords(addr, batch_size, img_size)
    images = image_color_distort(images)

    with tf.Session() as sess:
        sess.run((tf.global_variables_initializer(),
                  tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        writer_train = tf.summary.FileWriter('C:\\Users\\312\\Desktop\\', sess.graph)

        try:
            # NOTE(review): the original fetched and discarded one batch before
            # the loop; kept so the data sequence is unchanged -- confirm
            # whether it is actually needed. It now sits inside the try so an
            # exhausted queue is handled like in the loop.
            sess.run([images, labels, rois, landmarks])

            for i in range(1, train_step):
                image_batch, label_batch, roi_batch, landmark_batch = sess.run(
                    [images, labels, rois, landmarks])
                feed = {image: image_batch, label: label_batch,
                        roi: roi_batch, landmark: landmark_batch}

                sess.run(opt, feed_dict=feed)

                if i % 100 == 0:
                    writer_train.add_summary(sess.run(merged, feed_dict=feed), i)

                if i % 1000 == 0:
                    # One run for both metrics instead of two forward passes.
                    accuracy_value, loss_value = sess.run(
                        [train_accuracy, loss], feed_dict=feed)
                    print('次数', i)
                    print('train_accuracy', accuracy_value)
                    print('loss', loss_value)
                    print('time', time.time() - begin)
                    if i % 10000 == 0:
                        saver.save(sess, "E:\\friedhelm\\object\\face_detection_MTCNN\\%s\\%s.ckpt" % (model_name, model_name), global_step=i)
        except tf.errors.OutOfRangeError:
            # Input queues ran dry before the step budget was used up.
            print("finished")
        finally:
            # Join inside finally so the queue-runner threads are shut down
            # even when an unexpected exception is propagating.
            writer_train.close()
            coord.request_stop()
            coord.join(threads)
    
def main(model):
    """Create the input placeholders and start training.

    The four placeholders are created without a static shape under the
    'input' name scope and are handed to ``train`` together with ``model``.
    ``model_name`` is not a parameter: it is read from module scope, where
    the ``__main__`` block sets it.
    """
    with tf.name_scope('input'):
        image_ph = tf.placeholder(dtype=tf.float32, name='image')
        label_ph = tf.placeholder(dtype=tf.int32, name='label')
        roi_ph = tf.placeholder(dtype=tf.float32, name='roi')
        landmark_ph = tf.placeholder(dtype=tf.float32, name='landmark')

    train(image_ph, label_ph, roi_ph, landmark_ph, model, model_name)

if __name__=='__main__':

    # These names are module-level on purpose: train() and main() read them
    # as globals.
    img_size=24
    batch_size=[192,64,64,128]
    batch=sum(batch_size)  # 448

    # One tfrecords file per sample kind, listed in the same order as
    # batch_size (presumably one batch share per file -- verify against
    # read_multi_tfrecords).
    addr=["E:\\friedhelm\\object\\face_detection_MTCNN\\DATA\\%d\\%s_%d_train.tfrecords"%(img_size,kind,img_size)
          for kind in ("neg","pos","par","land")]

    model_name="Rnet_model"
    model=Rnet_model
    learning_rate=0.001
    train_step=100001

    # Checkpoints are written below this directory, so create it up front.
    save_model_path="E:\\friedhelm\\object\\face_detection_MTCNN\\%s"%(model_name)
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    # Weights of the (classification, box, landmark) loss terms; only the
    # O-net uses a landmark weight of 1.
    ratio=[1,0.5,1] if model_name=="Onet_model" else [1,0.5,0.5]

    # Reference point for the elapsed-time printout inside train().
    begin=time.time()
    main(model)
# tensorboard --logdir=C:\\Users\\312\\Desktop\\

次数 1000
train_accuracy 0.75
loss 2.983419
time 106.94655513763428
次数 2000
train_accuracy 0.74609375
loss 1.025057
time 201.37084102630615
次数 3000
train_accuracy 0.7578125
loss 0.80651593
time 295.2332332134247
次数 4000
train_accuracy 0.796875
loss 0.8239015
time 387.4109103679657
次数 5000
train_accuracy 0.83203125
loss 0.80475277
time 479.7254915237427
次数 6000
train_accuracy 0.84765625
loss 0.68953633
time 572.839462518692
次数 7000
train_accuracy 0.8203125
loss 0.7377406
time 665.4434840679169
次数 8000
train_accuracy 0.875
loss 0.6200149
time 757.8247125148773
次数 9000
train_accuracy 0.8671875
loss 0.56840163
time 849.9494550228119
次数 10000
train_accuracy 0.89453125
loss 0.49643278
time 942.6464929580688
次数 11000
train_accuracy 0.84375
loss 0.5684604
time 1035.016267299652
次数 12000
train_accuracy 0.91015625
loss 0.40027446
time 1126.5457875728607
次数 13000
train_accuracy 0.89453125
loss 0.47212297
time 1219.7754926681519
次数 14000
train_accuracy 0.91796875
loss 0.42059052
time 1312.0044779777

In [3]:
import tensorflow as tf
import time
from core.tool import read_multi_tfrecords,image_color_distort
from core.MTCNN_model import Pnet_model,Rnet_model,Onet_model
from train.train_tool import label_los,roi_los,landmark_los,cal_accuracy
import os



def train(image,label,roi,landmark,model,model_name):
    """Build the loss/optimizer graph around ``model`` and run the training loop.

    Args:
        image, label, roi, landmark: placeholders that are fed with every
            fetched batch.
        model: callable; ``model(image, batch)`` must return the raw
            ``(label, roi, landmark)`` predictions.
        model_name: used for both the checkpoint directory and file stem.

    Also reads the module-level names ``batch``, ``ratio``, ``learning_rate``,
    ``train_step``, ``addr``, ``batch_size``, ``img_size`` and ``begin`` that
    the ``__main__`` block defines.

    Every 100 steps a summary is written, every 1000 steps accuracy/loss/time
    are printed, and every 10000 steps a checkpoint is saved.
    """
    _label, _roi, _landmark = model(image, batch)

    with tf.name_scope('output'):
        _label = tf.squeeze(_label, name='label')
        _roi = tf.squeeze(_roi, name='roi')
        _landmark = tf.squeeze(_landmark, name='landmark')

    _label_los = label_los(_label, label)
    _box_los = roi_los(label, _roi, roi)
    _landmark_los = landmark_los(label, _landmark, landmark)

    # ratio holds the weights of the (classification, box, landmark) terms.
    function_loss = _label_los*ratio[0] + _box_los*ratio[1] + _landmark_los*ratio[2]

    # The total loss is the sum of everything registered under 'loss';
    # presumably the model adds its own terms to that collection -- verify.
    tf.add_to_collection("loss", function_loss)
    loss_all = tf.get_collection('loss')

    with tf.name_scope('loss'):
        loss = tf.reduce_sum(loss_all)
        tf.summary.scalar('loss', loss)

    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    with tf.name_scope('accuracy'):
        train_accuracy = cal_accuracy(_label, label)
        tf.summary.scalar('accuracy', train_accuracy)

    saver = tf.train.Saver(max_to_keep=10)
    merged = tf.summary.merge_all()

    images, labels, rois, landmarks = read_multi_tfrecords(addr, batch_size, img_size)
    images = image_color_distort(images)

    with tf.Session() as sess:
        sess.run((tf.global_variables_initializer(),
                  tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        writer_train = tf.summary.FileWriter('C:\\Users\\312\\Desktop\\', sess.graph)

        try:
            # NOTE(review): the original fetched and discarded one batch before
            # the loop; kept so the data sequence is unchanged -- confirm
            # whether it is actually needed. It now sits inside the try so an
            # exhausted queue is handled like in the loop.
            sess.run([images, labels, rois, landmarks])

            for i in range(1, train_step):
                image_batch, label_batch, roi_batch, landmark_batch = sess.run(
                    [images, labels, rois, landmarks])
                feed = {image: image_batch, label: label_batch,
                        roi: roi_batch, landmark: landmark_batch}

                sess.run(opt, feed_dict=feed)

                if i % 100 == 0:
                    writer_train.add_summary(sess.run(merged, feed_dict=feed), i)

                if i % 1000 == 0:
                    # One run for both metrics instead of two forward passes.
                    accuracy_value, loss_value = sess.run(
                        [train_accuracy, loss], feed_dict=feed)
                    print('次数', i)
                    print('train_accuracy', accuracy_value)
                    print('loss', loss_value)
                    print('time', time.time() - begin)
                    if i % 10000 == 0:
                        saver.save(sess, "E:\\friedhelm\\object\\face_detection_MTCNN\\%s\\%s.ckpt" % (model_name, model_name), global_step=i)
        except tf.errors.OutOfRangeError:
            # Input queues ran dry before the step budget was used up.
            print("finished")
        finally:
            # Join inside finally so the queue-runner threads are shut down
            # even when an unexpected exception is propagating.
            writer_train.close()
            coord.request_stop()
            coord.join(threads)
    
def main(model):
    """Create the input placeholders and start training.

    The four placeholders are created without a static shape under the
    'input' name scope and are handed to ``train`` together with ``model``.
    ``model_name`` is not a parameter: it is read from module scope, where
    the ``__main__`` block sets it.
    """
    with tf.name_scope('input'):
        image_ph = tf.placeholder(dtype=tf.float32, name='image')
        label_ph = tf.placeholder(dtype=tf.int32, name='label')
        roi_ph = tf.placeholder(dtype=tf.float32, name='roi')
        landmark_ph = tf.placeholder(dtype=tf.float32, name='landmark')

    train(image_ph, label_ph, roi_ph, landmark_ph, model, model_name)

if __name__=='__main__':

    # These names are module-level on purpose: train() and main() read them
    # as globals.
    img_size=48
    batch_size=[192,64,64,128]
    batch=sum(batch_size)  # 448

    # One tfrecords file per sample kind, listed in the same order as
    # batch_size (presumably one batch share per file -- verify against
    # read_multi_tfrecords).
    addr=["E:\\friedhelm\\object\\face_detection_MTCNN\\DATA\\%d\\%s_%d_train.tfrecords"%(img_size,kind,img_size)
          for kind in ("neg","pos","par","land")]

    model_name="Onet_model"
    model=Onet_model
    learning_rate=0.001
    train_step=100001

    # Checkpoints are written below this directory, so create it up front.
    save_model_path="E:\\friedhelm\\object\\face_detection_MTCNN\\%s"%(model_name)
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    # Weights of the (classification, box, landmark) loss terms; only the
    # O-net uses a landmark weight of 1.
    ratio=[1,0.5,1] if model_name=="Onet_model" else [1,0.5,0.5]

    # Reference point for the elapsed-time printout inside train().
    begin=time.time()
    main(model)
# tensorboard --logdir=C:\\Users\\312\\Desktop\\

次数 1000
train_accuracy 0.81640625
loss 16.90684
time 250.21454238891602
次数 2000
train_accuracy 0.87890625
loss 11.122903
time 456.55007433891296
次数 3000
train_accuracy 0.85546875
loss 4.307001
time 659.713751077652
次数 4000
train_accuracy 0.88671875
loss 1.8141267
time 865.4698987007141
次数 5000
train_accuracy 0.84765625
loss 1.8586755
time 1065.9913940429688
次数 6000
train_accuracy 0.875
loss 1.0554221
time 1269.3003406524658
次数 7000
train_accuracy 0.90234375
loss 1.0119798
time 1465.7885904312134
次数 8000
train_accuracy 0.91015625
loss 1.0395651
time 1660.3263640403748
次数 9000
train_accuracy 0.87890625
loss 0.72770464
time 1854.1370985507965
次数 10000
train_accuracy 0.91015625
loss 0.7000376
time 2045.871889591217
次数 11000
train_accuracy 0.92578125
loss 0.5811616
time 2246.3041937351227
次数 12000
train_accuracy 0.94140625
loss 0.5829478
time 2441.8680381774902
次数 13000
train_accuracy 0.91796875
loss 0.58931255
time 2750.2011394500732
次数 14000
train_accuracy 0.91015625
loss 0.6873237
time 2