In [1]:
# -*- coding: utf-8 -*-
"""

@author: friedhelm
"""
import sys
sys.path.append("../")

import tensorflow as tf
from train.train_tool import arcface_loss,read_single_tfrecord,average_gradients
from core import Arcface_model,config
import time
import os
import numpy as np

def train(image,label,train_phase_dropout,train_phase_bn):
    """Build the multi-GPU ArcFace training graph and run the training loop.

    The fed batch is split into ``config.gpu_num`` equal parts, one per GPU
    tower.  Each tower computes the embedding, the ArcFace logits and its
    loss; the per-tower gradients are combined by ``average_gradients`` and
    applied with a momentum optimizer.

    Besides its arguments, the function reads the module-level names
    ``batch_size``, ``img_size``, ``addr``, ``model_name``, ``model_path``,
    ``train_step`` and ``begin`` that are assigned in the ``__main__``
    section of this file.

    Args:
        image: placeholder that receives one batch of images per step.
        label: placeholder that receives the matching integer labels.
        train_phase_dropout: boolean placeholder forwarded to
            ``Arcface_model.get_embd``; fed ``True`` on every step here.
        train_phase_bn: boolean placeholder forwarded to
            ``Arcface_model.get_embd``; fed ``True`` on every step here.

    Raises:
        ValueError: if ``batch_size`` is larger than 512, because the
            learning-rate schedule cannot be rescaled (see below).
    """

    train_images_split = tf.split(image, config.gpu_num)
    train_labels_split = tf.split(label, config.gpu_num)

    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    # The schedule from config is rescaled by 512 // batch_size: boundaries are
    # multiplied by it and learning rates divided by it.  For batch_size > 512
    # the factor truncates to 0, which previously surfaced as a bare
    # ZeroDivisionError two lines further down.
    scale = int(512.0/batch_size)
    if scale < 1:
        raise ValueError(
            "batch_size must be in [1, 512] to rescale the learning-rate "
            "schedule, got %r" % (batch_size,))
    lr_steps = [scale*s for s in config.lr_steps]
    lr_values = [v/scale for v in config.lr_values]
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule')
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=config.momentum)

    embds = []
    logits = []
    inference_loss = []
    wd_loss = []
    total_train_loss = []
    pred = []
    tower_grads = []

    for i in range(config.gpu_num):
        sub_train_images = train_images_split[i]
        sub_train_labels = train_labels_split[i]
        with tf.device("/gpu:%d"%(i)):
            # Towers after the first one reuse the variables created by tower 0.
            with tf.variable_scope(tf.get_variable_scope(),reuse=(i>0)):

                net, end_points = Arcface_model.get_embd(sub_train_images, train_phase_dropout, train_phase_bn,config.model_params)

                logit = arcface_loss(net,sub_train_labels,config.s,config.m)
                arc_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logit , labels = sub_train_labels))
                # Sum of everything registered in the REGULARIZATION_LOSSES collection.
                L2_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                train_loss = arc_loss+L2_loss

                pred.append(tf.to_int32(tf.argmax(tf.nn.softmax(logit),axis=1)))
                tower_grads.append(opt.compute_gradients(train_loss))

                embds.append(net)
                logits.append(logit)
                inference_loss.append(arc_loss)
                wd_loss.append(L2_loss)
                total_train_loss.append(train_loss)

    embds = tf.concat(embds, axis=0)
    logits = tf.concat(logits, axis=0)
    pred = tf.concat(pred, axis=0)
    wd_loss = tf.add_n(wd_loss)/config.gpu_num
    inference_loss = tf.add_n(inference_loss)/config.gpu_num

    # UPDATE_OPS is a graph-wide collection, so reading it once after all
    # towers are built yields every update op exactly once.  Snapshotting it
    # inside the tower loop produced a nested list with duplicated entries.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    apply_grad_op = opt.apply_gradients(average_gradients(tower_grads))
    train_op = tf.group(apply_grad_op, *update_ops)

    with tf.name_scope('loss'):
        train_loss = tf.add_n(total_train_loss)/config.gpu_num
        tf.summary.scalar('train_loss',train_loss)

    with tf.name_scope('accuracy'):
        train_accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, label), tf.float32))
        tf.summary.scalar('train_accuracy',train_accuracy)

    saver=tf.train.Saver(max_to_keep=20)
    merged=tf.summary.merge_all()

    train_images,train_labels=read_single_tfrecord(addr,batch_size,img_size)

    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        sess.run((tf.global_variables_initializer(),
                  tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess,coord=coord)
        writer_train=tf.summary.FileWriter("./model/%s"%(model_name),sess.graph)
        print("start")
        try:
            # NOTE(review): range(1, train_step) performs train_step-1 steps, so
            # a checkpoint for i == train_step is never written -- confirm
            # that this is intended.
            for i in range(1,train_step):
                image_batch,label_batch=sess.run([train_images,train_labels])
                # One feed dict per step, shared by the update and the logging runs.
                feed={image:image_batch,label:label_batch,train_phase_dropout:True,train_phase_bn:True}
                sess.run([train_op,inc_op],feed_dict=feed)
                if(i%100==0):
                    summary=sess.run(merged,feed_dict=feed)
                    writer_train.add_summary(summary,i)
                if(i%1000==0):
                    print('times: ',i)
                    # Fetch both metrics from a single forward pass so they
                    # describe the same evaluation of the batch.
                    accuracy_value,loss_value=sess.run([train_accuracy,train_loss],feed_dict=feed)
                    print('train_accuracy: ',accuracy_value)
                    print('train_loss: ',loss_value)
                    print('time: ',time.time()-begin)
                    if(i%30000==0):
                        saver.save(sess,os.path.join(model_path,model_name),global_step=i)
        except  tf.errors.OutOfRangeError:
            # The input pipeline ran out of data.
            print("finished")
        finally:
            coord.request_stop()
            writer_train.close()
        coord.join(threads)

        
def main():
    """Create the input placeholders and hand them to ``train``.

    Reads the module-level ``batch_size`` and ``img_size`` that are set in
    the ``__main__`` section of this file.
    """
    with tf.name_scope('input'):
        image_shape = [batch_size, img_size, img_size, 3]
        label_shape = [batch_size]

        image = tf.placeholder(dtype=tf.float32, shape=image_shape, name='image')
        label = tf.placeholder(dtype=tf.int32, shape=label_shape, name='label')
        # Scalar switches forwarded to the model by train().
        train_phase_dropout = tf.placeholder(tf.bool, None, 'train_phase_dropout')
        train_phase_bn = tf.placeholder(tf.bool, None, 'train_phase_bn')

        # NOTE(review): train() is called while the 'input' name scope is still
        # open, so the ops it creates are named under it.  Kept as-is because
        # moving the call would rename graph nodes -- confirm before changing.
        train(image, label, train_phase_dropout, train_phase_bn)


if __name__ == "__main__":
    # Settings copied from config into module-level names; train() and main()
    # read them as globals.
    batch_size = config.batch_size
    img_size = config.img_size
    train_step = config.train_step
    addr = config.addrt
    model_path = config.model_patht
    model_name = config.model_name

    # Reference point for the elapsed-time values printed by train().
    begin = time.time()

    main()
# tensorboard --logdir=/home/dell/Desktop//InsightFace/model/Arcface/

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.
start
times:  1000
train_accuracy:  0.01171875
train_loss:  42.62245
time:  595.178290605545
times:  2000
train_accuracy:  0.046875
train_loss:  39.180023
time:  1088.884649515152
times:  3000
train_accuracy:  0.01953125
train_loss:  34.594162
time:  1583.919938325882
times:  4000
train_accuracy:  0.078125
train_loss:  32.301476
time:  2080.0958297252655
times:  5000
train_accuracy:  0.05859375
train_loss:  30.721973
time:  2576.2248845100403
times:  6000
train_accuracy:  0.171875
train_loss:  27.345337
time:  3072.2438430786133
times:  7000
train_accuracy:  0.10546875
train_loss:  25.815805
time:  3568.2658002376556
times:  8000
train_accuracy:  0.1953125
train_loss:  22.507519
time:  4064.954259634018
times:  9000
train_accuracy:  0.1328125
train_loss:  26.671421
time:  4561.75318312645
times:  10000
train_accuracy:  0.2265625
train_loss:  22.822392
time:  5057.895961046219
times:  11000
train_accuracy:  0.15625

times:  90000
train_accuracy:  0.75390625
train_loss:  8.081097
time:  44807.451597929
times:  91000
train_accuracy:  0.8671875
train_loss:  6.4140277
time:  45319.8419213295
times:  92000
train_accuracy:  0.89453125
train_loss:  6.598339
time:  45815.36218595505
times:  93000
train_accuracy:  0.83203125
train_loss:  6.8668394
time:  46311.22509884834
times:  94000
train_accuracy:  0.8828125
train_loss:  6.65021
time:  46805.974585056305
times:  95000
train_accuracy:  0.84375
train_loss:  6.8617525
time:  47301.11820626259
times:  96000
train_accuracy:  0.89453125
train_loss:  5.9464245
time:  47797.04121637344
times:  97000
train_accuracy:  0.89453125
train_loss:  5.8348184
time:  48292.94453549385
times:  98000
train_accuracy:  0.890625
train_loss:  6.1195574
time:  48788.99401283264
times:  99000
train_accuracy:  0.8515625
train_loss:  7.298297
time:  49284.557185173035
times:  100000
train_accuracy:  0.9453125
train_loss:  5.522714
time:  49779.97583985329
times:  101000
train_accu

times:  182000
train_accuracy:  0.9921875
train_loss:  3.625484
time:  90490.17711257935
times:  183000
train_accuracy:  0.9921875
train_loss:  3.5950975
time:  90985.28136086464
times:  184000
train_accuracy:  0.9765625
train_loss:  3.6839368
time:  91481.56646251678
times:  185000
train_accuracy:  0.99609375
train_loss:  3.6069674
time:  91977.77905726433
times:  186000
train_accuracy:  0.9921875
train_loss:  3.5918949
time:  92474.18846511841
times:  187000
train_accuracy:  0.98828125
train_loss:  3.5831861
time:  92971.08817100525
times:  188000
train_accuracy:  0.984375
train_loss:  3.625761
time:  93467.14297509193
times:  189000
train_accuracy:  0.9921875
train_loss:  3.5499883
time:  93963.66238951683
times:  190000
train_accuracy:  0.99609375
train_loss:  3.6067781
time:  94460.58076095581
times:  191000
train_accuracy:  0.9765625
train_loss:  3.6021938
time:  94957.7469086647
times:  192000
train_accuracy:  0.99609375
train_loss:  3.5156093
time:  95455.22744631767
times:  19

times:  274000
train_accuracy:  1.0
train_loss:  3.0254831
time:  136212.51454377174
times:  275000
train_accuracy:  0.99609375
train_loss:  3.0343475
time:  136708.19340610504
times:  276000
train_accuracy:  0.99609375
train_loss:  3.03924
time:  137204.40947771072
times:  277000
train_accuracy:  0.99609375
train_loss:  2.9883132
time:  137700.4250473976
times:  278000
train_accuracy:  0.984375
train_loss:  3.0108333
time:  138197.48367476463
times:  279000
train_accuracy:  0.99609375
train_loss:  2.992661
time:  138693.2120909691
times:  280000
train_accuracy:  1.0
train_loss:  2.9572234
time:  139189.75592899323
times:  281000
train_accuracy:  0.9921875
train_loss:  2.9613483
time:  139686.04461431503
times:  282000
train_accuracy:  0.99609375
train_loss:  2.9672074
time:  140182.57924580574
times:  283000
train_accuracy:  0.98828125
train_loss:  2.9758303
time:  140679.19736981392
times:  284000
train_accuracy:  0.99609375
train_loss:  3.0023866
time:  141176.15607500076
times:  28

KeyboardInterrupt: 

In [None]:
# -*- coding: utf-8 -*-
"""

@author: friedhelm
"""
import sys
sys.path.append("../")

import tensorflow as tf
from train.train_tool import arcface_loss,read_single_tfrecord,average_gradients
from core import Arcface_model,config
import time
import os
import numpy as np
from evaluate.evaluate import evaluation

def train(image,label,train_phase_dropout,train_phase_bn):
    """Build the multi-GPU ArcFace training graph, then train and evaluate.

    The fed batch is split into ``config.gpu_num`` equal parts, one per GPU
    tower.  Each tower computes the embedding, the ArcFace logits and its
    loss; the per-tower gradients are combined by ``average_gradients`` and
    applied with a momentum optimizer.  Every 5000 steps the model is
    evaluated on each entry of ``config.eval_datasets`` and the result is
    appended to the record file ``f``.

    Besides its arguments, the function reads the module-level names
    ``batch_size``, ``img_size``, ``addr``, ``model_name``, ``model_path``,
    ``train_step``, ``begin`` and the open file ``f`` that are assigned in
    the ``__main__`` section of this file.  ``f`` is closed before returning.

    Args:
        image: placeholder that receives one batch of images per step.
        label: placeholder that receives the matching integer labels.
        train_phase_dropout: boolean placeholder forwarded to
            ``Arcface_model.get_embd``; fed ``True`` on every training step.
        train_phase_bn: boolean placeholder forwarded to
            ``Arcface_model.get_embd``; fed ``True`` on every training step.

    Raises:
        ValueError: if ``batch_size`` is larger than 512, because the
            learning-rate schedule cannot be rescaled (see below).
    """

    train_images_split = tf.split(image, config.gpu_num)
    train_labels_split = tf.split(label, config.gpu_num)

    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    # The schedule from config is rescaled by 512 // batch_size: boundaries are
    # multiplied by it and learning rates divided by it.  For batch_size > 512
    # the factor truncates to 0, which previously surfaced as a bare
    # ZeroDivisionError two lines further down.
    scale = int(512.0/batch_size)
    if scale < 1:
        raise ValueError(
            "batch_size must be in [1, 512] to rescale the learning-rate "
            "schedule, got %r" % (batch_size,))
    lr_steps = [scale*s for s in config.lr_steps]
    lr_values = [v/scale for v in config.lr_values]
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule')
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=config.momentum)

    embds = []
    logits = []
    inference_loss = []
    wd_loss = []
    total_train_loss = []
    pred = []
    tower_grads = []

    for i in range(config.gpu_num):
        sub_train_images = train_images_split[i]
        sub_train_labels = train_labels_split[i]
        with tf.device("/gpu:%d"%(i)):
            # Towers after the first one reuse the variables created by tower 0.
            with tf.variable_scope(tf.get_variable_scope(),reuse=(i>0)):

                net, end_points = Arcface_model.get_embd(sub_train_images, train_phase_dropout, train_phase_bn,config.model_params)

                logit = arcface_loss(net,sub_train_labels,config.s,config.m)
                arc_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logit , labels = sub_train_labels))
                # Sum of everything registered in the REGULARIZATION_LOSSES collection.
                L2_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                train_loss = arc_loss+L2_loss

                pred.append(tf.to_int32(tf.argmax(tf.nn.softmax(logit),axis=1)))
                tower_grads.append(opt.compute_gradients(train_loss))

                embds.append(net)
                logits.append(logit)
                inference_loss.append(arc_loss)
                wd_loss.append(L2_loss)
                total_train_loss.append(train_loss)

    # Embeddings of the whole fed batch; this is the tensor handed to evaluation().
    embds = tf.concat(embds, axis=0)
    logits = tf.concat(logits, axis=0)
    pred = tf.concat(pred, axis=0)
    wd_loss = tf.add_n(wd_loss)/config.gpu_num
    inference_loss = tf.add_n(inference_loss)/config.gpu_num

    # UPDATE_OPS is a graph-wide collection, so reading it once after all
    # towers are built yields every update op exactly once.  Snapshotting it
    # inside the tower loop produced a nested list with duplicated entries.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    apply_grad_op = opt.apply_gradients(average_gradients(tower_grads))
    train_op = tf.group(apply_grad_op, *update_ops)

    with tf.name_scope('loss'):
        train_loss = tf.add_n(total_train_loss)/config.gpu_num
        tf.summary.scalar('train_loss',train_loss)

    with tf.name_scope('accuracy'):
        train_accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, label), tf.float32))
        tf.summary.scalar('train_accuracy',train_accuracy)

    saver=tf.train.Saver(max_to_keep=20)
    merged=tf.summary.merge_all()

    train_images,train_labels=read_single_tfrecord(addr,batch_size,img_size)

    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        sess.run((tf.global_variables_initializer(),
                  tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess,coord=coord)
        writer_train=tf.summary.FileWriter(model_path,sess.graph)
        print("start")
        try:
            # NOTE(review): range(1, train_step) performs train_step-1 steps, so
            # the actions scheduled for i == train_step never run -- confirm
            # that this is intended.
            for i in range(1,train_step):
                image_batch,label_batch=sess.run([train_images,train_labels])
                # One feed dict per step, shared by the update and the summary run.
                feed={image:image_batch,label:label_batch,train_phase_dropout:True,train_phase_bn:True}
                sess.run([train_op,inc_op],feed_dict=feed)
                if(i%100==0):
                    summary=sess.run(merged,feed_dict=feed)
                    writer_train.add_summary(summary,i)
                if(i%1000==0):
                    print('times: ',i)
                    print('time: ',time.time()-begin)
                if(i%5000==0):
                    f.write("itrations: %d"%(i)+'\n')
                    for dataset_path in config.eval_datasets:
                        # The original passed the undefined name `embd` here,
                        # which raised NameError at the first evaluation; the
                        # concatenated embedding tensor is `embds`.
                        tpr, fpr, accuracy, best_thresholds = evaluation(sess, batch_size, img_size, dataset_path, dropout_flag=config.eval_dropout_flag, bn_flag=config.eval_bn_flag, embd=embds, image=image, train_phase_dropout=train_phase_dropout, train_phase_bn=train_phase_bn)
                        # File name without directory and extension.
                        dataset_name=dataset_path.split("/")[-1].split(".")[0]
                        print("%s datasets get %.3f acc"%(dataset_name,accuracy))
                        f.write("\t %s \t %.3f \t \t "%(dataset_name,accuracy)+str(best_thresholds)+'\n')
                    f.write('\n')
                    # Push the record to disk now so it survives an abrupt stop.
                    f.flush()
                if(i%30000==0):
                    saver.save(sess,os.path.join(model_path,model_name),global_step=i)
        except  tf.errors.OutOfRangeError:
            # The input pipeline ran out of data.
            print("finished")
        finally:
            coord.request_stop()
            writer_train.close()
            # Close the record file on every exit path, including interrupts.
            f.close()
        coord.join(threads)

        
def main():
    """Create the input placeholders and hand them to ``train``.

    Reads the module-level ``batch_size`` and ``img_size`` that are set in
    the ``__main__`` section of this file.
    """
    with tf.name_scope('input'):
        image_shape = [batch_size, img_size, img_size, 3]
        label_shape = [batch_size]

        image = tf.placeholder(dtype=tf.float32, shape=image_shape, name='image')
        label = tf.placeholder(dtype=tf.int32, shape=label_shape, name='label')
        # Scalar switches forwarded to the model by train().
        train_phase_dropout = tf.placeholder(tf.bool, None, 'train_phase_dropout')
        train_phase_bn = tf.placeholder(tf.bool, None, 'train_phase_bn')

        # NOTE(review): train() is called while the 'input' name scope is still
        # open, so the ops it creates are named under it.  Kept as-is because
        # moving the call would rename graph nodes -- confirm before changing.
        train(image, label, train_phase_dropout, train_phase_bn)


if __name__ == "__main__":

    # Settings copied from config into module-level names; train() and main()
    # read them as globals.
    img_size=config.img_size
    batch_size=config.batch_size
    addr=config.addrt
    model_name=config.model_name
    train_step=config.train_step
    model_path=config.model_patht

    # Reference point for the elapsed-time values printed by train().
    begin=time.time()

    # train() writes evaluation results through the module-level name `f`.
    # Opening it with a context manager guarantees the file is closed even if
    # graph construction or training raises; closing it a second time is a
    # harmless no-op.
    with open("./eval_record.txt", 'w') as f:
        f.write("\t dataset \t accuracy \t best_thresholds \t"+'\n')
        main()
# tensorboard --logdir=/home/dell/Desktop/insightface/model/Arcface_model/

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.
start
