In [1]:
import tensorflow as tf
import os
import sys
import cv2
import numpy as np
import tensorflow.contrib.slim as slim
import tensorflow_hub as hub
from utils import *
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options,log_device_placement=True,allow_soft_placement=True)
import lmdb
%matplotlib inline
import matplotlib.pyplot as plt
import random
import pickle

# ---------------------------------------------------------------------------
# Dataset locations. The absolute paths are site-specific; the commented-out
# relative alternatives are kept for running from a local checkout.
# ---------------------------------------------------------------------------
#data_path = '../data/mscoco/'
data_path = '/dvmm-filer2/datasets/Groundings/data/mscoco/'
dict_paths = [data_path+'train2014.pickle',
              data_path+'val2014.pickle']
lmdb_path = data_path+'MSCOCO_jpg.lmdb'

#loading mscoco data
# Read-only, lock-free LMDB handle; `txn` is the transaction batch_gen() reads
# encoded images from.
lmdb_env = lmdb.open(lmdb_path, map_size=int(1e11), readonly=True, lock=False)
txn = lmdb_env.begin(write=False)

# NOTE: pickle.load can execute arbitrary code; only point these paths at
# trusted annotation files.
with open(dict_paths[0], 'rb') as f:
    dict_train = pickle.load(f, encoding='latin1')
ids_train = list(dict_train.keys())

# Only the MSCOCO validation ids are kept. The dict itself is deliberately not
# bound to `dict_val`, because `dict_val` is the Flickr30k validation set below
# and re-using the name for two datasets made the cell misleading.
with open(dict_paths[1], 'rb') as f:
    ids_val = list(pickle.load(f, encoding='latin1').keys())

#f30k_path = '../data/flickr30k/'
f30k_path = '/dvmm-filer2/datasets/Groundings/data/flickr30k/'
f30k_dict_path = f30k_path+'flickr30k_val.pickle'

#loading flickr30k data (this is the dict used for validation)
with open(f30k_dict_path, 'rb') as f:
    dict_val = pickle.load(f, encoding='latin1')

# ---------------------------------------------------------------------------
# Hyper-parameters (read as globals by the functions and cells below).
# ---------------------------------------------------------------------------
n_batch = 32       # image/caption pairs per training batch
gamma_1 = 5.0      # sharpness of the soft-max pooling over words in attn_loss
gamma_2 = 10.0     # scale applied to pair scores before the softmax in attn_loss
n_iter_per_epoch = len(dict_train)//n_batch
n_iter_per_epoch_val = len(dict_val)//n_batch
n_epochs = 20 #N of epochs
reg_val = .0005    # L2 regularisation weight for the 1x1 conv layers
num_tst = 500      # validate_flickr30k() stops after document index num_tst

  from ._conv import register_converters as _register_converters
W1024 11:16:47.920156 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/native_module.py:54: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.



In [2]:
def calc_correctness(annot,heatmap,orig_img_shape):
    """Score one predicted heatmap against one annotation.

    Args:
        annot: annotation mapping; its 'bbox_norm' and 'bbox' entries are read.
        heatmap: predicted heatmap for the query.
        orig_img_shape: size of the original image, forwarded to the helpers.

    Returns:
        (bbox_correctness, hit_correctness): the result of `isCorrect` on the
        unioned annotation/prediction boxes at IoU threshold 0.5, and the
        result of `isCorrectHit` on the raw annotation boxes and the heatmap.
    """
    # Heatmap -> candidate boxes -> filtered boxes; only the normalised
    # boxes are needed from the filter output.
    candidates = heat2bbox(heatmap,orig_img_shape)
    filtered = filter_bbox(bbox_dict=candidates, order='xyxy')
    _, pred_boxes_norm, _ = filtered

    gt_union = union(annot['bbox_norm'])
    gt_boxes = annot['bbox']
    pred_union = union(pred_boxes_norm)

    iou_ok = isCorrect(gt_union, pred_union, iou_thr=.5)
    hit_ok = isCorrectHit(gt_boxes,heatmap,orig_img_shape)
    return iou_ok,hit_ok

def _is_groundable_query(query, annot):
    """Return True when a query/annotation pair should be counted in the evaluation."""
    # Empty phrase or no word indices: nothing to ground.
    if len(query.split())==0 or len(annot['idx'])==0:
        return False
    # Non-visual phrases and phrases without boxes are not groundable.
    if 'notvisual' in annot['category'] or len(annot['bbox_norm'])==0:
        return False
    # Finally apply the helper's size check on the unioned ground-truth box.
    return bool(check_percent(union(annot['bbox_norm'])))

def validate_flickr30k(dict_test):
    """Evaluate grounding accuracy on the first documents of `dict_test`.

    Reads the module-level graph handles `sess`, `input_img`, `text_batch`,
    `mode`, `heatmap_w`, `R_i`, `R_s` and the sample cap `num_tst`.

    Args:
        dict_test: mapping doc_id -> document; each document provides 'img',
            'size' and 'sentences' (sentence -> {query -> annotation}), and
            every annotation provides 'idx', 'category', 'bbox_norm', 'bbox'.

    Returns:
        (iou_acc, hit_acc): fraction of counted queries judged correct by the
        IoU criterion and by the hit criterion. Both are 0.0 when no query was
        counted (previously this raised ZeroDivisionError).
    """
    cnt_overall = 0
    cnt_correct = 0
    cnt_correct_hit = 0

    def _ratio(count):
        # Guard the running/final accuracy against "no groundable query yet".
        return count/cnt_overall if cnt_overall else 0.0

    for k,doc_id in enumerate(dict_test):
        # Document indices 0..num_tst are evaluated (same set as before);
        # stop instead of spinning through the remainder of the dict.
        if k>num_tst:
            break
        doc = dict_test[doc_id]
        img = np.reshape(cv2.resize(doc['img'],(299,299)),(1,299,299,3))
        orig_img_shape = doc['size']
        sen_batch = list(doc['sentences'].keys())
        # One copy of the image per sentence so the batch dimensions match.
        img_batch = np.repeat(img,len(sen_batch),axis=0)
        tensor_list = [heatmap_w, R_i, R_s]
        feed_dict = {input_img: img_batch, text_batch: sen_batch, mode: 'test'}
        qry_heats, qry_scores, sen_score = sess.run(tensor_list, feed_dict)
        for c,sen in enumerate(sen_batch):
            for query,annot in doc['sentences'][sen].items():
                #reject not groundable/acceptable queries
                if not _is_groundable_query(query, annot):
                    continue
                #if reaches this point, it is groundable/acceptable
                cnt_overall+=1

                idx = annot['idx']
                # All-zero word scores cannot be used as averaging weights;
                # such a query is counted as attempted but incorrect.
                if np.mean(qry_scores[c,idx])==0:
                    continue
                # Score-weighted average of the per-word heatmaps of the query.
                heatmap = np.average(qry_heats[c,idx,:], weights = qry_scores[c,idx], axis=0)
                bbox_c,hit_c = calc_correctness(annot,heatmap,orig_img_shape)
                cnt_correct+=bbox_c
                cnt_correct_hit+=hit_c

        prnt = 'Sample {}/{}, IoU_acc:{:.2f}, Hit_acc:{:.2f} \r'.format(
            k,num_tst,_ratio(cnt_correct),_ratio(cnt_correct_hit))
        sys.stdout.write(prnt)
        sys.stdout.flush()

    hit_acc = _ratio(cnt_correct_hit)
    iou_acc = _ratio(cnt_correct)
    return iou_acc,hit_acc

def batch_gen(ids, annot_dict, txn):
    """Sample one random training batch of images and captions.

    Reads the module-level batch size `n_batch`.

    Args:
        ids: sequence of image ids (strings) to sample from.
        annot_dict: mapping id -> annotation; annot_dict[id]['sentences'] is a
            sequence from which one caption is drawn at random.
        txn: LMDB read transaction; `txn.get(id_bytes)` returns the encoded image.

    Returns:
        (img_batch, cap_batch): float32 array of shape (n_batch, 299, 299, 3)
        holding RGB images, and a list of n_batch captions in the same order.

    Raises:
        ValueError: if `ids` holds fewer than n_batch entries, in which case a
            batch of distinct ids can never be assembled (this used to loop
            forever). The sampler still retries indefinitely if too few of the
            ids are actually readable.
    """
    if len(ids) < n_batch:
        raise ValueError('batch_gen needs at least n_batch=%d ids, got %d' % (n_batch, len(ids)))

    img_batch = np.empty((n_batch, 299, 299, 3), dtype='float32')
    cap_batch = []
    #currently, it takes negative samples randomly from "all" dataset
    #it randomly picks any batch, so it doesn't have ending
    seen = set()
    for i in range(n_batch):
        # Draw until we get an unseen id whose image can be read and decoded.
        while True:
            choice_id = random.choice(ids)
            if choice_id in seen: #we don't want to have repetitive img/caps in a batch
                continue
            imgbin = txn.get(choice_id.encode('utf-8'))
            if not imgbin: # key missing from the LMDB (None) or empty record
                continue
            buff = np.frombuffer(imgbin, dtype='uint8')
            imgbgr = cv2.imdecode(buff, cv2.IMREAD_COLOR)
            if imgbgr is None: # bytes present but not a decodable image
                continue
            break
        seen.add(choice_id)

        # OpenCV decodes to BGR; reorder channels to RGB before resizing.
        img = imgbgr[:,:,[2,1,0]]
        img_batch[i,:,:,:] = cv2.resize(img,(299,299))

        sentence = random.choice(annot_dict[choice_id]['sentences'])
        cap_batch.append(sentence)
    return img_batch, cap_batch

def attn_loss(e_w,v,e_s):
    """Build the caption/image matching loss over all pairs in a batch.

    Every caption (index b) is scored against every image (index c), once
    from word embeddings and once from the sentence embedding. The diagonal
    of each pair-score matrix is treated as the matched pairs.

    Args (shapes as given by the original inline note):
        e_w: word embeddings, ?xTxD.
        v: stacked visual feature maps, ?xNx4xD (N locations, 4 levels).
        e_s: sentence embeddings, ?xD.

    Returns:
        Scalar tensor: L1_w + L2_w + L1_s + L2_s, the mean negative
        log-posteriors of the matched pairs in both directions at word and
        sentence level.

    Reads the module-level constants `gamma_1` and `gamma_2`.
    """
    #e: ?xTxD, v: ?xNx4xD, e_bar: ?xD
    with tf.variable_scope('attention_loss'):
        ###word-level###
        #heatmap
        h = tf.nn.relu(tf.einsum('bij,cklj->bcikl',e_w,v)) #pair-wise ev^T: ?x?xTxNx4
        #attention
        # Heatmap-weighted sum of visual features over the N locations.
        a = tf.einsum('bcijl,cjlk->bcikl',h,v) #?x?xTxDx4 attnded visual reps for each of T words for all pairs
        #pair-wise score
        # Normalise both sides along the feature axis so the einsum below is a cosine.
        a_norm = tf.nn.l2_normalize(a,axis=3)
        e_w_norm = tf.nn.l2_normalize(e_w,axis=2)
        R_ik = tf.einsum('bcilk,bil->bcik',a_norm,e_w_norm) #cosine for T (words,img_reps) for all pairs
        #level dropout (disabled)
        #R_ik_sh = R_ik.get_shape().as_list()
        #R_ik = tf.layers.dropout(R_ik,rate=0.5,noise_shape=[1,1,1,R_ik_sh[3]],
        #                         training=isTraining)
        # Keep the best-scoring level for each word.
        R_i = tf.reduce_max(R_ik,axis=-1) #?x?xT
        # Equals (1/gamma_1) * log(sum_t exp(gamma_1 * R_i)): a smooth maximum over the T word positions.
        R = tf.log(tf.pow(tf.reduce_sum(tf.exp(gamma_1*R_i),axis=2),1/gamma_1)) #?x? cap-img pairs
        #posterior probabilities
        # Softmax along one axis of the pair matrix, then keep the diagonal (matched pairs).
        P_DQ = tf.diag_part(tf.nn.softmax(gamma_2*R,axis=0)) #P(cap match img)
        P_QD = tf.diag_part(tf.nn.softmax(gamma_2*R,axis=1)) #p(img match cap)
        #losses
        L1_w = -tf.reduce_mean(tf.log(P_DQ))
        L2_w = -tf.reduce_mean(tf.log(P_QD))

        ###sentence-level###
        #heatmap
        h_s = tf.nn.relu(tf.einsum('bj,cklj->bckl',e_s,v)) #pair-wise e_bar*v^T: ?x?xNx4
        #attention
        a_s = tf.einsum('bcjk,cjkl->bclk',h_s,v) #?x?xDx4 attnded visual reps for sen. for all pairs
        #pair-wise score
        a_s_norm = tf.nn.l2_normalize(a_s,axis=2)
        e_s_norm = tf.nn.l2_normalize(e_s,axis=1)
        R_sk = tf.einsum('bclk,bl->bck',a_s_norm,e_s_norm) #cosine for (sen,img_reps) for all pairs
        # Best level per pair; there is no pooling over words at sentence level.
        R_s = tf.reduce_max(R_sk,axis=-1) #?x?
        #posterior probabilities
        P_DQ_s = tf.diag_part(tf.nn.softmax(gamma_2*R_s,axis=0)) #P(cap match img)
        P_QD_s = tf.diag_part(tf.nn.softmax(gamma_2*R_s,axis=1)) #P(img match cap)
        #losses
        L1_s = -tf.reduce_mean(tf.log(P_DQ_s))
        L2_s = -tf.reduce_mean(tf.log(P_QD_s))
        #overall loss
        loss = L1_w + L2_w + L1_s + L2_s

    return loss
    
def attn(e_w,v,e_s):
    """Build per-sample attention heatmaps and scores (caption i with image i only).

    Unlike `attn_loss`, which scores all caption/image pairs, the einsums
    here share the batch index between text and image.

    Args (shapes as given by the original inline note):
        e_w: word embeddings, ?xTxD.
        v: stacked visual feature maps, ?xNx4xD.
        e_s: sentence embeddings, ?xD.

    Returns:
        heatmap_wd: per-word heatmap of the best-scoring level, reshaped to ?xTxN0xN0
            with N0 = int(sqrt(N)).
        heatmap_sd: sentence heatmap of the best-scoring level, reshaped to ?xN0xN0.
        R_i: per-word score, max over levels (?xT).
        R_s: per-sentence score, mean over levels (?).

    Also creates the named tensors 'level_heatmap_word' and
    'level_heatmap_sentence' (not returned) and registers two histogram
    summaries of the selected level indices.
    """
    ## Inputs: local and global cap and img features ##
    ## Output: Heatmap for each word, Global Heatmap, Attnded Vis features, Corr-vals
    #e: ?xTxD, v: ?xNx4xD, e_bar: ?xD
    with tf.variable_scope('attention'):
        ###word-level###
        #heatmap pool
        h = tf.nn.relu(tf.einsum('bij,bklj->bikl',e_w,v)) #pair-wise ev^T: ?xTxNx4
        #attention
        a = tf.einsum('bijk,bjkl->bilk',h,v) #?xTxDx4 attnded visual reps for each of T words
        #pair-wise score
        a_norm = tf.nn.l2_normalize(a,axis=2)
        e_w_norm = tf.nn.l2_normalize(e_w,axis=2)
        R_ik = tf.einsum('bilk,bil->bik',a_norm,e_w_norm) #cosine for T (words,img_reps) for all pairs
        R_ik = tf.identity(R_ik,name='level_score_word')
        R_i = tf.reduce_max(R_ik,axis=-1,name='score_word') #?xT
        #R = tf.log(tf.pow(tf.reduce_sum(tf.exp(gamma_1*R_i),axis=1),1/gamma_1)) #? corrs
        #heatmap
        idx_i = tf.argmax(R_ik,axis=-1,name='level_index_word') #?xT index of the featuremap which maximizes R_i
        with tf.name_scope('summaries'):
            tf.summary.histogram('histogram_w', idx_i)
        # Build (batch, word, level) index triples so gather_nd can pick, for
        # every word, the heatmap of its best-scoring level.
        ii,jj = tf.meshgrid(tf.range(tf.shape(idx_i)[0]),tf.range(tf.shape(idx_i)[1]),indexing='ij')
        # Cast before stacking with idx_i, which the code treats as int64.
        ii = tf.cast(ii,tf.int64)
        jj = tf.cast(jj,tf.int64)
        batch_idx_i = tf.stack([tf.reshape(ii,(-1,)),
                                tf.reshape(jj,(-1,)),
                                tf.reshape(idx_i,(-1,))],axis=1) #?Tx3 indices of argmax
        # N must be statically known and is assumed to be a perfect square (N0 x N0 grid).
        N0=int(np.sqrt(h.get_shape().as_list()[2]))
        # Move the level axis ahead of the location axis so indexing by level yields a length-N map.
        h_max = tf.gather_nd(tf.transpose(h,[0,1,3,2]),batch_idx_i) #?TxN retrieving max heatmaps
        heatmap_wd = tf.reshape(h_max,[tf.shape(h)[0],tf.shape(h)[1],N0,N0],name='heatmap_word')
        # All-level heatmaps; only reachable through the graph by name.
        heatmap_wd_l = tf.reshape(h,[tf.shape(h)[0],tf.shape(h)[1],N0,N0,tf.shape(h)[3]],name='level_heatmap_word')

        ###sentence-level###
        #heatmap pool
        h_s = tf.nn.relu(tf.einsum('bj,blkj->blk',e_s,v)) #pair-wise e_bar*v^T: ?xNx4
        #attention
        a_s = tf.einsum('bjk,bjki->bik',h_s,v) #?xDx4 attnded visual reps for sen.
        #pair-wise score
        a_s_norm = tf.nn.l2_normalize(a_s,axis=1)
        e_s_norm = tf.nn.l2_normalize(e_s,axis=1)
        R_sk = tf.einsum('bik,bi->bk',a_s_norm,e_s_norm) #cosine for (sen,img_reps)
        R_sk = tf.identity(R_sk,name='level_score_sentence')
        # NOTE(review): this averages over levels, whereas attn_loss takes the
        # max over levels for its sentence score - confirm the difference is intended.
        R_s = tf.reduce_mean(R_sk,axis=-1,name='score_sentence') #?
        #heatmap
        idx_k = tf.argmax(R_sk,axis=-1,name='level_index_sentence') #? index of the featuremap which maximizes R_i
        with tf.name_scope('summaries'):
            tf.summary.histogram('histogram_s', idx_k)
        # (batch, level) index pairs for selecting the best-level sentence heatmap.
        ii_k = tf.cast(tf.range(tf.shape(idx_k)[0]),dtype='int64')
        batch_idx_k = tf.stack([ii_k,idx_k],axis=1)
        N0_g=int(np.sqrt(h_s.get_shape().as_list()[1]))
        h_s_max = tf.gather_nd(tf.transpose(h_s,[0,2,1]),batch_idx_k) #?xN retrieving max heatmaps
        heatmap_sd = tf.reshape(h_s_max,[-1,N0_g,N0_g],name='heatmap_sentence')
        # The level count is taken from the word-level tensor `h`; h and h_s
        # both get their last axis from the level axis of `v`.
        heatmap_sd_l = tf.reshape(h_s,[-1,N0_g,N0_g,tf.shape(h)[3]],name='level_heatmap_sentence')

    return heatmap_wd, heatmap_sd, R_i, R_s

def add_1by1_conv(feat_map,n_layers,n_filters,name,regularizer):
    """Append a stack of 1x1 convolutions, each followed by a leaky ReLU.

    Args:
        feat_map: input feature-map tensor.
        n_layers: number of conv + activation stages to add.
        n_filters: per-stage filter counts; indexed 0..n_layers-1.
        name: prefix used for the enclosing and per-stage variable scopes.
        regularizer: kernel regularizer passed to every conv layer.

    Returns:
        The feature map after the last stage.
    """
    out = feat_map
    with tf.variable_scope(name+'_postConv'):
        for stage in range(n_layers):
            stage_scope = name + '_stage_%d' % stage
            with tf.variable_scope(stage_scope):
                projected = tf.layers.conv2d(out,
                                             filters=n_filters[stage],
                                             kernel_size=[1,1],
                                             kernel_regularizer=regularizer)
                out = tf.nn.leaky_relu(projected,alpha=.25)
    return out

def depth_selection(model):
    """Stack four VGG feature maps into one multi-level visual representation.

    Each selected endpoint of `model` is passed through three 1x1 conv stages
    of 1024 filters. The two conv4 endpoints are first bilinearly resized to
    the spatial size of the processed conv5_1 branch. The branches are stacked
    on a new level axis, the spatial grid is flattened, and the result is
    L2-normalised along the feature axis.

    Reads the module-level `regularizer`.

    Args:
        model: object indexable by VGG endpoint name.

    Returns:
        Tensor named 'stacked_image_feature_maps' of shape
        [batch, H*W, levels, channels].
    """
    # (endpoint key, branch name, resize to the reference grid first?)
    branch_specs = [
        ('vgg_16/conv5/conv5_1', 'v1', False),
        ('vgg_16/conv5/conv5_3', 'v2', False),
        ('vgg_16/conv4/conv4_1', 'v3', True),
        ('vgg_16/conv4/conv4_3', 'v4', True),
    ]
    with tf.variable_scope('stack_v'):
        ref_size = None
        branches = []
        for endpoint, tag, needs_resize in branch_specs:
            feat = tf.identity(model[endpoint],name=tag)
            if needs_resize:
                feat = tf.image.resize_images(feat, ref_size,
                                              method=tf.image.ResizeMethod.BILINEAR)
            feat = add_1by1_conv(feat,n_layers=3,n_filters=[1024,1024,1024],
                                 name=tag,regularizer=regularizer)
            if ref_size is None:
                # The first branch (v1) defines the reference spatial size.
                ref_size = feat.get_shape().as_list()[1:3]
            branches.append(feat)
        stacked = tf.stack(branches, axis=3)
        # Collapse the HxW grid into a single location axis.
        flat = tf.reshape(stacked,[-1,stacked.shape[1]*stacked.shape[2],stacked.shape[3],stacked.shape[4]])
        v_all = tf.nn.l2_normalize(flat, axis=-1, name='stacked_image_feature_maps')
    return v_all

In [None]:
# Graph construction: visual branch (VGG-16 + 1x1 convs), text branch (ELMo +
# dense projections), attention heads, loss, optimizer, saver and summaries.
# Several layers below are created without explicit names, so keep the
# statement order stable if checkpoints must stay loadable.
sess = tf.InteractiveSession(config=config)

# `mode` is fed as 'train' or 'test'. `isTraining` is only referenced by the
# commented-out dropout in attn_loss, so no live op in the visible cells uses it.
mode = tf.placeholder(tf.string, name='mode')
isTraining = tf.equal(mode, 'train')
# Read as a global by depth_selection() for the 1x1 conv kernels.
regularizer = tf.contrib.layers.l2_regularizer(reg_val)

with tf.device('/gpu:1'):
    #building visual model
    print('Building Visual Model...')
    input_img = tf.placeholder(tf.float32, (None,299,299,3), name='input_img')
    # pre_process / pre_trained_load are not defined in this notebook -
    # presumably provided by `from utils import *`.
    pre_processed_img = pre_process(input_img, 'vgg_preprocessing')
    vis_model = pre_trained_load(model_name='vgg_16', image_shape=(None,299,299,3),
                              input_tensor=pre_processed_img, session=sess, is_training=False, global_pool=True)

    v = depth_selection(vis_model) #(?,1225,4,1024)

    #building text model
    print('Building Text Model...')
    #sentence placeholder - list of sentences
    text_batch = tf.placeholder('string', shape=[None], name='text_input')
    #loading pre-trained ELMo
    elmo = hub.Module("../modules/ELMo", trainable=True)
    #getting ELMo embeddings
    elmo_embds = elmo(text_batch, signature="default", as_dict=True)
    lstm1_embd = elmo_embds['lstm_outputs1'] #?xTXD
    lstm2_embd = elmo_embds['lstm_outputs2'] #?xTXD
    w_embd = tf.identity(elmo_embds['elmo'], name='elmo_word_embd') #?xTXD
    #taking index of last word in each sentence
    idx = elmo_embds['sequence_len']-1
    # (row, last-token) index pairs for gather_nd below.
    batch_idx = tf.stack([tf.range(0,tf.size(idx),1),idx],axis=1)
    # Concatenate first of backward with last of forward to get sentence embeddings
    dim = lstm1_embd.get_shape().as_list()[-1]
    sen_embd_1 = tf.concat([lstm1_embd[:,0,int(dim/2):],
                            tf.gather_nd(lstm1_embd[:,:,:int(dim/2)],batch_idx)], axis=-1) #[batch,dim]
    sen_embd_2 = tf.concat([lstm2_embd[:,0,int(dim/2):],
                            tf.gather_nd(lstm2_embd[:,:,:int(dim/2)],batch_idx)], axis=-1) #[batch,dim]
    sen_embd = tf.concat([tf.expand_dims(sen_embd_1,axis=2),
                               tf.expand_dims(sen_embd_2,axis=2)], axis=2, name='elmo_sen_embd') #[batch,dim,2]
    # Bias-free dense over the trailing axis: a learned weighted sum of the two LSTM layers.
    e_s = tf.layers.dense(sen_embd,units=1,use_bias=False) #?xDx1
    e_s = tf.squeeze(e_s,axis=2)
    # Two dense + leaky-ReLU layers project the sentence vector to 1024 dims,
    # followed by L2 normalisation.
    e_s = tf.layers.dense(e_s, units=1024)
    e_s = tf.nn.leaky_relu(e_s,alpha=.25)
    e_s = tf.layers.dense(e_s, units=1024)
    e_s = tf.nn.leaky_relu(e_s,alpha=.25)
    e_s = tf.nn.l2_normalize(e_s, axis=-1, name='sen_embedding')

    # Same projection structure (separate weights) for the per-word embeddings.
    e_w = tf.layers.dense(w_embd, units=1024)
    e_w = tf.nn.leaky_relu(e_w,alpha=.25)
    e_w = tf.layers.dense(e_w, units=1024)
    e_w = tf.nn.leaky_relu(e_w,alpha=.25)
    e_w = tf.nn.l2_normalize(e_w, axis=-1, name='w_embedding')

    # Inference heads: heatmaps and scores used by validate_flickr30k().
    heatmap_w,heatmap_s,R_i,R_s = attn(e_w,v,e_s)

# The loss ops are created outside the explicit device block.
loss = attn_loss(e_w,v,e_s) + tf.losses.get_regularization_loss()
loss = tf.identity(loss, name='loss')

# The learning rate is fed per step so the schedule can live in Python.
lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')
opt = tf.train.AdamOptimizer(lr)

# Train every trainable variable except the weights listed by the visual model.
train_vars = list(set(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)) - set(vis_model.model_weights))
train_op = opt.minimize(loss, var_list=train_vars, name='train_op')

global_saver = tf.train.Saver()

train_writer = tf.summary.FileWriter('./logs/vgg/mscoco', sess.graph)
# Merges the histogram summaries registered inside attn().
merged = tf.summary.merge_all()

W1024 11:17:15.478177 140192113698624 deprecation_wrapper.py:119] From /home/hassan/CVPR_final/v0_final/codes/utils.py:300: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.



Building Visual Model...


W1024 11:17:16.646660 140192113698624 deprecation.py:323] From <ipython-input-2-e21064ea4eb8>:189: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
W1024 11:17:16.649092 140192113698624 deprecation.py:506] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1024 11:17:16.990737 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/module.py:104: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1024 11:17:16.991722 140192113698624 deprecation_wrapper.py:119] From

Building Text Model...


W1024 11:17:17.590759 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/native_module.py:388: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

W1024 11:17:17.602658 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/native_module.py:338: The name tf.train.init_from_checkpoint is deprecated. Please use tf.compat.v1.train.init_from_checkpoint instead.

W1024 11:17:17.785053 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/native_module.py:342: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

W1024 11:17:17.785989 140192113698624 deprecation_wrapper.py:119] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow_hub/native_module.py:345: The name tf.train.SaverDef is deprecated. Please use tf.compat.v1.tra

In [None]:
# Training driver: initialise variables, load the pretrained VGG weights, then
# alternate one epoch of random MSCOCO batches with a Flickr30k validation pass,
# checkpointing whenever a validation metric improves.
condition = 'ELMo_VGG_MSCOCO'
log_path = './logs/log_'+condition+'.txt'
ckpt_prefix = '../saved_models/model_'+condition

# tf.train.Saver.save fails when the checkpoint's parent directory is missing,
# so make sure both output directories exist before the long run starts.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
os.makedirs(os.path.dirname(ckpt_prefix), exist_ok=True)
# Start a fresh log for this run; one line per epoch is appended below.
# (Previously the file was reopened with 'w' each epoch, so only the last
# epoch survived, and the handle was never closed.)
with open(log_path, 'w'):
    pass

print('Initializing...')
_ = sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
#loading pretrained vgg weights (after the initializer, so they are not overwritten)
print('Loading visual path model (vgg)...')
vis_model.load_weights()

#loop on training data
print('Start training...')
iou_acc = np.zeros((n_epochs,))
hit_acc = np.zeros((n_epochs,))
train_loss = np.zeros((n_epochs,))
lr_value_0 = .001
max_val_iou = 0
max_val_hit = 0
for e in range(n_epochs):
    print('\n\n=====Epoch: %d'%e)
    avg_loss = 0
    # Step schedule: base LR for epochs 0-8, half for 9-13, quarter from 14 on.
    if e<9:
        lr_value=lr_value_0
    elif e<14:
        lr_value=lr_value_0/2.0
    else:
        lr_value=lr_value_0/4.0

    print('===Train')
    for i in range(n_iter_per_epoch):
        img_batch, cap_batch = batch_gen(ids_train, dict_train, txn)
        feed_dict = {input_img: img_batch, text_batch: cap_batch, mode: 'train', lr: lr_value}
        if i%100==0:
            # Summaries are only written every 100 steps, so only evaluate them then.
            summary, loss_val, _ = sess.run([merged, loss, train_op], feed_dict)
            train_writer.add_summary(summary, n_iter_per_epoch*e + i)
        else:
            loss_val, _ = sess.run([loss, train_op], feed_dict)
        avg_loss+=loss_val
        prnt = 'Sample {}/{}, train_loss:{:.4f} \r'.format(i*n_batch, n_iter_per_epoch*n_batch, avg_loss/float(i+1))
        sys.stdout.write(prnt)
        sys.stdout.flush()
    # Mean over the iterations actually run (was divided by n_iter_per_epoch+1).
    train_loss[e] = avg_loss/float(max(n_iter_per_epoch, 1))

    #validation phase
    print('\n===Validation')
    iou_acc[e],hit_acc[e]=validate_flickr30k(dict_val)
    # Newline-terminated so the appended per-epoch records stay on separate lines.
    sv = 'Epoch:{}, Train_loss:{}, Val_iou_acc:{}, Val_hit_acc:{}\n'.format(e,train_loss[e],iou_acc[e],hit_acc[e])
    with open(log_path, 'a') as log_file:
        log_file.write(sv)
    if hit_acc[e]>max_val_hit:
        max_val_hit = hit_acc[e]
        print('\nHit accuracy improved. Saving best model...\r')
        global_saver.save(sess, ckpt_prefix+'_best_hit')
    if iou_acc[e]>max_val_iou:
        max_val_iou = iou_acc[e]
        print('\nIoU accuracy improved. Saving best model...\r')
        global_saver.save(sess, ckpt_prefix+'_best_iou')

print('\n\nTraining done.')
# Push any buffered summary events to disk.
train_writer.flush()
#saving the session
print('Saving model...')
global_saver.save(sess, ckpt_prefix)
print('Saving done.')

plt.figure()
plt.plot(train_loss, label='Train loss '+condition)
plt.legend()
plt.show()
plt.figure()
plt.plot(100.*iou_acc, label='Validation iou_acc '+condition)
plt.plot(100.*hit_acc, label='Validation hit_acc '+condition)
plt.legend()
plt.show()

Initializing...


W1024 11:17:39.442469 140192113698624 deprecation.py:323] From /home/hassan/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Loading visual path model (vgg)...
Start training...


=====Epoch: 0
===Train
Sample 82720/82752, train_loss:7.4928  
===Validation
Sample 500/500, IoU_acc:0.30, Hit_acc:0.58 
Hit accuracy improved. Saving best model...

IoU accuracy improved. Saving best model...


=====Epoch: 1
===Train
Sample 82720/82752, train_loss:6.1617 
===Validation
Sample 500/500, IoU_acc:0.31, Hit_acc:0.57 
IoU accuracy improved. Saving best model...


=====Epoch: 2
===Train
Sample 48736/82752, train_loss:5.8542 