In [None]:
from __future__ import division, print_function, absolute_import
from keras_self_attention import SeqSelfAttention
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
import os
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras.preprocessing import image
from PIL import Image
import numpy as np
import random
import matplotlib.patheffects as PathEffects
import efficientnet.tfkeras as efn 
import tensorflow as tensorflow
import tensorflow as tf
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam, Nadam, Adadelta,SGD
from tensorflow.keras import regularizers

from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical 
import math as m
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from keras_self_attention import SeqSelfAttention

def attach_attention_module(net, attention_module):
    """Apply the attention block selected by name to ``net`` and return the result.

    Args:
        net: Feature tensor handed to the selected attention block.
        attention_module: ``'se_block'`` or ``'cbam_block'``.

    Returns:
        The output of the selected attention block.

    Raises:
        Exception: If ``attention_module`` is any other value.
    """
    if attention_module == 'se_block':
        # NOTE(review): se_block is not defined in the visible part of this file.
        return se_block(net)
    if attention_module == 'cbam_block':
        return cbam_block(net)
    raise Exception("'{}' is not supported attention module!".format(attention_module))

def cbam_block(cbam_feature, ratio=2):
    """Apply the spatial-attention stage of a CBAM block to ``cbam_feature``.

    CBAM is described in https://arxiv.org/abs/1807.06521. The
    channel-attention stage is currently disabled here, so ``ratio`` is
    accepted for interface compatibility but has no effect.
    """
    # Disabled stage, kept for reference:
    #     cbam_feature = channel_attention(cbam_feature, ratio)
    return spatial_attention(cbam_feature)

def channel_attention(input_feature, ratio=8):
    """Re-weight the channels of ``input_feature`` with a shared two-layer Dense gate.

    Global average- and max-pooled descriptors each pass through the same
    pair of Dense layers (bottleneck of ``channel // ratio`` units), are
    summed, squashed with a sigmoid and multiplied back onto the input.

    Args:
        input_feature: Feature tensor; the channel axis is picked from
            ``K.image_data_format()``.
        ratio: Reduction factor of the bottleneck Dense layer.

    Returns:
        ``input_feature`` multiplied by the channel gate.
    """
    channels_first = K.image_data_format() == "channels_first"
    channel = input_feature.shape[1 if channels_first else -1]

    # Both pooled descriptors go through the very same two Dense layers.
    squeeze = Dense(channel//ratio,
                    activation='relu',
                    kernel_initializer='he_normal',
                    use_bias=True,
                    bias_initializer='zeros')
    excite = Dense(channel,
                   kernel_initializer='he_normal',
                   use_bias=True,
                   bias_initializer='zeros')

    def _gate_logits(pooling_layer):
        # Pool to one value per channel, then run the shared Dense pair,
        # checking the static shape after every step.
        descriptor = pooling_layer(input_feature)
        descriptor = tf.keras.layers.Reshape((1,1,channel))(descriptor)
        assert descriptor.shape[1:] == (1,1,channel)
        descriptor = squeeze(descriptor)
        assert descriptor.shape[1:] == (1,1,channel//ratio)
        descriptor = excite(descriptor)
        assert descriptor.shape[1:] == (1,1,channel)
        return descriptor

    avg_logits = _gate_logits(GlobalAveragePooling2D())
    max_logits = _gate_logits(tf.keras.layers.GlobalMaxPooling2D())

    gate = tf.keras.layers.Add()([avg_logits, max_logits])
    gate = Activation('sigmoid')(gate)

    if channels_first:
        # Move the channel axis back in front of the two singleton axes.
        gate = Permute((3, 1, 2))(gate)

    return multiply([input_feature, gate])

def spatial_attention(input_feature, kernel_size=3):
    """Gate every spatial location of ``input_feature`` with a learned sigmoid map.

    The mean and the max over axis 3 of the feature map are concatenated into
    a 2-channel tensor and passed through a single-filter, bias-free
    convolution with sigmoid activation. The resulting one-channel map is
    multiplied onto the input.

    Args:
        input_feature: Feature tensor laid out per ``K.image_data_format()``.
        kernel_size: Kernel size of the convolution that produces the
            attention map. Defaults to 3, the value that used to be
            hard-coded in the body.

    Returns:
        ``input_feature`` multiplied by the attention map.
    """
    channels_first = K.image_data_format() == "channels_first"

    # The pooling below reduces axis 3, so put the channels there first.
    if channels_first:
        cbam_feature = Permute((2,3,1))(input_feature)
    else:
        cbam_feature = input_feature

    avg_pool = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
    assert avg_pool.shape[-1] == 1
    max_pool = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
    assert max_pool.shape[-1] == 1
    concat = Concatenate(axis=3)([avg_pool, max_pool])
    assert concat.shape[-1] == 2
    cbam_feature = Conv2D(filters=1,
                          kernel_size=kernel_size,
                          strides=1,
                          padding='same',
                          activation='sigmoid',
                          kernel_initializer='he_normal',
                          use_bias=False)(concat)
    assert cbam_feature.shape[-1] == 1

    if channels_first:
        # Undo the permutation applied above.
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])


def one_stream(image_input_shape,embedding_size,n_class,height,width):
    """Build the single-image branch: EfficientNet-B3 trunk plus two attention stages.

    Args:
        image_input_shape: Shape (without batch axis) of the image input.
        embedding_size: Not used by this function.
        n_class: Width of the auxiliary label input.
        height: Image height given to the EfficientNet constructor.
        width: Image width given to the EfficientNet constructor.

    Returns:
        Tuple ``(features, label_input, image_input)`` where ``features`` is
        the pooled, dropout-regularised feature tensor.
    """
    label_input = Input(shape=(n_class,))
    image_input = Input(shape=image_input_shape)

    efficientnet = efn.EfficientNetB3(input_shape=(height, width, 3),
                                      include_top=False,
                                      weights='noisy-student')
    # Cut the network at the output of layer 'block7a_project_bn'.
    trunk = Model(efficientnet.input, efficientnet.get_layer('block7a_project_bn').output)
    backbone_maps = trunk(image_input)
    print("backbone :", backbone_maps.shape)

    features = backbone_maps
    for _ in range(2):
        features = SeparableConv2D(filters=512, kernel_size=(3, 3), padding='same')(features)
        features = spatial_attention(features)
    features = BatchNormalization()(features)
    features = GlobalAveragePooling2D()(features)
    features = Dropout(0.3)(features)

    print("spatial: ", features.shape)

    return features, label_input, image_input
    
# Number of frames per input clip; read by fusion_3 when it builds its input.
seq_len = 1


def fusion_3(image_input_shape,embedding_size,n_class,height,width):
    """Build the two-branch (temporal + spatial) feature extractor.

    A truncated EfficientNet-B3 is applied to every frame of the clip. The
    temporal branch pools each frame to a vector and runs LSTM, self-attention
    and average/max pooling over time. The spatial branch averages the frame
    feature maps over the time axis and applies two separable-conv +
    ``cbam_block`` stages. Both branch outputs are concatenated.

    Args:
        image_input_shape: Not used; the clip length comes from the
            module-level ``seq_len`` instead.
        embedding_size: Not used by this function.
        n_class: Width of the auxiliary label input.
        height: Frame height.
        width: Frame width.

    Returns:
        Tuple ``(fused_features, label_input, clip_input)``.
    """
    label_input = Input(shape=(n_class,))
    clip_input = Input(batch_shape=(None, seq_len, height, width, 3))

    efficientnet = efn.EfficientNetB3(input_shape=(height, width, 3),
                                      include_top=False,
                                      weights='noisy-student')
    # Cut the network at the output of layer 'block7a_project_bn'.
    trunk = Model(efficientnet.input, efficientnet.get_layer('block7a_project_bn').output)
    frame_maps = tf.keras.layers.TimeDistributed(trunk)(clip_input)
    print("TimeDistributed", frame_maps.shape)

    # ---- Temporal branch ----
    frame_vectors = tf.keras.layers.TimeDistributed(GlobalAveragePooling2D())(frame_maps)
    recurrent = LSTM(256,
                     return_sequences=True,
                     input_shape=(frame_vectors.shape[1], frame_vectors.shape[2]),
                     name="lstm_layer_in")
    temporal = recurrent(frame_vectors)
    temporal = SeqSelfAttention(attention_activation='sigmoid')(temporal)
    time_avg = GlobalAveragePooling1D()(temporal)
    time_max = GlobalMaxPooling1D()(temporal)
    temporal = concatenate([time_avg, time_max])
    temporal = Dropout(0.3)(temporal)
    print("Temporal: ", temporal.shape)

    # ---- Spatial branch ----
    spatial_maps = tf.math.reduce_mean(frame_maps, axis=1)
    for _ in range(2):
        spatial_maps = SeparableConv2D(filters=512, kernel_size=(3, 3), padding='same')(spatial_maps)
        spatial_maps = cbam_block(spatial_maps)
    spatial_maps = BatchNormalization()(spatial_maps)
    spatial_vector = GlobalAveragePooling2D()(spatial_maps)
    spatial_vector = Dropout(0.3)(spatial_vector)
    # The shape reported here is that of the maps before global pooling.
    print("Spatial: ", spatial_maps.shape)

    # ---- Fusion ----
    fused = tf.keras.layers.Concatenate()([spatial_vector, temporal])
    fused = Dropout(0.3)(fused)
    print("Fusion: ", fused.shape)
    return fused, label_input, clip_input

def fc_reid(x,y,n_class):
    """Attach the fully connected identity classifier to feature tensor ``x``.

    Args:
        x: Feature tensor from the backbone.
        y: Label input tensor, fed to an Embedding layer.
        n_class: Number of identity classes of the softmax output.

    Returns:
        The softmax tensor produced by the layer named ``'reid_output'``.

    NOTE(review): the embedding/``'l2_loss'`` branch is built but not
    returned, so only the softmax leaves this function. The branch also reads
    ``embedding_size`` from module scope rather than from a parameter.
    """
    embedding = Flatten()(x)
    embedding = Dense(1024, activation='relu')(embedding)
    embedding = Dropout(0.5)(embedding)
    embedding = Dense(128, activation='relu')(embedding)
    embedding = BatchNormalization()(embedding)

    class_probs = Dense(n_class, activation='softmax', name='reid_output')(embedding)

    # Squared distance between each embedding and the first embedded label
    # entry; computed here but discarded (see the note in the docstring).
    centers = Embedding(n_class, embedding_size)(y)
    center_distance = Lambda(
        lambda pair: K.sum(K.square(pair[0] - pair[1][:, 0]), 1, keepdims=True),
        name='l2_loss')([embedding, centers])

    return class_probs


def create_model(image_input_shape,embedding_size,n_class,height,width):
    """Assemble the full network: ``fusion_3`` features followed by ``fc_reid``.

    Returns:
        A Keras model with inputs ``[image_input, label_input]`` and a single
        output, the identity softmax.
    """
    fused_features, label_input, image_input = fusion_3(
        image_input_shape, embedding_size, n_class, height, width)
    identity_probs = fc_reid(fused_features, label_input, n_class)

    return tf.keras.models.Model(inputs=[image_input, label_input],
                                 outputs=[identity_probs])

def triplet_center_loss(y_true, y_pred, n_classes= 10, alpha=0.38):
    """
    Margin loss computed from per-sample predictions and class labels.

    NOTE(review): an earlier version of this docstring described y_pred as a
    list of anchor/positive/negative encodings. The code below does not
    unpack such a list, so this description follows the code only.

    Arguments:
    y_true -- labels; reshaped to (batch, 1) and compared against the class
              indices 0..n_classes-1, so presumably one class id per sample
              -- TODO confirm against the data pipeline.
    y_pred -- predictions; reshaped to (batch, 1), so presumably one scalar
              per sample -- TODO confirm against the model output.
    n_classes -- number of class indices the label mask is built over.
    alpha -- margin added before the final reduction.
    Returns:
    loss -- K.max(y_pred - minimums + alpha, 0)
    """
    # Debug output: prints the tensor object itself, not only its shape.
    print('y_pred.shape = ', y_pred)

    # NOTE(review): total_lenght is never read below.
    total_lenght = y_pred.shape.as_list()[-1]
    #     print('total_lenght=',  total_lenght)
    #     total_lenght =12

    # repeat y_true for n_classes and == np.arange(n_classes)
    # repeat also y_pred and apply mask
    # obtain min for each column min vector for each class

    classes = tf.range(0, n_classes,dtype=tf.float32)
    # (batch, 1) -> (batch, n_classes, 1): one copy of each prediction per class.
    y_pred_r = tf.reshape(y_pred, (tf.shape(y_pred)[0], 1))
    y_pred_r = tf.keras.backend.repeat(y_pred_r, n_classes)

    # Same expansion for the labels.
    y_true_r = tf.reshape(y_true, (tf.shape(y_true)[0], 1))
    y_true_r = tf.keras.backend.repeat(y_true_r, n_classes)

    # True where a sample's label equals the class index of that column.
    mask = tf.equal(y_true_r[:, :, 0], classes)

    #mask2 = tf.ones((tf.shape(y_true_r)[0], tf.shape(y_true_r)[1]))  # todo inf

    # use tf.where(tf.equal(masked, 0.0), np.inf*tf.ones_like(masked), masked)

    # Keep each prediction only in its label's column, then turn every zero
    # into +inf so it cannot win the minimum below. A prediction that is
    # exactly 0.0 is replaced as well.
    masked = y_pred_r[:, :, 0] * tf.cast(mask, tf.float32) #+ (mask2 * tf.cast(tf.logical_not(mask), tf.float32))*tf.constant(float(2**10))
    masked = tf.where(tf.equal(masked, 0.0), np.inf*tf.ones_like(masked), masked)

    # Minimum over the class axis, one value per sample.
    minimums = tf.math.reduce_min(masked, axis=1)

    # NOTE(review): the second positional argument of K.max is `axis`, so this
    # reduces along axis 0 rather than clamping at zero. If a hinge was
    # intended, K.maximum(..., 0.) would be the call -- confirm intent.
    loss = K.max(y_pred - minimums +alpha ,0)

    # obtain a mask for each pred
    return loss

def get_lr_metric(optimizer):
    """Build a metric-style callable that reports ``optimizer.lr``.

    The returned function is named ``lr`` and ignores both of its arguments.
    """
    def lr(y_true, y_pred):
        # Read the attribute on every call so later changes are reflected.
        current_rate = optimizer.lr
        return current_rate

    return lr

 
# NOTE(review): this optimizer instance and the metric derived from it are not
# passed to model.compile() below, which creates its own LazyAdam(0.0001).
optimizer = tfa.optimizers.LazyAdam()
lr_metric = get_lr_metric(optimizer)

# Image size, loss weighting and model dimensions used by the cells below.
height, width =299,100
loss_weights = [1, 0.0005]
n_class = 751  # presumably the number of training identities -- TODO confirm
embedding_size =128
batch_size=64
nb_frame = 1

input_image_shape = (nb_frame,height,width,3) #if two stream
# input_image_shape = (height,width,3) #if one stream

model = create_model(input_image_shape,embedding_size,n_class,height,width) 
# model.load_weights("combine.hdf5")

'''LOAD WEIGHTS'''
# model.load_weights("model_combine_2/variables/variables")
# model.load_weights('model_MTDNN/variables/variables')
# NOTE(review): two losses and two loss weights are given here, while
# create_model() builds a model with a single output -- confirm this pairing.
model.compile(loss=["categorical_crossentropy", triplet_center_loss],optimizer=tfa.optimizers.LazyAdam(0.0001),metrics=['accuracy'],loss_weights=loss_weights)
model.summary()




In [None]:
# Notebook cell: display the model's output tensor(s).
model.output


In [None]:
from tensorflow.keras.utils import plot_model
# Notebook cell: draw the model graph, with layer names and shapes, to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:

def cross_entropy_loss(real_score, predict_score):
    """Binary cross-entropy between the sigmoids of two raw score tensors.

    Both arguments are squashed with a logistic sigmoid first; the squashed
    ``real_score`` acts as the target probability.
    """
    target_prob = 1 / (1 + K.exp(-real_score))
    estimated_prob = 1 / (1 + K.exp(-predict_score))

    positive_term = -target_prob * K.log(estimated_prob)
    negative_term = (1 - target_prob) * K.log(1 - estimated_prob)
    return positive_term - negative_term
def write(path, content):
    """Append ``content`` to the file at ``path``, creating the file if needed."""
    with open(path, "a+") as log_file:
        log_file.write(content)

def safe_remove(path):
    """Delete ``path`` if it exists.

    Returns:
        True when something was removed, False when ``path`` did not exist.
    """
    if not os.path.exists(path):
        return False
    os.remove(path)
    return True

def extract_info(dir_path):
    """Collect one ``(person, camera)`` pair per file in ``dir_path``.

    Files are visited in sorted name order and exactly one pair is appended
    per file, so the result lines up index-for-index with the sorted listing.

    Two naming schemes are handled:

    * names containing ``'f'`` or ``'s'`` (e.g. ``0001_c1s1_000151_00.jpg``):
      person is the first ``_``-separated field, camera is the second
      character of the second field;
    * every other name (the "grid" scheme): person and camera are the first
      two ``_``-separated fields.

    In the first scheme a field that cannot be read falls back to ``-1``.
    It used to keep the value parsed from the previous file, or raise
    ``UnboundLocalError`` when the very first file was affected.

    Args:
        dir_path: Directory whose file names are parsed.

    Returns:
        List of ``(person, camera)`` integer tuples.

    Raises:
        ValueError, IndexError: If a grid-scheme name cannot be parsed, or if
            the camera character of the first scheme is not a digit.
    """
    infos = []
    for image_name in sorted(os.listdir(dir_path)):
        fields = image_name.split('_')
        # Reset for every file so nothing leaks over from the previous one.
        person, camera = -1, -1
        if 'f' in image_name or 's' in image_name:
            try:
                person = int(fields[0])
            except ValueError:
                pass
            try:
                camera = int(fields[1][1])
            except IndexError:
                pass
        else:
            # grid
            person = int(fields[0])
            camera = int(fields[1])
        infos.append((person, camera))

    return infos

class ReshapeLayer(Layer):
    """Layer that reshapes its input to ``1`` followed by the input's own shape.

    NOTE(review): ``(1)`` in ``call`` is the integer 1, not the tuple
    ``(1,)``. Whether ``1 + inputs.shape[0:]`` yields the intended shape
    depends on how the shape object implements addition -- confirm, or use
    ``(1,)`` if a tuple was meant.
    """
    def call(self,inputs):
        # Target shape: a leading 1, then every dimension of the input.
        nshape = (1) + inputs.shape[0:]
        return tf.reshape(inputs,nshape)

def extract_feature(dir_path, net, height, width,nb_frame):
    """Run ``net`` on every image in ``dir_path`` and collect normalised features.

    Files are visited in sorted name order. For each file the image is loaded
    at ``(height, width)``, given a leading batch axis, passed to
    ``net.predict``, divided by its L2 norm along axis 1 and squeezed.

    The ``(person, camera)`` pair is parsed from the file name with the same
    rules as ``extract_info``. A field of the ``'f'``/``'s'`` naming scheme
    that cannot be read falls back to ``-1`` rather than silently keeping the
    value from the previous file.

    Args:
        dir_path: Directory holding the images.
        net: Object exposing ``predict``.
        height: Target image height.
        width: Target image width.
        nb_frame: Not used by this function.

    Returns:
        Tuple ``(features, infos)``: a list of feature arrays and the
        matching list of ``(person, camera)`` tuples.
    """
    features = []
    infos = []
    print('Extracting all test data, please wait...')
    for image_name in sorted(os.listdir(dir_path)):
        arr = image_name.split('_')
        # Reset for every file so nothing leaks over from the previous one.
        person, camera = -1, -1
        if 'f' in image_name or 's' in image_name:
            try:
                person = int(arr[0])
            except ValueError:
                pass
            try:
                camera = int(arr[1][1])
            except IndexError:
                pass
        else:
            # grid
            person = int(arr[0])
            camera = int(arr[1])

        image_path = os.path.join(dir_path, image_name)
        img = image.load_img(image_path, target_size=(height, width))
        x = image.img_to_array(img)

        # Add the leading batch axis before calling predict.
        x = np.expand_dims(x, axis=0)
#         x /= 255.0 #normalize image
#         x = ReshapeLayer()(x) #for multi image

        feature = net.predict(x)
        feature /= np.linalg.norm(feature, axis=1, keepdims=True)
        features.append(np.squeeze(feature))
        infos.append((person, camera))

    return features, infos


def similarity_matrix(query_f, test_f):
    """Return the cosine-similarity matrix between two sets of feature rows.

    The computation is built as a TF1-style graph and evaluated in a new
    session. As side effects, eager execution is disabled and the new session
    is registered through ``set_session``.

    Args:
        query_f: 2-D array-like, one feature vector per row.
        test_f: 2-D array-like, one feature vector per row.

    Returns:
        Array whose entry ``[i, j]`` is the dot product of the L2-normalised
        row ``i`` of ``query_f`` and row ``j`` of ``test_f``.
    """
    # Placeholders and sessions require graph mode.
    tf.compat.v1.disable_eager_execution()

    query_ph = tf.compat.v1.placeholder(tf.float32, (None, None))
    gallery_ph = tf.compat.v1.placeholder(tf.float32, (None, None))
    # Rows are scaled to unit length, so the product below is a cosine similarity.
    similarity_op = tf.matmul(tf.nn.l2_normalize(query_ph, axis=1),
                              tf.nn.l2_normalize(gallery_ph, axis=1),
                              transpose_b=True)

    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=session_config)
    set_session(session)

    scores = session.run(similarity_op, feed_dict={query_ph: query_f, gallery_ph: test_f})
    print(scores.shape)
    return scores


def sort_similarity(query_f, test_f):
    """Return the similarity matrix and, per row, column indices ordered best-first."""
    scores = similarity_matrix(query_f, test_f)
    # Negating the scores turns argsort's ascending order into a descending ranking.
    ranking = np.argsort(-scores, axis=1)
    return scores, ranking


def map_rank_quick_eval(query_info, test_info, result_argsort):
    """Compute rank-1/5/10 accuracy and mAP from a ranked gallery list.

    For each query, a gallery item with the same person and a different
    camera is a match. An item with the same person and the same camera, or
    with person ``-1``, is junk: it is skipped and does not use up a rank
    position.

    Args:
        query_info: Sequence of ``(person, camera)`` per query.
        test_info: Sequence of ``(person, camera)`` per gallery item.
        result_argsort: For each query, gallery indices ordered best-first.
            Entries are converted with ``int()`` before use.

    Returns:
        Tuple ``(rank1_acc, rank5_acc, rank10_acc, mAP)``, each averaged over
        all queries. Queries without any match contribute zero.

    Raises:
        ZeroDivisionError: If ``query_info`` is empty.
    """
    QUERY_NUM = len(query_info)
    gallery_size = len(test_info)

    rank_1 = 0.0
    rank_5 = 0.0
    rank_10 = 0.0
    mAP = 0.0

    for q_index, (qp, qc) in enumerate(query_info):
        if q_index % 100 == 0:
            print('evaluate img %d' % q_index)

        # Rank positions of matches and of junk; both lists come out ascending
        # because positions are visited in increasing order.
        matches = []
        junk = []
        for position in range(gallery_size):
            candidate = test_info[int(result_argsort[q_index][position])]
            tp = candidate[0]
            tc = candidate[1]
            if tp == qp and qc != tc:
                matches.append(position)
            elif tp == qp or tp == -1:
                junk.append(position)

        if not matches:
            # Nothing to find for this query: it adds zero to every metric.
            continue

        # junk_seen counts the junk entries ranked above the current match.
        # Both lists are ascending, so the cursor only ever moves forward
        # instead of rescanning the junk list for every match.
        junk_seen = 0
        recall = 0.0
        precision = 1.0
        ap = 0.0
        for hit, position in enumerate(matches, start=1):
            while junk_seen < len(junk) and junk[junk_seen] < position:
                junk_seen += 1

            if hit == 1:
                # Position of the first match after discounting junk decides
                # the rank-k hits.
                if junk_seen >= position:
                    rank_1 += 1
                if junk_seen >= position - 4:
                    rank_5 += 1
                if junk_seen >= position - 9:
                    rank_10 += 1

            cnt = position + 1 - junk_seen
            tmp_recall = hit / len(matches)
            tmp_precision = hit / cnt
            # Trapezoid between the previous and the current precision.
            ap = ap + (tmp_recall - recall) * ((precision + tmp_precision) / 2)
            recall = tmp_recall
            precision = tmp_precision

        mAP += ap

    rank1_acc = rank_1 / QUERY_NUM
    rank5_acc = rank_5 / QUERY_NUM
    rank10_acc = rank_10 / QUERY_NUM
    mAP = mAP / QUERY_NUM
    print('Rank 1:\t%f' % rank1_acc)
    print('Rank 5:\t%f' % rank5_acc)
    print('Rank 10:\t%f' % rank10_acc)
    print('mAP:\t%f' % mAP)
    return rank1_acc, rank5_acc, rank10_acc, mAP


def train_predict(net, train_path, pid_path, score_path):
    """Rank every training image against all training images and save the results.

    Features are extracted with the module-level ``height``, ``width`` and
    ``nb_frame``. Five text files are written: the raw features, the full
    sorted score and index matrices (under the ``.txt`` variants of the given
    paths), and the same two matrices without their first column under
    ``score_path`` and ``pid_path``.

    Returns:
        The score matrix with every row sorted best-first.
    """
    features, _ = extract_feature(train_path, net, height, width, nb_frame)
    feature_dump = score_path.replace('renew_ac.log', 'feature.txt')
    np.savetxt(feature_dump, features, fmt='%.4f')

    scores, ranking = sort_similarity(features, features)
    # Reorder each score row to follow its ranking.
    for row, order in enumerate(ranking):
        scores[row] = scores[row][order]
    scores = np.array(scores)

    np.savetxt(score_path.replace('.log', '.txt'), scores, fmt='%.4f')
    np.savetxt(pid_path.replace('.log', '.txt'), ranking, fmt='%d')

    # Column 0 is dropped: the top hit is the image itself.
    np.savetxt(score_path, scores[:, 1:], fmt='%.4f')
    np.savetxt(pid_path, ranking[:, 1:], fmt='%d')
    return scores


def test_predict(net, probe_path, gallery_path, pid_path, score_path, height, width, nb_frame):
    """Rank the gallery for every probe image and save indices and scores.

    Gallery features are extracted first, then probe features. Any existing
    files at ``pid_path`` and ``score_path`` are removed before the ranked
    gallery indices and the sorted scores are written there.
    """
    gallery_features, _ = extract_feature(gallery_path, net, height, width, nb_frame)
    print("Features Gallery Extracted")
    probe_features, _ = extract_feature(probe_path, net, height, width, nb_frame)
    print("Features Query Extracted")

    print("Calculating similarity..")
    scores, ranking = sort_similarity(probe_features, gallery_features)
    print("Similarity calculated")

    # Reorder each score row to follow its ranking.
    for row, order in enumerate(ranking):
        scores[row] = scores[row][order]
    scores = np.array(scores)

    for stale_path in (pid_path, score_path):
        safe_remove(stale_path)
    np.savetxt(pid_path, ranking, fmt='%d')
    np.savetxt(score_path, scores, fmt='%.4f')


def train_sepbn_predict(net_path, train_path, pid_path, score_path):
    """Rank every training image against all training images and save the results.

    NOTE(review): ``net_path`` is currently unused because the model loading
    below is commented out; the module-level ``net`` is used instead.

    Features are extracted with the module-level ``height``, ``width`` and
    ``nb_frame``, the same way ``train_predict`` does. The previous call
    passed only two of the five arguments ``extract_feature`` requires and
    raised ``TypeError``.

    Returns:
        The score matrix with every row sorted best-first.
    """
    # model = load_model(net_path, custom_objects={'cross_entropy_loss': cross_entropy_loss})
    # net = Model(inputs=[model.get_layer('resnet50').get_input_at(0)[1]],
    #             outputs=[model.get_layer('resnet50').get_output_at(0)[1]])
    train_f, test_info = extract_feature(train_path, net, height, width, nb_frame)
    result, result_argsort = sort_similarity(train_f, train_f)
    # Reorder each score row to follow its ranking.
    for i, order in enumerate(result_argsort):
        result[i] = result[i][order]
    result = np.array(result)
    # ignore top1 because it's the origin image
    np.savetxt(score_path, result[:, 1:], fmt='%.4f')
    np.savetxt(pid_path, result_argsort[:, 1:], fmt='%d')
    return result


def test_sepbn_predict(net_path, probe_path, gallery_path, pid_path, score_path):
    """Load a saved model, rank the gallery for every probe and save the results.

    The model is loaded from ``net_path`` with ``cross_entropy_loss`` as a
    custom object. A sub-model is then built from element 1 of the first
    input and output nodes of the layer named ``'resnet50'``.

    Features are extracted with the module-level ``height``, ``width`` and
    ``nb_frame``. The previous calls passed only two of the five arguments
    ``extract_feature`` requires and raised ``TypeError``.

    Any existing files at ``pid_path`` and ``score_path`` are removed before
    the ranked gallery indices and the sorted scores are written there.
    """
    model = load_model(net_path, custom_objects={'cross_entropy_loss': cross_entropy_loss})
    model = Model(inputs=[model.get_layer('resnet50').get_input_at(0)[1]],
                  outputs=[model.get_layer('resnet50').get_output_at(0)[1]])
    test_f, test_info = extract_feature(gallery_path, model, height, width, nb_frame)
    query_f, query_info = extract_feature(probe_path, model, height, width, nb_frame)
    result, result_argsort = sort_similarity(query_f, test_f)
    # Reorder each score row to follow its ranking.
    for i, order in enumerate(result_argsort):
        result[i] = result[i][order]
    result = np.array(result)
    safe_remove(pid_path)
    safe_remove(score_path)
    np.savetxt(pid_path, result_argsort, fmt='%d')
    np.savetxt(score_path, result, fmt='%.4f')


def market_result_eval(predict_path, log_path='market_result_eval.log', TEST='Market-1501/test',
                       QUERY='Market-1501/probe'):
    """Evaluate a saved ranking file and append the metrics to a log file.

    Args:
        predict_path: Space-delimited text file of ranked gallery indices,
            one row per probe.
        log_path: File the results are appended to.
        TEST: Gallery directory whose file names provide person/camera ids.
        QUERY: Probe directory whose file names provide person/camera ids.
    """
    ranking = np.genfromtxt(predict_path, delimiter=' ')
    print('predict info get, extract gallery info start')
    gallery_info = extract_info(TEST)
    print('extract probe info start')
    probe_info = extract_info(QUERY)
    print('start evaluate map and rank acc')
    metrics = map_rank_quick_eval(probe_info, gallery_info, ranking)
    rank1_acc, rank5_acc, rank10_acc, mAP = metrics

    summary = 'rank 1: %f rank 5: %f rank 10: %f mAP: %f\n' % (rank1_acc, rank5_acc, rank10_acc, mAP)
    write(log_path, predict_path + '\n')
    write(log_path, summary)


def grid_result_eval(predict_path, log_path='grid_eval.log'):
    """Compute top-k hit rates from a saved ranking file and log the first three.

    A probe (row ``i``) is hit by the first ranked id ``pid`` for which
    ``pid - i == 775``. Its position is then credited to every top-k bucket
    (k = 1, 5, 10, 20, 50) it falls into.
    # NOTE(review): 775 is presumably the index offset between a probe and
    # its true gallery match in the GRID layout -- confirm.

    Args:
        predict_path: Space-delimited text file of ranked ids, one row per probe.
        log_path: File the top-1/5/10 rates are appended to.

    Returns:
        The top-20 hit rate.
    """
    pids4probes = np.genfromtxt(predict_path, delimiter=' ')
    rank_limits = (1, 5, 10, 20, 50)
    probe_shoot = [0] * len(rank_limits)

    for probe_idx, ranked_pids in enumerate(pids4probes):
        for position, pid in enumerate(ranked_pids):
            if pid - probe_idx != 775:
                continue
            # Credit every bucket whose limit the position is below.
            for bucket, limit in enumerate(rank_limits):
                if position < limit:
                    probe_shoot[bucket] += 1
            break

    probe_acc = [shoot / len(pids4probes) for shoot in probe_shoot]
    write(log_path, predict_path + '\n')
    write(log_path, '%.2f\t%.2f\t%.2f\n' % (probe_acc[0], probe_acc[1], probe_acc[2]))
    return probe_acc[3]

# Notebook cell: extract features for probe and gallery images with the
# compiled model, rank the gallery for every probe, then score the ranking.
net = model
probe_path ='MARKET1501/probe/'
gallery_path = 'MARKET1501/test/'
# NOTE(review): train_path is assigned here but not used in this cell.
train_path = '../MARKET1501/market_rename/train_all'
# Output files: ranked gallery indices and the matching sorted scores.
pid_path = 'ret_train_pid.txt'
score_path = 'ret_train_score.txt'
test_predict(net, probe_path, gallery_path,  pid_path, score_path, height, width,nb_frame)
print("Prediction done, start evaluate..")
# Metrics are appended to market_eval.txt.
market_result_eval(pid_path, 'market_eval.txt', gallery_path, probe_path)
print("Evaluate done")