## Load required libraries and frameworks

In [None]:
import tensorflow as tf
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, roc_curve, auc, roc_auc_score, precision_recall_curve, average_precision_score
import scipy.ndimage as nd
import pylab as pl
import pandas as pd
from matplotlib import pyplot as plt
from pathlib import Path
from keras.preprocessing import image
import pickle
import random, os
from google.colab import drive, files
import cv2
from google.colab.patches import cv2_imshow
import ipywidgets as widgets
from IPython.display import display
from scipy.special import softmax

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report
from tqdm import tqdm


In [None]:
# Switch TensorFlow to graph (non-eager) execution. The GlobalStep callback defined
# further down in this notebook updates its counters through a Keras backend session.
tf.compat.v1.disable_eager_execution()

In [None]:
# You need to run the following first and restart runtime
!pip install h5py==2.10.0



In [None]:
# Set seed value
# Seed every random number generator the notebook uses. Python's `random` and
# NumPy were already seeded; TensorFlow is seeded too so that TF-side randomness
# (e.g. the Dropout layer and the initial weights of the new Dense head) is
# reproducible as well.
seed = 333
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)


In [None]:
# Directories where the dataset is read from and where the trained model is saved.
# dataset_dir is expected to hold df_train.csv, df_test.csv and the ImFiles/ image
# folder; model_dir is the parent folder of the ModelCheckpoint output.
dataset_dir = '/dataset_dir'
model_dir = '/model_dir'


In [None]:
# Set Train_Model = False to skip the training cell (e.g. when you use an already trained model).
Train_Model = True
# Number of images per batch, used by both the training and the test generator.
batch_size = 100
# Learning rate passed to the Adam optimizer (also used to derive its decay).
INIT_LR = 1e-5
# Number of training epochs; also used for the optimizer decay and the KL annealing length.
NUM_EPOCHS = 10


In [None]:
# Load the train and test tables. Exact duplicate rows are removed from the
# training table only; the test table is used as stored.
df_train = pd.read_csv(f"{dataset_dir}/df_train.csv").drop_duplicates()
df_test = pd.read_csv(f"{dataset_dir}/df_test.csv")


(19167, 5000)

In [None]:
def append_ext(fn):
    """Return `fn` as a string that ends in ".jpg".

    The extension is only appended when it is not already there, so applying the
    function twice to the same value does not produce ".jpg.jpg".
    """
    name = str(fn)
    return name if name.endswith(".jpg") else name + ".jpg"


def _add_filename_and_label(df):
    """Return a copy of `df` with file-name photo ids and a `privacy_value` column."""
    out = df.copy()
    out["photoid"] = out["photoid"].apply(append_ext)
    # Vectorised equivalent of the row-wise rule: "public" where normalizedpublic == 1,
    # "private" everywhere else (including missing values).
    out["privacy_value"] = np.where(out["normalizedpublic"] == 1, "public", "private")
    return out


def prepare_df(df_train_dataset, df_test_dataset):
    """Prepare the train and test tables for `flow_from_dataframe`.

    For each table, `photoid` is turned into a ".jpg" file name and a string
    column `privacy_value` ("public" / "private") is derived from
    `normalizedpublic`.

    The inputs are not modified; prepared copies are returned. This keeps the
    calling cell safe to re-run: the raw frames never accumulate extensions.

    Args:
        df_train_dataset: training table with `photoid` and `normalizedpublic` columns.
        df_test_dataset: test table with the same two columns.

    Returns:
        Tuple `(train, test)` of prepared DataFrames.
    """
    return _add_filename_and_label(df_train_dataset), _add_filename_and_label(df_test_dataset)


In [None]:
# Build the tables consumed by the image generators: photo ids become ".jpg" file
# names and a "public"/"private" privacy_value column is derived from normalizedpublic.
df_train_dataset, df_test_dataset = prepare_df(df_train, df_test)


## Prepare Datasets

In [None]:
# Create train and test generators from a dataframe
# Settings shared by both generators: image folder, file-name column, image size,
# batch size and seed.
_flow_common = dict(
    directory=dataset_dir + "/ImFiles",
    x_col="photoid",
    target_size=(224, 224),
    batch_size=batch_size,
    seed=seed,
)

# Random augmentation is applied to the training images only.
trainAug = ImageDataGenerator(
    fill_mode="nearest",
    horizontal_flip=True,
    shear_range=0.2,
    height_shift_range=0.1,
    width_shift_range=0.1,
    zoom_range=0.1,
    rotation_range=25,
)

# Training generator: shuffled batches of (images, categorical privacy_value labels).
train_generator = trainAug.flow_from_dataframe(
    df_train_dataset,
    y_col="privacy_value",
    class_mode="categorical",
    color_mode="rgb",
    shuffle=True,
    interpolation="nearest",
    subset=None,
    follow_links=False,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    **_flow_common,
)

# Test generator: images only (no labels), in dataframe order, without augmentation.
test_generator = ImageDataGenerator().flow_from_dataframe(
    df_test_dataset,
    y_col=None,
    class_mode=None,
    shuffle=False,
    **_flow_common,
)


Found 19167 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames.


In [None]:
# Number of whole batches in one pass over each generator (floor division, so a
# trailing partial batch is not counted here).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
totalTrain = STEP_SIZE_TRAIN
totalTest = STEP_SIZE_TEST

# Batches per epoch / per evaluation pass. These are already batch counts, so they
# must not be divided by batch_size again: doing so made steps_per_epoch equal to 1,
# which collapsed the KL annealing horizon used by loss() (NUM_EPOCHS * steps_per_epoch)
# to just NUM_EPOCHS batches.
steps_per_epoch = totalTrain
validation_steps = totalTest

STEP_SIZE_TRAIN, STEP_SIZE_TEST


(191, 50)

# Build neural network architecture

In [None]:
# ResNet50 backbone loaded with ImageNet weights and without its classification top,
# fed by a 224x224x3 input (the same size as the generators' target_size).
baseModel = ResNet50(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))


Instructions for updating:
Colocations handled automatically by placer.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
K = 2 # number of classes; also read by KL() when building the loss
# Classification head on top of the backbone output: flatten -> dropout -> linear layer.
headModel = baseModel.output
headModel = Flatten(name="flatten")(headModel)
headModel = Dropout(0.5)(headModel)
# No activation here: the K raw outputs are converted to evidence by the loss and the metrics.
headModel = Dense(K, activation=None)(headModel)
model = Model(inputs=baseModel.input, outputs=headModel)


# Define activation functions and losses

In [None]:
def relu_evidence(logits):
    """Turn logits into evidence by applying ReLU (`tf.nn.relu`)."""
    return tf.nn.relu(logits)

def exp_evidence(logits):
    """Turn logits into evidence as exp(logits), with logits clipped to [-10, 10] first."""
    return tf.exp(tf.clip_by_value(logits,-10,10))

def softplus_evidence(logits):
    """Turn logits into evidence by applying softplus (`tf.nn.softplus`)."""
    return tf.nn.softplus(logits)

def KL(alpha):
    """Row-wise KL divergence between Dirichlet(alpha) and the all-ones Dirichlet.

    With beta = (1, ..., 1) of length K (module-level class count) this evaluates

        lgamma(sum(alpha)) - sum(lgamma(alpha))
        + sum(lgamma(beta)) - lgamma(sum(beta))
        + sum((alpha - beta) * (digamma(alpha) - digamma(sum(alpha))))

    where every sum runs over axis 1.

    Args:
        alpha: tensor of Dirichlet parameters, reduced over axis 1.

    Returns:
        Tensor of divergences with axis 1 kept as a size-1 dimension.
    """
    uniform = tf.constant(np.ones((1, K)), dtype=tf.float32)
    alpha_total = tf.reduce_sum(alpha, axis=1, keepdims=True)
    uniform_total = tf.reduce_sum(uniform, axis=1, keepdims=True)

    # Log normalisers of the two Dirichlet densities.
    log_norm_alpha = tf.math.lgamma(alpha_total) - tf.reduce_sum(
        tf.math.lgamma(alpha), axis=1, keepdims=True)
    log_norm_uniform = tf.reduce_sum(
        tf.math.lgamma(uniform), axis=1, keepdims=True) - tf.math.lgamma(uniform_total)

    digamma_gap = tf.math.digamma(alpha) - tf.math.digamma(alpha_total)
    weighted_gap = tf.reduce_sum((alpha - uniform) * digamma_gap, axis=1, keepdims=True)

    return weighted_gap + log_norm_alpha + log_norm_uniform

def mse_loss(p, alpha, global_step, annealing_step):
    """Per-sample evidential squared-error loss with an annealed KL regulariser.

    The result is (A + B) + coef * KL, where, with S = sum(alpha) over axis 1:
      * A is the squared error between `p` and the mean alpha / S,
      * B is the summed term alpha * (S - alpha) / (S^2 * (S + 1)),
      * KL is evaluated on (alpha - 1) * (1 - p) + 1,
      * coef = min(1, global_step / annealing_step).

    Args:
        p: target tensor (loss() passes y_true), same shape as `alpha`.
        alpha: Dirichlet parameters (loss() passes evidence + 1).
        global_step: current training step.
        annealing_step: number of steps after which coef reaches 1.

    Returns:
        Tensor of per-sample losses with axis 1 kept as a size-1 dimension.
    """
    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
    expected_prob = alpha / strength

    squared_error = tf.reduce_sum((p - expected_prob) ** 2, axis=1, keepdims=True)
    variance = tf.reduce_sum(
        alpha * (strength - alpha) / (strength * strength * (strength + 1)),
        axis=1, keepdims=True)

    # Weight of the KL term grows linearly with the step and is capped at 1.
    kl_weight = tf.minimum(1.0, tf.cast(global_step / annealing_step, tf.float32))

    # Where p is 1 the parameter is reset to 1; elsewhere alpha is kept as is.
    adjusted_alpha = (alpha - 1) * (1 - p) + 1
    kl_term = kl_weight * KL(adjusted_alpha)

    return (squared_error + variance) + kl_term


In [None]:
def ev_succ(y_true, y_pred):
    """Metric: total evidence of each sample, zeroed for misclassified samples.

    Evidence is exp_evidence(y_pred); a sample counts as correct when the argmax
    of `y_pred` equals the argmax of `y_true` along axis 1.

    Args:
        y_true: target tensor (argmax over axis 1 gives the true class).
        y_pred: raw model outputs (logits).

    Returns:
        Tensor of shape (-1, 1): summed evidence for correct samples, 0 otherwise.
    """
    evidence = exp_evidence(y_pred)
    pred = tf.argmax(input=y_pred, axis=1)
    truth = tf.argmax(input=y_true, axis=1)
    # 1.0 for a correct prediction, 0.0 otherwise, as a column vector.
    match = tf.reshape(tf.cast(tf.equal(pred, truth), tf.float32), (-1, 1))
    total_evidence = tf.reduce_sum(input_tensor=evidence, axis=1, keepdims=True)
    return total_evidence * match

def ev_fail(y_true, y_pred):
    """Metric: total evidence of each sample, zeroed for correctly classified samples.

    Evidence is exp_evidence(y_pred); a sample counts as correct when the argmax
    of `y_pred` equals the argmax of `y_true` along axis 1.

    Args:
        y_true: target tensor (argmax over axis 1 gives the true class).
        y_pred: raw model outputs (logits).

    Returns:
        Tensor of shape (-1, 1): summed evidence for misclassified samples, 0 otherwise.
    """
    evidence = exp_evidence(y_pred)
    pred = tf.argmax(input=y_pred, axis=1)
    truth = tf.argmax(input=y_true, axis=1)
    # 1.0 for a correct prediction, 0.0 otherwise, as a column vector.
    match = tf.reshape(tf.cast(tf.equal(pred, truth), tf.float32), (-1, 1))
    total_evidence = tf.reduce_sum(input_tensor=evidence, axis=1, keepdims=True)
    return total_evidence * (1 - match)

def loss(y_true, y_pred):
    """Keras loss: batch mean of the evidential MSE loss with annealed KL term.

    Evidence is exp_evidence(y_pred) and alpha = evidence + 1. The annealing
    horizon is NUM_EPOCHS * steps_per_epoch and the current step is read from the
    module-level `globalstep` callback; these names are looked up when the
    function is called, so they must be defined before the model is compiled.

    Args:
        y_true: target tensor.
        y_pred: raw model outputs (logits).

    Returns:
        Scalar tensor: mean of mse_loss over the batch.
    """
    annealing_step = NUM_EPOCHS * steps_per_epoch
    alpha = exp_evidence(y_pred) + 1
    per_sample_loss = mse_loss(y_true, alpha, globalstep.global_step, annealing_step)
    return tf.reduce_mean(input_tensor=per_sample_loss)


In [None]:
class GlobalStep(tf.keras.callbacks.Callback):
    """Keras callback that counts finished batches and epochs in TensorFlow variables.

    Both counters are float `tf.Variable`s evaluated through the Keras backend
    session, so graph code (such as a loss function) can read `global_step`
    directly as a tensor.

    Every update op is created once in `__init__`. Building a new `assign` op
    inside each callback invocation would add nodes to the graph on every batch.
    """

    def __init__(self, epoch_counter=0.0, global_step=0.0):
        """Create and initialise the counters.

        Args:
            epoch_counter: initial value of the epoch counter.
            global_step: initial value of the batch counter.
        """
        super(GlobalStep, self).__init__()
        sess = tf.compat.v1.keras.backend.get_session()
        epoch_counter = tf.Variable(float(epoch_counter), trainable=False, name='epoch_counter')
        global_step = tf.Variable(float(global_step), trainable=False, name='global_step')
        sess.run(epoch_counter.initializer)
        sess.run(global_step.initializer)
        # Session used for every counter update.
        self.sess = sess
        # Number of finished epochs.
        self.epoch_counter = epoch_counter
        # Number of finished batches.
        self.global_step = global_step

        # Increment ops, reused on every batch / epoch end.
        self._increment_epoch = epoch_counter.assign_add(1.0)
        self._increment_step = global_step.assign_add(1.0)

        # Placeholder-fed assign ops for the explicit setters.
        self._epoch_value = tf.compat.v1.placeholder(tf.float32, shape=(), name='epoch_counter_value')
        self._step_value = tf.compat.v1.placeholder(tf.float32, shape=(), name='global_step_value')
        self._assign_epoch = epoch_counter.assign(self._epoch_value)
        self._assign_step = global_step.assign(self._step_value)

    def on_train_begin(self, logs=None):
        """No-op: the counters keep their current values when training starts."""
        pass

    def on_epoch_end(self, epoch, logs=None):
        """Add one to the epoch counter."""
        self.sess.run(self._increment_epoch)

    def on_train_end(self, logs=None):
        """No-op: the counters keep their final values when training ends."""
        pass

    def on_batch_end(self, batch, logs=None):
        """Add one to the global step."""
        self.sess.run(self._increment_step)

    def set_globalstep(self, val):
        """Set the global step to the numeric scalar `val`."""
        self.sess.run(self._assign_step, feed_dict={self._step_value: val})

    def set_epoch(self, val):
        """Set the epoch counter to the numeric scalar `val`."""
        self.sess.run(self._assign_epoch, feed_dict={self._epoch_value: val})

# Shared counter instance: loss() reads its global_step for the KL annealing and
# the training cell passes it to Keras as a callback so it advances every batch.
globalstep = GlobalStep()


In [None]:
# Legacy Adam optimizer with learning rate INIT_LR and its `decay` argument set to INIT_LR / NUM_EPOCHS.
opt = tf.keras.optimizers.legacy.Adam(learning_rate=INIT_LR, decay=INIT_LR / NUM_EPOCHS)
# Compile with the evidential loss; besides accuracy, track the evidence on
# correctly (ev_succ) and incorrectly (ev_fail) classified samples.
model.compile(loss=loss, optimizer=opt, metrics=["accuracy", ev_succ, ev_fail], run_eagerly=False)


In [None]:
# Evaluate the global step variable in the callback's session; as the last
# expression of the cell, its current value is shown as the cell output.
globalstep.sess.run(globalstep.global_step)


# Train a model

In [None]:
if Train_Model:
    # You should specify a folder to save your trained model
    # The full model is saved under model_dir + '/pure' at the end of every epoch.
    # `save_freq='epoch'` replaces the deprecated `period=1` argument.
    checkpoint = ModelCheckpoint(model_dir + '/pure', monitor='val_loss', verbose=0,
                                 save_best_only=False, save_weights_only=False,
                                 mode='auto', save_freq='epoch')

    # `Model.fit` accepts generators directly; `fit_generator` is deprecated.
    # len(train_generator) is the number of batches needed to cover the data once,
    # including a final partial batch, so no batch is repeated when the dataset size
    # is an exact multiple of the batch size.
    # globalstep is passed as a callback so the KL annealing step advances every batch.
    H = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=NUM_EPOCHS, callbacks=[globalstep, checkpoint])


## Predict with the trained model

In [None]:
def edl_stats(pred, lbs, truth=None, predIdxs=None):
    """Return, per sample, whether the predicted class equals the true class.

    Args:
        pred: model outputs; only used (argmax over axis 1) when `predIdxs` is None.
        lbs: label array; only used (argmax over axis 1) when `truth` is None.
        truth: optional precomputed true class indices.
        predIdxs: optional precomputed predicted class indices.

    Returns:
        Result of the element-wise comparison `truth == predIdxs`.
    """
    true_classes = np.argmax(lbs, axis=1) if truth is None else truth
    predicted_classes = np.argmax(pred, axis=1) if predIdxs is None else predIdxs
    return true_classes == predicted_classes


In [None]:
# Per-batch accumulators: true classes (T), predicted classes (P), evidence (E,
# currently not filled), correctness flags (M) and raw model outputs (O).
T_train = []
P_train = []
E_train = []
M_train = []
O_train = []

train_generator.reset()
# len(train_generator) is the number of batches that covers the data exactly once
# (a final partial batch included), so no batch is predicted twice when the dataset
# size is an exact multiple of the batch size.
for i in tqdm(range(len(train_generator))):
    img, lbs = next(train_generator)
    pred_train = model.predict(img)
    predIdxs = np.argmax(pred_train, axis=1)
    truth_train =  np.argmax(lbs, axis=1)
    match_train = edl_stats(pred_train, lbs, truth_train, predIdxs)
    M_train.append(match_train)
    T_train.append(truth_train)
    P_train.append(predIdxs)
    O_train.append(pred_train)

pred_train = np.concatenate(P_train)
output_train = np.concatenate(O_train)
match_train = np.concatenate(M_train)
truth_train = np.concatenate(T_train)
# Same evidence definition as exp_evidence() used by the loss and metrics:
# the logits are clipped to [-10, 10] before exponentiation.
ev_train = np.exp(np.clip(output_train, -10, 10))


## Calculate the training-set uncertainty

In [None]:
# Total evidence per training sample (sum over the classes).
tot_ev_train = ev_train.sum(axis=1)
# Uncertainty = K / (K + total evidence), with K the number of classes
# (previously hard-coded as 2).
train_u = K/(K+tot_ev_train)


In [None]:
# Per-batch accumulators; only P (predicted classes) and O (raw model outputs)
# are filled below, the others are kept for compatibility with later cells.
T = []
P = []
E = []
M = []
O = []

test_generator.reset()
# len(test_generator) counts a final partial batch as well, so every test image is
# predicted and `pred` / `output` stay aligned with `truth` even when the test set
# size is not a multiple of the batch size.
for i in tqdm(range(len(test_generator))):
    img = next(test_generator)
    pred = model.predict(img)
    predIdxs = np.argmax(pred, axis=1)

    P.append(predIdxs)
    O.append(pred)

pred = np.concatenate(P)
output = np.concatenate(O)
# Ground truth as class indices: 1 for "public", 0 for everything else.
truth = (df_test_dataset["privacy_value"] == "public").astype(int).to_numpy()
# Same evidence definition as exp_evidence() used by the loss and metrics:
# the logits are clipped to [-10, 10] before exponentiation.
ev = np.exp(np.clip(output, -10, 10))


100%|██████████| 50/50 [00:29<00:00,  1.70it/s]


## Calculate the test-set uncertainty

In [None]:
# Total evidence per test sample (sum over the classes).
tot_ev = ev.sum(axis=1)
# Uncertainty = K / (K + total evidence), with K the number of classes
# (previously hard-coded as 2).
test_u = K/(K+tot_ev)
