In [None]:
!pip install memory_profiler

In [None]:
#Imports
import tensorflow as tf # Tensorflow 2.3
import tensorflow_hub as hub # Tensorflow-hub 0.12
import PIL.Image as Image # Pillow 
import numpy as np # Numpy
from tqdm import tqdm # progress bar package
import tensorflow_datasets as tfds
import os
from time import process_time
from memory_profiler import memory_usage
import tempfile
import tracemalloc
import matplotlib.image as mpimg
import pandas as pd
import PIL.Image as Image # Pillow
import math

################################################################################################################################################################

# Opening the text file and saving to the corresponding lists.
def load_values(file):
    """Read the integer ground-truth labels from a validation listing file.

    Every non-blank line is split on whitespace and its second field is
    parsed as the label (e.g. ``"<image name> <label>"``). Blank or
    whitespace-only lines are skipped, and runs of spaces or tabs between
    the fields are accepted.

    Args:
        file: Path of the text file to read.

    Returns:
        list[int]: One label per non-blank line, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field is not an integer literal.
    """
    with open(file, 'r') as f:
        val_set = f.read().splitlines()

    val_groundtruth = []
    for line in val_set:
        fields = line.split()
        if not fields:
            # Tolerate empty / trailing blank lines instead of crashing.
            continue
        # Image ground truth is the second field of the line.
        val_groundtruth.append(int(fields[1]))
    return val_groundtruth

################################################################################################################################################################

# Image Preprocessing
def prepare(path, size):
    """Load an image and return a centre-cropped array scaled by 1/255.

    Steps: open the file, convert to RGB, resize so that the shorter side
    becomes ``round(1.14286 * size)`` pixels (aspect ratio kept, integer
    floor division), crop a ``size`` x ``size`` window around the centre and
    divide the pixel values by 255.

    Args:
        path: Image location handed to ``PIL.Image.open``.
        size: Side length, in pixels, of the square crop.

    Returns:
        numpy.ndarray: The cropped RGB image converted with ``np.array`` and
        divided by 255.
    """
    # Image.open keeps the file open lazily; the context manager closes it
    # as soon as convert() has produced an independent in-memory copy.
    with Image.open(path) as source:
        im = source.convert('RGB')

    # 1.14286 is approximately 256/224 (presumably the usual
    # "resize to 256, crop 224" ratio -- confirm against the model cards).
    re_size = int(round(1.14286*size))

    width, height = im.size
    shorter_side = min(width, height)
    new_height = height * re_size // shorter_side
    new_width = width * re_size // shorter_side
    im = im.resize((new_width,new_height))

    # floor is used horizontally and ceil vertically; each pair differs by
    # exactly `size`, so the crop is always size x size.
    left = math.floor((new_width - size)/2)
    top = math.ceil((new_height - size)/2)
    right = math.floor((new_width + size)/2)
    bottom = math.ceil((new_height + size)/2)

    im = im.crop((left, top, right, bottom))
    # Named `pixels` rather than `input` to avoid shadowing the builtin.
    pixels = (np.array(im))/255
    return pixels

################################################################################################################################################################

# Ellipse defined by the labels of DAGM
def calc_ellipse(x, y,label):
    """Test whether point(s) lie inside or on a rotated ellipse.

    Formula reference:
    https://www.maa.org/external_archive/joma/Volume8/Kalman/General.html

    Args:
        x: x coordinate(s); a scalar or a NumPy array.
        y: y coordinate(s), broadcastable against ``x``.
        label: Sequence of exactly five values
            ``[semi_major, semi_minor, rotation, x_centre, y_centre]``;
            ``rotation`` is passed to ``np.cos`` / ``np.sin`` (radians).

    Returns:
        Boolean (or boolean array) that is True where the normalised
        quadratic form of the ellipse is <= 1.
    """
    semi_major, semi_minor, rotation, x_centre, y_centre = label

    # Offsets from the ellipse centre.
    dx = x - x_centre
    dy = y - y_centre
    cos_r = np.cos(rotation)
    sin_r = np.sin(rotation)

    # Squared coordinates along the two ellipse axes.
    along_major = ((dx * cos_r) + (dy * sin_r))**2
    along_minor = ((dx * sin_r) - (dy * cos_r))**2

    inside = ((along_major / semi_major**2) + (along_minor / semi_minor**2)) <= 1
    return inside

##################################################################################################################################################################

# IoU calculation
def iou_calc(labimg, pred):
    """Compute the intersection-over-union of a label mask and a prediction.

    The prediction is squeezed, rounded to 6 decimals and cast to bool, so
    every value that does not round to zero counts as foreground.

    Args:
        labimg: Ground-truth mask; interpreted element-wise as boolean.
        pred: Prediction array; singleton dimensions are removed before it
            is compared with ``labimg``.

    Returns:
        The ratio ``|labimg AND pred| / |labimg OR pred|``. When the union is
        empty (both masks contain no foreground) NaN is returned explicitly,
        which is the value the 0/0 division used to yield, but without the
        NumPy RuntimeWarning.
    """
    pred = np.round(np.squeeze(pred),decimals = 6).astype(dtype=bool)
    intersection = np.logical_and(labimg, pred)
    union = np.logical_or(labimg, pred)

    union_size = np.sum(union)
    if union_size == 0:
        # IoU is undefined for two empty masks; keep NaN, skip the 0/0.
        return np.float64('nan')

    iou_score = np.sum(intersection) / union_size
    return iou_score

###################################################################################################################################################################

# Model Inference
def modelInference(model_list,end):
    """Benchmark a TF-Hub model on random inputs.

    ``end`` random images are generated one at a time and written into a
    batch buffer; every time the buffer is full ``model.predict`` is called
    and timed. A final, smaller batch is run when ``end`` is not a multiple
    of the batch size.

    Args:
        model_list: ``[link, batchshape, dataset, background]``. Only
            ``link`` (TF-Hub handle) and ``batchshape`` (4-element input
            shape whose first entry is the batch size) are used here.
        end: Number of images to push through the model.

    Returns:
        dict with the keys
            'memory'       -- peak traced memory reported by ``tracemalloc``
                              during the run, in KiB (rounded),
            'avg(latency)' -- per-batch latency divided by the number of
                              images in that batch,
            'latency'      -- wall-clock seconds of each ``predict`` call,
            'throughput'   -- images per second over all ``predict`` calls
                              (0.0 when nothing was timed).
    """
    # Wall-clock timer: process_time() sums CPU time over all threads and
    # ignores time spent waiting (e.g. on an accelerator), so it does not
    # measure latency. Imported locally so the block stands on its own.
    from time import perf_counter

    [link,batchshape,dataset,background] = model_list

    # Load Model
    model = tf.keras.Sequential([
    hub.KerasLayer(link)])
    model.build(batchshape)
    batch = batchshape[0]

    # Inference and metrics
    imagenum = 0
    latency = []
    avg_lat = []
    data = np.zeros(tuple(batchshape))

    def timed_predict(inputs):
        # Time one predict call and record total / per-image latency.
        start = perf_counter()
        model.predict(inputs,batch_size = batch)
        elapsed = perf_counter() - start
        latency.append(elapsed)
        avg_lat.append(elapsed/len(inputs))

    tracemalloc.start()
    try:
        for _ in range(end):
            # "Preprocessing": fill the next slot of the batch with noise.
            data[imagenum%batch,:] = np.random.rand(batchshape[1],batchshape[2],batchshape[3])
            imagenum += 1

            if (imagenum%batch == 0):
                timed_predict(data)
                data = np.zeros(tuple(batchshape))

        leftover = imagenum%batch
        if (leftover != 0):
            # Incomplete last batch: only the filled rows are sent.
            timed_predict(data[:leftover])

        memory = round(((tracemalloc.get_traced_memory()[1])/1024)) #peak memory usage
    finally:
        # Always stop tracing; otherwise a failed run would leave tracemalloc
        # active and its peak would leak into the next measurement.
        tracemalloc.stop()

    total_time = sum(latency)
    # Guard the end == 0 case, where no batch was timed at all.
    throughput = float(imagenum/total_time) if total_time > 0 else 0.0

    return {'memory':memory,'avg(latency)':avg_lat,'latency':latency,'throughput':throughput}

In [None]:
import traceback

#task = input("Choose a model by typing the corresponding number: \n1.MobileNet V2\n2.ResNet50 V1\n3.Inception V3\n4.NASNet-A large\n5.EfficientNet V2 M")
#batch = int(input("Insert batch size"))
#if (batch == 1): batch = None

# Benchmark settings, one list per model.
# Layout of every list: [link, batch_input_shape, datasetPath, isBackgroundIncluded]
# The shapes stay plain lists because the batch size (first entry) is
# overwritten in place by the benchmark loop.
MobileNet = [
    "https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4",
    [1, 224, 224, 3],
    '../Datasets/ImageNet',
    1,
]
ResNet = [
    "https://tfhub.dev/tensorflow/resnet_50/classification/1",
    [1, 224, 224, 3],
    '../Datasets/ImageNet',
    0,
]
Inception = [
    "https://tfhub.dev/google/imagenet/inception_v3/classification/5",
    [1, 299, 299, 3],
    '../Datasets/ImageNet',
    1,
]
NASNet = [
    "https://tfhub.dev/google/imagenet/nasnet_large/classification/5",
    [1, 331, 331, 3],
    '../Datasets/ImageNet',
    1,
]
EfficientNetM = [
    "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/classification/2",
    [1, 480, 480, 3],
    '../Datasets/ImageNet',
    0,
]

NASNetMob = [
    "https://tfhub.dev/google/imagenet/nasnet_mobile/classification/5",
    [1, 224, 224, 3],
    '',
    0,
]
EfficientNetB3 = [
    "https://tfhub.dev/google/efficientnet/b3/classification/1",
    [1, 300, 300, 3],
    '',
    0,
]
EfficientNetB4 = [
    "https://tfhub.dev/google/efficientnet/b4/classification/1",
    [1, 380, 380, 3],
    '',
    0,
]
EfficientNetS = [
    "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2",
    [1, 384, 384, 3],
    '',
    0,
]


# Menu number (as a string, starting at '1') -> settings list.
model_dict = {
    str(number): settings
    for number, settings in enumerate(
        [
            MobileNet,
            ResNet,
            Inception,
            NASNet,
            EfficientNetM,
            NASNetMob,
            EfficientNetB3,
            EfficientNetB4,
            EfficientNetS,
        ],
        start=1,
    )
}

# Benchmark every configured model at batch sizes 1, 2, 4, ..., 32 and append
# one result dict per (model, batch size) to that model's results file.
for task in [str(i) for i in range(1,10)]:

    filename = 'batches'+task+'CPUK.txt'

    for batch in [2**i for i in range(6)]:

        # The shape list inside model_dict is modified in place: its first
        # entry is the batch size used for this run.
        model_dict[task][1][0] = batch

        try:

            #Warmup (result discarded)
            modelInference(model_dict[task],20)

            inf_dict = modelInference(model_dict[task],300)
            inf_dict['batch'] = batch
            # `with` guarantees the file is closed even if the write fails.
            with open(filename, "a") as f:
                f.write(str(inf_dict)+'\n')
            print("For batchsize = {}:\n\tThoughput = {}samples/sec\n\tLatency(All) = {}sec\n------".format(batch,inf_dict['throughput'],sum(inf_dict['latency'])))

        except Exception:

            # Best effort: report the failure and carry on with the next
            # batch size instead of aborting the whole sweep.
            print(traceback.format_exc())

In [None]:
import ast
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
from matplotlib.gridspec import SubplotSpec

data = pd.DataFrame(columns=['memory','avglat','tlat','throughput','batchsize','model'])

plt.figure(figsize=(20,10))

# Number of images per recorded benchmark run. It is not stored in the
# result files, so it must match the value used when they were written.
_N_SAMPLES = 300


def _parse_benchmark_lines(lines, n_samples=_N_SAMPLES):
    """Turn the lines of one results file into per-metric lists.

    Each non-blank line must be the ``str()`` of a dict with the keys
    'memory', 'latency' (list of per-batch latencies) and 'batch'.

    Returns:
        dict of equally long lists keyed by 'memory', 'avglat', 'tlat',
        'throughput' and 'batch'.
    """
    parsed = {'memory': [], 'avglat': [], 'tlat': [], 'throughput': [], 'batch': []}
    for line in lines:
        text = line.strip()
        if not text:
            # Ignore blank lines instead of failing in literal_eval.
            continue
        record = ast.literal_eval(text)
        batch_size = record['batch']
        latencies = record['latency']

        # Sample-weighted mean latency: every image in a batch experiences
        # that batch's latency. The last batch holds the remainder, or a
        # full batch when batch_size divides n_samples (the previous formula
        # gave that final full batch a weight of 0).
        last_batch_len = (n_samples % batch_size) or batch_size
        weighted = (batch_size*sum(latencies[:-1])) + (last_batch_len*latencies[-1])

        parsed['memory'].append(record['memory']) #peak memory per total inference
        parsed['avglat'].append(weighted/n_samples)
        parsed['tlat'].append(sum(latencies)) #total latency
        parsed['throughput'].append(float(n_samples/sum(latencies)))
        parsed['batch'].append(batch_size)
    return parsed


with open('batches9CPUK.txt', 'r') as fileCPU:
    linesCPU = fileCPU.readlines()

with open('batches9GPUK.txt', 'r') as fileGPU:
    linesGPU = fileGPU.readlines()

_cpu_metrics = _parse_benchmark_lines(linesCPU)
_gpu_metrics = _parse_benchmark_lines(linesGPU)

tlatCPU = _cpu_metrics['tlat']
avglatCPU = _cpu_metrics['avglat']
throughputCPU = _cpu_metrics['throughput']
batchCPU = _cpu_metrics['batch']
memoryCPU = _cpu_metrics['memory']

tlatGPU = _gpu_metrics['tlat']
avglatGPU = _gpu_metrics['avglat']
throughputGPU = _gpu_metrics['throughput']
batchGPU = _gpu_metrics['batch']
memoryGPU = _gpu_metrics['memory']

  #data = data.append(pd.DataFrame({'memory':memory,'avglat':avglat,'tlat':tlat,'throughput':throughput,'batchsize':batch,'model':np.full(len(batch), filename[-5])}),ignore_index=True)


    # create 5x1 subfigs
    #subfigs = fig.subplots(5, 4)
    #models = ['MobileNet','ResNet','InceptionNet','NASNet','EfficientNet']
# Draw a 2 x 4 grid on the current figure: row 0 uses the CPU lists, row 1
# the GPU lists; columns are memory, throughput/latency trade-off, average
# latency and total latency.
batch = batchCPU  # name kept defined as before
# Each row is plotted against its own batch sizes. Reusing the CPU list for
# the GPU row mislabels the x axis (or raises a length mismatch) whenever the
# two result files do not contain the same runs.
batches = [batchCPU,batchGPU]
memory = [memoryCPU,memoryGPU]
throughput = [throughputCPU,throughputGPU]
avglat = [avglatCPU,avglatGPU]
tlat = [tlatCPU,tlatGPU]

for j in range(2):

    row_batches = batches[j]

    axs = plt.subplot(2, 4, 1+(4*j))
    axs.bar(row_batches,memory[j])
    axs.set_title('memory(batchsize)')
    axs.set_xlabel('Batchsize')
    axs.set_ylabel('Peak Memory [kB]')

    axs = plt.subplot(2, 4, 2+(4*j))
    axs.plot(throughput[j],avglat[j],c='orange',marker='o')
    axs.set_title('throughput - latency tradeoff')
    axs.set_xlabel('Throughput')
    axs.set_ylabel('Latency')

    axs = plt.subplot(2, 4, 3+(4*j))
    axs.plot(row_batches,avglat[j],c='green',marker='o')
    axs.set_title('Average latency per inference')
    axs.set_xlabel('Batchsize')
    axs.set_ylabel('Latency')

    axs = plt.subplot(2, 4, 4+(4*j))
    axs.plot(row_batches,tlat[j],c='red',marker='o')
    axs.set_title('Total latency per inference')
    axs.set_xlabel('Batchsize')
    axs.set_ylabel('Total Latency')