# TeraFlow: Energy Efficiency Optimization of a Cryptomining Detector (POC)

### Imports

In [None]:
import gc
import glob
import itertools
import math
import multiprocessing
import os
import re
import socket
import string
import subprocess
import time
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import Parallel, delayed, load
from tqdm import tqdm_notebook as tqdm

pd.set_option('display.max_columns', None)

import sys

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, balanced_accuracy_score,
                             confusion_matrix, f1_score, matthews_corrcoef,
                             mean_absolute_error,
                             mean_absolute_percentage_error,
                             mean_squared_error, precision_score, recall_score,
                             roc_auc_score)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

sys.path.append("..")

import copy
import gzip
import logging
import os
import pickle
import tempfile
import zipfile
from contextlib import contextmanager
from multiprocessing import Process
from pathlib import Path

import onnx
import onnxruntime as rt
import psutil
import seaborn as sns
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import tf2onnx
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model

In [None]:
idle_gpu_memory = 61 # in MB

if tf.test.gpu_device_name():
    print('GPU found')
    
    # check that the GPU utilization is 0
    gpu_utilization = os.system("nvidia-smi --query-gpu=utilization.gpu --format=csv")
    
    if gpu_utilization != 0:
        print("GPU utilization is not 0")
        sys.exit(1)
        
    # check that GPU memory is completely free
    gpu_memory = os.system("nvidia-smi --query-gpu=memory.free --format=csv")
    
    if gpu_memory != 0:
        print("GPU memory is not completely free")
        sys.exit(1)

### Set Memory Growth

In [None]:
physical_devices = tf.config.list_physical_devices('GPU')

for physical_device in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_device, True)
    except RuntimeError as e:
        print(e)

## Data Processing Pipeline

In [None]:
def show_history(history):
    print(history.history.keys())
    # summarize history for accuracy
    plt.figure(figsize=(10,4))
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.figure(figsize=(10,4))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
def get_metrics(Y, predictions):
    if len(Y.unique()) > 2:
        print('F1:',f1_score(Y, predictions, average='macro'))
    else:
        print('F1:',f1_score(Y, predictions))
    print('Accuracy:',accuracy_score(Y, predictions))
    print(confusion_matrix(Y, predictions))

def baseline_model(input_dim, n_output):
    model = Sequential()
    model.add(Dense(input_dim*4, input_dim=input_dim, activation='relu'))
    model.add(Dense(input_dim*6, activation='relu'))
    model.add(Dense(input_dim*3, activation='relu'))
    model.add(Dense(n_output, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def small_model(input_dim, n_output):
    model = Sequential()
    model.add(Dense(int(input_dim/2), input_dim=input_dim, activation='relu'))
    model.add(Dense(int(input_dim/3), activation='relu'))
    model.add(Dense(n_output, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def save_model_keras(model, name):
    model.save(name)

def save_scaler(scaler, name):
    pickle.dump(scaler, open(name, 'wb'))

In [None]:
data_path = 'data/'

features = "#15#c_ip:1 c_port:2 c_pkts_all:3 c_rst_cnt:4 c_ack_cnt:5 c_ack_cnt_p:6 c_bytes_uniq:7 c_pkts_data:8 c_bytes_all:9 c_pkts_retx:10 c_bytes_retx:11 c_pkts_ooo:12 c_syn_cnt:13 c_fin_cnt:14 s_ip:15 s_port:16 s_pkts_all:17 s_rst_cnt:18 s_ack_cnt:19 s_ack_cnt_p:20 s_bytes_uniq:21 s_pkts_data:22 s_bytes_all:23 s_pkts_retx:24 s_bytes_retx:25 s_pkts_ooo:26 s_syn_cnt:27 s_fin_cnt:28 first:29 last:30 durat:31 c_first:32 s_first:33 c_last:34 s_last:35 c_first_ack:36 s_first_ack:37 c_isint:38 s_isint:39 c_iscrypto:40 s_iscrypto:41 con_t:42 p2p_t:43 http_t:44 c_rtt_avg:45 c_rtt_min:46 c_rtt_max:47 c_rtt_std:48 c_rtt_cnt:49 c_ttl_min:50 c_ttl_max:51 s_rtt_avg:52 s_rtt_min:53 s_rtt_max:54 s_rtt_std:55 s_rtt_cnt:56 s_ttl_min:57 s_ttl_max:58 p2p_st:59 ed2k_data:60 ed2k_sig:61 ed2k_c2s:62 ed2k_c2c:63 ed2k_chat:64 c_f1323_opt:65 c_tm_opt:66 c_win_scl:67 c_sack_opt:68 c_sack_cnt:69 c_mss:70 c_mss_max:71 c_mss_min:72 c_win_max:73 c_win_min:74 c_win_0:75 c_cwin_max:76 c_cwin_min:77 c_cwin_ini:78 c_pkts_rto:79 c_pkts_fs:80 c_pkts_reor:81 c_pkts_dup:82 c_pkts_unk:83 c_pkts_fc:84 c_pkts_unrto:85 c_pkts_unfs:86 c_syn_retx:87 s_f1323_opt:88 s_tm_opt:89 s_win_scl:90 s_sack_opt:91 s_sack_cnt:92 s_mss:93 s_mss_max:94 s_mss_min:95 s_win_max:96 s_win_min:97 s_win_0:98 s_cwin_max:99 s_cwin_min:100 s_cwin_ini:101 s_pkts_rto:102 s_pkts_fs:103 s_pkts_reor:104 s_pkts_dup:105 s_pkts_unk:106 s_pkts_fc:107 s_pkts_unrto:108 s_pkts_unfs:109 s_syn_retx:110 http_req_cnt:111 http_res_cnt:112 http_res:113 c_pkts_push:114 s_pkts_push:115 c_tls_SNI:116 s_tls_SCN:117 c_npnalpn:118 s_npnalpn:119 c_tls_sesid:120 c_last_handshakeT:121 s_last_handshakeT:122 c_appdataT:123 s_appdataT:124 c_appdataB:125 s_appdataB:126 fqdn:127 dns_rslv:128 req_tm:129 res_tm:130 http_hostname:131".split(" ")
ids_str = "3,5,7,8,9,17,19,21,22,23"

ids = [int(x) for x in ids_str.split(",")]
gf = [f for x,f in enumerate(features) if x+1 in ids]

def tagger(c_ip, s_ip, c_pool, s_pool):
    return 1 if (c_ip in c_pool) and (s_ip in s_pool) else 0

In [None]:
tls_c_pool = [('10.100.200.4'),
          ('10.100.200.3'),
          ('10.100.200.2'),
          ('10.100.200.2'),
          ('10.100.200.2'),
          ('10.100.200.2'),
          ('10.100.200.3'),
          ('10.100.200.4'),
          ('10.100.200.2','10.100.200.4'),
          ('10.100.200.4','10.100.200.2')
]

tls_s_pool = [('149.202.83.171'),
          ('149.202.83.171'),
          ('37.187.95.110'),
          ('94.23.23.52'),
          ('94.23.23.52','149.202.83.171'),
          ('91.121.140.167','149.202.83.171'),
          ('37.187.95.110','91.121.140.167'),
          ('37.187.95.110','91.121.140.167'),
          ('149.202.83.171','37.187.95.110','94.23.23.52','94.23.247.226','91.121.140.167'),
          ('149.202.83.171','37.187.95.110','94.23.23.52','94.23.247.226','91.121.140.167')
]

data_path_5g = 'Criptominado/crypto5g'
all_files = [f'{data_path_5g}/{files_dir}/log_tcp_temp_complete' for files_dir in os.listdir(data_path_5g)]
all_files = sorted(all_files)
all_files.remove(all_files[0])
all_files

In [None]:
df_list = [pd.read_csv(file, sep=' ') for file in all_files]

In [None]:
df_list[-2]

In [None]:
def tagRows(row, pool_index):
    return tagger(row['#15#c_ip:1'], row['s_ip:15'], tls_c_pool[pool_index], tls_s_pool[pool_index])

In [None]:
for i, df in enumerate(df_list):
    df['tag'] = df.apply(lambda row: tagRows(row, i), axis=1)

In [None]:
df_train = pd.concat([df_list[0],df_list[2],df_list[4],df_list[6]])
df_test = pd.concat([df_list[1],df_list[3],df_list[5],df_list[7]])

In [None]:
df_list[4][(df_list[4]['tag'] == 1)].groupby(['#15#c_ip:1','c_port:2','s_ip:15','s_port:16']).size().reset_index().rename(columns={0:'count'})

In [None]:
for mydf in df_list:
    print(mydf[(mydf['tag'] == 1)].groupby(['#15#c_ip:1','c_port:2','s_ip:15','s_port:16']).size().reset_index().rename(columns={0:'count'}))

In [None]:
for i in tls_s_pool[-2]:
    print(df_list[-2].loc[df_list[-2]['s_ip:15'] == i])

In [None]:
new_df = df_list[-2].loc[df_list[-2]['s_ip:15'] == '94.23.247.226']
new_df[['#15#c_ip:1','c_port:2','c_pkts_all:3','s_ip:15','s_port:16','s_pkts_all:17']]

In [None]:
df_list[-2].loc[df_list[-2]['#15#c_ip:1'] == '94.23.247.226']

In [None]:
for mydf in df_list:
    print(len(mydf))

In [None]:
gf = ['c_syn_cnt:13',
     'c_mss:70',
     'c_mss_max:71',
     'c_mss_min:72',
     'c_win_max:73',
     'c_win_min:74',
     'c_cwin_max:76',
     'c_cwin_min:77',
     'c_cwin_ini:78',
     's_win_scl:90',
     's_mss_max:94',
     's_mss_min:95',
     's_win_max:96',
     's_win_min:97',
     's_cwin_max:99',
     's_cwin_min:100']

In [None]:
for i in gf:
    print(i.split(':')[1])

In [None]:
standard = StandardScaler().fit(df_train[gf])

In [None]:
data_transformed = standard.transform(df_train[gf])
data_transformed

## Model Optimization Benchmarking

Set logger

In [None]:
# delete previous log file
if os.path.exists('poc_energy_efficiency_crypto/logs/crypto_spider_5g_fcnn_optimized_benchmark.log'):
    os.remove('poc_energy_efficiency_crypto/logs/crypto_spider_5g_fcnn_optimized_benchmark.log')

In [None]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
Path('poc_energy_efficiency_crypto/logs/').mkdir(parents=True, exist_ok=True)
filehandler = logging.FileHandler('poc_energy_efficiency_crypto/logs/crypto_spider_5g_fcnn_optimized_benchmark.log')
formatter = logging.Formatter('%(asctime)s - %(message)s')
filehandler.setFormatter(formatter)
logger.addHandler(filehandler)

# add stream handler
streamhandler = logging.StreamHandler()
streamhandler.setFormatter(formatter)
logger.addHandler(streamhandler)

### Set Benchmarking Parameters

In [None]:
num_trials = 5
stats_sampling_rate = 1

es_patience = 20
es_restore_best_weights = True

batch_size = 1024
validation_split = 0.2
epochs = 10000

# devices = ["CPU", "GPU"]
devices = ["CPU"]

ml_task = "binary_classification" # "binary_classification" or "regression"

# general_optimizations = ["runtime", "graph"] NOT USED

# Check if the parameters are valid
assert devices in [["CPU"], ["GPU"], ["CPU", "GPU"]], "devices must be either ['CPU'], ['GPU'] or ['CPU', 'GPU']"
assert ml_task in ["binary_classification", "regression"], "ml_task must be either 'binary_classification' or 'regression'"

assert num_trials > 0, "num_trials must be greater than 0"
assert stats_sampling_rate > 0, "stats_sampling_rate must be greater than 0"
assert batch_size > 0, "batch_size must be greater than 0"
assert validation_split > 0 and validation_split < 1, "validation_split must be greater than 0 and less than 1"
assert epochs > 0, "epochs must be greater than 0"
assert es_patience > 0, "es_patience must be greater than 0"
assert es_restore_best_weights in [True, False], "es_restore_best_weights must be either True or False"
assert len(df_train) > 0, "df_train must contain at least one row"
assert len(df_test) > 0, "df_test must contain at least one row"
assert len(df_train.columns) > 0, "df_train must contain at least one column"
assert len(df_test.columns) > 0, "df_test must contain at least one column"
assert len(df_train.columns) == len(df_test.columns), "df_train and df_test must have the same number of columns"

logger.info(f'Number of trials set to: {num_trials}')
logger.info(f'Epochs set to: {epochs}')
logger.info(f'Batch size set to: {batch_size}')
logger.info(f'Validation split set to: {validation_split}')
logger.info(f'Patience set to: {es_patience}')
logger.info(f'Restore best weights set to: {es_restore_best_weights}')

### Define Experiments

In [None]:
experiments = {
    "EXP0": {
        "post_training_optimizations": None,
        "training_aware_optimizations": None
    },
    "EXP1": {
        "post_training_optimizations": ["full_integer_quantization"],
        "training_aware_optimizations": None
    },
    "EXP2": {
        "post_training_optimizations": ["float16_quantization"],
        "training_aware_optimizations": None
    },
    "EXP3": {
        "post_training_optimizations": ["float16_int8_quantization"],
        "training_aware_optimizations": None
    },
    "EXP4": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["pruning"]
    },
    "EXP5": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["quantization_aware_training"]
    },
    "EXP6": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation"]
    },
    "EXP7": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["pruning", "quantization_aware_training"],
    },
    "EXP8": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation", "pruning"]
    },
    "EXP9": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation", "quantization_aware_training"]
    },
    "EXP10": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation", "pruning", "quantization_aware_training"]
    },
    "EXP11": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["pruning", "float16_quantization"],
    },
    "EXP12": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation", "float16_quantization"]
    },
    "EXP13": {
        "post_training_optimizations": None,
        "training_aware_optimizations": ["knowledge_distillation", "pruning", "float16_quantization"]
    },
}

### Energy Efficiency Monitoring Functions

In [None]:
# Socket for sending data from powerstat process to main process
HOST = "127.0.0.1"

# Define the measurement tool that will be used to gather power consumption data
power_consumption_measurement_tool = "powerstat"

# RAM
def get_ram_memory_uss(pid):
    process = psutil.Process(pid)
    
    return str(process.memory_full_info().uss / (1024*1024)) + ' MB'

def get_ram_memory_rss(pid):
    process = psutil.Process(pid)
    
    return str(process.memory_full_info().rss / (1024*1024)) + ' MB'

def get_ram_memory_vms(pid):
    process = psutil.Process(pid)
    
    return str(process.memory_full_info().vms / (1024*1024)) + ' MB'

def get_ram_memory_pss(pid):
    process = psutil.Process(pid)
    
    return str(process.memory_full_info().pss / (1024*1024)) + ' MB'

# CPU
def get_cpu_usage(pid):
    process = psutil.Process(pid)
    
    return str(process.cpu_percent(interval=0.5) / psutil.cpu_count()) + ' %'

def get_cpu_freq():    
    return str(psutil.cpu_freq()[0]) + " MHz"

def perf(PORT):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((HOST, PORT))
    cmd = "echo pirata.lab | sudo -S -p \"\" perf stat -e power/energy-cores/,power/energy-pkg/"
    out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    s.sendall(out.stdout)
    
    s.close()
    
def kill_perf():
    cmd = "echo pirata.lab | sudo -S -p \"\" pkill perf"
    subprocess.run(cmd, shell=True)

def powerstat(PORT):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((HOST, PORT))
    cmd = "echo pirata.lab | sudo -S -p \"\" powerstat -R 0.5 120"
    out = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    s.sendall(out.stdout)
    
    s.close()
    
def kill_powerstat():
    cmd = "echo pirata.lab | sudo -S -p \"\" pkill powerstat"
    subprocess.run(cmd, shell=True)
    
def get_cpu_power_draw():    
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    PORT = np.random.randint(10000, 20000)
    s.bind((HOST, PORT))
    s.listen()
    
    if power_consumption_measurement_tool == "powerstat":
        p = multiprocessing.Process(target=powerstat, args=(PORT,))
    elif power_consumption_measurement_tool == "perf":
        p = multiprocessing.Process(target=perf, args=(PORT,))
        
    p.start()
    conn, _addr = s.accept()
    time.sleep(max(1, stats_sampling_rate / 2))
    
    if power_consumption_measurement_tool == "powerstat":
        q = multiprocessing.Process(target=kill_powerstat)
    elif power_consumption_measurement_tool == "perf":
        q = multiprocessing.Process(target=kill_perf)
        
    q.start()
    
    out = conn.recv(2048).decode()
    
    power_consumption = re.findall(r'CPU: (.+?) Watts', out)[0].strip() + " W"
    
    s.close()
    p.terminate()
    q.terminate()
    
    return power_consumption

# IO
def get_io_usage(pid):
    process = psutil.Process(pid)
    
    io_counters = process.io_counters()
    io_usage_process = io_counters[2] + io_counters[3] # read_bytes + write_bytes
    disk_io_counter = psutil.disk_io_counters()
    disk_io_total = disk_io_counter[2] + disk_io_counter[3] # read_bytes + write_bytes
    io_usage_process = io_usage_process / disk_io_total * 100
    io_usage_process = np.round(io_usage_process, 2)
    io_usage_process = str(io_usage_process) + " %"

    return io_usage_process

def get_bytes_written(pid):
    process = psutil.Process(pid)
    
    io_counters = process.io_counters()
    process_bytes_written = io_counters[3]
    total_bytes_written = psutil.disk_io_counters()[3]
    process_bytes_written = process_bytes_written / total_bytes_written * 100
    process_bytes_written = np.round(process_bytes_written, 2)
    process_bytes_written = str(process_bytes_written) + " %"

    return process_bytes_written

def get_bytes_read(pid):
    process = psutil.Process(pid)
    
    io_counters = process.io_counters()
    process_bytes_read = io_counters[2]
    total_bytes_read = psutil.disk_io_counters()[2]
    process_bytes_read = process_bytes_read / total_bytes_read * 100
    process_bytes_read = np.round(process_bytes_read, 2)
    process_bytes_read = str(process_bytes_read) + " %"

    return process_bytes_read

# GPU
get_gpu_memory_system = lambda: os.popen('nvidia-smi --query-gpu=memory.used --format=csv,noheader').read().split('\n')[0] # SYSTEM WIDE
def get_gpu_memory(pid):
    output = os.popen('nvidia-smi | awk \'/' + str(pid) + '/{print $8}\'').read().split('\n')[0]
    output = "0 MiB" if output == "" else output.replace("MiB", "") + " MiB"

    return output

get_gpu_usage = lambda: os.popen('nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader').read().split('\n')[0]
get_gpu_freq = lambda: os.popen('nvidia-smi --query-gpu=clocks.gr --format=csv,noheader').read().split('\n')[0]
get_gpu_power_draw = lambda: os.popen('nvidia-smi --query-gpu=power.draw --format=csv,noheader').read().split('\n')[0]

test_pid = os.getpid()

# Test functions
print("RAM Memory Usage (USS): " + get_ram_memory_uss(test_pid))
print("RAM Memory Usage (RSS): " + get_ram_memory_rss(test_pid))
print("RAM Memory Usage (VMS): " + get_ram_memory_vms(test_pid))
print("RAM Memory Usage (PSS): " + get_ram_memory_pss(test_pid))
# print("RAM Power Draw: " + get_cpu_power_draw()[1])
print("CPU Usage: " + get_cpu_usage(test_pid))
print("CPU Frequency: " + get_cpu_freq())
# print("CPU Cores Power Draw: " + get_cpu_power_draw()[0])
# print("CPU Package Power Draw: " + get_cpu_power_draw()[2])
print("CPU Power Usage: " + get_cpu_power_draw())
print("I/O Usage: " + get_io_usage(test_pid))
print("Bytes Written to disk: " + str(get_bytes_written(test_pid)))
print("Bytes Read to disk: " + str(get_bytes_read(test_pid)))
print("GPU Memory Usage: " + get_gpu_memory(test_pid))
print("GPU Usage: " + get_gpu_usage())
print("GPU Frequency: " + get_gpu_freq())
print("GPU Power Draw: " + get_gpu_power_draw())

def get_stats(pid):
    stats = {}
    stats['ram_memory_uss'] = get_ram_memory_uss(pid)
    stats['ram_memory_rss'] = get_ram_memory_rss(pid)
    stats['ram_memory_vms'] = get_ram_memory_vms(pid)
    stats['ram_memory_pss'] = get_ram_memory_pss(pid)
    # stats["ram_power_draw"] = get_cpu_power_draw()[1]
    stats['cpu_usage'] = get_cpu_usage(pid)
    stats['cpu_freq'] = get_cpu_freq()
    # stats['cpu_cores_power_draw'] = get_cpu_power_draw()[0]
    # stats['cpu_package_power_draw'] = get_cpu_power_draw()[2]
    stats['cpu_power_draw'] = get_cpu_power_draw()
    stats['io_usage'] = get_io_usage(pid)
    stats['bytes_written'] = get_bytes_written(pid)
    stats['bytes_read'] = get_bytes_read(pid)
    stats['gpu_memory'] = get_gpu_memory(pid)
    stats['gpu_usage'] = get_gpu_usage()
    stats['gpu_freq'] = get_gpu_freq()
    stats['gpu_power_draw'] = get_gpu_power_draw()

    return stats

def sample_stats(test, sampling_rate, pid, directory):
    print(f"test: {test}")
    
    stats_list = []
    started = False
    
    Path(directory).mkdir(parents=True, exist_ok=True)

    while True:
        stats = get_stats(pid)
        stats_list.append(stats)

        # write stats to pickle file
        with open(f"{directory}/crypto_spider_5g_fcnn_optimized_benchmark_{test}_stats.pkl", 'wb') as f:
            pickle.dump(stats_list, f)

        if not started:
            # write file started.txt to signal that the sampling has started
            with open(f"started_{test}.txt", 'w') as f:
                f.write("STARTED")
            
            print("\nStats sampling started")

        started = True

        # check if file "stop.txt" exists
        if os.path.isfile(f"stop_{test}.txt"):
            print("Stats sampling stopped")
            os.remove(f"stop_{test}.txt")

            break
        else:
            time.sleep(sampling_rate)

def get_stats_background(test, sampling_rate, pid, directory):
    proc = Process(target=sample_stats, args=(test, sampling_rate, pid, directory))
    proc.start()

    return proc

def strip_units(x):
    return float(x.split(' ')[0])

def agg_stats(agg_func, stats_list, average_time_spent):
    average_stats = copy.deepcopy(stats_list)

    # strip units of the stats of every trial in stats_list
    for trial in average_stats:
        for snapshot in trial:
            for stat in snapshot:
                stats_value = snapshot[stat]
                stats_value_stripped = strip_units(stats_value)
                snapshot[stat] = stats_value_stripped

    trials_list = []
    
    # convert to a numpy array
    for trial in average_stats:
        df = pd.DataFrame(trial)
        trial = df.to_numpy()
        
        trials_list.append(trial)
    
    trials_list_np = np.array(trials_list)
    
    print("trials_list_np.shape: {}".format(trials_list_np.shape))
    
    # fill first axis of trials_list_np with NaNs until all trials have the same length
    # trials_list_np = np.array([np.pad(trial, ((0, trials_list_np.shape[0] - trial.shape[0]), (0, 0)), 'constant', constant_values=np.nan) for trial in trials_list_np])
    max_length = max([trial.shape[0] for trial in trials_list_np])
    trials_list_np_filled = []
    
    for trial in trials_list_np:
        trial_length = trial.shape[0]
        
        if trial_length < max_length:
            print(f"Trial length ({trial_length}) is smaller than max length ({max_length}). Filling with NaNs...")
        
            # fill first axis of trial with NaNs until trial has the same length as the longest trial
            trial = np.pad(trial, ((0, max_length - trial_length), (0, 0)), 'constant', constant_values=np.nan)
            
            print("trial.shape: {}".format(trial.shape))
        
        trials_list_np_filled.append(trial)
    
    trials_list_np_filled = np.array(trials_list_np_filled)
    
    print("trials_list_np_filled.shape: {}".format(trials_list_np_filled.shape))

    average_stats_np = agg_func(trials_list_np_filled, axis=0)

    print("average_stats_np.shape: {}".format(average_stats_np.shape))

    return average_stats_np

def get_average_stats(stats_list, average_time_spent):
    return agg_stats(agg_func=np.nanmean, stats_list=stats_list, average_time_spent=average_time_spent)

def get_std_dev_stats(stats_list, average_time_spent):
    return agg_stats(agg_func=np.nanstd, stats_list=stats_list, average_time_spent=average_time_spent)

def get_max_stats(stats_list, average_time_spent):
    return agg_stats(agg_func=np.nanmax, stats_list=stats_list, average_time_spent=average_time_spent)

def save_stats_to_logfile(test, average_stats, std_dev_stats, max_stats):
    units = {'ram_memory_uss': 'MB', 'ram_memory_rss': 'MB', 'ram_memory_vms': 'MB', 'ram_memory_pss': 'MB', 'cpu_usage': '%', 'cpu_freq': 'MHz', 'cpu_power_draw': 'W', 'io_usage': '%', 'bytes_written': 'MB', 'bytes_read': 'MB', 'gpu_memory': 'MB', 'gpu_usage': '%', 'gpu_freq': 'MHz', 'gpu_power_draw': 'W'}

    for key in average_stats.keys():
        logger.info(f'[{test}] {key} (average): {average_stats[key]} {units[key]}')
        logger.info(f'[{test}] {key} (std_dev): {std_dev_stats[key]} {units[key]}')
        logger.info(f'[{test}] {key} (max): {max_stats[key]} {units[key]}')

### General Optimizations

In [None]:
# Runtime optimizations
def apply_runtime_optimizations():
    logger.info("Applying runtime optimizations")

    total_cpu_cores = os.popen('nproc').read().strip()
    number_sockets = int(os.popen('grep "^physical id" /proc/cpuinfo | awk \'{print $4}\' | sort -un | tail -1').read().strip()) + 1
    number_cpu_cores = int((int(total_cpu_cores) / 2) / number_sockets)

    logger.info("number of CPU cores per socket: {}".format(number_cpu_cores))
    logger.info("number of sockets: {}".format(number_sockets))

    # set intra_op_parallelism = number of physical core per socket
    # set inter_op_parallelism = number of sockets

    tf.config.set_soft_device_placement(True)
    tf.config.threading.set_intra_op_parallelism_threads(number_cpu_cores)
    tf.config.threading.set_inter_op_parallelism_threads(number_sockets)

# Graph optimizations
def apply_graph_optimizations():
    logger.info("Applying graph optimizations")

    # Export the graph to a protobuf file
    tf.io.write_graph(tf.compat.v1.get_default_graph(), './', 'graph.pbtxt', as_text=True)

    # Freeze the graph
    os.system('python3 -m tensorflow.python.tools.freeze_graph --input_graph crypto_spider_5g_fcnn_frozen.pb --input_checkpoint crypto_spider_5g_fcnn.h5 --output_graph crypto_spider_5g_fcnn_frozen.pb --output_node_names model_output')

    # Optimize the graph for inference
    os.system('python3 -m tensorflow.python.tools.optimize_for_inference --input crypto_spider_5g_fcnn_frozen.pb --output crypto_spider_5g_fcnn_frozen.pb --input_names input_1 --output_names model_output --frozen_func true')

### Model Training

In [None]:
def train_model(model, verbose=False, extra_callbacks=[]):
    print("Training model")
    print("Verbose: {}".format(verbose))
    print("Extra callbacks: {}".format(extra_callbacks))
    
    # es = EarlyStopping(monitor='val_loss', mode='min', patience=es_patience, verbose=verbose, restore_best_weights=es_restore_best_weights)
    # history = model.fit(data_transformed, pd.get_dummies(df_train['tag']), validation_split=validation_split, epochs=epochs, batch_size=4096, callbacks=[es] + extra_callbacks, verbose=verbose)
    # history = model.fit(data_transformed, pd.get_dummies(df_train['tag']), validation_split=validation_split, epochs=epochs, batch_size=4096, callbacks=extra_callbacks, verbose=verbose)
    history = model.fit(data_transformed, pd.get_dummies(df_train['tag']), validation_split=validation_split, epochs=50, batch_size=4096, callbacks=extra_callbacks, verbose=verbose)

    return model, history

### Model Optimizations

In [None]:
def full_integer_quantization(trained_model):
    def dataset_generator():
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * 0.25)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = dataset_generator
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8  # or tf.uint8
    converter.inference_output_type = tf.int8  # or tf.uint8
    quantized_model = converter.convert()

    logger.info("Applied Full-integer Quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file


def float16_quantization(trained_model):
    def dataset_generator():
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * 0.25)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.representative_dataset = dataset_generator
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.float16]
    quantized_model = converter.convert()

    logger.info("Applied float16 Activations and int8 weights-Quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file


def float16_int8_quantization(trained_model):
    def dataset_generator():
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * 0.25)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.representative_dataset = dataset_generator
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
    quantized_model = converter.convert()

    logger.info("Applied float16 Activations and int8 weights-Quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file

# Fine-tune pretrained model with pruning
def pruning(model, convert_to_tflite=True):
    prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

    # Compute end step to finish pruning after 10% of pre-training epochs
    batch_size = 256
    # n_epochs = 20
    n_epochs = 10
    
    validation_split = 0.1

    num_samples = len(df_train)
    end_step = np.ceil(num_samples / batch_size).astype(np.int32) * epochs

    pruning_params = {
        "pruning_schedule": tfmot.sparsity.keras.PolynomialDecay(
            initial_sparsity=0.50, final_sparsity=0.80, begin_step=0, end_step=end_step
        )
    }

    model_for_pruning = prune_low_magnitude(model, **pruning_params)

    model_for_pruning.compile(
        optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
    )

    callbacks = [
        tfmot.sparsity.keras.UpdatePruningStep(),
    ]
    
    history = model_for_pruning.fit(data_transformed, pd.get_dummies(df_train['tag']), validation_split=validation_split, epochs=n_epochs, batch_size=batch_size, callbacks=callbacks)

    pruned_model = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
    
    if convert_to_tflite:
        # convert to tflite
        converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
        converter.optimizations = [tf.lite.Optimize.EXPERIMENTAL_SPARSITY, tf.lite.Optimize.DEFAULT]
        pruned_tflite_model = converter.convert()
        
        # Save the model to disk
        pruned_tflite_model_file = 'model.tflite'
        
        with open(pruned_tflite_model_file, 'wb') as f:
            f.write(pruned_tflite_model)

        return pruned_tflite_model_file, history
        
        # return pruned_model, history
    else:
        return pruned_model, history


# Quantization Aware Training
def quantization_aware_training(model):
    quantize_model = tfmot.quantization.keras.quantize_model

    q_aware_model = quantize_model(model)
    q_aware_model.compile(
        optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
    )
    
    batch_size = 256
    n_epochs = 10
    validation_split = 0.1

    history = q_aware_model.fit(data_transformed, pd.get_dummies(df_train['tag']), validation_split=validation_split, epochs=n_epochs, batch_size=batch_size)

    converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    quantized_model = converter.convert()
    
    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file, history


# Knowledge Distillation
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self, optimizer, metrics, student_loss_fn, distillation_loss_fn, alpha=0.1, temperature=3,
    ):
        """ Configure the distiller.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
        """
        super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        # Unpack data
        x, y = data

        # Forward pass of teacher
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Compute losses
            student_loss = self.student_loss_fn(y, student_predictions)

            # Compute scaled distillation loss from https://arxiv.org/abs/1503.02531
            # The magnitudes of the gradients produced by the soft targets scale
            # as 1/T^2, multiply them by T^2 when using both hard and soft targets.
            distillation_loss = (
                self.distillation_loss_fn(
                    tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                    tf.nn.softmax(student_predictions / self.temperature, axis=1),
                )
                * self.temperature ** 2
            )

            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss, "distillation_loss": distillation_loss})

        return results

    def test_step(self, data):
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})

        return results


def knowledge_distillation(teacher, convert_to_tflite=True):
    logger.info("Applying Knowledge Distillation")
    student = small_model(input_dim=len(gf), n_output=2)

    # Initialize and compile distiller
    distiller = Distiller(student=student, teacher=teacher)
    distiller.compile(
        optimizer=keras.optimizers.Adam(),
        metrics=[keras.metrics.CategoricalAccuracy()],
        student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
        distillation_loss_fn=keras.losses.KLDivergence(),
        alpha=0.1,
        temperature=3,
    )

    es = EarlyStopping(
        monitor="val_student_loss",
        mode="min",
        patience=es_patience,
        restore_best_weights=es_restore_best_weights,
    )
    history = distiller.fit(
        data_transformed,
        pd.get_dummies(df_train["tag"]),
        validation_split=validation_split,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[es],
    )
    
    if convert_to_tflite:
        # convert to tflite
        converter = tf.lite.TFLiteConverter.from_keras_model(distiller.student)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        distilled_tflite_model = converter.convert()
        
        # Save the model to disk
        distilled_tflite_model_file = 'model.tflite'
        
        with open(distilled_tflite_model_file, 'wb') as f:
            f.write(distilled_tflite_model)

        return distilled_tflite_model_file, history

        # return distiller.student, history
    else:
        return distiller.student, history

### Benchmarking Helper Functions

In [None]:
@contextmanager
def measure_time() -> float:
    start = time.perf_counter()
    yield lambda: time.perf_counter() - start

class StatsCollectionManager():
    def __init__(self, test, sampling_rate=0.1, pid=None, directory=None):
        self.test = test
        self.sampling_rate = sampling_rate
        self.pid = pid
        self.proc = None
        
        if directory is None:
            self.directory = f"poc_energy_efficiency_crypto/"
        else:
            self.directory = f"poc_energy_efficiency_crypto/{directory}/"

        print("Starting stats collection for test: {}".format(test))
 
    def __enter__(self):
        self.proc = get_stats_background(test=self.test, sampling_rate=self.sampling_rate, pid=self.pid, directory=self.directory)

        while True:
            # check if file stats.txt exists
            if os.path.exists(f"started_{self.test}.txt"):
                print("\nStats collection started")
                os.remove(f"started_{self.test}.txt")
                break
 
    def __exit__(self, exc_type, exc_value, exc_traceback):
        # write file stop.txt to signal to the background process to stop
        with open(f"stop_{self.test}.txt", "w") as f:
            print("Stopping stats collection")
            f.write("STOP")

        while True:
            if os.path.isfile(f"{self.directory}/crypto_spider_5g_fcnn_optimized_benchmark_{self.test}_stats.pkl"):
                print("Stats file found")
                print(self.test)

                break

def perform_inference(model):
    model.predict(data_transformed)
    
def perform_inference_onnx(sess):
    input_name = sess.get_inputs()[0].name
    output_name = sess.get_outputs()[0].name
    sess.run([output_name], {input_name: data_transformed.astype(np.float32)})
    
def perform_inference_tflite(interpreter, batch_size):
    # check input details and convert data to the right type
    input_details = interpreter.get_input_details()
    print(input_details)
    dtype = input_details[0]['dtype']
    input_data = np.array(data_transformed, dtype=dtype)
    
    input_details = interpreter.get_input_details()[0]
    interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
    interpreter.allocate_tensors()
    
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    
    # create batches of data_transformed
    batch_size = batch_size
    
    preds = []

    for i in range(0, len(data_transformed), batch_size):
        batch = data_transformed[i:i+batch_size]
        # print(f"Batch {i//batch_size} has {len(batch)} elements")
        
        batch_data = np.array(batch, dtype=dtype)
        
        if len(batch) == batch_size:        
            interpreter.set_tensor(input_details['index'], batch_data)
            interpreter.invoke()
            output_data = interpreter.get_tensor(output_details['index'])
            
            preds.append(output_data)
    
    return np.concatenate(preds)

def save_model_to_h5(keras_model, model_filename):
    keras_model.save(f"{model_filename}.h5", include_optimizer=False)
    
    return f"{model_filename}.h5"

def save_model_to_tflite(tflite_model, model_filename):
    with open(f"{model_filename}.tflite", 'wb') as f:
        f.write(tflite_model)
    
    return f"{model_filename}.tflite"

def save_model_to_onnx(onnx_model, model_filename):
    onnx.save(onnx_model, f"{model_filename}.onnx")
    
    return f"{model_filename}.onnx"
    
def gzip_model(model_path):
    with open(model_path, 'rb') as f_in, open(f"{model_path}.gz", 'wb') as f_out:
        f_out.write(gzip.compress(f_in.read()))
        
    os.remove(model_path)
    
    return f"{model_path}.gz"

def unzip_model(model_path):
    with gzip.open(model_path, 'rb') as f_in, open(model_path[:-3], 'wb') as f_out:
        f_out.write(f_in.read())
            
    return model_path[:-3]

def save_model(model, ext):
    model_filename = "model"
    
    if ext == "h5":
        model_path = save_model_to_h5(model, model_filename)
    elif ext == "onnx":
        model_path = save_model_to_onnx(model, model_filename)
    elif ext == "tflite":
        model_path = save_model_to_tflite(model, model_filename)
    else:
        raise Exception("Model format not supported")
    
    model_path = gzip_model(model_path)
    
    print(f"Model saved to {model_path}")
    
    return model_path

def load_model_from_h5(model_path) -> tf.keras.Model:
    keras_model = tf.keras.models.load_model(model_path)
    
    return keras_model

def load_model_from_tflite(model_path) -> tf.lite.Interpreter:
    tflite_model = tf.lite.Interpreter(model_path=model_path)
    tflite_model.allocate_tensors()
    
    return tflite_model

def load_model_from_onnx(model_path) -> onnx.ModelProto:
    # onnx_model = onnx.load(model_path)
    
    # return onnx_model
    
    # onnx_model = rt.InferenceSession(model_path, providers=["CUDAExecutionProvider"])
    onnx_model = rt.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    
    return onnx_model

def load_model(ext):
    model_path = f"model.{ext}.gz"
    
    model_path = unzip_model(model_path)
    
    if ext == "h5":
        model = load_model_from_h5(model_path)
    elif ext == "onnx":
        model = load_model_from_onnx(model_path)
    elif ext == "tflite":
        model = load_model_from_tflite(model_path)
    else:
        raise Exception("Model format not supported")
    
    return model

def export_keras_model_to_tflite(model):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    
    return tflite_model

def export_keras_model_to_onnx(model):
    spec = (tf.TensorSpec(shape=(1, 16), dtype=tf.float32, name='input'),)
    model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec, output_path="model.onnx")
    
    return model_proto

def export_tflite_model_to_onnx(model_path):
    spec = (tf.TensorSpec(shape=(1, 16), dtype=tf.float32, name='input'),)
    model_proto, _ = tf2onnx.convert.from_tflite(model_path, output_path="model.onnx")
    
    return model_proto

def get_gzipped_model_size(ext):
  # Returns size of gzipped model, in bytes.
  if os.path.exists(f"model.{ext}.gz"):
    return os.path.getsize(f"model.{ext}.gz")
  else:
      RuntimeError(f"Model with extension \"{ext}\" not found")

def delete_model(ext):
    if os.path.exists(f"model.{ext}.gz"):
        os.remove(f"model.{ext}.gz")
    else:
        RuntimeError(f"Model with extension \"{ext}\" not found")
    
    if os.path.exists(f"model.{ext}"):
        os.remove(f"model.{ext}")
    else:
        RuntimeError(f"Model with extension \"{ext}\" not found")

def perform_evaluation_onnx(experiment, device="CPU"):
    assert device in ["CPU", "GPU"]

    logger.info(f"Evaluating {experiment} model on test set")

    # Load model
    sess = load_model(ext="onnx")
    sess.set_providers(["CPUExecutionProvider"] if device == "CPU" else ["CUDAExecutionProvider"])

    # Load test set
    test_data_transformed = standard.transform(df_test[gf])
    
    # get input shape
    input_shape = sess.get_inputs()[0].shape
    logger.info(f"Input shape: {input_shape}")
    
    # get output shape
    output_shape = sess.get_outputs()[0].shape
    logger.info(f"Output shape: {output_shape}")
    
    # transform data to the expected tensor type
    test_data_transformed = test_data_transformed.astype(np.float32)

    # Evaluate model
    results = sess.run(None, {sess.get_inputs()[0].name: test_data_transformed})
    
    # get predictions
    predictions = results[0]
    
    
    if ml_task == "binary_classification":
        # convert predictions to labels
        predictions = np.argmax(predictions, axis=1)
        
        accuracy = accuracy_score(df_test["tag"], predictions)
        f1 = f1_score(df_test["tag"], predictions, average="weighted")
        auc = roc_auc_score(df_test["tag"], predictions)
        recall = recall_score(df_test["tag"], predictions, average="weighted")
        precision = precision_score(df_test["tag"], predictions, average="weighted")
        balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
        matthews = matthews_corrcoef(df_test["tag"], predictions)

        logger.info(f"Accuracy: {accuracy}")
        logger.info(f"F1 score: {f1}")
        logger.info(f"AUC: {auc}")
        logger.info(f"Recall: {recall}")
        logger.info(f"Precision: {precision}")
        logger.info(f"Balanced accuracy: {balanced_accuracy}")
        logger.info(f"Matthews correlation coefficient: {matthews}")
        
        test_results = {
            "accuracy": accuracy,
            "f1": f1,
            "auc": auc,
            "recall": recall,
            "precision": precision,
            "balanced_accuracy": balanced_accuracy,
            "matthews": matthews
        }
        
        # save to datframe
        df_evaluation = pd.DataFrame([[experiment, device, accuracy, f1, auc, recall, precision, balanced_accuracy, matthews]], columns=["experiment", "device", "accuracy", "f1", "auc", "recall", "precision", "balanced_accuracy", "matthews"])
    elif ml_task == "regression":
        mae = mean_absolute_error(df_test["tag"], predictions)
        mse = mean_squared_error(df_test["tag"], predictions)
        mape = mean_absolute_percentage_error(df_test["tag"], predictions)
        smape = 1/len(df_test["tag"]) * np.sum(2 * np.abs(predictions - df_test["tag"]) / (np.abs(predictions) + np.abs(df_test["tag"])))
        
        logger.info(f"MAE: {mae}")
        logger.info(f"MSE: {mse}")
        logger.info(f"MAPE: {mape}")
        logger.info(f"SMAPE: {smape}")
        
        test_results = {
            "mae": mae,
            "mse": mse,
            "mape": mape,
            "smape": smape
        }
            
        # save to datframe
        df_evaluation = pd.DataFrame([[experiment, device, mae, mse, mape, smape]], columns=["experiment", "device", "mae", "mse", "mape", "smape"])
    
    with open("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.pkl", "wb") as f:
        pickle.dump(test_results, f)
    
    return df_evaluation

def perform_evaluation_tflite(experiment, device="CPU"):
    assert device in ["CPU", "GPU"]

    logger.info(f"Evaluating {experiment} model on test set")

    # Load model
    interpreter = load_model(ext="tflite")
    interpreter.allocate_tensors()

    # Load test set
    test_data_transformed = standard.transform(df_test[gf])
    
    # get input shape
    input_shape = interpreter.get_input_details()[0]["shape"]
    logger.info(f"Input shape: {input_shape}")
    
    # get output shape
    output_shape = interpreter.get_output_details()[0]["shape"]
    logger.info(f"Output shape: {output_shape}")
    
    # transform data to the expected tensor type
    input_details = interpreter.get_input_details()[0]
    dtype = input_details["dtype"]    
    input_data = np.array(test_data_transformed, dtype=dtype)
    
    # reshape model input
    batch_size = 256
    
    input_details = interpreter.get_input_details()[0]
    interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
    interpreter.allocate_tensors()
    
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    
    preds = []

    # create batches of test_data_transformed
    for i in range(0, len(test_data_transformed), batch_size):
        batch = test_data_transformed[i:i+batch_size]
        # print(f"Batch {i//batch_size} has {len(batch)} elements")
        
        batch_data = np.array(batch, dtype=dtype)
        
        if len(batch) == batch_size:        
            interpreter.set_tensor(input_details['index'], batch_data)
            interpreter.invoke()
            output_data = interpreter.get_tensor(output_details['index'])
            
            preds.append(output_data)
    
    predictions = np.concatenate(preds)
    
    # take the labels of the test set
    test_labels = df_test["tag"].values[:len(predictions)]

    # Evaluate model
    # accuracy = accuracy_score(test_labels, predictions)
    # balanced_accuracy = balanced_accuracy_score(test_labels, predictions)
    # f1 = f1_score(test_labels, predictions, average="weighted")

    # logger.info(f"Accuracy: {accuracy}")
    # logger.info(f"Balanced accuracy: {balanced_accuracy}")
    # logger.info(f"F1 score: {f1}")
    
    # conf_matrix = confusion_matrix(test_labels, predictions)
    # logger.info(f"Confusion matrix: {conf_matrix}")
    
    # test_results = {
    #     "accuracy": accuracy,
    #     "balanced_accuracy": balanced_accuracy,
    #     "f1": f1,
    # }

    # with open("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.pkl", "wb") as f:
    #     pickle.dump(test_results, f)
    
    # # save to datframe
    # df_evaluation = pd.DataFrame([[experiment, device, accuracy, balanced_accuracy, f1]], columns=["experiment", "device", "accuracy", "balanced_accuracy", "f1"])
    
    if ml_task == "binary_classification":
        # convert predictions to labels
        predictions = np.argmax(predictions, axis=1)
        
        accuracy = accuracy_score(test_labels, predictions)
        f1 = f1_score(test_labels, predictions, average="weighted")
        auc = roc_auc_score(test_labels, predictions)
        recall = recall_score(test_labels, predictions, average="weighted")
        precision = precision_score(test_labels, predictions, average="weighted")
        balanced_accuracy = balanced_accuracy_score(test_labels, predictions)
        matthews = matthews_corrcoef(test_labels, predictions)

        logger.info(f"Accuracy: {accuracy}")
        logger.info(f"F1 score: {f1}")
        logger.info(f"AUC: {auc}")
        logger.info(f"Recall: {recall}")
        logger.info(f"Precision: {precision}")
        logger.info(f"Balanced accuracy: {balanced_accuracy}")
        logger.info(f"Matthews correlation coefficient: {matthews}")
        
        test_results = {
            "accuracy": accuracy,
            "f1": f1,
            "auc": auc,
            "recall": recall,
            "precision": precision,
            "balanced_accuracy": balanced_accuracy,
            "matthews": matthews
        }
        
        # save to datframe
        df_evaluation = pd.DataFrame([[experiment, device, accuracy, f1, auc, recall, precision, balanced_accuracy, matthews]], columns=["experiment", "device", "accuracy", "f1", "auc", "recall", "precision", "balanced_accuracy", "matthews"])
    elif ml_task == "regression":
        mae = mean_absolute_error(test_labels, predictions)
        mse = mean_squared_error(test_labels, predictions)
        mape = mean_absolute_percentage_error(test_labels, predictions)
        smape = 1/len(test_labels) * np.sum(2 * np.abs(predictions - test_labels) / (np.abs(predictions) + np.abs(test_labels)))
        
        logger.info(f"MAE: {mae}")
        logger.info(f"MSE: {mse}")
        logger.info(f"MAPE: {mape}")
        logger.info(f"SMAPE: {smape}")
        
        test_results = {
            "mae": mae,
            "mse": mse,
            "mape": mape,
            "smape": smape
        }
            
        # save to datframe
        df_evaluation = pd.DataFrame([[experiment, device, mae, mse, mape, smape]], columns=["experiment", "device", "mae", "mse", "mape", "smape"])
    
    with open("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.pkl", "wb") as f:
        pickle.dump(test_results, f)
    
    return df_evaluation

### Benchmarking Functions

In [None]:
def benchmark_training(device, post_training_optimizations=None, training_aware_optimizations=None):
    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    assert gpu_memory_usage == idle_gpu_memory_usage

    print(f"GPU memory usage: {gpu_memory_usage} MiB")
    
    pid = os.getpid()

    with StatsCollectionManager(test="training", sampling_rate=stats_sampling_rate, pid=pid) as training_scm:
        with measure_time() as training_time_measure:
            # Training
            if training_aware_optimizations is None:
                logging.info("Training without training-aware optimizations")

                with tf.device(device):
                    model = baseline_model(input_dim=len(gf), n_output=2)
                    model, history = train_model(model)
                    tflite_model = export_keras_model_to_tflite(model)
                    model_path = save_model_to_tflite(tflite_model, model_filename="model")
                    # model_path = apply_post_training_optimizations(model, post_training_optimizations)
                    
                    if post_training_optimizations is not None:
                        if "full_integer_quantization" in post_training_optimizations:
                            model_path = full_integer_quantization(model)
                        elif "float16_quantization" in post_training_optimizations:
                            model_path = float16_quantization(model)
                        elif "float16_int8_quantization" in post_training_optimizations:
                            model_path = float16_int8_quantization(model)
                        
                        # print("Exporting TFLite model to ONNX")
                        # onnx_model = export_tflite_model_to_onnx(model_path)
            else:
                with tf.device(device):
                    logging.info("Training with training-aware optimizations")
                    model = baseline_model(input_dim=len(gf), n_output=2)
                    model, history = train_model(model)
                    # model, history = apply_training_aware_optimizations(model, training_aware_optimizations)
                                        
                    if training_aware_optimizations is not None:
                        if "knowledge_distillation" in training_aware_optimizations:
                            if "pruning" in training_aware_optimizations or "quantization_aware_training" in training_aware_optimizations:
                                model_path, _history = knowledge_distillation(model, convert_to_tflite=False)
                            else:
                                model_path, _history = knowledge_distillation(model)
                        
                        if "pruning" in training_aware_optimizations:
                            if "quantization_aware_training" in training_aware_optimizations:
                                model_path, _history = pruning(model, convert_to_tflite=False)
                            else:
                                model_path, _history = pruning(model)
                            
                        if "quantization_aware_training" in training_aware_optimizations:
                            model_path, _history = quantization_aware_training(model)
                        
                        # print("Exporting TFLite model to ONNX")
                        # onnx_model = export_tflite_model_to_onnx(model_path)
    
    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    print(f"GPU memory usage: {gpu_memory_usage} MiB")

    assert gpu_memory_usage == idle_gpu_memory_usage

    # # Get total parameters count
    # if isinstance(model, tf.keras.Model):
    #     num_params = model.count_params()
    # elif isinstance(model, tf.lite.TFLiteConverter): # NOT SUPPORTED
    #     num_params = model._num_parameters
    # else:
    #     raise Exception(f"Unknown model type: {type(model)}")
    
    # logger.info(f"Number of total parameters: {num_params}")

    # # Save model to ONNX
    # if isinstance(model, tf.keras.Model):
    #     # tflite_model = export_keras_model_to_tflite(model)
    #     # onnx_model = export_keras_model_to_onnx(model)
    #     print("Exporting Keras model to ONNX")
    #     onnx_model = export_keras_model_to_onnx(model)
    # else:
    #     print("Exporting TFLite model to ONNX")
    #     onnx_model = export_tflite_model_to_onnx(model_path)
    
    # save_model(onnx_model, ext="onnx")
    
    gzip_model(model_path)

    # Get training time
    training_time = training_time_measure()

    # Write training time to file
    with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_time.pkl", "wb") as f:
        pickle.dump({"training_time": training_time}, f)
        

def benchmark_inference(device):
    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    assert gpu_memory_usage == idle_gpu_memory_usage

    print(f"GPU memory usage: {gpu_memory_usage} MiB")
    
    pid = os.getpid()

    # load model
    # model = load_gzipped_model_from_h5()
    # model = load_model(ext="onnx")
    interpreter = load_model(ext="tflite")
    
    # # resize input
    # batch_size = 256
    # tensor_index = interpreter.get_input_details()[0]["index"]
    # interpreter.resize_tensor_input(tensor_index, [batch_size, 16])
    # interpreter.allocate_tensors()

    # Inference test
    with StatsCollectionManager(test="inference", sampling_rate=stats_sampling_rate, pid=pid) as inference_scm:
        with measure_time() as inference_time_measure:
            with tf.device(device):
                # if post_training_optimizations is None and training_aware_optimizations is None:
                #     perform_inference(model)
                # elif "quantization" in post_training_optimizations or "quantization" in training_aware_optimizations:
                #     perform_inference_quantized(model)
                # else:
                #     perform_inference(model)
                # perform_inference_onnx(model)
                perform_inference_tflite(interpreter, batch_size=256)

    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    print(f"GPU memory usage: {gpu_memory_usage} MiB")

    assert gpu_memory_usage == idle_gpu_memory_usage

    # Get inference and load times
    inference_time = inference_time_measure()

    # Write inference time to file
    with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_time.pkl", "wb") as f:
        pickle.dump({"inference_time": inference_time}, f)

def benchmark_load(device):
    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    assert gpu_memory_usage == idle_gpu_memory_usage

    print(f"GPU memory usage: {gpu_memory_usage} MiB")
    
    pid = os.getpid()

    # load test
    with StatsCollectionManager(test="load", sampling_rate=stats_sampling_rate, pid=pid) as load_scm:
        with measure_time() as load_time_measure:
            with tf.device(device):
                # if post_training_optimizations is None:
                #     load_gzipped_model()
                # elif "pruning" in post_training_optimizations:
                #     load_gzipped_pruned_model()
                # else:
                #     load_gzipped_model()
                load_model(ext="tflite")
    
    gpu_memory_usage = get_gpu_memory_system()
    gpu_memory_usage = gpu_memory_usage.replace('MiB', '')
    gpu_memory_usage = float(gpu_memory_usage)

    print(f"GPU memory usage: {gpu_memory_usage} MiB")

    assert gpu_memory_usage == idle_gpu_memory_usage

    # Get load time
    load_time = load_time_measure()

    # Write load time to file
    with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_time.pkl", "wb") as f:
        pickle.dump({"load_time": load_time}, f)

def benchmark(experiment, device="GPU"):
    assert device in ["CPU", "GPU"]

    logger.info("Benchmarking {} on {}".format(experiment, device))

    post_training_optimizations = experiments[experiment]["post_training_optimizations"]
    training_aware_optimizations = experiments[experiment]["training_aware_optimizations"]

    training_times_list = []
    inference_times_list = []
    load_times_list = []
    
    training_stats_list = []
    inference_stats_list = []
    load_stats_list = []
    
    for i in range(0, num_trials):
        logger.info(f"Trial {i}")

        p_training = multiprocessing.Process(target=benchmark_training, args=(device, post_training_optimizations, training_aware_optimizations))
        p_training.start()
        p_training.join()

        p_inference = multiprocessing.Process(target=benchmark_inference, args=(device,))
        p_inference.start()
        p_inference.join()

        p_load = multiprocessing.Process(target=benchmark_load, args=(device,))
        p_load.start()
        p_load.join()
        
        # Read training, inference and load times to file
        with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_time.pkl", "rb") as f:
            training_time = pickle.load(f)
        
        with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_time.pkl", "rb") as f:
            inference_time = pickle.load(f)
        
        with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_time.pkl", "rb") as f:
            load_time = pickle.load(f)

        inference_time = inference_time["inference_time"]
        training_time = training_time["training_time"]
        load_time = load_time["load_time"]

        # Delete trial times files
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_time.pkl")
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_time.pkl")
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_time.pkl")
        
        # Read training, inference and load stats to file
        with open('poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.pkl', 'rb') as f:
            training_stats = pickle.load(f)
            
        with open('poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.pkl', 'rb') as f:
            inference_stats = pickle.load(f)

        with open('poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.pkl', 'rb') as f:
            load_stats = pickle.load(f)
    
        # Delete trial stats files
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.pkl")
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.pkl")
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.pkl")

        logger.info("Trial {} - Training time: {:.2f}s".format(i, training_time))
        logger.info("Trial {} - Inference time: {:.2f}s".format(i, inference_time))
        logger.info("Trial {} - Load time: {:.2f}s".format(i, load_time))
        logger.info("Trial {} - Total time: {:.2f}s".format(i, training_time + inference_time + load_time))

        # Save training, inference and load times
        training_times_list.append(training_time)
        inference_times_list.append(inference_time)
        load_times_list.append(load_time)

        # Save training, inference and load stats
        training_stats_list.append(training_stats)
        inference_stats_list.append(inference_stats)
        load_stats_list.append(load_stats)
        
        if i != num_trials - 1:
            # Delete models
            delete_model("onnx")
            delete_model("tflite")
            delete_model("h5")

    average_training_time = np.mean(training_times_list)
    std_dev_training_time = np.std(training_times_list)
    max_training_time = np.max(training_times_list)

    average_inference_time = np.mean(inference_times_list)
    std_dev_inference_time = np.std(inference_times_list)
    max_inference_time = np.max(inference_times_list)

    average_load_time = np.mean(load_times_list)
    std_dev_load_time = np.std(load_times_list)
    max_load_time = np.max(load_times_list)

    # Time spent on training
    logger.info("Average training time: {}".format(np.round(average_training_time, 2)))
    logger.info("Standard deviation of training time: {}".format(np.round(std_dev_training_time, 2)))
    logger.info("Max. training time: {}".format(np.round(max_training_time, 2)))

    # Time spent on inference
    logger.info("Average inference time: {}".format(np.round(average_inference_time, 2)))
    logger.info("Standard deviation of inference time: {}".format(np.round(std_dev_inference_time, 2)))
    logger.info("Max. inference time: {}".format(np.round(max_inference_time, 2)))

    # Time spent on loading
    logger.info("Average load time: {}".format(np.round(average_load_time, 2)))
    logger.info("Standard deviation of load time: {}".format(np.round(std_dev_load_time, 2)))
    logger.info("Max. load time: {}".format(np.round(max_load_time, 2)))

    # Get average training, inference and load times
    average_training_stats_list = get_average_stats(training_stats_list, average_training_time)
    std_dev_training_stats_list = get_std_dev_stats(training_stats_list, std_dev_training_time)
    max_training_stats_list = get_max_stats(training_stats_list, max_training_time)

    average_inference_stats_list = get_average_stats(inference_stats_list, average_inference_time)
    std_dev_inference_stats_list = get_std_dev_stats(inference_stats_list, std_dev_inference_time)
    max_inference_stats_list = get_max_stats(inference_stats_list, max_inference_time)

    average_load_stats_list = get_average_stats(load_stats_list, average_load_time)
    std_dev_load_stats_list = get_std_dev_stats(load_stats_list, std_dev_load_time)
    max_load_stats_list = get_max_stats(load_stats_list, max_load_time)

    # Get model size
    model_size = get_gzipped_model_size(ext="tflite")
    logger.info(f"Model size (gzip): {model_size}")

    # Save training, inference and load metrics to dataframe
    stats = training_stats_list[0][0].keys()
    
    average_traning_stats_names = [f"average_training_{stat}" for stat in stats]
    std_dev_training_stats_names = [f"std_dev_training_{stat}" for stat in stats]
    max_training_stats_names = [f"max_training_{stat}" for stat in stats]
    
    average_inference_stats_names = [f"average_inference_{stat}" for stat in stats]
    std_dev_inference_stats_names = [f"std_dev_inference_{stat}" for stat in stats]
    max_inference_stats_names = [f"max_inference_{stat}" for stat in stats]
    
    average_load_stats_names = [f"average_load_{stat}" for stat in stats]
    std_dev_load_stats_names = [f"std_dev_load_{stat}" for stat in stats]
    max_load_stats_names = [f"max_load_{stat}" for stat in stats]

    df_times_columns = ["experiment", "device", "average_training_time", "std_dev_training_time", "max_training_time", "average_inference_time", "std_dev_inference_time", "max_inference_time", "average_load_time", "std_dev_load_time", "max_load_time", "model_size"]
    df_times = pd.DataFrame(columns=df_times_columns)
    times_row = [experiment, device, average_training_time, std_dev_training_time, max_training_time, average_inference_time, std_dev_inference_time, max_inference_time, average_load_time, std_dev_load_time, max_load_time, model_size]
    df_times.loc[0] = times_row

    df_training_stats_columns = ["experiment", "device", "snapshot", *average_traning_stats_names, *std_dev_training_stats_names, *max_training_stats_names]
    df_training_stats = pd.DataFrame(columns=df_training_stats_columns)
    
    for index, _snapshot in enumerate(average_training_stats_list):
        row = np.array([experiment, device, index])
        row = np.append(row, average_training_stats_list[index])
        row = np.append(row, std_dev_training_stats_list[index])
        row = np.append(row, max_training_stats_list[index])
    
        df_training_stats.loc[index] = row

    df_inference_stats_columns = ["experiment", "device", "snapshot", *average_inference_stats_names, *std_dev_inference_stats_names, *max_inference_stats_names]
    df_inference_stats = pd.DataFrame(columns=df_inference_stats_columns)
    
    for index, _snapshot in enumerate(average_inference_stats_list):
        row = np.array([experiment, device, index])
        row = np.append(row, average_inference_stats_list[index])
        row = np.append(row, std_dev_inference_stats_list[index])
        row = np.append(row, max_inference_stats_list[index])

        df_inference_stats.loc[index] = row

    df_load_stats_columns = ["experiment", "device", "snapshot", *average_load_stats_names, *std_dev_load_stats_names, *max_load_stats_names]
    df_load_stats = pd.DataFrame(columns=df_load_stats_columns)
    
    for index, _snapshot in enumerate(average_load_stats_list):
        row = np.array([experiment, device, index])
        row = np.append(row, average_load_stats_list[index])
        row = np.append(row, std_dev_load_stats_list[index])
        row = np.append(row, max_load_stats_list[index])

        df_load_stats.loc[index] = row

    # # Save average and std_dev of training, inference and load stats to log file
    # save_stats_to_logfile(test="training", average_stats=average_training_stats_list, std_dev_stats=std_dev_training_stats_list)
    # save_stats_to_logfile(test="inference", average_stats=average_inference_stats_list, std_dev_stats=std_dev_inference_stats_list)
    # save_stats_to_logfile(test="load", average_stats=average_load_stats_list, std_dev_stats=std_dev_load_stats_list)

    return df_times, df_training_stats, df_inference_stats, df_load_stats

def run_benchmark(experiments, devices):
    for experiment in experiments:
        for device in devices:
            print(f"Running experiment {experiment} on device {device}")
            df_times, df_training_stats, df_inference_stats, df_load_stats = benchmark(experiment=experiment, device=device)
            df_evaluation = perform_evaluation_tflite(experiment=experiment, device=device)

            # load times dataframe from file and append new df
            if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv"):
                df_times.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv", mode="a", header=False, index=False)
            else:
                df_times.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv", index=False)

            # load training, inference and load stats dataframe from file and append new df
            if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv"):
                df_training_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv", mode="a", header=False, index=False)
            else:
                df_training_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv", index=False)
            
            if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv"):
                df_inference_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv", mode="a", header=False, index=False)
            else:
                df_inference_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv", index=False)
            
            if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv"):
                df_load_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv", mode="a", header=False, index=False)
            else:
                df_load_stats.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv", index=False)
            
            # load evaluation dataframe from file and append new df
            if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv"):
                df_evaluation.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv", mode="a", header=False, index=False)
            else:
                df_evaluation.to_csv("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv", index=False)
                
            
def clean_files():
    if os.path.exists(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv"):
        os.remove(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv")

    if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv"):
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv")

    if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv"):
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv")

    if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv"):
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv")

    if os.path.exists("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv"):
        os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv")

def load_files():
    df_times = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_times.csv")
    df_training_stats = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_training_stats.csv")
    df_inference_stats = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats.csv")
    df_load_stats = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_load_stats.csv")
    df_evaluation = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation.csv")
    
    return df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation

def aggregate_stats_by_time(df):
    columns_avg_std = [col for col in df.columns if "max" not in col]
    
    df_avg_std = df[columns_avg_std].groupby(["experiment", "device"]).mean().reset_index()
    df_avg_std = df_avg_std.drop(columns=["snapshot"])
    
    # columns containing "max" in the name must be aggregated by taking the maximum value instead of the mean
    columns_max = [col for col in df.columns if "max" in col]
    
    # add experiment and device columns to columns_max
    columns_max.append("experiment")
    columns_max.append("device")
    
    df_max = df[columns_max]
    df_max = df_max.groupby(["experiment", "device"]).max().reset_index()
    
    df_final = pd.concat([df_avg_std, df_max[columns_max[:-2]]], axis=1)
    
    return df_final

def sort_by_experiment(df):    
    # sort dataframes by experiment. experiment is the string "EXP" followed by a number, so we can sort by the number
    df['sort'] = df['experiment'].str.extract('(\d+)', expand=False).astype(int)
    df.sort_values('sort',inplace=True, ascending=True)
    df = df.drop('sort', axis=1)
    df = df.reset_index(drop=True)
    
    return df

def calculate_energy_consumption(df_experiment, experiment, df_times):    
    average_exp_duration = df_times[f"average_{experiment}_time"]
    df_experiment["total_average_cpu_energy_consumption"] = df_experiment[f"average_{experiment}_cpu_power_draw"] * average_exp_duration
    df_experiment["total_average_gpu_energy_consumption"] = df_experiment[f"average_{experiment}_gpu_power_draw"] * average_exp_duration
    
    # calculate percentage of energy consumption reduction with respect to the baseline (EXP0)
    baseline = df_experiment[df_experiment["experiment"] == "EXP0"]
    baseline_cpu_energy_consumption = baseline["total_average_cpu_energy_consumption"].values[0]
    baseline_gpu_energy_consumption = baseline["total_average_gpu_energy_consumption"].values[0]
    
    percentage_cpu_energy_consumption_reduction = (df_experiment["total_average_cpu_energy_consumption"] - baseline_cpu_energy_consumption) / baseline_cpu_energy_consumption * 100
    percentage_gpu_energy_consumption_reduction = (df_experiment["total_average_gpu_energy_consumption"] - baseline_gpu_energy_consumption) / baseline_gpu_energy_consumption * 100
    
    # round to 5 decimal places
    percentage_cpu_energy_consumption_reduction = percentage_cpu_energy_consumption_reduction.round(5)
    percentage_gpu_energy_consumption_reduction = percentage_gpu_energy_consumption_reduction.round(5)
    
    # if number is positive, put a "+" in front of it
    # percentage_cpu_energy_consumption_reduction = percentage_cpu_energy_consumption_reduction.apply(lambda x: f"+{x}" if x > 0 else x)
    # percentage_gpu_energy_consumption_reduction = percentage_gpu_energy_consumption_reduction.apply(lambda x: f"+{x}" if x > 0 else x)
        
    df_experiment["percentage_cpu_energy_consumption_reduction"] = percentage_cpu_energy_consumption_reduction
    df_experiment["percentage_gpu_energy_consumption_reduction"] = percentage_gpu_energy_consumption_reduction
    
    # in baseline put a "N/A" in the percentage column
    df_experiment.loc[df_experiment["experiment"] == "EXP0", "percentage_cpu_energy_consumption_reduction"] = "N/A"
    df_experiment.loc[df_experiment["experiment"] == "EXP0", "percentage_gpu_energy_consumption_reduction"] = "N/A"
        
    return df_experiment

def sort_columns(df_training_stats, df_inference_stats, df_load_stats):
    print("Sorting columns...")
    
    stats = ["ram_memory_uss", "ram_memory_rss", "ram_memory_vms", "ram_memory_pss", "cpu_usage", "cpu_freq", "cpu_power_draw", "io_usage", "bytes_written", "bytes_read", "gpu_memory", "gpu_usage", "gpu_freq", "gpu_power_draw"]
    tests = ["training", "inference", "load"]
    
    for test in tests:
        test_columns = ["experiment", "device"]
        
        for i in range(len(stats)):
            stat_test_columns = [f"average_{test}_{stats[i]}", f"std_dev_{test}_{stats[i]}", f"max_{test}_{stats[i]}"]
            test_columns.extend(stat_test_columns)
        
        test_columns.extend(["total_average_cpu_energy_consumption", "percentage_cpu_energy_consumption_reduction", "total_average_gpu_energy_consumption", "percentage_gpu_energy_consumption_reduction"])
        
        if test == "training":
            df_training_stats = df_training_stats[test_columns]
        elif test == "inference":
            df_inference_stats = df_inference_stats[test_columns]
        elif test == "load":
            df_load_stats = df_load_stats[test_columns]
    
    return df_training_stats, df_inference_stats, df_load_stats

def move_model_size(df_times, df_load_stats):
    print("Moving 'model size' column from times to load stats dataframe...")
    
    df_load_stats["model_size"] = df_times["model_size"]
    df_times = df_times.drop("model_size", axis=1)
    
    return df_times, df_load_stats

def pretty_format_column_names(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation):
    print("Pretty formating column names...")
    
    stats = ["ram_memory_uss", "ram_memory_rss", "ram_memory_vms", "ram_memory_pss", "cpu_usage", "cpu_freq", "cpu_power_draw", "io_usage", "bytes_written", "bytes_read", "gpu_memory", "gpu_usage", "gpu_freq", "gpu_power_draw"]

    stats_names = {
        "ram_memory_uss": "RAM Memory USS (B)",
        "ram_memory_rss": "RAM Memory RSS (B)",
        "ram_memory_vms": "RAM Memory VMS (B)",
        "ram_memory_pss": "RAM Memory PSS (B)",
        "cpu_usage": "CPU Usage (%)",
        "cpu_freq": "CPU Frequency (MHz)",
        "cpu_power_draw": "CPU Power Draw (W)",
        "io_usage": "I/O Usage (%)",
        "bytes_written": "Bytes Written (B)",
        "bytes_read": "Bytes Read (B)",
        "gpu_memory": "GPU Memory (MB)",
        "gpu_usage": "GPU Usage (%)",
        "gpu_freq": "GPU Frequency (MHz)",
        "gpu_power_draw": "GPU Power Draw (W)"
    }
    
    agg_types = ["Avg.", "Std. Dev.", "Max."]
    
    stats_column_names = ["Experiment", "Device"]
    
    for i in range(len(stats)):
        for j in range(len(agg_types)):
            stats_column_names.append(f"{agg_types[j]} {stats_names[stats[i]]}")
    
    stats_column_names.extend(["Total Avg. CPU Energy Consumption (J)", "Percentage of Total Avg. CPU Energy Consumption Reduction (%)", "Total Avg. GPU Energy Consumption (J)", "Percentage of Total Avg. GPU Energy Consumption Reduction (%)"])
            
    df_training_stats.columns = stats_column_names
    df_inference_stats.columns = stats_column_names
    df_load_stats.columns = stats_column_names + ["Model Size (B)"]
    
    binary_classification_evaluation_column_names = ["Experiment", "Device", "Accuracy", "F1 Score", "AUC", "Recall", "Precision", "Balanced Accuracy", "Matthews Correlation Coefficient"]
    regression_evaluation_column_names = ["Experiment", "Device", "Mean Squared Error", "Mean Absolute Error", "Mean Absolute Percentage Error", "Symmetric Mean Absolute Percentage Error"]
    
    evaluation_column_names = binary_classification_evaluation_column_names if ml_task == "binary_classification" else regression_evaluation_column_names
    
    df_evaluation.columns = evaluation_column_names
    
    times_names = {
        "training_time": "Training Time (s)",
        "inference_time": "Inference Time (s)",
        "load_time": "Load Time (s)",
    }
    
    times_column_names = ["Experiment", "Device"]
    
    for time_column in times_names:
        for agg_type in agg_types:
            times_column_names.append(f"{agg_type} {times_names[time_column]}")
    
    df_times.columns = times_column_names
            
    return df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation

def round_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation, decimal_places=5):
    print("Rounding results...")
    
    df_times = df_times.round(decimal_places)
    df_training_stats = df_training_stats.round(decimal_places)
    df_inference_stats = df_inference_stats.round(decimal_places)
    df_load_stats = df_load_stats.round(decimal_places)
    df_evaluation = df_evaluation.round(decimal_places)
    
    return df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation

def remove_unnecesary_stats(df_training_stats, df_inference_stats, df_load_stats):
    print("Removing unnecesary stats...")
    
    # if device is CPU, remove GPU columns
    df_training_stats = df_training_stats.drop(df_training_stats[df_training_stats["Device"] == "CPU"].filter(regex="GPU").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats[df_inference_stats["Device"] == "CPU"].filter(regex="GPU").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats[df_load_stats["Device"] == "CPU"].filter(regex="GPU").columns, axis=1)
    
    # remove RSS columns
    df_training_stats = df_training_stats.drop(df_training_stats.filter(regex="RSS").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats.filter(regex="RSS").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats.filter(regex="RSS").columns, axis=1)
    
    # remove VMS columns
    df_training_stats = df_training_stats.drop(df_training_stats.filter(regex="VMS").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats.filter(regex="VMS").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats.filter(regex="VMS").columns, axis=1)
    
    # remove PSS columns
    df_training_stats = df_training_stats.drop(df_training_stats.filter(regex="PSS").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats.filter(regex="PSS").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats.filter(regex="PSS").columns, axis=1)
    
    # remove bytes written and bytes read columns
    df_training_stats = df_training_stats.drop(df_training_stats.filter(regex="Bytes").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats.filter(regex="Bytes").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats.filter(regex="Bytes").columns, axis=1)
    
    # remove IO usage column
    df_training_stats = df_training_stats.drop(df_training_stats.filter(regex="I/O").columns, axis=1)
    df_inference_stats = df_inference_stats.drop(df_inference_stats.filter(regex="I/O").columns, axis=1)
    df_load_stats = df_load_stats.drop(df_load_stats.filter(regex="I/O").columns, axis=1)
    
    return df_training_stats, df_inference_stats, df_load_stats

def reorder_columns(df_training_stats, df_inference_stats, df_load_stats):
    # reorder columns
    order = ["Experiment", "Device", "Total Avg. CPU Energy Consumption (J)", "Percentage of Total Avg. CPU Energy Consumption Reduction (%)", "Total Avg. GPU Energy Consumption (J)", "Percentage of Total Avg. GPU Energy Consumption Reduction (%)", "Avg. CPU Power Draw (W)", "Std. Dev. CPU Power Draw (W)", "Max. CPU Power Draw (W)", "Avg. GPU Power Draw (W)", "Std. Dev. GPU Power Draw (W)", "Max. GPU Power Draw (W)", "Avg. CPU Usage (%)", "Std. Dev. CPU Usage (%)", "Max. CPU Usage (%)", "Avg. GPU Usage (%)", "Std. Dev. GPU Usage (%)", "Max. GPU Usage (%)", "Avg. CPU Frequency (MHz)", "Std. Dev. CPU Frequency (MHz)", "Max. CPU Frequency (MHz)", "Avg. GPU Frequency (MHz)", "Std. Dev. GPU Frequency (MHz)", "Max. GPU Frequency (MHz)", "Avg. RAM Memory USS (B)", "Std. Dev. RAM Memory USS (B)", "Max. RAM Memory USS (B)", "Avg. RAM Memory PSS (B)", "Std. Dev. RAM Memory PSS (B)", "Max. RAM Memory PSS (B)", "Avg. RAM Memory RSS (B)", "Std. Dev. RAM Memory RSS (B)", "Max. RAM Memory RSS (B)", "Avg. RAM Memory VMS (B)", "Std. Dev. RAM Memory VMS (B)", "Max. RAM Memory VMS (B)", "Avg. I/O Usage (%)", "Std. Dev. I/O Usage (%)", "Max. I/O Usage (%)", "Avg. Bytes Written (B)", "Std. Dev. Bytes Written (B)", "Max. Bytes Written (B)", "Avg. Bytes Read (B)", "Std. Dev. Bytes Read (B)", "Max. Bytes Read (B)"]
    
    df_training_stats = df_training_stats[order]
    df_inference_stats = df_inference_stats[order]
    df_load_stats = df_load_stats[order + ["Model Size (B)"]]
    
    return df_training_stats, df_inference_stats, df_load_stats

def group_with_evaluation(df_stats, df_evaluation):
    # add evaluation metrics to inference stats
    df_stats["Accuracy"] = df_evaluation["Accuracy"]
    df_stats["Balanced Accuracy"] = df_evaluation["Balanced Accuracy"]
    df_stats["F1 Score"] = df_evaluation["F1 Score"]
    
    return df_stats
  
def get_results(aggregate_by_time=True):
    df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = load_files()
        
    if aggregate_by_time:
        print("Aggregating stats by time...")
        
        df_training_stats = aggregate_stats_by_time(df_training_stats)
        df_inference_stats = aggregate_stats_by_time(df_inference_stats)
        df_load_stats = aggregate_stats_by_time(df_load_stats)
    
    print("Sorting dataframes by experiment...")
    df_times = sort_by_experiment(df_times)
    df_training_stats = sort_by_experiment(df_training_stats)
    df_inference_stats = sort_by_experiment(df_inference_stats)
    df_load_stats = sort_by_experiment(df_load_stats)
    df_evaluation = sort_by_experiment(df_evaluation)
        
    # calculate energy consumption in Joules
    print("Calculating energy consumption...")
    df_training_stats = calculate_energy_consumption(df_training_stats, "training", df_times)
    df_inference_stats = calculate_energy_consumption(df_inference_stats, "inference", df_times)
    df_load_stats = calculate_energy_consumption(df_load_stats, "load", df_times)
    
    # put columns of the same statistic together
    df_training_stats, df_inference_stats, df_load_stats = sort_columns(df_training_stats, df_inference_stats, df_load_stats)
    
    # move column "model_size" of df_times to df_load_stats
    df_times, df_load_stats = move_model_size(df_times, df_load_stats)

    # pretty print column names
    df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = pretty_format_column_names(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation)
    
    # reorder columns
    # df_training_stats, df_inference_stats, df_load_stats = reorder_columns(df_training_stats, df_inference_stats, df_load_stats)
    
    # round results
    df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = round_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation, decimal_places=3)
    
    # remove unnecesary stats
    df_training_stats, df_inference_stats, df_load_stats = remove_unnecesary_stats(df_training_stats, df_inference_stats, df_load_stats)
    
    # group inference and evaluation
    # df_inference_stats = group_inference_and_evaluation(df_inference_stats, df_evaluation)
    
    # remove Device column
    # df_times = df_times.drop(columns=["Device"])
    # df_training_stats = df_training_stats.drop(columns=["Device"])
    # df_inference_stats = df_inference_stats.drop(columns=["Device"])
    # df_load_stats = df_load_stats.drop(columns=["Device"])
    # df_evaluation = df_evaluation.drop(columns=["Device"])
    
    # remove "EXP" from experiment names
    # df_times["Experiment"] = df_times["Experiment"].str.replace("EXP", "")
    # df_training_stats["Experiment"] = df_training_stats["Experiment"].str.replace("EXP", "")
    # df_inference_stats["Experiment"] = df_inference_stats["Experiment"].str.replace("EXP", "")
    # df_load_stats["Experiment"] = df_load_stats["Experiment"].str.replace("EXP", "")
    # df_evaluation["Experiment"] = df_evaluation["Experiment"].str.replace("EXP", "")
    
    return df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation

def print_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation):
    print("Results of the benchmark:")
    
    print("Training/Inference/Load times:")
    display(df_times)
    
    print("Training stats:")
    display(df_training_stats)
    
    print("Inference stats:")
    display(df_inference_stats)
    
    print("Load stats:")
    display(df_load_stats)
    
    print("Evaluation:")
    display(df_evaluation)

def export_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation):
    print("Exporting results...")
    
    Path(f"results/{batch_size}").mkdir(parents=True, exist_ok=True)
        
    df_times.to_csv(f"results/{batch_size}/times_{batch_size}.csv", index=False)
    df_training_stats.to_csv(f"results/{batch_size}/training_stats_{batch_size}.csv")
    df_inference_stats.to_csv(f"results/{batch_size}/inference_stats_{batch_size}.csv")
    df_load_stats.to_csv(f"results/{batch_size}/load_stats_{batch_size}.csv")
    df_evaluation.to_csv(f"results/{batch_size}/evaluation_{batch_size}.csv")
    
    print("Results exported to results/ folder.")

In [None]:
df_train[gf].columns

### Run Benchmark

In [None]:
clean_files()
run_benchmark(experiments=experiments, devices=devices)

### Get Results

In [None]:
df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = get_results(aggregate_by_time=True)
print_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation)

### Export Results

In [None]:
df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = get_results(aggregate_by_time=True)
export_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation)

### Fix Results

In [None]:
def load_processed_results(batch_size):
    df_times = pd.read_csv(f"results/{batch_size}/times_{batch_size}.csv")
    df_training_stats = pd.read_csv(f"results/{batch_size}/training_stats_{batch_size}.csv")
    df_inference_stats = pd.read_csv(f"results/{batch_size}/inference_stats_{batch_size}.csv")
    df_load_stats = pd.read_csv(f"results/{batch_size}/load_stats_{batch_size}.csv")
    df_evaluation = pd.read_csv(f"results/{batch_size}/evaluation_{batch_size}.csv")
    
    return df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation

def reorder_stats_columns(df_training_stats, df_inference_stats, df_load_stats):
    # reorder columns
    order = ["Experiment", "Device", "Total Avg. CPU Energy Consumption (J)", "Percentage of Total Avg. CPU Energy Consumption Reduction (%)", "Avg. CPU Power Draw (W)", "Std. Dev. CPU Power Draw (W)", "Max. CPU Power Draw (W)", "Avg. CPU Usage (%)", "Std. Dev. CPU Usage (%)", "Max. CPU Usage (%)", "Avg. CPU Frequency (MHz)", "Std. Dev. CPU Frequency (MHz)", "Max. CPU Frequency (MHz)", "Avg. RAM Memory USS (B)", "Std. Dev. RAM Memory USS (B)", "Max. RAM Memory USS (B)"]
    
    df_training_stats = df_training_stats[order]
    df_inference_stats = df_inference_stats[order]
    df_load_stats = df_load_stats[order + ["Model Size (B)"]]
    
    return df_training_stats, df_inference_stats, df_load_stats

def reorder_times_columns(df_times):
    # reorder columns
    order = ["Experiment", "Device", "Avg. Inference Time (s)", "Std. Dev. Inference Time (s)", "Max. Inference Time (s)", "Avg. Training Time (s)", "Std. Dev. Training Time (s)", "Max. Training Time (s)", "Avg. Load Time (s)", "Std. Dev. Load Time (s)", "Max. Load Time (s)"]
    
    df_times = df_times[order]
    
    return df_times

In [None]:
batch_sizes = [32, 256, 1024]

_df_times, _df_training_stats, _df_inference_stats, _df_load_stats, df_evaluation_32 = load_processed_results(32)

for batch_size in batch_sizes:
    df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = load_processed_results(batch_size)
    
    # replace last two column names
    new_column_names = ["Total Avg. CPU Energy Consumption (J)", "Percentage of Total Avg. CPU Energy Consumption Reduction (%)"]

    df_training_stats.columns = df_training_stats.columns[:-2].tolist() + new_column_names
    df_inference_stats.columns = df_inference_stats.columns[:-2].tolist() + new_column_names
    df_load_stats.columns = df_load_stats.columns[:-3].tolist() + new_column_names + ["Model Size (B)"]
    
    # reorder columns
    df_times = reorder_times_columns(df_times)
    df_training_stats, df_inference_stats, df_load_stats = reorder_stats_columns(df_training_stats, df_inference_stats, df_load_stats)

    # round results
    df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation = round_results(df_times, df_training_stats, df_inference_stats, df_load_stats, df_evaluation, decimal_places=3)

    # remove unnecesary stats
    df_training_stats, df_inference_stats, df_load_stats = remove_unnecesary_stats(df_training_stats, df_inference_stats, df_load_stats)

    # group inference and evaluation
    df_times = group_with_evaluation(df_times, df_evaluation_32)
    df_training_stats = group_with_evaluation(df_training_stats, df_evaluation_32)
    df_inference_stats = group_with_evaluation(df_inference_stats, df_evaluation_32)
    df_load_stats = group_with_evaluation(df_load_stats, df_evaluation_32)

    # remove Device column
    df_times = df_times.drop(columns=["Device"])
    df_training_stats = df_training_stats.drop(columns=["Device"])
    df_inference_stats = df_inference_stats.drop(columns=["Device"])
    df_load_stats = df_load_stats.drop(columns=["Device"])
    df_evaluation = df_evaluation.drop(columns=["Device"])

    # remove "EXP" from experiment names
    df_times["Experiment"] = df_times["Experiment"].str.replace("EXP", "")
    df_training_stats["Experiment"] = df_training_stats["Experiment"].str.replace("EXP", "")
    df_inference_stats["Experiment"] = df_inference_stats["Experiment"].str.replace("EXP", "")
    df_load_stats["Experiment"] = df_load_stats["Experiment"].str.replace("EXP", "")
    df_evaluation["Experiment"] = df_evaluation["Experiment"].str.replace("EXP", "")

    print("Exporting results...")
    
    revision = "rev6"
    Path(f"results/{revision}/{batch_size}").mkdir(parents=True, exist_ok=True)
    
    # remove unnamed columns
    df_times = df_times.loc[:, ~df_times.columns.str.contains('^Unnamed')]
    df_training_stats = df_training_stats.loc[:, ~df_training_stats.columns.str.contains('^Unnamed')]
    df_inference_stats = df_inference_stats.loc[:, ~df_inference_stats.columns.str.contains('^Unnamed')]
    df_load_stats = df_load_stats.loc[:, ~df_load_stats.columns.str.contains('^Unnamed')]
    df_evaluation = df_evaluation.loc[:, ~df_evaluation.columns.str.contains('^Unnamed')]
    
    # replace "Experiment" by "Opt. Strategy Id."
    df_times = df_times.rename(columns={"Experiment": "Opt. Strategy Id."})
    df_training_stats = df_training_stats.rename(columns={"Experiment": "Opt. Strategy Id."})
    df_inference_stats = df_inference_stats.rename(columns={"Experiment": "Opt. Strategy Id."})
    df_load_stats = df_load_stats.rename(columns={"Experiment": "Opt. Strategy Id."})
    df_evaluation = df_evaluation.rename(columns={"Experiment": "Opt. Strategy Id."})
    
    df_times.to_csv(f"results/{revision}/{batch_size}/times_{batch_size}_final.csv", index=False)
    df_training_stats.to_csv(f"results/{revision}/{batch_size}/training_stats_{batch_size}_final.csv", index=False)
    df_inference_stats.to_csv(f"results/{revision}/{batch_size}/inference_stats_{batch_size}_final.csv", index=False)
    df_load_stats.to_csv(f"results/{revision}/{batch_size}/load_stats_{batch_size}_final.csv", index=False)
    df_evaluation.to_csv(f"results/{revision}/{batch_size}/evaluation_{batch_size}_final.csv", index=False)
    
    print("Results exported to results/ folder.")

In [None]:
batch_sizes = [32, 256, 1024]

for batch_size in batch_sizes:
    df_times = pd.read_csv(f"results/{revision}/{batch_size}/times_{batch_size}_final.csv")
    df_training_stats = pd.read_csv(f"results/{revision}/{batch_size}/training_stats_{batch_size}_final.csv")
    df_inference_stats = pd.read_csv(f"results/{revision}/{batch_size}/inference_stats_{batch_size}_final.csv")
    df_load_stats = pd.read_csv(f"results/{revision}/{batch_size}/load_stats_{batch_size}_final.csv")
    df_evaluation = pd.read_csv(f"results/{revision}/{batch_size}/evaluation_{batch_size}_final.csv")
    
    print(f"Batch size: {batch_size}")
    
    display(df_times)
    display(df_training_stats)
    display(df_inference_stats)
    display(df_load_stats)
    display(df_evaluation)
    
    print("")

In [None]:
batch_sizes = [32, 256, 1024]

for batch_size in batch_sizes:
    df_times = pd.read_csv(f"results_final/rev6/{batch_size}/times_{batch_size}_final.csv")
    df_training_stats = pd.read_csv(f"results_final/rev6/{batch_size}/training_stats_{batch_size}_final.csv")
    df_inference_stats = pd.read_csv(f"results_final/rev6/{batch_size}/inference_stats_{batch_size}_final.csv")
    df_load_stats = pd.read_csv(f"results_final/rev6/{batch_size}/load_stats_{batch_size}_final.csv")
    df_evaluation = pd.read_csv(f"results_final/rev6/{batch_size}/evaluation_{batch_size}_final.csv")
    
    print(f"Batch size: {batch_size}")
    
    display(df_times)
    display(df_training_stats)
    display(df_inference_stats)
    display(df_load_stats)
    display(df_evaluation)
    
    print("")

### Plots

In [None]:
# change font family to 'Liberation Serif'
import matplotlib as mpl
mpl.rc('font',family='Liberation Serif')

def plot_stats(experiment, device, test):
    df_test = pd.read_csv(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_{test}_stats.csv")
    
    display(df_test)
    
    stats_list = df_test.columns[3:]
    # stats_names = [stat.replace("_", " ").capitalize() for stat in stats_list]
    stats_names = {stat: " ".join(stat.split("_")).capitalize() for stat in stats_list}
        
    for stat in stats_list:
        print(f"Plotting {stat} for {experiment} on {device} for {test}")
        fig, ax = plt.subplots(figsize=(8, 5))

        sns.set(style="darkgrid")
        sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
        sns.set_style("whitegrid", {"axes.grid": False})

        stat_data = df_test[(df_test["experiment"] == experiment) & (df_test["device"] == device)][stat]
        baseline_stat_data = df_test[(df_test["experiment"] == "EXP0") & (df_test["device"] == device)][stat]
        
        if experiments[experiment]["post_training_optimizations"] is not None:
            post_training_optimizations = " ".join(experiments[experiment]["post_training_optimizations"]).replace("_", " ").capitalize()
        else:
            post_training_optimizations = ""
        if experiments[experiment]["training_aware_optimizations"] is not None:
            training_aware_optimizations = " ".join(experiments[experiment]["training_aware_optimizations"]).replace("_", " ").capitalize()
        else:
            training_aware_optimizations = ""
        
        experiment_name = f"{post_training_optimizations} {training_aware_optimizations}"
        print(experiment_name)
        
        time = np.arange(0, len(stat_data))
        baseline_time = np.arange(0, len(baseline_stat_data))
        
        max_time = max(len(time), len(baseline_time))
        
        ax = sns.lineplot(x=time, y=stat_data, label=experiment_name)
        ax = sns.lineplot(x=baseline_time, y=baseline_stat_data, label="Baseline")

        ax.set(xlabel="Time (s)", ylabel=stat)
        ax.set(ylabel=stats_names[stat])
        # ax.set_xticks(np.arange(0, max_time + 1, 1))
        # ax.set_xticklabels(np.arange(0, max_time + 1, 1))
        ax.set_xlim(0, max(time))
        ax.set_ylim(0, max(df_test[stat]) * 1.1)
        # ax.set_title(f"{stats_names[stat]}. {experiment_name} ({device})", fontsize=17)
        ax.set_title(f"{stats_names[stat]}. {experiment} ({device})", fontsize=17)
        ax.legend(prop={'family':'Liberation Serif'}, loc='upper left')
        plt.tight_layout()
        # plt.show()
        Path(f"poc_energy_efficiency_crypto/plots").mkdir(parents=True, exist_ok=True)
        plt.savefig(f"poc_energy_efficiency_crypto/plots/crypto_spider_5g_fcnn_optimized_benchmark_{test}_stats_{experiment}_{device}_{stat}.png", dpi=300)
        plt.close()

tests = ["training", "inference", "load"]

for experiment in experiments:
    if experiment == "EXP0":
        continue
    
    for device in devices:
        for test in tests:
            # plot_stats(experiment, device, test)
            pass

### Summary Plots

In [None]:
batch_sizes = [32, 256, 1024]

from matplotlib.patches import FancyBboxPatch

# plot df_inference_stats for all batch sizes
def plot_inference_stats(device, df_inference_stats, batch_size):
    display(df_inference_stats)
    
    stat = "Percentage of Total Avg. CPU Energy Consumption Reduction (%)"
    stat_title = "Energy Consumption Reduction (%)"
    
    print(f"Plotting {stat} ({device}) for batch size {batch_size}")
    fig, ax = plt.subplots(figsize=(8, 8))

    # sns.set(style="darkgrid", font_scale=1.5)
    # sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    # sns.set_style("whitegrid", {"axes.grid": False})
    sns.set_style("darkgrid", {"axes.grid": True, "font.family": "Liberation Serif"})
    
    # change color of the background of the grid
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5, "axes.facecolor": (0, 0, 0, 0)})

    exps = np.arange(1, len(df_inference_stats[stat]))
    
    # exps should be the x-axis and the bars should be the y-axis, that is, the bars should be arranged vertically
    ax = sns.barplot(x=exps, y=df_inference_stats[stat].values[1:], label=stat_title, palette="Set2", linewidth=0.6, edgecolor=".35", saturation=0.93, ci=None)
    
    # put the labels on the bars
    values = df_inference_stats[stat].values[1:]
    
    for i, v in enumerate(values):
        offset = 3 if v > 0 else -6
        ax.text(i, v + offset, str(round(v, 2)), color=(0.18, 0.18, 0.18), fontweight='bold', ha="center", fontfamily="Liberation Serif", fontsize=12)
       
    # set y-axis upper limit to 100
    ax.set_ylim(-115, 100)
    
    ax.set_yticks(np.arange(-125, 125, 25))

    # ax.set(xlabel="Time (s)", ylabel=stat)
    # ax.set(ylabel=stat.replace("_", " ").capitalize())
    # ax.set_xlim(0, max(time))
    # ax.set_ylim(0, max(df_inference_stats[stat]) * 1.1)
    ax.set_title(f"{stat_title}. Batch size {batch_size}", fontsize=15, fontfamily="Liberation Serif")
    # ax.legend(prop={'family':'Liberation Serif'}, loc="lower right")
    # change axis labels font family
    ax.set_xticklabels(ax.get_xticklabels(), fontfamily="Liberation Serif")
    ax.set_yticklabels(ax.get_yticklabels(), fontfamily="Liberation Serif")
    ax.set_xlabel("Optimization Strategies", fontfamily="Liberation Serif")
    ax.set_ylabel(stat_title, fontfamily="Liberation Serif")
 
    plt.tight_layout()
    # plt.show()
    Path(f"poc_energy_efficiency_crypto/plots").mkdir(parents=True, exist_ok=True)
    plt.savefig(f"poc_energy_efficiency_crypto/plots/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats_batch_size_{batch_size}_{stat}.png", dpi=300)
    plt.close()
    
print("Plotting inference stats for all batch sizes")

for device in devices:
    for batch_size in batch_sizes:
        df_inference_stats = pd.read_csv(f"results_final/rev6/{batch_size}/inference_stats_{batch_size}_final.csv")
        
        plot_inference_stats(device, df_inference_stats, batch_size)

In [None]:
# batch_sizes = [32, 256, 1024]
batch_sizes = [256]

from matplotlib.patches import FancyBboxPatch

# plot df_inference_stats for all batch sizes
def plot_inference_stats(device, df_inference_stats, batch_size):
    display(df_inference_stats)
    
    stat_title = "Energy Consumption Reduction (%) vs. Accuracy"
    stat1 = "Percentage of Total Avg. CPU Energy Consumption Reduction (%)"
    stat2 = "Balanced Accuracy"
    
    print(f"Plotting {stat_title} ({device}) for batch size {batch_size}")
    fig, ax = plt.subplots(figsize=(8, 8))

    # sns.set(style="darkgrid", font_scale=1.5)
    # sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    # sns.set_style("whitegrid", {"axes.grid": False})
    sns.set_style("darkgrid", {"axes.grid": True, "font.family": "Liberation Serif"})
    
    # change color of the background of the grid
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5, "axes.facecolor": (0, 0, 0, 0)})

    exps = np.arange(1, len(df_inference_stats))
    
    # exps should be the x-axis and the bars should be the y-axis, that is, the bars should be arranged vertically
    ax = sns.barplot(x=exps, y=df_inference_stats[stat1].values[1:], label=stat1, linewidth=0.6, edgecolor=".35", saturation=0.93, ci=None, color="#3f73c5")
    ax2 = ax.twinx()
    
    # put a line in the same spot as the points of the scatter that has the same width as the bars
    ax2 = sns.lineplot(x=exps - 1, y=df_inference_stats[stat2].values[1:] * 100, linewidth=2, color="#f4c94f", zorder=-1)
    
    ax2 = sns.scatterplot(x=exps - 1, y=df_inference_stats[stat2].values[1:] * 100, label=stat2, palette="Set2", marker="o", s=40, color="#fb8500", edgecolors="#fb8500", linewidth=0.0)
    
    # for i, v in enumerate(df_inference_stats[stat2].values[1:] * 100):
    #     ax2.plot([i - 0.3, i + 0.3], [v, v], color="red", linewidth=2)
    
    # put the labels on the bars
    values1 = df_inference_stats[stat1].values[1:]
    
    for i, v in enumerate(values1):
        offset = 3 if v > 0 else -6
        ax.text(i, v + offset, str(round(v, 2)), color=(0.18, 0.18, 0.18), fontweight='bold', ha="center", fontfamily="Liberation Serif", fontsize=12)
    
    values2 = df_inference_stats[stat2].values[1:] * 100
    
    # for i, v in enumerate(values2):
    #     offset = 0.01 if v > 0 else -0.01
    #     ax2.text(i, v + offset, str(round(v, 2)), color=(0.18, 0.18, 0.18), fontweight='bold', ha="center", fontfamily="Liberation Serif", fontsize=12)
       
    # set y-axis upper limit to 100
    ax.set_ylim(-115, 115)
    ax.set_yticks(np.arange(-125, 125, 25))
    
    ax2.set_ylim(-115, 115)
    ax2.set_yticks(np.arange(-125, 125, 25))

    # ax.set(xlabel="Time (s)", ylabel=stat)
    # ax.set(ylabel=stat.replace("_", " ").capitalize())
    # ax.set_xlim(0, max(time))
    # ax.set_ylim(0, max(df_inference_stats[stat]) * 1.1)
    ax.set_title(f"{stat_title}. Batch size {batch_size}", fontsize=15, fontfamily="Liberation Serif")
    # put legend out of the plot
    fig.legend(prop={'family':'Liberation Serif'}, loc='lower center', bbox_to_anchor=(0.5, -0.1), fancybox=False, ncol=1, facecolor="none")
    # remove legend color
    ax2.legend_.remove()
    # change axis labels font family
    ax.set_xticklabels(ax.get_xticklabels(), fontfamily="Liberation Serif")
    ax.set_yticklabels(ax.get_yticklabels(), fontfamily="Liberation Serif")
    ax.set_xlabel("Optimization Strategies", fontfamily="Liberation Serif")
    ax.set_ylabel(stat1, fontfamily="Liberation Serif")
    ax2.set_ylabel(stat2, fontfamily="Liberation Serif")
    
    # hide ax2 y ticks from 0 to -125
    for tick in ax2.get_yticklabels()[0:5]:
        tick.set_visible(False)
 
    plt.tight_layout()
    # plt.show()
    Path(f"poc_energy_efficiency_crypto/plots").mkdir(parents=True, exist_ok=True)
    # plt.savefig(f"poc_energy_efficiency_crypto/plots/crypto_spider_5g_fcnn_optimized_benchmark_inference_stats_batch_size_{batch_size}_{stat}.png", dpi=300)
    # plt.close()
    
    pause
    
print("Plotting inference stats for all batch sizes")

for device in devices:
    for batch_size in batch_sizes:
        df_inference_stats = pd.read_csv(f"results_final/rev6/{batch_size}/inference_stats_{batch_size}_final.csv")
        
        plot_inference_stats(device, df_inference_stats, batch_size)

## Automatic Hyperparameter Search

### Grid Search

#### NAS

In [None]:
 # find the student model structure by searching for the model
num_layers_grid = [1, 2, 3, 4]
num_neurons_grid = [4, 8, 16, 32, 64]

# make all possible combinations of the model structure (num_layers, num_neuron_for_each_layer)
combinations = []

for num_layers in num_layers_grid:
    product = list(itertools.product(num_neurons_grid, repeat=num_layers))
    combinations.append(product)

# build the models
for num_layers in range(0, len(combinations)):
    print(f"num_layers: {num_layers + 1}")
    
    for index, num_neurons in enumerate(combinations[num_layers]):
        print(f"index: {index}")
        neurons = combinations[num_layers][index]
        
        student = Sequential()
        
        for layer, neurons_layer in enumerate(neurons):
            print(f"- layer: {layer}, neurons_layer: {neurons_layer}")
        
        print("")

In [None]:
num_trials = 5

In [None]:
def test_nas(device="CPU"):
    teacher = baseline_model(input_dim=len(gf), n_output=2)
    teacher, history = train_model(teacher)

    num_trials = 5
    
    logger.info("Starting test_nas")
    
    pid = os.getpid()
    
    model_energy_consumption = {}
    
    Path(f"poc_energy_efficiency_crypto/NAS").mkdir(parents=True, exist_ok=True)
    
    with tf.device(device):
        with StatsCollectionManager(directory="NAS", test="NAS", sampling_rate=stats_sampling_rate, pid=pid) as nas_scm:
            with measure_time() as nas_time_measure:
                logger.info("Applying Knowledge Distillation (with NAS)")
        
                # find the student model structure by searching for the model
                num_layers_grid = [1, 2, 3]
                num_neurons_grid = [4, 8, 16, 32]
                
                # num_layers_grid = [1,]
                # num_neurons_grid = [4,]

                # make all possible combinations of the model structure (num_layers, num_neuron_for_each_layer)
                combinations = []

                for num_layers in num_layers_grid:
                    product = list(itertools.product(num_neurons_grid, repeat=num_layers))
                    combinations.append(product)

                # build the models
                for num_layers in range(0, len(combinations)):
                    logger.info(f"num_layers: {num_layers + 1}")
                    
                    for index, num_neurons in enumerate(combinations[num_layers]):
                        neurons = combinations[num_layers][index]
                        logger.info(f"neurons: {num_neurons}")
                        
                        student = Sequential()
                        student.add(tf.keras.Input(shape=(len(gf),)))
                        
                        for layer, neurons_layer in enumerate(neurons):
                            logger.info(f"layer: {layer}")
                            logger.info(f"neurons_layer: {neurons_layer}")
                            
                            student.add(Dense(neurons_layer, activation='relu'))
                        
                        student.add(Dense(2, activation='softmax'))
                        student.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
                        student.summary()

                        # Initialize and compile distiller
                        distiller = Distiller(student=student, teacher=teacher)
                        distiller.compile(
                            optimizer=keras.optimizers.Adam(),
                            metrics=[keras.metrics.CategoricalAccuracy()],
                            student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
                            distillation_loss_fn=keras.losses.KLDivergence(),
                            alpha=0.1,
                            temperature=3,
                        )

                        es = EarlyStopping(
                            monitor="val_student_loss",
                            mode="min",
                            patience=es_patience,
                            restore_best_weights=es_restore_best_weights,
                        )
                        _history = distiller.fit(
                            data_transformed,
                            pd.get_dummies(df_train["tag"]),
                            validation_split=validation_split,
                            epochs=epochs,
                            batch_size=batch_size,
                            callbacks=[es],
                            verbose=0,
                        )
                                                
                        # save the model
                        comb_id = f"{num_layers + 1}_{index}"
                        distiller.student.save(f"poc_energy_efficiency_crypto/NAS/models/model_{comb_id}.h5")
                        
                        for i in range(0, num_trials):
                            logger.info(f"Trial Evaluation: {i}")
                        
                            # evaluate the model
                            data_transformed_test = standard.transform(df_test[gf])
                            
                            with StatsCollectionManager(directory="NAS", test=f"evaluation_nas_{comb_id}_{i}", sampling_rate=stats_sampling_rate, pid=pid) as evaluation_nas_scm:
                                with measure_time() as evaluation_nas_time_measure:
                                    predictions = distiller.student.predict(data_transformed_test)
                            
                            evaluation_nas_time = evaluation_nas_time_measure()
                            
                            # Write evaluation nas time to file
                            with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_time_{comb_id}_{i}.pkl", "wb") as f:
                                pickle.dump({"evaluation_nas_time": evaluation_nas_time}, f)
                                
                        if ml_task == "binary_classification":
                            predictions = np.argmax(predictions, axis=1)
                            
                            accuracy = accuracy_score(df_test["tag"], predictions)
                            f1 = f1_score(df_test["tag"], predictions, average="weighted")
                            auc = roc_auc_score(df_test["tag"], predictions)
                            recall = recall_score(df_test["tag"], predictions, average="weighted")
                            precision = precision_score(df_test["tag"], predictions, average="weighted")
                            balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
                            matthews = matthews_corrcoef(df_test["tag"], predictions)

                            logger.info(f"Accuracy: {accuracy}")
                            logger.info(f"F1 score: {f1}")
                            logger.info(f"AUC: {auc}")
                            logger.info(f"Recall: {recall}")
                            logger.info(f"Precision: {precision}")
                            logger.info(f"Balanced accuracy: {balanced_accuracy}")
                            logger.info(f"Matthews correlation coefficient: {matthews}")
                            
                            test_results = {
                                "accuracy": accuracy,
                                "f1": f1,
                                "auc": auc,
                                "recall": recall,
                                "precision": precision,
                                "balanced_accuracy": balanced_accuracy,
                                "matthews": matthews
                            }
                        elif ml_task == "regression":
                            mae = mean_absolute_error(df_test["tag"], predictions)
                            mse = mean_squared_error(df_test["tag"], predictions)
                            mape = mean_absolute_percentage_error(df_test["tag"], predictions)
                            smape = 1/len(df_test["tag"]) * np.sum(2 * np.abs(predictions - df_test["tag"]) / (np.abs(predictions) + np.abs(df_test["tag"])))
                            
                            logger.info(f"MAE: {mae}")
                            logger.info(f"MSE: {mse}")
                            logger.info(f"MAPE: {mape}")
                            logger.info(f"SMAPE: {smape}")
                            
                            test_results = {
                                "mae": mae,
                                "mse": mse,
                                "mape": mape,
                                "smape": smape
                            }
                            
                        with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_test_results_{comb_id}.pkl", "wb") as f:
                            pickle.dump(test_results, f)

                        # accuracy = accuracy_score(df_test['tag'], np.argmax(preds, axis=1))
                        # balanced_accuracy = balanced_accuracy_score(df_test['tag'], np.argmax(preds, axis=1))
                        # f1 = f1_score(df_test['tag'], np.argmax(preds, axis=1))

                        # print(f"Accuracy: {accuracy}")
                        # print(f"Balanced accuracy: {balanced_accuracy}")
                        # print(f"F1 score: {f1}")
                        
                        # # save the results
                        # test_results = [accuracy, balanced_accuracy, f1]
                        
                        # with open(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_test_results_{comb_id}.pkl", "wb") as f:
                        #     pickle.dump({"test_results": test_results}, f)
                                    
                        evaluation_nas_times_list = []
                        evaluation_nas_stats_list = []
                        
                        for i in range(0, num_trials):
                            logger.info(f"Trial Evaluation: {i}. Comb ID: {comb_id}")
                                
                            # Read nas time from file
                            with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_time_{comb_id}_{i}.pkl", "rb") as f:
                                evaluation_nas_time = pickle.load(f)

                            evaluation_nas_time = evaluation_nas_time["evaluation_nas_time"]
                            evaluation_nas_time = float(evaluation_nas_time)

                            # # Delete nas time file
                            # os.remove(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_time_{comb_id}_{i}.pkl")
                                    
                            # Read nas stats from file
                            with open(f'poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_{comb_id}_{i}_stats.pkl', 'rb') as f:
                                evaluation_nas_stats = pickle.load(f)
                                
                            # # Delete nas stats file
                            # os.remove(f"poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_stats_{comb_id}_{i}.pkl")
                            
                            logger.info("Trial {}. Comb ID {}. Evaluation NAS time: {}".format(i, comb_id, evaluation_nas_time))

                            # Save NAS times
                            evaluation_nas_times_list.append(evaluation_nas_time)

                            # Save NAS stats
                            evaluation_nas_stats_list.append(evaluation_nas_stats)

                        average_evaluation_nas_time = np.mean(evaluation_nas_times_list)
                        std_dev_evaluation_nas_time = np.std(evaluation_nas_times_list)
                        max_evaluation_nas_time = np.max(evaluation_nas_times_list)

                        # Time spent on NAS
                        logger.info(f"Comb ID: {comb_id}. " + "Average NAS time: {}".format(np.round(average_evaluation_nas_time, 2)))
                        logger.info(f"Comb ID: {comb_id}. " + "Standard deviation of NAS time: {}".format(np.round(std_dev_evaluation_nas_time, 2)))
                        logger.info(f"Comb ID: {comb_id}. " + "Max. NAS time: {}".format(np.round(max_evaluation_nas_time, 2)))

                        # Get average NAS metrics
                        average_evaluation_nas_stats_list = get_average_stats(evaluation_nas_stats_list, average_evaluation_nas_time)
                        std_dev_evaluation_nas_stats_list = get_std_dev_stats(evaluation_nas_stats_list, std_dev_evaluation_nas_time)
                        max_evaluation_nas_stats_list = get_max_stats(evaluation_nas_stats_list, max_evaluation_nas_time)
                        
                        # Save NAS metrics to dataframe
                        stats = evaluation_nas_stats_list[0][0].keys()

                        averag_evaluation_nas_stats_names = [f"average_evaluation_nas_{stat}" for stat in stats]
                        std_dev_evaluation_nas_stats_names = [f"std_dev_evaluation_nas_{stat}" for stat in stats]
                        max_evaluation_nas_stats_names = [f"max_evaluation_nas_{stat}" for stat in stats]

                        df_evaluation_nas_times_columns = ["experiment", "device", "average_evaluation_nas_time", "std_dev_evaluation_nas_time", "max_evaluation_nas_time"]
                        df_evaluation_nas_times = pd.DataFrame(columns=df_evaluation_nas_times_columns)
                        
                        experiment = f"evaluation_nas_{comb_id}"
                        evaluation_nas_times_row = [experiment, device, average_evaluation_nas_time, std_dev_evaluation_nas_time, max_evaluation_nas_time]
                        df_evaluation_nas_times.loc[0] = evaluation_nas_times_row

                        df_evaluation_nas_stats_columns = ["experiment", "device", "snapshot", *averag_evaluation_nas_stats_names, *std_dev_evaluation_nas_stats_names, *max_evaluation_nas_stats_names]
                        df_evaluation_nas_stats = pd.DataFrame(columns=df_evaluation_nas_stats_columns)
                        
                        for index, _snapshot in enumerate(average_evaluation_nas_stats_list):
                            row = np.array([experiment, device, index])
                            row = np.append(row, average_evaluation_nas_stats_list[index])
                            row = np.append(row, std_dev_evaluation_nas_stats_list[index])
                            row = np.append(row, max_evaluation_nas_stats_list[index])

                            df_evaluation_nas_stats.loc[index] = row
                            
                        assert df_evaluation_nas_times.shape[0] == 1
                        assert df_evaluation_nas_stats.shape[0] == len(average_evaluation_nas_stats_list)
                        assert df_evaluation_nas_stats.shape[1] == len(df_evaluation_nas_stats_columns)
                        
                        display(df_evaluation_nas_times)
                        display(df_evaluation_nas_stats)

                        # calculate the average of the df_evaluation_nas_stats average_evaluation_nas_cpu_power_draw and average_evaluation_nas_gpu_power_draw columns
                        global_average_average_evaluation_nas_cpu_power_draw = df_evaluation_nas_stats[f"average_evaluation_nas_cpu_power_draw"].astype(float).mean()
                        global_average_average_evaluation_nas_gpu_power_draw = df_evaluation_nas_stats[f"average_evaluation_nas_gpu_power_draw"].astype(float).mean()
                        
                        # check that both columns have a single scalar value
                        assert np.isscalar(global_average_average_evaluation_nas_cpu_power_draw)
                        assert np.isscalar(global_average_average_evaluation_nas_gpu_power_draw)
                        
                        average_evaluation_nas_exp_duration = df_evaluation_nas_times[f"average_evaluation_nas_time"].values[0]
                        
                        print("Average evaluation NAS experiment duration: {}".format(average_evaluation_nas_exp_duration))
                        print("Average evaluation NAS CPU power draw: {}".format(global_average_average_evaluation_nas_cpu_power_draw))
                        print("Average evaluation NAS GPU power draw: {}".format(global_average_average_evaluation_nas_gpu_power_draw))
                        
                        total_average_cpu_energy_consumption = global_average_average_evaluation_nas_cpu_power_draw * average_evaluation_nas_exp_duration
                        total_average_gpu_energy_consumption = global_average_average_evaluation_nas_gpu_power_draw * average_evaluation_nas_exp_duration
                        
                        model_energy_consumption[comb_id] = {
                            "cpu": total_average_cpu_energy_consumption,
                            "gpu": total_average_gpu_energy_consumption
                        }
                        
                        print("Model energy consumption:\n{}".format(model_energy_consumption))
                    
                # normalize energy consumption between 0 and 1
                normalized_model_energy_consumption = deepcopy(model_energy_consumption)
                
                max_cpu_energy_consumption = max([model_energy_consumption[comb_id]["cpu"] for comb_id in model_energy_consumption])
                max_gpu_energy_consumption = max([model_energy_consumption[comb_id]["gpu"] for comb_id in model_energy_consumption])
                
                for comb_id in model_energy_consumption:
                    normalized_model_energy_consumption[comb_id]["cpu"] = model_energy_consumption[comb_id]["cpu"] / max_cpu_energy_consumption
                    normalized_model_energy_consumption[comb_id]["gpu"] = model_energy_consumption[comb_id]["gpu"] / max_gpu_energy_consumption
                
                # multiply normalized energy consumption by 0.5
                weighted_normalized_model_energy_consumption = deepcopy(normalized_model_energy_consumption)
                
                for comb_id in model_energy_consumption:
                    weighted_normalized_model_energy_consumption[comb_id]["cpu"] = normalized_model_energy_consumption[comb_id]["cpu"] * 0.5 # TODO: parameterize the weight
                    weighted_normalized_model_energy_consumption[comb_id]["gpu"] = normalized_model_energy_consumption[comb_id]["gpu"] * 0.5
                                    
                # Read all evaluation nas test results files
                evaluation_nas_test_results_files = glob.glob("poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_nas_test_results_*.pkl")
                measured_performance = {}
                
                for evaluation_nas_test_results_file in evaluation_nas_test_results_files:
                    comb_id = evaluation_nas_test_results_file.split("_")[-2] + "_" + evaluation_nas_test_results_file.split("_")[-1].split(".")[0]
                    
                    with open(evaluation_nas_test_results_file, 'rb') as f:
                        evaluation_nas_test_results = pickle.load(f)

                        # # Delete evaluation nas test results file
                        # os.remove(evaluation_nas_test_results)
                    
                    # Get measured performance
                    measured_performance[comb_id] = evaluation_nas_test_results["balanced_accuracy"] # TODO: parameterize the metric
                                
                # multiply measured performance by 0.5
                weighted_measured_performance = deepcopy(measured_performance)
                
                for comb_id in measured_performance:
                    weighted_measured_performance[comb_id] = measured_performance[comb_id] * 0.5 # TODO: parameterize the weight
                
                # calculate weighted average of energy consumption and measured performance
                weighted_average_energy_consumption_and_measured_performance = {}
                
                for comb_id in model_energy_consumption:
                    weighted_average_energy_consumption_and_measured_performance[comb_id] = weighted_normalized_model_energy_consumption[comb_id]["cpu"] + weighted_measured_performance[comb_id] # TODO: parameterize the platform that is being optimized (cpu or gpu)
            
                # get best comb_id
                best_comb_id = max(weighted_average_energy_consumption_and_measured_performance, key=weighted_average_energy_consumption_and_measured_performance.get)
                logger.info(f"Best comb_id: {best_comb_id}")
                logger.info(f"Best comb_id energy consumption: {model_energy_consumption[best_comb_id]}")
                logger.info(f"Best comb_id measured performance: {measured_performance[best_comb_id]}")
                logger.info(f"Best comb_id normalized energy consumption cpu: {normalized_model_energy_consumption[best_comb_id]['cpu']}")
                logger.info(f"Best comb_id normalized energy consumption gpu: {normalized_model_energy_consumption[best_comb_id]['gpu']}")
                logger.info(f"Best comb_id weighted normalized energy consumption cpu: {weighted_normalized_model_energy_consumption[best_comb_id]['cpu']}")
                logger.info(f"Best comb_id weighted normalized energy consumption gpu: {weighted_normalized_model_energy_consumption[best_comb_id]['gpu']}")
                logger.info(f"Best comb_id weighted measured performance: {weighted_measured_performance[best_comb_id]}")
                logger.info(f"Best comb_id weighted average energy consumption and measured performance: {weighted_average_energy_consumption_and_measured_performance[best_comb_id]}")
                
                # save best comb_id
                with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_best_comb_id.pkl", "wb") as f:
                    pickle.dump({"best_comb_id": best_comb_id}, f)
                
        # Get nas time
        nas_time = nas_time_measure()

        # Write nas time to file
        with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_nas_time.pkl", "wb") as f:
            pickle.dump({"nas_time": nas_time}, f)
        

In [None]:
# Start the child process
p_nas = multiprocessing.Process(target=test_nas, args=("cpu",))

p_nas.start()
p_nas.join()

In [None]:
nas_times_list = []
nas_stats_list = []

device = "cpu"

for i in range(0, num_trials):
    logger.info(f"Trial {i}")
    
    p_nas = multiprocessing.Process(target=test_nas, args=(device,))
    
    p_nas.start()
    p_nas.join()
            
    # Read nas time from file
    with open(f"poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_nas_time.pkl", "rb") as f:
        nas_time = pickle.load(f)

    nas_time = nas_time["nas_time"]

    # # Delete nas time file
    # os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_nas_time.pkl")
            
    # Read nas stats from file
    with open('poc_energy_efficiency_crypto/NAS/crypto_spider_5g_fcnn_optimized_benchmark_nas_stats.pkl', 'rb') as f:
        nas_stats = pickle.load(f)
        
    # # Delete nas stats file
    # os.remove("poc_energy_efficiency_crypto/crypto_spider_5g_fcnn_optimized_benchmark_nas_stats.pkl")

    logger.info("Trial {} - NAS time: {:.2f}s".format(i, nas_time))

    # Save NAS times
    nas_times_list.append(nas_time)

    # Save NAS stats
    nas_stats_list.append(nas_stats)

average_nas_time = np.mean(nas_times_list)
std_dev_nas_time = np.std(nas_times_list)
max_nas_time = np.max(nas_times_list)

# Time spent on NAS
logger.info("Average NAS time: {}".format(np.round(average_nas_time, 2)))
logger.info("Standard deviation of NAS time: {}".format(np.round(std_dev_nas_time, 2)))
logger.info("Max. NAS time: {}".format(np.round(max_nas_time, 2)))

# Get average NAS metrics
average_nas_stats_list = get_average_stats(nas_stats_list, average_nas_time)
std_dev_nas_stats_list = get_std_dev_stats(nas_stats_list, std_dev_nas_time)
max_nas_stats_list = get_max_stats(nas_stats_list, max_nas_time)

# Save NAS metrics to dataframe
stats = nas_stats_list[0][0].keys()

averag_nas_stats_names = [f"average_nas_{stat}" for stat in stats]
std_dev_nas_stats_names = [f"std_dev_nas_{stat}" for stat in stats]
max_nas_stats_names = [f"max_nas_{stat}" for stat in stats]

df_nas_times_columns = ["experiment", "device", "average_nas_time", "std_dev_nas_time", "max_nas_time"]
df_nas_times = pd.DataFrame(columns=df_nas_times_columns)

experiment = "nas"

nas_times_row = [experiment, device, average_nas_time, std_dev_nas_time, max_nas_time]
df_nas_times.loc[0] = nas_times_row

df_nas_stats_columns = ["experiment", "device", "snapshot", *averag_nas_stats_names, *std_dev_nas_stats_names, *max_nas_stats_names]
df_nas_stats = pd.DataFrame(columns=df_nas_stats_columns)

for index, _snapshot in enumerate(average_nas_stats_list):
    row = np.array([experiment, device, index])
    row = np.append(row, average_nas_stats_list[index])
    row = np.append(row, std_dev_nas_stats_list[index])
    row = np.append(row, max_nas_stats_list[index])

    df_nas_stats.loc[index] = row

display(df_nas_times)
display(df_nas_stats)

average_nas_time = df_nas_times["average_nas_time"].values[0]
global_average_nas_cpu_power_draw = df_nas_stats["average_nas_cpu_power_draw"].astype(float).mean()

print(f"Average NAS time: {average_nas_time}")
print(f"Global average NAS CPU power draw: {global_average_nas_cpu_power_draw}")

In [None]:
test_nas("cpu")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# balance the number of samples in each class
def balance_classes(data, labels):
    # count number of samples in each class
    unique, counts = np.unique(labels, return_counts=True)
    counts_dict = dict(zip(unique, counts))
    print("Class counts:", counts_dict)
    
    # find the class with the most samples
    max_class = max(counts_dict, key=counts_dict.get)
    max_class_count = counts_dict[max_class]
    print("Class with the most samples:", max_class)
    
    # find the class with the least samples
    min_class = min(counts_dict, key=counts_dict.get)
    min_class_count = counts_dict[min_class]
    print("Class with the least samples:", min_class)
    
    # find the difference between the number of samples in the two classes
    diff = max_class_count - min_class_count
    print("Difference between the number of samples in the two classes:", diff)
    
    # find the indices of the samples in the class with the least samples
    indices = np.where(labels == min_class)[0]
    # randomly select the same number of samples from the class with the most samples
    selected_indices = np.random.choice(np.where(labels == max_class)[0], diff, replace=False)
    # combine the indices of the two classes
    combined_indices = np.concatenate((indices, selected_indices))
    # shuffle the indices
    shuffled_indices = np.random.permutation(combined_indices)
    # return the balanced data and labels
    
    return data[shuffled_indices], labels[shuffled_indices]

balanced_data, balanced_labels = balance_classes(data_transformed, df_train['tag'].values)

rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)
# rf.fit(data_transformed, df_train['tag'].values)
rf.fit(balanced_data, balanced_labels)

test_data_transformed = standard.transform(df_test[gf])
predictions = rf.predict(test_data_transformed)

accuracy = accuracy_score(df_test["tag"], predictions)
balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
f1 = f1_score(df_test["tag"], predictions, average="weighted")

print(f"Accuracy: {accuracy}")
print(f"Balanced accuracy: {balanced_accuracy}")
print(f"F1 score: {f1}")

In [None]:
teacher = baseline_model(input_dim=len(gf), n_output=2)
teacher, history = train_model(teacher)

In [None]:
student = small_model(input_dim=len(gf), n_output=2)

# Initialize and compile distiller
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.CategoricalAccuracy()],
    student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=1,
)

es = EarlyStopping(
    monitor="val_student_loss",
    mode="min",
    patience=es_patience,
    restore_best_weights=es_restore_best_weights,
)
history = distiller.fit(
    data_transformed,
    pd.get_dummies(df_train["tag"]),
    validation_split=validation_split,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[es],
    verbose=False,
)

test_data_transformed = standard.transform(df_test[gf])
predictions = distiller.student.predict(test_data_transformed)
balanced_accuracy = balanced_accuracy_score(df_test["tag"], np.argmax(predictions, axis=1))
print(f"Balanced accuracy: {balanced_accuracy}")

In [None]:
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.CategoricalAccuracy()],
    student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=3,
)

es = EarlyStopping(
    monitor="val_student_loss",
    mode="min",
    patience=es_patience,
    restore_best_weights=es_restore_best_weights,
)
history = distiller.fit(
    data_transformed,
    pd.get_dummies(df_train["tag"]),
    validation_split=validation_split,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[es],
    verbose=1,
)

In [None]:
average_nas_exp_duration = df_nas_times[f"average_{experiment}_time"]
df_nas_stats["total_average_cpu_energy_consumption"] = df_nas_stats[f"average_nas_cpu_power_draw"] * average_nas_exp_duration
df_nas_stats["total_average_gpu_energy_consumption"] = df_nas_stats[f"average_nas_gpu_power_draw"] * average_nas_exp_duration

#### KD

In [None]:
teacher = baseline_model(input_dim=len(gf), n_output=2)
teacher, history = train_model(teacher)

In [None]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV
import pickle

class DistillerEstimator(BaseEstimator):
    def __init__(self, alpha=0.1, temperature=3):
        self.alpha = alpha
        self.temperature = temperature
    
    def fit(self, X, y):
        student = small_model(input_dim=len(gf), n_output=2)
        distiller = Distiller(student=student, teacher=teacher)
        distiller.compile(
            optimizer=keras.optimizers.Adam(),
            metrics=[keras.metrics.CategoricalAccuracy()],
            student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=False),
            distillation_loss_fn=keras.losses.KLDivergence(),
            alpha=self.alpha,
            temperature=self.temperature,
        )
        es = EarlyStopping(
            monitor="val_student_loss",
            mode="min",
            patience=es_patience,
            restore_best_weights=es_restore_best_weights,
        )
        history = distiller.fit(
            X,
            y,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[es],
            verbose=False,
        )
        self.distiller_ = distiller
        return self
    
    # def score(self, X, y):
    #     test_data_transformed = standard.transform(df_test[gf])
    #     predictions = self.distiller_.student.predict(test_data_transformed)
    #     balanced_accuracy = balanced_accuracy_score(df_test["tag"], np.argmax(predictions, axis=1))
    #     return balanced_accuracy
    def score(self, X, y):
        predictions = self.distiller_.student.predict(X)
        balanced_accuracy = balanced_accuracy_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1))
        
        return balanced_accuracy
    
    def evaluate(self, X, y):
        predictions = self.distiller_.student.predict(X)
        print(f"predictions shape: {predictions.shape}")
        y = np.array(y)
        accuracy = accuracy_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1))
        balanced_accuracy = balanced_accuracy_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1))
        f1 = f1_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1), average="weighted")
        
        accuracy = round(accuracy, 3)
        balanced_accuracy = round(balanced_accuracy, 3)
        f1 = round(f1, 3)
        
        print(f"Accuracy: {accuracy}")
        print(f"Balanced accuracy: {balanced_accuracy}")
        print(f"F1 score: {f1}")


In [None]:
param_grid = {'alpha': [0.1, 0.2, 0.3, 0.4, 0.5],
              'temperature': [0.5, 1, 1.5, 2, 2.5, 3]}

grid_search = GridSearchCV(estimator=DistillerEstimator(), param_grid=param_grid, cv=3, verbose=2)
grid_search.fit(data_transformed, pd.get_dummies(df_train["tag"]))

print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_}")


In [None]:
de = DistillerEstimator(alpha=0.1, temperature=2)
de.fit(data_transformed, pd.get_dummies(df_train["tag"]))
de.evaluate(test_data_transformed, pd.get_dummies(df_test["tag"]))

#### PT Quantization

In [None]:
baseline = baseline_model(input_dim=len(gf), n_output=2)
baseline, history = train_model(baseline)

In [None]:
def dataset_generator():
    dataset_size = 0.1
    print("Generating dataset for float16 activations and int8 weights quantization")
    print("Original Dataset size: ", int(len(data_transformed)))
    print("Dataset size: ", int(len(data_transformed) * dataset_size))
    
    for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * dataset_size)):
        yield [tf.dtypes.cast(data, tf.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(baseline)
converter.representative_dataset = dataset_generator
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.float16]
quantized_model = converter.convert()

In [None]:
def float16_quantization(trained_model, dataset_size: float = 0.25):
    def dataset_generator(): # Does not need a representative dataset
        print("Generating dataset for float16 weights quantization")
        print("Original Dataset size: ", int(len(data_transformed)))
        print("Dataset size: ", int(len(data_transformed) * dataset_size))
        
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * dataset_size)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    # converter.representative_dataset = dataset_generator
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.float16]
    quantized_model = converter.convert()

    logger.info("Applied float16 weights quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file

dataset_sizes = [0.1, 0.25, 0.5, 0.75, 1.0]

for dataset_size in dataset_sizes:
    tflite_model = float16_quantization(baseline, dataset_size)

    # evaluate
    interpreter = tf.lite.Interpreter(model_path=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    test_data_transformed = standard.transform(df_test[gf])
    predictions = []

    # cast data to float32
    test_data_transformed = test_data_transformed.astype(np.float32)

    for i in range(len(test_data_transformed)):
        interpreter.set_tensor(input_details[0]['index'], test_data_transformed[i].reshape(1, -1))
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(np.argmax(output_data))
    
    accuracy = accuracy_score(df_test["tag"], predictions)
    balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
    f1 = f1_score(df_test["tag"], predictions, average="weighted")
    
    accuracy = round(accuracy, 4)
    balanced_accuracy = round(balanced_accuracy, 4)
    f1 = round(f1, 4)
    
    print(f"Accuracy: {accuracy}")
    print(f"Balanced accuracy: {balanced_accuracy}")
    print(f"F1 score: {f1}")

In [None]:
def int8_quantization(trained_model, dataset_size: float = 0.25):
    def dataset_generator():
        print("Generating dataset for int8 weights quantization")
        print("Original Dataset size: ", int(len(data_transformed)))
        print("Dataset size: ", int(len(data_transformed) * dataset_size))
        
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * dataset_size)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = dataset_generator
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8  # or tf.uint8
    converter.inference_output_type = tf.int8  # or tf.uint8
    quantized_model = converter.convert()

    logger.info("Applied int8 weights quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file

dataset_sizes = [0.1, 0.25, 0.5, 0.75, 1.0]

for dataset_size in dataset_sizes:
    tflite_model = int8_quantization(baseline, dataset_size)

    # evaluate
    interpreter = tf.lite.Interpreter(model_path=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    test_data_transformed = standard.transform(df_test[gf])
    predictions = []

    # cast data to float32
    test_data_transformed = test_data_transformed.astype(np.int8)

    for i in range(len(test_data_transformed)):
        interpreter.set_tensor(input_details[0]['index'], test_data_transformed[i].reshape(1, -1))
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(np.argmax(output_data))
        
    accuracy = accuracy_score(df_test["tag"], predictions)
    balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
    f1 = f1_score(df_test["tag"], predictions, average="weighted")
    
    accuracy = round(accuracy, 4)
    balanced_accuracy = round(balanced_accuracy, 4)
    f1 = round(f1, 4)
    
    print(f"Accuracy: {accuracy}")
    print(f"Balanced accuracy: {balanced_accuracy}")
    print(f"F1 score: {f1}")

In [None]:
def float16_activations_int8_weights_quantization(trained_model, dataset_size: float = 0.25):
    def dataset_generator():
        print("Generating dataset for float16 activations and int8 weights quantization")
        print("Original Dataset size: ", int(len(data_transformed)))
        print("Dataset size: ", int(len(data_transformed) * dataset_size))
        
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * dataset_size)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.representative_dataset = dataset_generator
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
    quantized_model = converter.convert()

    logger.info("Applied float16 activations and int8 weights quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file

dataset_sizes = [0.1, 0.25, 0.5, 0.75, 1.0]

for dataset_size in dataset_sizes:
    tflite_model = float16_activations_int8_weights_quantization(baseline, dataset_size)

    # evaluate
    interpreter = tf.lite.Interpreter(model_path=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    test_data_transformed = standard.transform(df_test[gf])
    predictions = []

    # cast data to float32
    test_data_transformed = test_data_transformed.astype(np.float32)

    for i in range(len(test_data_transformed)):
        interpreter.set_tensor(input_details[0]['index'], test_data_transformed[i].reshape(1, -1))
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(np.argmax(output_data))
        
    accuracy = accuracy_score(df_test["tag"], predictions)
    balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
    f1 = f1_score(df_test["tag"], predictions, average="weighted")
    
    accuracy = round(accuracy, 4)
    balanced_accuracy = round(balanced_accuracy, 4)
    f1 = round(f1, 4)
    
    print(f"Accuracy: {accuracy}")
    print(f"Balanced accuracy: {balanced_accuracy}")
    print(f"F1 score: {f1}")

In [None]:
def dynamic_range_quantization(trained_model, dataset_size: float = 0.25): # Not implemented in the framework
    def dataset_generator(): # Does not need a representative dataset
        print("Generating dataset for dynamic range quantization")
        print("Original Dataset size: ", int(len(data_transformed)))
        print("Dataset size: ", int(len(data_transformed) * dataset_size))
        
        for data in tf.data.Dataset.from_tensor_slices((data_transformed)).batch(1).take(int(len(data_transformed) * dataset_size)):
            yield [tf.dtypes.cast(data, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    quantized_model = converter.convert()

    logger.info("Applied dynamic range quantization")

    # Save the model to disk
    quantized_tflite_model_file = 'model.tflite'
    
    with open(quantized_tflite_model_file, 'wb') as f:
        f.write(quantized_model)

    return quantized_tflite_model_file

dataset_sizes = [0.1, 0.25, 0.5, 0.75, 1.0]

for dataset_size in dataset_sizes:
    tflite_model = dynamic_range_quantization(baseline, dataset_size)

    # evaluate
    interpreter = tf.lite.Interpreter(model_path=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    test_data_transformed = standard.transform(df_test[gf])
    predictions = []

    # cast data to float32
    test_data_transformed = test_data_transformed.astype(np.float32)

    for i in range(len(test_data_transformed)):
        interpreter.set_tensor(input_details[0]['index'], test_data_transformed[i].reshape(1, -1))
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]['index'])
        predictions.append(np.argmax(output_data))
        
    accuracy = accuracy_score(df_test["tag"], predictions)
    balanced_accuracy = balanced_accuracy_score(df_test["tag"], predictions)
    f1 = f1_score(df_test["tag"], predictions, average="weighted")
    
    accuracy = round(accuracy, 4)
    balanced_accuracy = round(balanced_accuracy, 4)
    f1 = round(f1, 4)
    
    print(f"Accuracy: {accuracy}")
    print(f"Balanced accuracy: {balanced_accuracy}")
    print(f"F1 score: {f1}")

#### Pruning

In [None]:
baseline = baseline_model(input_dim=len(gf), n_output=2)
baseline, history = train_model(baseline)

In [None]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import pandas as pd

class PolynomialDecayPruningEstimator(BaseEstimator):
    def __init__(self, batch_size=256, epochs=10, initial_sparsity=0.50, final_sparsity=0.80):
        self.batch_size = batch_size
        self.epochs = epochs
        self.initial_sparsity = initial_sparsity
        self.final_sparsity = final_sparsity
    
    def fit(self, X, y):
        # Fine-tune pretrained model with pruning aware training
        prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

        num_samples = len(X)
        end_step = np.ceil(num_samples / self.batch_size).astype(np.int32) * self.epochs

        pruning_params = {
            "pruning_schedule": tfmot.sparsity.keras.PolynomialDecay(
                initial_sparsity=self.initial_sparsity, final_sparsity=self.final_sparsity, begin_step=0, end_step=end_step
            )
        }

        model_for_pruning = prune_low_magnitude(baseline, **pruning_params)

        model_for_pruning.compile(
            optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
        )

        callbacks = [
            tfmot.sparsity.keras.UpdatePruningStep(),
        ]

        self.history_ = model_for_pruning.fit(X, y, validation_split=0.1, epochs=self.epochs, batch_size=self.batch_size, callbacks=callbacks, verbose=0)

        self.pruned_model_ = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

        # convert to tflite
        converter = tf.lite.TFLiteConverter.from_keras_model(self.pruned_model_)
        converter.optimizations = [tf.lite.Optimize.EXPERIMENTAL_SPARSITY, tf.lite.Optimize.DEFAULT]
        self.pruned_tflite_model_ = converter.convert()

        # Save the model to disk
        self.pruned_tflite_model_file_ = 'model.tflite'

        with open(self.pruned_tflite_model_file_, 'wb') as f:
            f.write(self.pruned_tflite_model_)

        return self
    
    def score(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.pruned_tflite_model_file_)
        interpreter.allocate_tensors()

        # Load test set
        # test_data_transformed = standard.transform(df_test[gf])
        
        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")
        
        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")
        
        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]    
        input_data = np.array(X, dtype=dtype)
        
        # reshape model input
        batch_size = 256
        
        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
        interpreter.allocate_tensors()
        
        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]
        
        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i:i+batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")
            
            batch_data = np.array(batch, dtype=dtype)
            
            if len(batch) == batch_size:        
                interpreter.set_tensor(input_details['index'], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details['index'])
                
                preds.append(output_data)
        
        predictions = np.concatenate(preds)
        
        # take the labels of the test set
        test_labels = y.values[:len(predictions)]
        
        balanced_accuracy = balanced_accuracy_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1))
        
        return balanced_accuracy
    
    def evaluate(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.pruned_tflite_model_file_)
        interpreter.allocate_tensors()

        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")

        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")

        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]
        input_data = np.array(X, dtype=dtype)

        # reshape model input
        batch_size = 256

        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(
            input_details["index"], (batch_size, input_data.shape[1])
        )
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]

        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i : i + batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")

            batch_data = np.array(batch, dtype=dtype)

            if len(batch) == batch_size:
                interpreter.set_tensor(input_details["index"], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details["index"])

                preds.append(output_data)
                
        predictions = np.concatenate(preds)

        # take the labels of the test set
        test_labels = y.values[: len(predictions)]

        balanced_accuracy = balanced_accuracy_score(
            np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1)
        )

        accuracy = accuracy_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1))
        balanced_accuracy = balanced_accuracy_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1))
        f1 = f1_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1), average="weighted")
        
        accuracy = round(accuracy, 3)
        balanced_accuracy = round(balanced_accuracy, 3)
        f1 = round(f1, 3)
        
        print(f"Accuracy: {accuracy}")
        print(f"Balanced accuracy: {balanced_accuracy}")
        print(f"F1 score: {f1}")

In [None]:
with tf.device('/device:CPU:0'):
    param_grid = {'batch_size': [256, 512, 1024, 2048], 'epochs': [10, 20, 30], 'initial_sparsity': [0.50, 0.60, 0.70, 0.80], 'final_sparsity': [0.60, 0.70, 0.80, 0.90]}

    grid_search = GridSearchCV(estimator=PolynomialDecayPruningEstimator(), param_grid=param_grid, cv=3, verbose=2)
    grid_search.fit(data_transformed, pd.get_dummies(df_train["tag"]))

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

In [None]:
with tf.device('/device:CPU:0'):
    pe = PolynomialDecayPruningEstimator(batch_size=1024, epochs=10, initial_sparsity=0.70, final_sparsity=0.90)
    pe.fit(data_transformed, pd.get_dummies(df_train["tag"]))
    pe.evaluate(test_data_transformed, pd.get_dummies(df_test["tag"]))

In [None]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import pandas as pd

class ConstantSparsityPruningEstimator(BaseEstimator):
    def __init__(self, batch_size=256, epochs=10, target_sparsity=0.80):
        self.batch_size = batch_size
        self.epochs = epochs
        self.target_sparsity = target_sparsity
    
    def fit(self, X, y):
        # Fine-tune pretrained model with pruning aware training
        prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

        num_samples = len(X)
        end_step = np.ceil(num_samples / self.batch_size).astype(np.int32) * self.epochs

        pruning_params = {
            "pruning_schedule": tfmot.sparsity.keras.ConstantSparsity(
                target_sparsity=self.target_sparsity, begin_step=0, end_step=end_step,
            )
        }

        model_for_pruning = prune_low_magnitude(baseline, **pruning_params)

        model_for_pruning.compile(
            optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
        )

        callbacks = [
            tfmot.sparsity.keras.UpdatePruningStep(),
        ]

        self.history_ = model_for_pruning.fit(X, y, validation_split=0.1, epochs=self.epochs, batch_size=self.batch_size, callbacks=callbacks, verbose=0)

        self.pruned_model_ = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

        # convert to tflite
        converter = tf.lite.TFLiteConverter.from_keras_model(self.pruned_model_)
        converter.optimizations = [tf.lite.Optimize.EXPERIMENTAL_SPARSITY, tf.lite.Optimize.DEFAULT]
        self.pruned_tflite_model_ = converter.convert()

        # Save the model to disk
        self.pruned_tflite_model_file_ = 'model.tflite'

        with open(self.pruned_tflite_model_file_, 'wb') as f:
            f.write(self.pruned_tflite_model_)

        return self
    
    def score(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.pruned_tflite_model_file_)
        interpreter.allocate_tensors()

        # Load test set
        # test_data_transformed = standard.transform(df_test[gf])
        
        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")
        
        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")
        
        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]    
        input_data = np.array(X, dtype=dtype)
        
        # reshape model input
        batch_size = 256
        
        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
        interpreter.allocate_tensors()
        
        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]
        
        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i:i+batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")
            
            batch_data = np.array(batch, dtype=dtype)
            
            if len(batch) == batch_size:        
                interpreter.set_tensor(input_details['index'], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details['index'])
                
                preds.append(output_data)
        
        predictions = np.concatenate(preds)
        
        # take the labels of the test set
        test_labels = y.values[:len(predictions)]
        
        balanced_accuracy = balanced_accuracy_score(np.argmax(y, axis=1), np.argmax(predictions, axis=1))
        
        return balanced_accuracy

In [None]:
with tf.device('/device:CPU:0'):
    param_grid = {'batch_size': [256, 512, 1024, 2048], 'epochs': [10, 20, 30], 'target_sparsity': [0.5, 0.60, 0.70, 0.80, 0.90]}

    grid_search = GridSearchCV(estimator=ConstantSparsityPruningEstimator(), param_grid=param_grid, cv=3, verbose=2)
    grid_search.fit(data_transformed, pd.get_dummies(df_train["tag"]))

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

In [None]:
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_}")

In [None]:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import pandas as pd

class PolynomialDecayPruningEstimator:
    def __init__(self, model=None, batch_size=256, epochs=10, initial_sparsity=0.50, final_sparsity=0.80):
        self.model = model
        self.batch_size = batch_size
        self.epochs = epochs
        self.initial_sparsity = initial_sparsity
        self.final_sparsity = final_sparsity
    
    def fit(self, X, y):
        # Fine-tune pretrained model with pruning aware training
        prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

        num_samples = len(X)
        end_step = np.ceil(num_samples / self.batch_size).astype(np.int32) * self.epochs

        pruning_params = {
            "pruning_schedule": tfmot.sparsity.keras.PolynomialDecay(
                initial_sparsity=self.initial_sparsity, final_sparsity=self.final_sparsity, begin_step=0, end_step=end_step
            )
        }

        model_for_pruning = prune_low_magnitude(self.model, **pruning_params)

        model_for_pruning.compile(
            optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
        )

        callbacks = [
            tfmot.sparsity.keras.UpdatePruningStep(),
        ]

        self.history_ = model_for_pruning.fit(X, y, validation_split=0.1, epochs=self.epochs, batch_size=self.batch_size, callbacks=callbacks, verbose=0)

        self.pruned_model_ = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

        # convert to tflite
        converter = tf.lite.TFLiteConverter.from_keras_model(self.pruned_model_)
        converter.optimizations = [tf.lite.Optimize.EXPERIMENTAL_SPARSITY, tf.lite.Optimize.DEFAULT]
        self.pruned_tflite_model_ = converter.convert()

        # Save the model to disk
        self.pruned_tflite_model_file_ = 'model.tflite'

        with open(self.pruned_tflite_model_file_, 'wb') as f:
            f.write(self.pruned_tflite_model_)

        return self
    
    def score(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.pruned_tflite_model_file_)
        interpreter.allocate_tensors()
        
        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")
        
        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")
        
        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]    
        input_data = np.array(X, dtype=dtype)
        
        # reshape model input
        batch_size = 256
        
        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
        interpreter.allocate_tensors()
        
        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]
        
        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i:i+batch_size]
            
            batch_data = np.array(batch, dtype=dtype)
            
            if len(batch) == batch_size:        
                interpreter.set_tensor(input_details['index'], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details['index'])
                
                preds.append(output_data)
        
        predictions = np.concatenate(preds)
        
        # take the labels of the test set
        test_labels = y.values[:len(predictions)]
        
        balanced_accuracy = balanced_accuracy_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1))
        
        return balanced_accuracy
    
    def predict(self, X):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.pruned_tflite_model_file_)
        interpreter.allocate_tensors()
        
        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")
        
        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")
        
        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]    
        input_data = np.array(X, dtype=dtype)
        
        # reshape model input
        batch_size = 256
        
        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(input_details['index'], (batch_size, input_data.shape[1]))
        interpreter.allocate_tensors()
        
        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]
        
        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i:i+batch_size]
            
            batch_data = np.array(batch, dtype=dtype)
            
            if len(batch) == batch_size:        
                interpreter.set_tensor(input_details['index'], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details['index'])
                
                preds.append(output_data)
        
        predictions = np.concatenate(preds)
        
        return predictions

In [None]:
def test_pruning(device="CPU"):
    baseline = baseline_model(input_dim=len(gf), n_output=2)
    baseline, _history = train_model(baseline)

    num_trials = 5
    
    logger.info("Starting test_pruning")
    
    pid = os.getpid()
    
    model_energy_consumption = {}
    
    Path(f"poc_energy_efficiency_crypto/Pruning").mkdir(parents=True, exist_ok=True)
    
    with tf.device(device):
        with StatsCollectionManager(directory="Pruning", test="pruning", sampling_rate=stats_sampling_rate, pid=pid) as pruning_scm:
            with measure_time() as pruning_time_measure:
                logger.info("Applying Pruning")
        
                # find the student model structure by searching for the model
                param_grid = {'batch_size': [256, 512, 1024, 2048], 'epochs': [10, 20, 30], 'initial_sparsity': [0.50, 0.60, 0.70, 0.80], 'final_sparsity': [0.60, 0.70, 0.80, 0.90]}
                # param_grid = {'batch_size': [256], 'epochs': [10], 'initial_sparsity': [0.50], 'final_sparsity': [0.60]}
                batch_size_grid = param_grid['batch_size']
                epochs_grid = param_grid['epochs']
                initial_sparsity_grid = param_grid['initial_sparsity']
                final_sparsity_grid = param_grid['final_sparsity']

                # make all possible combinations of the parameters
                combinations = list(itertools.product(batch_size_grid, epochs_grid, initial_sparsity_grid, final_sparsity_grid))
                    
                for comb_id, comb in enumerate(combinations):
                    batch_size = comb[0]
                    epochs = comb[1]
                    initial_sparsity = comb[2]
                    final_sparsity = comb[3]
                    
                    logger.info(f"Parameters: batch_size={batch_size}, epochs={epochs}, initial_sparsity={initial_sparsity}, final_sparsity={final_sparsity}")
                    
                    estimator = PolynomialDecayPruningEstimator(model=baseline, batch_size=batch_size, epochs=epochs, initial_sparsity=initial_sparsity, final_sparsity=final_sparsity)
                    estimator.fit(data_transformed, pd.get_dummies(df_train["tag"]))
                        
                    for i in range(0, num_trials):
                        logger.info(f"Trial Evaluation: {i}")
                    
                        # evaluate the model
                        data_transformed_test = standard.transform(df_test[gf])
                        
                        with StatsCollectionManager(directory="Pruning", test=f"evaluation_pruning_{comb_id}_{i}", sampling_rate=stats_sampling_rate, pid=pid) as evaluation_pruning_scm:
                            with measure_time() as evaluation_pruning_time_measure:
                                predictions = estimator.predict(data_transformed_test)
                        
                        evaluation_pruning_time = evaluation_pruning_time_measure()
                        
                        # Write evaluation pruning time to file
                        with open(f"poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_pruning_time_{comb_id}_{i}.pkl", "wb") as f:
                            pickle.dump({"evaluation_pruning_time": evaluation_pruning_time}, f)
                            
                    if ml_task == "binary_classification":
                        predictions = np.argmax(predictions, axis=1)
                        
                        test_labels = df_test["tag"].values[:len(predictions)]
                        
                        accuracy = accuracy_score(test_labels, predictions)
                        f1 = f1_score(test_labels, predictions, average="weighted")
                        auc = roc_auc_score(test_labels, predictions)
                        recall = recall_score(test_labels, predictions, average="weighted")
                        precision = precision_score(test_labels, predictions, average="weighted")
                        balanced_accuracy = balanced_accuracy_score(test_labels, predictions)
                        matthews = matthews_corrcoef(test_labels, predictions)

                        logger.info(f"Accuracy: {accuracy}")
                        logger.info(f"F1 score: {f1}")
                        logger.info(f"AUC: {auc}")
                        logger.info(f"Recall: {recall}")
                        logger.info(f"Precision: {precision}")
                        logger.info(f"Balanced accuracy: {balanced_accuracy}")
                        logger.info(f"Matthews correlation coefficient: {matthews}")
                        
                        test_results = {
                            "accuracy": accuracy,
                            "f1": f1,
                            "auc": auc,
                            "recall": recall,
                            "precision": precision,
                            "balanced_accuracy": balanced_accuracy,
                            "matthews": matthews
                        }
                    elif ml_task == "regression":
                        mae = mean_absolute_error(test_labels, predictions)
                        mse = mean_squared_error(test_labels, predictions)
                        mape = mean_absolute_percentage_error(test_labels, predictions)
                        smape = 1/len(test_labels) * np.sum(2 * np.abs(predictions - test_labels) / (np.abs(predictions) + np.abs(test_labels)))
                        
                        logger.info(f"MAE: {mae}")
                        logger.info(f"MSE: {mse}")
                        logger.info(f"MAPE: {mape}")
                        logger.info(f"SMAPE: {smape}")
                        
                        test_results = {
                            "mae": mae,
                            "mse": mse,
                            "mape": mape,
                            "smape": smape
                        }
                        
                    with open(f"poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_pruning_test_results_{comb_id}.pkl", "wb") as f:
                        pickle.dump(test_results, f)
                                
                    evaluation_pruning_times_list = []
                    evaluation_pruning_stats_list = []
                    
                    for i in range(0, num_trials):
                        logger.info(f"Trial Evaluation: {i}. Comb ID: {comb_id}")
                            
                        # Read pruning time from file
                        with open(f"poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_pruning_time_{comb_id}_{i}.pkl", "rb") as f:
                            evaluation_pruning_time = pickle.load(f)

                        evaluation_pruning_time = evaluation_pruning_time["evaluation_pruning_time"]
                        evaluation_pruning_time = float(evaluation_pruning_time)
                                
                        # Read pruning stats from file
                        with open(f'poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_pruning_{comb_id}_{i}_stats.pkl', 'rb') as f:
                            evaluation_pruning_stats = pickle.load(f)
                        
                        logger.info("Trial {}. Comb ID {}. Evaluation Pruning time: {}".format(i, comb_id, evaluation_pruning_time))

                        # Save Pruning times
                        evaluation_pruning_times_list.append(evaluation_pruning_time)

                        # Save Pruning stats
                        evaluation_pruning_stats_list.append(evaluation_pruning_stats)

                    average_evaluation_pruning_time = np.mean(evaluation_pruning_times_list)
                    std_dev_evaluation_pruning_time = np.std(evaluation_pruning_times_list)
                    max_evaluation_pruning_time = np.max(evaluation_pruning_times_list)

                    # Time spent on Pruning
                    logger.info(f"Comb ID: {comb_id}. " + "Average Pruning evaluation time: {}".format(np.round(average_evaluation_pruning_time, 2)))
                    logger.info(f"Comb ID: {comb_id}. " + "Standard deviation of Pruning evaluation time: {}".format(np.round(std_dev_evaluation_pruning_time, 2)))
                    logger.info(f"Comb ID: {comb_id}. " + "Max. Pruning evaluation time: {}".format(np.round(max_evaluation_pruning_time, 2)))

                    # Get average Pruning metrics
                    average_evaluation_pruning_stats_list = get_average_stats(evaluation_pruning_stats_list, average_evaluation_pruning_time)
                    std_dev_evaluation_pruning_stats_list = get_std_dev_stats(evaluation_pruning_stats_list, std_dev_evaluation_pruning_time)
                    max_evaluation_pruning_stats_list = get_max_stats(evaluation_pruning_stats_list, max_evaluation_pruning_time)
                    
                    # Save Pruning metrics to dataframe
                    stats = evaluation_pruning_stats_list[0][0].keys()

                    averag_evaluation_pruning_stats_names = [f"average_evaluation_pruning_{stat}" for stat in stats]
                    std_dev_evaluation_pruning_stats_names = [f"std_dev_evaluation_pruning_{stat}" for stat in stats]
                    max_evaluation_pruning_stats_names = [f"max_evaluation_pruning_{stat}" for stat in stats]

                    df_evaluation_pruning_times_columns = ["experiment", "device", "average_evaluation_pruning_time", "std_dev_evaluation_pruning_time", "max_evaluation_pruning_time"]
                    df_evaluation_pruning_times = pd.DataFrame(columns=df_evaluation_pruning_times_columns)
                    
                    experiment = f"evaluation_pruning_{comb_id}"
                    evaluation_pruning_times_row = [experiment, device, average_evaluation_pruning_time, std_dev_evaluation_pruning_time, max_evaluation_pruning_time]
                    df_evaluation_pruning_times.loc[0] = evaluation_pruning_times_row

                    df_evaluation_pruning_stats_columns = ["experiment", "device", "snapshot", *averag_evaluation_pruning_stats_names, *std_dev_evaluation_pruning_stats_names, *max_evaluation_pruning_stats_names]
                    df_evaluation_pruning_stats = pd.DataFrame(columns=df_evaluation_pruning_stats_columns)
                    
                    for index, _snapshot in enumerate(average_evaluation_pruning_stats_list):
                        row = np.array([experiment, device, index])
                        row = np.append(row, average_evaluation_pruning_stats_list[index])
                        row = np.append(row, std_dev_evaluation_pruning_stats_list[index])
                        row = np.append(row, max_evaluation_pruning_stats_list[index])

                        df_evaluation_pruning_stats.loc[index] = row
                        
                    assert df_evaluation_pruning_times.shape[0] == 1
                    assert df_evaluation_pruning_stats.shape[0] == len(average_evaluation_pruning_stats_list)
                    assert df_evaluation_pruning_stats.shape[1] == len(df_evaluation_pruning_stats_columns)
                    
                    display(df_evaluation_pruning_times)
                    display(df_evaluation_pruning_stats)

                    # calculate the average of the df_evaluation_pruning_stats average_evaluation_pruning_cpu_power_draw and average_evaluation_pruning_gpu_power_draw columns
                    global_average_average_evaluation_pruning_cpu_power_draw = df_evaluation_pruning_stats[f"average_evaluation_pruning_cpu_power_draw"].astype(float).mean()
                    global_average_average_evaluation_pruning_gpu_power_draw = df_evaluation_pruning_stats[f"average_evaluation_pruning_gpu_power_draw"].astype(float).mean()
                    
                    # check that both columns have a single scalar value
                    assert np.isscalar(global_average_average_evaluation_pruning_cpu_power_draw)
                    assert np.isscalar(global_average_average_evaluation_pruning_gpu_power_draw)
                    
                    average_evaluation_pruning_exp_duration = df_evaluation_pruning_times[f"average_evaluation_pruning_time"].values[0]
                    
                    logger.info("Average evaluation Pruning experiment duration: {}".format(average_evaluation_pruning_exp_duration))
                    logger.info("Average evaluation Pruning CPU power draw: {}".format(global_average_average_evaluation_pruning_cpu_power_draw))
                    logger.info("Average evaluation Pruning GPU power draw: {}".format(global_average_average_evaluation_pruning_gpu_power_draw))
                    
                    total_average_cpu_energy_consumption = global_average_average_evaluation_pruning_cpu_power_draw * average_evaluation_pruning_exp_duration
                    total_average_gpu_energy_consumption = global_average_average_evaluation_pruning_gpu_power_draw * average_evaluation_pruning_exp_duration
                    
                    model_energy_consumption[comb_id] = {
                        "cpu": total_average_cpu_energy_consumption,
                        "gpu": total_average_gpu_energy_consumption
                    }
                    
                    logger.info("Model energy consumption:\n{}".format(model_energy_consumption))
                    
                # normalize energy consumption between 0 and 1
                normalized_model_energy_consumption = deepcopy(model_energy_consumption)
                
                max_cpu_energy_consumption = max([model_energy_consumption[comb_id]["cpu"] for comb_id in model_energy_consumption])
                max_gpu_energy_consumption = max([model_energy_consumption[comb_id]["gpu"] for comb_id in model_energy_consumption])
                
                for comb_id in model_energy_consumption:
                    normalized_model_energy_consumption[comb_id]["cpu"] = model_energy_consumption[comb_id]["cpu"] / max_cpu_energy_consumption
                    normalized_model_energy_consumption[comb_id]["gpu"] = model_energy_consumption[comb_id]["gpu"] / max_gpu_energy_consumption
                
                # multiply normalized energy consumption by 0.5
                weighted_normalized_model_energy_consumption = deepcopy(normalized_model_energy_consumption)
                
                for comb_id in model_energy_consumption:
                    weighted_normalized_model_energy_consumption[comb_id]["cpu"] = normalized_model_energy_consumption[comb_id]["cpu"] * 0.5 # TODO: parameterize the weight
                    weighted_normalized_model_energy_consumption[comb_id]["gpu"] = normalized_model_energy_consumption[comb_id]["gpu"] * 0.5
                                    
                # Read all evaluation pruning test results files
                evaluation_pruning_test_results_files = glob.glob("poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_evaluation_pruning_test_results_*.pkl")
                measured_performance = {}
                
                for evaluation_pruning_test_results_file in evaluation_pruning_test_results_files:
                    # comb_id = evaluation_pruning_test_results_file.split("_")[-2] + "_" + evaluation_pruning_test_results_file.split("_")[-1].split(".")[0]
                    comb_id = int(evaluation_pruning_test_results_file.split("_")[-1].split(".")[0])
                    
                    with open(evaluation_pruning_test_results_file, 'rb') as f:
                        evaluation_pruning_test_results = pickle.load(f)
                    
                    # Get measured performance
                    measured_performance[comb_id] = evaluation_pruning_test_results["balanced_accuracy"] # TODO: parameterize the metric
                                
                # multiply measured performance by 0.5
                weighted_measured_performance = deepcopy(measured_performance)
                
                for comb_id in measured_performance:
                    weighted_measured_performance[comb_id] = measured_performance[comb_id] * 0.5 # TODO: parameterize the weight
                
                # calculate weighted average of energy consumption and measured performance
                weighted_average_energy_consumption_and_measured_performance = {}
                                
                for comb_id in model_energy_consumption:
                    weighted_average_energy_consumption_and_measured_performance[comb_id] = weighted_normalized_model_energy_consumption[comb_id]["cpu"] + weighted_measured_performance[comb_id] # TODO: parameterize the platform that is being optimized (cpu or gpu)
            
                # get best comb_id
                best_comb_id = max(weighted_average_energy_consumption_and_measured_performance, key=weighted_average_energy_consumption_and_measured_performance.get)
                logger.info(f"Best comb_id: {best_comb_id}")
                logger.info(f"Best comb_id energy consumption: {model_energy_consumption[best_comb_id]}")
                logger.info(f"Best comb_id measured performance: {measured_performance[best_comb_id]}")
                logger.info(f"Best comb_id normalized energy consumption cpu: {normalized_model_energy_consumption[best_comb_id]['cpu']}")
                logger.info(f"Best comb_id normalized energy consumption gpu: {normalized_model_energy_consumption[best_comb_id]['gpu']}")
                logger.info(f"Best comb_id weighted normalized energy consumption cpu: {weighted_normalized_model_energy_consumption[best_comb_id]['cpu']}")
                logger.info(f"Best comb_id weighted normalized energy consumption gpu: {weighted_normalized_model_energy_consumption[best_comb_id]['gpu']}")
                logger.info(f"Best comb_id weighted measured performance: {weighted_measured_performance[best_comb_id]}")
                logger.info(f"Best comb_id weighted average energy consumption and measured performance: {weighted_average_energy_consumption_and_measured_performance[best_comb_id]}")
                
                # save best comb_id
                with open(f"poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_best_comb_id.pkl", "wb") as f:
                    pickle.dump({"best_comb_id": best_comb_id}, f)
                
        # Get pruning time
        pruning_time = pruning_time_measure()

        # Write pruning time to file
        with open(f"poc_energy_efficiency_crypto/Pruning/crypto_spider_5g_fcnn_optimized_benchmark_pruning_time.pkl", "wb") as f:
            pickle.dump({"pruning_time": pruning_time}, f)
        

In [None]:
# Start the child process
p_nas = multiprocessing.Process(target=test_pruning, args=("cpu",))

p_nas.start()
p_nas.join()

#### Quantization

In [None]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import pandas as pd


class QuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    def __init__(self, quantization_config = "LastValueQuantizer", num_bits=8, symmetric=True, narrow_range=False, per_axis=False):
        self.quantization_config = quantization_config
        self.num_bits = num_bits
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.per_axis = per_axis
        
        if self.quantization_config == "LastValueQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)
        elif self.quantization_config == "MovingAverageQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)
        elif self.quantization_config == "AllValuesQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.AllValuesQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)

    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
        return [
            (
                layer.kernel,
                self.quantizer,
            )
        ]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
        return [
            (
                layer.kernel,
                self.quantizer,
            )
        ]

    def set_quantize_weights(self, layer, quantize_weights):
        # Add this line for each item returned in `get_weights_and_quantizers`
        # , in the same order
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        # Add this line for each item returned in `get_activations_and_quantizers`
        # , in the same order.
        layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
        return []

    def get_config(self):
        # serialize the quantizer
        return {
            "quantization_config": self.quantization_config,
            "num_bits": self.num_bits,
            "symmetric": self.symmetric,
            "narrow_range": self.narrow_range,
            "per_axis": self.per_axis,
        }


class QuantizationEstimator(BaseEstimator):
    def __init__(
        self, batch_size=256, epochs=10, quantization_config="LastValueQuantizer"
    ):
        self.batch_size = batch_size
        self.epochs = epochs
        self.quantization_config = quantization_config

    def fit(self, X, y):
        # Fine-tune pretrained model with quantization aware training

        # Define the quantization configuration
        quantize_config = QuantizeConfig(self.quantization_config)
        
        model = tf.keras.Sequential()
        
        # annotate baseline model
        for layer in baseline.layers:
            annotated_layer = tfmot.quantization.keras.quantize_annotate_layer(layer, quantize_config)
            model.add(annotated_layer)
        
        model = tfmot.quantization.keras.quantize_annotate_model(model)

        # Apply quantization to the model
        with tfmot.quantization.keras.quantize_scope({'DefaultDenseQuantizeConfig': QuantizeConfig}):
            q_aware_model = tfmot.quantization.keras.quantize_apply(model)
            
        q_aware_model.compile(
            optimizer="adam",
            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
            metrics=["accuracy"],
        )

        validation_split = 0.1

        history = q_aware_model.fit(
            X,
            y,
            validation_split=validation_split,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=0,
        )

        converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        quantized_model = converter.convert()

        # Save the model to disk
        self.quantized_tflite_model_file = "model.tflite"

        with open(self.quantized_tflite_model_file, "wb") as f:
            f.write(quantized_model)
            
        return self

    def score(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.quantized_tflite_model_file)
        interpreter.allocate_tensors()

        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")

        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")

        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]
        input_data = np.array(test_data_transformed, dtype=dtype)

        # reshape model input
        batch_size = 256

        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(
            input_details["index"], (batch_size, input_data.shape[1])
        )
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]

        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i : i + batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")

            batch_data = np.array(batch, dtype=dtype)

            if len(batch) == batch_size:
                interpreter.set_tensor(input_details["index"], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details["index"])

                preds.append(output_data)
        predictions = np.concatenate(preds)

        # take the labels of the test set
        test_labels = y.values[: len(predictions)]

        balanced_accuracy = balanced_accuracy_score(
            test_labels, np.argmax(predictions, axis=1)
        )

        return balanced_accuracy


In [None]:
with tf.device('/device:CPU:0'):
    param_grid = {'batch_size': [256, 512, 1024, 2048], 'epochs': [10, 20, 30], 'quantization_config': ["LastValueQuantizer", "MovingAverageQuantizer", "AllValuesQuantizer",]}

    grid_search = GridSearchCV(estimator=QuantizationEstimator(), param_grid=param_grid, cv=3, verbose=2)
    grid_search.fit(data_transformed, pd.get_dummies(df_train["tag"]))

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

In [None]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import pandas as pd


class QuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    def __init__(self, quantization_config = "LastValueQuantizer", num_bits=8, symmetric=True, narrow_range=False, per_axis=False):
        self.quantization_config = quantization_config
        self.num_bits = num_bits
        self.symmetric = symmetric
        self.narrow_range = narrow_range
        self.per_axis = per_axis
        
        if self.quantization_config == "LastValueQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)
        elif self.quantization_config == "MovingAverageQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)
        elif self.quantization_config == "AllValuesQuantizer":
            self.quantizer = tfmot.quantization.keras.quantizers.AllValuesQuantizer(num_bits=self.num_bits, symmetric=self.symmetric, narrow_range=self.narrow_range, per_axis=self.per_axis)

    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
        return [
            (
                layer.kernel,
                self.quantizer,
            )
        ]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
        return [
            (
                layer.kernel,
                self.quantizer,
            )
        ]

    def set_quantize_weights(self, layer, quantize_weights):
        # Add this line for each item returned in `get_weights_and_quantizers`
        # , in the same order
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        # Add this line for each item returned in `get_activations_and_quantizers`
        # , in the same order.
        layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
        return []

    def get_config(self):
        # serialize the quantizer
        return {
            "quantization_config": self.quantization_config,
            "num_bits": self.num_bits,
            "symmetric": self.symmetric,
            "narrow_range": self.narrow_range,
            "per_axis": self.per_axis,
        }


class QuantizationEstimator(BaseEstimator):
    def __init__(
        self, batch_size=256, epochs=10,
    ):
        self.batch_size = batch_size
        self.epochs = epochs

    def fit(self, X, y):
        # Fine-tune pretrained model with quantization aware training

        q_aware_model = tfmot.quantization.keras.quantize_model(baseline)
        q_aware_model.compile(
            optimizer="adam", loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False), metrics=["accuracy"]
        )
        
        validation_split = 0.1

        history = q_aware_model.fit(X, y, validation_split=validation_split, epochs=self.epochs, batch_size=self.batch_size)

        converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        quantized_model = converter.convert()

        # Save the model to disk
        self.quantized_tflite_model_file = "model.tflite"

        with open(self.quantized_tflite_model_file, "wb") as f:
            f.write(quantized_model)
            
        return self

    def score(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.quantized_tflite_model_file)
        interpreter.allocate_tensors()

        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")

        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")

        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]
        input_data = np.array(X, dtype=dtype)

        # reshape model input
        batch_size = 256

        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(
            input_details["index"], (batch_size, input_data.shape[1])
        )
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]

        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i : i + batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")

            batch_data = np.array(batch, dtype=dtype)

            if len(batch) == batch_size:
                interpreter.set_tensor(input_details["index"], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details["index"])

                preds.append(output_data)
                
        predictions = np.concatenate(preds)

        # take the labels of the test set
        test_labels = y.values[: len(predictions)]

        balanced_accuracy = balanced_accuracy_score(
            np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1)
        )

        return balanced_accuracy

    def evaluate(self, X, y):
        # Load model
        interpreter = tf.lite.Interpreter(model_path=self.quantized_tflite_model_file)
        interpreter.allocate_tensors()

        # get input shape
        input_shape = interpreter.get_input_details()[0]["shape"]
        logger.info(f"Input shape: {input_shape}")

        # get output shape
        output_shape = interpreter.get_output_details()[0]["shape"]
        logger.info(f"Output shape: {output_shape}")

        # transform data to the expected tensor type
        input_details = interpreter.get_input_details()[0]
        dtype = input_details["dtype"]
        input_data = np.array(X, dtype=dtype)

        # reshape model input
        batch_size = 256

        input_details = interpreter.get_input_details()[0]
        interpreter.resize_tensor_input(
            input_details["index"], (batch_size, input_data.shape[1])
        )
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()[0]
        output_details = interpreter.get_output_details()[0]

        preds = []

        # create batches of test_data_transformed
        for i in range(0, len(X), batch_size):
            batch = X[i : i + batch_size]
            # print(f"Batch {i//batch_size} has {len(batch)} elements")

            batch_data = np.array(batch, dtype=dtype)

            if len(batch) == batch_size:
                interpreter.set_tensor(input_details["index"], batch_data)
                interpreter.invoke()
                output_data = interpreter.get_tensor(output_details["index"])

                preds.append(output_data)
                
        predictions = np.concatenate(preds)

        # take the labels of the test set
        test_labels = y.values[: len(predictions)]

        balanced_accuracy = balanced_accuracy_score(
            np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1)
        )

        accuracy = accuracy_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1))
        balanced_accuracy = balanced_accuracy_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1))
        f1 = f1_score(np.argmax(test_labels, axis=1), np.argmax(predictions, axis=1), average="weighted")
        
        accuracy = round(accuracy, 3)
        balanced_accuracy = round(balanced_accuracy, 3)
        f1 = round(f1, 3)
        
        print(f"Accuracy: {accuracy}")
        print(f"Balanced accuracy: {balanced_accuracy}")
        print(f"F1 score: {f1}")

In [None]:
with tf.device('/device:CPU:0'):
    param_grid = {'batch_size': [256, 512, 1024, 2048], 'epochs': [10, 20, 30]}

    grid_search = GridSearchCV(estimator=QuantizationEstimator(), param_grid=param_grid, cv=3, verbose=2)
    grid_search.fit(data_transformed, pd.get_dummies(df_train["tag"]))

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

In [None]:
qe = QuantizationEstimator(batch_size=256, epochs=10)
qe.fit(data_transformed, pd.get_dummies(df_train["tag"]))
qe.evaluate(test_data_transformed, pd.get_dummies(df_test["tag"]))