# IMPORT AND DATA LOADING

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset and
# model paths used by the following cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# required imports
import librosa
import numpy
import sys
import os
import glob
import itertools
import re
import pickle
import keras
import tensorflow as tf
import tensorflow.keras.models
import tensorflow.keras.backend as K
import keras.optimizers
from keras.models import Model
from keras.layers import Input, Dense, BatchNormalization, Activation, Flatten, Multiply, Add, Reshape
from tqdm import tqdm
from itertools import groupby
from keras.utils import to_categorical
from sklearn import metrics

# Constants
ALPHA = 0.75       # probability that a training sample is labelled with its own machine ID ("match")
N_MELS = 128       # number of mel bands passed to file_to_vector_array in the test loop
HOP_LENGTH = 512   # STFT hop length passed to file_to_vector_array in the test loop
N_FFT = 1024       # FFT size passed to file_to_vector_array in the test loop
POWER = 2.0        # spectrogram exponent passed to file_to_vector_array in the test loop
FRAME_NUMS = 313   # not referenced in the visible cells; presumably time frames per spectrogram - TODO confirm
FRAMES = 10        # not referenced in the visible cells

# FEATURES EXTRACTION

# Load the pre-extracted training spectrograms from Google Drive.
train_data = numpy.load("/content/drive/MyDrive/DCASE_DATA_EXTRACTED/train/training_valve.npy")

# File paths of the training recordings, grouped by machine ID.
# NOTE: pickle can execute arbitrary code while loading - only load files you
# produced yourself. The context manager guarantees the file handle is closed.
with open("/content/drive/MyDrive/DCASE_DATA_EXTRACTED/train/training_valve_grouped_list.npy", "rb") as grouped_list_file:
    grouped_list_by_machine_id = pickle.load(grouped_list_file)

In [3]:
# LABEL GENERATION
# Every spectrogram gets a one-hot label over the four machine IDs.
# With probability ALPHA the label is the recording's own ID ("match");
# otherwise one of the three other IDs is drawn uniformly ("non_match").
machine_id_order = ['00', '02', '04', '06']  # position in this list == hot index of the label

label = []
choices = []
for file_group in grouped_list_by_machine_id:
  for file_path in file_group:
    # The ID is the third '_'-separated token of path component 7
    # (the depth is hard-coded to match the stored paths).
    machine_id = file_path.split('/')[7].split('_')[2]
    random_choice = numpy.random.choice(["match", "non_match"], p = [ALPHA, 1-ALPHA])

    # Fail loudly on an unexpected ID instead of silently reusing the label
    # built for the previous file.
    if machine_id not in machine_id_order:
      raise ValueError("Unexpected machine id '{}' in path {}".format(machine_id, file_path))
    true_index = machine_id_order.index(machine_id)

    if random_choice == "match":
      hot_index = true_index
    else:
      # A draw k in {1, 2, 3} selects the k-th of the remaining IDs, in ascending order.
      not_match_label = numpy.random.choice([1, 2, 3])
      other_indices = [idx for idx in range(len(machine_id_order)) if idx != true_index]
      hot_index = other_indices[not_match_label - 1]

    to_append = [0] * len(machine_id_order)
    to_append[hot_index] = 1

    label.append(to_append)        # label associated with this spectrogram
    choices.append(random_choice)  # choice used to build the label; the training
                                   # step uses it to decide which loss term applies

# Conversion to numpy arrays
label = numpy.asarray(label)
choices = numpy.asarray(choices)
print(label.shape)
print(choices.shape)

(3291, 4)
(3291,)


In [4]:
# Number of recordings in each of the first four machine-ID groups.
for group_index in range(4):
  print(len(grouped_list_by_machine_id[group_index]))

891
608
900
892


In [5]:
# Split the spectrograms by machine ID.
# Assumes train_data stores the groups back to back, in the same order as
# grouped_list_by_machine_id (the label cell relies on the same ordering), so
# the slice boundaries are the cumulative group sizes.
_group_sizes = [len(file_group) for file_group in grouped_list_by_machine_id[:4]]
_bounds = numpy.cumsum([0] + _group_sizes)
id_00 = train_data[_bounds[0]:_bounds[1]]
id_02 = train_data[_bounds[1]:_bounds[2]]
id_04 = train_data[_bounds[2]:_bounds[3]]
id_06 = train_data[_bounds[3]:_bounds[4]]


def _zscore(spectrograms):
  """Z-score normalize an array with its own global mean and standard deviation.

  Returns a tuple (normalized_array, mean, std).
  """
  group_mean = numpy.mean(spectrograms)
  group_std = numpy.std(spectrograms)
  return (spectrograms - group_mean) / group_std, group_mean, group_std


# Z-score normalization, one set of statistics per machine ID. The statistics
# are kept because the test cell normalizes the test recordings with them.
id_00_norm, mean_00, std_00 = _zscore(id_00)
id_02_norm, mean_02, std_02 = _zscore(id_02)
id_04_norm, mean_04, std_04 = _zscore(id_04)
id_06_norm, mean_06, std_06 = _zscore(id_06)

for _mean, _std, _normalized in ((mean_00, std_00, id_00_norm),
                                 (mean_02, std_02, id_02_norm),
                                 (mean_04, std_04, id_04_norm),
                                 (mean_06, std_06, id_06_norm)):
  print("Mean: {m}".format(m=_mean))
  print("Dev.Std: {d}".format(d=_std))
  print(_normalized.shape)

train_data_norm = numpy.concatenate([id_00_norm, id_02_norm, id_04_norm, id_06_norm])

# Training set: each normalized spectrogram is cut into 15 overlapping windows
# of 128 rows x 32 columns, starting at columns 0, 20, ..., 280.
training = numpy.zeros((len(train_data_norm)*15, 128, 32))
index = 0
for vector_array in train_data_norm:
  for window_start in range(0, 313 - 32 + 1, 20):
    training[index] = vector_array[:128, window_start:window_start + 32]
    index += 1
    

# Every spectrogram contributes 15 consecutive windows to `training`, so its
# label and its match/non_match choice are repeated 15 times in the same order.
# numpy.repeat keeps exactly one block of 15 entries per spectrogram, so the
# labels can never drift out of step with the windows.
frames_per_spectrogram = 15  # must equal the number of windows cut from each spectrogram

training_labels = numpy.repeat(label, frames_per_spectrogram, axis=0)  # labels used for training
training_choices = numpy.repeat(choices, frames_per_spectrogram)       # choices used for training


# Shuffling: permute all windows (together with their labels and choices) and
# hold out the last 10% of the shuffled order for validation.
split_validation = int(len(train_data_norm)*15*0.1)
split_train = int(len(train_data_norm)*15 - split_validation)
print(split_train)
randomize = numpy.arange(len(training))
numpy.random.shuffle(randomize)
training_tot_shuffle = training[randomize]
training_tot_labels_shuffle = training_labels[randomize]
training_tot_choices_shuffle = training_choices[randomize]

# The validation part is sliced from split_train onwards: a negative slice
# [-split_validation:] would return the WHOLE array when split_validation is 0.
training_shuffle = training_tot_shuffle[:split_train]
validation_shuffle = training_tot_shuffle[split_train:]

training_labels_shuffle = training_tot_labels_shuffle[:split_train]
validation_labels_shuffle = training_tot_labels_shuffle[split_train:]

training_choices_shuffle = training_tot_choices_shuffle[:split_train]
validation_choices_shuffle = training_tot_choices_shuffle[split_train:]

print(training_shuffle.shape)
print(training_labels_shuffle.shape)
print(training_choices_shuffle.shape)
print(validation_shuffle.shape)
print(validation_labels_shuffle.shape)
print(validation_choices_shuffle.shape)


Mean: -30.935727037260186
Dev.Std: 8.935703926392367
(891, 128, 313)
Mean: -31.005178730533846
Dev.Std: 8.917913805983776
(608, 128, 313)
Mean: -30.800382685318375
Dev.Std: 8.81481216617132
(900, 128, 313)
Mean: -30.69444303472267
Dev.Std: 8.911872919297977
(892, 128, 313)
44429
(44429, 128, 32)
(44429, 4)
(44429,)
(4936, 128, 32)
(4936, 4)
(4936,)


# KERAS MODEL DEFINITION

In [6]:
# KERAS MODEL
# Conditioned convolutional autoencoder built with the functional API.
# Shape comments below are (height, width, channels) and follow from the
# strides with 'same' padding. Layers are created in a fixed order; keep that
# order, because automatically generated layer names depend on it.

input_img = keras.Input(shape=(128, 32, 1))  # one 128x32 spectrogram window, channels last
input_Label = keras.Input(shape = [4,])  # one-hot machine-ID label (4 classes)

# encoder: Conv -> BatchNorm -> ReLU blocks that progressively downsample the window
x = keras.layers.Conv2D(32, (5, 5),strides=(2,1), padding='same')(input_img)   # 128x32x1 -> 64x32x32
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2D(64, (5, 5),strides=(2,1), padding='same')(x)           # -> 32x32x64
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2D(128, (5, 5),strides=(2,2), padding='same')(x)          # -> 16x16x128
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2D(256, (3, 3),strides=(2,2), padding='same')(x)          # -> 8x8x256
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2D(512, (3, 3),strides=(2,2), padding='same')(x)          # -> 4x4x512
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)

# Remember the shape of the last feature map; the decoder rebuilds it from the latent code.
volumeSize = keras.backend.int_shape(x)
# A 4x4 'valid' convolution over the 4x4 map collapses it to 1x1x40, i.e. a
# 40-dimensional latent code once flattened.
x = keras.layers.Conv2D(40, (4,4), strides=(1,1), padding='valid')(x)
encoded = keras.layers.Flatten()(x)

# Second Branch - Conditioning Feed Forward Neural Network
# The label produces a sigmoid gate c and an offset q (both 40-dimensional);
# the conditioned code is q + c * encoded.
c = keras.layers.Dense(40)(input_Label)
c = keras.layers.Activation('sigmoid')(c)
q = keras.layers.Dense(40)(input_Label)

m = keras.layers.Multiply()([c,encoded])
encoded_input_conditioned = keras.layers.Add()([q, m]) # input passed to the decoder
    
# decoder: mirrors the encoder with transposed convolutions
x = keras.layers.Dense(volumeSize[1] * volumeSize[2] * volumeSize[3])(encoded_input_conditioned) 
# NOTE: the channel count 512 is hard-coded here and must equal volumeSize[3].
x = keras.layers.Reshape((volumeSize[1], volumeSize[2], 512))(x)                # 4x4x512

x = keras.layers.Conv2DTranspose(256, (3, 3),strides=(2,2), padding='same')(x)  # -> 8x8x256
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2DTranspose(128, (3, 3),strides=(2,2), padding='same')(x)  # -> 16x16x128
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2DTranspose(64, (5, 5),strides=(2,2), padding='same')(x)   # -> 32x32x64
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
x = keras.layers.Conv2DTranspose(32, (5, 5),strides=(2,1), padding='same')(x)   # -> 64x32x32
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Activation('relu')(x)
    
# Final transposed convolution restores 128x32x1 (linear output, no activation).
decoded = keras.layers.Conv2DTranspose(1, (5, 5),strides=(2,1), padding='same')(x) 
# NOTE: the model output therefore has shape (batch, 1, 128, 32), one axis more
# than the (batch, 128, 32) arrays used as reconstruction targets.
decoded_reshaped = keras.layers.Reshape((1, 128, 32))(decoded)  

# Shared metric objects. They are returned by CustomModel.metrics so that
# Keras resets them automatically at the start of every epoch.
loss_tracker = keras.metrics.Mean(name="loss")
mse_metric = keras.metrics.MeanSquaredError(name="mse")

class CustomModel(keras.Model):
    """Conditioned autoencoder with a match / non_match training objective.

    `fit()` / `evaluate()` feed batches of the form `(x, y)` where
    `x = [windows, one_hot_labels, choices]` and `y` holds the reconstruction
    targets. Only `x[0]` and `x[1]` are passed to the network; `x[2]` contains
    the strings "match" / "non_match" and selects the loss term per sample:

    * "match"     -> the prediction must reconstruct `y`;
    * "non_match" -> the prediction must equal the constant `C`.

    Targets are reshaped to the prediction layout before any comparison, and
    samples are selected with boolean masks, so every prediction is compared
    only with its own target and an empty selection contributes 0, not NaN.
    """

    @property
    def metrics(self):
        return [loss_tracker, mse_metric]

    @staticmethod
    def _targets_like(y, y_pred):
        """Cast and reshape the targets `y` to the dtype and shape of `y_pred`."""
        return tf.reshape(tf.cast(y, y_pred.dtype), tf.shape(y_pred))

    @staticmethod
    def _selected_mse(target, y_pred, selected):
        """Mean squared error over the samples where `selected` is True.

        `target` may be a tensor shaped like `y_pred` or a scalar. Returns 0
        when no sample is selected.
        """
        squared_error = tf.math.squared_difference(y_pred, target)
        # Average over every axis except the batch axis -> one value per sample.
        sample_axes = list(range(1, squared_error.shape.rank))
        per_sample = tf.reduce_mean(squared_error, axis=sample_axes)
        weights = tf.cast(selected, per_sample.dtype)
        return tf.math.divide_no_nan(tf.reduce_sum(per_sample * weights),
                                     tf.reduce_sum(weights))

    def test_step(self, data):
        """Validation step: reconstruction MSE on the "match" samples only."""
        # Unpack the data
        x, y = data
        # Compute predictions
        y_pred = self([x[0], x[1]], training=False)
        y_true = self._targets_like(y, y_pred)

        # Boolean mask of the samples labelled with their own machine ID
        is_match = tf.reshape(tf.equal(x[2], "match"), [-1])

        # Update the metric with the matching targets / predictions only
        mse_metric.update_state(tf.boolean_mask(y_true, is_match),
                                tf.boolean_mask(y_pred, is_match))

        return {"mse": mse_metric.result()}

    def train_step(self, data):
        """One optimization step on the weighted match / non_match loss."""
        # Unpack the data. Its structure depends on what is passed to `fit()`.
        x, y = data

        # Constant target used for the loss of non_match samples
        C = 5
        # Weight of the match term (same value as the labelling probability)
        ALPHA = 0.75

        # Per-sample selection masks
        is_match = tf.reshape(tf.equal(x[2], "match"), [-1])
        is_non_match = tf.reshape(tf.equal(x[2], "non_match"), [-1])

        with tf.GradientTape() as tape:
            y_pred = self([x[0], x[1]], training=True)  # Forward pass
            y_true = self._targets_like(y, y_pred)

            # Match loss: reconstruct the input window
            loss_m = self._selected_mse(y_true, y_pred, is_match) + 1e-6
            # Non-match loss: push the output towards the constant C
            loss_nm = self._selected_mse(tf.cast(C, y_pred.dtype), y_pred, is_non_match) + 1e-6

            # Loss used for the weight update
            loss = ALPHA * loss_m + (1 - ALPHA) * loss_nm

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Compute our own metrics. The training MSE covers every sample of the
        # batch (match and non_match alike), against its own target.
        loss_tracker.update_state(loss)
        mse_metric.update_state(y_true, y_pred)
        return {"loss": loss_tracker.result(), "mse": mse_metric.result()}


def get_lr_metric(optimizer):
    """Return a metric function named `lr` that reports the optimizer's learning rate."""
    def lr(y_true, y_pred):
        # ._decayed_lr is queried rather than .lr
        return optimizer._decayed_lr(tf.float32)
    return lr

# Adam optimizer and the learning-rate metric bound to it
adam_settings = {"learning_rate": 0.0001, "beta_1": 0.95, "beta_2": 0.999}
opt = keras.optimizers.Adam(**adam_settings)
lr_metric = get_lr_metric(opt)

# Wrap the graph defined above in the custom training / evaluation logic
autoencoder = CustomModel(inputs=(input_img, input_Label), outputs = decoded_reshaped)
autoencoder.compile(optimizer = opt, metrics=["mse", lr_metric])
autoencoder.summary()

Model: "custom_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128, 32, 1)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 32, 32)   832         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 32, 32)   128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 64, 32, 32)   0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [None]:
# Inputs are [windows, one-hot labels, match/non_match choices]; the targets
# are the windows themselves.
train_inputs = [training_shuffle, training_labels_shuffle, training_choices_shuffle]
validation_inputs = [validation_shuffle, validation_labels_shuffle, validation_choices_shuffle]

history = autoencoder.fit(
    train_inputs,
    training_shuffle,
    validation_data=(validation_inputs, validation_shuffle),
    batch_size=64,
    epochs=100,
    shuffle=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100

In [None]:
# Save the trained model
autoencoder.save('/content/drive/MyDrive/models/IDCCAE/valve/1/model_valve.h5')

# Save the training history (dictionary of per-epoch values) as a pickle
with open('/content/drive/MyDrive/models/IDCCAE/valve/1/trainHistoryDict', 'wb') as history_file:
    history_file.write(pickle.dumps(history.history))

# TEST

In [None]:
import csv

def save_csv(save_file_path,
             save_data):
    """Write the rows in `save_data` to `save_file_path` as CSV, replacing any existing file.

    Rows are terminated with a single newline character.
    """
    with open(save_file_path, "w", newline="") as csv_file:
        csv.writer(csv_file, lineterminator='\n').writerows(save_data)


# load dataset
def select_dirs(path):
    """Return the sorted filesystem entries matching the glob pattern `path` (made absolute first)."""
    return sorted(glob.glob(os.path.abspath(path)))

def file_load(wav_name, mono=False):
    """Load an audio file with librosa at its native sampling rate.

    Args:
        wav_name: path of the audio file.
        mono: forwarded to `librosa.load`.

    Returns:
        Whatever `librosa.load` returns (callers unpack it as `signal, sr`).

    Raises:
        Any exception raised by `librosa.load`; the offending file name is
        reported on stderr first, then the original exception propagates.
    """
    try:
        return librosa.load(wav_name, sr=None, mono=mono)
    except Exception:
        # Report which file failed, then let the real error surface instead of
        # returning None (which callers cannot unpack).
        print("file_broken or not exists!! : {}".format(wav_name), file=sys.stderr)
        raise

def file_list_generator(target_dir, dir_name="train", ext="wav"):
    """Return the sorted absolute paths of `<target_dir>/<dir_name>/*.<ext>`.

    Prints the target directory, and an error marker when nothing matches.
    """
    print("target_dir : {}".format(target_dir))

    # glob pattern selecting the files of the requested split
    pattern = os.path.abspath("{dir}/{dir_name}/*.{ext}".format(dir=target_dir, dir_name=dir_name, ext=ext))
    files = glob.glob(pattern)
    files.sort()
    if not files:
      print("errore")
    return files


def file_to_vector_array(file_name, n_mels=64, n_fft=1024, hop_length=512, power=2.0):
    """Return the log-mel spectrogram of the audio file `file_name`.

    The mel spectrogram is computed with librosa and converted to log energy
    as `20 / power * log10(mel + eps)`, with eps the float machine epsilon.
    """
    # load the signal and compute its mel spectrogram
    signal, sample_rate = file_load(file_name)
    mel = librosa.feature.melspectrogram(y=signal,
                                         sr=sample_rate,
                                         n_fft=n_fft,
                                         hop_length=hop_length,
                                         n_mels=n_mels,
                                         power=power)

    # convert to log mel energy
    scale = 20.0 / power
    return scale * numpy.log10(mel + sys.float_info.epsilon)

  
def list_to_vector_array(file_list, msg="calc...", n_mels=64, n_fft=1024, hop_length=512, power=2.0, frames=10):
    """Stack the log-mel spectrograms of all files into one array.

    Args:
        file_list: audio file paths.
        msg: description shown on the progress bar.
        n_mels, n_fft, hop_length, power: forwarded to `file_to_vector_array`.
        frames: number of columns every spectrogram is expected to have.

    Returns:
        Float array of shape `(len(file_list), n_mels, frames)`; entry `i` is
        the spectrogram of `file_list[i]`. An empty list yields an empty array.

    Raises:
        ValueError: if a spectrogram does not have shape `(n_mels, frames)`.
    """
    dataset = numpy.zeros((len(file_list), n_mels, frames), float)
    for idx in tqdm(range(len(file_list)), desc=msg):
        vector_array = file_to_vector_array(file_list[idx], n_mels=n_mels, n_fft=n_fft, hop_length=hop_length, power=power)

        # Check explicitly: a silent broadcast would hide a wrong `frames` value.
        if vector_array.shape != (n_mels, frames):
            raise ValueError("spectrogram of {} has shape {}, expected {}".format(
                file_list[idx], vector_array.shape, (n_mels, frames)))
        # One slot per file.
        dataset[idx] = vector_array
    return dataset

def key_by_id(item):
  """Return the machine ID of a path: the third '_'-separated token of its last '/'-separated component."""
  file_name = item.split("/")[-1]
  return file_name.split("_")[2]

def get_machine_id_list_for_test(target_dir,
                                 dir_name="test",
                                 ext="wav"):
    """Return the sorted, de-duplicated `id_NN` tokens found in the test file paths.

    The pattern is matched against the full absolute path of every file in
    `<target_dir>/<dir_name>/*.<ext>`.
    """
    # list the test files
    dir_path = os.path.abspath("{dir}/{dir_name}/*.{ext}".format(dir=target_dir, dir_name=dir_name, ext=ext))
    file_paths = sorted(glob.glob(dir_path))

    # collect every distinct id token
    unique_ids = set()
    for file_path in file_paths:
        unique_ids.update(re.findall('id_[0-9][0-9]', file_path))
    return sorted(unique_ids)

def test_file_list_generator(target_dir,
                             id_name,
                             dir_name="test",
                             prefix_normal="normal",
                             prefix_anomaly="anomaly",
                             ext="wav"):
  
    print("target_dir : {}".format(target_dir+"_"+id_name))

    normal_files = sorted(
    glob.glob("{dir}/{dir_name}/{prefix_normal}_{id_name}*.{ext}".format(dir=target_dir,
                                                                                 dir_name=dir_name,
                                                                                 prefix_normal=prefix_normal,
                                                                                 id_name=id_name,
                                                                                 ext=ext)))
    normal_labels = numpy.zeros(len(normal_files))
    anomaly_files = sorted(
    glob.glob("{dir}/{dir_name}/{prefix_anomaly}_{id_name}*.{ext}".format(dir=target_dir,
                                                                                  dir_name=dir_name,
                                                                                  prefix_anomaly=prefix_anomaly,
                                                                                  id_name=id_name,
                                                                                  ext=ext)))
    anomaly_labels = numpy.ones(len(anomaly_files))
    files = numpy.concatenate((normal_files, anomaly_files), axis=0)
    labels = numpy.concatenate((normal_labels, anomaly_labels), axis=0)
    print("test_file  num : {num}".format(num=len(files)))
    if len(files) == 0:
        print("no_wav_file!!")
    print("\n========================================")

    return files, labels

In [None]:
target_dir = "/content/drive/MyDrive/test/valve"
machine_type = os.path.basename(target_dir)

print("============== MODEL LOAD ==============")
# path of the trained model
model_file = "/content/drive/MyDrive/models/IDCCAE/valve/1/model_valve.h5"

# stop here when the model file is missing
if not os.path.exists(model_file):
  print("{} model not found ".format(machine_type))
  sys.exit(-1)

# custom class / metrics the saved model refers to by name
custom_objects = {'CustomModel': CustomModel, 'mse':mse_metric, 'lr': lr_metric}
model = keras.models.load_model(model_file, custom_objects=custom_objects)

machine_id_list = get_machine_id_list_for_test(target_dir)

# rows of the summary csv (AUC and pAUC per machine ID)
csv_lines = [[machine_type], ["id", "AUC", "pAUC"]]
performance = []

# Per machine ID: the one-hot "match" label fed to the conditioning branch and
# the z-score statistics (mean, std) computed on that ID's training data.
id_settings = {
  "00": (numpy.asarray([1,0,0,0]), mean_00, std_00),
  "02": (numpy.asarray([0,1,0,0]), mean_02, std_02),
  "04": (numpy.asarray([0,0,1,0]), mean_04, std_04),
  "06": (numpy.asarray([0,0,0,1]), mean_06, std_06),
}

# Start columns of the 128x32 windows cut from every spectrogram (15 windows).
window_starts = range(0, 313 - 32 + 1, 20)

for id_str in machine_id_list:
  id_num = id_str.split("_")[1]

  # Skip IDs that were not part of training: without their label and
  # statistics the scores would be computed with another ID's settings.
  if id_num not in id_settings:
    print("skipping machine id {}: no label / normalization statistics available".format(id_num))
    continue
  match_labels, mean, std = id_settings[id_num]

  # load test files
  test_files, y_true = test_file_list_generator(target_dir, id_str)

  # setup anomaly score file path
  anomaly_score_csv = "/content/drive/MyDrive/models/IDCCAE/valve/1/anomaly_score_{machine_type}_{id_str}.csv".format(machine_type=machine_type, id_str=id_str)
  anomaly_score_list = []

  print("\n============== BEGIN TEST FOR A MACHINE ID {id} ==============".format(id=id_num))

  y_pred = [0. for k in test_files]

  # Every window of a file is conditioned on the same "match" label.
  window_labels = numpy.tile(match_labels.reshape((1,4)), (len(window_starts), 1))

  for file_idx, file_path in tqdm(enumerate(test_files), total=len(test_files)):

    # Spectrogram of the test recording
    data = file_to_vector_array(file_path, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH, power=POWER)

    # Normalize with the training statistics of this machine ID
    data = ( data - mean ) / std

    # Cut the 128x32 windows
    data_splitted = numpy.zeros((len(window_starts), 128, 32))
    for index, window_start in enumerate(window_starts):
      data_splitted[index] = data[:128, window_start:window_start + 32]

    # Reconstruct all windows with a single predict call and compute the mean
    # squared error of each window
    predicted = model.predict([data_splitted, window_labels])
    elem_error = numpy.mean(numpy.square(data_splitted - predicted.reshape(data_splitted.shape)), axis=(1, 2))

    # Anomaly score of the file: mean error over its windows
    y_pred[file_idx] = numpy.mean(elem_error)
    anomaly_score_list.append([os.path.basename(file_path), y_pred[file_idx]])


  save_csv(save_file_path=anomaly_score_csv, save_data=anomaly_score_list)

  # AUC and pAUC for this machine ID
  auc = metrics.roc_auc_score(y_true,y_pred)
  p_auc = metrics.roc_auc_score(y_true, y_pred, max_fpr=0.1)
  csv_lines.append([id_str.split("_", 1)[1], auc, p_auc])
  performance.append([auc, p_auc])
  print("AUC : {}".format(auc))
  print("pAUC : {}".format(p_auc))

  print("\n============ END OF TEST FOR A MACHINE ID ============")

# Print AUC and pAUC averaged over the machine IDs and store the summary csv.
print("\n============ AVERAGE PERFORMANCES ============")
performance_array = numpy.array(performance, dtype=float)
averaged_performance = performance_array.mean(axis=0)
csv_lines.append(["Average", *averaged_performance])
csv_lines.append([])
print(averaged_performance)

result_path = "/content/drive/MyDrive/models/IDCCAE/valve/1/anomaly_score_avg_valve.csv"
save_csv(save_file_path=result_path, save_data=csv_lines)