In [None]:
# Mount Google Drive inside the Colab runtime; the dataset root directories used
# by this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the third-party packages this notebook needs into the runtime.
# Only pandas is pinned (1.5.3); the other packages are installed at whatever
# version pip resolves, so results may vary between runs of this cell.
!pip install pandas==1.5.3
!pip install tsfel
!pip install keras_tuner
!pip install wandb
!pip install tqdm

In [None]:
import os
import time
import tsfel
import warnings
from datetime import datetime
import keras_tuner
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.cm as cm
from sklearn import metrics
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn import preprocessing
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import MinMaxScaler
import hashlib
import timeit
import math
import wandb
from tqdm import tqdm

# Dataset locations on the mounted Google Drive: one directory holds the
# collision-free ("normal") recordings, the other the recordings with collisions.
# These absolute paths are specific to the Colab/Drive layout used here.
ROOTDIR_DATASET_NORMAL = "/content/drive/MyDrive/Kuka_v1/normal"
ROOTDIR_DATASET_ANOMALY = "/content/drive/MyDrive/Kuka_v1/collisions"

In [None]:
def get_df_action(filepaths_csv, filepaths_meta, action2int=None, delimiter=";"):
    """Load sensor recordings and action metadata, and label every sample with its action.

    Parameters
    ----------
    filepaths_csv : iterable of str
        Sensor CSV files. They must contain a ``time`` column and the two
        energy columns that are dropped below.
    filepaths_meta : iterable of str
        Metadata CSV files with the columns ``str_repr``, ``init_timestamp``
        and ``completed_timestamp``.
    action2int : dict, optional
        Existing action -> integer mapping. When omitted, a new mapping is
        built starting from 1. The dictionary is updated in place with
        ``'idle' -> 0``.
    delimiter : str, default ";"
        Field separator used for BOTH the metadata and the sensor files.

    Returns
    -------
    df_action : pd.DataFrame
        Sensor samples with an ``action`` and a ``duration`` column. Samples
        that fall outside every action interval are labelled ``'idle'`` and
        get the mean action duration (as a string).
    df : pd.DataFrame
        Sensor samples indexed by timestamp, rows containing NaN removed.
    df_meta : pd.DataFrame
        Concatenated metadata indexed by ``init_timestamp``.
    action2int : dict
        Action -> integer mapping, with ``'idle'`` mapped to 0.
    """
    # Load dataframes
    print("Loading data.")
    # Make dataframes
    # Some classes show the output boolean parameter as True rather than true. Fix here
    dfs_meta = list()
    for filepath in filepaths_meta:
        df_m = pd.read_csv(filepath, sep=delimiter)
        df_m.str_repr = df_m.str_repr.str.replace('True', 'true')
        df_m['filepath'] = filepath
        dfs_meta.append(df_m)

    df_meta = pd.concat(dfs_meta)
    df_meta.index = pd.to_datetime(df_meta.init_timestamp.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    df_meta['completed_timestamp'] = pd.to_datetime(df_meta.completed_timestamp.astype('datetime64[ms]'),
                                                    format="%Y-%m-%dT%H:%M:%S.%f")
    df_meta['init_timestamp'] = pd.to_datetime(df_meta.init_timestamp.astype('datetime64[ms]'),
                                               format="%Y-%m-%dT%H:%M:%S.%f")

    # Eventually reduce number of classes
    # df_meta['str_repr'] = df_meta.str_repr.str.split('=', expand = True,n=1)[0]
    # df_meta['str_repr'] = df_meta.str_repr.str.split('(', expand=True, n=1)[0]

    actions = df_meta.str_repr.unique()
    # Honour the caller's delimiter here as well (it used to be hard-coded to ";").
    dfs = [pd.read_csv(filepath_csv, sep=delimiter) for filepath_csv in filepaths_csv]
    df = pd.concat(dfs)

    # Sort columns by name !!!
    df = df.sort_index(axis=1)

    # Set timestamp as index
    df.index = pd.to_datetime(df.time.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    # Drop useless columns
    columns_to_drop = [column for column in df.columns if "Abb" in column or "Temperature" in column]
    df.drop(["machine_nameKuka Robot_export_active_energy",
             "machine_nameKuka Robot_import_reactive_energy"] + columns_to_drop, axis=1, inplace=True)

    # Collect, for every metadata row, the sensor samples recorded between the
    # start and the completion of that action.
    df_action = list()
    for action in actions:
        for index, row in df_meta[df_meta.str_repr == action].iterrows():
            start = row['init_timestamp']
            end = row['completed_timestamp']
            df_tmp = df.loc[start: end].copy()
            df_tmp['action'] = action
            # Duration as string (so is not considered a feature)
            df_tmp['duration'] = str((row['completed_timestamp'] - row['init_timestamp']).total_seconds())
            df_action.append(df_tmp)
    df_action = pd.concat(df_action, ignore_index=True)
    df_action.index = pd.to_datetime(df_action.time.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    # A sample covered by several action intervals keeps only its first label.
    df_action = df_action[~df_action.index.duplicated(keep='first')]

    # Drop NaN
    df = df.dropna(axis=0)
    df_action = df_action.dropna(axis=0)

    if action2int is None:
        action2int = dict()
        j = 1
        for label in df_action.action.unique():
            action2int[label] = j
            j += 1

    df_merged = df.merge(df_action[['action']], left_index=True, right_index=True, how="left")
    # print(f"df_merged len: {len(df_merged)}")
    # Where df_merged in NaN Kuka is in idle state
    df_idle = df_merged[df_merged['action'].isna()].copy()
    df_idle['action'] = 'idle'
    df_idle['duration'] = df_action.duration.values.astype(float).mean().astype(str)
    df_action = pd.concat([df_action, df_idle])

    # idle label must be 0 for debug mode
    action2int['idle'] = 0
    print(f"Found {len(set(df_action['action']))} different actions.")
    print("Loading data done.\n")

    return df_action, df, df_meta, action2int

In [None]:
def _recording_paths(root, template, recordings):
    """Build one path under ``root`` per recording id by filling ``{r}`` in ``template``."""
    return [os.path.join(root, template.format(r=recording)) for recording in recordings]


# Collision-free recordings: sensor CSV files and their metadata files.
filepath_csv = _recording_paths(ROOTDIR_DATASET_NORMAL, "rec{r}_20220811_rbtc_0.1s.csv", [0, 2, 3, 4])
filepath_meta = _recording_paths(ROOTDIR_DATASET_NORMAL, "rec{r}_20220811_rbtc_0.1s.metadata", [0, 2, 3, 4])

# Recordings that contain collisions: sensor CSV files and their metadata files.
filepath_csv_anomaly = _recording_paths(ROOTDIR_DATASET_ANOMALY, "rec{r}_collision_20220811_rbtc_0.1s.csv", [1, 5])
filepath_meta_anomaly = _recording_paths(ROOTDIR_DATASET_ANOMALY, "rec{r}_collision_20220811_rbtc_0.1s.metadata", [1, 5])

In [None]:
def hash_string_to_float(s):
    """Deterministically map a string onto a float in the interval [-1, 1].

    The SHA-256 digest of the encoded string is read as one unsigned integer,
    scaled by 2**256 and then stretched from [0, 1] to [-1, 1].
    """
    digest = hashlib.sha256(s.encode()).digest()
    # The 32 digest bytes, read big-endian, equal the integer value of the hex digest.
    digest_as_int = int.from_bytes(digest, "big")
    unit_value = digest_as_int / 2 ** 256
    return 2 * unit_value - 1

def transform_datetime_strings(datetime_str_array):
    """Reformat an array of timestamp strings, keeping the array's shape.

    Every element is parsed as ``%Y-%m-%dT%H:%M:%S.%f%z`` and rendered as
    ``%Y-%m-%d %H:%M:%S``; the fractional seconds and the UTC offset are
    discarded from the text (the clock time itself is not shifted).
    """
    source_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    target_format = '%Y-%m-%d %H:%M:%S'
    reformatted = [
        datetime.strptime(raw, source_format).strftime(target_format)
        for raw in datetime_str_array.flatten()
    ]
    return np.array(reformatted).reshape(datetime_str_array.shape)

In [None]:
def singature_matrix_generator(windows):
  """Compute one signature (Gram) matrix per window.

  Parameters
  ----------
  windows : array-like, shape (n_windows, win_size, sensor_n)
      Windowed multivariate signal; the last axis indexes the sensors.

  Returns
  -------
  np.ndarray, shape (n_windows, sensor_n, sensor_n), dtype float64
      Entry ``[k, i, j]`` is the inner product of sensor ``i`` and sensor ``j``
      over window ``k`` divided by ``win_size``. Each matrix is symmetric up to
      floating-point rounding.
  """
  windows = np.asarray(windows, dtype=np.float64)
  # The number of sensors is the size of the last axis of `windows`.
  win_size = windows.shape[1]

  # Generate the signature matrices
  print("Generating signature matrices...")

  # Batched product (n, sensor_n, win_size) @ (n, win_size, sensor_n) replaces the
  # per-window / per-sensor-pair Python loops and also yields a correctly shaped
  # (0, sensor_n, sensor_n) result when there are no windows.
  matrix_all = np.matmul(windows.transpose(0, 2, 1), windows)
  matrix_all /= win_size  # rescale by win_size

  return matrix_all

In [None]:
class MSCRED(tf.keras.Model):
    """Reconstruction model over sequences of signature matrices.

    Every matrix of a sequence goes through a strided 2-D convolutional
    encoder. Each encoder scale is fed to a ConvLSTM layer whose per-step
    outputs are summarised by a dot-product attention against the last step.
    A transposed-convolution decoder with skip connections then produces one
    reconstruction per sequence, which the loss compares to the LAST matrix of
    that sequence.

    Parameters
    ----------
    opt : dict
        Must provide 'batch_size', 'learning_rate', 'training_iters' (number
        of epochs passed to ``fit``) and 'step_max' (sequence length).
    matrixes_train, matrixes_test : np.ndarray
        Signature matrices used by ``train()`` and ``test()``. Axis 2 is read
        as the number of sensors; in this notebook they come from
        ``singature_matrix_generator`` and are (n, sensor_n, sensor_n).
    """

    def __init__(self, opt, matrixes_train, matrixes_test):
        super(MSCRED, self).__init__()
        # Initialize parameters
        self.batch_size = opt['batch_size']
        self.learning_rate = opt['learning_rate']
        self.training_iters = opt['training_iters']
        self.step_max = opt['step_max']

        self.matrixes_train = matrixes_train
        self.matrixes_test = matrixes_test
        self.sensor_n = matrixes_train.shape[2]
        self.win_size = matrixes_train.shape[1]

        # Only the number of names matters: it fixes the channel count (scale_n)
        # of the model input and output.
        self.value_colnames = ['total_count', 'error_count', 'error_rate']
        self.scale_n = len(self.value_colnames)

        # Define CNN encoder layers
        self.conv1 = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='selu')
        self.conv2 = tf.keras.layers.Conv2D(64, (3, 3), padding='same', strides=(2, 2), activation='selu')
        self.conv3 = tf.keras.layers.Conv2D(128, (2, 2), padding='same', strides=(2, 2), activation='selu')
        self.conv4 = tf.keras.layers.Conv2D(256, (2, 2), padding='same', strides=(2, 2), activation='selu')

        # One ConvLSTM per encoder scale
        self.conv1_lstm = tf.keras.layers.ConvLSTM2D(32, (2, 2), padding='same', return_sequences=True, activation='selu')
        self.conv2_lstm = tf.keras.layers.ConvLSTM2D(64, (2, 2), padding='same', return_sequences=True, activation='selu')
        self.conv3_lstm = tf.keras.layers.ConvLSTM2D(128, (2, 2), padding='same', return_sequences=True, activation='selu')
        self.conv4_lstm = tf.keras.layers.ConvLSTM2D(256, (2, 2), padding='same', return_sequences=True, activation='selu')

        # Decoder layers
        self.deconv4 = tf.keras.layers.Conv2DTranspose(128, (2, 2), padding='same', strides=(2, 2), activation='selu')
        self.deconv3 = tf.keras.layers.Conv2DTranspose(64, (2, 2), padding='same', strides=(2, 2), activation='selu')
        self.deconv2 = tf.keras.layers.Conv2DTranspose(32, (3, 3), padding='same', strides=(2, 2), activation='selu')
        self.deconv1 = tf.keras.layers.Conv2DTranspose(self.scale_n, (3, 3), padding='same', activation='selu')

    def create_sequences(self,data):
        """Stack every run of ``step_max`` consecutive items of ``data`` into one array.

        ``data`` must be shaped (n, sensor_n, sensor_n, scale_n); the result is
        (n - step_max + 1, step_max, sensor_n, sensor_n, scale_n). ``train()`` and
        ``test()`` do not use this method; they build the sequences with tf.data.
        """
        step_max = self.step_max
        num_sequences = data.shape[0] - step_max + 1
        sequences = np.zeros((num_sequences, step_max, self.sensor_n, self.sensor_n, self.scale_n))
        for i in range(num_sequences):
            sequences[i] = data[i:i + step_max]
        return sequences

    def attention_layer(self, inputs):
        """Collapse the time axis with dot-product attention against the last step.

        ``inputs`` is (batch, step, height, width, channels). The score of a step
        is the sum of its element-wise product with the last step; the scores are
        soft-maxed over the step axis and used to average the steps. Returns a
        tensor shaped (batch, height, width, channels).
        """
        batch_size = tf.shape(inputs)[0]
        step_max = tf.shape(inputs)[1]
        height = tf.shape(inputs)[2]
        width = tf.shape(inputs)[3]
        channels = tf.shape(inputs)[4]

        # Extract the last time step
        last_output = inputs[:, -1]  # Shape: (batch_size, height, width, channels)

        # Attention score of every time step, computed for all steps at once by
        # broadcasting the last step over the step axis (no tf.map_fn needed).
        attention_scores = tf.reduce_sum(
            inputs * last_output[:, tf.newaxis], axis=[2, 3, 4])  # Shape: (batch_size, step_max)

        attention_weights = tf.nn.softmax(attention_scores, axis=1)  # Normalise the scores

        # Apply the attention weights
        reshaped_inputs = tf.reshape(inputs, [batch_size, step_max, -1])  # Flatten for the matrix product
        context_vector = tf.matmul(attention_weights[:, tf.newaxis, :], reshaped_inputs)  # Shape: (batch_size, 1, height * width * channels)
        context_vector = tf.reshape(context_vector, [batch_size, height, width, channels])

        return context_vector

    def call(self, inputs, training=False):
        """Reconstruct one matrix per input sequence.

        ``inputs`` is (batch, step, height, width, channels); the output is
        (batch, sensor_n, sensor_n, scale_n). ``training`` is accepted for Keras
        compatibility and is not used.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        step_max = input_shape[1]
        height = input_shape[2]
        width = input_shape[3]
        channels = input_shape[4]

        # Reshape the input to merge batch and sequence dimensions
        reshaped_inputs = tf.reshape(inputs, (batch_size * step_max, height, width, channels))
        # Encoder
        conv1 = self.conv1(reshaped_inputs)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)


        # Reshape for ConvLSTM layers: split batch and sequence again. With
        # 'same' padding each stride-2 convolution yields ceil(size / 2).
        conv1 = tf.reshape(conv1, [-1, self.step_max, self.sensor_n, self.sensor_n, 32])
        conv2 = tf.reshape(conv2, [-1, self.step_max, int(math.ceil(float(self.sensor_n)/2)), int(math.ceil(float(self.sensor_n)/2)), 64])
        conv3 = tf.reshape(conv3, [-1, self.step_max, int(math.ceil(float(self.sensor_n)/4)), int(math.ceil(float(self.sensor_n)/4)), 128])
        conv4 = tf.reshape(conv4, [-1, self.step_max, int(math.ceil(float(self.sensor_n)/8)), int(math.ceil(float(self.sensor_n)/8)), 256])

        # ConvLSTM layers with attention
        conv1_lstm_out = self.conv1_lstm(conv1)
        conv1_lstm_out_attention= self.attention_layer(conv1_lstm_out)

        conv2_lstm_out = self.conv2_lstm(conv2)
        conv2_lstm_out_attention= self.attention_layer(conv2_lstm_out)

        conv3_lstm_out = self.conv3_lstm(conv3)
        conv3_lstm_out_attention= self.attention_layer(conv3_lstm_out)

        conv4_lstm_out = self.conv4_lstm(conv4)
        conv4_lstm_out_attention= self.attention_layer(conv4_lstm_out)


        # Decoder: upsample, resize to the matching encoder scale (the transposed
        # convolution doubles the size, which overshoots for odd sizes), then
        # concatenate the attention output of that scale as a skip connection.
        deconv4 = self.deconv4(conv4_lstm_out_attention)
        deconv4 = tf.image.resize(deconv4, [int(math.ceil(float(self.sensor_n)/4)), int(math.ceil(float(self.sensor_n)/4))], method='bilinear')
        deconv4 = tf.concat([deconv4, conv3_lstm_out_attention], axis=-1)

        deconv3 = self.deconv3(deconv4)
        deconv3 = tf.image.resize(deconv3, [int(math.ceil(float(self.sensor_n)/2)), int(math.ceil(float(self.sensor_n)/2))], method='bilinear')
        deconv3 = tf.concat([deconv3, conv2_lstm_out_attention], axis=-1)


        deconv2 = self.deconv2(deconv3)
        deconv2 = tf.image.resize(deconv2, [self.sensor_n, self.sensor_n], method='bilinear')
        deconv2 = tf.concat([deconv2, conv1_lstm_out_attention], axis=-1)

        deconv1 = self.deconv1(deconv2)

        return deconv1

    def compile(self):
        """Compile with Adam and a mean-squared error against the last matrix of each sequence."""
        def custom_loss(y_true, y_pred):
            # y_true is the whole input sequence; only its last step is the target.
            y_true_reshaped = y_true[:, -1]
            return tf.reduce_mean(tf.square(y_true_reshaped - y_pred))

        super(MSCRED, self).compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),loss = custom_loss)

    def _sequence_dataset(self, matrixes):
        """Return a tf.data.Dataset of sliding sequences of ``step_max`` matrices.

        Each matrix gets a trailing channel axis and is repeated ``scale_n`` times
        along it; sequences advance by one matrix and incomplete ones are dropped.
        """
        data = np.expand_dims(matrixes, axis=-1)
        data = np.tile(data, (1, 1, 1, self.scale_n))

        dataset = tf.data.Dataset.from_tensor_slices(data)
        dataset = dataset.window(self.step_max, shift=1, drop_remainder=True)
        return dataset.flat_map(lambda window: window.batch(self.step_max))

    def train(self):
        """Fit the model on ``matrixes_train`` for up to ``training_iters`` epochs.

        Training stops early when the training loss has not improved for 15
        epochs, and the best weights are restored.
        """
        dataset = self._sequence_dataset(self.matrixes_train)
        # The input sequence is also passed as target; the loss picks its last step.
        dataset = dataset.map(lambda window: (window, window))
        dataset = dataset.batch(self.batch_size).prefetch(tf.data.experimental.AUTOTUNE)

        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='loss',
            patience=15,
            verbose=1,
            restore_best_weights=True
        )

        self.fit(dataset, epochs=self.training_iters, callbacks=[early_stopping])

    def test(self):
        """Reconstruct ``matrixes_test`` and return the reconstructions as one array.

        The result has one entry per sequence, i.e.
        ``len(matrixes_test) - step_max + 1`` entries; entry ``k`` belongs to the
        sequence ending at test matrix ``k + step_max - 1``.

        Raises
        ------
        ValueError
            If ``matrixes_test`` holds fewer than ``step_max`` matrices, so that no
            sequence can be formed.
        """
        dataset = self._sequence_dataset(self.matrixes_test)
        dataset = dataset.batch(self.batch_size).prefetch(tf.data.experimental.AUTOTUNE)

        reconstructed_matrices = []
        for batch in tqdm(dataset, desc="Testing", unit="batch"):
            reconstructed_matrices.append(self(batch, training=False).numpy())

        if not reconstructed_matrices:
            raise ValueError(
                f"matrixes_test must contain at least step_max={self.step_max} matrices")

        return np.concatenate(reconstructed_matrices)

In [None]:
class MSCREDnoLSTM(tf.keras.Model):
    """Convolutional encoder-decoder that reconstructs single signature matrices.

    Unlike ``MSCRED`` there is no temporal (ConvLSTM/attention) stage: every
    matrix is encoded by four convolutions and decoded by four transposed
    convolutions with skip connections from the matching encoder scale.

    Parameters
    ----------
    opt : dict
        Must provide 'batch_size', 'learning_rate', 'training_iters' (number
        of epochs passed to ``fit``) and 'step_max'.
    matrixes_train, matrixes_test : np.ndarray
        Signature matrices used by ``train()`` and ``test()``. Axis 2 is read
        as the number of sensors.
    """

    def __init__(self, opt, matrixes_train, matrixes_test):
        super(MSCREDnoLSTM, self).__init__()
        # Initialize parameters
        self.batch_size = opt['batch_size']
        self.learning_rate = opt['learning_rate']
        self.training_iters = opt['training_iters']
        self.step_max = opt['step_max']

        self.matrixes_train = matrixes_train
        self.matrixes_test = matrixes_test
        self.sensor_n = matrixes_train.shape[2]
        # win_size comes from the data. It used to be set from opt['window_size']
        # first and then overwritten straight away; that dead read was removed.
        self.win_size = matrixes_train.shape[1]

        # Only the number of names matters: it fixes the channel count (scale_n)
        # of the model input and output.
        self.value_colnames = ['total_count', 'error_count', 'error_rate']
        self.scale_n = len(self.value_colnames)

        # Define layers
        self.conv1_W = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='selu')
        self.conv2_W = tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='selu', strides=(2, 2))
        self.conv3_W = tf.keras.layers.Conv2D(128, (2, 2), padding='same', activation='selu', strides=(2, 2))
        self.conv4_W = tf.keras.layers.Conv2D(256, (2, 2), padding='same', activation='selu', strides=(2, 2))

        self.deconv4_W = tf.keras.layers.Conv2DTranspose(128, (2, 2), padding='same', activation='selu', strides=(2, 2))
        self.deconv3_W = tf.keras.layers.Conv2DTranspose(64, (2, 2), padding='same', activation='selu', strides=(2, 2))
        self.deconv2_W = tf.keras.layers.Conv2DTranspose(32, (3, 3), padding='same', activation='selu', strides=(2, 2))
        # The output layer has no activation.
        self.deconv1_W = tf.keras.layers.Conv2DTranspose(self.scale_n, (3, 3), padding='same')

    def call(self, inputs, training=False):
        """Reconstruct ``inputs`` (batch, height, width, channels); same shape out.

        ``training`` is accepted for Keras compatibility and is not used.
        """
        # Encoder
        conv1 = self.conv1_W(inputs)
        conv2 = self.conv2_W(conv1)
        conv3 = self.conv3_W(conv2)
        conv4 = self.conv4_W(conv3)

        # Decoder
        deconv4 = self.deconv4_W(conv4)
        deconv4 = tf.image.resize(deconv4, size=(conv3.shape[1], conv3.shape[2]))  # Resize to match conv3
        deconv4 = tf.concat([deconv4, conv3], axis=-1)

        deconv3 = self.deconv3_W(deconv4)
        deconv3 = tf.image.resize(deconv3, size=(conv2.shape[1], conv2.shape[2]))  # Resize to match conv2
        deconv3 = tf.concat([deconv3, conv2], axis=-1)

        deconv2 = self.deconv2_W(deconv3)
        deconv2 = tf.image.resize(deconv2, size=(conv1.shape[1], conv1.shape[2]))  # Resize to match conv1
        deconv2 = tf.concat([deconv2, conv1], axis=-1)

        deconv1 = self.deconv1_W(deconv2)
        return deconv1

    def compile(self):
        """Compile with Adam (``learning_rate`` from ``opt``) and mean squared error."""
        super(MSCREDnoLSTM, self).compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                                    loss='mean_squared_error')

    def _with_channels(self, matrixes):
        """Append a channel axis and repeat each matrix ``scale_n`` times along it."""
        data = np.expand_dims(matrixes, axis=-1)
        return np.tile(data, (1, 1, 1, self.scale_n))

    def train(self):
        """Fit the autoencoder on ``matrixes_train`` (input == target) for ``training_iters`` epochs."""
        train_data = self._with_channels(self.matrixes_train)
        dataset = tf.data.Dataset.from_tensor_slices((train_data, train_data))
        dataset = dataset.batch(self.batch_size)

        self.fit(dataset, epochs=self.training_iters)

    def test(self):
        """Reconstruct ``matrixes_test`` batch by batch and return one concatenated array."""
        test_data = self._with_channels(self.matrixes_test)
        dataset = tf.data.Dataset.from_tensor_slices(test_data)
        dataset = dataset.batch(self.batch_size)

        reconstructed_matrices = []
        for batch in dataset:
            reconstructed_matrices.append(self(batch, training=False).numpy())

        return np.concatenate(reconstructed_matrices)

In [None]:
def score(reconstructed_matrices, matrixes_test):
  """Return one anomaly score per sample.

  ``reconstructed_matrices`` is (n, height, width, channels) and
  ``matrixes_test`` is (n, height, width). The score of a sample is the largest
  squared difference between its test matrix and channel 0 of its
  reconstruction.
  """
  truth = np.array(matrixes_test)
  # Bring the channel axis forward so that channel 0 can be picked per sample.
  channels_first = np.transpose(reconstructed_matrices, [0, 3, 1, 2])
  squared_error = (truth - channels_first[:, 0, :, :]) ** 2
  return squared_error.max(axis=(1, 2))

In [None]:
def convert(times_windowed):
    """Summarise every time window by its first and last timestamp.

    ``times_windowed`` is a sequence of windows; each window is a sequence of
    rows whose first element is a timestamp. Returns a DataFrame with the
    datetime columns ``start`` and ``end``, one row per window.
    """
    # (first timestamp, last timestamp) of every window
    boundaries = [(window[0][0], window[-1][0]) for window in times_windowed]

    # Build the frame in one go, then parse both columns as datetimes.
    frame = pd.DataFrame(boundaries, columns=['start', 'end'])
    for column in ('start', 'end'):
        frame[column] = pd.to_datetime(frame[column])

    return frame

In [None]:
def calculate_auc(x, y):
    """Area under the curve ``y(x)`` by the trapezoidal rule.

    Parameters
    ----------
    x, y : array-like of equal length
        Curve coordinates in any order, e.g. FPR and TPR of a ROC curve. The
        points are sorted by ``x`` (ascending) before integrating.

    Returns
    -------
    float
        The integral of ``y`` over ``x``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Ensure both arrays are sorted together by x (ascending)
    sorted_idx = np.argsort(x)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use
    # whichever one this NumPy version provides.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    # Sum of the trapezoidal areas of all intervals
    return trapezoid(y[sorted_idx], x[sorted_idx])

In [None]:
def metrics_by_point_vectorized(scores, time_collision, full=False, convert=False):
    """Evaluate the anomaly scores against the labelled collisions for many thresholds.

    A window is predicted anomalous when its score is >= the threshold, and it
    counts as a true collision when its START timestamp lies in
    ``[collision start, collision end)`` of a labelled collision.
    NOTE(review): the original comment said the score belongs to the last point
    of the window, yet the window start is what is compared -- confirm which
    one is intended.

    Parameters
    ----------
    scores : array-like, shape (n,)
        One anomaly score per window.
    time_collision : pd.DataFrame
        Frame with a datetime ``start`` column, one row per window. Extra rows
        beyond ``len(scores)`` are ignored. When ``convert`` is true this is
        instead the raw windowed timestamps accepted by ``convert()``.
    full : bool, default False
        Use every sorted score as a threshold instead of 300 evenly spaced
        values between the smallest and the largest score.
    convert : bool, default False
        Pass ``time_collision`` through the module-level ``convert()`` first.

    Returns
    -------
    tuple
        ``(recalls, precisions, fprs, accuracies, f1s, cms, anomaly_indices_list)``
        with one entry per threshold. ``cms`` holds ``[[tn, fp], [fn, tp]]``
        arrays and ``anomaly_indices_list`` the indices of the true positives.
    """
    scores = np.asarray(scores)

    if full:
        thresholds = np.sort(scores)
    else:
        thresholds = np.linspace(scores.min(), scores.max(), num=300)

    if convert:
        # The boolean parameter shadows the module-level convert() helper, so a
        # plain convert(...) call would try to call the bool. Fetch the function
        # from the module namespace explicitly.
        time_collision = globals()["convert"](time_collision) #TADGANLOADER conversion

    collisions = pd.read_excel(os.path.join(ROOTDIR_DATASET_ANOMALY, "20220811_collisions_timestamp.xlsx"), sheet_name=None)
    collisions = pd.concat(collisions.values(), ignore_index=True)
    # The labelled timestamps are shifted back by two hours before comparison
    # (presumably a local-time to UTC correction -- TODO confirm).
    shift = pd.Timedelta(hours=2)
    collisions_init = collisions.loc[collisions['Inizio/fine'] == "i", 'Timestamp'] - shift
    collision_end = collisions.loc[collisions['Inizio/fine'] == "f", 'Timestamp'] - shift

    time_collision = time_collision[:len(scores)]
    assert len(scores) == len(time_collision), "unmatching score/thresholds/timestamp"
    print(f"--- LOADED {len(collisions_init)} COLLISIONS ---")

    # Convert timestamps to numpy arrays
    start_times = time_collision['start'].to_numpy().astype('datetime64[ns]')

    collisions_init_np = collisions_init.to_numpy().astype('datetime64[ns]')
    collisions_end_np = collision_end.to_numpy().astype('datetime64[ns]')

    # Which window starts inside which collision does not depend on the
    # threshold, so compute it once: shape (n_collisions, n_windows).
    in_collision = ((start_times >= collisions_init_np[:, np.newaxis]) &
                    (start_times < collisions_end_np[:, np.newaxis]))
    hits_per_window = in_collision.sum(axis=0)   # number of collisions covering each window
    inside_any = in_collision.any(axis=0)        # window lies in at least one collision
    total_hits = hits_per_window.sum()

    n_samples = len(scores)

    # Calculate metrics for each threshold
    results = []
    for threshold in tqdm(thresholds, desc='Processing thresholds', unit='threshold'):
        threshold_mask = scores >= threshold

        pos_pred = np.sum(threshold_mask)
        neg_pred = n_samples - pos_pred

        # predicted anomalies that start inside a collision -> tp
        tp = hits_per_window[threshold_mask].sum()
        fp = pos_pred - tp

        # predicted normals that start inside a collision -> fn
        fn = total_hits - tp
        tn = neg_pred - fn

        anomaly_indices = np.flatnonzero(threshold_mask & inside_any)

        cm_anomaly = np.array([[tn, fp], [fn, tp]])

        precision = tp / (tp + fp) if tp + fp != 0 else 0
        recall = tp / (tp + fn) if tp + fn != 0 else 0
        fpr = fp / (fp + tn) if fp + tn != 0 else 0
        accuracy = (tp + tn) / (tp + tn + fp + fn) if tp + tn + fp + fn != 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall != 0 else 0

        results.append((recall, precision, fpr, accuracy, f1, cm_anomaly, anomaly_indices))

    recalls, precisions, fprs, accuracies, f1s, cms, anomaly_indices_list = zip(*results)


    return recalls, precisions, fprs, accuracies, f1s, cms, anomaly_indices_list

In [None]:
# Hyper-parameters shared by the preprocessing and the models.
opt = dict(
    window_size=60,        # samples per window when splitting the signals
    overlap_train=0,       # overlap between consecutive training windows
    batch_size=4,          # batch size of the tf.data pipelines
    learning_rate=0.0005,  # learning rate of the Adam optimizer
    training_iters=150,    # number of epochs passed to fit()
    step_max=5,            # consecutive signature matrices per model input sequence
)

In [None]:
# --- Training data: load the collision-free recordings and cut them into windows ---
df_action, df, df_meta, action2int = get_df_action(filepath_csv, filepath_meta)

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
# NOTE(review): this timer only wraps a plain assignment, so the printed
# duration is ~0 s; it looks like a leftover from a removed feature-extraction step.
start_time = time.time()
df_features = df

print("--- %s seconds ---" % (time.time() - start_time))
df_features.isnull().values.any()  # check whether any value is null (returns True if so); the result is not used here
# get_df_action already dropped rows containing NaN from df, so this fill acts as a safety net.
df_features_nonan = df_features.fillna(0)
df_train = df_features_nonan

# X_train, df_train and df_features_nonan all name the SAME DataFrame, so the
# in-place drop below removes the correlated columns from all three.
X_train = df_train
corr_features = tsfel.correlated_features(X_train, threshold=0.95)
X_train.drop(corr_features, inplace=True, axis=1)
X_train_try = X_train
X_train_try = X_train_try.drop(["time"], axis=1)
X_train_try = np.array(X_train_try)

# NOTE(review): this discards the last remaining signal column; the frame holds
# no label column at this point -- confirm that dropping it is intended.
X_train_try= X_train_try[:, :-1]  # All columns except the last
# The scaler is fitted on the training data only and reused for the collision data.
scaler = MinMaxScaler(feature_range=(-1, 1))

X_train_try = scaler.fit_transform(X_train_try)

window_splits = []
#overlap = (opt['window_size'] - 15) / opt['window_size']
window_splits.extend(
    tsfel.utils.signal_processing.signal_window_splitter(X_train_try.copy(), opt['window_size'],opt['overlap_train']))
window_splits = np.asarray(window_splits)
# NOTE(review): reshape returns a new array and the result is discarded, so this
# line only checks that the sizes are compatible; window_splits is unchanged.
window_splits.reshape(-1, opt['window_size'], X_train_try.shape[1])

# Load the collision dataset labels: the timestamp sheet marks the start ("i")
# and the end ("f") of every collision.

collisions = pd.read_excel(os.path.join(ROOTDIR_DATASET_ANOMALY, "20220811_collisions_timestamp.xlsx"))


def _shifted_marker_strings(marker):
    """Timestamps of the rows flagged with ``marker``, moved back two hours and formatted as text."""
    stamps = collisions.loc[collisions['Inizio/fine'] == marker, 'Timestamp']
    shifted = stamps - pd.to_timedelta([2] * len(stamps), 'h')
    return [stamp.strftime('%Y-%m-%d %H:%M:%S') for stamp in shifted.array]


# Both names of each pair refer to the same list of formatted timestamps.
collisions_init_str = _shifted_marker_strings("i")
collisions_init = collisions_init_str

collisions_end_str = _shifted_marker_strings("f")
collisions_end = collisions_end_str

# --- Test data: load the collision recordings and window them like the training data ---
df_action_collision, df_collision, df_meta_collision, action2int_collision = get_df_action(filepath_csv_anomaly,filepath_meta_anomaly)

# NOTE(review): as for the training data, this timer only wraps an assignment.
start_time = time.time()

df_features_collision = df_collision
print("--- %s seconds ---" % (time.time() - start_time))

# Null check; the result is not used in this cell.
df_features_collision.isnull().values.any()

df_features_collision_nonan = df_features_collision.fillna(0)
# Split the frame into the signal columns (X_collision) and the "time" column
# alone (df_time_only), which is windowed in parallel to recover the timestamps.
columns_to_keep = ["time"]
columns_to_drop = [col for col in df_features_collision_nonan.columns if col not in columns_to_keep]

X_collision = df_features_collision_nonan.drop(["time"], axis=1)
df_time_only = df_features_collision_nonan.drop(columns=columns_to_drop)
# Remove the same correlated columns that were removed from the training data.
X_collision.drop(corr_features, inplace=True, axis=1)
X_collision = np.asarray(X_collision)
# Mirrors the training pipeline, which also discards the last column.
X_collision_features = X_collision[:, :-1]  # All columns except the last
X_collision=X_collision_features
# Reuse the scaler fitted on the training data (no re-fitting on test data).
X_collision = scaler.transform(X_collision)

time_splits_test = []
window_splits_test = []
# Overlap of (window_size - 1) / window_size, i.e. a stride of one sample
# between consecutive test windows.
overlap_test = (opt['window_size'] - 1) / opt['window_size']
window_splits_test.extend(
    tsfel.utils.signal_processing.signal_window_splitter(X_collision.copy(), opt['window_size'], overlap_test))
time_splits_test.extend(
    tsfel.utils.signal_processing.signal_window_splitter(df_time_only.copy(), opt['window_size'], overlap_test))
window_splits_test = np.asarray(window_splits_test)
time_splits_test = np.asarray(time_splits_test)

# NOTE(review): the reshape result is discarded, so window_splits_test is unchanged.
window_splits_test.reshape(-1,opt['window_size'], X_collision.shape[1])
# Reformat the timestamp strings of every window (drops fractional seconds and UTC offset).
time_splits_test_transformed = np.array([transform_datetime_strings(window) for window in time_splits_test])


In [None]:
# --- Build the signature matrices, train MSCRED, score the collision data ---
matrixes_train = singature_matrix_generator(window_splits)
print(matrixes_train.shape)
matrixes_test = singature_matrix_generator(window_splits_test)
print(matrixes_test.shape)

# The raw windows are no longer needed; drop the references to free memory.
# Re-running this cell alone therefore fails until the preprocessing cell is re-run.
del(window_splits)
del(window_splits_test)

model = MSCRED(opt, matrixes_train, matrixes_test)
model.compile()
model.train()

print("\n-- testing -- " )
reconstructed_matrices = model.test()
print(reconstructed_matrices.shape)
# The model emits one reconstruction per sequence of step_max matrices and the
# loss targets the last matrix of a sequence, so the first step_max - 1 test
# matrices have no reconstruction and are skipped to align both arrays.
matrixes_test = matrixes_test[opt['step_max']-1:]
print(matrixes_test.shape)
print()
scores = score(reconstructed_matrices,matrixes_test)

del(reconstructed_matrices)
del(matrixes_test)

# Apply the same step_max - 1 offset to the window timestamps.
time_collision =  convert(time_splits_test_transformed[opt['step_max']-1:])
# NOTE(review): metrics_by_point_vectorized builds its own thresholds; this
# array is not passed to it and is only useful if a later cell reads it.
thresholds = np.linspace( np.min(scores), np.max(scores), 300)
recalls, precisions, fprs, accuracies, f1s, cms, anomaly_indices = metrics_by_point_vectorized(scores, time_collision)