In [84]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GlobalAveragePooling1D, Dense, Dropout, BatchNormalization

from keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [85]:

# def precision(y_true, y_pred):
#     """Precision metric."""
#     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#     predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
#     precision = true_positives / (predicted_positives + K.epsilon())
#     return precision

# # Register the custom metric function with Keras
# tf.keras.metrics.Precision = precision

In [86]:

def load_and_preprocess_data(file_list, sequence_length=500):
    """Build fixed-length feature sequences and per-file anomaly counts.

    Each CSV is read using columns 1-4, which must be named ``note``,
    ``velocity``, ``time`` and ``anomaly``. The three feature columns are
    min-max scaled independently for every file, then truncated or
    zero-padded to exactly ``sequence_length`` rows.

    Args:
        file_list: Iterable of CSV paths to load.
        sequence_length: Number of rows (time steps) every sample must have.

    Returns:
        A ``(data, targets)`` tuple of NumPy arrays. ``data`` has shape
        ``(n_files, sequence_length, 3)``; ``targets`` has shape
        ``(n_files,)`` and holds the sum of each file's ``anomaly`` column.
    """
    feature_columns = ["note", "velocity", "time"]
    data = []
    targets = []

    for file in file_list:
        df = pd.read_csv(file, usecols=[1, 2, 3, 4])

        # Read the label before any padding/truncation: the padded rows carry
        # no "anomaly" column, so computing it afterwards raised KeyError for
        # every file shorter than sequence_length.
        # NOTE(review): the sum covers the whole file, while the features are
        # cut to the first sequence_length rows — confirm this is intended.
        target = df["anomaly"].sum()

        # Scale on the full file (as before), then cut to the window size.
        features = MinMaxScaler().fit_transform(df[feature_columns])
        features = features[:sequence_length]

        # Zero-pad short files so every sample has the same shape.
        missing_rows = sequence_length - len(features)
        if missing_rows > 0:
            padding = np.zeros((missing_rows, len(feature_columns)))
            features = np.vstack([features, padding])

        data.append(features)
        targets.append(target)

    return np.array(data), np.array(targets)


In [87]:


directory = "./anomalous"  # Replace with the path to your directory

# os.listdir() yields entries in arbitrary order, so sort the paths: the sample
# order feeds the seeded train/test split below, and an unstable order would
# make the split differ between machines or runs.
anomalous_file_list = sorted(
    os.path.join(directory, filename)
    for filename in os.listdir(directory)
    if filename.endswith(".csv")
)

sequence_length = 500  # Number of time steps fed to the model per file
data, targets = load_and_preprocess_data(anomalous_file_list, sequence_length)

# Hold out 20% of the files; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    data, targets, test_size=0.2, random_state=42
)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(6070, 500, 3) (6070,) (1518, 500, 3) (1518,)


In [88]:
# Stacked-LSTM regressor: three LSTM layers (256 -> 128 -> 64 units), batch
# normalisation, two ReLU dense layers, and a single linear output unit.
model = Sequential()

# Recurrent stack; the first two layers emit the full sequence for the next LSTM.
model.add(LSTM(256, activation='tanh', input_shape=(sequence_length, 3), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64, activation='tanh'))
model.add(BatchNormalization())

# Dense head ending in one unbounded (linear) value.
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='linear'))

# Only a loss is configured (no metrics).
# NOTE(review): run_eagerly=True is normally a debugging setting and slows
# training — confirm it is still required.
model.compile(optimizer='adam', loss='mse', run_eagerly=True)

In [89]:

# Train with early stopping, keeping on disk only the weights that achieve the
# lowest validation loss, then report the loss of that best checkpoint.
early_stopping_callback = EarlyStopping(monitor="val_loss", patience=10, min_delta=0, mode="min")


checkpoint_path = "model.h5"
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor="val_loss", verbose=1, save_best_only=True, mode="min"
)

# NOTE(review): X_test/y_test drive both early stopping / checkpoint selection
# and the final evaluation below, so the reported figure is optimistic.
# Consider carving a separate validation split out of the training data.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_callback, early_stopping_callback],
)

# Load the best checkpoint and evaluate it. The model is compiled with an MSE
# loss and no metrics, so evaluate() yields that loss — it is not an accuracy.
model.load_weights(checkpoint_path)
test_mse = model.evaluate(X_test, y_test)
accuracy = test_mse  # legacy name kept for any later cell; the value is an MSE
print(f"Test MSE: {test_mse}")

Epoch 1/100
Epoch 1: val_loss improved from inf to 84223.17969, saving model to model.h5
Epoch 2/100
Epoch 2: val_loss improved from 84223.17969 to 63464.75391, saving model to model.h5
Epoch 3/100
Epoch 3: val_loss improved from 63464.75391 to 62454.44141, saving model to model.h5
Epoch 4/100
Epoch 4: val_loss did not improve from 62454.44141
Epoch 5/100
Epoch 5: val_loss did not improve from 62454.44141
Epoch 6/100
Epoch 6: val_loss did not improve from 62454.44141
Epoch 7/100
Epoch 7: val_loss improved from 62454.44141 to 13387.12109, saving model to model.h5
Epoch 8/100
Epoch 8: val_loss did not improve from 13387.12109
Epoch 9/100
Epoch 9: val_loss did not improve from 13387.12109
Epoch 10/100
Epoch 10: val_loss did not improve from 13387.12109
Epoch 11/100
Epoch 11: val_loss improved from 13387.12109 to 12619.84180, saving model to model.h5
Epoch 12/100
Epoch 12: val_loss improved from 12619.84180 to 12575.34082, saving model to model.h5
Epoch 13/100
Epoch 13: val_loss did not im

TypeError: cannot unpack non-iterable float object

In [114]:
def predict_anomalies(model, file, sequence_length=500):
    """Predict the number of anomalies in a single CSV file.

    Reads columns 1-3 of ``file`` (named ``note``, ``velocity``, ``time``),
    min-max scales them on this file alone, zero-pads or truncates to
    ``sequence_length`` rows, and feeds the result to ``model.predict`` as a
    batch of one.

    Args:
        model: Object exposing ``predict(array)``.
        file: Path of the CSV file to score.
        sequence_length: Number of rows passed to the model.

    Returns:
        The sum of the model outputs, with negative outputs treated as zero,
        rounded to the nearest integer and returned as ``int``.
    """
    feature_names = ["note", "velocity", "time"]

    frame = pd.read_csv(file, usecols=[1, 2, 3], index_col=False)
    print(frame.columns)

    normaliser = MinMaxScaler()
    frame[feature_names] = normaliser.fit_transform(frame[feature_names])

    # Short files are extended with all-zero rows up to the window size.
    rows_missing = sequence_length - len(frame)
    if rows_missing > 0:
        zero_rows = pd.DataFrame(
            np.zeros((rows_missing, 3)),
            columns=feature_names,
        )
        frame = pd.concat([frame, zero_rows], ignore_index=True)

    # Keep the first sequence_length rows and add a leading batch axis.
    window = frame.iloc[:sequence_length, :].values
    input_data = window.reshape(1, sequence_length, -1)
    print(input_data.shape)

    raw_output = model.predict(input_data)
    # A negative count is meaningless; floor the outputs at zero in place.
    below_zero = raw_output < 0
    raw_output[below_zero] = 0

    total = np.sum(raw_output)
    return int(np.round(total))


In [119]:
# Score one CSV with the trained model and report the predicted anomaly count.
new_file = "anomalous/alb_esp2_modified-N-1V-9.csv"
num_anomalies = predict_anomalies(model, file=new_file)
print(f"Number of anomalies: {num_anomalies}")


Index(['note', 'velocity', 'time'], dtype='object')
(1, 500, 3)
Number of anomalies: 7
