In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tqdm import tqdm


In [3]:
# Load the CSV dataset into a DataFrame
df = pd.read_csv('dataset/aruba-bysecs-full.csv')

# Convert the 'datetime' column to a pandas datetime object
df['datetime'] = pd.to_datetime(df['datetime'])

# Extract the time part and replace the 'datetime' column
df['datetime'] = df['datetime'].dt.time

# Rename the 'datetime' column to 'time'
df = df.rename(columns={'datetime': 'time'})
df.drop(columns=['label_index'], inplace=True)

# Save the DataFrame to a new CSV file
df.to_csv('new_dataset.csv', index=False)

In [2]:
data = pd.read_csv('new_dataset.csv')

# Convert time column to datetime format with appropriate format strings
data['time'] = pd.to_datetime(data['time'], format='%H:%M:%S.%f', errors='coerce')
data['time'] = data['time'].combine_first(pd.to_datetime(data['time'], format='%H:%M:%S', errors='coerce'))


label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Create a MinMaxScaler object
scaler = MinMaxScaler()
# Create 'time_numerical' feature as seconds since start
data['time'] = (data['time'] - data['time'].min()).dt.total_seconds()
data['time'] = scaler.fit_transform(data[['time']])

sensor_columns = data.columns[1:-1]  # Exclude 'time' and 'label'
for column in sensor_columns:
    data[column] = data[column].apply(lambda x: 0 if x == 'OFF' else 1)

data.to_csv('pp.csv', index=False)

data.head

<bound method NDFrame.head of             time  D001  D002  D004  M001  M002  M003  M004  M005  M006  ...  \
0       0.002663     0     0     0     0     0     1     0     0     0  ...   
1       0.002747     0     0     0     0     0     0     0     0     0  ...   
2       0.105941     0     0     0     0     0     1     0     0     0  ...   
3       0.106005     0     0     0     0     0     0     0     0     0  ...   
4       0.154419     0     0     0     0     0     1     0     0     0  ...   
...          ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   
459042  0.723864     1     0     0     0     0     0     0     0     0  ...   
459043  0.723914     1     0     0     0     0     0     0     0     0  ...   
459044  0.723953     1     0     0     0     0     0     0     0     0  ...   
459045  0.724167     1     0     0     0     0     0     0     0     0  ...   
459046  0.724190     1     0     0     0     0     0     0     0     0  ...   

        M023  M024  M

In [3]:
sequence_length = 10
sequences = []
labels = []

# Wrap the loop with tqdm for a progress bar
for i in tqdm(range(len(data) - sequence_length + 1)):
    sequence = data.iloc[i:i + sequence_length]
    label = data.iloc[i + sequence_length - 1]['label']
    sequences.append(sequence.drop(columns=['label']).values)
    labels.append(label)

sequences = np.array(sequences)
labels = np.array(labels)

# print(sequences)

100%|██████████| 459038/459038 [08:18<00:00, 920.03it/s]


In [4]:
# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.2, shuffle=False)

In [5]:
# Build and train LSTM model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

2023-08-12 01:50:41.860169: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-12 01:50:41.895395: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
num_features = X_train.shape[2]
num_classes = len(label_encoder.classes_)

model = Sequential()
model.add(LSTM(units=64, input_shape=(sequence_length, num_features), activation='relu'))
model.add(Dense(units=num_classes, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

epochs = 10
batch_size = 64

# Wrap the training loop with tqdm for a progress bar
with tqdm(total=epochs, desc="Epochs", unit="epoch") as epoch_progress:
    for epoch in range(epochs):
        with tqdm(total=len(X_train) // batch_size, desc="Batches", unit="batch") as batch_progress:
            for batch_start in range(0, len(X_train), batch_size):
                batch_end = batch_start + batch_size
                X_batch = X_train[batch_start:batch_end]
                y_batch = y_train[batch_start:batch_end]
                
                history = model.train_on_batch(X_batch, y_batch)
                batch_progress.set_postfix(loss=history[0], accuracy=history[1])
                batch_progress.update(1)
        
        # Evaluate the model after each epoch
        test_loss, test_accuracy = model.evaluate(X_test, y_test)
        print(f"Epoch {epoch+1}/{epochs} - Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
        
        epoch_progress.update(1)

# Final evaluation
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Final Test Loss: {test_loss:.4f}, Final Test Accuracy: {test_accuracy:.4f}")



2023-08-12 01:50:46.857431: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22419 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:19:00.0, compute capability: 8.6
2023-08-12 01:50:46.857908: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22433 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:65:00.0, compute capability: 8.6
Epochs:   0%|          | 0/10 [00:00<?, ?epoch/s]2023-08-12 01:50:48.048108: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-08-12 01:50:48.179373: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7efdae34e440 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-08-12 01:50:48.179398: I tensorflow/co



Epochs:  10%|█         | 1/10 [02:08<19:14, 128.23s/epoch]

Epoch 1/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:55, 49.49batch/s, accuracy=0, loss=nan]                       




Epochs:  20%|██        | 2/10 [04:14<16:57, 127.16s/epoch]

Epoch 2/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:56, 49.38batch/s, accuracy=0, loss=nan]                       




Epochs:  30%|███       | 3/10 [06:21<14:48, 126.91s/epoch]

Epoch 3/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:55, 49.76batch/s, accuracy=0, loss=nan]                       




Epochs:  40%|████      | 4/10 [08:26<12:38, 126.42s/epoch]

Epoch 4/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:55, 49.65batch/s, accuracy=0, loss=nan]                       




Epochs:  50%|█████     | 5/10 [10:32<10:31, 126.26s/epoch]

Epoch 5/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:54, 50.14batch/s, accuracy=0, loss=nan]                       




Epochs:  60%|██████    | 6/10 [12:37<08:23, 125.78s/epoch]

Epoch 6/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:54, 50.01batch/s, accuracy=0, loss=nan]                       




Epochs:  70%|███████   | 7/10 [14:42<06:16, 125.61s/epoch]

Epoch 7/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:54, 49.91batch/s, accuracy=0, loss=nan]                       




Epochs:  80%|████████  | 8/10 [16:48<04:11, 125.59s/epoch]

Epoch 8/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:54, 50.18batch/s, accuracy=0, loss=nan]




Epochs:  90%|█████████ | 9/10 [18:53<02:05, 125.32s/epoch]

Epoch 9/10 - Test Loss: nan, Test Accuracy: 0.0035


Batches: 5738batch [01:53, 50.41batch/s, accuracy=0, loss=nan]                       




Epochs: 100%|██████████| 10/10 [20:57<00:00, 125.77s/epoch]

Epoch 10/10 - Test Loss: nan, Test Accuracy: 0.0035





Final Test Loss: nan, Final Test Accuracy: 0.0035
