In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, TimeDistributed, Flatten
from tensorflow.keras.utils import to_categorical


In [None]:
# Load the saved numpy arrays
from sklearn.preprocessing import LabelEncoder
# Each split is stored as a separate .npy file under data/processed/.
X_train = np.load('data/processed/X_train.npy')
y_train = np.load('data/processed/y_train.npy')
X_val = np.load('data/processed/X_val.npy')
y_val = np.load('data/processed/y_val.npy')
X_test = np.load('data/processed/X_test.npy')
y_test = np.load('data/processed/y_test.npy')

# Convert labels to one-hot encoding.
# The encoder is fitted on the training labels only; the validation and test
# labels reuse that same label -> integer mapping.
label_encoder = LabelEncoder()
y_train_enc = label_encoder.fit_transform(y_train)
y_val_enc = label_encoder.transform(y_val)
y_test_enc = label_encoder.transform(y_test)

num_classes = len(label_encoder.classes_)
y_train_cat = to_categorical(y_train_enc, num_classes)
y_val_cat = to_categorical(y_val_enc, num_classes)
y_test_cat = to_categorical(y_test_enc, num_classes)

# Reshape the inputs for the TimeDistributed(Conv1D) -> LSTM model.
# The model's first layer is declared with input_shape=(None, features, 1), so
# every batch must be 4-D: (samples, time_steps, features, channels).
# Reshaping to only (samples, time_steps, features) leaves out the channel axis
# and makes fit/evaluate/predict fail with an input-shape mismatch.
time_steps = 1  # Adjust based on how you structure your sequences
features = X_train.shape[1]  # Number of features per time step (read before reshaping)
n_channels = 1  # Conv1D needs an explicit channel axis; the model expects exactly 1

X_train = X_train.reshape((X_train.shape[0], time_steps, features, n_channels))
X_val = X_val.reshape((X_val.shape[0], time_steps, features, n_channels))
X_test = X_test.reshape((X_test.shape[0], time_steps, features, n_channels))

# Fail early, with a readable message, if the arrays do not line up.
assert X_train.shape[1:] == X_val.shape[1:] == X_test.shape[1:], \
    "train/val/test inputs must share the same per-sample shape"
assert len(X_train) == len(y_train_cat), "X_train and y_train have different lengths"
assert len(X_val) == len(y_val_cat), "X_val and y_val have different lengths"
assert len(X_test) == len(y_test_cat), "X_test and y_test have different lengths"

In [None]:
# Architecture: a Conv1D feature extractor applied to every time step,
# followed by an LSTM over the time axis and a softmax classifier.
# The declared input shape means each batch is (batch, time_steps, features, 1).
model = Sequential([
    # --- CNN front end (wrapped so it runs independently per time step) ---
    TimeDistributed(
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        input_shape=(None, features, 1),
    ),
    TimeDistributed(MaxPooling1D(pool_size=2)),
    TimeDistributed(Flatten()),

    # --- Recurrent part: only the last LSTM output is passed on ---
    LSTM(100, return_sequences=False),
    Dropout(0.5),

    # --- Classifier head: one softmax unit per class ---
    Dense(num_classes, activation='softmax'),
])

# One-hot targets -> categorical cross-entropy; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train the network: 50 epochs, mini-batches of 32, with the validation split
# scored at the end of every epoch. The returned History object is kept in
# `history` for later inspection.
history = model.fit(
    x=X_train,
    y=y_train_cat,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val, y_val_cat),
)

In [None]:
# Score the trained model on the held-out test split (no progress output).
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test_cat, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Class probabilities for every test sample: one row per sample, one column per class
y_pred = model.predict(X_test)

# The most probable column index is the predicted (encoded) class id
y_pred_classes = y_pred.argmax(axis=1)

# Map the encoded ids back to the original label values
y_pred_labels = label_encoder.inverse_transform(y_pred_classes)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Pin the label set to every class the encoder knows. Without `labels`, sklearn
# infers the classes from y_test/y_pred only; if a class is missing from both,
# the report raises on the target_names length mismatch and the confusion
# matrix ends up smaller than (and misaligned with) the tick labels.
class_ids = np.arange(len(label_encoder.classes_))
# classification_report measures the length of each name, so they must be
# strings even when the original labels are numeric.
class_names = [str(name) for name in label_encoder.classes_]

# Classification report: per-class precision / recall / F1 on the test split
print(classification_report(y_test_enc, y_pred_classes,
                            labels=class_ids, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_test_enc, y_pred_classes, labels=class_ids)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names,
            yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()