In [None]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import joblib
from google.colab import drive

# Make Google Drive available to the Colab runtime.
drive.mount('/content/drive')

# Drive folder that receives the trained model and the fitted preprocessors.
drive_path = '/content/drive/My Drive/block_model/'

# Read the table of per-sample statistics.
df = pd.read_csv('/content/drive/My Drive/block_algorithms_dataset_colab.csv')

# Columns fed to the model; the 'Label' column holds the target class.
feature_columns = [
    'Mean', 'StdDev', 'Entropy', 'Kurtosis', 'Skewness',
    'MinValue', 'MaxValue', 'Range', 'UniqueByteCount',
]
X = df[feature_columns].values
y = df['Label'].values

# Map the raw labels to integer class ids.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Conv2D consumes (samples, height, width, channels), so each feature vector
# becomes a (n_features x 1) grid with a single channel.
cnn_input_shape = (-1, len(feature_columns), 1, 1)
X_train = X_train.reshape(cnn_input_shape)
X_test = X_test.reshape(cnn_input_shape)

# One-hot encode the integer class ids for categorical cross-entropy.
num_classes = len(label_encoder.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Define the CNN model.
# The input shape is declared with an explicit Input layer instead of an
# `input_shape` argument on the first Conv2D; recent Keras versions warn
# against passing `input_shape` to a layer inside a Sequential model.
# Height along the feature axis: 9 -> 7 (conv 3x1) -> 3 (pool 2x1) -> 1 (conv 3x1).
model = Sequential([
    Input(shape=(9, 1, 1)),

    Conv2D(32, (3, 1), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 1)),
    Dropout(0.3),

    Conv2D(64, (3, 1), activation='relu'),
    BatchNormalization(),
    # Pool size (1, 1) leaves the already tiny feature map untouched, so the
    # dimension is never reduced to zero.
    MaxPooling2D(pool_size=(1, 1)),
    Dropout(0.3),

    # A (1, 1) kernel is the only size that still fits the height-1 feature map.
    # No pooling follows this convolution because nothing is left to pool.
    Conv2D(128, (1, 1), activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    # Classification head: one softmax unit per encoded label.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax'),
])

# Compile the model with a lower learning rate than Adam's default.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 10 epochs and roll the
# model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Fit for at most 50 epochs, holding back 10% of the training data for validation.
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stopping],
)
model.fit(X_train, y_train, **fit_options)

# Score the trained model on the held-out test split and report both metrics.
loss, accuracy = model.evaluate(X_test, y_test)
for metric_name, metric_value in (("Loss", loss), ("Accuracy", accuracy)):
    print(f"Test {metric_name}: {metric_value:.4f}")

# Save the model and preprocessing tools to the Google Drive folder.
# Create the folder first: without it, the save would fail after training
# has already completed if the folder is missing.
os.makedirs(drive_path, exist_ok=True)

model.save(os.path.join(drive_path, 'block_algorithm_cnn_model.h5'))
# The fitted encoder and scaler are stored next to the model so the same
# preprocessing can be applied to new data.
joblib.dump(label_encoder, os.path.join(drive_path, 'label_encoder.pkl'))
joblib.dump(scaler, os.path.join(drive_path, 'scaler.pkl'))

print("Model and preprocessing tools saved locally.")


Mounted at /content/drive


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 15s 19ms/step - accuracy: 0.3924 - loss: 1.4916 - val_accuracy: 0.6275 - val_loss: 0.7973
Epoch 2/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.5685 - loss: 0.9782 - val_accuracy: 0.6267 - val_loss: 0.5919
Epoch 3/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.6084 - loss: 0.8334 - val_accuracy: 0.6350 - val_loss: 0.5564
Epoch 4/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.6200 - loss: 0.7509 - val_accuracy: 0.6300 - val_loss: 0.5390
Epoch 5/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.6104 - loss: 0.7196 - val_accuracy: 0.6475 - val_loss: 0.5271
Epoch 6/50
338/338 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.6244 - loss: 0.6796 - val_accuracy: 0.6375 - val_loss: 0.5189
Epoch 7/50
338/338



Test Loss: 0.4749
Test Accuracy: 0.6707
Model and preprocessing tools saved locally.
