# Multi-Class Classification Task

## Model C (Fine-tuned multi-class classifier)

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import csv
from datetime import datetime
import pandas as pd

In [None]:
# Heat-map image folders used for training and for the final test
train_dir = './multiclass_test/training_set'
test_dir = './multiclass_test/test_set'

# Input image size, batch size and the share of training data held out
# for validation
img_height = 224
img_width = 224
batch_size = 32
validation_split = 0.2

# Both generators scale pixel values by 1/255; only the training generator
# reserves a validation subset.
training_gen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
test_gen = ImageDataGenerator(rescale=1./255)

In [None]:
# Model C: three 5x5 convolution + 2x2 max-pooling stages, dropout, then a
# 96-unit dense layer, dropout and a 9-way softmax output.
model = models.Sequential()

model.add(layers.Conv2D(64, (5, 5), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (5, 5), activation='tanh'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(96, (5, 5), activation='tanh'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Dropout(0.3))

model.add(layers.Flatten())

model.add(layers.Dense(96, activation='relu'))

model.add(layers.Dropout(0.3))

# Output Layer (one unit per class)
model.add(layers.Dense(9, activation='softmax'))

# Choose optimizer and learning rate
# NOTE(review): the learning rate looks like a value picked by the
# hyper-parameter search further down - confirm.
learning_rate = 0.0011228308916583806
# Use the `tf` alias: the bare name `keras` is not imported by the import
# cell above, so `keras.optimizers.Adam` raised a NameError here.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
# Stop training once validation loss has gone 3 epochs without improving,
# then restore the weights from the best epoch.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [None]:
# Save results to CSV
csv_filename = './multiclass_test/tuning/final_trial_results_2.csv'

# Class index -> CSV column name.
# NOTE(review): assumes the class indices assigned by flow_from_directory
# follow this order - confirm against test_generator.class_indices.
column_map = {
    0: 'Analysis',
    1: 'Backdoors',
    2: 'DoS',
    3: 'Exploits',
    4: 'Fuzzers',
    5: 'Generic',
    6: 'Non-attack',
    7: 'Reconnaissance',
    8: 'Shellcode',
}

# Define the column names: overall metrics first, then one column per class
fieldnames = ['trial_num', 'test_acc', 'test_loss', 'precision', 'recall',
              *column_map.values()]

# Pull images from directories and assign labels. The iterators do not depend
# on the trial number, so they are created once instead of on every trial.
train_generator = training_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training')

validation_generator = training_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation')

# shuffle=False keeps predictions aligned with test_generator.classes
test_generator = test_gen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for n in range(30):
        # Every trial must start from freshly initialised weights. Re-compiling
        # and re-fitting the same `model` object would continue training from
        # the previous trial, so the trials would not be independent.
        # clone_model() rebuilds the architecture with new layers/weights.
        trial_model = tf.keras.models.clone_model(model)

        # Compile with a fresh optimizer so no optimizer state carries over
        trial_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss='categorical_crossentropy',
            metrics=['accuracy'])

        # Train the model
        history = trial_model.fit(
            train_generator,
            epochs=50,  # maximum number of epochs
            validation_data=validation_generator,
            callbacks=[early_stopping]
        )

        # Evaluation
        test_loss, test_accuracy = trial_model.evaluate(test_generator)

        # Make Predictions
        predictions = trial_model.predict(test_generator)
        integer_predictions = np.argmax(predictions, axis=1)
        true_labels = test_generator.classes

        # Overall precision and recall
        precision = precision_score(true_labels, integer_predictions, average='macro')
        recall = recall_score(true_labels, integer_predictions, average='macro')

        # Calculate confusion matrix. Pinning `labels` guarantees a 9x9 matrix
        # whose row/column i is class i, even when a class never shows up in
        # the labels or predictions of a trial.
        conf_matrix = confusion_matrix(true_labels, integer_predictions,
                                       labels=list(column_map))

        # Prepare the row data
        row_data = {
            'trial_num': n,
            'test_acc': test_accuracy,
            'test_loss': test_loss,
            'precision': precision,
            'recall': recall
        }

        # Metrics for each class (one-vs-rest counts from the confusion matrix)
        for i in range(conf_matrix.shape[0]):
            TP = conf_matrix[i, i]
            FP = conf_matrix[:, i].sum() - TP
            FN = conf_matrix[i, :].sum() - TP
            TN = conf_matrix.sum() - (FP + FN + TP)
            FPR = FP / (FP + TN) if (FP + TN) != 0 else 0

            # Calculate precision and recall (0 when the denominator is empty)
            class_precision = TP / (TP + FP) if (TP + FP) != 0 else 0
            class_recall = TP / (TP + FN) if (TP + FN) != 0 else 0

            # Add class-specific metrics to the row data
            row_data[column_map[i]] = f'TP: {TP} - FP: {FP} - FN: {FN} - TN: {TN} - FPR: {FPR:.4f} - Pr: {class_precision:.4f} - Re: {class_recall:.4f}'

        # Write the row to the CSV file and flush so that finished trials
        # survive an interrupted run
        writer.writerow(row_data)
        csvfile.flush()

        print(f"Trial {n} saved to {csv_filename}")

## Fine-Tuning Process

### Parameter Tuning

In [None]:
import os
import csv
import itertools
from tensorflow.keras import models, layers, callbacks
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Image geometry and the share of training data held out for validation
img_height = 224
img_width = 224
target_size = (img_height, img_width)
validation_split = 0.2

# Grid of values explored by the parameter tuning loop
num_conv_layers = [2, 3, 4]
batch_sizes = [32, 64, 128]
kernel_sizes = [(3, 3), (5, 5), (7, 7)]

# Heat-map image folders used for training and for the final test
train_dir = './multiclass_test/training_set'
test_dir = './multiclass_test/test_set'

# Both generators scale pixel values by 1/255; only the training generator
# reserves a validation subset.
training_gen = ImageDataGenerator(validation_split=validation_split, rescale=1. / 255.0)
test_gen = ImageDataGenerator(rescale=1. / 255.0)

In [None]:
# Prepare CSV file
csv_filename = './multiclass_test/multi_class_tuning_results.csv'
# One name per value in each result row; 'epochs_trained' was missing even
# though every row written by the tuning loop carries it as the last value.
csv_header = ['num_conv_layers', 'batch_size', 'kernel_size', 'test_accuracy',
              'precision', 'recall', 'avg_fpr', 'epochs_trained']

In [None]:
# Build and tune
# The file is opened in append mode so earlier results are kept; only write
# the header when the file is new or empty, otherwise every run would add
# another header line in the middle of the data.
write_header = not os.path.exists(csv_filename) or os.path.getsize(csv_filename) == 0

with open(csv_filename, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    if write_header:
        writer.writerow(csv_header)

    # Iterate through all combinations of parameters
    for num_layers, batch_size, kernel_size in itertools.product(num_conv_layers, batch_sizes, kernel_sizes):
        print(f"Training with {num_layers} conv layers, batch size {batch_size}, kernel size {kernel_size}")

        # Pull images from directories and assign labels
        train_generator = training_gen.flow_from_directory(
            train_dir,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='categorical',
            subset='training')

        validation_generator = training_gen.flow_from_directory(
            train_dir,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='categorical',
            subset='validation')

        # shuffle=False keeps predictions aligned with test_generator.classes
        test_generator = test_gen.flow_from_directory(
            test_dir,
            target_size=target_size,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=False)

        # Build the model: one 32-filter conv stage followed by
        # (num_layers - 1) 64-filter conv stages, each with 2x2 max pooling
        model = models.Sequential()
        model.add(layers.Conv2D(32, kernel_size, activation='relu', input_shape=(img_height, img_width, 3)))
        model.add(layers.MaxPooling2D((2, 2)))

        for _ in range(num_layers - 1):
            model.add(layers.Conv2D(64, kernel_size, activation='relu'))
            model.add(layers.MaxPooling2D((2, 2)))

        model.add(layers.Flatten())
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(9, activation='softmax'))

        # Compile the model
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # Early stopping
        early_stopping = callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )

        # Training
        history = model.fit(
            train_generator,
            epochs=50,
            validation_data=validation_generator,
            callbacks=[early_stopping]
        )

        # Get the number of epochs actually trained
        epochs_trained = len(history.history['loss'])

        # Evaluation
        test_loss, test_accuracy = model.evaluate(test_generator)

        # Make Predictions
        predictions = model.predict(test_generator)
        integer_predictions = np.argmax(predictions, axis=1)
        true_labels = test_generator.classes

        # Calculate precision and recall
        precision = precision_score(true_labels, integer_predictions, average='macro')
        recall = recall_score(true_labels, integer_predictions, average='macro')

        # Calculate confusion matrix
        conf_matrix = confusion_matrix(true_labels, integer_predictions)

        # Calculate FPR for each class (one-vs-rest) and then average
        fprs = []
        for i in range(conf_matrix.shape[0]):
            FP = conf_matrix[:, i].sum() - conf_matrix[i, i]
            TN = conf_matrix.sum() - (conf_matrix[i, :].sum() + conf_matrix[:, i].sum() - conf_matrix[i, i])
            FPR = FP / (FP + TN) if (FP + TN) != 0 else 0
            fprs.append(FPR)

        # Calculate average FPR
        avg_fpr = np.mean(fprs)

        # Save results to CSV through the handle that is already open (the
        # file used to be reopened here, shadowing the outer handle), and
        # flush so finished combinations survive an interrupted run
        writer.writerow([num_layers, batch_size, kernel_size, test_accuracy, precision, recall, avg_fpr, epochs_trained])
        csvfile.flush()

print(f"Results saved to {csv_filename}")

### Hyper-Parameter Tuning

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import keras_tuner as kt
import csv
from datetime import datetime
from random import sample
import pandas as pd

In [None]:
# Heat-map image folders used for training and for the final test
train_dir = './multiclass_test/training_set'
test_dir = './multiclass_test/test_set'

# Input image size, batch size and the share of training data held out
# for validation
img_height = 224
img_width = 224
batch_size = 32
validation_split = 0.2

# Both generators scale pixel values by 1/255; only the training generator
# reserves a validation subset.
training_gen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
test_gen = ImageDataGenerator(rescale=1./255)

In [None]:
# Arguments shared by all three directory iterators
_flow_kwargs = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training and validation subsets both come from the training folder
train_generator = training_gen.flow_from_directory(train_dir, subset='training', **_flow_kwargs)
validation_generator = training_gen.flow_from_directory(train_dir, subset='validation', **_flow_kwargs)

# The test iterator is created with shuffling disabled
test_generator = test_gen.flow_from_directory(test_dir, shuffle=False, **_flow_kwargs)

In [None]:
def build_model(hp):
    """Build and compile a CNN from the hyperparameters sampled in *hp*.

    Tuned values: per conv layer the filter count (32-128, step 32), the
    activation, and an optional dropout with its rate; the dense layer's
    unit count and activation; an optional dropout after the dense layer;
    the optimizer ('adam', 'rmsprop' or 'sgd') and its learning rate
    (1e-4 to 1e-2, log-sampled).

    Args:
        hp: hyperparameter container supplied by the tuner.

    Returns:
        The compiled Sequential model with a 9-way softmax output.
    """
    model = models.Sequential()

    # Convolutional layers
    for i in range(3):
        filters = hp.Int(f'conv_{i+1}_filters', 32, 128, step=32)
        activation = hp.Choice(f'conv_{i+1}_activation', ['relu', 'tanh', 'sigmoid'])

        # Only the first layer declares the input shape. Passing an explicit
        # input_shape=None to the later layers is not the same as omitting
        # the argument and can fail when the layer tries to convert it to a
        # tuple, so the keyword is left out entirely for them.
        extra_kwargs = {'input_shape': (img_height, img_width, 3)} if i == 0 else {}

        model.add(layers.Conv2D(filters, (5, 5), activation=activation, **extra_kwargs))
        model.add(layers.MaxPooling2D((2, 2)))

        # Optional Dropout after conv layer
        if hp.Boolean(f'dropout_after_conv_{i+1}'):
            model.add(layers.Dropout(hp.Float(f'dropout_rate_conv_{i+1}', 0.1, 0.5, step=0.1)))

    # Flatten Layer
    model.add(layers.Flatten())

    # Dense Layer
    model.add(layers.Dense(
        hp.Int('dense_units', 32, 128, step=32),
        activation=hp.Choice('dense_activation', ['relu', 'tanh', 'sigmoid'])
    ))

    # Optional Dropout after dense layer
    if hp.Boolean('dropout_after_dense'):
        model.add(layers.Dropout(hp.Float('dropout_rate_dense', 0.1, 0.5, step=0.1)))

    # Output Layer
    model.add(layers.Dense(9, activation='softmax'))

    # Choose optimizer and learning rate
    optimizer_name = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd'])
    learning_rate = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')

    # Anything that is not 'adam' or 'rmsprop' falls back to SGD
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'rmsprop': keras.optimizers.RMSprop,
    }
    optimizer_cls = optimizer_classes.get(optimizer_name, keras.optimizers.SGD)
    optimizer = optimizer_cls(learning_rate=learning_rate)

    # Compile the model
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
# Folder where the tuner stores its trial data
local_dir = './multiclass_test/tuning'

# Random search over the space defined in build_model, ranking trials by
# validation accuracy: up to 50 hyperparameter combinations, each one
# trained a single time.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=50,
    executions_per_trial=1,
    project_name='image_classification_70',
    directory=local_dir,
)

In [None]:
# Early stopping callback (fixed patience of 3 epochs on validation loss);
# the best weights seen are restored when training stops.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [None]:
# Run the hyperparameter search: each trial trains for at most 50 epochs and
# is cut short by the early stopping callback.
tuner.search(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[early_stopping],
)

In [None]:
# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Save best hyperparameters to CSV
csv_filename = './multiclass_test/tuning/best_hyper_params.csv'

with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)

    # Write header and data: the header row was announced but never written,
    # which left this file as the only results CSV without column names
    writer.writerow(['parameter', 'value'])
    writer.writerows(best_hps.values.items())

print(f"Best hyperparameters saved to {csv_filename}")