In [None]:
!pip install tensorflow
!pip install keras

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models

In [None]:
# Data access: attach Google Drive to the Colab filesystem so files stored
# there can be opened through ordinary paths.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Location of the combined dataset on the mounted Drive.
CSV_PATH = '/content/drive/MyDrive/dataset/multilabel/Combined.csv'

# Columns removed from the frame right after loading.
drop_cols = ['Unnamed: 0', 'Label', 'Attack Tool', 'sVid', 'dVid', 'SrcTCPBase', 'DstTCPBase']

# Read the CSV and discard whichever of the listed columns it actually
# contains; names that are absent from the file are skipped silently.
df = pd.read_csv(CSV_PATH).drop(columns=drop_cols, errors='ignore')

In [None]:
# Encode the attack-type labels as integer class ids; `le` keeps the mapping
# back to the original label values (le.classes_).
# NOTE(review): the encoder is fitted before the later dropna(), so
# le.classes_ / num_classes may include classes that no longer have rows
# once incomplete records are removed.
le = LabelEncoder()
df['Attack Type'] = le.fit_transform(df['Attack Type'])
# Snapshot of the encoded labels taken at this point, i.e. before any later
# row filtering.
y_all = df['Attack Type'].values
num_classes = len(le.classes_)

# One-hot encode the protocol column when it is present.
# An explicit numeric dtype is required: since pandas 2.0 get_dummies returns
# bool columns by default, and bool is not matched by the later
# select_dtypes(include=[np.number]) feature selection, so the protocol
# indicators would silently be left out of the model inputs.
if 'Proto' in df.columns:
    df = pd.get_dummies(df, columns=['Proto'], dtype=float)

In [None]:
# Remove every row that still contains a missing value in any column.
df = df.dropna()

# Model inputs: all numeric columns except the encoded target.
FEATURE_COLS = (
    df.drop(columns=['Attack Type'])
      .select_dtypes(include=[np.number])
      .columns
      .tolist()
)

# Fit the scaler on the training rows only. Fitting it on the full frame
# would let the mean/variance of the held-out rows leak into the training
# features and make the test metrics optimistic.
# train_test_split (without stratification) chooses rows purely from the
# sample count and random_state, so splitting the row positions with the same
# test_size / random_state as the split cell reproduces exactly the rows that
# end up in X_train. Keep these two arguments in sync with that cell.
train_positions = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)[0]

scaler = StandardScaler()
scaler.fit(df.iloc[train_positions][FEATURE_COLS])

# Apply the training-set statistics to every row (train and test alike).
df[FEATURE_COLS] = scaler.transform(df[FEATURE_COLS])

In [None]:
# Materialise the integer targets and the feature matrix as NumPy arrays.
Y = df['Attack Type'].to_numpy()
X = df[FEATURE_COLS].to_numpy()

# Quick sanity check of the array dimensions.
for caption, array in (("X shape:", X), ("Y shape:", Y)):
    print(caption, array.shape)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Balanced class weights computed from the training labels.
present_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=present_classes, y=y_train)

# Key every weight by its actual class id. enumerate() would key by position
# in `present_classes`, which shifts the weights onto the wrong classes as
# soon as some class id has no sample in the training split.
# Classes without training samples get a neutral weight of 1.0: the value is
# never applied (no training row carries that label) but keeps the keys a
# contiguous 0..num_classes-1 range.
# NOTE(review): some Keras versions reject non-contiguous class_weight keys - confirm.
class_weight_dict = {class_id: 1.0 for class_id in range(num_classes)}
class_weight_dict.update(
    {int(class_id): float(weight) for class_id, weight in zip(present_classes, class_weights)}
)

In [None]:
# Fully connected classifier: two ReLU hidden layers (128 and 64 units), each
# followed by 30% dropout, and a softmax output with one unit per class.
n_features = X.shape[1]

model = models.Sequential()
model.add(layers.Input(shape=(n_features,)))
for hidden_units in (128, 64):
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dropout(0.3))
model.add(layers.Dense(num_classes, activation='softmax'))

# Sparse categorical cross-entropy matches the integer-encoded targets.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train for 10 epochs in mini-batches of 64, weighting the loss per class.
# NOTE(review): the held-out test split is also passed as validation data,
# so the per-epoch validation metrics are measured on the evaluation set.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    class_weight=class_weight_dict,
    validation_data=(X_test, y_test),
)

In [None]:
# The predicted class of each test row is the index of its largest output
# probability.
class_probabilities = model.predict(X_test)
y_pred = np.argmax(class_probabilities, axis=1)

# Per-class precision / recall / F1, listing every encoded class under its
# original label name.
print("\nClassification Report:")
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(num_classes),
    target_names=le.classes_,
)
print(report)

In [None]:
# Persist the trained network to the current working directory.
saved_model_path = "multiclass_cnn_model.h5"
model.save(saved_model_path)
print("✅ Model saved as multiclass_cnn_model.h5")

In [None]:
import os

# Make sure the artifact directory exists; an already existing directory is
# not treated as an error.
os.makedirs(name='model', exist_ok=True)

In [None]:
import joblib

# Persist the fitted scaler and label encoder so that inference can apply
# the same feature scaling and label mapping that were used for training.
for fitted_object, target_path in (
    (scaler, 'model/scaler.pkl'),
    (le, 'model/label_encoder.pkl'),
):
    joblib.dump(fitted_object, target_path)

print("✅ scaler.pkl and label_encoder.pkl saved in ./model/")