In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)
from copy import deepcopy

# Input CSV files, given as relative paths.
FRIDAY_PATH = "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv"
WEDNESDAY_PATH = "Wednesday-workingHours.pcap_ISCX.csv"

# Read each file into its own frame: Friday first, then Wednesday.
df_fri, df_wed = (
    pd.read_csv(csv_path) for csv_path in (FRIDAY_PATH, WEDNESDAY_PATH)
)

# Stack both frames row-wise; ignore_index=True discards the per-file row
# indices and assigns one fresh running index to the combined frame.
data = pd.concat((df_fri, df_wed), ignore_index=True)

In [None]:
# Bar chart of how many rows carry each class label.
# Relies on `data` (the concatenated frame) from the loading cell.
sns.set_theme(style="white")

# The source column is named " Label" (leading space). Store a cleaned copy
# under "Label" with surrounding whitespace stripped from the values.
data["Label"] = data[" Label"].astype(str).str.strip()

# One row per distinct label with its row count, most frequent first.
class_counts = (
    data["Label"]
    .value_counts()
    .rename_axis("Label")
    .reset_index(name="Count")
)

# Explicit figure/axes so every call below targets the same Axes object.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(
    data=class_counts,
    x="Label",
    y="Count",
    # seaborn >= 0.13 deprecates `palette` without `hue` (FutureWarning,
    # removal planned for 0.14). Mapping hue to the x variable keeps one
    # colour per bar; legend=False suppresses the redundant legend.
    hue="Label",
    palette="tab10",   # similar categorical palette
    legend=False,
    ax=ax,
)

# Annotate each bar with its count using a thousands separator, like 537,749
for bar in ax.patches:
    height = bar.get_height()
    ax.annotate(
        f"{int(height):,}",
        (bar.get_x() + bar.get_width() / 2, height),  # top-centre of the bar
        ha="center",
        va="bottom",
        fontsize=9,
    )

ax.set_xlabel("Frequency Distribution of Class Label")
ax.set_ylabel("Count")
ax.tick_params(axis="x", labelrotation=25)
fig.tight_layout()
plt.show()


In [None]:
# Collapse the class labels into two groups: rows whose label is exactly
# "BENIGN" keep that name, every other label is mapped to "DoS/DDoS".
labels = data["Label"].astype(str).str.strip()
is_benign = labels == "BENIGN"
bin_labels = np.where(is_benign, "BENIGN", "DoS/DDoS")

# Distinct group names (sorted by np.unique) and the number of rows in each.
unique, counts = np.unique(bin_labels, return_counts=True)

# Draw on a dedicated figure/axes pair for this chart.
fig_bin, ax_bin = plt.subplots(figsize=(5, 4))
ax_bin.bar(unique, counts, color="steelblue")

# Write the comma-formatted count just above each bar.
for idx in range(len(unique)):
    ax_bin.text(
        unique[idx],
        counts[idx],
        f"{int(counts[idx]):,}",
        ha="center",
        va="bottom",
        fontsize=9,
    )

ax_bin.set_xlabel("Class")
ax_bin.set_ylabel("Count")
fig_bin.tight_layout()
plt.show()