In [None]:
### Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
### Configure pandas display options and read the CSV file
# Render every column of a frame and show floats with two decimal places.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:.2f}".format)

# Raw dataset; the path is resolved relative to the current working directory.
df = pd.read_csv("data.csv")

In [None]:
### Data introduction
# Columns that present() removes before summarising the data.
DROP_LABELS = [
    "Flow ID",
    "Src IP",
    "Src Port",
    "Dst IP",
    "Dst Port",
    "Timestamp",
]
# Target columns that present() groups by when counting rows.
TARGET_LABELS = [
    "Label",
    "Traffic Type",
    "Traffic Subtype",
]

# Drop unnecessary columns and present data
def present(df: pd.DataFrame) -> pd.DataFrame:
    """Drop the ``DROP_LABELS`` columns and duplicate rows, then summarise.

    Prints the frame's shape before and after cleaning, displays the
    ``describe()`` statistics of the cleaned frame, and displays the number
    of rows for each combination of the ``TARGET_LABELS`` columns.

    Args:
        df: Frame containing every column named in ``DROP_LABELS`` and
            ``TARGET_LABELS``. The caller's frame is not modified.

    Returns:
        A new frame without the ``DROP_LABELS`` columns and without
        duplicate rows.

    Raises:
        KeyError: If a ``DROP_LABELS`` or ``TARGET_LABELS`` column is
            missing from ``df``.
    """
    print("Dataset before removing unnecessary data and duplicates has", df.shape[0], "rows and", df.shape[1], "columns")
    print("Removing unnecessary data and duplicates ...")
    # drop() and drop_duplicates() each return a new frame, so the input
    # frame is left untouched and re-running this function is repeatable.
    cleaned = df.drop(columns=DROP_LABELS).drop_duplicates()
    print("Dataset after removing unnecessary data and duplicates has", cleaned.shape[0], "rows and", cleaned.shape[1], "columns")

    # `display` is not imported in this file; it is expected to be provided
    # by the IPython/Jupyter kernel that runs the notebook.
    display(cleaned.describe())
    display(cleaned.groupby(TARGET_LABELS).size().reset_index(name="Counts"))

    return cleaned

# `data` is the frame returned by present(); `df` itself is left unchanged.
data = present(df)

In [None]:
### Data Visualization
# Columns plotted by histogram(), one figure per column.
HIST_LABELS = [
    "Flow Duration",
    "Flow Bytes/s",
    "Fwd Packet Length Mean",
    "Flow IAT Mean",
    "Packet Length Variance",
    "Average Packet Size",
    "Active Mean",
    "Idle Mean",
]
# Columns plotted by boxplotting(), one figure per column.
BAR_LABELS = ["Fwd PSH Flags", "Protocol"]

def histogram(df: pd.DataFrame) -> None:
    """Show a 20-bin step histogram with a KDE curve for each ``HIST_LABELS`` column.

    Args:
        df: Frame containing every column named in ``HIST_LABELS``.
    """
    for column in HIST_LABELS:
        # One standalone 8x4 figure per column, drawn on an explicit Axes.
        fig, ax = plt.subplots(figsize=(8, 4))
        sns.histplot(data=df[column], bins=20, element="step", kde=True, ax=ax)
        ax.set_title(f"Histogram: {column}")
        ax.set_xlabel(column)
        ax.set_ylabel("Frequency")
        fig.tight_layout()
        plt.show()

def boxplotting(df: pd.DataFrame) -> None:
    """Show a horizontal box plot for each column named in ``BAR_LABELS``.

    Args:
        df: Frame containing every column named in ``BAR_LABELS``.
    """
    for column in BAR_LABELS:
        # One standalone 8x4 figure per column, drawn on an explicit Axes.
        fig, ax = plt.subplots(figsize=(8, 4))
        sns.boxplot(x=df[column], ax=ax)
        ax.set_title(f"Boxplot: {column}")
        ax.set_xlabel(column)
        fig.tight_layout()
        plt.show()