In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
# Location of the CSV file that the loading cell passes to `pd.read_csv`.
SOURCE_PATH = 'dataset/dataset_sdn.csv'

In [None]:
# Read the CSV into the notebook-wide DataFrame.
data = pd.read_csv(SOURCE_PATH)

# Build an absolute timestamp: `dt` is scaled by 1000 and shifted by a fixed
# millisecond offset before being parsed as epoch milliseconds.
# NOTE(review): presumably `dt` is in seconds relative to the capture start and
# the offset is that start time — confirm against the dataset description.
offset = 1728929700000
epoch_ms = offset + data['dt'] * 1000
data['timestamp'] = pd.to_datetime(epoch_ms, unit='ms')

# Swap the numeric class codes for readable names. From here on `label` holds
# the strings 'Malicious' / 'Benign', not 1 / 0.
label_names = {0: 'Benign', 1: 'Malicious'}
data['label'] = data['label'].map(label_names)

data.head()

# Label Distribution

In [None]:
df = data

# Count the rows per label and shape the result into a two-column frame
# ('label', 'count') that plotly can consume directly.
label_counts = (
    df['label']
    .value_counts()
    .rename_axis('label')
    .reset_index(name='count')
)

fig = px.pie(
    label_counts,
    names='label',
    values='count',
    title='Label Distribution',
)
fig.show()

# Traffic Volume Over Time

In [None]:
# `df` is the same object as `data`, so the helper column added below is also
# visible on `data`.
df = data

# Bucket every record into the hour it falls in (floor = round *down* to the
# start of the hour). The lower-case 'h' alias is used because the upper-case
# 'H' alias is deprecated in pandas >= 2.2 and emits a FutureWarning.
df['dt_hour'] = df['timestamp'].dt.floor('h')

# Total packets and bytes per hour.
hourly_traffic = (
    df.groupby('dt_hour')
    .agg({'pktcount': 'sum', 'bytecount': 'sum'})
    .reset_index()
)

# Scale the byte totals down by 1024 so both series share one y-axis more
# comfortably; after this line `bytecount` is no longer in raw bytes.
hourly_traffic['bytecount'] = hourly_traffic['bytecount'] / 1024

fig = px.line(
    hourly_traffic,
    x='dt_hour',                  # X-axis: hourly bucket
    y=['pktcount', 'bytecount'],  # Y-axis: one line per traffic metric
    title='Traffic Volume Over Time',
    labels={'value': 'Traffic Volume', 'dt_hour': 'Time'},
    markers=True,                 # Mark each hourly data point
)

fig.update_layout(
    legend_title_text='Traffic Metrics',
    yaxis_title='Volume',
    xaxis_title='Time',
    template='plotly',
)
fig.show()

# Packet Rate vs. Byte Rate

In [None]:
df = data

# Display names for the two plotted columns, shared by the figure's `labels`
# mapping and the explicit axis titles below.
axis_labels = {
    'pktrate': 'Packet Rate (pkts/sec)',
    'byteperflow': 'Byte Rate (bytes/flow)',
}

# One point per record, coloured by its label; the label is also listed in the
# hover tooltip.
fig = px.scatter(
    df,
    x='pktrate',
    y='byteperflow',
    color='label',
    hover_data=['label'],
    labels=axis_labels,
    title='Packet Rate vs. Byte Rate',
)

# Layout tweaks: axis titles, legend heading and theme.
fig.update_layout(
    template='plotly',
    legend_title='Traffic Label',
    xaxis_title=axis_labels['pktrate'],
    yaxis_title=axis_labels['byteperflow'],
)

fig.show()

# Traffic Volume by Ports

In [None]:
# Aggregate to one row per port before plotting. Passing the raw frame to
# `px.bar` draws one stacked segment (and one text label) per record, which is
# slow to render and unreadable; summing first yields the same bar lengths with
# a single labelled bar per port.
port_traffic = (
    data.groupby('port_no', as_index=False)['bytecount']
    .sum()
    .sort_values(by='bytecount', ascending=False)
)

fig = px.bar(
    port_traffic,
    x='bytecount',         # X-axis: total byte count for the port
    y='port_no',           # Y-axis: port
    orientation='h',       # Horizontal bars
    title='Traffic Volume by Ports',
    labels={'bytecount': 'Traffic Volume (Bytes)', 'port_no': 'Port Number'},
    text='bytecount',      # Print the total on each bar
)
fig.update_layout(
    xaxis_title='Traffic Volume (Bytes)',
    yaxis_title='Port Number',
    template='plotly',
    yaxis=dict(type='category'),  # Treat port numbers as categories, not a numeric scale
)
fig.show()

In [None]:
# Number of records per source address, most frequent first. Computed once and
# reused for both the bars and their annotations.
src_counts = data['src'].value_counts()

plt.barh(list(src_counts.index), src_counts.values, color='lawngreen')

# Print each count at the end of its bar; bars sit at y = 0, 1, 2, ... in the
# same order as `src_counts`, and the small offset nudges the text downward.
for idx, val in enumerate(src_counts.values):
    plt.text(x=val, y=idx - 0.2, s=str(val), color='r', size=13)

plt.xlabel('Number of Requests')
plt.ylabel('IP address of sender')
plt.title('Number of all requests')
plt.show()

In [None]:
# Number of malicious records per source address, most frequent first.
# The loading cell remaps `label` from 1/0 to 'Malicious'/'Benign', so the
# filter must compare against the string; `label == 1` never matches and
# leaves the chart empty.
attack_src_counts = data.loc[data['label'] == 'Malicious', 'src'].value_counts()

plt.barh(list(attack_src_counts.index), attack_src_counts.values, color='blue')

# Print each count at the end of its bar; bars sit at y = 0, 1, 2, ... in the
# same order as `attack_src_counts`.
for idx, val in enumerate(attack_src_counts.values):
    plt.text(x=val, y=idx - 0.2, s=str(val), color='r', size=13)

plt.xlabel('Number of Requests')
plt.ylabel('IP address of sender')
plt.title('Number of Attack requests')
plt.show()

In [None]:
# Records per source address: all traffic, and the malicious subset.
# The loading cell remaps `label` from 1/0 to 'Malicious'/'Benign', so the
# filter must compare against the string; `label == 1` never matches.
all_src_counts = data['src'].value_counts()

# Align the malicious counts to the ordering of the overall counts (0 where a
# source has no malicious records) so that bar positions and text annotations
# refer to the same source on every row.
malicious_src_counts = (
    data.loc[data['label'] == 'Malicious', 'src']
    .value_counts()
    .reindex(all_src_counts.index, fill_value=0)
)

sources = list(all_src_counts.index)
plt.barh(sources, all_src_counts.values, color='lawngreen', label='All')
plt.barh(sources, malicious_src_counts.values, color='blue', label='malicious')

# Total per source, printed at the end of the green bar.
for idx, val in enumerate(all_src_counts.values):
    plt.text(x=val, y=idx - 0.2, s=str(val), color='r', size=13)

# Malicious count per source, printed at the end of the blue bar; sources with
# no malicious records get no annotation.
for idx, val in enumerate(malicious_src_counts.values):
    if val:
        plt.text(x=val, y=idx - 0.2, s=str(val), color='w', size=13)

plt.xlabel('Number of Requests')
plt.ylabel('IP address of sender')
plt.legend()
plt.title('Number of requests from different IP addresses')
plt.show()

In [None]:
# Records per protocol: all traffic, and the malicious subset.
# The loading cell remaps `label` from 1/0 to 'Malicious'/'Benign', so the
# filter must compare against the string; `label == 1` never matches.
protocol_counts = data['Protocol'].value_counts()

# Align the malicious counts to the ordering of the overall counts (0 where a
# protocol has no malicious records) so bars and annotations line up.
malicious_protocol_counts = (
    data.loc[data['label'] == 'Malicious', 'Protocol']
    .value_counts()
    .reindex(protocol_counts.index, fill_value=0)
)

protocols = list(protocol_counts.index)
plt.bar(protocols, protocol_counts.values, color='r', label='All')
plt.bar(protocols, malicious_protocol_counts.values, color='b', label='malicious')

# Annotate each bar with its count, derived from the data rather than
# hard-coded, so the numbers stay correct if the dataset changes. Bars sit at
# x = 0, 1, 2, ...; the offsets shift the text left and just above the bar top.
for idx, val in enumerate(protocol_counts.values):
    plt.text(x=idx - 0.15, y=val + 200, s=str(val), color='black', size=17)

for idx, val in enumerate(malicious_protocol_counts.values):
    if val:
        plt.text(x=idx - 0.15, y=val + 200, s=str(val), color='w', size=17)

plt.xlabel('Protocol')
plt.ylabel('Count')
plt.legend()
plt.title('The number of requests from different protocols')
plt.show()

In [None]:
# NOTE(review): the original cell listed "tx_bytes, tx_kbps, switch" here —
# presumably other columns considered for plotting; confirm or remove.

# Histogram of the `dur` column using 20 bins.
duration = data['dur']
plt.hist(duration, color='b', bins=20)
plt.title('Duration')
plt.show()

In [None]:
# Bare expression: the notebook renders `data` as this cell's output.
data

# Test