## Dataset visualization.

This notebook includes some tools to visualize the dataset and its properties.

In [2]:
from VanillaGCN import datanetAPI
import numpy as np
import random
import matplotlib.pyplot as plt
from astropy.visualization import hist

ModuleNotFoundError: No module named 'torch_sparse'

First of all we define a set of variables that will be used to filter the dataset:
- ```data_folder_name```: the training folder (relative to the repository root)
- ```max_avg_lambda_range```: `[min_value, max_value]` range of the maximum average lambda (i.e. traffic intensity) used to select samples
- ```net_size_lst```: list of the network topology sizes to use

In [None]:
# Where the dataset lives: <data_folder_name>/results/dataset/
data_folder_name = "training"
src_path = f"{data_folder_name}/results/dataset/"

# Filter on the maximum average lambda (traffic intensity),
# expressed as [min_value, max_value]
max_avg_lambda_range = [10, 10000]

# Network topology sizes to keep
net_size_lst = list(range(4, 11))


The following cell reads the selected samples from the dataset files into memory.

In [None]:
# Iterate the reader once and keep every sample it yields in memory
reader = datanetAPI.DatanetAPI(src_path, max_avg_lambda_range, net_size_lst)
samples_lst = list(reader)
print("Number of selected samples: ", len(samples_lst))

#### CDF of links load for all samples

In [None]:
# Collect the utilization of every link of every sample
loads_lst = []
for s in samples_lst:
    port_stats_obj = s.get_port_stats()
    # For each node
    for src_port_stats in port_stats_obj:
        # For each adjacent node
        for j in src_port_stats:
            # Append the feature to the list. For this scenario, link utilization
            loads_lst.append(src_port_stats[j]["utilization"])

# Empirical CDF: sorted loads on the x axis, cumulative fraction on the y axis.
# np.linspace produces the same 0..1 ramp as arange(n) / (n - 1) but stays
# well defined (no division by zero) when fewer than two links were collected.
x = np.sort(loads_lst)
y = np.linspace(0.0, 1.0, num=len(x))
plt.plot(x, y)
plt.title("CDF of the link load")
# The load is the quantity on the x axis; the y axis is the cumulative fraction
plt.xlabel("load")
plt.ylabel("CDF")
plt.show()
plt.close()

#### Histogram showing the delay per path of a chosen sample

In [None]:
# Distribution of the average per-path delay inside one sample.
# The sample is drawn at random; use the commented line to pin a specific one.
s = random.choice(samples_lst)
# s= samples_lst[0]
performance_matrix = s.get_performance_matrix()
# One entry per (i, j) pair with i != j: the average delay of the path i,j
delays_lst = [
    performance_matrix[i,j]["AggInfo"]["AvgDelay"]
    for i in range(s.get_network_size())
    for j in range(s.get_network_size())
    if i != j
]

# astropy's hist is used so that the bins are chosen by bins='blocks'
hist(delays_lst, bins='blocks', histtype='stepfilled', alpha=0.2, density=True)
plt.title("Histogram showing the delay per path")
plt.xlabel("Delay (s)")
plt.show()
plt.close()

        


### Heatmap showing the delay between the different nodes for a chosen sample


In [None]:
# Heatmap of the average delay between every pair of nodes of one sample.
# The first sample is used; switch to the commented line to pick one at random.
# s = random.choice(samples_lst)
s = samples_lst[0]
n_nodes = s.get_network_size()
performance_matrix = s.get_performance_matrix()

# delays_matrix[i, j] holds the average delay of the path i,j;
# the diagonal (i == j) is left at 0.
delays_matrix = np.zeros((n_nodes, n_nodes))
for i in range(n_nodes):
    for j in range(n_nodes):
        if i == j:
            continue
        delays_matrix[i, j] = performance_matrix[i, j]["AggInfo"]["AvgDelay"]

# Draw the matrix as an image with one tick per node index
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(delays_matrix)
labels = np.arange(n_nodes)
ax.set_xticks(labels, labels=labels)
ax.set_yticks(labels, labels=labels)

cbar = fig.colorbar(im, ax=ax)
cbar.ax.set_ylabel("Delay (s)", rotation=-90, va="bottom")

# Write the value inside each cell. The text colour switches at 66% of the
# maximum delay (presumably to keep it legible against the cell colour).
threshold_val = np.max(delays_matrix) * 0.66
for i in range(n_nodes):
    for j in range(n_nodes):
        c = "w" if delays_matrix[i, j] < threshold_val else "k"
        # Three significant digits keep the annotation inside its cell;
        # the raw float repr is long enough to overlap neighbouring cells.
        ax.text(j, i, f"{delays_matrix[i, j]:.3g}",
                ha="center", va="center", color=c)

# Label through the Axes object so the labels cannot land on the colorbar axes
ax.set_title("Heatmaps of delays between each node")
ax.set_xlabel("Node")
ax.set_ylabel("Node")
plt.show()
plt.close()

#### Histogram showing the delay per path across all samples

In [None]:
# Gather the average delay of every path i,j (i != j) over the whole dataset
delays_lst = []
for s in samples_lst:
    performance_matrix = s.get_performance_matrix()
    delays_lst.extend(
        performance_matrix[i,j]["AggInfo"]["AvgDelay"]
        for i in range(s.get_network_size())
        for j in range(s.get_network_size())
        if i != j
    )

hist(delays_lst, bins='blocks', histtype='stepfilled', alpha=0.2, density=True)
plt.title("Histogram showing the delay per path across all samples")
plt.xlabel("Delay (s)")
plt.show()
plt.close()

In [None]:
# Delay histogram over the whole dataset, restricted to paths carrying ToS 0
delays_lst = []
for s in samples_lst:
    performance_matrix = s.get_performance_matrix()
    traffic_matrix = s.get_traffic_matrix()
    # Only the first flow of each path is inspected (one flow per path).
    # The i != j test comes first so diagonal entries are never accessed.
    delays_lst.extend(
        performance_matrix[i,j]["Flows"][0]["AvgDelay"]
        for i in range(s.get_network_size())
        for j in range(s.get_network_size())
        if i != j and traffic_matrix[i,j]["Flows"][0]["ToS"] == 0
    )

hist(delays_lst, bins='blocks', histtype='stepfilled', alpha=0.2, density=True)
plt.title("Histogram of delays for all dataset and ToS 0")
plt.xlabel("Delay (s)")
plt.show()
plt.close()

#### Histogram of the losses for all the dataset
If there are no losses, the histogram will not be generated!

In [None]:
# Plot histogram of the per-path loss rate (%) for all the dataset
losses_lst = []
# Total number of dropped packets over all paths. This is what gets reported
# as "number of losses"; summing the percentages would not be a count.
n_losses = 0
for s in samples_lst:
    performance_matrix = s.get_performance_matrix()
    traffic_matrix = s.get_traffic_matrix()
    for i in range(s.get_network_size()):
        for j in range(s.get_network_size()):
            if i == j:
                continue
            pkts_gen = traffic_matrix[i,j]["AggInfo"]["PktsGen"]
            pkts_drop = performance_matrix[i,j]["AggInfo"]["PktsDrop"]
            n_losses += pkts_drop
            # A path that generated no packets has no defined loss rate;
            # skipping it avoids a division by zero.
            if pkts_gen == 0:
                continue
            losses_lst.append(100*(pkts_drop/pkts_gen))
print("Total number of losses:", n_losses)
# The histogram is only drawn when at least one packet was dropped
if n_losses > 0 and losses_lst:
    hist(losses_lst, bins='blocks', histtype='stepfilled',alpha=0.2, density=True)
    plt.title("Histogram of losses for all dataset")
    plt.xlabel("Losses (%)")
    plt.show()
    plt.close()


#### Histogram of the generated bandwidth per path for all the dataset

In [None]:
# Distribution of the average generated bandwidth of every path i,j (i != j)
# over the whole dataset
bw_lst = []
for s in samples_lst:
    traffic_matrix = s.get_traffic_matrix()
    bw_lst.extend(
        traffic_matrix[i,j]["AggInfo"]["AvgBw"]
        for i in range(s.get_network_size())
        for j in range(s.get_network_size())
        if i != j
    )

hist(bw_lst, bins='blocks', histtype='stepfilled', alpha=0.2, density=True)
plt.title("Histogram of generated Bw per path for all dataset")
plt.xlabel("Bw (bps)")
plt.show()
plt.close()