In [42]:
import pickle
import os
import itertools

In [43]:
def unknown_configuration(modules_per_size, max_number_modules):
    """List, per cascade size, the cascades that contain an unseen module.

    A module counts as "seen" when it appears as a key of
    ``modules_per_size[1]``, i.e. on its own in a cascade of size 1.

    Args:
        modules_per_size: sequence indexed by cascade size; each entry is a
            dict whose keys are cascade names, i.e. module names joined
            with "-".
        max_number_modules: exclusive upper bound on the cascade sizes that
            are inspected; also the length of the returned list.

    Returns:
        A list of ``max_number_modules`` lists. Entry ``i`` holds, in dict
        order, every key of ``modules_per_size[i]`` that has at least one
        module missing from the size-1 keys. Entries 0 and 1 stay empty.
    """
    never_seen_config = [[] for _ in range(max_number_modules)]
    # A set gives O(1) membership tests instead of scanning a list each time.
    seen_modules = set(modules_per_size[1])
    for size in range(2, max_number_modules):
        never_seen_config[size] = [
            cascade
            for cascade in modules_per_size[size]
            # One unseen module is enough to flag the whole cascade.
            if any(name not in seen_modules for name in cascade.split("-"))
        ]
    return never_seen_config

In [44]:
# Root folder; each dataset lives in its own sub-folder holding X.pkl and y.pkl.
data_folder = "data/"
# Names of the dataset sub-folders processed by the analysis loop.
datasets = ['c1', 'c2', 'c3', 'c4']
# NOTE(review): not referenced in the visible cells — presumably a smaller
# subset for quick runs; confirm or remove.
reduced_datasets = ['c1']
# Length of the per-size lookup lists. They are indexed by cascade size, so
# sizes 0 .. max_number_modules - 1 can be stored.
max_number_modules = 9

In [45]:
for dataset in datasets:

    # One dict per cascade size (list index == number of modules in the
    # cascade), mapping the cascade name to its list of [input, target] samples.
    modules_per_size = [{} for _ in range(max_number_modules)]

    dataset_folder = os.path.join(data_folder, dataset)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context managers make sure the file handles are closed.
    with open(os.path.join(dataset_folder, "X.pkl"), "rb") as x_file:
        X = pickle.load(x_file)
    with open(os.path.join(dataset_folder, "y.pkl"), "rb") as y_file:
        y = pickle.load(y_file)

    for x_, y_ in zip(X, y):
        # x_[0] holds one entry per module of the cascade: entry[0] is the
        # module name, entry[1][0] and entry[1][1] are two values appended
        # to it. x_[1] is stored as the sample input next to the target y_.
        sequence_size = len(x_[0])
        dict_key = "-".join(
            entry[0] + "_" + str(entry[1][0]) + "_" + str(entry[1][1])
            for entry in x_[0]
        )
        # Create the sample list on first sight of this cascade, then append.
        modules_per_size[sequence_size].setdefault(dict_key, []).append([x_[1], list(y_)])

    print(" [+] Dataset", dataset, " information")
    print(" [=] Number of different cascades of each size")
    for i in range(1, max_number_modules):
        different_cascades = len(modules_per_size[i])
        print(" [Cascade size " + str(i) + "] ", different_cascades, " possibilities")

    # A known cascade is composed only of modules already seen alone in
    # cascades of size 1. Thus, an unknown cascade contains at least one
    # module that was never found alone in a cascade of size 1.
    never_seen_config = unknown_configuration(modules_per_size, max_number_modules)
    print(" [=] Number of unknown cascades of each size")
    for i in range(2, max_number_modules):
        never_seen = len(never_seen_config[i])
        print(" [Cascade size " + str(i) + "] ", never_seen, " unseen")

    # The largest size that can be stored is the last index of the list.
    print(" [=] Biggest cascade")
    print(" [.]", list(modules_per_size[max_number_modules - 1].keys()))

    print(" [=] Cascades of size 1")
    print(" [.]", list(modules_per_size[1].keys()))

    print(" [=] Number of cascades of each size")
    total_total = 0
    for i in range(1, max_number_modules):
        # Count every recorded sample across all cascades of this size.
        total_elem = sum(len(samples) for samples in modules_per_size[i].values())
        total_total += total_elem
        print(" [Cascade size " + str(i) + "] ", total_elem, " ocurrences")
    print(" [.] Total number of samples:", total_total)

 [+] Dataset c1  information
 [=] Number of different cascades of each size
 [Cascade size 1]  5  possibilities
 [Cascade size 2]  7  possibilities
 [Cascade size 3]  6  possibilities
 [Cascade size 4]  5  possibilities
 [Cascade size 5]  4  possibilities
 [Cascade size 6]  3  possibilities
 [Cascade size 7]  2  possibilities
 [Cascade size 8]  1  possibilities
 [=] Number of unknown cascades of each size
 [Cascade size 2]  0  unseen
 [Cascade size 3]  0  unseen
 [Cascade size 4]  0  unseen
 [Cascade size 5]  0  unseen
 [Cascade size 6]  0  unseen
 [Cascade size 7]  0  unseen
 [Cascade size 8]  0  unseen
 [=] Biggest cascade
 [.] ['EDFA_24.0_20.0-SMF_5.6_0-EDFA_24.0_20.0-SMF_5.2_0-EDFA_24.0_20.0-SMF_0_5.0-EDFA_24.0_20.0-SMF_0_5.4']
 [=] Cascades of size 1
 [.] ['EDFA_24.0_20.0', 'SMF_5.6_0', 'SMF_5.2_0', 'SMF_0_5.0', 'SMF_0_5.4']
 [=] Number of cascades of each size
 [Cascade size 1]  1728  ocurrences
 [Cascade size 2]  1512  ocurrences
 [Cascade size 3]  1296  ocurrences
 [Cascade siz

In [63]:
import matplotlib.animation
import matplotlib.pyplot as plt
import numpy as np
# Render animations as interactive JavaScript/HTML when displayed in the notebook.
plt.rcParams["animation.html"] = "jshtml"
# Resolution used for figures created from here on.
plt.rcParams['figure.dpi'] = 100  
# Turn interactive mode off so figures are not drawn automatically.
plt.ioff()
# Apply the 'ggplot' style sheet to subsequent plots.
plt.style.use('ggplot')

In [72]:
# Module names available for inspection (keys of the size-1 cascades).
# NOTE(review): `modules_per_size` is whatever the dataset loop left behind,
# presumably the last dataset processed — confirm before relying on it.
modules_per_size[1].keys()

dict_keys(['EDFA_20.0_0.0', 'SMF_1.6_0', 'EDFA_20.0_17.0', 'SMF_1.3_0', 'EDFA_20.0_18.0', 'SMF_0_1.4', 'EDFA_20.0_20.0'])

In [73]:
# Cascade to visualise; must be a key of modules_per_size[module_size].
# Another size-1 option: 'EDFA_20.0_0.0'
module = 'SMF_1.6_0'
# Number of modules in the selected cascade (index into modules_per_size).
module_size = 1

In [74]:
def animate(t):
    """Draw frame ``t`` of the animation.

    Uses the notebook-level ``fig``, ``ax1``, ``ax2``, ``X_train``,
    ``y_train`` and ``module``: the figure title is set to the current
    module name, then each axis is cleared and redrawn with sample ``t``
    (``X_train[t]`` on ``ax1``, ``y_train[t]`` on ``ax2``).
    """
    fig.suptitle(module, fontsize=16)
    # Same clear-then-plot step for both panels, first panel first.
    for axis, series in ((ax1, X_train), (ax2, y_train)):
        axis.cla()
        axis.plot(series[t])

In [75]:
# Stack the [input, target] samples recorded for the selected cascade.
# NOTE(review): assumes the samples stack into an array that can be indexed
# as data[:, 0] / data[:, 1] — confirm inputs and targets have matching shapes.
data = np.asarray(modules_per_size[module_size][module])

# Column 0 holds the inputs, column 1 the targets of each sample.
X_train = data[:, 0]
y_train = data[:, 1]

assert (data[0][0] == X_train[0]).all()
assert (data[0][1] == y_train[0]).all()

print(" [.] Current module in analysis:", module, ". Module size:", module_size)

plt.close('all')
fig, (ax1, ax2) = plt.subplots(1,2)
# Show at most 100 frames, but never more than there are samples: animate(t)
# indexes X_train[t] / y_train[t] and would otherwise raise IndexError.
n_frames = min(100, len(X_train))
# Left as the last expression so the notebook displays the animation.
matplotlib.animation.FuncAnimation(fig, animate, frames=n_frames)

 [.] Current module in analysis: SMF_1.6_0 . Module size: 1
