
# Artificial Neural Networks for Classifying the Resilience of WRONs (Wavelength-Routed Optical Networks) under Single and Double Failures


---


<strong>Authors:</strong>

  * Christian Lira (christian.lira@ufrpe.br)
  * Jonas Freire (jonas.freire@gmail.com)
  * Pedro Araújo (pedro.araujonascimento@ufrpe.br)


### Required Packages

In [0]:
import numpy as np
import pandas as pd

import re
import time

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical

import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score

import io
from contextlib import redirect_stdout

Using TensorFlow backend.


##Functions

###Preprocessing Data Function

In [0]:
def preprocessing(filenames, num_nodes=18):
  """Clean the raw data files and write the feature table to 'dataset.csv'.

  The raw files are concatenated, the ';' that terminates each record is
  replaced by a plain line break, and the cleaned text is stored in
  'data.txt'. The cleaned file is then parsed as ';'-separated values and
  one output row is produced per record with, in this order:

    0. algebraic connectivity          (raw column 4)
    1. natural connectivity            (raw column 5)
    2. DFT of Laplacian entropy        (raw column 14)
    3. number of nodes                 (the ``num_nodes`` argument)
    4. number of links                 (count of '1' characters in raw column 15)
    5. hub degree                      (largest node degree, from raw column 15)
    6. robustness to simple failures   (raw column 30)
    7. robustness to double failures   (raw column 31)

  Raw column 15 is read as the upper triangle of the adjacency matrix,
  row by row and without the diagonal, as space-separated integers.

  Args:
    filenames: Iterable with the paths of the raw text files.
    num_nodes: Number of nodes of the topology encoded in raw column 15.
      Defaults to 18, the value that was previously hard-coded.

  Side effects:
    Writes 'data.txt' and 'dataset.csv' (no header, no index) in the
    current working directory.
  """
  # Read every raw file; 'with' guarantees the handles are closed on error.
  chunks = []
  for filename in filenames:
    with open(filename, 'r') as raw_file:
      chunks.append(raw_file.read())
  data = "".join(chunks)

  # Drop the ';' that terminates each record so it does not create an
  # extra empty column when the file is parsed.
  regex = r"(;\n|;$)"
  clean_data = re.sub(regex, '\n', data)

  with open('data.txt', 'w') as clean_file:
    clean_file.write(clean_data)

  dataframe = pd.read_csv('data.txt', header=None, sep=';')
  database = dataframe.values

  dataset = []
  for record in database:
    adjacency = record[15]

    # Number of links: every '1' character in the adjacency text is a link.
    num_links = adjacency.count('1')

    # Hub degree: walk the upper triangle once and credit both endpoints
    # of each link, which is equivalent to mirroring the triangle into a
    # full symmetric matrix and counting the ones of each row.
    tokens = adjacency.split(' ')
    degrees = [0] * num_nodes
    position = 0
    for i in range(num_nodes):
      for j in range(i + 1, num_nodes):
        if int(tokens[position]) == 1:
          degrees[i] += 1
          degrees[j] += 1
        position += 1
    hub_degree = max(degrees, default=0)

    dataset.append([
      record[4],
      record[5],
      record[14],
      num_nodes,
      num_links,
      hub_degree,
      record[30],
      record[31],
    ])

  my_df = pd.DataFrame(dataset)
  my_df.to_csv('dataset.csv', index=False, header=False)


###Load Data Function

In [0]:
def whichclass(exemple, col):
  """Return the class (0-9) of one dataset example.

  The value ``exemple[col]`` is placed into one of ten consecutive bins of
  width 0.1 starting at 0.0. Every bin is closed on the left and open on
  the right, except the last one (class 9), which also contains 1.0.

  Args:
    exemple: Indexable example (row) of the dataset.
    col: Index of the column that holds the value to classify.

  Returns:
    The class index, or None when the value lies outside [0.0, 1.0].
  """
  value = exemple[col]
  lower_bounds = (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

  # Classes 0..8: half-open intervals [lower, next lower).
  for label in range(9):
    if lower_bounds[label] <= value < lower_bounds[label + 1]:
      return label

  # Class 9 is the only closed interval: [0.9, 1.0].
  if 0.9 <= value <= 1.0:
    return 9

  return None

  
def loadData(path):
  """Load the dataset, min-max normalize it and split it into train/test.

  Every column is rescaled to [0, 1] with (value - min) / (max - min).
  Text columns are parsed as numbers written with '.' as thousands
  separator and ',' as decimal separator. A constant column becomes all
  0.0 when its value is 0 and all 1.0 otherwise.

  The split is stratified by the class of column 6 (see ``whichclass``):
  after shuffling the rows, 75% of the examples of each class (rounded
  down, but at least one) go to the training set and the rest to the test
  set.

  Args:
    path: Path of the 'dataset.csv' file (comma separated, no header).

  Returns:
    Tuple ``(x_train, y_train, x_test, y_test)`` where the ``x`` arrays hold
    columns 0-5 (features) and the ``y`` arrays hold columns 6 onwards
    (robustness to simple and to double failures).

  Raises:
    ValueError: If a value of column 6 falls outside [0, 1] after
      normalization (for example NaN), so it belongs to no class.
  """
  dataframe = pd.read_csv(path, header=None, sep=',')
  # Work on an object array: every cell is then a plain Python value, so the
  # type checks below also work when pandas returns a purely numeric array,
  # and normalized floats are never truncated by an integer dtype.
  database = dataframe.values.astype(object)
  qtd_exemples = len(database)
  qtd_colunas = database.shape[1]

  # Normalization
  for col in range(qtd_colunas):
    first = database[0][col]
    if isinstance(first, str):
      values = [float(cell.replace('.', '').replace(',', '.'))
                for cell in database[:, col]]
    elif (isinstance(first, (int, float, np.integer, np.floating))
          and not isinstance(first, bool)):
      values = [float(cell) for cell in database[:, col]]
    else:
      # Unsupported cell type: leave the column untouched.
      continue

    lowest = min(values)
    highest = max(values)
    span = highest - lowest

    if span == 0:
      # Constant column: avoid the division by zero.
      constant = 0.0 if highest == 0 else 1.0
      normalized = [constant] * qtd_exemples
    else:
      normalized = [(value - lowest) / span for value in values]

    for lin, value in enumerate(normalized):
      database[lin][col] = value

  # Class of every example (column 6), taken after shuffling the rows.
  np.random.shuffle(database)
  classes = [whichclass(example, 6) for example in database]

  qtd_classes = np.zeros(10, dtype=int)
  for classe in classes:
    if classe is None:
      raise ValueError('column 6 holds a value outside [0, 1]; '
                       'it cannot be assigned to a class')
    qtd_classes[classe] += 1

  # Training quota per class: 75% rounded down, but never zero for a class
  # that has at least one example.
  qtd_classes_conj_treino = np.zeros(10, dtype=int)
  for classe, total in enumerate(qtd_classes):
    quota = int(total * 0.75)
    if quota == 0 and total != 0:
      quota = 1
    qtd_classes_conj_treino[classe] = quota

  train = []
  teste = []
  for example, classe in zip(database, classes):
    if qtd_classes_conj_treino[classe] > 0:
      train.append(example.tolist())
      qtd_classes_conj_treino[classe] -= 1
    else:
      teste.append(example.tolist())

  # reshape keeps the arrays 2-D even when one of the sets is empty.
  simple_train = np.asarray(train).reshape(-1, qtd_colunas)
  simple_teste = np.asarray(teste).reshape(-1, qtd_colunas)

  x_train = simple_train[:, 0:6]
  y_train = simple_train[:, 6:]
  x_test = simple_teste[:, 0:6]
  y_test = simple_teste[:, 6:]

  return(x_train, y_train, x_test, y_test)


##ANN

### Loop to test different numbers of neurons in the hidden layer

In [0]:
# Train one network per hidden-layer size (5, 10, ..., 50 neurons) and report
# the mean test accuracy and the mean test MSE over all sizes.
filenames = ['hessen_shuffle 0.txt', 'hessen_shuffle 1.txt', 'hessen_shuffle_2.txt']

list_acc = []
list_mse = []

# The raw files do not change between iterations, so 'dataset.csv' only
# needs to be generated once.
preprocessing(filenames)

for i in range(5, 51, 5):

  # Each iteration reloads the dataset, which reshuffles the train/test split.
  x_train, y_train, x_test, y_test = loadData('dataset.csv')

  model = Sequential()
  model.add(Dense(units = i, activation = 'relu', input_dim = 6))
  model.add(Dense(units = 2, activation = 'sigmoid'))
  model.compile(optimizer = 'adam', loss = 'mse', metrics = ['acc'])

  model.fit(x_train, y_train, batch_size = 10, epochs = 500, verbose = 0)

  # evaluate returns [loss, acc] for the metrics configured in compile.
  score = model.evaluate(x_test, y_test)
  list_mse.append(score[0])
  list_acc.append(score[1])

sum_acc = sum(list_acc)
sum_mse = sum(list_mse)

average_accuracy = sum_acc / len(list_acc)
average_mse = sum_mse / len(list_mse)

print('Média de Acurácia ')
print(average_accuracy)
print('Média de MSE ')
print(average_mse)



##Plotting 

In [0]:

# Train the network with 45 hidden neurons and keep the training history and
# the test score for the cells below.
filenames = ['hessen_shuffle 0.txt', 'hessen_shuffle 1.txt', 'hessen_shuffle_2.txt']

preprocessing(filenames)
x_train, y_train, x_test, y_test = loadData('dataset.csv')

model = Sequential()
model.add(Dense(units = 45, activation = 'relu', input_dim = 6))
model.add(Dense(units = 2, activation = 'sigmoid'))
model.compile(optimizer = 'adam', loss = 'mse', metrics = ['acc'])

# history records the per-epoch loss and accuracy; score is [loss, acc] on
# the test set.
history = model.fit(x_train, y_train, batch_size = 10, epochs = 500, verbose = 0)
score = model.evaluate(x_test, y_test)


###SpeedUp ANN vs SIMTON

In [0]:
# Measure the time of a single-example prediction 100 times and compare the
# mean with the reference time stored in simtom_time.
simtom_time = 44382.98886167756
predict_time_us = []
predict_time_ms = []
sum_predict = 0

for _ in range(100):
  # Capture what predict prints so the reported step time can be parsed.
  captured = io.StringIO()
  with redirect_stdout(captured):
    previsions = model.predict(x_test[0:1, :], verbose = 1)

  # NOTE(review): assumes the fifth whitespace-separated token of the
  # captured output is the step time (e.g. '2ms/step' or '500us/step');
  # this depends on the progress format of the installed Keras - confirm.
  step_time = captured.getvalue().split()[4]
  if 'm' in step_time:
    predict_time_ms.append(float(step_time.split('m')[0]))
  else:
    predict_time_us.append(float(step_time.split('u')[0]))

# Convert the millisecond samples to microseconds and pool all samples.
predict_time_us.extend(milliseconds * 1000 for milliseconds in predict_time_ms)

for sample in predict_time_us:
  sum_predict += sample

average = sum_predict / len(predict_time_us)
print(predict_time_us)
print('Tempo de Predição ANN')
print(average, 'us')
print(average / 1000)
print('Tempo de Predição SIMTOM')
print(simtom_time, 'ms')
print('SpeedUp')
# average is in microseconds; dividing by 1000 puts both times in ms.
print(simtom_time / (average / 1000))


###Generate Graphics

In [0]:
# Predict the whole test set; the plots below reuse 'previsions'.
previsions = model.predict(x_test, verbose = 1)

print(previsions)
# score comes from model.evaluate: index 1 is the 'acc' metric and index 0
# is the loss (MSE); they are printed one per line, accuracy first.
print(score[1], score[0], sep='\n')

###Accuracy Over Epochs

In [0]:
# Training accuracy recorded by Keras at the end of every epoch.
training_accuracy = history.history['acc']

plt.plot(training_accuracy)
plt.xlabel('épocas')
plt.ylabel('acurácia')
plt.title('Acurácia Durante Épocas')
plt.legend(['treino'], loc='lower right')
plt.show()

###Loss Over Epochs

In [0]:
# Training loss (MSE) recorded by Keras at the end of every epoch.
training_loss = history.history['loss']

plt.plot(training_loss)
plt.xlabel('épocas')
plt.ylabel('erro')
plt.title('Erro Durante Épocas')
plt.legend(['treino'], loc='best')
plt.show()

###Accuracy vs Nº of Neurons (Simple Failures)

In [0]:
# Accuracy values typed in by hand, one per hidden-layer size in 'num'.
acc = [
  0.9747447073489310,
  0.9682025737129538,
  0.9701535907349386,
  0.9717310087228770,
  0.9725963154103080,
  0.9733913333,
  0.9720215,
  0.9710460772197390,
  0.96326267,
  0.9710253217,
]

# Hidden-layer sizes: 5, 10, ..., 50.
num = list(range(5, 51, 5))

plt.plot(num, acc)
plt.xlabel('nº of neurons')
plt.ylabel('accuracy')
plt.title('Accuracy x Number of Neurons')
plt.show()


###MSE vs Nº of Neurons (Simple Failures)

In [0]:
# MSE values typed in by hand, one per hidden-layer size in 'num'.
mse = [
  0.0032382364847859000,
  0.0030215065701311434,
  0.002998395328865773,
  0.0030937843121529000,
  0.0030426691457763300,
  0.002993633333,
  0.002995133333,
  0.0029131011183504300,
  0.00298817,
  0.002994107197,
]

# Hidden-layer sizes: 5, 10, ..., 50.
num = list(range(5, 51, 5))

plt.plot(num, mse)
plt.xlabel('nº of neurons')
plt.ylabel('mse')
plt.title('MSE x Number of Neurons')
plt.show()

###ANN vs SIMTON (Simple Failures)

In [0]:
# Compare predictions and expected values for test examples 150-202,
# first output column (simple failures).
window = slice(150, 203)
first_output = slice(0, 1)

plt.plot(previsions[window, first_output])
plt.plot(y_test[window, first_output])
plt.ylabel('Resiliência')
plt.title('RNA vs SIMTON (Falhas Simples)')
plt.legend(['RNA', 'SIMTON'], loc='best')
plt.show()

###ANN vs SIMTON (Double Failures)

In [0]:
# Compare predictions and expected values for test examples 600-699,
# second output column (double failures).
plt.plot(previsions[600:700, 1:])
plt.plot(y_test[600:700, 1:])
plt.title('ANN vs SIMTOM (Double Failures)')
# The curves are resilience values (model output vs. expected output), not
# accuracies, so the axis is labelled accordingly.
plt.ylabel('resilience')
plt.legend(['ANN', 'SIMTOM'], loc='best')
plt.show()