# MNAV - Simple GNN

Dataset: MNAV

Modelo: GNN simple

## Importar Datos

In [None]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from itertools import combinations
from copy import deepcopy
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import ParameterGrid
import torch
import networkx as nx
from sklearn.preprocessing import OrdinalEncoder

El dataset se encuentra disponible en https://github.com/ffedee7/posifi_mnav/tree/master/data_analysis. Los datos publicados están anonimizados, pero la anonimización se puede revertir con el código de ese mismo repositorio.

In [None]:
# CSV file with the fingerprints; the path is relative to the working directory.
dataset = 'datos2.csv'
# Load the whole file into a DataFrame.
df = pd.read_csv(dataset)

In [None]:
# Quick sanity check: dimensions plus per-column summary statistics.
print(df.shape)
df.describe()

In [None]:
# Preview the first rows of the loaded data.
df.head()

## Preprocesamiento

Se definen los APs que se quieren usar tanto para la construcción del grafo como para el modelo.

En este caso usamos solamente los APs que se encuentran dentro del MNAV, es decir que descartamos los APs que aparecen en las medidas pero que no son los que se instalaron.

In [None]:
# Column names (one per access point) kept for the 2.4 GHz band.
# Order matters: later cells address these columns by position, and treat
# position i here and position i in APs_MAC_5 as the same AP (columns i and i+15).
APs_MAC_2_4 = ['wifi-dc:a5:f4:43:85:c0',
'wifi-dc:a5:f4:43:27:e0',
'wifi-f8:4f:57:ab:da:00',
'wifi-5c:a4:8a:4c:05:c0',
'wifi-1c:1d:86:ce:ef:b0',
'wifi-dc:a5:f4:43:79:20',
'wifi-c0:7b:bc:36:9e:10',
'wifi-1c:1d:86:9f:99:20',
'wifi-c0:7b:bc:36:af:40',
'wifi-c0:7b:bc:36:af:80',
'wifi-1c:1d:86:b6:ac:80',
'wifi-dc:a5:f4:43:72:e0',
'wifi-f8:4f:57:ab:d8:60',
'wifi-dc:a5:f4:43:72:90',
'wifi-f8:4f:57:ab:ce:20']

# Column names kept for the 5 GHz band, listed in the same AP order as above.
APs_MAC_5 = ['wifi-dc:a5:f4:45:85:b0',
'wifi-dc:a5:f4:45:27:e0',
'wifi-f8:4f:57:ad:d9:60',
'wifi-5c:a4:8a:4e:05:30',
'wifi-1c:1d:86:d0:ef:00',
'wifi-dc:a5:f4:45:79:10',
'wifi-c0:7b:bc:38:9e:00',
'wifi-1c:1d:86:a1:99:00',
'wifi-c0:7b:bc:38:af:30',
'wifi-c0:7b:bc:38:af:70',
'wifi-1c:1d:86:b8:ac:80',
'wifi-dc:a5:f4:45:72:d0',
'wifi-f8:4f:57:ad:d7:c0',
'wifi-dc:a5:f4:45:72:80',
'wifi-f8:4f:57:ad:cd:80']

In [None]:
# Missing readings (NaN) become 0.
df = df.fillna(0)

# Shift every RSSI column (everything after the first column) by +100 so that
# 0 becomes the minimum of the scale. Readings that were 0 before the shift
# (missing / assumed far away) land on exactly 100 and are sent back to 0,
# the new minimum.
values = df.iloc[:, 1:] + 100
values = values.mask(values == 100, 0)
df.iloc[:, 1:] = values


##### TO DRAW DISTRIBUTIONS ######

# df.iloc[:,1:] = df.iloc[:,1:] - 100

#########################


# Two views of the data: 2.4 GHz readings only, and 2.4 GHz + 5 GHz readings.
# (Original author's note: data_2_4 may not be needed any more - to be reviewed.)
data_2_4 = df.loc[:, ['location', *APs_MAC_2_4]]
data_2_4_5 = df.loc[:, ['location', *APs_MAC_2_4, *APs_MAC_5]]

**TODO:** evaluar si vale la pena usar un `OrdinalEncoder` o si conviene codificar las zonas a mano, de modo que el código 1 corresponda a `location_1`, y así sucesivamente.


In [None]:
# Encode the zone labels as integers with an ordinal encoder.
enc = OrdinalEncoder(dtype=int)
# y is fitted on an (n, 1) column; X holds every column after 'location'.
y = enc.fit_transform(data_2_4_5['location'].values.reshape(-1,1))
X = data_2_4_5.iloc[:,1:].values

# Show which label maps to which integer code.
print(enc.categories_)

In [None]:
# EXPERIMENT (disabled): check whether shuffling the data improves performance.
# dfaux = pd.DataFrame(X)
# dfaux['30'] = y
# subsample = dfaux.sample(frac=1, random_state=99)
# y = subsample.iloc[:, -1].values.reshape(-1,1)
# X = subsample.iloc[:, :-1].values

In [None]:
# Split the dataset 80/20 into train and test (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape)

## Grafo

En las siguientes celdas se describe un poco el dataset y se muestran las distribuciones de potencia por AP.

In [None]:
# Mean (shifted) RSSI per AP for every location, 2.4 GHz columns only.
data_2_4.groupby(['location']).mean()

In [None]:
# Global matplotlib font size for all following figures.
font = {'size'   : 15}

plt.rc('font', **font)

In [None]:
# One histogram per 2.4 GHz AP column on a 5x3 grid with shared axes.
axarr = data_2_4.hist(xlabelsize=15, ylabelsize=15, figsize=[12,15], layout=(5, 3), sharex=True, sharey=True)
for ax in axarr.flatten():
    ax.set_xlabel("RSSI")
    ax.set_ylabel("Fingerprints")
# Written to the current working directory.
plt.savefig('mnav_distribution.pdf') 

In [None]:
# Summary statistics of the 2.4 GHz columns after preprocessing.
data_2_4.describe()

In [None]:
def graph_creator(X_G, th=10, cols=None):
    """
    Build an AP graph and its normalised graph shift operator from RSSI data.

    For every AP (column), the rows whose RSSI for that AP lies within `th`
    of the column maximum are kept, as an estimate of the fingerprints taken
    closest to that AP. The mean RSSI that those rows report for each *other*
    AP becomes the weight of the edge between the two APs.

    Parameters
    ----------
    X_G : array-like of shape (n_samples, n_aps)
        RSSI measurements, one column per AP, in the same order as `cols`.
    th : float, default 10
        Width of the "close to the AP" band below each column maximum.
    cols : sequence of str, optional
        Node names, one per column of `X_G`. Defaults to 'AP1' ... 'AP15'.

    Returns
    -------
    G : networkx.Graph
        Undirected weighted graph. When both (a -> b) and (b -> a) weights
        exist, only the one inserted last is kept; they are not averaged.
    W : numpy.ndarray of shape (n_aps, n_aps)
        Adjacency matrix with rows/columns in `cols` order and zero diagonal,
        divided by the magnitude of its largest-magnitude eigenvalue (left
        unscaled if that eigenvalue is 0).
    """
    # `cols` may be any sequence (list, tuple, array); avoid truth-testing it.
    if cols is not None and len(cols) > 0:
        columns = list(cols)
    else:
        columns = ['AP%d' % idx for idx in range(1, 16)]

    df_data_train = pd.DataFrame(X_G, columns=columns)

    # Collect the edges in a plain list and build the frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    edges = []
    for ap in columns:
        max_val = df_data_train[ap].max()
        near_ap = df_data_train[df_data_train[ap] > (max_val - th)].drop(columns=ap)
        for other_ap, mean_rssi in near_ap.mean().items():
            edges.append({'from': ap, 'to': other_ap, 'weight': mean_rssi})
    df_G = pd.DataFrame(edges, columns=['from', 'to', 'weight'])

    # Undirected graph; pass create_using=nx.DiGraph here for a directed one.
    G = nx.from_pandas_edgelist(df_G, source='from', target='to', edge_attr='weight')
    # Make sure every AP is a node (only matters in degenerate inputs) so the
    # explicit node ordering below is always valid.
    G.add_nodes_from(columns)

    # GSO: adjacency in column order, no self loops, scaled by the spectral radius.
    W = nx.to_numpy_array(G, nodelist=columns)
    np.fill_diagonal(W, 0)
    # The matrix is small and dense, so a dense eigen-solver is deterministic
    # and needs no scipy.sparse.linalg import.
    spectral_radius = np.max(np.abs(np.linalg.eigvals(W)))
    if spectral_radius > 0:
        W = W / spectral_radius

    return G, W

In [None]:
# Toy example: a 2 x 4 integer tensor whose columns are (a[i], b[i]) pairs.
# NOTE(review): nothing in the visible part of the notebook uses edge_index;
# presumably a scratch cell - confirm before removing.
a = [0, 1, 1, 2]
b = [1, 0, 2, 1]
edge_index = torch.tensor([a, b], dtype=torch.long)
edge_index

## Entrenamiento

In [None]:
# Fetch the graph-neural-networks repository (provides the alegnn package).
!git clone https://github.com/alelab-upenn/graph-neural-networks
import sys
# NOTE(review): hard-coded Colab location; only matches the clone above when
# the notebook's working directory is /content.
sys.path.append('/content/graph-neural-networks/')
# alegnn needs hdf5storage ...
!pip install hdf5storage

import alegnn.modules.architectures as architectures
import alegnn.utils.graphML as graphML
# The library changes pyplot's default parameters; at least switch LaTeX
# rendering off, since it does not work out of the box on Colab.
plt.rcParams.update({"text.usetex": False})

In [None]:
def train_model_simple(model, train_data, test_data, batch_size=32, n_epochs=100, epsilon=0.005, weight_decay=1e-2, use_scheduler=False):
    """
    Train `model` with Adam + cross-entropy and evaluate it after every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier returning one row of logits per sample. It is trained on
        the device its parameters already live on (move it beforehand).
    train_data, test_data : torch.utils.data.Dataset
        Datasets yielding (x, y) pairs; y is flattened and cast to long.
    batch_size : int
        Training mini-batch size. The test set is evaluated in a single batch.
    n_epochs : int
        Number of passes over the training data.
    epsilon : float
        Learning rate.
    weight_decay : float
        Weight decay passed to Adam.
    use_scheduler : bool
        When True, the learning rate is multiplied by 0.1 once, after the
        11th epoch (epoch index 10).

    Returns
    -------
    (model, y_hattest) : tuple
        The trained model and the logits for the test set from the last
        epoch, as a CPU tensor without gradient history (None if
        `n_epochs` is 0).
    """
    # Use the model's own device instead of a notebook-level global, so the
    # function also works before any `device` variable has been defined.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device('cpu')

    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)

    optimizer = torch.optim.Adam(model.parameters(), lr=epsilon, weight_decay=weight_decay)
    loss = torch.nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

    total_loss = []
    y_hattest = None

    for epoch in range(n_epochs):
        model.train()
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(run_device), y_batch.to(run_device)
            targets = y_batch.reshape(-1).type(torch.long)

            model.zero_grad()
            y_hat = model(x_batch)
            loss_result = loss(y_hat, targets)
            loss_result.backward()
            optimizer.step()

        # Single learning-rate drop, only when requested.
        if use_scheduler and epoch == 10:
            scheduler.step()

        # Keep plain floats: storing the tensor would keep its autograd graph
        # alive and cannot be plotted directly.
        last_loss = loss_result.item()

        print("Epoch: {}".format(epoch+1))
        print("Training Loss: {}".format(last_loss))

        # Accuracy on the last training mini-batch only (argmax of the logits
        # equals argmax of their softmax).
        train_pred = torch.argmax(y_hat.detach(), dim=1).cpu().numpy()
        train_accuracy = accuracy_score(targets.cpu().numpy(), train_pred)

        print("Last Training Accuracy: {}".format(train_accuracy))

        total_loss.append(last_loss)

        # Evaluation: no gradients needed, the whole test set is one batch.
        model.eval()
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch, y_batch = x_batch.to(run_device), y_batch.to(run_device)
                y_hattest = model(x_batch)

        test_pred = torch.argmax(y_hattest, dim=1).cpu().numpy()
        test_accuracy = accuracy_score(y_batch.reshape(-1).type(torch.long).cpu().numpy(), test_pred)

        print("Last Test Accuracy: {}".format(test_accuracy))
        print("--------------------------------")

    print("Last LR: {}".format(scheduler.get_last_lr()))
    print("Last Training Loss: {}".format(total_loss[-1] if total_loss else None))
    plt.plot(total_loss)

    # Hand back CPU logits so callers can convert them to numpy directly.
    if y_hattest is not None:
        y_hattest = y_hattest.cpu()

    return (model, y_hattest)

In [None]:
# Hyperparameters: batch size, learning rate, number of filter taps, weight decay.
# NOTE: the cross-validation cell further down rebinds best_params when it is run.
best_params = {'bs': 128, 'lr': 0.001, 'nfil': 3, 'wd': 0.0001}

## Validación Cruzada

In [None]:
# Create an experiment with your api key
# SECURITY: a real API key used to be written here. Never commit credentials;
# read them from the environment, and revoke/rotate the key that was exposed.
# experiment = Experiment(
#     api_key=os.environ["EXPERIMENT_API_KEY"],
#     project_name="aagrafos",
#     workspace="facundolezama",
# )

In [None]:
k = 5  # number of folds
kf = KFold(n_splits=k)

# Hyperparameter grid: learning rate, batch size, weight decay, filter taps.
lr_list = [1e-2, 1e-3, 1e-4]
bs_list = [8, 16, 32]
wd_list = [1e-3, 1e-4]
n_fil = [1, 2, 3]

param_grid = {'lr': lr_list, 'bs': bs_list, 'wd': wd_list, 'nfil': n_fil}

# Defined here as well so this cell runs on a fresh kernel: the training
# helper and the model placement below both need a device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_score = 0
best_params = None

for param in ParameterGrid(param_grid):

    ACC = 0
    for train_index, val_index in kf.split(X_train):
        X_train_cv, X_val_cv = X_train[train_index], X_train[val_index]
        y_train_cv, y_val_cv = y_train[train_index], y_train[val_index]

        # The graph is built from the raw 2.4 GHz columns of the training fold only.
        graph, W = graph_creator(X_train_cv[:,:15], th=10)
        # NOTE(review): poolingSize is [1, 1] here but [15, 15] in the later
        # cells - confirm which one is intended.
        gnn_model = architectures.SelectionGNN(dimNodeSignals=[2, 20, 20], nFilterTaps=[param['nfil'],param['nfil']], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[15, 15], poolingFunction=graphML.NoPool, poolingSize=[1, 1], dimLayersMLP=[16], GSO=torch.from_numpy(W).float())
        # Model and data must live on the same device.
        gnn_model.to(device)

        # Standardise with statistics from the training fold only.
        scaler = StandardScaler()
        X_train_cv = scaler.fit_transform(X_train_cv)
        X_val_cv = scaler.transform(X_val_cv)

        # (n, 30) -> (n, 2, 15): two features per node (first 15 columns, last 15 columns).
        x_training_data = np.reshape(X_train_cv,(X_train_cv.shape[0],2,15))
        x_val_data = np.reshape(X_val_cv,(X_val_cv.shape[0],2,15))
        y_training_data = np.reshape(y_train_cv,(y_train_cv.shape[0],1,y_train_cv.shape[1]))
        y_val_data = np.reshape(y_val_cv,(y_val_cv.shape[0],1,y_val_cv.shape[1]))

        train_data = torch.utils.data.TensorDataset(torch.from_numpy(x_training_data).float(), torch.from_numpy(y_training_data).float())
        val_data = torch.utils.data.TensorDataset(torch.from_numpy(x_val_data).float(), torch.from_numpy(y_val_data).float())

        # NOTE(review): n_epochs=1 makes the search compare barely-trained
        # models; presumably a debugging leftover - confirm.
        trained_gnn_model, y_pred = train_model_simple(gnn_model, train_data, val_data, n_epochs=1, epsilon=param['lr'], batch_size=param['bs'], weight_decay=param['wd'])

        m = torch.nn.Softmax(dim=1)
        # Bring the logits back to the CPU before converting to numpy.
        output = m(y_pred.detach().cpu())
        ACC += accuracy_score(y_val_cv.ravel(), torch.argmax(output, dim=1).numpy())/k

    # Always keep at least one configuration, even if every score is 0.
    if best_params is None or ACC > best_score:
        best_score = ACC
        best_params = param

    print("params: ", param)
    print("Accuracy: ", ACC)
    print()

print("----------- BEST PARAMS --------------")
print("params: ", best_params)
print("Accuracy: ", best_score)
print()

#experiment.add_tag("corss-2_4-5")
#experiment.log_parameters(best_params)
#experiment.log_metrics({"accuracy": best_score})

## Test

In [None]:
# (n, 30) -> (n, 2, 15): two features per node (first 15 columns, last 15 columns).
x_training_data = np.reshape(X_train,(X_train.shape[0],2,15))
x_test_data = np.reshape(X_test,(X_test.shape[0],2,15))
y_training_data = np.reshape(y_train,(y_train.shape[0],1,y_train.shape[1]))
y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

# Standardise (x - mean) / std with statistics from the training set only.
mean = x_training_data.mean(axis=0)
std = x_training_data.std(axis=0)
# A feature that is constant in the training set has std 0; divide by 1
# instead (same convention as the StandardScaler used in cross-validation)
# so it is centred rather than turned into NaN/inf.
std[std == 0] = 1.0

x_training_data = (x_training_data - mean) / std
x_test_data = (x_test_data - mean) / std

# isfinite also rejects +/-inf, which a plain NaN check would miss.
assert np.isfinite(x_training_data).all()
assert np.isfinite(x_test_data).all()

train_data = torch.utils.data.TensorDataset(torch.from_numpy(x_training_data).float(), torch.from_numpy(y_training_data).float())
test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())
print("x_training_data shape: ", x_training_data.shape)
print("y_training_data shape: ", y_training_data.shape)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
graph, W = graph_creator(X_train[:,:15], th=10) # the graph is built from the 2.4 GHz data only

# Two graph-convolution layers (2 -> 20 -> 20 features per node) on 15 nodes,
# followed by an MLP, using the normalised adjacency W as the shift operator.
gnn_model = architectures.SelectionGNN(dimNodeSignals=[2, 20, 20], nFilterTaps=[best_params['nfil'],best_params['nfil']], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[15, 15], poolingFunction=graphML.NoPool, poolingSize=[15, 15], dimLayersMLP=[16], GSO=torch.from_numpy(W).float())
#gnn_model = architectures.SelectionGNN(dimNodeSignals=[2, 10, 1], nFilterTaps=[best_params['nfil'], 1], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[15, 15], poolingFunction=graphML.NoPool, poolingSize=[15, 15], dimLayersMLP=[16], GSO=torch.from_numpy(W).float())
# TODO: try the LocalGNN architecture
#gnn_model = architectures.LocalGNN(dimNodeSignals=[2, 20, 20], nFilterTaps=[best_params['nfil'],best_params['nfil']], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[15, 15], poolingFunction=graphML.NoPool, poolingSize=[15, 15], dimLayersMLP=[16], GSO=torch.from_numpy(W).float())


In [None]:
# Move the model to the selected device before training.
gnn_model.to(device)

In [None]:
# Train the final model with the selected hyperparameters; the returned
# y_hattest holds the test-set outputs of the last epoch.
(trained_gnn_model, y_hattest) = train_model_simple(gnn_model, train_data, test_data, n_epochs=50, epsilon=best_params['lr'], batch_size=best_params['bs'], weight_decay=best_params['wd'], use_scheduler=True)

# Class probabilities -> predicted class -> accuracy on the held-out test set.
m = torch.nn.Softmax(dim=1)
output = m(y_hattest).cpu()   
ACC_test = accuracy_score(y_test, np.array(torch.argmax(output, axis=1)))

print(ACC_test)
print(classification_report(y_test, np.array(torch.argmax(output, axis=1))))

In [None]:
# Persist the trained weights.
# NOTE(review): hard-coded Google Drive path, and the file holds a torch
# state_dict despite its .csv extension - consider a configurable path and .pt.
torch.save(gnn_model.state_dict(), '/content/drive/MyDrive/fing/maestria/tesis/codigo/gnn_model_96_5.csv')

Si ya hay un modelo guardado, cargarlo con alguna de las celdas siguientes.

In [None]:
# Load the saved weights to continue training.
gnn_model.load_state_dict(torch.load('/content/drive/MyDrive/fing/maestria/tesis/codigo/gnn_model_96_5.csv'))

In [None]:
# Load the saved weights for analysis.
# trained_gnn_model is an alias of gnn_model (same object), not a copy.
trained_gnn_model = gnn_model
trained_gnn_model.load_state_dict(torch.load('/content/drive/MyDrive/fing/maestria/tesis/codigo/gnn_model_96_5.csv'))

In [None]:
# Row-normalised confusion matrix of the test predictions held in `output`.
plt.figure(figsize=[9,7])
cf_matrix = confusion_matrix(y_test, np.array(torch.argmax(output, axis=1)), normalize="true")
# The colour scale is capped at 20%, so every cell at or above 0.2 gets the same colour.
sns.heatmap(cf_matrix, annot=True, fmt=".0%", cmap="YlGnBu", vmin=0, vmax=0.2, cbar=False)


# Análisis

In [None]:
# Change into the project folder so the local `functions` module can be imported.
# NOTE(review): relative path, so this only works from the expected starting directory.
% cd drive/MyDrive/fing/maestria/tesis/codigo/
# NOTE(review): star import; train_algorithms and classify, used further down,
# are not defined in the visible notebook and presumably come from here - confirm.
from functions import *

## Análisis variando cantidad de muestras

### GNN

In [None]:
# Fractions of the full dataset to evaluate; 10 random subsamples per fraction.
porcentajes = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {}

# Features plus label (last column, named '30'); identical for every run, so
# it is built once instead of inside the inner loop.
dfaux = pd.DataFrame(X)
dfaux['30'] = y

for porc in porcentajes:
    print('Porcentaje de datos: ', porc)
    accuracy[str(porc)] = []
    for i in range(10):
        # No seed on purpose: every repetition draws a different subsample.
        subsample = dfaux.sample(frac=porc)
        y_sub = subsample.iloc[:, -1].values.reshape(-1,1)
        X_sub = subsample.iloc[:, :-1].values

        # 80/20 train/test split.
        # NOTE: this rebinds the notebook-level X_train/X_test/y_train/y_test.
        X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.2, random_state=123)

        # NOTE(review): unlike the Test section, the inputs are not
        # standardised here - confirm this is intended.
        x_training_data = np.reshape(X_train,(X_train.shape[0],2,15))
        x_test_data = np.reshape(X_test,(X_test.shape[0],2,15))
        y_training_data = np.reshape(y_train,(y_train.shape[0],1,y_train.shape[1]))
        y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

        train_data = torch.utils.data.TensorDataset(torch.from_numpy(x_training_data).float(), torch.from_numpy(y_training_data).float())
        test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())

        # The graph is built from the 2.4 GHz columns of the training split only.
        graph_aux, W_aux = graph_creator(X_train[:,:15], th=10)
        gnn_model_aux = architectures.SelectionGNN(dimNodeSignals=[2, 20, 20], nFilterTaps=[best_params['nfil'],best_params['nfil']], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[15, 15], poolingFunction=graphML.NoPool, poolingSize=[15, 15], dimLayersMLP=[16], GSO=torch.from_numpy(W_aux).float())
        # Model and data must live on the same device.
        gnn_model_aux.to(device)
        (trained_gnn_model_aux, y_hattest_aux) = train_model_simple(gnn_model_aux, train_data, test_data, n_epochs=300, epsilon=best_params['lr'], batch_size=best_params['bs'], weight_decay=best_params['wd'])

        m = torch.nn.Softmax(dim=1)
        # Bring the outputs back to the CPU before converting to numpy.
        output = m(y_hattest_aux.detach().cpu())
        ACC_test = accuracy_score(y_test.ravel(), torch.argmax(output, dim=1).numpy())

        accuracy[str(porc)].append(ACC_test)
        print(ACC_test)
        #print(classification_report(y_test, np.array(torch.argmax(output, axis=1))))

In [None]:
# NOTE: both names are already imported in the first cell; repeated here.
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Uses y_test and output as left by the last iteration of the loop above,
# i.e. the last run at the largest sample fraction.
plt.figure(figsize=[9,7])
cf_matrix = confusion_matrix(y_test, np.array(torch.argmax(output, axis=1)), normalize="true")
sns.heatmap(cf_matrix, annot=True, fmt=".0%", cmap="YlGnBu", vmin=0, vmax=0.2, cbar=False)

In [None]:
# Sample sizes in percent, used as x-axis values.
per = [30, 40, 50, 60, 70, 80, 90, 100]
# Mean accuracy over the repetitions of each sample fraction.
acc_mean = [np.mean(accuracy[str(porc)]) for porc in porcentajes]

In [None]:
# Mean accuracy versus sample size.
plt.plot(per, acc_mean, '.-')
plt.ylabel('Average accuracy')
plt.xlabel('Fingerprints sample size (%)')
plt.grid()

In [None]:
# Agrego otros 5 datos obtenidos en otra tirada independiente para tener más 
# información para hacer el gráfico

accuracy2 = { '0.3':
[0.931637519872814,
0.9205087440381559,
0.8918918918918919,
0.9427662957074722,
0.9459459459459459,
0.9364069952305246,
0.9411764705882353,
0.9491255961844197,
0.9332273449920508,
0.9332273449920508],
'0.4':
[0.9403341288782816,
0.9152744630071599,
0.9331742243436754,
0.9391408114558473,
0.9427207637231504,
0.951073985680191,
0.9415274463007159,
0.9260143198090692,
0.9331742243436754,
0.9451073985680191],
'0.5':
[0.9522445081184336, 
0.9503342884431709,
0.956064947468959,
0.9503342884431709,
0.9340974212034384,
0.944603629417383,
0.9531996179560649,
0.9369627507163324,
0.9407831900668577,
0.9426934097421203],
'0.6':
[0.9451073985680191,
0.9562450278440732,
0.9474940334128878,
0.9498806682577565,
0.9514717581543357,
0.9578361177406524,
0.9578361177406524,
0.9538583929992045,
0.9562450278440732,
0.9554494828957836],
'0.7':
[0.9542974079126876,
0.9583901773533424,
0.9624829467939973,
0.9604365620736699,
0.9618008185538881,
0.9365620736698499,
0.9604365620736699,
0.9645293315143247,
0.9604365620736699,
0.9508867667121419],
'0.8':
[0.9546268656716418,
0.9570149253731344,
0.9534328358208956,
0.9522388059701492,
0.9570149253731344,
0.9582089552238806,
0.9540298507462687,
0.96,
0.9522388059701492,
0.9588059701492537],
'0.9':
[0.9697612732095491,
0.96657824933687,
0.9655172413793104,
0.9522546419098143,
0.9591511936339523,
0.9708222811671088,
0.956498673740053,
0.9681697612732095,
0.9639257294429708,
0.9692307692307692],
'1':
[0.9570200573065902,
0.9598853868194842,
0.9608404966571156,
0.9565425023877746,
0.9570200573065902,
0.9512893982808023,
0.9646609360076409,
0.9637058261700095,
0.9608404966571156,
0.9713467048710601]}

In [None]:
# One list of accuracies per sample fraction, in increasing-fraction order.
aux = [accuracy2[frac] for frac in ('0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1')]
print(aux)

In [None]:
# One box per sample size; outliers are hidden (showfliers=False).
plt.figure(figsize=[9,7])
plt.boxplot(aux, showfliers=False) #, meanline=True, showmeans=True)
plt.xticks([1, 2, 3, 4, 5, 6, 7, 8], per)
plt.ylabel('Accuracy')
plt.xlabel('Fingerprints sample size (%)')
plt.grid()
plt.savefig('ave_acc_museo.pdf')

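- Los bigotes (las líneas de más arriba y de más abajo) llegan hasta el valor máximo y el mínimo que caen dentro de 1,5 veces el rango intercuartílico; los valores atípicos no se muestran (`showfliers=False`).
- Los extremos de la caja representan los percentiles 25% y 75%.
- La línea naranja representa el percentil 50% (mediana).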

### KNN

In [None]:
# KNN baseline: accuracy for each fraction of the dataset.
porcentajes = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {}
for porc in porcentajes:
    print('Porcentaje de datos: ', porc)
    accuracy[str(porc)] = []
    # A single run per fraction: the subsample and the split are both seeded.
    for i in range(1):
        # Features plus label as the last column (named '30').
        dfaux = pd.DataFrame(X)
        dfaux['30'] = y
        subsample = dfaux.sample(frac=porc, random_state=123)
        y_sub = subsample.iloc[:, -1].values.reshape(-1,1)
        X_sub = subsample.iloc[:, :-1].values

        # 80/20 train/test split (rebinds the notebook-level X_train/X_test/y_train/y_test)
        X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.2, random_state=123)

        neigh = KNeighborsClassifier(n_neighbors=3)
        neigh.fit(X_train, y_train.reshape(-1))

        y_pred_knn = neigh.predict(X_test)

        ACC_test = accuracy_score(y_test, y_pred_knn)        
        accuracy[str(porc)].append(ACC_test)
        print(ACC_test)

In [None]:
# Sample sizes in percent, and one hard-coded accuracy per size
# (presumably copied from a previous run of the KNN cell above - confirm).
per = [30, 40, 50, 60, 70, 80, 90, 100]
aux = [[0.931637519872814], [0.9558472553699284], [0.9522445081184336], [0.9562450278440732], [0.9631650750341064], [0.964776119402985], [0.9517241379310345], [0.9613180515759312]]

In [None]:
# One box per sample size; each box holds a single value here, so it collapses to a line.
plt.figure(figsize=[9,7])
plt.boxplot(aux, showfliers=False) #, meanline=True, showmeans=True)
plt.xticks([1, 2, 3, 4, 5, 6, 7, 8], per)
plt.ylabel('Accuracy')
plt.xlabel('Fingerprints sample size (%)')
plt.grid()
plt.savefig('knn_cant_muestras_museo.jpg')

### FCNN

In [None]:
# Fully connected baseline (sklearn MLPClassifier with default settings).
porcentajes = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {}
for porc in porcentajes:
    print('Porcentaje de datos: ', porc)
    accuracy[str(porc)] = []
    # NOTE: the subsample and the split are both seeded, so all 10 repetitions
    # see the same data; only the unseeded MLPClassifier differs between runs.
    for i in range(10):
        # Features plus label as the last column (named '30').
        dfaux = pd.DataFrame(X)
        dfaux['30'] = y
        subsample = dfaux.sample(frac=porc, random_state=123)
        y_sub = subsample.iloc[:, -1].values.reshape(-1,1)
        X_sub = subsample.iloc[:, :-1].values

        # 80/20 train/test split (rebinds the notebook-level X_train/X_test/y_train/y_test)
        X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.2, random_state=123)

        clf = MLPClassifier().fit(X_train, y_train.reshape(-1))

        y_pred_fcnn = clf.predict(X_test)

        ACC_test = accuracy_score(y_test, y_pred_fcnn)        
        accuracy[str(porc)].append(ACC_test)
    print(accuracy[str(porc)])

In [None]:
per = [30, 40, 50, 60, 70, 80, 90, 100]
aux = [
       [0.9364069952305246, 0.9284578696343402, 0.9332273449920508, 0.9236883942766295, 0.9236883942766295, 0.931637519872814, 0.8918918918918919, 0.9284578696343402, 0.9268680445151033, 0.9236883942766295],
       [0.9498806682577565, 0.9486873508353222, 0.9295942720763724, 0.9451073985680191, 0.9630071599045346, 0.9248210023866349, 0.9498806682577565, 0.9498806682577565, 0.964200477326969, 0.9498806682577565],
       [0.9531996179560649, 0.9531996179560649, 0.9551098376313276, 0.9465138490926457, 0.9493791786055397, 0.9522445081184336, 0.9503342884431709, 0.9503342884431709, 0.9598853868194842, 0.9493791786055397],
       [0.94351630867144, 0.9522673031026253, 0.9427207637231504, 0.9530628480509149, 0.9482895783611774, 0.9459029435163087, 0.9522673031026253, 0.9530628480509149, 0.9387430389817024, 0.9284009546539379],
       [0.9488403819918144, 0.9590723055934516, 0.9577080491132333, 0.9536152796725784, 0.9583901773533424, 0.9502046384720327, 0.956343792633015, 0.9488403819918144, 0.9597544338335607, 0.9461118690313779],
       [0.9596816976127321, 0.9628647214854111, 0.9586206896551724, 0.9718832891246685, 0.963395225464191, 0.9681697612732095, 0.9702917771883289, 0.9644562334217507, 0.9750663129973475, 0.9692307692307692],
       [0.955820895522388, 0.964776119402985, 0.9665671641791045, 0.9683582089552238, 0.9641791044776119, 0.9611940298507463, 0.964776119402985, 0.9671641791044776, 0.9617910447761194, 0.9617910447761194],
       [0.9646609360076409, 0.9570200573065902, 0.9675262655205349, 0.9522445081184336, 0.9617956064947469, 0.9660936007640879, 0.9684813753581661, 0.9651384909264565, 0.9641833810888252, 0.9684813753581661]
]

In [None]:
# One box per sample size; outliers are hidden (showfliers=False).
plt.figure(figsize=[9,7])
plt.boxplot(aux, showfliers=False) #, meanline=True, showmeans=True)
plt.xticks([1, 2, 3, 4, 5, 6, 7, 8], per)
plt.ylabel('Accuracy')
plt.xlabel('Fingerprints sample size (%)')
plt.grid()
plt.savefig('fcnn_cant_muestras_museo.jpg')

### Bracco, etc

In [None]:
# Display names, one per classifier, in the same order as `classifiers` below.
names= ["Nearest Neighbors","Decision Tree","Linear SVM","Random Forest","Neural Net","AdaBoost"]

porcentajes = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {}

# Features plus label (last column, named '30'); identical for every run.
dfaux = pd.DataFrame(X)
dfaux['30'] = y

for porc in porcentajes:
    print('Porcentaje de datos: ', porc)
    accuracy[str(porc)] = []
    # NOTE: the subsample and the split are both seeded, so all 10 repetitions
    # see the same data; only the unseeded classifiers differ between runs.
    for i in range(10):
        subsample = dfaux.sample(frac=porc, random_state=123)
        y_sub = subsample.iloc[:, -1].values.reshape(-1,1)
        X_sub = subsample.iloc[:, :-1].values

        # 80/20 train/test split (rebinds the notebook-level X_train/X_test/y_train/y_test)
        X_train, X_test, y_train, y_test = train_test_split(X_sub, y_sub, test_size=0.2, random_state=123)

        # Fresh, unfitted estimators for every run. The list used to contain
        # DecisionTreeClassifier twice (7 estimators for 6 names), which
        # shifted every later estimator away from its label.
        classifiers = [
            KNeighborsClassifier(3),
            DecisionTreeClassifier(max_depth=5),
            SVC(kernel="linear", C=0.025, probability=True),
            RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
            MLPClassifier(alpha=1, max_iter=1000),
            AdaBoostClassifier(),
        ]
        assert len(names) == len(classifiers), "each classifier needs exactly one name"

        # train_algorithms / classify are not defined in this notebook;
        # presumably provided by the local `functions` module - confirm.
        algorithms, youden = train_algorithms(X_train, y_train.ravel(), X_test, y_test.ravel(), names, classifiers)

        y_final = classify(algorithms, youden, X_test)

        ACC_test = accuracy_score(y_test, y_final)
        accuracy[str(porc)].append(ACC_test)
    print(accuracy[str(porc)])


In [None]:
per = [30, 40, 50, 60, 70, 80, 90, 100]
aux = [
       [0.9475357710651828, 0.9427662957074722, 0.9538950715421304, 0.9491255961844197, 0.9507154213036566, 0.9427662957074722, 0.9507154213036566, 0.9475357710651828, 0.9475357710651828, 0.9459459459459459],
       [0.9689737470167065, 0.964200477326969, 0.9701670644391408, 0.9665871121718377, 0.9689737470167065, 0.9653937947494033, 0.9713603818615751, 0.9737470167064439, 0.9725536992840096, 0.9713603818615751],
       [0.9637058261700095, 0.9675262655205349, 0.9656160458452722, 0.9665711556829035, 0.9627507163323782, 0.9665711556829035, 0.9675262655205349, 0.9627507163323782, 0.9617956064947469, 0.9694364851957975],
       [0.964200477326969, 0.9657915672235481, 0.9681782020684169, 0.9626093874303898, 0.964200477326969, 0.9634049323786794, 0.9649960222752586, 0.9673826571201273, 0.9673826571201273, 0.9649960222752586],
       [0.9597544338335607, 0.9583901773533424, 0.9597544338335607, 0.9583901773533424, 0.9577080491132333, 0.9583901773533424, 0.9604365620736699, 0.9597544338335607, 0.9583901773533424, 0.9597544338335607],
       [0.9749253731343284, 0.9749253731343284, 0.9725373134328358, 0.9737313432835821, 0.9749253731343284, 0.9725373134328358, 0.9737313432835821, 0.9731343283582089, 0.9743283582089552, 0.9749253731343284],
       [0.9729442970822281, 0.9729442970822281, 0.9718832891246685, 0.9740053050397878, 0.9729442970822281, 0.9740053050397878, 0.9740053050397878, 0.9724137931034482, 0.9718832891246685, 0.9729442970822281],
       [0.9680038204393505, 0.9670487106017192, 0.9675262655205349, 0.9651384909264565, 0.9675262655205349, 0.9680038204393505, 0.9646609360076409, 0.9656160458452722, 0.9675262655205349, 0.9646609360076409]
]

In [None]:
# One box per sample size; outliers are hidden (showfliers=False).
plt.figure(figsize=[9,7])
plt.boxplot(aux, showfliers=False) #, meanline=True, showmeans=True)
plt.xticks([1, 2, 3, 4, 5, 6, 7, 8], per)
plt.ylabel('Accuracy')
plt.xlabel('Fingerprints sample size (%)')
plt.grid()
plt.savefig('bracco_cant_muestra_museo.jpg')

## Análisis variando cantidad de APs

Los APs fueron descartados en el siguiente orden:

- AP13 : Se encuentra en la sala de conferencias. Alejado de las zonas de
localización.
- AP10 : Se encuentra en el depósito de obras, alejado de las zonas de localización.
- AP14 : Se encuentra en la sala de dirección, no directamente dentro de zonas
de localización.
- AP3 : Se encuentra en una zona alejada con varios APs.
- AP8 : Se encuentra entre el AP7 y el AP9, por lo que podría considerarse redundante
respecto de ellos dos.

### GNN

In [None]:
# APs (1-based) removed cumulatively, in this order.
APs_to_remove = [13, 10, 14, 3, 8]
accuracy = {}
X_aux = pd.DataFrame(X)
number_of_APs = 15
cols = ['AP1', 'AP2', 'AP3', 'AP4', 'AP5', 'AP6', 'AP7', 'AP8', 'AP9', 'AP10', 'AP11', 'AP12', 'AP13', 'AP14', 'AP15']


for ap in APs_to_remove:
    print('Nuevo AP removido: ', ap)
    accuracy[str(ap)] = []
    # Drop the AP in both bands: column ap-1 (2.4 GHz) and ap-1+15 (5 GHz).
    # Column labels keep their original integers, so later drops still match.
    X_aux = X_aux.drop(columns=[ap-1, ap-1+15])
    X_aux2 = X_aux.values
    number_of_APs = number_of_APs - 1
    cols.remove('AP'+str(ap))

    # NOTE: the split is seeded, so the 5 repetitions share the same data and
    # differ only through model initialisation and batch order.
    for i in range(5):
        # 80/20 train/test split (rebinds the notebook-level X_train/X_test/y_train/y_test)
        X_train, X_test, y_train, y_test = train_test_split(X_aux2, y, test_size=0.2, random_state=123)

        # NOTE(review): unlike the Test section, the inputs are not
        # standardised here - confirm this is intended.
        x_training_data = np.reshape(X_train,(X_train.shape[0],2,number_of_APs))
        x_test_data = np.reshape(X_test,(X_test.shape[0],2,number_of_APs))
        y_training_data = np.reshape(y_train,(y_train.shape[0],1,y_train.shape[1]))
        y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

        train_data = torch.utils.data.TensorDataset(torch.from_numpy(x_training_data).float(), torch.from_numpy(y_training_data).float())
        test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())

        # The graph is built from the remaining 2.4 GHz columns of the training split.
        graph_aux, W_aux = graph_creator(X_train[:,:number_of_APs], th=10, cols=cols)
        gnn_model_aux = architectures.SelectionGNN(dimNodeSignals=[2, 20, 20], nFilterTaps=[best_params['nfil'],best_params['nfil']], bias=True, nonlinearity=torch.nn.ReLU, nSelectedNodes=[number_of_APs, number_of_APs], poolingFunction=graphML.NoPool, poolingSize=[number_of_APs, number_of_APs], dimLayersMLP=[16], GSO=torch.from_numpy(W_aux).float())
        # Model and data must live on the same device.
        gnn_model_aux.to(device)
        (trained_gnn_model_aux, y_hattest_aux) = train_model_simple(gnn_model_aux, train_data, test_data, n_epochs=300, epsilon=best_params['lr'], batch_size=best_params['bs'], weight_decay=best_params['wd'])

        m = torch.nn.Softmax(dim=1)
        # Bring the outputs back to the CPU before converting to numpy.
        output = m(y_hattest_aux.detach().cpu())
        ACC_test = accuracy_score(y_test.ravel(), torch.argmax(output, dim=1).numpy())

        accuracy[str(ap)].append(ACC_test)
        print(ACC_test)
        #print(classification_report(y_test, np.array(torch.argmax(output, axis=1))))

In [None]:
accuracy2 = {'10': [0.9536771728748806,
  0.956064947468959,
  0.954632282712512,
  0.9622731614135626,
  0.9555873925501432],
 '13': [0.9608404966571156,
  0.9608404966571156,
  0.9713467048710601,
  0.9551098376313276,
  0.9684813753581661],
 '14': [0.9317096466093601,
  0.9512893982808023,
  0.9474689589302769,
  0.9498567335243553,
  0.9598853868194842],
 '3': [0.944603629417383,
  0.9517669531996179,
  0.9379178605539638,
  0.9512893982808023,
  0.9422158548233047],
 '8': [0.9426934097421203,
  0.937440305635148,
  0.9407831900668577,
  0.9398280802292264,
  0.9398280802292264]}

In [None]:
# Mean accuracy over the repetitions of each removal step, in removal order.
acc_mean = [np.mean(accuracy[str(ap)]) for ap in APs_to_remove]

In [None]:
# Number of APs left after each removal step.
num_ap = [14, 13, 12, 11, 10]
plt.plot(num_ap, acc_mean, '.-')
plt.ylabel('Average accuracy')
# NOTE(review): the x values are the number of APs remaining, not the AP
# removed; the label may be misleading.
plt.xlabel('AP removed')
plt.grid()

In [None]:
# One list of accuracies per removal step, in removal order.
aux = [accuracy[removed] for removed in ('13', '10', '14', '3', '8')]
print(aux)

In [None]:
# One box per removal step, labelled with the number of APs left (num_ap).
plt.figure(figsize=[9,7])
plt.boxplot(aux, showfliers=False) #, meanline=True, showmeans=True)
plt.xticks([1, 2, 3, 4, 5], num_ap)
plt.ylabel('Accuracy')
plt.xlabel('AP removed')
plt.grid()
plt.savefig('ave_acc_museo_removing_ap.pdf')

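- Los bigotes (las líneas de más arriba y de más abajo) llegan hasta el valor máximo y el mínimo que caen dentro de 1,5 veces el rango intercuartílico; los valores atípicos no se muestran (`showfliers=False`).
- Los extremos de la caja representan los percentiles 25% y 75%.
- La línea naranja representa el percentil 50% (mediana).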

### KNN

In [None]:
# KNN baseline while removing APs cumulatively (1-based AP numbers).
APs_to_remove = [13, 10, 14, 3, 8] 
accuracy = {}
X_aux = pd.DataFrame(X)
# number_of_APs and cols are updated below but not read by the KNN model itself.
number_of_APs = 15
cols = ['AP1', 'AP2', 'AP3', 'AP4', 'AP5', 'AP6', 'AP7', 'AP8', 'AP9', 'AP10', 'AP11', 'AP12', 'AP13', 'AP14', 'AP15']


for ap in APs_to_remove:
    print('Nuevo AP removido: ', ap)
    accuracy[str(ap)] = []
    # drop the AP in both bands: column ap-1 (2.4GHz) and ap-1+15 (5GHz)
    X_aux = X_aux.drop(columns=[ap-1, ap-1+15], axis=1)
    X_aux2 = X_aux.values
    number_of_APs = number_of_APs -1
    cols.remove('AP'+str(ap))

    # A single run per step: the split is seeded.
    for i in range(1):
        # 80/20 train/test split (rebinds the notebook-level X_train/X_test/y_train/y_test)
        X_train, X_test, y_train, y_test = train_test_split(X_aux2, y, test_size=0.2, random_state=123)

        neigh = KNeighborsClassifier(n_neighbors=3)
        neigh.fit(X_train, y_train.reshape(-1))

        y_pred_knn = neigh.predict(X_test)

        ACC_test = accuracy_score(y_test, y_pred_knn)        
        accuracy[str(ap)].append(ACC_test)
        print(ACC_test)

### FCNN

In [None]:
# Cumulative AP-removal experiment with a default MLPClassifier: ten fits per
# removal step, all on the same fixed-seed 80/20 split.
APs_to_remove = [13, 10, 14, 3, 8]
number_of_APs = 15
cols = ['AP' + str(k) for k in range(1, 16)]
X_aux = pd.DataFrame(X)
accuracy = {}

for ap in APs_to_remove:
    print('Nuevo AP removido: ', ap)
    accuracy[str(ap)] = []

    # Drop both columns of this AP: ap-1 (2.4 GHz) and ap-1+15 (5 GHz).
    X_aux = X_aux.drop(columns=[ap - 1, ap + 14])
    X_aux2 = X_aux.values
    number_of_APs -= 1
    cols.remove('AP{}'.format(ap))

    for i in range(10):
        # 80/20 train/test split with a fixed seed.
        X_train, X_test, y_train, y_test = train_test_split(
            X_aux2, y, test_size=0.2, random_state=123
        )

        clf = MLPClassifier().fit(X_train, y_train.ravel())
        y_pred_fcnn = clf.predict(X_test)

        ACC_test = accuracy_score(y_test, y_pred_fcnn)
        accuracy[str(ap)].append(ACC_test)
    print(accuracy[str(ap)])


### Bracco, etc

In [None]:
# Cumulative AP-removal experiment for the classifier ensemble
# (train_algorithms / classify): ten runs per removal step on a fixed 80/20 split.
APs_to_remove = [13, 10, 14, 3, 8] 
accuracy = {}
X_aux = pd.DataFrame(X)
number_of_APs = 15
cols = ['AP1', 'AP2', 'AP3', 'AP4', 'AP5', 'AP6', 'AP7', 'AP8', 'AP9', 'AP10', 'AP11', 'AP12', 'AP13', 'AP14', 'AP15']

names= ["Nearest Neighbors","Decision Tree","Linear SVM","Random Forest","Neural Net","AdaBoost"]

for ap in APs_to_remove:
    print('Nuevo AP removido: ', ap)
    accuracy[str(ap)] = []
    # Drop both columns of this AP: ap-1 (2.4 GHz) and ap-1+15 (5 GHz).
    X_aux = X_aux.drop(columns=[ap-1, ap-1+15], axis=1)
    X_aux2 = X_aux.values
    number_of_APs = number_of_APs -1
    cols.remove('AP'+str(ap))

    for i in range(10):
        # 80/20 train/test split with a fixed seed.
        X_train, X_test, y_train, y_test = train_test_split(X_aux2, y, test_size=0.2, random_state=123)

        # NOTE(review): `names` has 6 entries but this list has 7 models
        # (DecisionTreeClassifier appears twice), so names and models may be
        # misaligned inside train_algorithms -- confirm and drop the duplicate
        # if it is unintended.
        classifiers = [KNeighborsClassifier(3),DecisionTreeClassifier(max_depth=5),SVC(kernel="linear", C=0.025, probability=True), DecisionTreeClassifier(max_depth=5),RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),MLPClassifier(alpha=1, max_iter=1000),
            AdaBoostClassifier()]
        algorithms, youden = train_algorithms(X_train, y_train.ravel(), X_test, y_test.ravel(), names, classifiers)

        y_final = classify(algorithms, youden, X_test)

        # Score the ensemble's own predictions. The previous code scored
        # y_pred_fcnn, a leftover from the FCNN cell, so every recorded value
        # was the FCNN accuracy rather than the ensemble's.
        ACC_test = accuracy_score(y_test, y_final)
        accuracy[str(ap)].append(ACC_test)
    print(accuracy[str(ap)])


### Gráficas

In [None]:
# Recorded accuracies of the AP-removal experiment. Each row is one removal
# step, each entry within a row is one run.
acc_gnn = [
    [0.9608404966571156, 0.9608404966571156, 0.9713467048710601, 0.9551098376313276, 0.9684813753581661],
    [0.9536771728748806, 0.956064947468959, 0.954632282712512, 0.9622731614135626, 0.9555873925501432],
    [0.9317096466093601, 0.9512893982808023, 0.9474689589302769, 0.9498567335243553, 0.9598853868194842],
    [0.944603629417383, 0.9517669531996179, 0.9379178605539638, 0.9512893982808023, 0.9422158548233047],
    [0.9426934097421203, 0.937440305635148, 0.9407831900668577, 0.9398280802292264, 0.9398280802292264],
]

# KNN: a single value per removal step.
acc_knn = [
    0.9613180515759312,
    0.9570200573065902,
    0.956064947468959,
    0.9531996179560649,
    0.940305635148042,
]

acc_fcnn = [
    [0.9484240687679083, 0.9436485195797517, 0.9608404966571156, 0.957497612225406, 0.956064947468959,
     0.9594078319006686, 0.9613180515759312, 0.9517669531996179, 0.9608404966571156, 0.9536771728748806],
    [0.957497612225406, 0.9531996179560649, 0.9465138490926457, 0.954632282712512, 0.9531996179560649,
     0.9570200573065902, 0.9493791786055397, 0.9512893982808023, 0.9531996179560649, 0.9465138490926457],
    [0.9498567335243553, 0.9422158548233047, 0.9474689589302769, 0.9479465138490927, 0.9450811843361987,
     0.956064947468959, 0.9493791786055397, 0.9426934097421203, 0.9412607449856734, 0.9541547277936963],
    [0.9455587392550143, 0.9426934097421203, 0.9484240687679083, 0.9336198662846227, 0.9474689589302769,
     0.9536771728748806, 0.9484240687679083, 0.94603629417383, 0.9498567335243553, 0.9503342884431709],
    [0.934574976122254, 0.938872970391595, 0.9393505253104107, 0.9307545367717287, 0.9379178605539638,
     0.9317096466093601, 0.9398280802292264, 0.9336198662846227, 0.9426934097421203, 0.936007640878701],
]

# NOTE(review): every entry is identical and equals the last FCNN value of the
# final removal step. These look like they were recorded from a run that scored
# the FCNN predictions instead of the ensemble's -- re-run the Bracco
# experiment and refresh these numbers before relying on them.
acc_bracco = [[0.936007640878701] * 10 for _ in range(5)]

In [None]:
# Mean FCNN accuracy of each removal step (one value per row of acc_fcnn).
np.asarray(acc_fcnn).mean(axis=1)

In [None]:
# Mean accuracy of every model against the x values 14..10.
plt.figure()
plt.plot([14,13,12,11,10], np.mean(acc_gnn, axis=1), label = "gnn")
plt.plot([14,13,12,11,10], acc_knn, label = "knn")
plt.plot([14,13,12,11,10], np.mean(acc_fcnn, axis=1), label = "fcnn")
plt.plot([14,13,12,11,10], np.mean(acc_bracco, axis=1), label = "bracco")
plt.legend()
# Save before show(): once show() has rendered the figure, a following
# savefig() acts on a new empty figure and writes a blank image.
plt.savefig('cant_aps_media_todos.jpg')
plt.show()

## Análisis de APs caídos

Se entrena normalmente con 15 APs y luego se busca simular la caída de APs llevando al valor mínimo de RSSI los datos de test correspondientes al AP caído.

Se hacen todas las combinaciones posibles y se promedia. El análisis se hace de 1 hasta 3 APs caídos.

### GNN

In [None]:
# Dropped-AP analysis for the GNN: for every combination of 1, 2 and 3 APs,
# both test columns of each AP (i and i+15) are set to 0 and the already
# trained model is evaluated on the modified test set.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for ap in [1,2,3]:
    print("Cantidad de APs caídos: ", ap)
    # Reset per group size: holds one accuracy per combination of `ap` APs.
    accuracy_ap_caido_gnn = []

    for combo in combinations(lst, ap):  # 2 for pairs, 3 for triplets, etc
        X_aux_test = deepcopy(X_test)
        for i in combo:
            X_aux_test[:,i] = 0
            X_aux_test[:,i+15] = 0

        x_test_data = np.reshape(X_aux_test,(X_aux_test.shape[0],2,15))
        y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

        # Apply the mean/std normalisation defined earlier in the notebook.
        x_test_data = x_test_data - mean
        x_test_data /= std

        test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())
        # Single batch containing the whole test set.
        test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)

        # Inference only: disable autograd so no graph is built for each of the
        # several hundred full-batch forward passes.
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                y_hattest = trained_gnn_model(x_batch)

        m = torch.nn.Softmax(dim=1)
        output = m(y_hattest).cpu()    
        ACC_test = accuracy_score(y_test, np.array(torch.argmax(output, axis=1)))
        print(ACC_test)
        accuracy_ap_caido_gnn.append(ACC_test)
    print("    Accuracy: ", np.mean(accuracy_ap_caido_gnn))

### KNN

In [None]:
# Dropped-AP analysis for 3-NN: fit once on the unmodified training set, then
# evaluate on test sets where every combination of 1, 2 and 3 APs is set to 0.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

neigh = KNeighborsClassifier(n_neighbors=3)
# Pass the labels as a 1-D array, as the other KNN cells do; a column vector
# triggers sklearn's DataConversionWarning.
neigh.fit(X_train, y_train.reshape(-1))

# accuracy_ap_caido_knn[k-1] holds one accuracy per combination of k APs.
accuracy_ap_caido_knn = []

for ap in [1,2,3]:
    print("Cantidad de APs caídos: ", ap)
    accuracy_ap_caido_knn.append([])
    for combo in combinations(lst, ap):  # 2 for pairs, 3 for triplets, etc
        X_aux_test = deepcopy(X_test)
        for i in combo:
            # Columns i and i+15 are zeroed together for each dropped AP.
            X_aux_test[:,i] = 0
            X_aux_test[:,i+15] = 0

        y_pred_knn = neigh.predict(X_aux_test)

        ACC_test = accuracy_score(y_test, y_pred_knn)
        accuracy_ap_caido_knn[ap-1].append(ACC_test)
    print("    Accuracy: ", np.mean(accuracy_ap_caido_knn[ap-1]))



### FCNN

In [None]:
# Dropped-AP analysis for the MLP: fit once on the unmodified training set, then
# evaluate on test sets where every combination of 1, 2 and 3 APs is set to 0.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pass the labels as a 1-D array, as the FCNN removal cell does; a column
# vector triggers sklearn's DataConversionWarning.
clf = MLPClassifier().fit(X_train, y_train.reshape(-1))
# accuracy_ap_caido_fcnn[k-1] holds one accuracy per combination of k APs.
accuracy_ap_caido_fcnn = []

for ap in [1,2,3]:
    print("Cantidad de APs caídos: ", ap)
    accuracy_ap_caido_fcnn.append([])

    for combo in combinations(lst, ap):  # 2 for pairs, 3 for triplets, etc
        X_aux_test = deepcopy(X_test)
        for i in combo:
            # Columns i and i+15 are zeroed together for each dropped AP.
            X_aux_test[:,i] = 0
            X_aux_test[:,i+15] = 0

        y_pred_fcnn = clf.predict(X_aux_test)

        ACC_test = accuracy_score(y_test, y_pred_fcnn)
        accuracy_ap_caido_fcnn[ap-1].append(ACC_test)
    print("    Accuracy: ", np.mean(accuracy_ap_caido_fcnn[ap-1]))

### Bracco, etc

In [None]:
# Dropped-AP analysis for the classifier ensemble: train once, then evaluate on
# test sets where every combination of 1, 2 and 3 APs is set to 0.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

names= ["Nearest Neighbors","Decision Tree","Linear SVM","Random Forest","Neural Net","AdaBoost"]

# NOTE(review): `names` has 6 entries but `classifiers` has 7 models
# (DecisionTreeClassifier appears twice), so names and models may be
# misaligned inside train_algorithms -- confirm whether this is intended.
classifiers = [
    KNeighborsClassifier(3),
    DecisionTreeClassifier(max_depth=5),
    SVC(kernel="linear", C=0.025, probability=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
]
algorithms, youden = train_algorithms(
    X_train, y_train.ravel(), X_test, y_test.ravel(), names, classifiers
)

# accuracy_ap_caido_bracco[k-1] holds one accuracy per combination of k APs.
accuracy_ap_caido_bracco = []

for ap in [1,2,3]:
    print("Cantidad de APs caídos: ", ap)
    step_scores = []
    accuracy_ap_caido_bracco.append(step_scores)

    for combo in combinations(lst, ap):
        X_aux_test = deepcopy(X_test)
        for i in combo:
            # Columns i and i+15 are zeroed together for each dropped AP.
            X_aux_test[:, i] = 0
            X_aux_test[:, i + 15] = 0

        y_final = classify(algorithms, youden, X_aux_test)

        ACC_test = accuracy_score(y_test, y_final)
        print(ACC_test)
        step_scores.append(ACC_test)
    print("    Accuracy: ", np.mean(step_scores))


## Análisis de dispositivos con mayor/menor RSSI

Se hace lo siguiente:

- se entrena el modelo normalmente
- se suma/resta un valor a los datos de test simulando el mejor/peor HW
- se evalúa el desempeño
    - GNN a priori no tendría una ventaja

### GNN

In [None]:
# RSSI-offset analysis for the GNN: add a constant offset to every test feature
# and evaluate the already trained model.
rssi_diff = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] # RSSI variations caused by better/worse HW
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One accuracy per offset. This list was previously re-created inside the loop,
# so only the accuracy of the last offset survived.
accuracy_rssi_diff = []

for rssi in rssi_diff:
    print("Variación de RSSI: ", rssi, " dB")

    X_aux_test = deepcopy(X_test)
    X_aux_test = X_aux_test + rssi

    x_test_data = np.reshape(X_aux_test,(X_aux_test.shape[0],2,15))
    y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

    # Apply the mean/std normalisation defined earlier in the notebook.
    x_test_data = x_test_data - mean
    x_test_data /= std    

    test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())
    # Single batch containing the whole test set.
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            y_hattest = trained_gnn_model(x_batch)

    m = torch.nn.Softmax(dim=1)
    output = m(y_hattest).cpu()   
    ACC_test = accuracy_score(y_test, np.array(torch.argmax(output, axis=1)))
    print("     Accuracy ", ACC_test)
    accuracy_rssi_diff.append(ACC_test)

### KNN

In [None]:
# RSSI-offset analysis for 3-NN: fit once, then evaluate on test features
# shifted by each offset.
rssi_diff = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] # RSSI variations caused by better/worse HW
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train.ravel())
accuracy_rssi_diff_knn = []

for rssi in rssi_diff:
    print("Variación de RSSI: ", rssi, " dB")

    # The addition already produces a new array, so X_test is left untouched.
    X_aux_test = X_test + rssi
    y_pred_knn = neigh.predict(X_aux_test)

    ACC_test = accuracy_score(y_test, y_pred_knn)
    accuracy_rssi_diff_knn.append(ACC_test)
    print(ACC_test)

print(accuracy_rssi_diff_knn)

### FCNN

In [None]:
# RSSI-offset analysis for the MLP: fit once, then evaluate on test features
# shifted by each offset.
rssi_diff = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] # RSSI variations caused by better/worse HW
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pass the labels as a 1-D array, as the FCNN removal cell does; a column
# vector triggers sklearn's DataConversionWarning.
clf = MLPClassifier().fit(X_train, y_train.reshape(-1))
# One accuracy per offset, in the order of rssi_diff.
accuracy_rssi_diff_fcnn = []

for rssi in rssi_diff:
    print("Variación de RSSI: ", rssi, " dB")

    X_aux_test = deepcopy(X_test)
    X_aux_test = X_aux_test + rssi
    
    y_pred_fcnn = clf.predict(X_aux_test)

    ACC_test = accuracy_score(y_test, y_pred_fcnn)         
    accuracy_rssi_diff_fcnn.append(ACC_test)
    print(ACC_test)
print(accuracy_rssi_diff_fcnn)

### Bracco, etc

In [None]:
# RSSI-offset analysis for the classifier ensemble: train once, then evaluate
# on test features shifted by each offset.
rssi_diff = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] # RSSI variations caused by better/worse HW
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

names= ["Nearest Neighbors","Decision Tree","Linear SVM","Random Forest","Neural Net","AdaBoost"]

# NOTE(review): `names` has 6 entries but `classifiers` has 7 models
# (DecisionTreeClassifier appears twice), so names and models may be
# misaligned inside train_algorithms -- confirm whether this is intended.
classifiers = [
    KNeighborsClassifier(3),
    DecisionTreeClassifier(max_depth=5),
    SVC(kernel="linear", C=0.025, probability=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
]
algorithms, youden = train_algorithms(
    X_train, y_train.ravel(), X_test, y_test.ravel(), names, classifiers
)
accuracy_rssi_diff_bracco = []

for rssi in rssi_diff:
    print("Variación de RSSI: ", rssi, " dB")

    # The addition already produces a new array, so X_test is left untouched.
    X_aux_test = X_test + rssi
    y_final = classify(algorithms, youden, X_aux_test)

    ACC_test = accuracy_score(y_test, y_final)
    accuracy_rssi_diff_bracco.append(ACC_test)
    print(ACC_test)

print(accuracy_rssi_diff_bracco)

## Análisis de obstáculos

### GNN

In [None]:
# Obstacle analysis for the GNN: add an offset (obstaculos, in dB) to both test
# columns of one AP at a time and evaluate the already trained model.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
obstaculos = [-10, -15] # dB
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for ap in lst:
    print("AP con obstáculo:", ap+1)    

    for obstaculo in obstaculos:

        X_aux_test = deepcopy(X_test)
        # Columns ap and ap+15 are shifted together.
        X_aux_test[:,ap] += obstaculo
        X_aux_test[:,ap+15] += obstaculo

        x_test_data = np.reshape(X_aux_test,(X_aux_test.shape[0],2,15))
        y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

        # Apply the mean/std normalisation defined earlier in the notebook.
        x_test_data = x_test_data - mean
        x_test_data /= std 

        test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())
        # Single batch containing the whole test set.
        test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                y_hattest = trained_gnn_model(x_batch)

        m = torch.nn.Softmax(dim=1)
        output = m(y_hattest).cpu()   
        ACC_test = accuracy_score(y_test, np.array(torch.argmax(output, axis=1)))
        print("     Obstaculo:", obstaculo, "dB --- ", "Accuracy: ", np.mean(ACC_test))

### KNN

In [None]:
# Obstacle analysis for 3-NN: fit once, then add an offset to both test
# columns of one AP at a time.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
obstaculos = [-10, -15] # dB
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train.ravel())

for ap in lst:
    print("AP con obstáculo:", ap+1)    

    for obstaculo in obstaculos:
        X_aux_test = deepcopy(X_test)
        # Columns ap and ap+15 are shifted together.
        for col in (ap, ap + 15):
            X_aux_test[:, col] += obstaculo

        y_pred_knn = neigh.predict(X_aux_test)
        ACC_test = accuracy_score(y_test, y_pred_knn)
        print("     Obstaculo:", obstaculo, "dB --- ", "Accuracy: ", np.mean(ACC_test))

### FCNN

In [None]:
# Obstacle analysis for the MLP: fit once, then add an offset to both test
# columns of one AP at a time.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
obstaculos = [-10, -15] # dB
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pass the labels as a 1-D array, as the FCNN removal cell does; a column
# vector triggers sklearn's DataConversionWarning.
clf = MLPClassifier().fit(X_train, y_train.reshape(-1))

for ap in lst:
    print("AP con obstáculo:", ap+1)    

    for obstaculo in obstaculos:

        X_aux_test = deepcopy(X_test)
        # Columns ap and ap+15 are shifted together.
        X_aux_test[:,ap] += obstaculo
        X_aux_test[:,ap+15] += obstaculo

        y_pred_fcnn = clf.predict(X_aux_test)

        ACC_test = accuracy_score(y_test, y_pred_fcnn)
        print("     Obstaculo:", obstaculo, "dB --- ", "Accuracy: ", np.mean(ACC_test))

### Bracco, etc

In [None]:
# Obstacle analysis for the classifier ensemble: train once, then add an
# offset to both test columns of one AP at a time.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
obstaculos = [-10, -15] # dB
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

names= ["Nearest Neighbors","Decision Tree","Linear SVM","Random Forest","Neural Net","AdaBoost"]

# NOTE(review): `names` has 6 entries but `classifiers` has 7 models
# (DecisionTreeClassifier appears twice), so names and models may be
# misaligned inside train_algorithms -- confirm whether this is intended.
classifiers = [
    KNeighborsClassifier(3),
    DecisionTreeClassifier(max_depth=5),
    SVC(kernel="linear", C=0.025, probability=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
]
algorithms, youden = train_algorithms(
    X_train, y_train.ravel(), X_test, y_test.ravel(), names, classifiers
)

for ap in lst:
    print("AP con obstáculo:", ap+1)    

    for obstaculo in obstaculos:
        X_aux_test = deepcopy(X_test)
        # Columns ap and ap+15 are shifted together.
        for col in (ap, ap + 15):
            X_aux_test[:, col] += obstaculo

        y_final = classify(algorithms, youden, X_aux_test)
        ACC_test = accuracy_score(y_test, y_final)
        print("     Obstaculo:", obstaculo, "dB --- ", "Accuracy: ", np.mean(ACC_test))

## Análisis de deformaciones en GSO

In [None]:
# GSO deformation analysis: for each AP and offset, rebuild the graph from
# offset training data, swap the resulting GSO into the trained model, and
# evaluate on test data carrying the same offset.
lst = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
obstaculos = [-10, -15] # dB
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for ap in lst:
    print("AP con obstáculo:", ap+1)    

    for obstaculo in obstaculos:

        X_aux_train = deepcopy(X_train)
        X_aux_train[:,ap] += obstaculo
        X_aux_train[:,ap+15] += obstaculo

        # The graph is built only from the 2.4 GHz data (first 15 columns).
        new_graph, new_GSO = graph_creator(X_aux_train[:,:15], th=10)
        # NOTE(review): changeGSO is called on the shared trained_gnn_model and
        # the original GSO is never restored in this cell -- confirm that later
        # cells (or re-runs of earlier ones) do not rely on the original GSO.
        trained_gnn_model.changeGSO(new_GSO)

        X_aux_test = deepcopy(X_test)
        X_aux_test[:,ap] += obstaculo
        X_aux_test[:,ap+15] += obstaculo

        x_test_data = np.reshape(X_aux_test,(X_aux_test.shape[0],2,15))
        y_test_data = np.reshape(y_test,(y_test.shape[0],1,y_test.shape[1]))

        # Apply the mean/std normalisation defined earlier in the notebook.
        x_test_data = x_test_data - mean
        x_test_data /= std         

        test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test_data).float(), torch.from_numpy(y_test_data).float())
        # Single batch containing the whole test set.
        test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for x_batch, y_batch in test_loader:
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                y_hattest = trained_gnn_model(x_batch)

        m = torch.nn.Softmax(dim=1)
        output = m(y_hattest).cpu()   
        ACC_test = accuracy_score(y_test, np.array(torch.argmax(output, axis=1)))
        print("     Obstaculo:", obstaculo, "dB --- ", "Accuracy: ", np.mean(ACC_test))