# UJIINDOORLOC - Simple GNN

Dataset: UJIINDOORLOC

Modelo: GNN simple

## Importar Datos

In [None]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from itertools import combinations
from copy import deepcopy
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import ParameterGrid
import torch
import networkx as nx
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder


from torch_geometric.data import Data
from torch_geometric.nn.conv.dna_conv import Linear
from torch_geometric.utils import to_networkx, is_undirected, to_undirected
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ChebConv, SAGEConv, TAGConv, GraphConv
from torch_geometric.loader import DataLoader

from zipfile import ZipFile

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


In [None]:
# Descarga de datos
!kaggle datasets download -d giantuji/UjiIndoorLoc

In [None]:
# Read the training split straight out of the downloaded archive.
dataset = 'UjiIndoorLoc.zip'
# The context managers close both the archive and the member stream as soon
# as the CSV has been parsed, instead of leaving the handles open.
with ZipFile(dataset) as zip_file, zip_file.open('TrainingData.csv') as csv_file:
    df = pd.read_csv(csv_file)
df.head()

## BORRAR

In [None]:
# Keep the first 520 columns and map the value 100 to -105
# (presumably 100 marks "AP not detected" -- confirm against the dataset docs).
# replace() builds a new frame; writing through `.values` only has an effect
# while pandas happens to hand back a writable view of the data.
df_X = df.iloc[:, :520].replace(100, -105)

In [None]:
# Per-column mean and standard deviation, taken from one describe() call
# and selected by row label rather than by row position.
summary = df_X.describe()
aps_mean = summary.loc['mean']
aps_std = summary.loc['std']

In [None]:
# Global matplotlib font size for the figures produced below.
font = dict(size=15)
plt.rc('font', **font)

In [None]:
# Histogram of the per-column mean values, written to disk as a PDF figure.
aps_mean.hist(figsize=(8, 6))
plt.xlabel("RSSI (dBm)")
plt.ylabel("Fingerprints")
plt.savefig("ujiindoorloc_mean_distribution.pdf")

## Preprocesamiento

In [None]:
def preprocess_dataset(dataset_path, filter_std, dataset_percentage=None, random_state=None):
    """Load ``TrainingData.csv`` from the archive and return a shuffled 80/20 split.

    Pipeline: build a ``CLASS`` label by concatenating ``BUILDINGID`` and
    ``FLOOR``; map the value 100 to -105 in the first 520 columns; keep the
    columns whose standard deviation exceeds ``filter_std``; shift the kept
    readings by +105; optionally subsample rows; ordinal-encode the labels;
    shuffle (fixed seed 99) and split 80/20 (fixed seed 123).

    Args:
        dataset_path: Path of the zip archive that contains ``TrainingData.csv``.
        filter_std: Columns whose standard deviation is not greater than this
            value are dropped.
        dataset_percentage: Optional fraction of rows to keep, forwarded to
            ``DataFrame.sample(frac=...)``. A falsy value keeps every row.
        random_state: Optional seed for that subsampling. ``None`` (the
            default) draws a different subset on every call.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, number_aps, number_classes,
        filtered_aps, enc)`` where the ``X`` arrays hold one column per kept
        AP, the ``y`` arrays are column vectors of encoded labels,
        ``filtered_aps`` lists the kept column names and ``enc`` is the
        fitted ``OrdinalEncoder``.
    """
    # Close the archive and the member stream as soon as the CSV is parsed.
    with ZipFile(dataset_path) as zip_file, zip_file.open('TrainingData.csv') as csv_file:
        df = pd.read_csv(csv_file)

    labels = df['BUILDINGID'].astype(str) + df['FLOOR'].astype(str)

    # replace() returns a new frame, so nothing depends on `.values` handing
    # back a writable view of the underlying data.
    readings = df.iloc[:, :520].replace(100, -105)

    # keep those APs where std > filter_std
    ap_std = readings.std()
    filtered_aps = ap_std.index[ap_std > filter_std].tolist()

    # shift the readings so that the minimum (-105) becomes 0
    df_X = readings[filtered_aps] + 105
    df_X['CLASS'] = labels.values

    if dataset_percentage:
        df_X = df_X.sample(frac=dataset_percentage, random_state=random_state)

    # apply ordinal encoder to the classes and split X, y
    # NOTE: the encoder is fitted on the (possibly subsampled) rows only.
    enc = OrdinalEncoder(dtype=int)
    y = enc.fit_transform(df_X['CLASS'].values.reshape(-1, 1))
    X = df_X.iloc[:, :-1].values

    # Shuffle features and labels together with a fixed seed.
    number_aps = X.shape[1]
    shuffled = pd.DataFrame(X)
    shuffled[str(number_aps)] = y.ravel()
    shuffled = shuffled.sample(frac=1, random_state=99)
    y = shuffled.iloc[:, -1].values.reshape(-1, 1)
    X = shuffled.iloc[:, :-1].values

    # split 80-20
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

    print("X_train shape: ", X_train.shape)
    number_classes = df_X['CLASS'].nunique()
    return X_train, X_test, y_train, y_test, number_aps, number_classes, filtered_aps, enc

## Grafo

En las siguientes celdas se describe un poco el dataset y se muestran las distribuciones de potencia por AP.

In [None]:
def graph_creator(X_G, th=10):
    """
    Build an undirected, weighted graph between the columns (APs) of ``X_G``.

    For every column the rows whose value lies within ``th`` of that column's
    maximum are selected (an attempt to estimate the samples taken closest to
    the AP). The mean of those rows over every *other* column becomes the
    weight of the edge ``AP -> other``. The directed edges are finally
    symmetrised with ``to_undirected(..., reduce="mean")``.

    Args:
        X_G: 2-D data accepted by ``pd.DataFrame``, one column per AP.
        th: Width of the window below each column's maximum.

    Returns:
        ``(edge_index, edge_attr)`` as returned by ``to_undirected``; nodes
        are numbered by column position and each edge carries one weight.
    """
    df_data_train = pd.DataFrame(X_G)
    columns = df_data_train.columns.to_list()
    # Column label -> node id, so each edge costs a dict lookup instead of a
    # linear list.index() scan.
    node_of = {column: position for position, column in enumerate(columns)}

    # Edges are collected in plain lists: DataFrame.append no longer exists in
    # pandas >= 2.0 and copied the whole frame on every call before that.
    sources = []
    targets = []
    edge_attr = []
    for ap in columns:
        max_val = df_data_train[ap].max()
        near_ap = df_data_train[df_data_train[ap] > (max_val - th)].drop(ap, axis=1)

        # mean value that the selected rows report for every other AP
        for other_ap, mean_rssi in near_ap.mean().items():
            sources.append(node_of[ap])
            targets.append(node_of[other_ap])
            edge_attr.append([float(mean_rssi)])

    edge_index = torch.tensor([sources, targets], dtype=torch.long)
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)
    edge_index, edge_attr = to_undirected(edge_index, edge_attr, reduce="mean")
    return edge_index, edge_attr

In [None]:
def build_dataset(X, y, graph):
    """Create one graph sample per row of ``X``/``y`` from a template graph.

    Every sample is a deep copy of ``graph`` with ``x`` set to the sample's
    features and ``y`` to its label, both converted to float32 tensors.

    Args:
        X: Indexable collection of per-sample feature arrays.
        y: Indexable collection of per-sample labels; its length sets the
            number of samples produced.
        graph: Template object that is deep-copied for each sample.

    Returns:
        List with ``len(y)`` copies of ``graph``, each carrying ``x``, ``y``,
        ``train_mask``, ``val_mask`` and ``test_mask``.
    """
    n_samples = len(y)
    # The masks are all-ones float tensors of length len(y), identical for
    # every sample. Allocating them once and sharing them keeps memory linear
    # in the dataset size instead of quadratic.
    train_mask = torch.ones(n_samples)
    val_mask = torch.ones(n_samples)
    test_mask = torch.ones(n_samples)

    dataset = []
    for i in range(n_samples):
        data = deepcopy(graph)
        data.x = torch.tensor(X[i], dtype=torch.float)
        data.y = torch.tensor(y[i], dtype=torch.float)
        data.train_mask = train_mask
        data.val_mask = val_mask
        data.test_mask = test_mask
        dataset.append(data)
    return dataset

## Modelo

In [None]:
class GNN_GCNConv(torch.nn.Module):
    """Two ``GCNConv`` layers followed by a linear classifier.

    Each node carries a single input feature. After the two convolutions the
    node embeddings of every graph are flattened into one vector and fed to a
    fully connected layer with ``number_of_classes`` outputs.

    Args:
        number_of_aps: Number of nodes per graph.
        number_of_classes: Size of the output layer.
        conv_out_features: Output widths of the two convolutions; defaults to
            ``[16, 20]``.
    """

    def __init__(self, number_of_aps, number_of_classes, conv_out_features: list = None):
        super().__init__()
        self.number_of_aps = number_of_aps
        self.number_of_classes = number_of_classes
        # A fresh list per instance: a list literal in the signature would be
        # one object shared by every model built with the default.
        self.conv_out_features = [16, 20] if conv_out_features is None else list(conv_out_features)
        self.conv1 = GCNConv(1, self.conv_out_features[0], bias=True, normalize=True)
        self.conv2 = GCNConv(self.conv_out_features[0], self.conv_out_features[1], bias=True, normalize=True)
        self.fc = torch.nn.Linear(self.conv_out_features[-1]*self.number_of_aps, self.number_of_classes)

    def forward(self, data):
        """Return one row of ``number_of_classes`` scores per graph in ``data``.

        Only ``data.x`` and ``data.edge_index`` are read; edge attributes are
        not passed to the convolutions.
        """
        x, edge_index = data.x, data.edge_index

        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)

        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, training=self.training)

        # One row per graph: concatenate the embeddings of its number_of_aps nodes.
        x = x.reshape(-1, self.conv_out_features[-1] * self.number_of_aps)

        # NOTE(review): this ReLU clamps negative scores to zero before they
        # reach the loss/argmax. It is kept so existing state dicts evaluate
        # exactly as before; consider returning the raw logits when retraining.
        return F.relu(self.fc(x))

In [None]:
class GNN_TAGConv(torch.nn.Module):
    """Two ``TAGConv`` layers followed by a linear classifier.

    Each node carries a single input feature. After the two convolutions the
    node embeddings of every graph are flattened into one vector and fed to a
    fully connected layer with ``number_of_classes`` outputs.

    Args:
        number_of_aps: Number of nodes per graph.
        number_of_classes: Size of the output layer.
        k: ``K`` argument of the first and second ``TAGConv``; defaults to
            ``[1, 1]``.
        conv_out_features: Output widths of the two convolutions; defaults to
            ``[16, 20]``.
    """

    def __init__(self, number_of_aps, number_of_classes, k: list = None, conv_out_features: list = None):
        super().__init__()
        self.number_of_aps = number_of_aps
        self.number_of_classes = number_of_classes
        # Fresh lists per instance: list literals in the signature would be
        # objects shared by every model built with the defaults.
        self.k = [1, 1] if k is None else list(k)
        self.conv_out_features = [16, 20] if conv_out_features is None else list(conv_out_features)
        self.conv1 = TAGConv(1, self.conv_out_features[0], K=self.k[0], bias=True, normalize=True)
        self.conv2 = TAGConv(self.conv_out_features[0], self.conv_out_features[1], K=self.k[1], bias=True, normalize=True)
        self.fc = torch.nn.Linear(self.conv_out_features[-1]*self.number_of_aps, self.number_of_classes)

    def forward(self, data):
        """Return one row of ``number_of_classes`` scores per graph in ``data``.

        Only ``data.x`` and ``data.edge_index`` are read; edge attributes are
        not passed to the convolutions.
        """
        x, edge_index = data.x, data.edge_index

        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)

        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, training=self.training)

        # One row per graph: concatenate the embeddings of its number_of_aps nodes.
        x = x.reshape(-1, self.conv_out_features[-1] * self.number_of_aps)

        # NOTE(review): this ReLU clamps negative scores to zero before they
        # reach the loss/argmax. It is kept so existing state dicts evaluate
        # exactly as before; consider returning the raw logits when retraining.
        return F.relu(self.fc(x))

In [None]:
class GNN_GraphConv(torch.nn.Module):
    """Two ``GraphConv`` layers followed by a linear classifier.

    Each node carries a single input feature. After the two convolutions the
    node embeddings of every graph are flattened into one vector and fed to a
    fully connected layer with ``number_of_classes`` outputs.

    Args:
        number_of_aps: Number of nodes per graph.
        number_of_classes: Size of the output layer.
        aggr: Aggregation passed to both ``GraphConv`` layers.
        conv_out_features: Output widths of the two convolutions; defaults to
            ``[16, 20]``.
    """

    def __init__(self, number_of_aps, number_of_classes, aggr: str = "add", conv_out_features: list = None):
        super().__init__()
        self.number_of_aps = number_of_aps
        self.number_of_classes = number_of_classes
        self.aggr = aggr
        # A fresh list per instance: a list literal in the signature would be
        # one object shared by every model built with the default.
        self.conv_out_features = [16, 20] if conv_out_features is None else list(conv_out_features)
        self.conv1 = GraphConv(1, self.conv_out_features[0], aggr=self.aggr, bias=True)
        self.conv2 = GraphConv(self.conv_out_features[0], self.conv_out_features[1], aggr=self.aggr, bias=True)
        self.fc = torch.nn.Linear(self.conv_out_features[-1]*self.number_of_aps, self.number_of_classes)

    def forward(self, data):
        """Return one row of ``number_of_classes`` scores per graph in ``data``.

        Only ``data.x`` and ``data.edge_index`` are read; edge attributes are
        not passed to the convolutions.
        """
        x, edge_index = data.x, data.edge_index

        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)

        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, training=self.training)

        # One row per graph: concatenate the embeddings of its number_of_aps nodes.
        x = x.reshape(-1, self.conv_out_features[-1] * self.number_of_aps)

        # NOTE(review): this ReLU clamps negative scores to zero before they
        # reach the loss/argmax. It is kept so existing state dicts evaluate
        # exactly as before; consider returning the raw logits when retraining.
        return F.relu(self.fc(x))

## Entrenamiento

In [None]:
# Rescale the edge weights stored on `data` and draw the graph, using the
# rescaled weights as line widths.
# NOTE(review): `data` is only created by the Data(...) cell further down, so
# this cell presumably has to be run after that one -- confirm.
# NOTE(review): the shifted weights are divided by max(), not by
# (max() - min()); confirm whether a full min-max scaling was intended.
# data.edge_attr = (data.edge_attr - data.edge_attr.mean()) / data.edge_attr.std()
data.edge_attr = (data.edge_attr - data.edge_attr.min()) / data.edge_attr.max()
# data.edge_attr *= 4
# data.edge_attr = torch.nn.functional.normalize(data.edge_attr, dim=0)
g = to_networkx(data, edge_attrs=["edge_attr"])
weights = nx.get_edge_attributes(g,'edge_attr').values()
# pos = nx.circular_layout(g)
nx.draw(g, width=list(weights))

In [None]:
# Split the data and build the Data object that holds the AP graph.

X_train, X_test, y_train, y_test, num_aps, num_classes, filtered_aps, enc_train = preprocess_dataset(dataset, filter_std=3, dataset_percentage=0.4)
# X_train holds exactly the num_aps AP columns (the labels come back
# separately in y_train), so the graph is built from all of them. Slicing off
# the last column would leave the last of the num_aps nodes without any edge.
edge_index, edge_attr = graph_creator(X_train, th=10)
data = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=num_aps)
print(f"Undirected: {data.is_undirected()}")

In [None]:
# Dataset assembly: one graph sample per fingerprint, one node feature per AP.

x_training_data = X_train.reshape(X_train.shape[0], num_aps, 1)
x_test_data = X_test.reshape(X_test.shape[0], num_aps, 1)
y_training_data, y_test_data = y_train, y_test

# Standardise both splits with the training statistics: (x - mean) / std.
# `mean` and `std` stay at module level because later cells reuse them.
mean = x_training_data.mean(axis=0)
std = x_training_data.std(axis=0)
x_training_data = (x_training_data - mean) / std
x_test_data = (x_test_data - mean) / std

train_dataset = build_dataset(x_training_data, y_training_data, data)
test_dataset = build_dataset(x_test_data, y_test_data, data)

In [None]:
train_dataset[0]

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
device

In [None]:
# Mini-batch size passed to the DataLoaders and initial learning rate passed
# to the Adam optimisers in the experiment cells below.
batch_size = 32
learning_rate = 0.01

### GCNConv 

In [None]:
# GCNConv experiment: model, mini-batch loaders, optimiser, loss and LR schedule.
model = GNN_GCNConv(num_aps, num_classes, conv_out_features=[20, 20])
model = model.to(device)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=5e-4,
)
loss = torch.nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)


In [None]:
# Train for 100 epochs, recording per-epoch loss and accuracy on both splits.
# Each epoch value is the mean of the per-batch values.
train_loss = []
train_accuracy = []
test_loss = []
test_accuracy = []

for epoch in range(100):
    print(f"Epoch: {epoch}")

    # TRAIN
    model.train()
    train_accuracy_epoch = []
    train_loss_epoch = []
    # The loop variable keeps the name `data` (and `out` stays bound after
    # the loop) because the report cell that follows reads both.
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        out = model(data)
        target = data.y.type(torch.long)
        loss_result = loss(out, target)
        loss_result.backward()
        optimizer.step()

        # Store plain floats so no tensors are kept alive in the history.
        train_loss_epoch.append(loss_result.item())
        train_accuracy_epoch.append(
            accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
        )

    # Decay the learning rate once per epoch until it reaches 0.0005.
    if scheduler.get_last_lr()[0] > 0.0005:
        scheduler.step()

    train_accuracy.append(np.mean(train_accuracy_epoch))
    train_loss.append(np.mean(train_loss_epoch))

    # VALIDATION
    model.eval()
    test_accuracy_epoch = []
    test_loss_epoch = []
    # No gradients are needed here; skipping autograd saves memory and time.
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data)
            target = data.y.type(torch.long)
            loss_result = loss(out, target)

            test_loss_epoch.append(loss_result.item())
            test_accuracy_epoch.append(
                accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
            )

    test_accuracy.append(np.mean(test_accuracy_epoch))
    test_loss.append(np.mean(test_loss_epoch))


print(f"Last LR: {scheduler.get_last_lr()}")

plt.figure()
plt.plot(train_loss, label="Train loss")
plt.plot(test_loss, label="Validation loss")
plt.legend()

plt.figure()
plt.plot(train_accuracy, label="Train accuracy")
plt.plot(test_accuracy, label="Validation accuracy")
plt.legend()

plt.show()


In [None]:
# Accuracy and per-class report over every batch of `test_loader`, rather
# than over whichever single mini-batch a previous loop left in `data`/`out`.
model.eval()
all_targets = []
all_predictions = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        logits = model(batch)
        all_targets.append(batch.y.reshape(-1).type(torch.long).cpu())
        # argmax of the raw scores equals argmax of their softmax.
        all_predictions.append(torch.argmax(logits, dim=1).cpu())

y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_predictions).numpy()
accuracy = accuracy_score(y_true, y_pred)

print(accuracy)
print(classification_report(y_true, y_pred))

#### Save Model

In [None]:
# Persist the weights that `model` holds when this cell runs.
# NOTE(review): the training loop above does not track a best epoch, so
# despite the file name these are presumably the final-epoch weights -- confirm.
torch.save(model.state_dict(), "GCNConv_best_model.pth")

### TAGConv 

In [None]:
# TAGConv experiment: model, mini-batch loaders, optimiser, loss and LR schedule.
model = GNN_TAGConv(num_aps, num_classes, k=[2, 2], conv_out_features=[20, 20])
model = model.to(device)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=5e-4,
)
loss = torch.nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)


In [None]:
# Train for 100 epochs, recording per-epoch loss and accuracy on both splits.
# Each epoch value is the mean of the per-batch values.
train_loss = []
train_accuracy = []
test_loss = []
test_accuracy = []

for epoch in range(100):
    print(f"Epoch: {epoch}")

    # TRAIN
    model.train()
    train_accuracy_epoch = []
    train_loss_epoch = []
    # The loop variable keeps the name `data` (and `out` stays bound after
    # the loop) because the report cell that follows reads both.
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        out = model(data)
        target = data.y.type(torch.long)
        loss_result = loss(out, target)
        loss_result.backward()
        optimizer.step()

        # Store plain floats so no tensors are kept alive in the history.
        train_loss_epoch.append(loss_result.item())
        train_accuracy_epoch.append(
            accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
        )

    # Decay the learning rate once every 10 epochs.
    if (epoch+1)%10 == 0:
        scheduler.step()

    train_accuracy.append(np.mean(train_accuracy_epoch))
    train_loss.append(np.mean(train_loss_epoch))

    # VALIDATION
    model.eval()
    test_accuracy_epoch = []
    test_loss_epoch = []
    # No gradients are needed here; skipping autograd saves memory and time.
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data)
            target = data.y.type(torch.long)
            loss_result = loss(out, target)

            test_loss_epoch.append(loss_result.item())
            test_accuracy_epoch.append(
                accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
            )

    test_accuracy.append(np.mean(test_accuracy_epoch))
    test_loss.append(np.mean(test_loss_epoch))


print(f"Last LR: {scheduler.get_last_lr()}")

plt.figure()
plt.plot(train_loss, label="Train loss")
plt.plot(test_loss, label="Validation loss")
plt.legend()

plt.figure()
plt.plot(train_accuracy, label="Train accuracy")
plt.plot(test_accuracy, label="Validation accuracy")
plt.legend()

plt.show()


In [None]:
test_accuracy[-1]

In [None]:
# Accuracy and per-class report over every batch of `test_loader`, rather
# than over whichever single mini-batch a previous loop left in `data`/`out`.
model.eval()
all_targets = []
all_predictions = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        logits = model(batch)
        all_targets.append(batch.y.reshape(-1).type(torch.long).cpu())
        # argmax of the raw scores equals argmax of their softmax.
        all_predictions.append(torch.argmax(logits, dim=1).cpu())

y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_predictions).numpy()
accuracy = accuracy_score(y_true, y_pred)

print(accuracy)
print(classification_report(y_true, y_pred))

#### Save Model

In [None]:
# torch.save(model.state_dict(), "TAGConv_best_model.pth")

In [None]:
# Persist the weights that `model` holds when this cell runs under a fixed,
# hand-written file name.
# NOTE(review): the index 12 in the name is hard-coded here; presumably it
# continues the numbering of earlier runs -- confirm.
torch.save(model.state_dict(), "UJI_Simple_porc0.4_12_best_model.pth")

### GraphConv 

In [None]:
# GraphConv experiment: model, mini-batch loaders, optimiser, loss and LR schedule.
model = GNN_GraphConv(num_aps, num_classes, aggr="mean", conv_out_features=[20, 20])
model = model.to(device)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=5e-4,
)
loss = torch.nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)


In [None]:
# Train for 100 epochs, recording per-epoch loss and accuracy on both splits.
# Each epoch value is the mean of the per-batch values.
train_loss = []
train_accuracy = []
test_loss = []
test_accuracy = []

for epoch in range(100):
    print(f"Epoch: {epoch}")

    # TRAIN
    model.train()
    train_accuracy_epoch = []
    train_loss_epoch = []
    # The loop variable keeps the name `data` (and `out` stays bound after
    # the loop) because the report cell that follows reads both.
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        out = model(data)
        target = data.y.type(torch.long)
        loss_result = loss(out, target)
        loss_result.backward()
        optimizer.step()

        # Store plain floats so no tensors are kept alive in the history.
        train_loss_epoch.append(loss_result.item())
        train_accuracy_epoch.append(
            accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
        )

    # Decay the learning rate once per epoch until it reaches 0.0005.
    if scheduler.get_last_lr()[0] > 0.0005:
        scheduler.step()

    train_accuracy.append(np.mean(train_accuracy_epoch))
    train_loss.append(np.mean(train_loss_epoch))

    # VALIDATION
    model.eval()
    test_accuracy_epoch = []
    test_loss_epoch = []
    # No gradients are needed here; skipping autograd saves memory and time.
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data)
            target = data.y.type(torch.long)
            loss_result = loss(out, target)

            test_loss_epoch.append(loss_result.item())
            test_accuracy_epoch.append(
                accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
            )

    test_accuracy.append(np.mean(test_accuracy_epoch))
    test_loss.append(np.mean(test_loss_epoch))


print(f"Last LR: {scheduler.get_last_lr()}")

plt.figure()
plt.plot(train_loss, label="Train loss")
plt.plot(test_loss, label="Validation loss")
plt.legend()

plt.figure()
plt.plot(train_accuracy, label="Train accuracy")
plt.plot(test_accuracy, label="Validation accuracy")
plt.legend()

plt.show()


In [None]:
# Accuracy and per-class report over every batch of `test_loader`, rather
# than over whichever single mini-batch a previous loop left in `data`/`out`.
model.eval()
all_targets = []
all_predictions = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        logits = model(batch)
        all_targets.append(batch.y.reshape(-1).type(torch.long).cpu())
        # argmax of the raw scores equals argmax of their softmax.
        all_predictions.append(torch.argmax(logits, dim=1).cpu())

y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_predictions).numpy()
accuracy = accuracy_score(y_true, y_pred)

print(accuracy)
print(classification_report(y_true, y_pred))

#### Save Model

In [None]:
# Persist the weights that `model` holds when this cell runs.
# NOTE(review): the training loop above does not track a best epoch, so
# despite the file name these are presumably the final-epoch weights -- confirm.
torch.save(model.state_dict(), "GraphConv_best_model.pth")

## TEST

In [None]:
# Build the held-out test loader from ValidationData.csv, reusing the AP
# selection, label encoder and normalisation statistics of the training run.
dataset = 'UjiIndoorLoc.zip'
with ZipFile(dataset) as zip_file, zip_file.open('ValidationData.csv') as csv_file:
    df_test = pd.read_csv(csv_file)

df_test['CLASS'] = df_test['BUILDINGID'].astype(str) + df_test['FLOOR'].astype(str)

# Same value mapping as the training pipeline: 100 -> -105, then shift by +105.
# replace() returns a new frame, so nothing is written through `.values`.
df_X_test = df_test[filtered_aps].replace(100, -105) + 105
df_y_test = df_test['CLASS']
print(df_X_test.shape)
df_X_test['CLASS'] = df_y_test.values

y_test = enc_train.transform(df_X_test['CLASS'].values.reshape(-1,1))
X_test = df_X_test.iloc[:,:-1].values


# Dataset assembly

x_test_data = np.reshape(X_test,(X_test.shape[0],num_aps,1))
y_test_data = y_test

# normalise with the training statistics: (x - mean) / std
x_test_data = (x_test_data - mean) / std

# Rebuild the single-graph template from edge_index/edge_attr. The
# `for data in ...` loops of the earlier cells rebind `data` to their last
# mini-batch, which is not the per-sample template build_dataset copies.
graph = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=num_aps)
test_dataset = build_dataset(x_test_data, y_test_data, graph)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
np.shape(test_dataset)

Load best model

In [None]:
# model = GNN_GraphConv(num_aps, num_classes, aggr="mean", conv_out_features=[20,20]).to(device)
# model.load_state_dict(torch.load("GraphConv_best_model.pth"))

# Recreate the TAGConv architecture and load its stored weights.
# map_location remaps the stored tensors onto `device`, so a checkpoint
# written during a GPU run also loads on a CPU-only machine.
# NOTE(review): the file is read from "checkpoints/" while the save cells in
# this notebook write to the working directory -- confirm the file location.
model = GNN_TAGConv(num_aps, num_classes, k=[2,2], conv_out_features=[20,20])
model.load_state_dict(torch.load("checkpoints/TAGConv_best_model.pth", map_location=device))
model.to(device)

In [None]:
# Evaluate the loaded model on the held-out test loader.
train_loss = []
train_accuracy = []
test_loss = []
test_accuracy = []  # per-batch accuracies, kept for inspection

correct = 0
total = 0

# TEST
model.eval()
# Inference only: no autograd graph is needed.
with torch.no_grad():
    # `data` and `out` stay bound after the loop; the next cell reads them.
    for data in test_loader:
        out = model(data.to(device))

        target = data.y.cpu().reshape(-1).type(torch.long)
        predicted = torch.argmax(out.cpu(), dim=1)
        test_accuracy.append(accuracy_score(target.numpy(), predicted.numpy()))
        correct += int((predicted == target).sum())
        total += target.numel()

# Sample-weighted accuracy: averaging the per-batch values would give the
# final, smaller batch the same weight as a full one.
total_test_accuracy = correct / total if total else float("nan")

print(f"TEST ACCURACY: {total_test_accuracy}")

In [None]:
# Accuracy and per-class report over every batch of `test_loader`, rather
# than over whichever single mini-batch a previous loop left in `data`/`out`.
model.eval()
all_targets = []
all_predictions = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        logits = model(batch)
        all_targets.append(batch.y.reshape(-1).type(torch.long).cpu())
        # argmax of the raw scores equals argmax of their softmax.
        all_predictions.append(torch.argmax(logits, dim=1).cpu())

y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_predictions).numpy()
accuracy = accuracy_score(y_true, y_pred)

print(accuracy)
print(classification_report(y_true, y_pred))

## Pruebas

In [None]:
import pandas as pd
from zipfile import ZipFile
import torch
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from torch_geometric.data import Data
from torch_geometric.nn.conv.dna_conv import Linear
from torch_geometric.utils import to_networkx, is_undirected, to_undirected
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ChebConv, SAGEConv, TAGConv, GraphConv
from torch_geometric.loader import DataLoader
from copy import deepcopy


## KNN 

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# KNN baseline: cross-validated search over the number of neighbours.
param = {'n_neighbors': [5, 10, 15]}

clf_knn = GridSearchCV(KNeighborsClassifier(), param)
clf_knn.fit(X_train, y_train.ravel())


print(clf_knn.best_params_)
print(clf_knn.best_score_)

K = clf_knn.best_params_['n_neighbors']
# GridSearchCV (refit=True by default) has already refitted the best
# configuration on all of X_train, so that estimator is reused instead of
# fitting an identical model a second time.
neigh = clf_knn.best_estimator_
y_pred_knn = neigh.predict(X_test)

print(accuracy_score(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# Fixed-K baseline: 5 nearest neighbours fitted on the training features.
neigh = KNeighborsClassifier(n_neighbors=5)
neigh = neigh.fit(X_train, y_train.ravel())
y_pred_knn = neigh.predict(X_test)

knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)
print(knn_accuracy)
print(knn_report)

## Análisis variando cantidad de muestras

In [None]:
# Train the TAGConv model repeatedly on different fractions of the training
# data, storing the best checkpoint and best validation accuracy of each run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 32
learning_rate = 0.001
print_every = 5

porcentajes = [0.9, 1] # [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {"0.3":[], "0.4":[], "0.5":[], "0.6":[], "0.7":[], "0.8":[], "0.9":[], "1":[]}

for porc in porcentajes:
    print('Porcentaje de datos: ', porc)

    for i in range(4):

        # Split the data and build the Data object that holds the AP graph.
        X_train, X_test, y_train, y_test, num_aps, num_classes, filtered_aps, enc_train = preprocess_dataset(dataset, filter_std=3, dataset_percentage=porc)
        # X_train holds exactly the num_aps AP columns (labels come back
        # separately), so all of them go into the graph. Slicing off the
        # last column would leave the last node without any edge.
        edge_index, edge_attr = graph_creator(X_train, th=10)
        data = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=num_aps)

        # Dataset assembly
        x_training_data = np.reshape(X_train,(X_train.shape[0],num_aps,1))
        x_test_data = np.reshape(X_test,(X_test.shape[0],num_aps,1))
        y_training_data = y_train
        y_test_data = y_test

        # normalise with the training statistics: (x - mean) / std
        mean = x_training_data.mean(axis=0)
        std = x_training_data.std(axis=0)

        x_training_data = (x_training_data - mean) / std
        x_test_data = (x_test_data - mean) / std

        train_dataset = build_dataset(x_training_data, y_training_data, data)
        test_dataset = build_dataset(x_test_data, y_test_data, data)

        model = GNN_TAGConv(num_aps, num_classes, k=[2,2], conv_out_features=[20,20]).to(device)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
        loss = torch.nn.CrossEntropyLoss()
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)

        train_loss = []
        train_accuracy = []
        test_loss = []
        test_accuracy = []
        best_test_accuracy = 0

        for epoch in range(100):

            # TRAIN
            model.train()
            train_accuracy_epoch = []
            train_loss_epoch = []
            # The loop variable is `batch`, not `data`, so the graph template
            # bound to `data` above survives for the cells that follow.
            for batch in train_loader:
                batch = batch.to(device)
                optimizer.zero_grad()

                out = model(batch)
                target = batch.y.type(torch.long)
                loss_result = loss(out, target)
                loss_result.backward()
                optimizer.step()

                train_loss_epoch.append(loss_result.item())
                train_accuracy_epoch.append(
                    accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
                )

            # Decay the learning rate once every 10 epochs.
            if (epoch+1)%10 == 0:
                scheduler.step()

            train_accuracy.append(np.mean(train_accuracy_epoch))
            train_loss.append(np.mean(train_loss_epoch))

            # VALIDATION
            model.eval()
            test_accuracy_epoch = []
            test_loss_epoch = []
            # No gradients are needed here; skipping autograd saves memory.
            with torch.no_grad():
                for batch in test_loader:
                    batch = batch.to(device)
                    out = model(batch)
                    target = batch.y.type(torch.long)
                    loss_result = loss(out, target)

                    test_loss_epoch.append(loss_result.item())
                    test_accuracy_epoch.append(
                        accuracy_score(target.reshape(-1).cpu().numpy(), out.argmax(dim=1).cpu().numpy())
                    )

            test_accuracy.append(np.mean(test_accuracy_epoch))
            # Checkpoint whenever the validation accuracy improves.
            if test_accuracy[-1] > best_test_accuracy:
                best_test_accuracy = test_accuracy[-1]
                torch.save(model.state_dict(), f"UJI_Simple_porc{porc}_{i}_best_model.pth")

            test_loss.append(np.mean(test_loss_epoch))

        print(f"Best Accuracy: Train {np.max(train_accuracy)}, Val {np.max(test_accuracy)}")
        # setdefault avoids a KeyError for a fraction missing from the dict.
        accuracy.setdefault(str(porc), []).append(np.max(test_accuracy))

In [None]:
print(accuracy)

### TEST

In [None]:
# Build the held-out test loader from ValidationData.csv, reusing the AP
# selection, label encoder and normalisation statistics of the latest run.
dataset = 'UjiIndoorLoc.zip'
with ZipFile(dataset) as zip_file, zip_file.open('ValidationData.csv') as csv_file:
    df_test = pd.read_csv(csv_file)

df_test['CLASS'] = df_test['BUILDINGID'].astype(str) + df_test['FLOOR'].astype(str)

# Same value mapping as the training pipeline: 100 -> -105, then shift by +105.
# replace() returns a new frame, so nothing is written through `.values`.
df_X_test = df_test[filtered_aps].replace(100, -105) + 105
df_y_test = df_test['CLASS']
print(df_X_test.shape)
df_X_test['CLASS'] = df_y_test.values

y_test = enc_train.transform(df_X_test['CLASS'].values.reshape(-1,1))
X_test = df_X_test.iloc[:,:-1].values


# Dataset assembly

x_test_data = np.reshape(X_test,(X_test.shape[0],num_aps,1))
y_test_data = y_test

# normalise with the training statistics: (x - mean) / std
x_test_data = (x_test_data - mean) / std

# Rebuild the single-graph template from edge_index/edge_attr. The
# `for data in ...` loops of the earlier cells rebind `data` to their last
# mini-batch, which is not the per-sample template build_dataset copies.
graph = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=num_aps)
test_dataset = build_dataset(x_test_data, y_test_data, graph)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Evaluate the stored checkpoints of each run on the held-out test loader.
porcentajes = [0.4] #, 0.6, 0.7, 0.8, 0.9, 1]
accuracy = {"0.3":[], "0.4":[], "0.5":[], "0.6":[], "0.7":[], "0.8":[], "0.9":[], "1":[]}

for porc in porcentajes:
    for i in range(13):
        model = GNN_TAGConv(num_aps, num_classes, k=[2,2], conv_out_features=[20,20])
        # map_location remaps the stored tensors onto `device`, so weights
        # written during a GPU run also load on a CPU-only machine.
        model.load_state_dict(torch.load(f"checkpoints/UJI_Simple_porc{porc}_{i}_best_model.pth", map_location=device))
        model.to(device)

        test_loss = []
        test_accuracy = []

        # TEST
        model.eval()
        correct = 0
        total = 0
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for batch in test_loader:
                out = model(batch.to(device))
                target = batch.y.cpu().reshape(-1).type(torch.long)
                predicted = torch.argmax(out.cpu(), dim=1)
                correct += int((predicted == target).sum())
                total += target.numel()

        # Sample-weighted accuracy: averaging per-batch values would give the
        # final, smaller batch the same weight as a full one.
        test_accuracy.append(correct / total if total else float("nan"))
        accuracy[str(porc)].append(np.mean(test_accuracy))

In [None]:
accuracy

In [None]:
# Evaluate the stored checkpoints of the remaining fractions, appending to
# the `accuracy` dict created by the previous evaluation cell.
porcentajes = [0.6, 0.7, 0.8, 0.9, 1] # [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# accuracy = {"0.3":[], "0.4":[], "0.5":[], "0.6":[], "0.7":[], "0.8":[], "0.9":[], "1":[]}

for porc in porcentajes:
    for i in range(5):
        model = GNN_TAGConv(num_aps, num_classes, k=[2,2], conv_out_features=[20,20])
        # map_location remaps the stored tensors onto `device`, so weights
        # written during a GPU run also load on a CPU-only machine.
        model.load_state_dict(torch.load(f"UJI_Simple_porc{porc}_{i}_best_model.pth", map_location=device))
        model.to(device)

        test_loss = []
        test_accuracy = []

        # TEST
        model.eval()
        correct = 0
        total = 0
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for batch in test_loader:
                out = model(batch.to(device))
                target = batch.y.cpu().reshape(-1).type(torch.long)
                predicted = torch.argmax(out.cpu(), dim=1)
                correct += int((predicted == target).sum())
                total += target.numel()

        # Sample-weighted accuracy: averaging per-batch values would give the
        # final, smaller batch the same weight as a full one.
        test_accuracy.append(correct / total if total else float("nan"))
        accuracy[str(porc)].append(np.mean(test_accuracy))

In [None]:
accuracy

In [None]:
num_aps