### Import required libraries

In [None]:
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import DynamicEdgeConv, MessagePassing, knn_graph
from torch.nn import Sequential as Seq, Linear, ReLU

from sklearn.svm import SVC

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, OrdinalEncoder, Normalizer

### Selecting Device

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

### Load training and test data and concatenate them

In [None]:
df_train = pd.read_csv('../data_sets/UJIndoorLoc/trainingData.csv')

In [None]:
missing = 0.0
if missing > 0:
    df_train = df_train.sample(frac=1-missing, random_state=42)

In [None]:
df_val = pd.read_csv('../data_sets/UJIndoorLoc/UJIndoorLoc/validationData.csv')

train_numb = len(df_train)
test_numb = len(df_val)

df = pd.concat([df_train, df_val])
df

### Create Classes for classification

In [None]:
df['CLASS'] = df['BUILDINGID'].astype(str) + df['FLOOR'].astype(str)
df

In [None]:
no_out = len(df['CLASS'].unique())

In [None]:
df_y = df[["CLASS"]]
df_y

In [None]:
enc = OrdinalEncoder(dtype=np.int32)
y = enc.fit_transform(df_y['CLASS'].values.reshape(-1,1))
y = y.reshape(-1,)

In [None]:
y.max()

### Separate Input and Output

In [None]:
df_x = df.iloc[: , :520]

df_x

In [None]:
# The training rows come first in the concatenated frame, so a single cut
# point at len(df_train) separates the train and test portions.
split_at = len(df_train)
X_train, X_test = df_x[:split_at], df_x[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

### Scale the input features

In [None]:
norm_scl = Normalizer()

norm_scl.fit(X_train)

In [None]:
df_x = norm_scl.transform(df_x)

In [None]:
X_train = norm_scl.transform(X_train)
X_test = norm_scl.transform(X_test)

### KNN Classifier

In [None]:
# Sweep the number of neighbours K from 1 to 59 and record the test accuracy
# of a distance-weighted KNN classifier for each value.
acc_list = []
for K in range(1, 60):
    knn = KNeighborsClassifier(n_neighbors=K, weights='distance')

    # Train the model on the training set
    knn.fit(X_train, y_train)

    # Predict on the test set
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    # Depending on the scikit-learn version, accuracy_score may return a
    # builtin float (which has no .item()); float() handles both a builtin
    # float and a NumPy scalar.
    acc_list.append(float(acc))
    print("K = ", K, "Accuracy_score: {:.4f}".format(acc))

In [None]:
# acc_list[i] holds the accuracy for K = i + 1, so plot against the real K
# values instead of the zero-based list indices.
plt.plot(range(1, len(acc_list) + 1), acc_list)
plt.title(f"best accuracy: {max(acc_list)}")
# plt.savefig(f"result_part/knn_missing_{missing}.png")

### MLP Classifier

In [None]:
def mlp_call(h_layer):
    """Fit an MLP with the given hidden layout and report its test accuracy.

    Trains on the notebook-level ``X_train`` / ``y_train`` and scores the
    predictions for ``X_test`` against ``y_test``.

    Args:
        h_layer: Value forwarded as ``hidden_layer_sizes`` to ``MLPClassifier``.

    Returns:
        The accuracy computed by ``accuracy_score`` on the test split.
    """
    classifier = MLPClassifier(
        hidden_layer_sizes=h_layer,
        activation='relu',
        solver='adam',
        random_state=42,
    )
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Hidden layer: {h_layer} \tAccuracy: {accuracy}")
    return accuracy

In [None]:
# Hidden-layer layouts to try. Single-layer entries are written as real
# one-element tuples: `(32)` is just the int 32, `(32,)` is a tuple.
h_layers = [
    (32,),
    (64,),
    (64, 32),
    (128,),
    (128, 32),
    (256, 32),
    (256, 64),
    (512, 64),
    (512, 256, 64),
    (1024, 512, 256),
    (1024, 16),
]
mlp_res = []
for h_layer in h_layers:
    mlp_res.append(mlp_call(h_layer))

In [None]:
plt.plot(mlp_res)
plt.title(f"best accuracy: {max(mlp_res)}")
plt.savefig(f"../results/uji_result/mlp_missing_{missing}.png")

### SVM Classifier

In [None]:
def svm_call(c):
    """Fit an RBF-kernel SVM with regularisation ``C=c`` and report test accuracy.

    Trains on the notebook-level ``X_train`` / ``y_train`` and scores the
    predictions for ``X_test`` against ``y_test``.

    Args:
        c: Value forwarded as ``C`` to ``SVC``.

    Returns:
        The accuracy computed by ``accuracy_score`` on the test split.
    """
    classifier = SVC(kernel='rbf', C=c, random_state=42)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    svm_acc = accuracy_score(y_test, predictions)
    print(f"C = {c} \tAccuracy: {svm_acc}")
    return svm_acc


In [None]:
# Candidate C values for the SVM; one accuracy is collected per value, in order.
cs = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5]
svm_res = [svm_call(c) for c in cs]

In [None]:
plt.plot(cs, svm_res)
plt.title(f"best accuracy: {max(svm_res)}")
plt.savefig(f"../results/uji_result/svm_missing_{missing}.png")

### Define node Features and Labels & Create Data object

In [None]:
# Define the node features (float32), built directly with the target dtype
x = torch.as_tensor(df_x, dtype=torch.float)

# Define the node labels as a flat int64 vector; converting straight to long
# avoids the unnecessary detour through a float dtype
y = torch.as_tensor(y, dtype=torch.long).reshape(-1)

# Create the Data object and move it to the selected device
data = Data(x=x, y=y)
data = data.to(device)

### Define Train and Test Mask

In [None]:
# train_mask = np.array([random.random() < 0.8 for _ in data.x])
train_mask = torch.BoolTensor([True]*(train_numb) + [False]*test_numb).view(-1)
train_mask

In [None]:
# Negate with the tensor's own `~` operator so the mask stays a torch bool
# tensor, with no round-trip through NumPy.
test_mask = ~train_mask
test_mask

In [None]:
# `data` was moved to `device` before the masks existed, so place the masks on
# the same device explicitly. torch.as_tensor accepts tensors and arrays alike
# and avoids the copy-construct warning that torch.tensor(tensor) emits.
data.train_mask = torch.as_tensor(train_mask, dtype=torch.bool, device=device)
data.test_mask = torch.as_tensor(test_mask, dtype=torch.bool, device=device)

### Print some quick statistics about the data:

In [None]:
# number of nodes
print("Number of nodes: ", data.num_nodes)

In [None]:
# sample nodes from the graph
print("Shape of sample nodes: ", data.x.shape)

In [None]:
# check training nodes
print("# of nodes to train on: ", data.train_mask.sum().item())

# check test nodes
print("# of nodes to test on: ", data.test_mask.sum().item())

In [None]:
print("X shape: ", data.x.shape)
# print("Edge shape: ", data.edge_index.shape)
print("Y shape: ", data.y.shape)

### Define DynamicEdgeConv

In [None]:
class EdgeConv(MessagePassing):
    """Edge convolution: an MLP applied per edge, summed over incoming messages.

    Each edge message is computed from the concatenation of ``x_i`` and the
    difference ``x_j - x_i``; messages are aggregated with ``aggr='sum'``.

    Args:
        in_channels: Feature width of each input node.
        out_channels: Feature width produced for each node.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='sum')
        layers = [
            Linear(2 * in_channels, out_channels),
            ReLU(),
            Linear(out_channels, out_channels),
        ]
        self.mlp = Seq(*layers)

    def forward(self, x, edge_index):
        """Run message passing over ``edge_index`` with node features ``x``."""
        return self.propagate(edge_index, x=x)

    def message(self, x_i, x_j):
        """Build the per-edge input ``[x_i, x_j - x_i]`` and feed it to the MLP."""
        offset = x_j - x_i
        # edge_input has shape [E, 2 * in_channels]
        edge_input = torch.cat([x_i, offset], dim=1)
        return self.mlp(edge_input)

In [None]:
class DynamicEdgeConv(EdgeConv):
    """EdgeConv whose graph is rebuilt from the current features on every call.

    NOTE: this definition shadows the ``DynamicEdgeConv`` imported from
    ``torch_geometric.nn`` at the top of the notebook.

    Args:
        in_channels: Feature width of each input node.
        out_channels: Feature width produced for each node.
        k: Number of nearest neighbours used to build the graph.
    """

    def __init__(self, in_channels, out_channels, k=5):
        super().__init__(in_channels, out_channels)
        self.k = k

    def forward(self, x, batch=None):
        """Connect each node to its ``k`` nearest neighbours in ``x``, then convolve."""
        # Self-loops are excluded; edge direction follows this layer's flow setting.
        neighbour_edges = knn_graph(x, self.k, batch, loop=False, flow=self.flow)
        return super().forward(x, neighbour_edges)

### Define Model

In [None]:
class DEC(torch.nn.Module):
    """Two dynamic edge-convolution layers followed by a linear classifier.

    Layout: ``DynamicEdgeConv(in_channels, 256)`` -> ReLU -> dropout ->
    ``DynamicEdgeConv(256, 50)`` -> ReLU -> dropout -> ``Linear(50, num_classes)``.

    Args:
        k1: Number of nearest neighbours used by the first convolution.
        k2: Number of nearest neighbours used by the second convolution.
        in_channels: Input feature width. Defaults to the notebook-level
            ``data.num_features`` when omitted.
        num_classes: Number of output classes. Defaults to the notebook-level
            ``no_out`` when omitted.
    """

    def __init__(self, k1, k2, in_channels=None, num_classes=None):
        super().__init__()
        # Fall back to the notebook globals so existing DEC(k1, k2) calls keep
        # working, while allowing the sizes to be supplied explicitly.
        if in_channels is None:
            in_channels = data.num_features
        if num_classes is None:
            num_classes = no_out

        self.conv1 = DynamicEdgeConv(in_channels, 256, k=k1)
        self.conv2 = DynamicEdgeConv(256, 50, k=k2)
        self.fc1 = nn.Linear(50, num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, data):
        """Return the raw class scores for every node in ``data``.

        Only ``data.x`` is read; the neighbour graph is rebuilt inside each
        convolution layer from the features it receives.
        """
        x = data.x
        x = self.conv1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc1(x)

        return x

In [None]:
# helper used when computing accuracy: counts the correct predictions
def compute_accuracy(pred_y, y):
    """Return the number of positions where ``pred_y`` equals ``y``.

    Despite the name, the result is a count rather than a ratio; callers
    divide it by the number of samples themselves.
    """
    matches = pred_y == y
    return matches.sum()

In [None]:
data.y

### Train the model

In [None]:
def train_fun(k1, k2, lr, epochs=500):
    """Train a DEC model on the notebook-level ``data`` and plot its accuracy.

    A fresh ``DEC(k1, k2)`` is optimised with Adam (``weight_decay=5e-4``) and
    cross-entropy loss on the nodes selected by ``data.train_mask``. After
    every epoch the model is evaluated on the nodes in ``data.test_mask``.
    The train/test accuracy curves are plotted, saved under
    ``../results/uji_result/`` and shown.

    Args:
        k1: Neighbour count for the first dynamic edge convolution.
        k2: Neighbour count for the second dynamic edge convolution.
        lr: Learning rate for Adam.
        epochs: Number of training epochs (default 500, the original value).

    Returns:
        A tuple ``(losses, accuracies, test_accu)`` of per-epoch lists.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    model = DEC(k1, k2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
    loss_function = nn.CrossEntropyLoss()

    # The masks never change during training, so count their members once.
    n_train = int(data.train_mask.sum())
    n_test = int(data.test_mask.sum())

    losses = []
    accuracies = []
    test_accu = []

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)

        loss = loss_function(out[data.train_mask], data.y[data.train_mask])
        # Training accuracy is taken from the same forward pass as the loss,
        # i.e. with the model in train mode.
        correct = compute_accuracy(out.argmax(dim=1)[data.train_mask], data.y[data.train_mask])
        acc = int(correct) / n_train
        losses.append(loss.item())
        accuracies.append(acc)

        loss.backward()
        optimizer.step()

        # Evaluate after the update; no gradients are needed here, so skip
        # building the autograd graph.
        model.eval()
        with torch.no_grad():
            pred = model(data).argmax(dim=1)
        correct = compute_accuracy(pred[data.test_mask], data.y[data.test_mask])
        acc_test = int(correct) / n_test
        test_accu.append(acc_test)
        print(f'Epoch: {epoch}, \tAccuracy: {acc}, \tTest accuracy: {acc_test}')

    res = f"best performance on train: {max(accuracies)}, test: {max(test_accu)}"
    print(res)
    plt.plot(accuracies)
    plt.plot(test_accu)
    plt.title(res)
    plt.legend(['Train acc.', 'Test acc.'])
    plt.savefig(f'../results/uji_result/max_dec_missing_{missing}_({k1}_{k2}).png')
    plt.show()

    return losses, accuracies, test_accu

### Grid Search with varying k1 and k2

In [None]:
# Try every (k1, k2) pair in which the first layer uses strictly more
# neighbours than the second.
for k1 in (30, 20, 15, 10, 5, 2):
    for k2 in (15, 10, 6, 3, 1):
        if k1 <= k2:
            continue
        print("Doing for: ", k1, k2)
        train_fun(k1, k2, 0.009)