In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.naive_bayes import CategoricalNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

def naiveBayesTrainPredict(X_train, y_train, X_test):
    """Fit a CategoricalNB model on X_train and predict labels for X_test.

    Both feature arrays are shifted by the smallest value found in either
    of them, so that no negative value is handed to the classifier.

    Args:
        X_train: training features; must support ``.min()``, ``.max()`` and
            subtraction of a scalar (e.g. a NumPy array).
        y_train: training labels, forwarded unchanged to ``fit``.
        X_test: features to classify; shifted by the same offset as X_train.

    Returns:
        Whatever ``CategoricalNB.predict`` returns for the shifted X_test.
    """
    # One common offset for both sets: the lowest value overall becomes 0.
    offset = min(X_train.min(), X_test.min())
    shifted_train = X_train - offset
    shifted_test = X_test - offset

    # Ask for enough categories per feature to cover the largest shifted
    # value of either set, including values that only occur in X_test.
    highest = int(max(shifted_train.max(), shifted_test.max()))
    model = CategoricalNB(min_categories=highest + 1)
    return model.fit(shifted_train, y_train).predict(shifted_test)
    
def mlpTrainPredict(X_train, y_train, X_test):
    """Train an MLPClassifier with fixed hyper-parameters and predict X_test.

    Args:
        X_train: training features, forwarded to ``fit``.
        y_train: training labels, forwarded to ``fit``.
        X_test: features to classify.

    Returns:
        Whatever ``MLPClassifier.predict`` returns for X_test.

    Side effects:
        Re-seeds NumPy's global random generator with 111 and, because
        ``verbose`` is enabled, lets the estimator print training progress.
    """
    seed = 111
    # The global NumPy generator is seeded in addition to the estimator's
    # own random_state.
    np.random.seed(seed)

    settings = dict(
        hidden_layer_sizes=90,
        activation='logistic',
        # SGD with one sample per batch, no L2 penalty and no momentum
        solver='sgd',
        batch_size=1,
        alpha=0,
        momentum=0,
        # Constant step size, at most 50 iterations
        learning_rate='constant',
        learning_rate_init=0.05,
        max_iter=50,
        shuffle=True,
        random_state=seed,
        # Early stopping with 33% of the training data held out for validation
        early_stopping=True,
        validation_fraction=.33,
        verbose=True,
    )
    network = MLPClassifier(**settings)
    network.fit(X_train, y_train)
    return network.predict(X_test)
    
def plotResult(y_pred, y_test, title, labels=None):
    """Draw a MATLAB-style confusion-matrix chart and show it.

    Rows are predicted ("Output") classes and columns are true ("Target")
    classes. Every cell shows its count and its share of
    ``y_pred.shape[0]``. An extra right-hand column shows per-class
    precision, an extra bottom row shows per-class sensitivity (recall),
    and the bottom-right corner shows the overall accuracy. Each summary
    cell prints the rate in green and its complement in red.

    Args:
        y_pred: predicted labels; must expose ``.shape`` (e.g. NumPy array).
        y_test: true labels.
        title: chart title.
        labels: optional tick labels, one per class; defaults to the class
            indices 0..n-1.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Metrics; the matrix is transposed so predictions run along the rows
    counts = np.transpose(confusion_matrix(y_test, y_pred))
    per_class_precision = precision_score(y_test, y_pred, average=None)
    per_class_recall = recall_score(y_test, y_pred, average=None)
    overall_accuracy = accuracy_score(y_test, y_pred)

    n_classes = counts.shape[1]
    classes = np.arange(0, n_classes)
    total = y_pred.shape[0]

    # Background colours (RGBA, 0-255) following the MATLAB palette
    side = n_classes + 1
    grid = np.tile(np.array((230, 140, 140, 255)), (side, side, 1))  # red: errors
    grid[classes, classes] = (120, 230, 180, 255)        # green: hits
    grid[n_classes, :n_classes] = (128, 128, 128, 120)   # grey: bottom row (sensitivity)
    grid[:n_classes, n_classes] = (128, 128, 128, 120)   # grey: right column (precision)
    grid[n_classes, n_classes] = (120, 150, 230, 255)    # blue: accuracy
    plt.imshow(grid, cmap='viridis', interpolation='nearest')

    # Axis ticks only cover the real classes, not the summary row/column
    tick_labels = classes if labels is None else labels
    plt.xticks(classes, tick_labels)
    plt.yticks(classes, tick_labels, rotation=90, ha='center', va='center')

    def write_rate(x, y, rate, **emphasis):
        # Rate in green on the upper line, its complement in red below;
        # ``emphasis`` only styles the green line.
        plt.text(x, y, str(round(rate * 100, 2)) + '%\n',
                 ha='center', va='center', color='green', fontsize=10, **emphasis)
        plt.text(x, y, '\n' + str(round(100 - (rate * 100), 2)) + '%',
                 ha='center', va='center', color='red', fontsize=10)

    # Confusion cells: bold count on top, percentage of all predictions below
    for (row, col), count in np.ndenumerate(counts):
        plt.text(col, row, str(count) + '\n',
                 ha='center', va='center', color='black', fontsize=10, weight='bold')
        plt.text(col, row, '\n' + str(round(count / total * 100, 2)) + '%',
                 ha='center', va='center', color='black', fontsize=10)

    # Right column: precision of each predicted class
    for row in range(n_classes):
        write_rate(n_classes, row, per_class_precision[row])

    # Bottom row: sensitivity of each target class
    for col in range(n_classes):
        write_rate(col, n_classes, per_class_recall[col])

    # Bottom-right corner: overall accuracy
    write_rate(n_classes, n_classes, overall_accuracy, weight='bold')

    # Title and axis captions
    plt.title(title, weight='bold')
    plt.xlabel('Target Class', weight='bold')
    plt.ylabel('Output Class', weight='bold')
    plt.show()
    
#Load the three CSV files (point metadata, training samples, test samples)
indoorLoc_points = pd.read_csv('/kaggle/input/25d-indoor-positioning-using-wifi-signals/uerj_wifi_indoorLoc_points.csv')
indoorLoc_train = pd.read_csv('/kaggle/input/25d-indoor-positioning-using-wifi-signals/uerj_wifi_indoorLoc_train.csv')
indoorLoc_test = pd.read_csv('/kaggle/input/25d-indoor-positioning-using-wifi-signals/uerj_wifi_indoorLoc_test.csv')

#Attach the point metadata (including the floor) to every sample
indoorLoc_train = indoorLoc_train.merge(indoorLoc_points, on='point', how='inner')
indoorLoc_test = indoorLoc_test.merge(indoorLoc_points, on='point', how='inner')

#Features (X): only the columns whose name starts with 'RSS'
train_rss_mask = indoorLoc_train.columns.str.startswith('RSS')
test_rss_mask = indoorLoc_test.columns.str.startswith('RSS')
X_train = indoorLoc_train.loc[:, train_rss_mask].to_numpy()
X_test = indoorLoc_test.loc[:, test_rss_mask].to_numpy()

#Targets (Y): the floor of each sample
y_train = indoorLoc_train['floor'].to_numpy()
y_test = indoorLoc_test['floor'].to_numpy()

#Fit a full PCA on the training features, then keep the first 60
#components of each projection, rounded to whole numbers
pca = PCA(n_components=X_train.shape[1], svd_solver='full')
pca.fit(X_train)
X_train_with_PCA = np.round(pca.transform(X_train)[:, :60])
X_test_with_PCA = np.round(pca.transform(X_test)[:, :60])

#Experiments, run in order: Naive Bayes without PCA, Naive Bayes with PCA,
#MLP with PCA. Each one prints a banner, trains, predicts and plots.
floor_labels = ['Andar 4','Andar 5', 'Andar 6']
experiments = [
    ('Iniciando Naive Bayes sem PCA...', 'Naive Bayes Sem PCA',
     naiveBayesTrainPredict, X_train, X_test),
    ('Iniciando Naive Bayes com PCA...', 'Naive Bayes Com PCA',
     naiveBayesTrainPredict, X_train_with_PCA, X_test_with_PCA),
    ('Iniciando MLP com PCA...', 'MLP Com PCA',
     mlpTrainPredict, X_train_with_PCA, X_test_with_PCA),
]
for banner, chart_title, train_predict, train_features, test_features in experiments:
    print(banner)
    predictions = train_predict(train_features, y_train, test_features)
    plotResult(predictions, y_test,
               title=chart_title,
               labels=floor_labels)