In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import random
from collections import namedtuple
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [9]:
""" readDataset: Function that reads and randomize the dataset, returning a namedtuple -> dataset.X and dataset.Y """
def readDataset(filename, y_columns, sep='\t'):
    
    # Reading the dataset.
    data = pd.read_csv(filename, index_col=False, header=None, sep=sep)
    
    # Acquiring dataset data and class data.
    y = data.iloc[:,-y_columns:]
    y = np.array(y)
    X = data.iloc[:,:-y_columns]
    X = np.array(X)
    
    # Randomizing dataset.
    indices = np.random.choice(len(X), len(X), replace=False)
    X_values = X[indices]
    y_values = y[indices]
    
    # Creating an alias to dataset -> dataset.X and dataset.Y
    dataset = namedtuple('datset', 'X Y')

    return dataset(X=X_values, Y=y_values)


""" Função processing: Function that transform divides the dataset in train and test and transform the Y values in binary """
def train_test_split(dataset, percentage):

    # Computing the lenght of dataset.
    lenght = dataset.X.shape[0]

    # Split dataset into train and test.
    x_train = dataset.X[0:int(percentage*lenght), :]
    y_train = dataset.Y[0:int(percentage*lenght), :]
    x_test = dataset.X[int(percentage*lenght):, :]
    y_test = dataset.Y[int(percentage*lenght):, :]
        
    # Creating an alias to train and test set.
    dataset = namedtuple('datset', 'X Y')
    train = dataset(X=x_train, Y=y_train)
    test = dataset(X=x_test, Y=y_test)

    return train, test


""" Função hidden_train: função responsável por calcular a distância para cada cluster e aplicar a função gaussiana de base radial """
def hidden_train(x, n_clusters, clusters, sigma):
    
    distances = np.zeros(n_clusters)

    for i in range(n_clusters):
        
        # Calculando a distância euclidiana para cada cluster i.
        distances[i] = np.sqrt(np.sum(((x - clusters[i])**2)))
    
    # Applying radial basis function
    distances = np.exp(-(((distances**2))/(2*(sigma**2))))
    
    # Envia para a camada escondida o cálculo da distância do exemplo x para cada cluster.
    return distances


""" Função forward: função responsável por aplicar os pesos da camada de saida juntamente com as distancias obtidas para cada cluster """
def forward(x, output_weights):
    
    # Função de ativação = função identidade. Concatena com 1 p/ adição do theta.
    x_aux = np.concatenate((x, np.ones(1)))
    return np.matmul(x_aux, output_weights.T) 


""" Função backward: função responsável pela atualização dos pesos da camada de saída """
def backward(dataset, eta, n_classes, output_weights, entry, f_net_o, distance):
    
    y = dataset.Y[entry]
    
    # Calcula o erro
    error = y - f_net_o
    
    # Atualiza os pesos da camada de saída
    output_weights += eta*np.matmul(error.reshape(n_classes,1),np.append(distance,1).reshape(1,n_classes+1))

    return output_weights, error


""" Função testing: função responsável pela etapa de teste da rede RBF """
def testing(dataset, output_weights, n_clusters, clusters, sigma):
    counter = 0
    
    # Para cada entrada de teste..
    for entry in range(dataset.X.shape[0]):
        
        # Calcula a distancia para cada cluster
        distances = hidden_train(dataset.X[entry], n_clusters, clusters, sigma)
        
        # Aplica as distancias juntamente com os pesos obtidos
        y_hat = forward(distances, output_weights)
        
        # Computa a saida esperada e a obtida
        y_hat = np.argmax(y_hat)
        y = np.argmax(dataset.Y[entry])
        
        # Compara as saidas, se igual -> soma 1 acerto.
        if (y == y_hat):
            counter += 1

    return (counter/dataset.X.shape[0])


""" Função RBF: função responsável por todas as etapas de execução da rede RBF """
def RBF(dataset, n_classes, eta, data_ratio, epochs, sigma, delta_error):
    
    train, test = train_test_split(dataset, data_ratio)
    
    n_clusters = n_classes
    clusters = np.zeros((n_clusters, dataset.X.shape[1]))

    # Calculating the clusters
    for i in range(1, n_clusters+1):
        clusters[i-1] = train.X[np.argmax(train.Y,axis=1)+1 == i].mean(axis=0)
    
    hidden_units = n_clusters

    # Initializing and filling the weights values of output layer
    output_weights = np.zeros((n_clusters,n_classes+1))

    # Inicialização dos pesos da camada de saída com distribuição uniforme de -1 a 1
    for i in range(n_clusters):
        for j in range(n_classes+1):
            output_weights[i][j] = random.uniform(-1, 1)

    # Treina a rede até que o erro decreça ao mínimo aceito (delta_error)
    epoch = 0
    errors_list = [1,0]
    delta = 10

    while(abs(delta) > delta_error):
        error = 0
        
        # Para cada entrada de treino
        for entry in range(train.X.shape[0]):
            
            # Calcula a distancia do exemplo para cada cluster
            distances = hidden_train(train.X[entry], n_clusters, clusters, sigma)
            
            # Calcula a saida esperada com base nos pesos atuais
            f_net_o = forward(distances, output_weights)
            
            # Atualiza os pesos e computa o erro
            output_weights, erro = backward(train, eta, n_classes, output_weights, entry, f_net_o, distances)
            error += sum(erro*erro)
        
        epoch += 1
        print(epoch,error)
        
    # Etapa de teste
    return testing(test, output_weights, n_clusters, clusters, sigma)

In [12]:
digitos = readDataset('semeion.data', 10, sep=' ')

# Teste das acurácias da rede RBF
RBF(digitos, 10, 0.5, 1, 500, 1.9, 1e-2)

1 1910.80663407778
2 1896.2489612129032
3 1886.9001196006122
4 1877.8796633479508
5 1869.1710720949952
6 1860.7587091447438
7 1852.6277736651978
8 1844.7642554828224
9 1837.1548923268872
10 1829.7871293917926
11 1822.6490810914654
12 1815.7294948871122
13 1809.017717075726
14 1802.5036604330792
15 1796.1777736105082
16 1790.0310121904993
17 1784.0548113109303
18 1778.24105977305
19 1772.5820755525824
20 1767.0705826379392
21 1761.6996891235008
22 1756.4628664899424
23 1751.3539300071618
24 1746.367020199002
25 1741.496585312095
26 1736.7373647344455
27 1732.0843733121771
28 1727.5328865157783
29 1723.078426409773
30 1718.716748382221
31 1714.4438285928566
32 1710.2558521008775
33 1706.1492016355965
34 1702.1204469749646
35 1698.166334899143
36 1694.2837796878584
37 1690.4698541320636
38 1686.7217810320503
39 1683.0369251555974
40 1679.4127856312527
41 1675.846988753126
42 1672.3372811748875
43 1668.8815234718934
44 1665.4776840514323
45 1662.1238333923043
46 1658.8181385957619
47 1655.

366 1232.3870372174586
367 1231.7863041927312
368 1231.1876181454038
369 1230.5909691134832
370 1229.9963472029804
371 1229.40374258728
372 1228.813145506588
373 1228.2245462673352
374 1227.6379352415897
375 1227.0533028665034
376 1226.4706396437468
377 1225.889936138938
378 1225.3111829810932
379 1224.7343708620767
380 1224.1594905360762
381 1223.5865328190391
382 1223.015488588154
383 1222.4463487813234
384 1221.8791043966492
385 1221.313746491904
386 1220.7502661840251
387 1220.18865464861
388 1219.6289031194135
389 1219.0710028878625
390 1218.5149453025433
391 1217.9607217687387
392 1217.4083237479283
393 1216.8577427573236
394 1216.308970369396
395 1215.7619982114006
396 1215.2168179649261
397 1214.6734213654263
398 1214.1318002017745
399 1213.5919463158061
400 1213.053851601884
401 1212.5175080064462
402 1211.9829075275884
403 1211.45004221461
404 1210.9189041676113
405 1210.3894855370393
406 1209.8617785233073
407 1209.335775376339
408 1208.8114683951962
409 1208.288849927641
41

726 1100.31953335245
727 1100.102578898613
728 1099.8861512809565
729 1099.6702488706696
730 1099.4548700451012
731 1099.2400131877562
732 1099.0256766882571
733 1098.811858942301
734 1098.5985583516542
735 1098.3857733241073
736 1098.1735022734504
737 1097.9617436194408
738 1097.7504957877795
739 1097.5397572100867
740 1097.3295263238579
741 1097.1198015724488
742 1096.910581405052
743 1096.701864276649
744 1096.493648648004
745 1096.2859329856235
746 1096.078715761737
747 1095.8719954542603
748 1095.6657705467776
749 1095.4600395285197
750 1095.2548008943115
751 1095.0500531445832
752 1094.8457947853103
753 1094.6420243280068
754 1094.4387402896964
755 1094.2359411928778
756 1094.0336255655166
757 1093.8317919409983
758 1093.6304388581198
759 1093.4295648610594
760 1093.2291684993395
761 1093.0292483278374
762 1092.8298029067064
763 1092.6308308014
764 1092.432330582624
765 1092.2343008263201
766 1092.0367401136323
767 1091.839647030892
768 1091.643020169596
769 1091.4468581263682
77

KeyboardInterrupt: 