# Perceptron para avaliar o Iris Dataset com apenas duas classes

Importações necessárias para rodar a aplicação

In [1]:
import numpy as np
import random
import math

Classe Perceptron

In [2]:
class Perceptron(object):
    """Single-layer perceptron with a step activation for two-class problems.

    The weight vector ``W`` holds ``input_size + 1`` entries: index 0 is the
    bias weight and the remaining entries are one weight per input feature.
    """

    def __init__(self, input_size, lr=1, epochs=100):
        """Create an untrained perceptron with all weights set to zero.

        Args:
            input_size: Number of features per sample (bias not included).
            lr: Learning rate that scales every weight correction.
            epochs: Maximum number of passes over the training set.
        """
        # One extra slot (index 0) is reserved for the bias weight.
        self.W = np.zeros(input_size+1)
        self.epochs = epochs
        self.lr = lr

    def activation_fn(self, x):
        """Step function: return 1 when ``x`` is non-negative, otherwise 0."""
        return 1 if x >= 0 else 0

    def predict(self, x):
        """Classify a single sample.

        Args:
            x: 1-D sample that already carries the bias input at index 0, so
                its length matches ``len(self.W)``.

        Returns:
            The integer 0 or 1 produced by the step activation.
        """
        z = self.W.T.dot(x)
        return self.activation_fn(z)

    def fit(self, X, d):
        """Train the weights with the perceptron learning rule.

        Each sample gets a constant 1 prepended as the bias input and the
        weights are corrected by ``lr * (target - prediction) * sample``.
        Training stops after ``self.epochs`` passes, or sooner when a full
        pass produces no error: the weights can no longer change at that
        point, so the remaining passes would be wasted work.

        Args:
            X: Indexable collection of samples; ``X[i]`` is the feature
                vector of sample ``i``.
            d: NumPy array with the target (0 or 1) of every sample.
        """
        for _ in range(self.epochs):
            misclassified = 0
            for i in range(d.shape[0]):
                # Prepend the bias input so the sample lines up with W.
                x = np.insert(X[i], 0, 1)
                e = d[i] - self.predict(x)
                if e != 0:
                    self.W = self.W + self.lr * e * x
                    misclassified += 1
            if misclassified == 0:
                # Converged: every sample is already classified correctly.
                break


Define constantes que determinam o tamanho do conjunto de treinamento e o tamanho do conjunto de teste. Neste caso, utilizamos 15% do conjunto para treinamento.

In [3]:
# Fraction of the available samples used to train the perceptron.
TRAINING_PERCENTAGE = 0.15
# Whatever is not used for training is held out for testing.
TEST_PERCENTAGE = 1.0 - TRAINING_PERCENTAGE

Inicia removendo, de forma aleatória, uma das três labels e, em seguida, atribui um índice inteiro a cada uma das duas labels restantes.

In [11]:
if __name__ == '__main__':
    # The three class names that appear in the last column of the dataset.
    iris_labels = ["Iris-setosa", "Iris-virginica", "Iris-versicolor"]

    # Drop one class at random so only two classes remain.
    random_index = random.randint(0,2)
    removed_label = iris_labels.pop(random_index)
    print("Label a ser removida: "+ removed_label)
    print(iris_labels)

    # Two-way mapping between each remaining label and its integer class id.
    int_to_label = dict(enumerate(iris_labels))
    label_to_int = {name: class_id for class_id, name in int_to_label.items()}

    print(label_to_int)

Label a ser removida: Iris-virginica
['Iris-setosa', 'Iris-versicolor']
{'Iris-setosa': 0, 'Iris-versicolor': 1}


Abre o arquivo do dataset e separa os dados dos resultados. (Os resultados já serão convertidos para int.)

In [12]:
    # Read the whole file inside a context manager so the handle is closed
    # as soon as the lines are in memory, even if reading fails.
    with open("iris.data", "r") as iris_data:
        lines = iris_data.readlines()

    array_data = []
    decision = []

    for line in lines:
        data = line.split(",")
        # The label is the last comma-separated field of the line.
        label = data[-1].replace("\n", "")
        # Lines whose label is not in label_to_int (including blank lines)
        # are skipped.
        if label in label_to_int:
            del data[-1]
            array_data.append(list(map(float, data)))
            decision.append(label_to_int[label])

Define e monta quais conjuntos vão ser para treinamento e quais vão ser para teste

In [13]:
    # Sizes of both partitions, derived from the configured percentages.
    data_size = len(array_data)
    training_data_size = math.ceil(data_size * TRAINING_PERCENTAGE)
    test_data_size = math.floor(data_size * TEST_PERCENTAGE)

    # Training indexes are drawn without replacement; all others go to test.
    all_indexes = range(0, data_size)
    training_sample_indexes = set(random.sample(all_indexes, training_data_size))
    test_sample_indexes = set(all_indexes) - training_sample_indexes

    print("*** Conjunto de treinamento ***")
    training_data = [array_data[i] for i in training_sample_indexes]
    training_data_result = [decision[i] for i in training_sample_indexes]
    for features, target in zip(training_data, training_data_result):
        print(features, int_to_label[target])
    print(" ----")

    print("*** Conjunto de teste ***")
    test_data = [array_data[i] for i in test_sample_indexes]
    test_data_result = [decision[i] for i in test_sample_indexes]
    for features, target in zip(test_data, test_data_result):
        print(features, int_to_label[target])
    print(" ----")


*** Conjunto de treinamento ***
[5.6, 2.9, 3.6, 1.3] Iris-versicolor
[5.9, 3.2, 4.8, 1.8] Iris-versicolor
[4.5, 2.3, 1.3, 0.3] Iris-setosa
[6.8, 2.8, 4.8, 1.4] Iris-versicolor
[5.0, 2.0, 3.5, 1.0] Iris-versicolor
[5.1, 3.8, 1.6, 0.2] Iris-setosa
[5.3, 3.7, 1.5, 0.2] Iris-setosa
[5.0, 3.3, 1.4, 0.2] Iris-setosa
[5.7, 3.8, 1.7, 0.3] Iris-setosa
[6.0, 2.7, 5.1, 1.6] Iris-versicolor
[6.4, 3.2, 4.5, 1.5] Iris-versicolor
[4.6, 3.6, 1.0, 0.2] Iris-setosa
[6.3, 2.3, 4.4, 1.3] Iris-versicolor
[5.5, 2.6, 4.4, 1.2] Iris-versicolor
[5.2, 3.4, 1.4, 0.2] Iris-setosa
 ----
*** Conjunto de teste ***
[5.1, 3.5, 1.4, 0.2] Iris-setosa
[4.9, 3.0, 1.4, 0.2] Iris-setosa
[4.7, 3.2, 1.3, 0.2] Iris-setosa
[4.6, 3.1, 1.5, 0.2] Iris-setosa
[5.0, 3.6, 1.4, 0.2] Iris-setosa
[5.4, 3.9, 1.7, 0.4] Iris-setosa
[4.6, 3.4, 1.4, 0.3] Iris-setosa
[5.0, 3.4, 1.5, 0.2] Iris-setosa
[4.4, 2.9, 1.4, 0.2] Iris-setosa
[4.9, 3.1, 1.5, 0.1] Iris-setosa
[5.4, 3.7, 1.5, 0.2] Iris-setosa
[4.8, 3.4, 1.6, 0.2] Iris-setosa
[4.8, 3.0, 1.

Monta o array utilizando o numpy, instancia a classe Perceptron e já faz o treinamento

In [14]:
    # Convert the training samples and their targets into NumPy arrays.
    X, d = np.array(training_data), np.array(training_data_result)

    # One weight per feature; Perceptron.__init__ adds the bias weight itself.
    n_features = len(training_data[0])
    perceptron = Perceptron(n_features)
    perceptron.fit(X, d)
    print("The W results = ",perceptron.W)

The W results =  [-1.  -1.4 -4.2  6.   2.3]


Para cada dado do conjunto de testes, o Perceptron vai ser chamado para predizer qual é o resultado. Caso o resultado seja correto, aumenta o contador de predições certas.

In [15]:
    # Number of test samples whose predicted class matches the real one.
    right_predictions_count = 0

    for test, expected in zip(test_data, test_data_result):
        # Prepend the bias input, as done for every sample during training.
        x = np.insert(test, 0, 1)
        prediction = perceptron.predict(x)

        print("------")
        print("The input value (x) =", test)
        print("The predict - y = ", int_to_label[prediction])
        print("The real value = ", int_to_label[expected] )
        print("------")

        # A correct prediction adds 1, a wrong one adds 0.
        right_predictions_count += int(prediction == expected)

------
The input value (x) = [5.1, 3.5, 1.4, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [4.9, 3.0, 1.4, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [4.7, 3.2, 1.3, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [4.6, 3.1, 1.5, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [5.0, 3.6, 1.4, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [5.4, 3.9, 1.7, 0.4]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [4.6, 3.4, 1.4, 0.3]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [5.0, 3.4, 1.5, 0.2]
The predict - y =  Iris-setosa
The real value =  Iris-setosa
------
------
The input value (x) = [4.4, 2.9, 1.4, 0.2]
The pr

No final, são exibidas estatísticas relacionadas aos testes

In [16]:
    # Totals derived from the evaluation loop.
    total_tests = len(test_data)
    wrong_predictions_count = total_tests - right_predictions_count

    print("\n\n\n*******")
    print("Number of tests = ", total_tests)
    print("Number of right predictions = ", right_predictions_count)
    print("Number of wrong predictions = ", wrong_predictions_count)
    # Share of correct predictions, expressed as a percentage.
    success_rate = right_predictions_count/total_tests * 100
    print("Correct prediction rate = ", success_rate, "%")
    print("*******\n\n\n")




*******
Number of tests =  85
Number of right predictions =  85
Number of wrong predictions =  0
Correct prediction rate =  100.0 %
*******



