In [19]:
import numpy as np
import csv
import random
from collections import Counter, defaultdict

In [22]:
# A simple model of one iris flower sample
class iris():
    """A single labeled iris sample.

    Attributes:
        nome: label (species name) of the sample.
        caracteristica: measurements describing the sample; stored as
            given, without copying.
    """

    def __init__(self, nome, caracteristica):
        self.nome = nome
        self.caracteristica = caracteristica

    def __repr__(self):
        # Readable representation so samples are inspectable when displayed
        # in a notebook cell or printed while debugging.
        return f"iris(nome={self.nome!r}, caracteristica={self.caracteristica!r})"

In [23]:
# Clean up the flower name and build the iris object for one CSV row
def parse_iris_row(row):
    """Turn one CSV row into an ``iris`` object.

    Every field except the last is converted to ``float``; the last field
    is the label, of which only the part after the final ``-`` is kept
    (so ``"Iris-setosa"`` becomes ``"setosa"``).
    """
    features = list(map(float, row[:-1]))
    # rpartition keeps everything after the last "-" (or the whole string
    # when there is no "-"), exactly like split("-")[-1].
    species = row[-1].rpartition("-")[2]
    return iris(species, features)

In [24]:
# Split the data set into a training part and a test part
def split_data(data, train_size=0.8, seed=None):
    """Randomly split ``data`` into ``(train, test)`` lists.

    The input is copied before shuffling, so the caller's sequence is left
    untouched and the function can be re-run safely on the same data.

    Args:
        data: sequence (or any iterable) of samples.
        train_size: fraction of the samples that goes to the training
            part; the cut index is ``int(len(data) * train_size)``.
        seed: optional seed for a private random generator, making the
            split reproducible. When ``None`` the module-level ``random``
            generator is used, so ``random.seed(...)`` still applies.

    Returns:
        A ``(train, test)`` tuple of lists.
    """
    shuffled = list(data)
    if seed is None:
        random.shuffle(shuffled)
    else:
        # A dedicated generator avoids disturbing the global random state.
        random.Random(seed).shuffle(shuffled)
    cut = int(len(shuffled) * train_size)
    return shuffled[:cut], shuffled[cut:]

In [25]:
# Compute the Euclidean distance
def euclidian_distance(vector1, vector2):
    """Return the Euclidean distance between two numeric vectors.

    Both arguments are handed to NumPy, so lists, tuples and arrays are
    accepted.
    """
    delta = np.asarray(vector1) - np.asarray(vector2)
    return np.sqrt(np.sum(delta * delta))

In [26]:
# Count occurrences and pick the winning label
def majority_vote(labels):
    """Return the most common label, resolving ties by dropping trailing votes.

    While several labels share the highest count, the last label is
    discarded and the vote is repeated on what remains. ``knn_classify``
    passes labels ordered nearest-first, so the farthest neighbour is the
    one dropped.

    The vote is computed iteratively with incrementally updated counts, so
    long inputs with persistent ties (for example many distinct labels) do
    not hit Python's recursion limit.

    Args:
        labels: iterable of hashable labels; it is copied, never mutated.

    Returns:
        The label that is the unique most frequent one in the longest
        prefix of ``labels`` that has such a unique winner.

    Raises:
        ValueError: if ``labels`` is empty.
    """
    remaining = list(labels)
    if not remaining:
        raise ValueError("majority_vote() requires at least one label")

    vote_counts = Counter(remaining)
    while True:
        winner, winner_count = vote_counts.most_common(1)[0]
        num_winners = sum(1 for count in vote_counts.values() if count == winner_count)
        if num_winners == 1:
            return winner
        # Tie: forget the last vote and try again. A single remaining label
        # always wins, so the loop is guaranteed to terminate.
        dropped = remaining.pop()
        vote_counts[dropped] -= 1
        if vote_counts[dropped] == 0:
            del vote_counts[dropped]

In [27]:
def knn_classify(k, iris_class, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest samples.

    Args:
        k: number of neighbours that take part in the vote; must be >= 1.
        iris_class: iterable of labeled samples exposing ``caracteristica``
            (the measurements) and ``nome`` (the label).
        new_point: measurements of the point to classify.

    Returns:
        The label chosen by ``majority_vote`` over the ``k`` nearest
        samples (all samples when fewer than ``k`` are available).

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``iris_class`` is empty.
    """
    # A negative k would silently slice off the *farthest* samples instead
    # of selecting the nearest ones, and k == 0 leaves nothing to vote on.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    by_distance = sorted(iris_class,
                         key=lambda lp: euclidian_distance(lp.caracteristica, new_point))
    if not by_distance:
        raise ValueError("iris_class must contain at least one labeled sample")

    # Nearest first, so that tie-breaking drops the farthest neighbour.
    k_nearest_labels = [lp.nome for lp in by_distance[:k]]
    return majority_vote(k_nearest_labels)

In [28]:
# Load the data set: one iris object per non-empty CSV row.
# `csv` is already imported in the notebook's first cell.
# newline='' is the mode the csv module documentation recommends for files
# passed to csv.reader.
with open('iris.data', newline='') as f:
    reader = csv.reader(f)
    # csv.reader yields [] for blank lines (e.g. a trailing blank line at the
    # end of the file); skip them instead of crashing in parse_iris_row.
    iris_data = [parse_iris_row(row) for row in reader if row]

In [29]:
# Seed the global random generator before splitting: split_data shuffles the
# samples, so without a fixed seed the train/test split (and every metric
# computed from it) changes on each run.
SEED = 42
random.seed(SEED)
iris_train, iris_test = split_data(iris_data)

In [34]:
# Evaluate the classifier on the test samples
K_NEIGHBORS = 5  # number of neighbours taking part in each vote

confusion_matrix = defaultdict(int)  # (predicted, actual) -> number of samples
num_correct = 0
# The loop variable is deliberately not called `iris`: that name would rebind
# the `iris` class to a sample object and break parse_iris_row on any re-run.
for sample in iris_test:
    predicted = knn_classify(K_NEIGHBORS, iris_train, sample.caracteristica)
    actual = sample.nome

    if predicted == actual:
        num_correct += 1

    confusion_matrix[(predicted, actual)] += 1

# Fraction of test samples whose predicted label matches the actual one
pct_correct = num_correct / len(iris_test)
print(pct_correct)

0.9333333333333333


In [36]:
# Print every (predicted, actual) label pair with the number of test samples
# that were counted for it
for k, v in confusion_matrix.items():
    print(f'{k} = {v}')

('versicolor', 'versicolor') = 12
('virginica', 'versicolor') = 2
('setosa', 'setosa') = 10
('virginica', 'virginica') = 6
