# Implementação do DMM e k1NN

## Importando o dataset iris através da biblioteca scikit-learn

In [27]:
from sklearn import datasets
# Load the Iris dataset object provided by scikit-learn's `datasets` module.
iris = datasets.load_iris()

# X holds the `data` attribute (features) and y the `target` attribute (labels);
# both names are reused by the train/test split cell below.
X = iris.data
y = iris.target

# Last expression of the cell: displays the class names of the dataset.
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

## Dividindo os dados em treino / teste

> Foi escolhido um conjunto de teste de 40% para obter a quantidade de dados sugerida durante a aula.

In [28]:
from sklearn.model_selection import train_test_split
# Fixed seed so the stratified 60/40 split -- and every metric computed from
# it -- is reproducible after "Restart Kernel -> Run All".
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=RANDOM_STATE
)

# DMM

### Treinando o modelo (calculando vetores médios)

In [16]:
def train(self, X, y):
    """Fit the nearest-mean (DMM) classifier: one mean vector per class.

    Samples are grouped by label in a single pass and the mean of each group
    is stored in ``self.__models`` ordered by ascending label, so position
    ``i`` of the list holds the mean vector of class ``i``.

    Args:
        X: Iterable of feature vectors. Each vector must support ``+`` and
            division by an ``int`` (e.g. the rows of a NumPy array).
        y: Iterable of class labels aligned with ``X``. The labels must be
            exactly the consecutive integers ``0 .. K-1`` for some ``K >= 1``.

    Raises:
        ValueError: If no samples are given, or if the labels present are not
            exactly ``0 .. K-1``.
    """
    # Group the feature vectors by label, keeping the original sample order
    # inside each group.
    groups = {}
    for features, label in zip(X, y):
        groups.setdefault(label, []).append(features)

    if not groups:
        raise ValueError("cannot train on an empty dataset")

    labels = sorted(groups)
    # The DMM prediction step reports the *index* of the closest mean, so the
    # list position of every mean has to coincide with its class label.
    if labels != list(range(len(labels))):
        raise ValueError(
            "class labels must be the consecutive integers 0..K-1, got {}".format(labels)
        )

    self.__models = [sum(groups[label]) / len(groups[label]) for label in labels]

### Implementação da distância Euclidiana e cálculo em batch

In [18]:
def __calc_distance(self, data, mean):
    """Return the Euclidean distance between ``data`` and ``mean``.

    Args:
        data: First point; must support ``data - mean`` (e.g. a NumPy array).
        mean: Second point, compatible with ``data`` under subtraction.

    Returns:
        The square root of the sum of the squared element-wise differences.
    """
    offset = data - mean
    squared_total = numpy.sum(offset ** 2)
    return numpy.sqrt(squared_total)

In [19]:
def __distances(self, models, X):
    """Lazily yield, for each sample, its distance to every model.

    Args:
        models: Iterable of reference points; it is iterated once per sample.
        X: Iterable of samples.

    Yields:
        One list per sample in ``X``, holding the result of
        ``self.__calc_distance(sample, model)`` for each model, in the order
        of ``models``.
    """
    for sample in X:
        row = []
        for reference in models:
            row.append(self.__calc_distance(sample, reference))
        yield row

### Predição do modelo (escolha da menor distância)

In [20]:
def predict(self, X):
    """Classify each sample by the index of its closest stored mean vector.

    Args:
        X: Iterable of samples, passed to ``self.__distances`` together with
            ``self.__models``.

    Returns:
        A list with one entry per sample: the ``numpy.argmin`` of that
        sample's distances, i.e. the position of the nearest model.
    """
    # All distance rows are materialised before any argmin is taken.
    distance_rows = list(self.__distances(self.__models, X))

    nearest = []
    for row in distance_rows:
        nearest.append(numpy.argmin(row))
    return nearest

# 1NN

### Treinando o modelo (armazenando todos os pontos)

In [21]:
def train(self, X, y):
    """Memorise the training set as a list of ``(label, point)`` pairs.

    Args:
        X: Iterable of training points.
        y: Iterable of labels aligned with ``X``.

    The pairs are stored in ``self.__models`` with the label first; pairing
    stops at the shorter of the two iterables.
    """
    labelled_points = []
    for label, point in zip(y, X):
        labelled_points.append((label, point))
    self.__models = labelled_points

### Predição do modelo (buscando a classe do ponto mais próximo)

In [22]:
def predict(self, X):
    """Lazily yield, for each sample, the label of its nearest stored point.

    Args:
        X: Iterable of samples.

    Yields:
        For each sample, the label paired with the stored point whose
        ``self.__calc_distance`` to the sample is smallest. On ties,
        ``numpy.argmin`` selects the first such point in storage order.
    """
    for sample in X:
        scored = []
        for label, point in self.__models:
            scored.append((label, self.__calc_distance(sample, point)))

        # Split the (label, distance) pairs into two parallel tuples.
        labels, gaps = zip(*scored)
        yield labels[numpy.argmin(gaps)]

# Matriz de confusão

In [23]:
def confusion_matrix(actual, predicted, num_classes=3):
    """Build a confusion matrix of counts from true and predicted labels.

    Args:
        actual: Iterable of true class labels, used as row indices.
        predicted: Iterable of predicted class labels, used as column
            indices; paired element-wise with ``actual``.
        num_classes: Number of classes, i.e. the side length of the matrix.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A ``(num_classes, num_classes)`` float array where entry ``[a, p]``
        counts the pairs whose true label is ``a`` and predicted label is
        ``p``.

    Raises:
        IndexError: If a label is outside the matrix bounds.
    """
    cm = numpy.zeros((num_classes, num_classes))
    for a, p in zip(actual, predicted):
        cm[a, p] += 1

    return cm

## Calculando accuracy

In [24]:
def accuracy(matrix):
    """Return the fraction of the matrix total that lies on the main diagonal.

    Args:
        matrix: Square confusion matrix supporting ``len``, ``matrix[i][i]``
            indexing and ``.sum()`` (e.g. a NumPy array).

    Returns:
        The sum of the diagonal entries divided by the sum of all entries.
    """
    diagonal_total = sum(matrix[i][i] for i in range(len(matrix)))
    return diagonal_total / matrix.sum()

# Executando treinamento e teste dos modelos

## DMM

In [35]:
from classifier import DMM, K1NN
from metrics import confusion_matrix, accuracy

# Instantiate the DMM classifier imported from the local `classifier` module.
dmm = DMM()

# Fit on the training split, then predict labels for the held-out test split.
dmm.train(X_train, y_train)
dmm_pred = dmm.predict(X_test)

# Rows of the matrix are the true labels, columns the predicted ones.
dmm_cm = confusion_matrix(y_test, dmm_pred)

# Report the confusion matrix and the accuracy derived from it.
print(dmm_cm)
print("Accuracy: {}".format(accuracy(dmm_cm)))

[[20.  0.  0.]
 [ 0. 18.  2.]
 [ 0.  1. 19.]]
Accuracy: 0.95


## 1NN

In [37]:
# Instantiate the 1NN classifier imported from the local `classifier` module.
k1nn = K1NN()

# Fit on the training split; the prediction is wrapped in list() so it is
# fully materialised before being scored.
k1nn.train(X_train, y_train)
k1nn_pred = list(k1nn.predict(X_test))

# Report the confusion matrix and the accuracy derived from it.
k1nn_cm = confusion_matrix(y_test, k1nn_pred)
print(k1nn_cm)
print("Accuracy {}".format(accuracy(k1nn_cm)))

[[20.  0.  0.]
 [ 0. 19.  1.]
 [ 0.  0. 20.]]
Accuracy 0.9833333333333333
