In [1]:
pip install numpy matplotlib mlbench

Collecting mlbench
  Downloading mlbench-0.0.0-py3-none-any.whl (3.3 kB)
Installing collected packages: mlbench
Successfully installed mlbench-0.0.0


In [2]:
import numpy as np
import matplotlib.pyplot as plt

# Função para calcular a densidade de probabilidade kernel (KDE)
def pdf_kde(xi, N, x):
    s = 0
    h = 1.06 * np.std(x) * N**(-1/5)
    for i in range(N):
        s += np.exp(-(1/(2*h**2)) * np.dot((x[i, :] - xi).T, (x[i, :] - xi)))
    p = (1/(N * (np.sqrt(2 * np.pi * h)))**N) * s
    return p

# Classificador Bayesiano
def bayes_classifier(x_train, y_train, x_test):
    pC1 = np.sum(y_train == 1) / len(x_train)
    pC2 = 1 - pC1
    c1 = x_train[y_train == 0, :]
    c2 = x_train[y_train == 1, :]
    y_hat = []
    for i in range(len(x_test)):
        p1 = pdf_kde(x_test[i, :], len(c1), c1)
        p2 = pdf_kde(x_test[i, :], len(c2), c2)
        if p1 * pC1 / (p2 * pC2) >= 1:
            y_hat.append(0)
        else:
            y_hat.append(1)
    return np.array(y_hat)

# Geração de dados usando mlbench
from mlbench import mlbench_spirals

data = mlbench_spirals(200, sd=0.05)
x = np.array(data['x'])
y = np.array(data['classes']) - 1
index = np.random.permutation(len(x))

# Avaliação do classificador usando validação cruzada com 10 folds
accuracy = np.zeros((10, 1))
best = 0
for i in range(20, 201, 20):
    test = index[i-20:i]
    train = index[np.setdiff1d(index, test)]
    x_train, y_train = x[train, :], y[train]
    x_test, y_test = x[test, :], y[test]
    y_hat = bayes_classifier(x_train, y_train, x_test)
    aux = np.sum((y_test == y_hat) * 1) / 20
    accuracy[i//20 - 1, 0] = aux
    if aux > best:
        best_train, best_test, save_index, best = train, test, i//20, aux

print(accuracy)
print(np.std(accuracy, axis=0))
print(np.mean(accuracy, axis=0))

# Treinamento final com os melhores conjuntos de treino e teste
x_train, y_train = x[best_train, :], y[best_train]
x_test, y_test = x[best_test, :], y[best_test]

# Superfície de verossimilhança
pC1 = np.sum(y_train == 1) / len(x_train)
pC2 = 1 - pC1
c1 = x_train[y_train == 0, :]
c2 = x_train[y_train == 1, :]
y_hat = np.zeros(len(y_test))
espaco_de_verossimilhanca = np.zeros((len(y_test), 2))
for i in range(len(x_test)):
    p1 = pdf_kde(x_test[i, :], len(c1), c1)
    p2 = pdf_kde(x_test[i, :], len(c2), c2)

    espaco_de_verossimilhanca[i, 0] = p1
    espaco_de_verossimilhanca[i, 1] = p2

    K = (p1 * pC1) / (p2 * pC2)

    y_hat[i] = 0 if K >= 1 else 1

# Plotagem do espaço de verossimilhança
plt.scatter(espaco_de_verossimilhanca[y_hat == 0, 0], espaco_de_verossimilhanca[y_hat == 0, 1], c='blue')
plt.scatter(espaco_de_verossimilhanca[y_hat == 1, 0], espaco_de_verossimilhanca[y_hat == 1, 1], c='red')
plt.xlabel('')
plt.ylabel('')
plt.title('Verossimilhanças para o fold 1')
plt.show()

# Restante do código de plotagem
# ...



ImportError: ignored