In [12]:
# ===== NAIVE BAYES COM SCIKIT LEARN =====

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset and unpack features / labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold-out split: 20% of the samples are reserved for testing; the fixed
# random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit scikit-learn's Gaussian Naive Bayes on the training portion.
clf = GaussianNB()
clf.fit(X_train, y_train)

# Score the held-out samples and report accuracy as a percentage.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Acurácia: {:.2f}%'.format(accuracy * 100))


Acurácia: 97.37%


In [15]:
# ===== NAIVE BAYES SEM SCIKIT LEARN =====

import csv
import random
import math
import numpy as np
from sklearn.datasets import load_breast_cancer

class NaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest log-posterior
    (log prior + sum of per-feature log-likelihoods).

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest feature variance in the training data that
        is added to every per-class variance. This keeps the standard
        deviation strictly positive when a feature is constant within a
        class (as long as at least one feature varies in the training set).

    Attributes
    ----------
    classes : ndarray of shape (n_classes,) or None
        Sorted unique labels seen in ``fit``; ``None`` before fitting.
    mean, stdev : ndarray of shape (n_classes, n_features) or None
        Per-class feature means and (smoothed) standard deviations.
    priors : ndarray of shape (n_classes,) or None
        Relative frequency of each class in the training labels.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.classes = None
        self.mean = None
        self.stdev = None
        self.priors = None

    def fit(self, X_train, y_train):
        """Estimate per-class means, standard deviations and priors.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X_train`` is empty or is not two-dimensional.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        n_samples, n_features = X_train.shape
        if X_train.size == 0:
            raise ValueError("X_train must contain at least one sample and one feature")

        self.classes = np.unique(y_train)
        n_classes = len(self.classes)

        # Additive variance floor, so a feature that is constant within a
        # class does not produce stdev == 0 (division by zero in the density).
        epsilon = self.var_smoothing * X_train.var(axis=0).max()

        self.mean = np.zeros((n_classes, n_features))
        self.stdev = np.zeros((n_classes, n_features))
        self.priors = np.zeros(n_classes)
        for i, c in enumerate(self.classes):
            X_class = X_train[y_train == c]
            self.mean[i, :] = X_class.mean(axis=0)
            self.stdev[i, :] = np.sqrt(X_class.var(axis=0) + epsilon)
            self.priors[i] = X_class.shape[0] / float(n_samples)

    def predict(self, X_test):
        """Return the most probable class label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            A single 1-D sample of length ``n_features`` is also accepted
            and treated as one row.

        Returns
        -------
        ndarray of shape (n_samples,)
            Labels taken from ``self.classes`` (not positional indices).

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.classes is None:
            raise RuntimeError("NaiveBayes.predict() called before fit()")

        X_test = np.atleast_2d(np.asarray(X_test, dtype=float))

        # Work entirely in log space: multiplying many small densities (or
        # evaluating one far in the tail) underflows to 0 in linear space,
        # which would make every class score -inf.
        log_posteriors = np.stack([
            np.log(self.priors[i])
            + self._gaussian_log_pdf(X_test, self.mean[i, :], self.stdev[i, :]).sum(axis=1)
            for i in range(len(self.classes))
        ])

        # argmax yields a position along the class axis; map it back to the
        # actual label so arbitrary label values are supported.
        return self.classes[np.argmax(log_posteriors, axis=0)]

    def _gaussian_log_pdf(self, X, mean, stdev):
        """Element-wise log of the normal density N(mean, stdev**2) at ``X``."""
        return (
            -0.5 * np.log(2 * np.pi)
            - np.log(stdev)
            - (X - mean) ** 2 / (2 * stdev ** 2)
        )

    def _gaussian_pdf(self, X, mean, stdev):
        """Element-wise normal density N(mean, stdev**2) at ``X``.

        Kept for compatibility; ``predict`` uses ``_gaussian_log_pdf``
        because this linear-space form underflows for distant points.
        """
        exponent = np.exp(-((X - mean)**2 / (2 * stdev**2)))
        return (1 / (np.sqrt(2 * np.pi) * stdev)) * exponent

# Load the breast-cancer dataset and unpack features / labels.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold-out split: shuffle the row indices, then use the first 80% for
# training and the remainder for testing.
# The shuffle uses a dedicated, seeded generator so that Restart & Run All
# always yields the same split (and therefore the same reported accuracy);
# the unseeded module-level random.shuffle gave a different result each run.
SEED = 42
n_samples = X.shape[0]
train_size = int(n_samples * 0.8)
indices = list(range(n_samples))
random.Random(SEED).shuffle(indices)
train_indices = indices[:train_size]
test_indices = indices[train_size:]
X_train = X[train_indices, :]
y_train = y[train_indices]
X_test = X[test_indices, :]
y_test = y[test_indices]

# Train the hand-written Naive Bayes classifier.
clf = NaiveBayes()
clf.fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = clf.predict(X_test)

# Accuracy = fraction of test samples whose prediction matches the label.
accuracy = np.mean(y_pred == y_test)
print('Acurácia: {:.2f}%'.format(accuracy * 100))


Acurácia: 92.98%
