In [1]:
import numpy as np

class NaiveBayesClassifier:
    def __init__(self):
        self.prior = None
        self.likelihood = None
    
    def fit(self, X, y):
        # Calculate class priors
        self.prior = {}
        for label in np.unique(y):
            self.prior[label] = np.sum(y == label) / len(y)
        
        # Calculate likelihoods
        self.likelihood = {}
        for label in np.unique(y):
            self.likelihood[label] = {}
            for feature in range(X.shape[1]):
                # Calculate P(feature|class)
                self.likelihood[label][feature] = {}
                for value in np.unique(X[:, feature]):
                    self.likelihood[label][feature][value] = \
                        np.sum((X[:, feature] == value) & (y == label)) / np.sum(y == label)
    
    def predict(self, X):
        predictions = []
        for i in range(X.shape[0]):
            # Calculate posterior for each class
            posteriors = {}
            for label in self.prior:
                posterior = np.log(self.prior[label])
                for feature in range(X.shape[1]):
                    posterior += np.log(self.likelihood[label][feature].get(X[i, feature], 0.01))  # Smoothing
                posteriors[label] = posterior
            # Predict the class with the highest posterior
            predictions.append(max(posteriors, key=posteriors.get))
        return predictions

In [2]:
# Example usage
X_train = np.array([[1, 0], [1, 1], [0, 1], [0, 0], [1, 1]])
y_train = np.array([0, 1, 1, 0, 1])

nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X_train, y_train)

X_test = np.array([[1, 0], [0, 1], [1, 1]])
predictions = nb_classifier.predict(X_test)
print(predictions)  # Output: [0, 1, 1]

[0, 1, 1]


  posterior += np.log(self.likelihood[label][feature].get(X[i, feature], 0.01))  # Smoothing


In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict
y_pred = gnb.predict(X_test)

# Evaluate
accuracy = (y_pred == y_test).sum() / len(y_test)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Sample text data
X = ["I love to eat pizza", "Pizza is the best", "I hate broccoli"]
y = [1, 1, 0]  # 1 for positive sentiment, 0 for negative sentiment

# Vectorize text data
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Multinomial Naive Bayes
mnb = MultinomialNB()
mnb.fit(X_vectorized, y)

# Test
new_text = ["Broccoli is disgusting"]
new_text_vectorized = vectorizer.transform(new_text)
prediction = mnb.predict(new_text_vectorized)
print("Prediction:", prediction)


Prediction: [0]


In [5]:
from sklearn.naive_bayes import ComplementNB

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Complement Naive Bayes
cnb = ComplementNB()
cnb.fit(X_train, y_train)

# Predict
y_pred = cnb.predict(X_test)

# Evaluate
accuracy = (y_pred == y_test).sum() / len(y_test)
print("Accuracy:", accuracy)


Accuracy: 0.7


In [6]:
from sklearn.naive_bayes import BernoulliNB

# Sample binary data
X = [[1, 0], [0, 1], [1, 1], [0, 0]]
y = [1, 0, 1, 0]  # 1 for positive, 0 for negative

# Bernoulli Naive Bayes
bnb = BernoulliNB()
bnb.fit(X, y)

# Test
new_data = [[1, 0], [1, 1]]
predictions = bnb.predict(new_data)
print("Predictions:", predictions)


Predictions: [1 1]


In [7]:
from sklearn.naive_bayes import CategoricalNB

# Sample data
X = [[0, 1, 2], [1, 2, 2], [2, 1, 0], [0, 1, 1]]
y = [0, 1, 1, 0]  # Classes

# Categorical Naive Bayes
cat_nb = CategoricalNB()
cat_nb.fit(X, y)

# Test
new_data = [[0, 1, 1], [2, 1, 0]]
predictions = cat_nb.predict(new_data)
print("Predictions:", predictions)


Predictions: [0 1]


Certainly! Here are some common applications of each Naive Bayes algorithm:

### 1. Gaussian Naive Bayes
- **Text Classification**: Gaussian Naive Bayes can be used for text classification tasks where features are continuous variables, such as sentiment analysis or spam detection. For example, it can classify documents based on the frequency of certain words.
- **Medical Diagnosis**: It can be used in medical diagnosis tasks where features are continuous, such as the levels of different biomarkers, to classify patients into different disease categories.

### 1.9.2. Multinomial Naive Bayes
- **Text Classification**: Multinomial Naive Bayes is particularly suitable for text classification tasks where features represent word counts or frequencies. It's commonly used in spam filtering, topic classification, and sentiment analysis.
- **Document Classification**: It's often used in document categorization tasks, such as news article classification or document clustering.

### 1.9.3. Complement Naive Bayes
- **Text Classification**: Complement Naive Bayes is particularly effective for text classification tasks with imbalanced class distributions. It's often used in sentiment analysis, topic classification, and document categorization tasks.
- **Spam Filtering**: It can be used in spam filtering where the spam class is significantly smaller than the non-spam class.

### 1.9.4. Bernoulli Naive Bayes
- **Text Classification**: Bernoulli Naive Bayes is commonly used for binary feature classification tasks, such as document classification where features represent the presence or absence of words in the document.
- **Sentiment Analysis**: It's used in sentiment analysis tasks where features represent the occurrence of specific words or features in the text.

### 1.9.5. Categorical Naive Bayes
- **Data Classification**: Categorical Naive Bayes is useful for classification tasks where features are categorical variables with discrete values. It can be used in various domains such as marketing (e.g., customer segmentation), finance (e.g., credit risk assessment), and healthcare (e.g., disease diagnosis).

### 1.9.6. Out-of-core Naive Bayes
- **Large-scale Text Classification**: Out-of-core Naive Bayes is suitable for large-scale text classification tasks where the dataset cannot fit into memory. It's commonly used in scenarios like processing large volumes of social media data, news articles, or emails for classification tasks.
- **Real-time Classification**: It can be used in real-time classification scenarios where new data arrives continuously, and the model needs to be updated incrementally without retraining on the entire dataset.

In [9]:
# Import the required libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Define email data and tags
emails = [
    "Will you be attending today's meeting?",
    "Don't miss out, it's full of great discounts!",
    "Don't forget to install the update",
    "Last day: 50% off!",
    "Last call, renew your subscription",
    "Where to meet friends?"
]

labels = [0, 1, 0, 1, 1, 0]  # 0: Normal, 1: Spam

# Convert text to vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(emails)

# Separate data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Build and train a Naive Bayes classifier
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

# Predict using test data
y_pred = nb_classifier.predict(X_test)

# Evaluate the success of the model
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy Rate:", accuracy)

# Show classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Normal", "Spam"]))

Model Accuracy Rate: 0.0

Classification Report:
              precision    recall  f1-score   support

      Normal       0.00      0.00      0.00       1.0
        Spam       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

