# Gaussian Naive Bayes Example

Import necessary libraries

In [145]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

Load the Iris dataset, then store the feature matrix in `X` and the target labels in `y` (both are NumPy arrays at this point)

In [147]:
# Load the bundled Iris dataset, then pull out the feature matrix and the class labels.
iris = load_iris()
X, y = iris.data, iris.target

Create a DataFrame for easier data handling, and add the target `y` as a `species` column

In [149]:
# Tabular view of the data: one column per measurement plus the class label as `species`.
iris_df = pd.DataFrame(X, columns=iris.feature_names).assign(species=y)

Split the data into training and testing sets, 80/20 for train/test, random state = 42

In [151]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Initialize the Naive Bayes model

In [153]:
# Gaussian Naive Bayes classifier; no arguments are passed, so scikit-learn's defaults apply.
naive_bayes = GaussianNB()

Train the model

In [155]:
# Fit on the training split only; the test split stays unseen until evaluation.
naive_bayes.fit(X=X_train, y=y_train)

Make predictions

In [157]:
# Predict a class label for every held-out sample.
y_pred = naive_bayes.predict(X=X_test)

Calculate accuracy

In [159]:
# Share of held-out samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


Display classification report

In [48]:
# Per-class precision / recall / F1, labelled with the species names instead of integer codes.
print("\nClassification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=iris.target_names,
    )
)


Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# Multinomial Naive Bayes Example

Import necessary libraries

In [235]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer

Load the 20 Newsgroups dataset, remove headers, footers and quotes

In [178]:
# Restrict the corpus to three topics and ask the loader to drop headers,
# footers and quoted replies from each post.
categories = ['sci.space', 'comp.graphics', 'rec.sport.baseball']
newsgroups_train = fetch_20newsgroups(
    subset='train',
    categories=categories,
    remove=('headers', 'footers', 'quotes'),
)

Vectorize the text data using CountVectorizer

In [179]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(newsgroups_train.data)
y = newsgroups_train.target

Split the data into training and testing sets, 80/20 train to test, 42 random state

In [182]:
# 80/20 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Initialize and train the Multinomial Naive Bayes model

In [184]:
# Multinomial Naive Bayes with default settings, fitted on the training split of the count matrix.
nb_classifier = MultinomialNB()
nb_classifier.fit(X=X_train, y=y_train)

Make predictions on the test set

In [186]:
# Predict a newsgroup label for every held-out document.
y_pred = nb_classifier.predict(X=X_test)

Calculate accuracy and display classification report

In [188]:
# Overall accuracy first, then the per-newsgroup precision / recall / F1 table.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=newsgroups_train.target_names,
    )
)

Accuracy: 0.9211267605633803

Classification Report:
                    precision    recall  f1-score   support

     comp.graphics       0.89      0.93      0.91       119
rec.sport.baseball       0.94      0.94      0.94       117
         sci.space       0.94      0.89      0.91       119

          accuracy                           0.92       355
         macro avg       0.92      0.92      0.92       355
      weighted avg       0.92      0.92      0.92       355



# Bernoulli Naive Bayes Example

In [237]:
from sklearn.naive_bayes import BernoulliNB

In [192]:
# Encode each document as 0/1 word-presence indicators (binary=True) rather
# than raw counts, which is the input representation used for Bernoulli Naive Bayes here.
# NOTE(review): as in the count-based example, the vocabulary is fit on all
# documents before the split in the next cell — consider splitting the raw text first.
vectorizer = CountVectorizer(binary=True)
y = newsgroups_train.target
X = vectorizer.fit_transform(raw_documents=newsgroups_train.data)

In [194]:
# 80/20 train/test split of the binary matrix, using seed 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [254]:
# Bernoulli Naive Bayes with default settings, fitted on the training split of the binary matrix.
nb_classifier = BernoulliNB()
nb_classifier.fit(X=X_train, y=y_train)

In [198]:
# Predict a newsgroup label for every held-out document.
y_pred = nb_classifier.predict(X=X_test)

In [200]:
# Overall accuracy first, then the per-newsgroup precision / recall / F1 table.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=newsgroups_train.target_names,
    )
)

Accuracy: 0.7746478873239436

Classification Report:
                    precision    recall  f1-score   support

     comp.graphics       0.82      0.88      0.85       119
rec.sport.baseball       0.68      0.97      0.80       117
         sci.space       0.93      0.48      0.63       119

          accuracy                           0.77       355
         macro avg       0.81      0.78      0.76       355
      weighted avg       0.81      0.77      0.76       355



# CountVectorization Function

In [239]:
# Three short sentences used as a toy corpus; the first one repeats the word
# "sample" so that count-based and binary encodings can be told apart.
corpus = [
    'This is a sample sample sentence.',
    'Another example sentence.',
    'More text data here.',
]

In [241]:
# Create a CountVectorizer with default settings (no arguments are passed);
# it is fitted on the corpus in a later cell.
vectorizer = CountVectorizer()

In [243]:
# Learn the vocabulary from the toy corpus and encode it as a document-term matrix in one step.
X = vectorizer.fit_transform(raw_documents=corpus)

In [245]:
# Get the feature names (the learned vocabulary terms); they label the columns of X.
feature_names = vectorizer.get_feature_names_out()

In [247]:
# Display the transformed matrix: X is converted to a dense array for printing
# (one row per sentence in `corpus`), followed by the vocabulary terms.
print(X.toarray())
print("Feature names:", feature_names)

[[0 0 0 0 1 0 2 1 0 1]
 [1 0 1 0 0 0 0 1 0 0]
 [0 1 0 1 0 1 0 0 1 0]]
Feature names: ['another' 'data' 'example' 'here' 'is' 'more' 'sample' 'sentence' 'text'
 'this']


## binary=True Vectorizer

In [251]:
# Same toy corpus, encoded with binary=True: any non-zero count is recorded as 1,
# so the repeated "sample" in the first sentence shows up as 1 rather than 2.
vectorizer = CountVectorizer(binary=True)
X = vectorizer.fit_transform(raw_documents=corpus)
feature_names = vectorizer.get_feature_names_out()

# Dense view of the 0/1 matrix, followed by the vocabulary that labels its columns.
print(X.toarray())
print("Feature_names:", feature_names)

[[0 0 0 0 1 0 1 1 0 1]
 [1 0 1 0 0 0 0 1 0 0]
 [0 1 0 1 0 1 0 0 1 0]]
Feature_names: ['another' 'data' 'example' 'here' 'is' 'more' 'sample' 'sentence' 'text'
 'this']
