In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.datasets import make_classification, fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

# Example 1: Gaussian Naive Bayes for continuous features
X, y = make_classification(n_samples=1000, n_features=20, n_informative=5, 
                          n_redundant=2, random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

print("Gaussian Naive Bayes:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_gnb):.4f}")
print(classification_report(y_test, y_pred_gnb))

# Example 2: Text classification with Multinomial Naive Bayes
# Load dataset (subset of 20 newsgroups)
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
news_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
news_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

# Feature extraction with TF-IDF
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(news_train.data)
X_test_tfidf = tfidf_vectorizer.transform(news_test.data)

# Train Multinomial Naive Bayes
mnb = MultinomialNB(alpha=0.1)  # alpha is the smoothing parameter
mnb.fit(X_train_tfidf, news_train.target)
y_pred_mnb = mnb.predict(X_test_tfidf)

print("\nMultinomial Naive Bayes (Text Classification):")

Gaussian Naive Bayes:
Accuracy: 0.8633
              precision    recall  f1-score   support

           0       0.83      0.92      0.87       153
           1       0.91      0.80      0.85       147

    accuracy                           0.86       300
   macro avg       0.87      0.86      0.86       300
weighted avg       0.87      0.86      0.86       300


Multinomial Naive Bayes (Text Classification):
