Lab 5: Naïve Bayes Classifier
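This script demonstrates the Gaussian, Multinomial, and Bernoulli Naïve Bayes classifiers from scikit-learn, compares them on a common dataset, evaluates one with cross-validation, and inspects the parameters a fitted Gaussian model learns.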

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris, load_breast_cancer, fetch_20newsgroups
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns


In [None]:
def gaussian_naive_bayes():
    """Fit Gaussian Naïve Bayes on the Iris data and report how it performs.

    Holds out 30% of the samples (fixed seed 42), prints the accuracy and the
    per-class classification report, writes the confusion-matrix heatmap to
    'lab5_gaussian_nb_confusion.png', and finally prints the class priors
    estimated from the training split.
    """
    banner = "=" * 50
    print(banner)
    print("Gaussian Naïve Bayes")
    print(banner)

    # Iris features are continuous measurements
    dataset = load_iris()
    features, targets = dataset.data, dataset.target
    class_names = dataset.target_names

    print("\nDataset: Iris")
    print(f"Shape: {features.shape}")
    print(f"Classes: {class_names}")

    # 70/30 hold-out split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.3, random_state=42
    )

    # Fit on the training split, predict the held-out split
    model = GaussianNB()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    print(f"\nAccuracy: {accuracy_score(y_test, predicted):.4f}")

    print("\nClassification Report:")
    report = classification_report(y_test, predicted, target_names=class_names)
    print(report)

    # Heatmap of true (rows) vs. predicted (columns) labels
    matrix = confusion_matrix(y_test, predicted)
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title('Confusion Matrix - Gaussian Naïve Bayes')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig('lab5_gaussian_nb_confusion.png')
    plt.close()
    print("\nConfusion matrix saved as 'lab5_gaussian_nb_confusion.png'")

    # Priors learned from the training labels, one per class
    print("\nClass prior probabilities:")
    for name, prior in zip(class_names, model.class_prior_):
        print(f"{name}: {prior:.4f}")


In [None]:
def multinomial_naive_bayes():
    """Demonstrate Multinomial Naïve Bayes for text classification.

    Builds a 12-sentence toy corpus (6 legitimate, 6 spam), holds out 25% of
    it with a stratified split, learns the bag-of-words vocabulary from the
    training sentences only, trains MultinomialNB on the resulting word
    counts, and prints the held-out accuracy plus the predictions and class
    probabilities for two unseen sentences.
    """
    print("\n" + "=" * 50)
    print("Multinomial Naïve Bayes (Text Classification)")
    print("=" * 50)

    # Create sample text data
    texts = [
        "I love machine learning",
        "Python is great for data science",
        "Deep learning is fascinating",
        "I enjoy coding in Python",
        "Machine learning is powerful",
        "Data science is my passion",
        "I hate spam emails",
        "This is unwanted message",
        "Get free money now",
        "Spam spam spam",
        "Unwanted advertisement here",
        "Click here for prize",
    ]

    labels = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]  # 0: legitimate, 1: spam

    print(f"\nSample size: {len(texts)}")
    print("Classes: Legitimate (0), Spam (1)")

    # Split the raw sentences BEFORE vectorizing so the held-out sentences
    # cannot influence the vocabulary (and therefore the smoothing
    # denominator) the model is trained with. Stratifying keeps both classes
    # represented on each side of this very small split.
    texts_train, texts_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.25, random_state=42, stratify=labels
    )

    # Learn the vocabulary from the training sentences only; words that appear
    # solely in held-out or new sentences are ignored by transform().
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(texts_train)
    X_test = vectorizer.transform(texts_test)

    # Shape of the training count matrix: (training sentences, vocabulary size)
    print(f"Feature matrix shape: {X_train.shape}")

    # Train Multinomial Naïve Bayes
    mnb = MultinomialNB()
    mnb.fit(X_train, y_train)

    # Make predictions
    y_pred = mnb.predict(X_test)

    # Evaluate
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nAccuracy: {accuracy:.4f}")

    # Test with new samples
    new_texts = ["I love data science", "Free prize click now"]
    new_X = vectorizer.transform(new_texts)
    predictions = mnb.predict(new_X)
    proba = mnb.predict_proba(new_X)

    print("\nPredictions on new samples:")
    for text, pred, prob in zip(new_texts, predictions, proba):
        label = "Legitimate" if pred == 0 else "Spam"
        print(f"Text: '{text}'")
        print(f"  Prediction: {label}")
        # The stratified split guarantees both classes were seen in training,
        # so column 0 is class 0 (legitimate) and column 1 is class 1 (spam).
        print(f"  Probabilities: Legitimate={prob[0]:.4f}, Spam={prob[1]:.4f}")


In [None]:
def bernoulli_naive_bayes():
    """Train and evaluate Bernoulli Naïve Bayes on synthetic 0/1 features.

    Generates 100 samples with 5 random binary features (seed 42); a sample is
    labelled 1 when more than one of its first three features is set. Prints a
    preview of the data, then the accuracy and classification report measured
    on a 30% hold-out split.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Bernoulli Naïve Bayes")
    print(rule)

    # Reproducible random 0/1 matrix
    np.random.seed(42)
    X = np.random.randint(2, size=(100, 5))
    # Label is 1 when at least two of the first three features are on
    active_count = X[:, :3].sum(axis=1)
    y = (active_count > 1).astype(int)

    print(f"\nDataset shape: {X.shape}")
    print("Binary features (0 or 1)")
    print(f"First 5 samples:\n{X[:5]}")
    print(f"First 5 labels: {y[:5]}")

    # 70/30 hold-out split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Fit on the training part, predict the held-out part
    model = BernoulliNB()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    print(f"\nAccuracy: {accuracy_score(y_test, predicted):.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, predicted))


In [None]:
def compare_naive_bayes_types():
    """Compare Gaussian, Multinomial and Bernoulli Naïve Bayes on one dataset.

    Trains each classifier on a 70/30 split (seed 42) of the breast-cancer
    data, prints its test accuracy, and saves a bar chart of the accuracies
    to 'lab5_nb_comparison.png'. A classifier that raises while fitting or
    scoring is reported on stdout and left out of the chart.
    """
    print("\n" + "=" * 50)
    print("Comparing Naïve Bayes Types")
    print("=" * 50)

    # Load dataset
    cancer = load_breast_cancer()
    X = cancer.data
    y = cancer.target

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Train different classifiers
    classifiers = {
        'Gaussian NB': GaussianNB(),
        'Multinomial NB': MultinomialNB(),
        'Bernoulli NB': BernoulliNB(),
    }

    results = {}

    print("\nResults on Breast Cancer dataset:")
    for name, clf in classifiers.items():
        try:
            clf.fit(X_train, y_train)
            accuracy = clf.score(X_test, y_test)
        except Exception as e:
            # Deliberately best-effort: a variant that cannot handle this
            # data is reported and skipped so the others are still compared.
            print(f"{name}: Error - {e}")
        else:
            results[name] = accuracy
            print(f"{name}: {accuracy:.4f}")

    # Plot comparison
    if results:
        names = list(results)
        accuracies = list(results.values())

        # Keep the familiar 0.8 floor when every model clears it, but lower
        # it when a model scores below 0.8 (BernoulliNB can, because its
        # default binarize threshold is a poor match for continuous
        # features). A fixed 0.8 floor would clip that bar out of the axes
        # and leave its label outside the plot area.
        y_floor = max(0.0, min(0.8, min(accuracies) - 0.05))

        plt.figure(figsize=(10, 6))
        plt.bar(names, accuracies, color=['blue', 'green', 'orange'])
        plt.ylabel('Accuracy')
        plt.title('Comparison of Naïve Bayes Classifiers')
        plt.ylim([y_floor, 1.0])
        # Annotate each bar with its accuracy just above the bar top
        for i, acc in enumerate(accuracies):
            plt.text(i, acc + 0.01, f'{acc:.4f}', ha='center')
        plt.tight_layout()
        plt.savefig('lab5_nb_comparison.png')
        plt.close()
        print("\nComparison plot saved as 'lab5_nb_comparison.png'")


In [None]:
def naive_bayes_with_cross_validation():
    """Score Gaussian Naïve Bayes on Iris with 5-fold cross-validation.

    Prints the per-fold scores followed by their mean and standard deviation.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Naïve Bayes with Cross-Validation")
    print(rule)

    # Features and labels of the Iris dataset
    X, y = load_iris(return_X_y=True)

    # One score per fold, using an unfitted classifier as the estimator
    fold_scores = cross_val_score(GaussianNB(), X, y, cv=5)

    print(f"\nCross-validation scores: {fold_scores}")
    print(f"Mean accuracy: {fold_scores.mean():.4f}")
    print(f"Standard deviation: {fold_scores.std():.4f}")


In [None]:
def feature_probability_analysis():
    """Print the parameters Gaussian Naïve Bayes learns from the Iris data.

    Fits the model on the full dataset, then prints the class priors and,
    for every class, the per-feature means (``theta_``) and variances
    (``var_``).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Feature Probability Analysis")
    print(rule)

    iris = load_iris()
    class_names = iris.target_names
    feature_names = iris.feature_names

    # Fit on every sample; this function inspects parameters, not accuracy
    model = GaussianNB()
    model.fit(iris.data, iris.target)

    def print_per_class(heading, table):
        # One row of `table` per class, one column per feature
        print(heading)
        for class_name, row in zip(class_names, table):
            print(f"\n{class_name}:")
            for feature_name, value in zip(feature_names, row):
                print(f"  {feature_name}: {value:.4f}")

    print("\nClass prior probabilities:")
    for class_name, prior in zip(class_names, model.class_prior_):
        print(f"{class_name}: {prior:.4f}")

    print_per_class("\nFeature means per class:", model.theta_)
    print_per_class("\nFeature variances per class:", model.var_)


In [None]:
def main():
    """Run every Naïve Bayes demonstration of the lab, in order."""
    divider = "=" * 50

    print("\n" + divider)
    print("Lab 5: Naïve Bayes Classifier")
    print(divider)

    # Executed top to bottom, one demonstration after another
    demonstrations = (
        gaussian_naive_bayes,               # Gaussian Naïve Bayes
        multinomial_naive_bayes,            # Multinomial Naïve Bayes
        bernoulli_naive_bayes,              # Bernoulli Naïve Bayes
        compare_naive_bayes_types,          # Compare types
        naive_bayes_with_cross_validation,  # Cross-validation
        feature_probability_analysis,       # Feature probability analysis
    )
    for run_demo in demonstrations:
        run_demo()

    print("\n" + divider)
    print("Lab 5 Complete!")
    print(divider)


In [None]:
# Run the whole lab only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
