<a href="https://colab.research.google.com/github/ahmedebeed555/NaiiveBays/blob/main/NaiiveBays.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Multinomial Naive Bayes

In [5]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# 1. Load dataset
# Two newsgroup topics give a binary text-classification task (topic, not sentiment).
categories = ['rec.autos', 'talk.politics.misc']
# remove=(...) drops headers, footers and quoted replies from each post.
data = fetch_20newsgroups(subset='all', categories=categories, remove=('headers', 'footers', 'quotes'))

# 2. Split into train/test
# 30% of the posts are held out; stratify keeps the class ratio the same in
# both splits and random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, stratify=data.target, random_state=42
)

# 3. Convert text into TF-IDF features
# The vectorizer is fitted on the training split only and then reused,
# unchanged, to transform the test split.
vectorizer = TfidfVectorizer(stop_words='english', lowercase=True)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 4. Train Naïve Bayes
model = MultinomialNB(alpha=1.0)  # alpha is the additive smoothing parameter
model.fit(X_train_tfidf, y_train)

# 5. Predictions
predictions = model.predict(X_test_tfidf)

# 6. Evaluation
# Label names are taken from the loaded dataset rather than from `categories`,
# so each report row always matches the integer code used in data.target,
# regardless of the order in which `categories` was written.
print("Accuracy:", accuracy_score(y_test, predictions))
print("\nClassification Report:\n", classification_report(y_test, predictions, target_names=data.target_names))


Accuracy: 0.939622641509434

Classification Report:
                     precision    recall  f1-score   support

         rec.autos       0.91      0.99      0.95       297
talk.politics.misc       0.98      0.88      0.93       233

          accuracy                           0.94       530
         macro avg       0.95      0.93      0.94       530
      weighted avg       0.94      0.94      0.94       530



Gaussian Naive Bayes

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the Iris flower dataset: feature matrix and integer class labels
iris = load_iris()
X = iris.data
y = iris.target

# 2. Hold out 30% of the samples for testing (stratified on y, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# 3. Fit a Gaussian Naïve Bayes classifier on the training split
model = GaussianNB().fit(X_train, y_train)

# 4. Predict classes for the held-out samples
y_pred = model.predict(X_test)

# 5. Report overall accuracy and per-class metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)


Accuracy: 0.9111111111111111

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.82      0.93      0.88        15
   virginica       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45



Bernoulli Naive Bayes



In [7]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report

# 1. Load dataset
# Two newsgroup topics give a binary text-classification task.
categories = ['rec.autos', 'talk.politics.misc']
# remove=(...) drops headers, footers and quoted replies from each post.
data = fetch_20newsgroups(subset='all', categories=categories, remove=('headers', 'footers', 'quotes'))

# 2. Train/test split
# 30% of the posts are held out; stratify keeps the class ratio the same in
# both splits and random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, stratify=data.target, random_state=42
)

# 3. Convert text to binary features (word present or not)
# binary=True records presence/absence instead of counts. The vocabulary is
# learned from the training split only and reused for the test split.
vectorizer = CountVectorizer(binary=True, stop_words='english')
X_train_bin = vectorizer.fit_transform(X_train)
X_test_bin = vectorizer.transform(X_test)

# 4. Train Bernoulli Naïve Bayes
model = BernoulliNB()
model.fit(X_train_bin, y_train)

# 5. Predictions
y_pred = model.predict(X_test_bin)

# 6. Evaluation
# Label names are taken from the loaded dataset rather than from `categories`,
# so each report row always matches the integer code used in data.target,
# regardless of the order in which `categories` was written.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=data.target_names))


Accuracy: 0.7679245283018868

Classification Report:
                     precision    recall  f1-score   support

         rec.autos       0.71      1.00      0.83       297
talk.politics.misc       0.99      0.48      0.64       233

          accuracy                           0.77       530
         macro avg       0.85      0.74      0.74       530
      weighted avg       0.83      0.77      0.75       530

