In [4]:
# Demonstrate a text classifier built with the Multinomial Naive Bayes algorithm.

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
import joblib

# Restrict the 20 Newsgroups corpus to four topics and fetch its predefined
# train and test subsets (train first, then test). Both calls shuffle the
# documents with the same fixed seed (42).
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]
train_data, test_data = (
    fetch_20newsgroups(
        subset=split,
        categories=categories,
        shuffle=True,
        random_state=42,
    )
    for split in ('train', 'test')
)

# Chain the two stages into a single estimator: raw documents are first
# converted to TF-IDF features, which then feed a Multinomial Naive Bayes
# classifier. Both stages are constructed with their default settings.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())

# Fit the pipeline on the training documents and their integer labels
model.fit(train_data.data, train_data.target)

# Classify every document of the held-out test subset
predicted = model.predict(test_data.data)

# Overall accuracy first, then the per-category precision/recall/F1 table
# with rows labelled by the test subset's category names
accuracy = accuracy_score(test_data.target, predicted)
print("Accuracy:", accuracy)
report = classification_report(
    test_data.target,
    predicted,
    target_names=test_data.target_names,
)
print(report)

# Hand-written sentences to classify with the trained model
new_text = [
    "Atheism ia a belief system that rejects the existence of Deities.",
    "The Bible contains teachings and stories that are Foundational to Christianity",
    "The latest research in medicine shows promising results.",
    "I'm having trouble with my graphics card, can anyone help?"
]

# The pipeline vectorizes the raw strings itself, so they are passed as-is
new_predicted = model.predict(new_text)

# Show each sentence with the name of its predicted category, followed by a
# blank line. Predicted label indices are looked up in the training subset's
# category names.
for sentence, label_index in zip(new_text, new_predicted):
    category_name = train_data.target_names[label_index]
    print(f"Text: {sentence}")
    print(f"Predicted Category: {category_name}")
    print()


Accuracy: 0.8348868175765646
                        precision    recall  f1-score   support

           alt.atheism       0.97      0.60      0.74       319
         comp.graphics       0.96      0.89      0.92       389
               sci.med       0.97      0.81      0.88       396
soc.religion.christian       0.65      0.99      0.78       398

              accuracy                           0.83      1502
             macro avg       0.89      0.82      0.83      1502
          weighted avg       0.88      0.83      0.84      1502

Text: Atheism ia a belief system that rejects the existence of Deities.
Predicted Category: alt.atheism

Text: The Bible contains teachings and stories that are Foundational to Christianity
Predicted Category: soc.religion.christian

Text: The latest research in medicine shows promising results.
Predicted Category: sci.med

Text: I'm having trouble with my graphics card, can anyone help?
Predicted Category: comp.graphics

