In [1]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Tunable settings, named once so the loader and the splitter cannot drift apart.
SEED = 42        # shared by the dataset shuffle and the train/test split
TEST_SIZE = 0.3  # fraction of documents held out for evaluation

# Load the four selected 20 Newsgroups categories (subset='all' fetches both the
# train and the test portions of the dataset).
# NOTE(review): no `remove=` argument is passed, so headers, footers and quoted
# replies stay in the text. scikit-learn's dataset guide warns that classifiers can
# overfit on this metadata; consider remove=('headers', 'footers', 'quotes') for a
# more realistic score (this will change the reported numbers).
categories = [
    'comp.graphics',
    'sci.med',
    'soc.religion.christian',
    'talk.politics.misc',
]
newsgroups = fetch_20newsgroups(
    subset='all',
    categories=categories,
    shuffle=True,
    random_state=SEED,
)

# Hold out TEST_SIZE of the documents; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups.data,
    newsgroups.target,
    test_size=TEST_SIZE,
    random_state=SEED,
)
assert len(X_train) + len(X_test) == len(newsgroups.data), "split lost or duplicated documents"

# Bag-of-words features: the vocabulary is learned from the training texts only,
# then the same fitted vectorizer is applied to the held-out texts.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit Multinomial Naive Bayes on the training counts (fit() returns the estimator)
nb_classifier = MultinomialNB().fit(X_train_vec, y_train)

# Predicted class index for every held-out document
y_pred = nb_classifier.predict(X_test_vec)

# Newsgroup names used to label the rows of the report below
target_names = newsgroups.target_names

# Overall accuracy on the held-out set
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 breakdown
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=target_names,
    )
)


Accuracy: 0.98
                        precision    recall  f1-score   support

         comp.graphics       0.96      0.99      0.97       303
               sci.med       0.99      0.97      0.98       303
soc.religion.christian       1.00      0.97      0.98       298
    talk.politics.misc       0.96      0.98      0.97       217

              accuracy                           0.98      1121
             macro avg       0.98      0.98      0.98      1121
          weighted avg       0.98      0.98      0.98      1121

