In [2]:
import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# Load the training subset of the 20 newsgroups corpus via scikit-learn's
# dataset fetcher.
newsgroups_train = fetch_20newsgroups(subset='train')

# Hold out 20% of the documents for evaluation. The fixed random_state keeps the
# split (and therefore the reported metrics) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    newsgroups_train.data,
    newsgroups_train.target,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vectorizer on the training documents only, then reuse the fitted
# vectorizer on the held-out documents so that no test-set statistics influence
# the features.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train the Naïve Bayes classifier and predict the held-out documents.
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_vectorized, y_train)
nb_predictions = nb_classifier.predict(X_test_vectorized)

# Train the SVM classifier (linear kernel) and predict the held-out documents.
# NOTE(review): scikit-learn's SVC documentation suggests LinearSVC for large
# sample counts; SVC is kept here so the reported numbers stay unchanged.
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_vectorized, y_train)
svm_predictions = svm_classifier.predict(X_test_vectorized)


def print_classification_report(title, y_true, y_pred, target_names):
    """Print a titled per-class precision/recall/F1 report.

    Parameters
    ----------
    title : str
        Heading printed on its own line before the report.
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    target_names : list of str
        Display name for each class. Assumes the class labels are the integer
        positions ``0 .. len(target_names) - 1`` in this list (the convention
        documented for ``fetch_20newsgroups``).
    """
    # Pin the label set explicitly. Without `labels`, classification_report
    # infers the classes from the data, so a class that is absent from both
    # y_true and y_pred would make `target_names` misalign with the rows (or
    # raise). With every class present the output is identical to the
    # inferred-label form.
    labels = list(range(len(target_names)))
    print(title)
    print(classification_report(y_true, y_pred, labels=labels, target_names=target_names))


# Evaluate both classifiers on the same held-out split.
print_classification_report(
    "Naïve Bayes Classifier:",
    y_test,
    nb_predictions,
    newsgroups_train.target_names,
)
print_classification_report(
    "\nSupport Vector Machine (SVM) Classifier:",
    y_test,
    svm_predictions,
    newsgroups_train.target_names,
)

Naïve Bayes Classifier:
                          precision    recall  f1-score   support

             alt.atheism       0.94      0.76      0.84        97
           comp.graphics       0.85      0.81      0.83       104
 comp.os.ms-windows.misc       0.91      0.75      0.82       115
comp.sys.ibm.pc.hardware       0.66      0.80      0.72       123
   comp.sys.mac.hardware       0.96      0.79      0.86       126
          comp.windows.x       0.89      0.90      0.89       106
            misc.forsale       0.89      0.73      0.80       109
               rec.autos       0.90      0.91      0.91       139
         rec.motorcycles       0.93      0.93      0.93       122
      rec.sport.baseball       0.92      0.96      0.94       102
        rec.sport.hockey       0.94      0.97      0.95       108
               sci.crypt       0.78      1.00      0.88       125
         sci.electronics       0.90      0.79      0.84       114
                 sci.med       0.99      0.89      