In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier, LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt


In [2]:
# Read the news CSV from disk, then discard the "Unnamed: 0" column so that
# `dataset` holds only the remaining columns.
# NOTE(review): this is a machine-specific absolute path — it will need
# adjusting to run the notebook anywhere else.
NEWS_CSV_PATH = r"C:\Users\AB92922\Downloads\news.csv"
df = pd.read_csv(NEWS_CSV_PATH, index_col=None)
dataset = df.drop(columns=["Unnamed: 0"])

In [3]:
# Target is the "label" column; the model input is the "text" column.
# Hold out 33% of the rows for evaluation; the fixed seed keeps the
# split reproducible across runs.
y = dataset["label"]
X_train, X_test, y_train, y_test = train_test_split(
    dataset["text"],
    y,
    test_size=0.33,
    random_state=42,
)

In [4]:
# Bag-of-words features with English stop words removed.
# The vocabulary is learned from the training text only; the test text is
# then encoded with that same fitted vocabulary.
count_vectorizer = CountVectorizer(stop_words="english")
count_train = count_vectorizer.fit_transform(X_train)  # fit + encode training text
count_test = count_vectorizer.transform(X_test)        # encode only, no refit

In [None]:
# Gradient Boosting Classifier: build and train in one step
# (`fit` returns the fitted estimator itself), seeded for reproducibility.
gb = GradientBoostingClassifier(random_state=42).fit(count_train, y_train)

# Predict labels for the held-out test features
y_pred_gb = gb.predict(count_test)

# Per-class precision / recall / F1 summary
print(
    "Classification Report for Gradient Boosting Classifier:",
    classification_report(y_test, y_pred_gb),
    sep="\n",
)

# Overall fraction of test samples predicted correctly
accuracy = metrics.accuracy_score(y_test, y_pred_gb)
print(f"Gradient Boosting Accuracy: {accuracy:.4f}")



In [None]:
# Confusion matrix for the Gradient Boosting predictions.
# scikit-learn convention: rows are true labels, columns are predicted labels.
# The label order is made explicit (sorted union of true and predicted labels,
# which matches confusion_matrix's default ordering) so the tick labels below
# are guaranteed to line up with the matrix rows/columns.
class_labels = sorted(set(y_test) | set(y_pred_gb))
conf_matrix = metrics.confusion_matrix(y_test, y_pred_gb, labels=class_labels)

fig, ax = plt.subplots(figsize=(6, 4))
image = ax.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title('Confusion Matrix - GB')
fig.colorbar(image, ax=ax)

# Label the axes with the actual class names so the figure stands on its own.
tick_positions = list(range(len(class_labels)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_labels)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

# Write the count into every cell; switch to white text on dark cells
# so the numbers stay readable against the Blues colormap.
contrast_threshold = conf_matrix.max() / 2
for row in tick_positions:
    for col in tick_positions:
        count = conf_matrix[row, col]
        ax.text(
            col,
            row,
            str(count),
            ha='center',
            va='center',
            color='white' if count > contrast_threshold else 'black',
        )

fig.tight_layout()
plt.show()

In [None]:
# Preview the Gradient Boosting predictions against the ground truth.
# `y_pred_gb` already holds gb.predict(count_test) from the training cell, so
# the model is not re-run here.
# NOTE(review): this cell previously re-ran gb.predict into `y_pred_svm` under a
# "Logistic Regression" comment, although no SVM or logistic-regression model is
# involved. The name is kept only as an alias of the GB predictions in case
# cells outside this view still reference it — TODO: remove once confirmed unused.
y_pred_svm = y_pred_gb

# Display first 10 predictions
print("First 10 predictions:", y_pred_gb[:10])

# Display corresponding true labels for comparison.
# `.iloc` makes the positional slice explicit: y_test keeps the original row
# index of the shuffled split, so label-based indexing would be wrong here.
print("True labels for the first 10 samples:", y_test.iloc[:10].to_numpy())
