In [1]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

# Load the dataset.
# The CSV is decoded as ISO-8859-1 (Latin-1) instead of pandas' default UTF-8.
file_path = "Articles.csv"
df = pd.read_csv(file_path, encoding="ISO-8859-1")

# Keep only rows that have both an article body and a label. The vectorizer
# expects string documents, so a single missing (NaN) article would otherwise
# abort training. `df` itself is left untouched so the raw frame stays available.
labelled = df.dropna(subset=["Article", "NewsType"])

# Selecting relevant columns
X = labelled["Article"]  # Text data
y = labelled["NewsType"]  # Labels

# Splitting dataset into training (80%) and testing (20%).
# A fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Text preprocessing and model pipeline:
# TF-IDF features (English stop words removed) feeding a multinomial Naive Bayes classifier.
model = make_pipeline(TfidfVectorizer(stop_words='english'), MultinomialNB())

# Train the model
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Print classification report.
# An f-string is used instead of passing two arguments to print(): print's
# separator space would land after the "\n" and push the report's header row
# one column out of line with the rows beneath it.
report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{report}")

# Function to predict category of user input
def predict_category(text):
    """Classify a single piece of text with the module-level ``model``.

    ``text`` is wrapped in a one-element list before being passed to
    ``model.predict``; the first (and only) element of the returned
    predictions is handed back to the caller.
    """
    return model.predict([text])[0]

# Example user input
# NOTE: input() blocks until something is typed, so an unattended
# "Restart & Run All" will pause at this point.
user_input = input("Enter a news article: ").strip()

if user_input:
    predicted_category = predict_category(user_input)
    print("Predicted Category:", predicted_category)
else:
    # Blank text gives the classifier no terms to work with, so any label
    # returned for it would not reflect the input. Skip the prediction
    # rather than print a misleading category.
    predicted_category = None
    print("No article text entered; skipping prediction.")


Classification Report:
               precision    recall  f1-score   support

    business       0.99      1.00      1.00       262
      sports       1.00      0.99      1.00       277

    accuracy                           1.00       539
   macro avg       1.00      1.00      1.00       539
weighted avg       1.00      1.00      1.00       539

Enter a news article: Dhoni finishes off instyle
Predicted Category: sports
