In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

# Load dataset
def load_data(file_path):
    """Read a CSV of labelled comments and return its text and label columns.

    Rows with a missing value in either the ``CONTENT`` or the ``CLASS``
    column are discarded before the columns are extracted.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        A ``(content, labels)`` tuple holding the ``CONTENT`` and ``CLASS``
        columns of the cleaned frame as pandas Series.
    """
    required_columns = ['CONTENT', 'CLASS']
    # Drop incomplete rows up front so both returned columns stay aligned.
    frame = pd.read_csv(file_path).dropna(subset=required_columns)
    content = frame['CONTENT']
    labels = frame['CLASS']
    return content, labels

# Train a Naïve Bayes classifier
def train_text_classifier(X, y):
    """Fit a TF-IDF + multinomial Naive Bayes pipeline and print its test metrics.

    The samples are split 80/20 into training and test portions (stratified
    on ``y``, fixed ``random_state=42``). The pipeline is fitted on the
    training portion and a classification report for the held-out portion is
    printed to stdout.

    Args:
        X: Text samples to learn from.
        y: Class labels, one per entry of ``X``.

    Returns:
        The fitted scikit-learn pipeline.
    """
    # Stratifying on y keeps the class proportions equal in both portions.
    partitions = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    train_texts, test_texts, train_labels, test_labels = partitions

    # Vectorizer: English stop words removed, vocabulary capped at 5000 terms.
    vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
    classifier = make_pipeline(vectorizer, MultinomialNB())
    classifier.fit(train_texts, train_labels)

    # Score the held-out portion and print the per-class metrics.
    predicted_labels = classifier.predict(test_texts)
    report = classification_report(test_labels, predicted_labels)
    print("Classification Report:\n", report)

    return classifier

# Predict class for a new comment
def predict_class(model, comment):
    """Return the label that ``model`` predicts for one comment.

    Args:
        model: Object exposing a ``predict`` method that accepts a list of
            samples and returns an indexable sequence of predictions.
        comment: The single sample to classify.

    Returns:
        The first (and only) element of the prediction sequence.
    """
    # predict() is called with a batch, so wrap the lone comment in a list.
    batch = [comment]
    predictions = model.predict(batch)
    return predictions[0]

if __name__ == "__main__":
    # Script entry point: load the labelled comments, train and evaluate the
    # classifier, then classify one comment typed in by the user.
    file_path = "Youtube01-Psy.csv"  # Relative path to the dataset CSV; update with the correct path
    # X is the CONTENT column and y the CLASS column returned by load_data.
    X, y = load_data(file_path)
    # Training also prints a classification report for the held-out split.
    model = train_text_classifier(X, y)

    # Read one comment from stdin and print the label the model assigns to it.
    user_comment = input("Enter a comment: ")
    predicted_class = predict_class(model, user_comment)
    print(f"Predicted Class: {predicted_class}")


Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.97      0.94        35
           1       0.97      0.91      0.94        35

    accuracy                           0.94        70
   macro avg       0.94      0.94      0.94        70
weighted avg       0.94      0.94      0.94        70

Enter a comment: This video is amazing! I learned so much. Keep up the great work!
Predicted Class: 0
