In [None]:
# Naive-Bayes Method

# Requires scikit-learn and NLTK: pip install scikit-learn nltk

import nltk
import random
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# nltk.word_tokenize needs the Punkt sentence-tokenizer models. Newer NLTK
# releases load them from the 'punkt_tab' resource rather than the older
# pickled 'punkt' one, so fetch both to work with either generation.
nltk.download('punkt')
nltk.download('punkt_tab')

def preprocess_text(text):
    """Tokenize *text* with NLTK and return the tokens joined by single spaces.

    Args:
        text: The raw input string.

    Returns:
        A string made of the tokens produced by ``nltk.word_tokenize``,
        separated by one space each.
    """
    return " ".join(nltk.word_tokenize(text))

# Example data for text classification: (text, sentiment label) pairs
data = [
    ("I love this product. It's fantastic!", "positive"),
    ("The weather today is terrible.", "negative"),
    ("I don't really care about this movie.", "neutral"),
    ("This book is neither good nor bad.", "neutral"),
    ("I feel happy and excited.", "positive"),
]

# Shuffle the data with a dedicated, seeded generator. An unseeded
# random.shuffle() reorders the rows differently on every run, which would
# defeat the fixed random_state passed to train_test_split below.
random.Random(42).shuffle(data)

# Preprocess the data (tokenize each text and re-join the tokens)
preprocessed_data = [(preprocess_text(text), label) for text, label in data]

# Split the data into training and testing sets. With 5 samples and
# test_size=0.2 exactly one sample is held out, so the metrics printed below
# only demonstrate the workflow; they are not a meaningful quality estimate.
texts, labels = zip(*preprocessed_data)
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Every class present in the data set, in a fixed (sorted) order. Passing this
# to the metric functions gives one row/column per class even when a class is
# absent from the tiny test split; otherwise the labels are inferred from the
# test split alone and the output shape varies from split to split.
class_names = sorted(set(labels))

# Create a bag-of-words model using CountVectorizer. The vocabulary is learned
# from the training texts only and then reused for the test texts.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

# Create a Multinomial Naive Bayes classifier
classifier = MultinomialNB()

# Train the classifier
classifier.fit(X_train, train_labels)

# Make predictions on the test set
predictions = classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(test_labels, predictions)
print(f"Accuracy: {accuracy:.2f}")

# Print the classification report and confusion matrix.
# zero_division=0 reports 0.0 (without a warning) for classes that have no
# true or predicted samples in the test split.
print("Classification Report:")
print(
    classification_report(
        test_labels, predictions, labels=class_names, zero_division=0
    )
)

# Rows are true classes and columns are predicted classes, both in the same
# order as the rows of the classification report above.
print("Confusion Matrix:")
print(confusion_matrix(test_labels, predictions, labels=class_names))
