In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, classification_report

# Toy sentiment corpus: ten short messages, each tagged 'pos' or 'neg'.
# The i-th entry of 'label' belongs to the i-th entry of 'message'.
data = dict(
    message=[
        "I love this movie, it's so good.",
        "This book was boring, I did not like it.",
        "The food at that restaurant was amazing!",
        "The service was terrible, I won't go back.",
        "The concert last night was fantastic!",
        "I had a terrible experience at the store.",
        "The vacation was incredible, I had so much fun.",
        "I'm disappointed with the product quality.",
        "The weather today is beautiful.",
        "The traffic was awful this morning.",
    ],
    label=['pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg'],
)

# Build the frame and attach the numeric target in a single chained
# expression: 'pos' is encoded as 1 and 'neg' as 0.
msg = pd.DataFrame(data).assign(
    labelnum=lambda frame: frame['label'].map({'pos': 1, 'neg': 0})
)

# Split the messages and their numeric labels into train and test parts.
# The seed is fixed so the same rows land in each part on every run; no
# test_size is given, so scikit-learn's default proportion applies.
Xtrain, Xtest, ytrain, ytest = train_test_split(msg['message'], msg['labelnum'], random_state=42)

# Bag-of-words features. The vocabulary is learned from the training
# messages only and then reused, unchanged, to encode the test messages.
count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)

# Dense copy of the training document-term matrix with one column per
# vocabulary term. Not used by the model below; kept for inspection.
df = pd.DataFrame(Xtrain_dm.toarray(), columns=count_v.get_feature_names_out())

# Train a Multinomial Naive Bayes classifier on the term counts
clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)

# Predict numeric labels (1 = pos, 0 = neg) for the held-out messages
pred = clf.predict(Xtest_dm)

# Show each test message next to its predicted sentiment. A separate name is
# used for the text label so the numeric prediction `p` is not overwritten.
for doc, p in zip(Xtest, pred):
    sentiment = 'pos' if p == 1 else 'neg'
    print(f"{doc} -> {sentiment}")

# Evaluation metrics.
# labels=[0, 1] pins the class order so the confusion matrix is always 2x2 and
# target_names always lines up, even if a class is missing from the test split
# or from the predictions. zero_division=0 reports an undefined precision or
# recall as 0 instead of emitting UndefinedMetricWarning.
print('\nAccuracy Metrics:')
print('Accuracy:', accuracy_score(ytest, pred))
print('Recall:', recall_score(ytest, pred, zero_division=0))
print('Precision:', precision_score(ytest, pred, zero_division=0))
print('Confusion Matrix:\n', confusion_matrix(ytest, pred, labels=[0, 1]))
print(classification_report(ytest, pred, labels=[0, 1], target_names=['neg', 'pos'], zero_division=0))


The weather today is beautiful. -> pos
This book was boring, I did not like it. -> pos
I had a terrible experience at the store. -> pos

Accuracy Metrics:
Accuracy: 0.3333333333333333
Recall: 1.0
Precision: 0.3333333333333333
Confusion Matrix:
 [[0 2]
 [0 1]]
              precision    recall  f1-score   support

         neg       0.00      0.00      0.00         2
         pos       0.33      1.00      0.50         1

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
