In [4]:
import math
import re
from collections import defaultdict

class NaiveBayesClassifier:
    """Multinomial Naive Bayes text classifier with add-one smoothing.

    Documents are tokenised by ``clean_text``. ``train`` accumulates per-label
    word counts and document counts; ``predict`` returns the label with the
    highest posterior score, computed in log space as::

        log P(label) + sum(log P(word | label) for each known token)

    where ``P(word | label)`` uses Laplace (add-one) smoothing over the
    vocabulary seen during training.
    """

    def __init__(self):
        # P(label): share of all training documents seen so far per label.
        self.class_probabilities = defaultdict(float)
        # Unsmoothed P(word | label): relative frequency of each word among
        # the label's tokens. predict() smooths the raw counts itself.
        self.feature_probabilities = defaultdict(lambda: defaultdict(float))
        # Raw occurrence count of each word per label.
        self.feature_counts = defaultdict(lambda: defaultdict(int))
        self.classes = set()
        # Documents seen per label. A dict keeps first-seen order, which gives
        # predict() a deterministic iteration (and tie-breaking) order.
        self._class_counts = defaultdict(int)
        # Total number of tokens seen per label.
        self._total_words = defaultdict(int)
        # Every distinct token seen in any training document.
        self._vocabulary = set()

    def clean_text(self, text):
        """Return the lower-cased word tokens of ``text``.

        Every non-word character (anything ``\\W`` matches) is treated as a
        separator, so punctuation is dropped while letters, digits and
        underscores are kept.
        """
        text = re.sub(r'\W', ' ', text)
        return text.lower().split()

    def train(self, documents):
        """Update the model with labelled documents.

        Args:
            documents: iterable of ``(text, label)`` pairs. Any iterable is
                accepted, including one-shot iterators.

        Calling ``train`` repeatedly is cumulative: word counts and class
        priors both reflect every document seen so far. An empty batch
        leaves the model unchanged.
        """
        # Materialise once so generators and other one-shot iterables work.
        documents = list(documents)
        if not documents:
            return

        for document, label in documents:
            self.classes.add(label)
            self._class_counts[label] += 1
            for word in self.clean_text(document):
                self.feature_counts[label][word] += 1
                self._vocabulary.add(word)

        # Priors come from the cumulative document counts, not only from this
        # batch, so they stay consistent with the cumulative word counts.
        total_documents = sum(self._class_counts.values())
        for label, document_count in self._class_counts.items():
            self.class_probabilities[label] = document_count / total_documents
            counts = self.feature_counts[label]
            total_words = sum(counts.values())
            self._total_words[label] = total_words
            for word, count in counts.items():
                self.feature_probabilities[label][word] = count / total_words

    def predict(self, document):
        """Return the most probable label for ``document``.

        Tokens never seen during training are ignored, since they carry no
        evidence for any class. Repeated tokens count once per occurrence.
        When several labels tie, the one seen first during training wins.

        Raises:
            ValueError: if the classifier has not been trained.
        """
        if not self._class_counts:
            raise ValueError("cannot predict: the classifier has not been trained")

        known_words = [
            word for word in self.clean_text(document)
            if word in self._vocabulary
        ]
        vocabulary_size = len(self._vocabulary)

        scores = {}
        for label in self._class_counts:
            counts = self.feature_counts.get(label, {})
            # Add-one smoothing: every vocabulary word gets one extra count.
            denominator = self._total_words[label] + vocabulary_size
            # Sum logs rather than multiplying probabilities, which would
            # underflow on long documents.
            score = math.log(self.class_probabilities[label])
            for word in known_words:
                score += math.log((counts.get(word, 0) + 1) / denominator)
            scores[label] = score

        # max() returns the first maximal key; scores follows first-seen
        # label order, so ties resolve deterministically.
        return max(scores, key=scores.get)

# Usage example: fit the classifier on a few labelled sentences, then
# classify one unseen sentence and print the predicted label.
training_data = [
    ("good movie", "positive"),
    ("bad experience", "negative"),
    ("not bad", "positive"),
    ("awful movie", "negative"),
    (
        "I think the movie was not very funny , but was not just bored",
        "neutral",
    ),
    ("I do not think was bored", "neutral"),
]

classifier = NaiveBayesClassifier()
classifier.train(training_data)

# None of "it", "is", "ok" occurs in the training data above.
test_document = "I think it is OK"
prediction = classifier.predict(test_document)
print(f"The document is predicted as: {prediction}")

The document is predicted as: neutral
