In [None]:
import nltk
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# 1. Load the movie-review data from NLTK
nltk.download('movie_reviews')
# List the corpus file ids once and derive both the texts and the labels from
# that same list, so the two stay aligned by position.
file_ids = movie_reviews.fileids()
reviews = [movie_reviews.raw(file_id) for file_id in file_ids]
# Label is 1 when the file id starts with 'pos', otherwise 0.
labels = [int(file_id.startswith('pos')) for file_id in file_ids]

In [None]:
# 2. Split the dataset into training and test sets
# 25% of the reviews are held out for testing, and random_state pins the
# shuffle so the split is reproducible across runs.
# stratify=labels keeps the proportion of each label the same in both subsets,
# so the reported metrics do not depend on the class mix the shuffle happens
# to put in the held-out set.
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    labels,
    test_size=0.25,
    random_state=42,
    stratify=labels,
)

In [None]:
# 3. TF-IDF vectorization
# Vectorizer settings gathered in one place: the built-in English stop-word
# list and a cap of 2000 on the number of features.
tfidf_options = {'stop_words': 'english', 'max_features': 2000}
vectorizer = TfidfVectorizer(**tfidf_options)
# Fit on the training texts only, then apply the already-fitted vectorizer to
# the test texts.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
# 4. Train the Naive Bayes classifier
# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train_vec, y_train)
# Bare expression so the cell still displays the fitted estimator.
model

In [None]:
# 5. Evaluate the model on the test split
y_pred = model.predict(X_test_vec)

# Overall accuracy of the predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)
print("Acurácia:", accuracy)

# Per-class report, printed on its own lines right after the header.
report = classification_report(y_test, y_pred)
print("Relatório de Classificação:", report, sep="\n")

In [None]:
# 6. Try the model on a new review
nova_review = "The movie was fantastic with a great storyline and strong performances."
# The single review is wrapped in a list before being passed to the fitted
# vectorizer.
nova_review_vec = vectorizer.transform([nova_review])
# Exactly one input row was passed, so unpack the single predicted label.
(predicted_label,) = model.predict(nova_review_vec)
print("Classificação:", predicted_label)