In [1]:
import pandas as pd
import langid 
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


In [2]:
# Load the dataset and discard rows whose 'content' value is missing
file_path = 'input/output.csv'
data = pd.read_csv(file_path).dropna(subset=['content'])

# Show how many rows fall into each class of the 'is_positive' column
print(data['is_positive'].value_counts())


is_positive
Positive    102377
Negative     98346
Name: count, dtype: int64


In [3]:
# Input text (X) and target label (y)
X = data['content']
y = data['is_positive']

# Hold out 30% of the rows for testing; the split is stratified on the label
# and uses a fixed seed so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


In [4]:
# Turn the raw review text into TF-IDF feature matrices.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
tfidf = TfidfVectorizer(
    max_features=10000,
    stop_words="english",
)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


NameError: name 'detect_language' is not defined

## Addestramento Modello

In [16]:
# Fit a multinomial Naive Bayes classifier on the training TF-IDF matrix.
# fit() returns the estimator itself, so construction and fitting are chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the held-out test matrix
y_pred = model.predict(X_test_tfidf)


In [None]:
# Evaluation: overall accuracy followed by the per-class report on the test split
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)


Accuracy: 0.8212464918544597

Classification Report:
               precision    recall  f1-score   support

    Negative       0.85      0.77      0.81     29504
    Positive       0.80      0.87      0.83     30713

    accuracy                           0.82     60217
   macro avg       0.82      0.82      0.82     60217
weighted avg       0.82      0.82      0.82     60217



In [None]:
# Mostriamo alcune recensioni e la loro predizione
test_results = pd.DataFrame({
    'review': X_test,
    'predicted': y_pred,
    'actual': y_test
}).reset_index(drop=True)

print(test_results.head(10))

                                              review predicted    actual
0                                             cheese  Positive  Positive
1  great puzzles,great voice acting,great comedy,...  Positive  Positive
2                          good game would recommend  Positive  Positive
3  okay okay for real i didnt hate it UNTIL I SOM...  Positive  Negative
4                                    Прекрасная игра  Negative  Positive
5             Peak game is super good and fun. 10/10  Positive  Positive
6             If you wanna sell your soul install it  Negative  Negative
7  The game scene as the worse than opposing forc...  Positive  Positive
8                       this is og and the best game  Positive  Positive
9  It's not very popular, but there's quite a lot...  Positive  Positive


In [19]:
# New, unseen reviews to classify
new_reviews = [
    "This game is amazing!",
    "It's a buggy mess.",
]

# Vectorize with the already-fitted TF-IDF vectorizer, then predict
new_reviews_tfidf = tfidf.transform(new_reviews)
predictions = model.predict(new_reviews_tfidf)
print(predictions)


['Positive' 'Negative']
