In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report,
)
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Compare three text classifiers (multinomial naive Bayes, linear SVM and
# logistic regression) on TF-IDF features of the "review" column, predicting
# the "sentiment" column, and print a classification report for each.

# One seed for the train/test split AND for the estimators that shuffle the
# data internally. The split was already seeded; without seeding the
# estimators as well, the printed metrics can drift slightly between runs.
RANDOM_STATE = 0


def _tfidf_pipeline(classifier):
    """Return a pipeline that TF-IDF-vectorizes text and feeds `classifier`.

    The steps are named "vect" and "clf" so they can be addressed by name
    (e.g. ``pipeline.named_steps["clf"]``).
    """
    return Pipeline(
        [
            ("vect", TfidfVectorizer()),
            ("clf", classifier),
        ]
    )


data = pd.read_csv("../IMDB-Dataset-GoogleTranslate-Processed.csv")
review, sentiment = data["review"], data["sentiment"]

# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    review, sentiment, test_size=0.2, random_state=RANDOM_STATE
)

pipeline_1 = _tfidf_pipeline(MultinomialNB(fit_prior=True, class_prior=None))
# LinearSVC uses random_state when solving the dual problem; it is harmless
# otherwise.
pipeline_2 = _tfidf_pipeline(LinearSVC(random_state=RANDOM_STATE))
# The "sag" solver shuffles the samples, so it needs a seed to be repeatable.
pipeline_3 = _tfidf_pipeline(
    LogisticRegression(solver="sag", random_state=RANDOM_STATE)
)

# Fit every model on the training split first, then predict, then report,
# keeping the same order as the individual statements this replaces.
_pipelines = (pipeline_1, pipeline_2, pipeline_3)
for _model in _pipelines:
    _model.fit(x_train, y_train)

predict_1, predict_2, predict_3 = (
    model.predict(x_test) for model in _pipelines
)

# Reports are printed in pipeline order: naive Bayes, linear SVM,
# logistic regression.
for _prediction in (predict_1, predict_2, predict_3):
    print(classification_report(y_test, _prediction, digits=4))





              precision    recall  f1-score   support

    negative     0.8446    0.8816    0.8627      4975
    positive     0.8775    0.8394    0.8580      5025

    accuracy                         0.8604     10000
   macro avg     0.8610    0.8605    0.8604     10000
weighted avg     0.8611    0.8604    0.8603     10000

              precision    recall  f1-score   support

    negative     0.8953    0.8852    0.8902      4975
    positive     0.8876    0.8975    0.8925      5025

    accuracy                         0.8914     10000
   macro avg     0.8915    0.8914    0.8914     10000
weighted avg     0.8914    0.8914    0.8914     10000

              precision    recall  f1-score   support

    negative     0.8943    0.8754    0.8847      4975
    positive     0.8791    0.8975    0.8882      5025

    accuracy                         0.8865     10000
   macro avg     0.8867    0.8864    0.8865     10000
weighted avg     0.8867    0.8865    0.8865     10000

