In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report,
)
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load the processed review dataset and pull out the text column and its
# sentiment label column.
data = pd.read_csv("../IMDB-Dataset-GoogleTranslate-Processed2.csv")
review = data["review"]
sentiment = data["sentiment"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    review,
    sentiment,
    test_size=0.2,
    random_state=0,
)
def _tfidf_pipeline(classifier):
    """Return a pipeline feeding default TF-IDF features into *classifier*.

    The steps are named "vect" (the vectorizer) and "clf" (the classifier).
    """
    return Pipeline(
        steps=[
            ("vect", TfidfVectorizer()),
            ("clf", classifier),
        ]
    )


# Three candidate models that share the same TF-IDF front end and differ only
# in the final estimator: multinomial naive Bayes, a linear SVM, and logistic
# regression using the "saga" solver.
pipeline_1 = _tfidf_pipeline(MultinomialNB(fit_prior=True, class_prior=None))
pipeline_2 = _tfidf_pipeline(LinearSVC(dual="auto"))
pipeline_3 = _tfidf_pipeline(LogisticRegression(solver="saga"))

# Train every pipeline on the training split, in the order 1, 2, 3.
for _pipe in (pipeline_1, pipeline_2, pipeline_3):
    _pipe.fit(x_train, y_train)

# Predict labels for the held-out reviews with each fitted pipeline.
predict_1, predict_2, predict_3 = [
    fitted.predict(x_test) for fitted in (pipeline_1, pipeline_2, pipeline_3)
]

# Print one per-class precision/recall/F1 report per model, to 4 decimals.
for _predicted in (predict_1, predict_2, predict_3):
    print(classification_report(y_test, _predicted, digits=4))

              precision    recall  f1-score   support

    negative     0.8448    0.8810    0.8625      4975
    positive     0.8770    0.8398    0.8580      5025

    accuracy                         0.8603     10000
   macro avg     0.8609    0.8604    0.8603     10000
weighted avg     0.8610    0.8603    0.8603     10000

              precision    recall  f1-score   support

    negative     0.8977    0.8907    0.8942      4975
    positive     0.8926    0.8995    0.8960      5025

    accuracy                         0.8951     10000
   macro avg     0.8951    0.8951    0.8951     10000
weighted avg     0.8951    0.8951    0.8951     10000

              precision    recall  f1-score   support

    negative     0.8963    0.8808    0.8885      4975
    positive     0.8840    0.8991    0.8915      5025

    accuracy                         0.8900     10000
   macro avg     0.8901    0.8900    0.8900     10000
weighted avg     0.8901    0.8900    0.8900     10000

