In [None]:
import pandas as pd
import re
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the review export and, in the same step, discard four columns
# ("date", "rating", "variation", "feedback") that the code visible in this
# script never reads afterwards. A missing column raises KeyError.
df = pd.read_csv("amazon_alexa.csv").drop(
    columns=["date", "rating", "variation", "feedback"]
)

# Clean the text data
# Stop-word sets keyed by language, filled on first use. Calling
# stopwords.words() once per token rebuilds the list every time and forces a
# linear membership scan; a cached frozenset is built once and gives O(1)
# membership tests.
_STOPWORD_SETS = {}


def _stopword_set(language="english"):
    """Return the NLTK stop words for *language* as a cached frozenset."""
    try:
        return _STOPWORD_SETS[language]
    except KeyError:
        loaded = frozenset(stopwords.words(language))
        _STOPWORD_SETS[language] = loaded
        return loaded


def clean_text(text):
    """Normalize one review into a space-separated string of content words.

    The text is lower-cased, every character outside ``[a-zA-Z0-9]`` is
    replaced by a space, the result is split on whitespace, and English stop
    words are removed.

    Args:
        text: The raw review. Anything that is not a ``str`` (for example the
            ``NaN``/``None`` pandas uses for a missing cell) is treated as an
            empty review.

    Returns:
        The cleaned text with tokens joined by single spaces; ``""`` when
        nothing survives cleaning or when *text* is not a string.
    """
    if not isinstance(text, str):
        # Missing values have no text to clean; returning "" keeps
        # Series.apply(clean_text) from crashing on .lower().
        return ""

    text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
    # split() with no argument already collapses runs of whitespace, so no
    # separate whitespace-squeezing substitution is needed.
    words = text.split()
    if not words:
        # Nothing to filter, so the stop-word corpus is not needed.
        return ""

    stop_words = _stopword_set("english")
    return " ".join(word for word in words if word not in stop_words)

df["verified_reviews"] = df["verified_reviews"].apply(clean_text)

# Split the data into training and test sets (80% / 20%).
# random_state pins the shuffle so the split, and therefore every metric
# computed from it, is the same on each run instead of changing randomly.
# NOTE(review): the labels are read from a "sentiment" column; confirm the
# CSV actually provides it, otherwise this line raises KeyError.
X_train, X_test, y_train, y_test = train_test_split(
    df["verified_reviews"],
    df["sentiment"],
    test_size=0.2,
    random_state=42,
)

# Turn the cleaned reviews into token-count matrices. The vocabulary is
# learned from the training reviews only; the test reviews are merely
# transformed with that fitted vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Build the three classifiers with default settings, then fit each one on the
# same training matrix (Naive Bayes, linear SVM, logistic regression, in that
# order).
nb_model = MultinomialNB()
svm_model = LinearSVC()
lr_model = LogisticRegression()
for estimator in (nb_model, svm_model, lr_model):
    estimator.fit(X_train, y_train)

# Predict the held-out reviews with every fitted model.
nb_preds = nb_model.predict(X_test)
svm_preds = svm_model.predict(X_test)
lr_preds = lr_model.predict(X_test)

# Print accuracy, precision and recall for each model, one section per model.
# precision_score / recall_score are called with their default arguments.
# NOTE(review): those defaults expect binary labels with 1 as the positive
# class; confirm the label column matches.
for model_name, predictions in (
    ("Naive Bayes", nb_preds),
    ("Support Vector Machine", svm_preds),
    ("Logistic Regression", lr_preds),
):
    print(model_name)
    print(f"Accuracy: {accuracy_score(y_test, predictions)}")
    print(f"Precision: {precision_score(y_test, predictions)}")
    print(f"Recall: {recall_score(y_test, predictions)}")

# Classify a single unseen review. The linear SVM is hard-coded here; nothing
# above selects it from the printed metrics. The review goes through the same
# cleaning and the already-fitted vectorizer as the training data.
new_review = clean_text("This product is amazing!")
new_review_vectorized = vectorizer.transform([new_review])

# predict() returns one label per input row; exactly one row was passed in.
predicted_label = svm_model.predict(new_review_vectorized)[0]
print("Positive sentiment" if predicted_label == 1 else "Negative sentiment")
