# Sentiment Analysis Technique: ML Approach

**Naive Bayes ML Model**

In [5]:
# Load in libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [60]:
# Read the iPhone review CSV into a DataFrame and preview the first five rows
df = pd.read_csv(filepath_or_buffer="iPhone reviews data for sentiment analysis.csv")
df.head(n=5)

Unnamed: 0,review,tokenized,rating_cleaned,sentiment
0,Really fast shipping option was great because ...,"['really', 'fast', 'shipping', 'option', 'grea...",5,positive
1,I can hardly believe this phone is refurbished...,"['hardly', 'believe', 'phone', 'refurbish', 's...",5,positive
2,I have had this iPhone for 38 days.It has the ...,"['iphone', 'day', 'worst', 'battery', 'life', ...",1,negative
3,I ordered this phone in excellent condition an...,"['order', 'phone', 'excellent', 'condition', '...",5,positive
4,Good so far. Phones battery healt is at 86%. P...,"['good', 'far', 'phone', 'battery', 'health', ...",4,positive


In [62]:
# Naive Bayes ML Model

# Keep only the tokenized text and the sentiment label columns
data = df[['tokenized', 'sentiment']]

# Split data into training (60%) and testing (40%) sets.
# The fixed random_state keeps the split reproducible between runs.
texts = data['tokenized'].tolist()
labels = data['sentiment'].tolist()
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.4, random_state=42)

# Extract features (bag of words representation).
# The vocabulary is fitted on the training split only and reused for the test split.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Initialize Naive Bayes classifier
nb_classifier = MultinomialNB()

# Train classifier
nb_classifier.fit(X_train_vectorized, y_train)

# Predict sentiment using Naive Bayes
nb_predictions = nb_classifier.predict(X_test_vectorized)

# Calculate classification report for Naive Bayes.
# `labels` is passed explicitly so each display name in `target_names` is tied
# to its class by position rather than by the implicit sorted order of the
# labels found in the data.
nb_classification_report = classification_report(
    y_test,
    nb_predictions,
    labels=['negative', 'neutral', 'positive'],
    target_names=['Negative', 'Neutral', 'Positive'],
)

**SVM ML Model**

In [64]:
# Keep only the tokenized text and the sentiment label columns
data = df[['tokenized', 'sentiment']]

# Split data into training (60%) and testing (40%) sets.
# The fixed random_state keeps the split reproducible between runs.
texts = data['tokenized'].tolist()
labels = data['sentiment'].tolist()
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.4, random_state=42)

# Extract features (bag of words representation).
# The vocabulary is fitted on the training split only and reused for the test split.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Initialize SVM classifier
svm_classifier = SVC(kernel='linear')  # You can also try 'rbf' or 'poly' kernels

# Train classifier
svm_classifier.fit(X_train_vectorized, y_train)

# Predict sentiment using SVM
svm_predictions = svm_classifier.predict(X_test_vectorized)

# Calculate classification report for SVM.
# `labels` is passed explicitly so each display name in `target_names` is tied
# to its class by position rather than by the implicit sorted order of the
# labels found in the data.
svm_classification_report = classification_report(
    y_test,
    svm_predictions,
    labels=['negative', 'neutral', 'positive'],
    target_names=['Negative', 'Neutral', 'Positive'],
)

In [66]:
# Show the Naive Bayes report under its heading (heading and report on separate lines)
print("\nClassification Report for Naive Bayes:", nb_classification_report, sep="\n")

# Show the SVM report under its heading in the same layout
print("\nClassification Report for SVM:", svm_classification_report, sep="\n")


Classification Report for Naive Bayes:
              precision    recall  f1-score   support

    Negative       0.75      0.57      0.65       100
     Neutral       0.33      0.21      0.26        29
    Positive       0.82      0.92      0.87       281

    accuracy                           0.79       410
   macro avg       0.63      0.57      0.59       410
weighted avg       0.77      0.79      0.77       410


Classification Report for SVM:
              precision    recall  f1-score   support

    Negative       0.69      0.68      0.69       100
     Neutral       0.35      0.41      0.38        29
    Positive       0.88      0.88      0.88       281

    accuracy                           0.80       410
   macro avg       0.64      0.66      0.65       410
weighted avg       0.80      0.80      0.80       410



# Sentiment Analysis Technique: Lexicon Approach

**VADER Lexicon**

In [72]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.metrics import classification_report
from tabulate import tabulate

In [9]:
# Read the iPhone review CSV into a DataFrame and preview the first five rows
df = pd.read_csv(filepath_or_buffer="iPhone reviews data for sentiment analysis.csv")
df.head(n=5)

Unnamed: 0,review,tokenized,rating_cleaned,sentiment
0,Really fast shipping option was great because ...,"['really', 'fast', 'shipping', 'option', 'grea...",5,positive
1,I can hardly believe this phone is refurbished...,"['hardly', 'believe', 'phone', 'refurbish', 's...",5,positive
2,I have had this iPhone for 38 days.It has the ...,"['iphone', 'day', 'worst', 'battery', 'life', ...",1,negative
3,I ordered this phone in excellent condition an...,"['order', 'phone', 'excellent', 'condition', '...",5,positive
4,Good so far. Phones battery healt is at 86%. P...,"['good', 'far', 'phone', 'battery', 'health', ...",4,positive


In [74]:
# Work on an independent copy that holds only the raw review text
data = df.loc[:, ['review']].copy()

# Single VADER analyzer instance, used by the scoring function below
analyzer = SentimentIntensityAnalyzer()


def get_vader_sentiment(text):
    """Score one review with VADER and bucket its compound score.

    Missing values are scored as an empty string; any other value is
    converted with ``str`` first.

    Returns a two-element ``pd.Series``: ``[compound, label]``, where
    ``label`` is 'Positive' when compound > 0.05, 'Negative' when
    compound < -0.05, and 'Neutral' otherwise.
    """
    cleaned = '' if pd.isnull(text) else str(text)
    score = analyzer.polarity_scores(cleaned)['compound']
    # NOTE(review): the comparisons are strict, so a score of exactly +/-0.05
    # is labelled Neutral. VADER's README describes inclusive cut-offs
    # (>= 0.05 / <= -0.05) — confirm which is intended.
    if score < -0.05:
        sentiment = 'Negative'
    elif score > 0.05:
        sentiment = 'Positive'
    else:
        sentiment = 'Neutral'
    return pd.Series([score, sentiment])

# Apply the function to each row; the two returned values become two new columns
data[['VADER_Compound', 'VADER_Sentiment']] = data['review'].apply(get_vader_sentiment)

# Add actual sentiment labels, title-cased to match the predicted label casing
data['Actual_Sentiment'] = df['sentiment'].str.title()
data['VADER_Sentiment'] = data['VADER_Sentiment'].str.title()

# Print table of actual vs predicted for the first 10 reviews.
# Reviews are cut to 100 characters so the plain-text table stays readable;
# non-string entries (e.g. missing reviews) are passed through unchanged.
table_data = data[['review', 'Actual_Sentiment', 'VADER_Sentiment']].head(10).copy()
table_data['review'] = table_data['review'].apply(
    lambda x: (x[:100] + '...') if isinstance(x, str) and len(x) > 100 else x
)
print(tabulate(table_data, headers='keys', tablefmt='plain', showindex=False))

# Generate classification report.
# `labels` is passed explicitly so each name in `target_names` is tied to its
# class by position rather than by the implicit sorted order of the labels
# found in the data.
y_true = data['Actual_Sentiment']
y_pred = data['VADER_Sentiment']
report = classification_report(
    y_true,
    y_pred,
    labels=['Negative', 'Neutral', 'Positive'],
    target_names=['Negative', 'Neutral', 'Positive'],
)

print("\nClassification Report for VADER:")
print(report)

review                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

In [47]:
from tabulate import tabulate

# Create a copy to avoid changing the original
sample = data.copy()

# Truncate long review text to 100 characters for display.
# The isinstance guard lets non-string entries (e.g. missing reviews read as
# NaN) pass through instead of raising on len().
sample['review'] = sample['review'].apply(
    lambda x: (x[:100] + '...') if isinstance(x, str) and len(x) > 100 else x
)

# Convert DataFrame to list of lists with column headers as first row
table_data = [sample.columns.tolist()] + sample.values.tolist()

# Print the sentiment analysis results in plain table format
print(tabulate(table_data, headers="firstrow", tablefmt="plain"))

review                                                                                                         VADER_Compound  VADER_Sentiment
Really fast shipping option was great because I broke my phone. The phone is in excellent condition ...                0.9883  Positive
I can hardly believe this phone is refurbished - it seems brand new. I haven't bought refurbished el...                0.9726  Positive
I have had this iPhone for 38 days.It has the worst battery life of any iPhone I've ever owned, and ...                0.8924  Positive
I ordered this phone in excellent condition and it was just THAT !!! Excellent just like as describe...                0.9791  Positive
Good so far. Phones battery healt is at 86%. Phone condition is good, no scratches. Phone seems to b...                0.7433  Positive
For the price and quality this is a great phone. The size is about perfect and the screen works grea...                0.9571  Positive
Great phone in great condition. Battery h

**TextBlob Lexicon**

In [68]:
from textblob import TextBlob
from sklearn.metrics import classification_report
from tabulate import tabulate
import pandas as pd

In [64]:
# Read the iPhone review CSV into a DataFrame and preview the first five rows
df = pd.read_csv(filepath_or_buffer="iPhone reviews data for sentiment analysis.csv")
df.head(n=5)

Unnamed: 0,review,tokenized,rating_cleaned,sentiment
0,Really fast shipping option was great because ...,"['really', 'fast', 'shipping', 'option', 'grea...",5,positive
1,I can hardly believe this phone is refurbished...,"['hardly', 'believe', 'phone', 'refurbish', 's...",5,positive
2,I have had this iPhone for 38 days.It has the ...,"['iphone', 'day', 'worst', 'battery', 'life', ...",1,negative
3,I ordered this phone in excellent condition an...,"['order', 'phone', 'excellent', 'condition', '...",5,positive
4,Good so far. Phones battery healt is at 86%. P...,"['good', 'far', 'phone', 'battery', 'health', ...",4,positive


In [70]:
# Work on an independent copy that holds only the raw review text
data = df.loc[:, ['review']].copy()


def get_textblob_sentiment(text):
    """Score one review with TextBlob and bucket its polarity.

    Missing values are scored as an empty string; any other value is
    converted with ``str`` first.

    Returns a two-element ``pd.Series``: ``[polarity, label]``, where
    ``label`` is 'Positive' when polarity > 0.05, 'Negative' when
    polarity < -0.05, and 'Neutral' otherwise.
    """
    cleaned = '' if pd.isnull(text) else str(text)
    score = TextBlob(cleaned).sentiment.polarity
    if score < -0.05:
        sentiment = 'Negative'
    elif score > 0.05:
        sentiment = 'Positive'
    else:
        sentiment = 'Neutral'
    return pd.Series([score, sentiment])

# Apply the function to the reviews; the two returned values become two new columns
data[['TextBlob_Polarity', 'TextBlob_Sentiment']] = data['review'].apply(get_textblob_sentiment)

# Add actual labels for comparison, title-cased to match the predicted label casing
data['Actual_Sentiment'] = df['sentiment'].str.title()

# Create table to show actual vs predicted sentiment for the first 10 reviews.
# Reviews are cut to 100 characters for display; the isinstance guard lets
# non-string entries (e.g. missing reviews read as NaN) pass through instead
# of raising on len().
sentiment_table = data[['review', 'Actual_Sentiment', 'TextBlob_Sentiment']]
sentiment_table_sample = sentiment_table.head(10).copy()
sentiment_table_sample['review'] = sentiment_table_sample['review'].apply(
    lambda x: (x[:100] + '...') if isinstance(x, str) and len(x) > 100 else x
)

print("\nSample Sentiment Comparison Table:")
print(tabulate(sentiment_table_sample, headers='keys', tablefmt='plain', showindex=False))

# Classification report.
# `labels` is passed explicitly so each name in `target_names` is tied to its
# class by position rather than by the implicit sorted order of the labels
# found in the data.
report = classification_report(
    data['Actual_Sentiment'],
    data['TextBlob_Sentiment'],
    labels=['Negative', 'Neutral', 'Positive'],
    target_names=['Negative', 'Neutral', 'Positive'],
)
print("\nClassification Report for TextBlob:")
print(report)


Sample Sentiment Comparison Table:
review                                                                                                   Actual_Sentiment    TextBlob_Sentiment
Really fast shipping option was great because I broke my phone. The phone is in excellent condition ...  Positive            Positive
I can hardly believe this phone is refurbished - it seems brand new. I haven't bought refurbished el...  Positive            Positive
I have had this iPhone for 38 days.It has the worst battery life of any iPhone I've ever owned, and ...  Negative            Positive
I ordered this phone in excellent condition and it was just THAT !!! Excellent just like as describe...  Positive            Positive
Good so far. Phones battery healt is at 86%. Phone condition is good, no scratches. Phone seems to b...  Positive            Positive
For the price and quality this is a great phone. The size is about perfect and the screen works grea...  Positive            Positive
Great phone in g

# Results of Sentiment Analysis

In [4]:
# Install required packages (run only once if not already installed)
!pip install vaderSentiment textblob

# Imports
import pandas as pd
import ast
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Load dataset
df = pd.read_csv("iPhone reviews data for sentiment analysis.csv")

# The token lists are stored as strings in the CSV; parse them back into Python lists
df['tokenized'] = df['tokenized'].apply(ast.literal_eval)

# Build one space-joined document per review, plus the matching labels
texts = list(map(' '.join, df['tokenized']))
labels = df['sentiment'].tolist()

# Bag-of-words counts over every review
vectorizer = CountVectorizer()
X_all = vectorizer.fit_transform(texts)

# NOTE(review): both models below are fitted on X_all and then predict on the
# same X_all, so the stored predictions are in-sample.

# Naive Bayes
nb_model = MultinomialNB().fit(X_all, labels)
df['sentiment_nb'] = nb_model.predict(X_all)

# SVM
svm_model = SVC(kernel='linear').fit(X_all, labels)
df['sentiment_svm'] = svm_model.predict(X_all)

# VADER Sentiment
analyzer = SentimentIntensityAnalyzer()
def get_vader_sentiment(text):
    """Return 'positive', 'negative' or 'neutral' for one review using VADER.

    Missing values are scored as an empty string and any other value is
    converted with ``str``, so a NaN or non-string review does not raise.
    The label is 'positive' when the compound score is > 0.05, 'negative'
    when it is < -0.05, and 'neutral' otherwise.
    """
    # Guard against missing / non-string reviews before scoring
    text = '' if pd.isnull(text) else str(text)
    score = analyzer.polarity_scores(text)['compound']
    if score > 0.05:
        return 'positive'
    elif score < -0.05:
        return 'negative'
    else:
        return 'neutral'
df['sentiment_vader'] = df['review'].apply(get_vader_sentiment)

# TextBlob Sentiment
def get_textblob_sentiment(text):
    """Return 'positive', 'negative' or 'neutral' for one review using TextBlob.

    Missing values are scored as an empty string and any other value is
    converted with ``str``, so a NaN or non-string review does not raise.
    The label is 'positive' when the polarity is > 0.05, 'negative' when it
    is < -0.05, and 'neutral' otherwise.
    """
    # Guard against missing / non-string reviews before scoring
    text = '' if pd.isnull(text) else str(text)
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.05:
        return 'positive'
    elif polarity < -0.05:
        return 'negative'
    else:
        return 'neutral'
df['sentiment_textblob'] = df['review'].apply(get_textblob_sentiment)

# Keep the tokens, the true label and the four predicted labels, then write
# them to CSV without the index column
df_result = df[[
    'tokenized',
    'sentiment',
    'sentiment_nb',
    'sentiment_svm',
    'sentiment_vader',
    'sentiment_textblob',
]]
df_result.to_csv(path_or_buf="sentiment_results.csv", index=False)

