In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Directory holding the two filtered review workbooks. It defaults to the
# original author's local folder; set the REVIEWS_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    'REVIEWS_DATA_DIR',
    '/Users/abhi/Desktop/MSU/8. Coursework/3. Fall Semester/ITM881_Network Analytics/03_Project',
))

# Load one table of reviews per platform.
zomato = pd.read_excel(DATA_DIR / 'Zomato_filtered.xlsx')
swiggy = pd.read_excel(DATA_DIR / 'Swiggy_filtered.xlsx')

### Sentiment Analysis

In [3]:
from textblob import TextBlob

# Coerce every review to text. Missing cells are replaced with '' first:
# a bare astype(str) turns NaN into the literal string 'nan', which any later
# text processing would treat as a real word.
zomato['review_description'] = zomato['review_description'].fillna('').astype(str)
swiggy['review_description'] = swiggy['review_description'].fillna('').astype(str)

# Create a function to analyze sentiment
def analyze_sentiment(text, threshold=0.05):
    """Label a piece of text as 'Positive', 'Negative' or 'Neutral'.

    The label is derived from the polarity score TextBlob assigns to ``text``.

    Parameters
    ----------
    text : str
        Text to score.
    threshold : float, default 0.05
        Half-width of the neutral band around zero. Polarity strictly above
        ``threshold`` is 'Positive', strictly below ``-threshold`` is
        'Negative'; everything else (including exactly +/-threshold) is
        'Neutral'.

    Returns
    -------
    str
        One of 'Positive', 'Negative', 'Neutral'.
    """
    # Score the text once and reuse the value for both comparisons.
    polarity = TextBlob(text).sentiment.polarity
    if polarity > threshold:
        return 'Positive'
    if polarity < -threshold:
        return 'Negative'
    return 'Neutral'

# Tag every review in both tables with its sentiment label.
for platform_reviews in (zomato, swiggy):
    platform_reviews['sentiment'] = platform_reviews['review_description'].apply(analyze_sentiment)

# Print each review next to the label it received, Zomato first.
for platform_reviews in (zomato, swiggy):
    print(platform_reviews[['review_description', 'sentiment']])


                                      review_description sentiment
0      A few months ago, the app worked flawlessly. U...  Positive
1      Great delivery app. But off lately there have ...  Positive
2      Your app and delivery everything is good. But ...  Positive
3      Poor cust. Experience. I ordered items from a ...  Negative
4      Zomato seems to have lost it recently. Have be...  Negative
...                                                  ...       ...
87964                   Very fast dilevery very good app  Positive
87965  Good food with discounted from famous restaura...  Positive
87966               Your delivery is so fast . I like it  Positive
87967                 Nice service&cashback is very good  Positive
87968                                      Gold is Gold!   Neutral

[87969 rows x 2 columns]
                                      review_description sentiment
0      Swiggy clearly has to be one of the worst apps...  Negative
1      The delivery time for each or

In [4]:
def _sentiment_percentages(counts, total):
    """Return the (positive, negative, neutral) shares of ``total`` in percent.

    ``counts`` maps a sentiment label to its number of occurrences (here the
    result of ``value_counts()``); a label that never occurs counts as 0.
    When ``total`` is 0 every share is reported as 0.0 rather than dividing
    by zero.
    """
    if total == 0:
        return 0.0, 0.0, 0.0
    return tuple(
        (counts.get(label, 0) / total) * 100
        for label in ('Positive', 'Negative', 'Neutral')
    )


# Count the number of occurrences for each sentiment label
sentiment_counts_zomato = zomato['sentiment'].value_counts()
sentiment_counts_swiggy = swiggy['sentiment'].value_counts()

# Calculate the total number of samples
total_samples_zomato = len(zomato)
total_samples_swiggy = len(swiggy)

# Calculate the percentage of each sentiment for Zomato
percentage_positive_z, percentage_negative_z, percentage_neutral_z = _sentiment_percentages(
    sentiment_counts_zomato, total_samples_zomato
)

# Display the percentages
print(f"Percentage of Positive Sentiments for Zomato: {percentage_positive_z:.2f}%")
print(f"Percentage of Negative Sentiments for Zomato: {percentage_negative_z:.2f}%")
print(f"Percentage of Neutral Sentiments for Zomato: {percentage_neutral_z:.2f}%")

# Calculate the percentage of each sentiment for Swiggy
percentage_positive_s, percentage_negative_s, percentage_neutral_s = _sentiment_percentages(
    sentiment_counts_swiggy, total_samples_swiggy
)

# Display the percentages
print(f"Percentage of Positive Sentiments for Swiggy: {percentage_positive_s:.2f}%")
print(f"Percentage of Negative Sentiments for Swiggy: {percentage_negative_s:.2f}%")
print(f"Percentage of Neutral Sentiments for Swiggy: {percentage_neutral_s:.2f}%")


Percentage of Positive Sentiments for Zomato: 54.89%
Percentage of Negative Sentiments for Zomato: 31.76%
Percentage of Neutral Sentiments for Zomato: 13.34%
Percentage of Positive Sentiments for Swiggy: 41.91%
Percentage of Negative Sentiments for Swiggy: 42.14%
Percentage of Neutral Sentiments for Swiggy: 15.95%


### Zomato: top TF-IDF keywords by sentiment

In [8]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer capped at 100 features, with English stop words removed
tfidf_vectorizer = TfidfVectorizer(max_features=100, stop_words='english')

# Fit and transform the "review_description" column
tfidf_matrix = tfidf_vectorizer.fit_transform(zomato['review_description'])

# Get the feature names (words or phrases)
feature_names = tfidf_vectorizer.get_feature_names_out()

# One row of TF-IDF weights per review. The frame reuses zomato's index so its
# rows stay aligned with the reviews even if zomato is not indexed 0..n-1
# (for example after filtering rows).
keywords_df = pd.DataFrame(
    data=tfidf_matrix.toarray(), columns=feature_names, index=zomato.index
)

# Row masks as plain boolean arrays: selection is then purely positional and
# cannot be thrown off by missing or duplicated index labels.
is_positive = (zomato['sentiment'] == 'Positive').to_numpy()
is_negative = (zomato['sentiment'] == 'Negative').to_numpy()

# Rank terms by their mean TF-IDF weight across the positive reviews
positive_reviews = zomato.loc[is_positive]
positive_keywords = keywords_df.loc[is_positive].mean().sort_values(ascending=False)

# Rank terms by their mean TF-IDF weight across the negative reviews
negative_reviews = zomato.loc[is_negative]
negative_keywords = keywords_df.loc[is_negative].mean().sort_values(ascending=False)

# NOTE(review): the recorded output lists the token 'ðÿ' among the top positive
# terms; this looks like mis-decoded emoji bytes in the source data — confirm
# and fix the encoding upstream if so.

# Display the top keywords or phrases for positive and negative reviews
print("Top Keywords or Phrases in Positive Reviews:")
print(positive_keywords.head(10))

print("\nTop Keywords or Phrases in Negative Reviews:")
print(negative_keywords.head(10))


Top Keywords or Phrases in Positive Reviews:
good        0.142863
delivery    0.114847
app         0.102775
food        0.090583
zomato      0.076577
service     0.075852
fast        0.063888
ðÿ          0.056850
best        0.056812
time        0.053741
dtype: float64

Top Keywords or Phrases in Negative Reviews:
order         0.107315
worst         0.104731
bad           0.094444
zomato        0.091965
food          0.084498
service       0.083921
delivery      0.081536
customer      0.080845
app           0.078064
experience    0.062443
dtype: float64


### Swiggy: top TF-IDF keywords by sentiment

In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer capped at 100 features, with English stop words removed
tfidf_vectorizer = TfidfVectorizer(max_features=100, stop_words='english')

# Fit and transform the "review_description" column
tfidf_matrix = tfidf_vectorizer.fit_transform(swiggy['review_description'])

# Get the feature names (words or phrases)
feature_names = tfidf_vectorizer.get_feature_names_out()

# One row of TF-IDF weights per review. The frame reuses swiggy's index so its
# rows stay aligned with the reviews even if swiggy is not indexed 0..n-1
# (for example after filtering rows).
keywords_df = pd.DataFrame(
    data=tfidf_matrix.toarray(), columns=feature_names, index=swiggy.index
)

# Row masks as plain boolean arrays: selection is then purely positional and
# cannot be thrown off by missing or duplicated index labels.
is_positive = (swiggy['sentiment'] == 'Positive').to_numpy()
is_negative = (swiggy['sentiment'] == 'Negative').to_numpy()

# Rank terms by their mean TF-IDF weight across the positive reviews
positive_reviews = swiggy.loc[is_positive]
positive_keywords = keywords_df.loc[is_positive].mean().sort_values(ascending=False)

# Rank terms by their mean TF-IDF weight across the negative reviews
negative_reviews = swiggy.loc[is_negative]
negative_keywords = keywords_df.loc[is_negative].mean().sort_values(ascending=False)

# NOTE(review): the recorded output lists the token 'ðÿ' among the top positive
# terms; this looks like mis-decoded emoji bytes in the source data — confirm
# and fix the encoding upstream if so.

# Display the top keywords or phrases for positive and negative reviews
print("Top Keywords or Phrases in Positive Reviews:")
print(positive_keywords.head(10))

print("\nTop Keywords or Phrases in Negative Reviews:")
print(negative_keywords.head(10))


Top Keywords or Phrases in Positive Reviews:
good        0.137698
app         0.110522
delivery    0.107824
food        0.090936
swiggy      0.079847
service     0.063212
best        0.059411
time        0.055023
ðÿ          0.053882
order       0.051178
dtype: float64

Top Keywords or Phrases in Negative Reviews:
worst       0.115177
order       0.104573
delivery    0.095116
app         0.090313
service     0.089577
bad         0.086564
swiggy      0.082740
food        0.073652
customer    0.073073
time        0.062778
dtype: float64
