In [34]:
#importing essential libraries
import pandas as pd
from collections import Counter

# Analysis to Get Actionable Insights

*Top 5 - Positive & Negative Words*

In [39]:
# Load the sentiment analysis file (one row per review; the code below reads
# its 'cleaned_review' and 'sentiment' columns).
data = pd.read_csv('BA_reviews_sentiment_analysis.csv')

# Remove the airline's own name so it does not dominate the word counts.
# The pattern matches whole words only: a plain substring replace of 'ba'
# would also eat the middle of ordinary words ('cabin' -> 'cin',
# 'baggage' -> 'ggage', 'back' -> 'ck') and distort the frequencies.
# The longer phrase is listed first so it is removed as a unit.
airline_name_pattern = r'\b(?:british airways?|ba)\b'
data['cleaned_review'] = data['cleaned_review'].str.replace(
    airline_name_pattern, '', regex=True
)

# Top 5 most frequent words in positive and negative reviews
for sentiment in ['Positive', 'Negative']:
    # .loc avoids chained indexing; dropna() keeps str.join from failing on
    # rows whose cleaned text is missing (NaN is a float, not a str).
    reviews = data.loc[data['sentiment'] == sentiment, 'cleaned_review'].dropna()

    # Combine all reviews into one string
    combined_reviews = ' '.join(reviews)

    # Tokenize on whitespace; split() with no argument also collapses the
    # double spaces left behind where the airline name was removed.
    words = combined_reviews.split()

    # Count word occurrences
    word_counts = Counter(words)

    # Get the top 5 most frequent words as (word, count) pairs
    top_words = word_counts.most_common(5)

    print(f"Top 5 words in {sentiment} reviews:", [word for word, _count in top_words])

Top 5 words in Positive reviews: ['flight', 'seat', 'service', 'good', 'crew']
Top 5 words in Negative reviews: ['flight', 'seat', 'service', 'london', 'hour']


*Most Common Words Across Topics*

In [36]:
# Read the topic words file
# (presumably one column per topic, each holding that topic's top words -- TODO confirm)
topic_words = pd.read_csv('BA_reviews_topic_words.csv')

# Flatten the table into a single list, walking the columns left to right
all_top_words = []
for column_name in topic_words.columns:
    all_top_words.extend(topic_words[column_name])

# Tally how often each word appears across all columns
word_counts = Counter()
word_counts.update(all_top_words)

# Keep the ten most frequent entries as (word, count) pairs
common_words = word_counts.most_common(10)

# Show the most common words across topics
print("Most Common Words Across Topics:")
print(common_words)

Most Common Words Across Topics:
[('service', 14), ('time', 12), ('food', 12), ('seat', 10), ('crew', 9), ('cabin', 8), ('london', 7), ('one', 7), ('airline', 6), ('hour', 5)]


*VADER Sentiment Analyzer: Top 5 Positive & Negative Reviews Associated with Their Respective Positive & Negative Words*

In [37]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from collections import defaultdict

# VADER sentiment analyzer
# NOTE(review): NLTK needs the 'vader_lexicon' resource available locally
# (nltk.download('vader_lexicon')) -- confirm it is installed in this environment.
sid = SentimentIntensityAnalyzer()

# Keywords for positive and negative reviews. These mirror the top-5 words
# printed by the "Top 5 - Positive & Negative Words" cell; keep them in sync
# with that cell's output whenever it is re-run.
positive_keywords = ['flight', 'seat', 'service', 'good', 'crew']
negative_keywords = ['flight', 'seat', 'service', 'london', 'hour']


def _top_reviews(reviews_by_keyword, keywords, n=5, most_positive=True):
    """Pick the n most extreme reviews for each keyword.

    Parameters
    ----------
    reviews_by_keyword : mapping of keyword -> list of (review, score) tuples
    keywords : iterable of keywords, processed in the given order
    n : maximum number of reviews kept per keyword
    most_positive : if True keep the highest scores, otherwise the lowest

    Returns
    -------
    list of (review, score, keyword) tuples, grouped by keyword in input order.
    """
    selected = []
    for keyword in keywords:
        ranked = sorted(
            reviews_by_keyword[keyword],
            key=lambda scored: scored[1],
            reverse=most_positive,
        )
        selected.extend((review, score, keyword) for review, score in ranked[:n])
    return selected


def _write_section(handle, title, entries):
    """Write a title line followed by one numbered record per (review, score, keyword)."""
    handle.write(title)
    for i, (review, sentiment_score, keyword) in enumerate(entries, start=1):
        handle.write(f"Review {i}: Keyword: {keyword}, Sentiment Score: {sentiment_score:.2f}\n")
        handle.write(f"Review: {review}\n\n")


# Dictionary to store reviews for each keyword
positive_reviews_by_keyword = defaultdict(list)
negative_reviews_by_keyword = defaultdict(list)

# Score every review once and file it under each keyword it mentions.
# Iterating the column directly is cheaper than iterrows(), and dropna()
# skips rows with no review text (NaN has no .lower()).
for review in data['review'].dropna().astype(str):
    sentiment_score = sid.polarity_scores(review)['compound']
    # Lower-case once per review rather than once per keyword.
    review_lower = review.lower()

    # Substring match on purpose: the keyword 'seat' also catches 'seats'.
    for keyword in positive_keywords:
        if keyword in review_lower:
            positive_reviews_by_keyword[keyword].append((review, sentiment_score))

    for keyword in negative_keywords:
        if keyword in review_lower:
            negative_reviews_by_keyword[keyword].append((review, sentiment_score))

# Top 5 positive reviews for each keyword (highest compound score first)
top_positive_reviews = _top_reviews(
    positive_reviews_by_keyword, positive_keywords, n=5, most_positive=True
)

# Top 5 negative reviews for each keyword (lowest compound score first)
top_negative_reviews = _top_reviews(
    negative_reviews_by_keyword, negative_keywords, n=5, most_positive=False
)

# Saving the output file. The encoding is explicit so non-ASCII characters in
# the review text do not raise UnicodeEncodeError on platforms whose default
# encoding is not UTF-8.
output_file = 'VADER_sentiment_analysis.txt'
with open(output_file, 'w', encoding='utf-8') as f:
    _write_section(f, "Top 5 Positive Reviews with Positive Keywords:\n", top_positive_reviews)
    _write_section(f, "Top 5 Negative Reviews with Negative Keywords:\n", top_negative_reviews)

print(f"Output saved to {output_file}")


Output saved to VADER_sentiment_analysis.txt
