# In [None]:
import pandas as pd
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import matplotlib.pyplot as plt
from collections import Counter

# Download NLTK's 'vader_lexicon' resource (used by the VADER analyzer imported above).
nltk.download('vader_lexicon')

# Read the reviews CSV as UTF-8.
# NOTE(review): the path looks like a Colab Google Drive mount — confirm it exists
# in the environment where this runs.
data = pd.read_csv(
    '/content/drive/MyDrive/BA_reviews.csv',
    encoding='utf-8',
)

def clean_text(text):
    """Normalize a review for word-level processing.

    Every character that is not an ASCII letter, a digit, or whitespace is
    removed, and the result is lowercased.

    Args:
        text: The raw review. ``None`` and float NaN (the usual missing-value
            markers) are treated as an empty review; any other non-string
            value is converted with ``str()`` before cleaning.

    Returns:
        The cleaned, lowercased string (``''`` for missing input).
    """
    # Missing values would otherwise make re.sub raise TypeError.
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    stripped = re.sub(r'[^a-zA-Z0-9\s]', '', str(text))
    return stripped.lower()

# Analyzer instance that get_sentiment reads from module scope.
sia = SentimentIntensityAnalyzer()

# Replace missing reviews with '' first so clean_text is never handed NaN.
data['cleaned_reviews'] = data['reviews'].fillna('').map(clean_text)

def get_sentiment(text):
    """Classify *text* as 'positive', 'negative' or 'neutral'.

    Uses the ``compound`` value from the module-level ``sia`` analyzer's
    ``polarity_scores`` and applies inclusive thresholds: a score of 0.05 or
    more is positive, -0.05 or less is negative, anything in between is
    neutral.

    Args:
        text: The text to score.

    Returns:
        One of the strings 'positive', 'negative' or 'neutral'.
    """
    score = sia.polarity_scores(text)['compound']
    # The boundaries are inclusive so that a score of exactly +/-0.05 is not
    # reported as neutral (VADER's documented convention).
    if score >= 0.05:
        return 'positive'
    if score <= -0.05:
        return 'negative'
    return 'neutral'

# Label every cleaned review with get_sentiment.
# NOTE(review): scoring runs on the lowercased, punctuation-free text. VADER's
# documentation describes punctuation and capitalization as intensity cues, so
# scoring the raw 'reviews' column may be more accurate — confirm before changing.
data['predicted_sentiment'] = data['cleaned_reviews'].apply(get_sentiment)

# Count all labels in one pass; a label that never occurs counts as 0.
sentiment_counts = data['predicted_sentiment'].value_counts()
positive_reviews = int(sentiment_counts.get('positive', 0))
negative_reviews = int(sentiment_counts.get('negative', 0))
neutral_reviews = int(sentiment_counts.get('neutral', 0))

total_reviews = len(data)
if total_reviews:
    positive_percentage = (positive_reviews / total_reviews) * 100
    negative_percentage = (negative_reviews / total_reviews) * 100
    neutral_percentage = (neutral_reviews / total_reviews) * 100
else:
    # An empty dataset would otherwise raise ZeroDivisionError.
    positive_percentage = negative_percentage = neutral_percentage = 0.0

print(f"Positive Reviews: {positive_percentage:.2f}%")
print(f"Negative Reviews: {negative_percentage:.2f}%")
print(f"Neutral Reviews: {neutral_percentage:.2f}%")

labels = ['Positive', 'Negative', 'Neutral']
sizes = [positive_percentage, negative_percentage, neutral_percentage]
colors = ['green', 'red', 'blue']
explode = (0.1, 0, 0)  # offset the first ('Positive') wedge

# Skip the chart when there is nothing to plot.
if total_reviews:
    plt.figure(figsize=(8, 8))
    plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
    plt.title('Sentiment Analysis Results')
    plt.show()
else:
    print("No reviews to plot.")

def tokenize(text):
    """Split *text* into a list of tokens on runs of whitespace.

    Leading and trailing whitespace produce no empty tokens, and an empty
    string yields an empty list.
    """
    tokens = text.split(None)
    return tokens

# Tally every token produced by tokenize across all cleaned reviews.
word_counter = Counter(
    token
    for review_text in data['cleaned_reviews']
    for token in tokenize(review_text)
)

# Look up how often each topic word occurs as an exact token (0 if absent).
key_topics = ['service', 'seat', 'crew', 'staff', 'cabin', 'flight']
topic_counts = {topic: word_counter.get(topic, 0) for topic in key_topics}

# Order topics from most to least frequent, then split into parallel tuples.
ranked_topics = sorted(topic_counts.items(), key=lambda pair: pair[1], reverse=True)
topics, frequencies = zip(*ranked_topics)

plt.figure(figsize=(8, 5))
plt.bar(topics, frequencies, color='orange')
plt.title('Key Topics in Reviews')
plt.xlabel('Key Topics')
plt.ylabel('Frequency')
plt.show()