<a href="https://colab.research.google.com/github/avesselinov/Project1/blob/main/Project_1_Analysis_Plan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from scipy import stats

# Fetch the VADER lexicon that SentimentIntensityAnalyzer relies on
nltk.download('vader_lexicon')

# Read the Yelp data (swap 'yelp_data.csv' for the real dataset file), keep
# only the columns this analysis uses, and discard rows missing any of them.
# The column names 'review_text', 'price' and 'rating' are assumed; selecting
# a column the file does not have raises KeyError.
df = (
    pd.read_csv('yelp_data.csv')
    [['review_text', 'price', 'rating']]
    .dropna()
)

# Shared VADER analyzer instance, used by get_sentiment()
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    """Return the VADER compound sentiment score for one review.

    Passes ``text`` to the module-level ``analyzer`` and returns the value
    stored under the ``'compound'`` key of the resulting score dict.
    """
    scores = analyzer.polarity_scores(text)
    return scores['compound']

# Score every review; get_sentiment returns the VADER 'compound' value
df['sentiment_score'] = df['review_text'].apply(get_sentiment)

# Group once and reuse the grouping for both the summary table and the ANOVA.
# groupby sorts the price levels, so `grouped` lists them in ascending order.
scores_by_price = df.groupby('price')['sentiment_score']
grouped = scores_by_price.mean().reset_index()
print(grouped)

# One-way ANOVA: do mean sentiment scores differ across price levels?
price_groups = [scores.dropna() for _, scores in scores_by_price]
if len(price_groups) >= 2:
    anova_result = stats.f_oneway(*price_groups)
    print(f"ANOVA p-value: {anova_result.pvalue}")
else:
    # f_oneway rejects fewer than two samples; report that instead of crashing
    anova_result = None
    print("ANOVA skipped: fewer than two price levels in the data")

# Box plot of the score distribution per price level. Passing `order` keeps
# the x-axis in the same sorted order as the printed table rather than the
# order in which price levels happen to appear in the data.
sns.boxplot(
    x='price',
    y='sentiment_score',
    data=df,
    order=grouped['price'].tolist(),
)
plt.title('Sentiment Scores by Restaurant Price Level')
plt.xlabel('Price Level')
plt.ylabel('Sentiment Score')
plt.show()
