In [6]:
# Load the beer review data. Later code in this cell reads its "Review"
# column (text to score) and its "Beer" column (grouping key).
csvFile = pd.read_csv('data/reduced_length.csv')

# Fetch the VADER lexicon before constructing an analyzer; nltk reports
# "already up-to-date" when the package is already present.
nltk.download('vader_lexicon')
# Analyzer with the stock lexicon. Nothing else in this cell references it.
analyzer = SentimentIntensityAnalyzer()

# Define custom sentiment update function
# Beer-review vocabulary layered on top of the stock VADER lexicon.
# Positive values push a review toward positive sentiment, negative values
# toward negative sentiment.
_BEER_LEXICON = {
    # Positive Sentiment Words
    'crisp': 2.0, 'refreshing': 2.5, 'smooth': 2.3, 'balanced': 2.2,
    'aromatic': 2.0, 'malty': 1.8, 'hoppy': 1.7, 'fruity': 1.9,
    'creamy': 2.0, 'sessionable': 2.4, 'floral': 1.8, 'zesty': 1.9,
    'rich': 2.5, 'velvety': 2.5, 'fragrant': 2.4, 'robust': 2.3,
    'lively': 2.2, 'layered': 2.1, 'subtle': 2.0, 'caramel': 2.1,
    'toffee': 2.0, 'spicy': 1.9, 'vanilla': 2.3, 'nutty': 2.0,
    'biscuity': 1.8, 'citrusy': 2.0, 'piney': 1.9, 'clean': 2.0,
    'tropical': 2.2, 'silky': 2.5, 'elegant': 2.4, 'peppery': 1.7,
    'herbal': 1.8,

    # Negative Sentiment Words
    'skunky': -2.5, 'watery': -2.0, 'bitter': -2.2, 'flat': -2.3,
    'stale': -2.5, 'sour': -2.0, 'overpowering': -1.8, 'burnt': -2.5,
    'alcoholic': -2.1, 'cloying': -2.3, 'astringent': -2.0, 'metallic': -2.4,
    'harsh': -2.5, 'off-putting': -2.4, 'musty': -2.3, 'overcarbonated': -2.1,
    'soapy': -2.3, 'chemically': -2.4, 'clunky': -2.2, 'thin': -2.0,
    'overripe': -2.3, 'soggy': -2.1, 'grainy': -1.9, 'chalky': -2.2,
    'muddy': -2.3, 'dull': -2.0, 'medicinal': -2.4, 'boozy': -2.2,
    'syrupy': -2.3,

    # Neutral/Context-Dependent Words
    'light': 1.0, 'dry': 1.2, 'full-bodied': 1.5, 'funky': 0.5,
    'earthy': 0.7, 'toasty': 1.5, 'grassy': 1.0, 'oaky': 1.2,
    'tangy': 0.9, 'woody': 1.1, 'fizzy': 1.0, 'spiced': 1.0,
    'bready': 0.8, 'oily': -1.0, 'prickly': 1.0, 'drying': -1.0,
}

# Analyzer with _BEER_LEXICON applied; created on the first get_sentiment call.
_beer_analyzer = None


def get_sentiment(sentence):
    """Score *sentence* with VADER extended by the beer-specific lexicon.

    The customised analyzer is built once and reused on later calls.
    Constructing a SentimentIntensityAnalyzer loads the whole VADER lexicon,
    so rebuilding and re-patching it for every review is wasted work when
    this function is called once per row of a DataFrame.

    Args:
        sentence: Review text to score.

    Returns:
        The dict returned by ``polarity_scores``, containing the ``neg``,
        ``neu``, ``pos`` and ``compound`` scores.
    """
    global _beer_analyzer
    if _beer_analyzer is None:
        sid = SentimentIntensityAnalyzer()
        # Custom entries override or extend the stock VADER valences.
        sid.lexicon.update(_BEER_LEXICON)
        _beer_analyzer = sid
    return _beer_analyzer.polarity_scores(sentence)

# One accumulator per VADER score; the dict order fixes the column order.
neg, neu, pos, compound = [], [], [], []
score_columns = {'neg': neg, 'neu': neu, 'pos': pos, 'compound': compound}

# Score each review with the beer-aware analyzer and fan the result out
# into the matching accumulator.
for text in csvFile["Review"]:
    scores = get_sentiment(text)
    for score_key, score_list in score_columns.items():
        score_list.append(scores[score_key])

# `sentiments` is the same DataFrame object as `csvFile`, so the score
# columns are attached to it in place.
sentiments = csvFile
for score_key, score_list in score_columns.items():
    sentiments[score_key] = score_list

# Preview the scored reviews.
print(sentiments.head())

# Mean compound score per beer, best-rated sentiment first.
sentiment_avg = (
    sentiments.groupby(["Beer"])["compound"]
    .mean()
    .sort_values(ascending=False)
)


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\mbmma\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                          Beer  Rating  \
0  Kentucky Brunch Brand Stout    4.61   
1  Kentucky Brunch Brand Stout    4.71   
2  Kentucky Brunch Brand Stout    5.00   
3  Kentucky Brunch Brand Stout    4.80   
4  Kentucky Brunch Brand Stout    4.98   

                                              Review    neg    neu    pos  \
0  Sampled at the brewery, this is the 2022 bottl...  0.027  0.762  0.210   
1  The perfect barrel aged stout. Not overly swee...  0.205  0.553  0.242   
2  The flirtation with maple comes to a crescendo...  0.033  0.715  0.252   
3  The flirtation with maple comes to a crescendo...  0.033  0.715  0.252   
4  On tap at TG for part of KBBS release day - ra...  0.073  0.558  0.369   

   compound  
0    0.9938  
1    0.2216  
2    0.9924  
3    0.9924  
4    0.9761  
