In [40]:
'''
This python program scrapes TSwift's "Shake it off" and performs super basic sentiment analysis
using the NLTK library and vader_lexicon

This example demonstrates using the NLTK library and bs4, and passing functions as arguments

Jimmy D Nguyen 11-12-2022
'''


from bs4 import BeautifulSoup
import requests
# Page the lyrics are scraped from.
url = 'https://www.azlyrics.com/lyrics/taylorswift/shakeitoff.html'

# requests applies no timeout by default, so give it one to keep the notebook
# from hanging on a stalled connection. raise_for_status() surfaces an HTTP
# error here instead of as a confusing parsing failure in a later cell.
page = requests.get(url, timeout=30)
page.raise_for_status()

In [41]:
# Parse the downloaded page bytes into a BeautifulSoup tree using the 'html.parser' backend.
soup = BeautifulSoup(page.content, 'html.parser')

In [42]:
# Collect every <div> matching the class string 'col-xs-12 col-lg-8 text-center'.
# NOTE(review): presumably this is the page's main content column - confirm against the live HTML.
lyrics = soup.find_all('div', class_='col-xs-12 col-lg-8 text-center')

In [43]:
# The outer selector matched exactly one container when this was written.
# Re-derive it from `soup` rather than from `lyrics` itself, so that re-running
# this cell does not keep drilling one level deeper into its own previous result.
lyrics_containers = soup.find_all('div', class_='col-xs-12 col-lg-8 text-center')
if not lyrics_containers:
    raise ValueError('lyrics container <div> not found - the page layout may have changed')

# All <div> descendants of that container; a later cell picks the lyrics out by index.
lyrics = lyrics_containers[0].find_all('div')

In [44]:
# The lyrics text is read from the <div> at index 5; this position is hard-coded.
shakeitoff = lyrics[5].get_text()

In [45]:
# Break the lyrics text into one entry per newline-separated line.
shakeitoff_split = shakeitoff.split('\n')

In [46]:
# Keep only lines that contain visible text. Stripping each line removes any
# stray carriage return or padding left over from splitting on '\n', and also
# discards whitespace-only lines rather than just the exact strings '' and '\r'.
shakeitoff_processed = [
    line.strip() for line in shakeitoff_split if line.strip()
]
        
        

In [47]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [48]:
# Download the 'vader_lexicon' NLTK data package; when it is already present
# NLTK just reports that it is up to date (see the output below).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jimmy\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [49]:
# Single shared analyzer instance; the scoring functions below read it as a global.
analyzer = SentimentIntensityAnalyzer()

In [50]:
# Function for sentiment aggregator

def sentiment_calculator(text):
    """Return the mean VADER compound score over a collection of text lines.

    Parameters
    ----------
    text : iterable of str
        The lines to score. Pass a collection of lines, not one long string:
        iterating a plain string would score it character by character.

    Returns
    -------
    float
        Arithmetic mean of the per-line ``compound`` scores, or ``0.0`` when
        ``text`` contains no lines (this used to raise ``ZeroDivisionError``).

    Notes
    -----
    Reads the module-level ``analyzer`` object, which must be defined before
    this function is called with a non-empty input.
    """
    # Materialise the scores first so that any iterable (including a
    # generator) works and the count always matches what was scored.
    scores = [analyzer.polarity_scores(line)['compound'] for line in text]
    if not scores:
        # Nothing to average: report a neutral score instead of dividing by zero.
        return 0.0
    return sum(scores) / len(scores)

In [51]:
# line by line analysis

# this demonstrates passing a function as an argument
# the parameter needs a default (function=function); without one,
# the caller would have to pass the function explicitly on every call

def line_analyser(lyrics_list, sentiment_calculator=sentiment_calculator):
    """Print each lyric line with its compound score, then the aggregate score.

    Parameters
    ----------
    lyrics_list : list of str
        Lyric lines to score one at a time with the module-level ``analyzer``.
    sentiment_calculator : callable, optional
        Function that takes ``lyrics_list`` and returns the aggregate score
        to report. Defaults to the module's ``sentiment_calculator``.

    Returns
    -------
    None
        Results are printed rather than returned.
    """
    for lyric in lyrics_list:
        line_scores = analyzer.polarity_scores(lyric)
        print(f"{lyric}: ==> {line_scores['compound']}")

    aggregate = sentiment_calculator(lyrics_list)
    print(f'Aggregate Compound Score: {aggregate}')
        
    

In [52]:
# Score the lyrics line by line: -1 is the most negative sentiment,
# +1 is the most positive, and 0 is neutral.
# The aggregate score is printed last, labelled "Aggregate Compound Score".

line_analyser(shakeitoff_processed)

I stay out too late: ==> 0.0
Got nothing in my brain: ==> 0.0
That's what people say, mmm-mmm: ==> 0.0
That's what people say, mmm-mmm: ==> 0.0
I go on too many dates: ==> 0.0
But I can't make them stay: ==> 0.0
At least that's what people say, mmm-mmm: ==> 0.0
That's what people say, mmm-mmm: ==> 0.0
But I keep cruising: ==> 0.0
Can't stop, won't stop moving: ==> 0.4168
It's like I got this music: ==> 0.3612
In my mind: ==> 0.0
Saying, "It's gonna be alright.": ==> 0.0
'Cause the players gonna play, play, play, play, play: ==> 0.875
And the haters gonna hate, hate, hate, hate, hate: ==> -0.9709
Baby, I'm just gonna shake, shake, shake, shake, shake: ==> -0.6705
I shake it off, I shake it off: ==> -0.34
Heart-breakers gonna break, break, break, break, break: ==> 0.0
And the fakers gonna fake, fake, fake, fake, fake: ==> -0.9382
Baby, I'm just gonna shake, shake, shake, shake, shake: ==> -0.6705
I shake it off, I shake it off: ==> -0.34
I never miss a beat: ==> 0.1139
I'm lightning on m







According to vader_lexicon, "Shake It Off" is apparently a slightly negative jam,
with an aggregate compound score of -0.19 (a little below neutral)... LOL
