In [1]:
import pandas as pd
import os, codecs
import spacy
from collections import Counter

In [2]:
FOLDER = "data/"

In [3]:
filename = "lyrics_per_song.csv"
df = pd.read_csv(FOLDER + filename)
df.head()

Unnamed: 0,Song,Performer,Year,Decade,Lyrics
0,eastside,"benny blanco, halsey",2019,2010,"Uh\nYeah, yeah\n\nWhen I was young, I fell in ..."
1,wait for you,elliott yamin,2007,2000,I never felt nothing in the world like this be...
2,wildflower,skylark,1973,1970,She's faced the hardest times you could imagin...
3,even though i'm leaving,luke combs,2019,2010,"Daddy, I'm afraid, won't you stay a little whi..."
4,do re mi,blackbear,2017,2010,"Do, re, mi, fa, so\nYeah, yeah, yeah, oh\nDo, ..."


In [4]:
nlp = spacy.load('en')

In [5]:
docs = list()
for lyrics_doc in os.listdir(FOLDER):
    if ".txt" in lyrics_doc:
        print(lyrics_doc)
        with codecs.open(os.path.join(FOLDER,lyrics_doc),encoding="utf8") as f:
            docs.append(f.read())

2010_lyrics.txt
1990_lyrics.txt
1970_lyrics.txt
2020_lyrics.txt
1960_lyrics.txt
1980_lyrics.txt
1950_lyrics.txt
2000_lyrics.txt


In [6]:
#preview first lines of 2020_lyrics.txt
print(docs[3][0:300])

I'm like the water when your ship rolled in that night
Rough on the surface, but you cut through like a knife
And if it was an open-shut case
I never would've known from that look on your face
Lost in your current like a priceless wine

The more that you say, the less I know
Wherever you stray, I fo


In [7]:
#remove new lines
docs = [" ".join(d.split()) for d in docs]
#preview
print(docs[3][0:300])

I'm like the water when your ship rolled in that night Rough on the surface, but you cut through like a knife And if it was an open-shut case I never would've known from that look on your face Lost in your current like a priceless wine The more that you say, the less I know Wherever you stray, I fol


In [8]:
stopwords = spacy.lang.en.stop_words.STOP_WORDS
print('Number of stop words: %d' % len(stopwords))
print('First ten stop words:',list(stopwords)[:10])

Number of stop words: 326
First ten stop words: ['of', 'a', 'it', 'themselves', 'now', '’s', 'her', 'some', '‘ll', 'not']


**Most common words, without stop words and punctuation:**

# Semantic analysis

In [2]:
%pip install empath 

Processing /home/ghalifaten/.cache/pip/wheels/2a/e5/c2/fd8dad0a452927c85ecd3c6cbaa4748125246eed73d8303184/empath-0.89-py3-none-any.whl
Installing collected packages: empath
Successfully installed empath-0.89
Note: you may need to restart the kernel to use updated packages.


In [9]:
from empath import Empath
lexicon = Empath()

In [14]:
doc = nlp(docs[5]) #1980_lyrics.txt

In [16]:
empath_features = lexicon.analyze(doc.text,categories = ["love", "pain", "sadness", "hate", "joy"], normalize = True)

In [17]:
empath_features

{'love': 0.01418016269870886,
 'pain': 0.00802298679005896,
 'sadness': 0.002910664974998134,
 'hate': 0.002910664974998134,
 'joy': 0.0015672811403836108}

In [29]:
import numpy as np
def round_up(value):
    return np.round(value*100, 2)

In [30]:
decades = [2010,1990,1970,2020,1960,1980,1950,2000] #in the order of "docs"
for i in range(len(decades)):
    doc = nlp(docs[i])
    empath_features = lexicon.analyze(doc.text,categories = ["love", "pain", "sadness", "hate", "joy"], normalize = True)
    for key in empath_features:
        empath_features[key] = round_up(empath_features[key])§
    print('{0}: {1}\n'.format(decades[i], empath_features))

2010: {'love': 0.68, 'pain': 0.47, 'sadness': 0.12, 'hate': 0.21, 'joy': 0.03}

1990: {'love': 0.8, 'pain': 0.6, 'sadness': 0.38, 'hate': 0.37, 'joy': 0.17}

1970: {'love': 1.09, 'pain': 0.66, 'sadness': 0.17, 'hate': 0.21, 'joy': 0.17}

2020: {'love': 0.72, 'pain': 0.3, 'sadness': 0.18, 'hate': 0.12, 'joy': 0.06}

1960: {'love': 1.29, 'pain': 0.56, 'sadness': 0.26, 'hate': 0.15, 'joy': 0.09}

1980: {'love': 1.42, 'pain': 0.8, 'sadness': 0.29, 'hate': 0.29, 'joy': 0.16}

1950: {'love': 1.24, 'pain': 0.28, 'sadness': 0.08, 'hate': 0.06, 'joy': 0.06}

2000: {'love': 0.64, 'pain': 0.64, 'sadness': 0.14, 'hate': 0.16, 'joy': 0.08}

