# Data processing

In [1]:
# Import libraries
import pandas as pd
import os
import json
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the whole notebook
pd.options.mode.chained_assignment = None

In [2]:
# Load the cleaned Billboard table: one row per song, including the name of
# the JSON file that holds its lyrics
# pd.set_option("display.max_rows", None, "display.max_columns", None)
songs_csv = 'all_time_billboard_wrap_up_cleaned.csv'
songs = pd.read_csv(songs_csv)
display(songs)

Unnamed: 0,Position,Artist,Song Title,Year,JSON file,Split Names,Song Language
0,1,Gordon Jenkins and The Weavers,Goodnight Irene,1950,0.json,Gordon Jenkins,en
1,2,Nat King Cole,Mona Lisa,1950,1.json,Nat King Cole,en
2,4,Gary and Bing Crosby,Sam's Song,1950,3.json,Gary,en
3,5,Gary and Bing Crosby,Simple Melody,1950,4.json,Gary,en
4,6,Teresa Brewer,"Music, Music, Music",1950,5.json,Teresa Brewer,en
...,...,...,...,...,...,...,...
6300,96,Morgan Wallen,More Than My Hometown,2020,6675.json,Morgan Wallen,en
6301,97,Luke Combs,Lovin' On You,2020,6676.json,Luke Combs,en
6302,98,Moneybagg Yo,Said Sum,2020,6677.json,Moneybagg Yo,en
6303,99,H.E.R. Featuring YG,Slide,2020,6678.json,H.E.R.,en


### Tokenizing and lemmatizing

In [3]:
# Download the NLTK resources: the Punkt tokenizer models, the averaged
# perceptron part-of-speech tagger and the WordNet data for lemmatizing.
# The calls are left commented out; presumably they only need to be run once
# per environment -- TODO confirm.
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('wordnet')

In [4]:
# pd.set_option("display.max_rows", None, "display.max_columns", None)

# Create columns with lyrics, tokenized lyrics, particular parts of speech and lemmatized words
wd = os.getcwd()
# Build the folder path with os.path.join so it works on any OS; the trailing ''
# keeps a trailing separator, so `jsons_path + file_name` still yields a valid path
jsons_path = os.path.join(wd, 'jsons', '')
lemmatizer = WordNetLemmatizer()

# Exact Penn Treebank tag -> name of the column collecting the words with that tag.
# Only exact matches are collected (e.g. 'NN' but not 'NNS', 'VB' but not 'VBD').
pos_tag_columns = {
    'JJ': 'Adjectives',
    'NN': 'Nouns',
    'RB': 'Adverbs',
    'PRP': 'Pronouns',
    'VB': 'Verbs',
}

# Collect the values of every new column in plain lists (one entry per song) and
# attach them to the DataFrame once at the end. This avoids chained indexing
# (songs[col][i] = ...), which relies on a silenced warning and does not modify
# the DataFrame at all when pandas Copy-on-Write is active.
new_columns = {
    column: []
    for column in ['Lyrics', 'Tokenized Lyrics', 'Lemmatized Words', 'Parts of Speech',
                   *pos_tag_columns.values()]
}

for json_file in songs['JSON file']:
    with open(os.path.join(jsons_path, json_file), 'r', encoding = 'UTF-8') as f:
        data = json.load(f)['lyrics'].lower()

    # Delete comments in square brackets like [Verse 1], [Intro]
    # https://stackoverflow.com/questions/14596884/remove-text-between-and
    data = re.sub(r'[\[].*?[\]]', '', data)
    # Delete additional blank lines
    # https://stackoverflow.com/questions/1140958/whats-a-quick-one-liner-to-remove-empty-lines-from-a-python-string
    lyric_lines = [line for line in data.splitlines() if line]

    # Lyrics as a single line. Joining the already split lines gives the same
    # result on every OS (splitting on '\r\n' only worked on Windows).
    new_columns['Lyrics'].append(' '.join(lyric_lines))

    tokenized_lyrics = word_tokenize('\n'.join(lyric_lines))
    lemmatized_words = [lemmatizer.lemmatize(word) for word in tokenized_lyrics]
    # Tagging is done on the lemmatized words, not on the raw tokens
    parts_of_speech = nltk.pos_tag(lemmatized_words)

    new_columns['Tokenized Lyrics'].append(tokenized_lyrics)
    new_columns['Lemmatized Words'].append(lemmatized_words)
    new_columns['Parts of Speech'].append(parts_of_speech)

    # Split the tagged words into one list per part of speech of interest
    words_by_column = {column: [] for column in pos_tag_columns.values()}
    for word, tag in parts_of_speech:
        column = pos_tag_columns.get(tag)
        if column is not None:
            words_by_column[column].append(word)
    for column, words in words_by_column.items():
        new_columns[column].append(words)

# Lists are assigned by position, so this does not depend on the index labels of songs
for column, values in new_columns.items():
    songs[column] = values
display(songs)

Unnamed: 0,Position,Artist,Song Title,Year,JSON file,Split Names,Song Language,Lyrics,Tokenized Lyrics,Lemmatized Words,Parts of Speech,Adjectives,Nouns,Adverbs,Pronouns,Verbs
0,1,Gordon Jenkins and The Weavers,Goodnight Irene,1950,0.json,Gordon Jenkins,en,irene goodnight irene goodnight goodnight iren...,"[irene, goodnight, irene, goodnight, goodnight...","[irene, goodnight, irene, goodnight, goodnight...","[(irene, NN), (goodnight, VBD), (irene, JJ), (...","[irene, goodnight, last, saturday, i, gon, ire...","[irene, goodnight, irene, irene, i, dream, nig...","[down, now, sometimes, sometimes, sometimes, l...","[you, me, me, you, you, you, you]","[see, take, see, jump, see, stop, stop, stop, ..."
1,2,Nat King Cole,Mona Lisa,1950,1.json,Nat King Cole,en,"mona lisa, mona lisa, men have named you you'r...","[mona, lisa, ,, mona, lisa, ,, men, have, name...","[mona, lisa, ,, mona, lisa, ,, men, have, name...","[(mona, NN), (lisa, NN), (,, ,), (mona, NN), (...","[mystic, broken, many, warm, real, cold, lovel...","[mona, lisa, mona, lisa, lady, smile, mona, st...","[so, only, lonely, just, there, just, lonely, ...","[you, you, it, you, they, you, you, they, they...","[smile, tempt, hide, lie, mona, smile, tempt, ..."
2,4,Gary and Bing Crosby,Sam's Song,1950,3.json,Gary,en,"ah, here's a happy tune, you'll love to croon ...","[ah, ,, here, 's, a, happy, tune, ,, you, 'll,...","[ah, ,, here, 's, a, happy, tune, ,, you, 'll,...","[(ah, NN), (,, ,), (here, RB), ('s, VBZ), (a, ...","[happy, grim, grand, wrong, happy, happy, litt...","[ah, tune, song, melody, sam, song, nothing, m...","[here, then, only, soon, so, never, here, real...","[you, they, it, sam, it, they, it, you, you, i...","[love, croon, call, be, get, do, agree, forget..."
3,5,Gary and Bing Crosby,Simple Melody,1950,4.json,Gary,en,won't you play some simple melody like my moth...,"[wo, n't, you, play, some, simple, melody, lik...","[wo, n't, you, play, some, simple, melody, lik...","[(wo, MD), (n't, RB), (you, PRP), (play, VB), ...","[simple, good, old, simple, musical, classical...","[melody, mother, sang, harmony, melody, demon,...","[n't, n't, just, simply, na, n't, just, simply...","[you, me, you, me, you, you, you, me, you, you...","[play, play, set, play, play, get, rag, set, p..."
4,6,Teresa Brewer,"Music, Music, Music",1950,5.json,Teresa Brewer,en,put another nickel in in the nickelodeon all i...,"[put, another, nickel, in, in, the, nickelodeo...","[put, another, nickel, in, in, the, nickelodeo...","[(put, VB), (another, DT), (nickel, NN), (in, ...","[danciong, danciong, old]","[nickel, nickelodeon, i, music, music, music, ...","[close, so, close, so]","[you, you, you, me, you, you, me, you, you, yo...","[put, do, want, come, do, want, come, keep]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6300,96,Morgan Wallen,More Than My Hometown,2020,6675.json,Morgan Wallen,en,"girl, our mamas are best friends and so are we...","[girl, ,, our, mamas, are, best, friends, and,...","[girl, ,, our, mama, are, best, friend, and, s...","[(girl, NN), (,, ,), (our, PRP$), (mama, NN), ...","[whole, likely, few, real, deep, real, u, brig...","[girl, mama, friend, town, rooting, u, home, t...","[so, n't, n't, just, n't, yet, baby, n't, n't,...","[we, 'em, we, you, you, you, you, you, you, yo...","[settle, let, grow, stop, blame, go, i, need, ..."
6301,97,Luke Combs,Lovin' On You,2020,6676.json,Luke Combs,en,don't get me wrong i like a bobber on the wate...,"[do, n't, get, me, wrong, i, like, a, bobber, ...","[do, n't, get, me, wrong, i, like, a, bobber, ...","[(do, VBP), (n't, RB), (get, VB), (me, PRP), (...","[wrong, friday, dunn, strong, i, right, two-do...","[bobber, water, hookin, reelin, i, night, ride...","[n't, again, soon, n't, enough, back, soon, n'...","[me, 'em, it, me, you, you, you, you, you, you...","[get, slow, hit, get, get, get]"
6302,98,Moneybagg Yo,Said Sum,2020,6677.json,Moneybagg Yo,en,"(turn me up, yc) huh? (what?) ah, i thought a ...","[(, turn, me, up, ,, yc, ), huh, ?, (, what, ?...","[(, turn, me, up, ,, yc, ), huh, ?, (, what, ?...","[((, (), (turn, VB), (me, PRP), (up, RP), (,, ...","[somethin, nothin, nothin, pussy, somethin, st...","[huh, ah, broke, nigga, ah, talkin, shit, trap...","[yc, still, n't, n't, just, n't, really, n't, ...","[me, they, we, it, it, she, it, you, she, you,...","[turn, i, sayin', sayin, run, run, say, i, go,..."
6303,99,H.E.R. Featuring YG,Slide,2020,6678.json,H.E.R.,en,you always wearin' them glasses you don't wann...,"[you, always, wearin, ', them, glasses, you, d...","[you, always, wearin, ', them, glass, you, do,...","[(you, PRP), (always, RB), (wearin, VBP), (', ...","[shit, attractive, same, same, fast, black, to...","[glass, sucker, look, eye, huh, show, passion,...","[always, n't, na, just, only, so, n't, too, n'...","[you, them, you, you, them, you, you, you, he,...","[wan, let, girl, tryna, keep, let, know, be, p..."
