# Mac Miller: A lyrical analysis using NLP

### Data Extraction

In [74]:
# setting up libraries
import spotipy
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn
import gensim
from gensim.models import Word2Vec
import json
import matplotlib.pyplot as plt
from pandas.io.json import json_normalize
from lyricsgenius import Genius # lyrics genius is a fantastic library that helps us take lyrics from the Genius API
import nltk
import re
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet

In [2]:
#setting up Genius API
client_id = 'AqnRTduqC75eC1V8Fr3EKTnLep6I_-rk99_RdWPEpSlfeyg5IoZrLs9KRLQBK6Db'
token = 'r_Gny4srkI6fW0JKR5zYU8LofeURTtx4Up7kQa_kqXlkoy6O82vZ_UaUZTWuq8fg'
genius = Genius(token)

#Removes [chorus] [bridge] etc. tags.
genius.remove_section_headers = True

#excluding repeat songs that might hinder analysis
genius.excluded_terms = ["(Remix)", "(Live)"]
#artist = genius.search_artist("Kendrick Lamar", max_songs=10)

I had to choose specific albums because analyzing his whole discography might not yield important insights (filler mixtapes or albums that don't portray his personality well). As such, I decided to include his 6 studio albums (Blue Slide Park, Watching Movies with the Sound Off, GO:OD AM, The Divine Feminine, Swimming, and Circles), and five very popular mixtapes (all of which I absolutely love) (K.I.D.S., Best Day Ever, I Love Life Thank You, Macadelic, and Faces).

In [None]:
# add albums to a list to parse into a dataframe later
albums = []

# The "try/except" block exists because I kept running into a "request timed out" error in the previous 
# code block, so I went and wrote this to retry the call to artist's albums until it works

while True:
    try:
        albums.append(genius.search_album("Blue Slide Park",  "Mac Miller"))
        albums.append(genius.search_album("Watching Movies with the Sound Off",  "Mac Miller"))
        albums.append(genius.search_album("GO:OD AM",  "Mac Miller"))
        albums.append(genius.search_album("The Divine Feminine",  "Mac Miller"))
        albums.append(genius.search_album("Swimming",  "Mac Miller"))
        albums.append(genius.search_album("Circles",  "Mac Miller"))
        albums.append(genius.search_album("K.I.D.S",  "Mac Miller"))
        albums.append(genius.search_album("Best Day Ever",  "Mac Miller"))
        albums.append(genius.search_album("I Love Life, Thank You",  "Mac Miller"))
        albums.append(genius.search_album("Macadelic",  "Mac Miller"))
        albums.append(genius.search_album("Faces",  "Mac Miller"))
        break
    except:
        pass
    
# saving lyrics into files, we keep them separated so we can analyze data by album and not just per song
for i in range(len(albums)):
    albums[i].save_lyrics()

Searching for "Blue Slide Park" by Mac Miller...
Searching for "Blue Slide Park" by Mac Miller...
Searching for "Blue Slide Park" by Mac Miller...


In [67]:
# loading json file as dict
bsp1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_BlueSlidePark.json"))
wmtsf1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_WatchingMovieswiththeSoundOff.json"))
gam1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_GOODAM.json"))
tdf1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_TheDivineFeminine.json"))
swim1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_Swimming.json"))
circles1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_Circles.json"))
kids1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_K.I.D.S..json"))
bde1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_BestDayEver.json"))
illty1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_ILoveLifeThankYou.json"))
mac1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_Macadelic.json"))
faces1 = json.load(open("/Users/faisalqureshi/Desktop/Spotify music NLP project/lyrics/Lyrics_Faces.json"))

# normalizing into dataframe and specifying tracks
bsp = pd.json_normalize(bsp1['tracks'])
wmtsf = pd.json_normalize(wmtsf1['tracks'])
gam = pd.json_normalize(gam1["tracks"])
tdf = pd.json_normalize(tdf1['tracks'])
swim = pd.json_normalize(swim1['tracks']).drop("song.stats.concurrents", axis = 1)
circles = pd.json_normalize(circles1['tracks'])
kids = pd.json_normalize(kids1['tracks'])
bde = pd.json_normalize(bde1['tracks'])
illty = pd.json_normalize(illty1['tracks'])
mac = pd.json_normalize(mac1['tracks'])
faces = pd.json_normalize(faces1['tracks']).drop("song.stats.concurrents", axis = 1)

# add all the albums into one list for aggregation
all_albums = [bsp, wmtsf, gam, tdf, swim, circles, kids, bde, illty, mac, faces]

### Data Cleaning

In [71]:
# using regex to remove punctuation and lowercasing lyrics

for i in range(len(all_albums)):
    for j in range(len(all_albums[i][["song.lyrics"]])):
        all_albums[i]["song.lyrics"][j] = all_albums[i]["song.lyrics"][j].replace("\n", ". ").replace("\n\n", '')
        all_albums[i]["song.lyrics"][j] = re.sub("[^a-zA-Z0-9 -]", '', all_albums[i]["song.lyrics"][j]).lower()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [80]:
# dropping stop words and lemmatizing the lyrics

lemmatizer = WordNetLemmatizer()
stop_words = nltk.corpus.stopwords.words('english')

# Map POS tag to first character lemmatize() accepts
def get_wordnet_pos(word):
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}
    return tag_dict.get(tag, wordnet.NOUN)

# Funtion to lemmatize words and remove stop words
def lemmatizing_and_stop(lyrics):
    prac = [lemmatizer.lemmatize(lyric, get_wordnet_pos(lyric)) for lyric in nltk.word_tokenize(lyrics)]
    return ' '.join([lyric for lyric in prac if lyric not in (stop_words)])

# Lemmatizing lyrics and removing stop words from them
for i in range(len(all_albums)):
    for j in range(len(all_albums[i][["song.lyrics"]])):
        all_albums[i]["song.lyrics"] = all_albums[i]["song.lyrics"].apply(lambda lyric: lemmatizing_and_stop(lyric))

In [85]:
# finding unique word count and adding it to the dataframe

def finding_number(lyrics):
    number = len(np.unique(lyrics.split(' '))) / len(lyrics.split(' '))
    return number

for i in range(len(all_albums)):
    all_albums[i]["unique words %"] = all_albums[i]["song.lyrics"].apply(lambda lyric: finding_number(lyric))
    
faces

Unnamed: 0,number,song._type,song.annotation_count,song.api_path,song.artist_names,song.full_title,song.header_image_thumbnail_url,song.header_image_url,song.id,song.instrumental,...,song.primary_artist.is_meme_verified,song.primary_artist.is_verified,song.primary_artist.name,song.primary_artist.slug,song.primary_artist.url,song.primary_artist.iq,song.artist,song.lyrics,unique words,unique words %
0,1,song,13,/songs/426742,Mac Miller,Inside Outside by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426742,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,inside outside lyric yeah yeah shoulda die alr...,73,0.489933
1,2,song,19,/songs/402541,Mac Miller,Here We Go by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,402541,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,go lyric without fear face enemy brave upright...,161,0.752336
2,3,song,52,/songs/426749,Mac Miller (Ft. ScHoolboy Q),Friends by Mac Miller (Ft. ScHoolboy Q),https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426749,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,friend lyric one two three pop pop one two thr...,374,0.523077
3,4,song,22,/songs/426794,Mac Miller,Angel Dust by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426794,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,angel dust lyric yeah yeah um uh lady gentlema...,169,0.655039
4,5,song,24,/songs/426809,Mac Miller,Malibu by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426809,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,malibu lyric good time trap oh yeah ha-ha-ha o...,188,0.652778
5,6,song,23,/songs/426823,Mac Miller (Ft. Sir Michael Rocks),What Do You Do by Mac Miller (Ft. Sir Michael ...,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426823,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,lyric tambourine shaker man think im eric andr...,215,0.711921
6,7,song,16,/songs/426868,Mac Miller,It Just Doesn't Matter by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426868,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,doesnt matter lyric theyve get best equipment ...,207,0.699324
7,8,song,7,/songs/403766,Mac Miller,Therapy by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,403766,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,therapy lyric pittsburgh man hows feel tell fe...,148,0.395722
8,9,song,27,/songs/426762,Mac Miller (Ft. Earl Sweatshirt),Polo Jeans by Mac Miller (Ft. Earl Sweatshirt),https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426762,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,polo jean lyric yeah fuck go nut give fuck go ...,234,0.680233
9,10,song,15,/songs/426831,Mac Miller,Happy Birthday by Mac Miller,https://images.genius.com/6476e95bec5c6ed1b91e...,https://images.genius.com/6476e95bec5c6ed1b91e...,426831,False,...,True,True,Mac Miller,Mac-miller,https://genius.com/artists/Mac-miller,41018,Mac Miller,happy birthday lyric bum-ba-da-bum-bum yeah ye...,153,0.554348


In [96]:
# selecting only the important columns for analysis and removing songs with only instrumentals

for i in range(len(all_albums)):
    # no mac miller song has all unique words except for the ones with no lyrics in them, hence they would have
    # unique words % as 1.
    all_albums[i] = all_albums[i][all_albums[i]["unique words %"] != 1]
    all_albums[i] = all_albums[i][["song.full_title", "song.artist_names", "song.lyrics", "unique words %"]]
    