# Emotion detection in song lyrics

- Khatiwada Angelina
- Data Science and Economics, UNIMI
- Oct 2021

### Part 2: Songs Dataset with Genius API


In [1]:
#Importing libraries

import pandas as pd
import lyricsgenius as lg
import json

# Display settings for notebook outputs: None for max_colwidth / max_rows turns off
# truncation of cell contents and of the number of rows shown (useful for full lyrics);
# width=None leaves the line width to pandas' own detection.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)

In [2]:
#Creating lists of artists for different genres
# Each name is later passed verbatim to genius.search_artist and is also used as the
# stem of the per-artist JSON file (name + '.json'), so the exact spelling and spacing
# of every string decides which file is written and read back.
# NOTE(review): several names omit punctuation or contain extra spaces
# (e.g. 'ACDC', 'JayZ', 'Simon  Garfunkel', 'Ramones ') - presumably to match the
# saved file names; confirm before normalizing any of them.

Electronic = ['Daft Punk','Depeche Mode', 'Calvin Harris',
              'David Guetta', 'Avicii', 'The Chainsmokers', 'Alan Walker']

Blues = ['Muddy Waters', 'Robert Johnson', 'B.B. King',
         'Eric Clapton' ]

Country = ['Johnny Cash', 'Dolly Parton', 'John Denver',
           'Garth Brooks']

Folk = ['Bob Dylan', 'Simon  Garfunkel', 'Woody Guthrie', 'Joan Baez',
       'Sufjan Stevens', 'Passenger']

HipHop_Rap = ['Eminem', 'JayZ', 'Nas', 'Kanye West',
              'Cardi B', '50 Cent', 'Drake', 'Nicki Minaj',
               'Snoop Dogg', 'Lil Nas X']

Jazz = ['Duke Ellington', 'Louis Armstrong', 'Billie Holiday','Ella Fitzgerald', 'Miles Davis']

Pop = ['Michael Jackson', 'Taylor Swift', 'Frank Sinatra',
       'Ariana Grande', 'Lady Gaga', 'Ed Sheeran', 'Dua Lipa',
       'Katy Perry', 'Justin Bieber', 'Billie Eilish', 'The Weeknd',
      'Bruno Mars', 'Shawn Mendes', 'Elton John', 'One Direction', 'Maroon 5',
      'Madonna', 'Britney Spears', 'Rihanna', 'Beyoncé', 'Justin Timberlake', 'Camila Cabello']

RnB_Soul = ['Stevie Wonder', 'Sam Cooke', 'Otis Redding']  
Reggae = ['Bob Marley', 'Peter Tosh', 'Jimmy Cliff', 'Steel Pulse']

Classic_Rock = ['The Beatles', 'John Lennon', 'The Rolling Stones',
               'Eagles', 'Buddy Holly', 'Billy Joel', 'Bon Jovi',
               'Elvis Presley', 'Bruce Springsteen',
               'U2']

Hard_Rock = ['ACDC', 'Aerosmith', 'Queen', 'Led Zeppelin',
            "Guns n Roses", 'Pink Floyd','Scorpions','The Who', 'KISS']
 
Punk_Rock = ['Green Day', 'The Offspring', 'Ramones ', 'Blink 182',
            'The Clash', 'NOFX', 'Bad Religion', 'Sex Pistols', 'Sublime', 'Deftones']

Alternative_Rock_Indie = ['The Killers', 'Nirvana', 'Muse', 'Artic Monkeys',
                         'Foster the People', 'Mumford  Sons', 'Lumineers',
                         'Florence  The Machine', 'Red Hot Chili Peppers', 
                         'The Smiths', 'The Cure', 'Oasis', 'Linkin Park',
                         'Coldplay', 'Imagine Dragons', 'Thirty Seconds to Mars',]

Metal = ['Bullet for My Valentine', 'Iron Maiden', 'Black Sabbath', 'Metallica',
         'Megadeth', 'Evanescence', 'Nightwish', 'Within Temptation', 'System of a Down', 'Slipknot']

In [3]:
# Genre label -> list of artist names for that genre.
# Insertion order is kept, so it also fixes the row order of the artist/genre table built from it.
singers = dict([
    ('Pop', Pop),
    ('Hip-hop/Rap', HipHop_Rap),
    ('Folk', Folk),
    ('Country', Country),
    ('R&B/Soul', RnB_Soul),
    ('Jazz', Jazz),
    ('Blues', Blues),
    ('Electronic/Dance', Electronic),
    ('Reggae', Reggae),
    ('Classic Rock', Classic_Rock),
    ('Hard Rock', Hard_Rock),
    ('Punk Rock', Punk_Rock),
    ('Alternative Rock/Indie', Alternative_Rock_Indie),
    ('Metal', Metal),
])

In [4]:
# Artist/genre lookup table: one row per (artist, genre) pair, in the order of `singers`
genre_artist_pairs = [
    (genre_label, artist_name)
    for genre_label, genre_members in singers.items()
    for artist_name in genre_members
]
artists = [artist_name for _, artist_name in genre_artist_pairs]
genres = [genre_label for genre_label, _ in genre_artist_pairs]

singer_genre = pd.DataFrame({'artists_initial': artists, 'genre': genres})

# Sanity check: the number of rows must equal the number of artists collected
print(singer_genre.shape)
print(len(artists))
singer_genre.head()

(120, 2)
120


Unnamed: 0,artists_initial,genre
0,Michael Jackson,Pop
1,Taylor Swift,Pop
2,Frank Sinatra,Pop
3,Ariana Grande,Pop
4,Lady Gaga,Pop


### Genius API: song lyrics collection

In [13]:
# Connect to the Genius API.
# The access token is taken from the first cell of a local, header-less text file,
# so it never appears in the notebook itself.
token_genius = pd.read_csv('token_Genius.txt', header=None)
TOKEN = token_genius.loc[0, 0]

# Client options: skip non-song results, drop remixes / live versions by title,
# and strip section headers from the returned lyrics.
genius_options = {
    'skip_non_songs': True,
    'excluded_terms': ["(Remix)", "(Live)"],
    'remove_section_headers': True,
}
genius = lg.Genius(TOKEN, **genius_options)

In [14]:
# function to download song lyrics and metadata in json files (k songs per each artist)
def download_lyrics(names_of_singers, k):
    """Download songs for each artist from Genius and save them as JSON files.

    Every name is looked up with ``genius.search_artist`` (results sorted by
    popularity, at most ``k`` songs) and the returned artist object is saved
    with ``save_lyrics`` under the file name ``name + '.json'``.

    Parameters
    ----------
    names_of_singers : iterable of str
        Artist names; each one is used both as the search term and as the
        stem of the output file name.
    k : int
        Maximum number of songs requested per artist (passed as ``max_songs``).

    Returns
    -------
    None
        Artists for which the search yields no result are reported with a
        printed message and skipped, so one miss does not abort the batch.
    """
    for name in names_of_singers:
        artist = genius.search_artist(name, max_songs=k, sort='popularity')
        if artist is None:
            # Nothing found for this name: calling save_lyrics on None would raise
            # AttributeError and stop the downloads for all remaining artists.
            print(f"No Genius result for '{name}'; skipping.")
            continue
        name_json_file = name + '.json'
        artist.save_lyrics(name_json_file)

In [15]:
# function to get the lyrics, title, year, album and youtube link from json file
def _first_youtube_url(media):
    """Return the URL of the first YouTube entry in ``media``, or a placeholder if none exists."""
    for item in media or []:
        if item.get('provider') == 'youtube':
            return item['url']
    return "YouTube link not found"


def get_lyrics(names_of_singers):
    """Read the saved per-artist JSON files and collect song fields.

    For every name the file ``name + '.json'`` is loaded and, for each entry
    of its ``'songs'`` list, the artist, title, lyrics, album name, display
    release date and YouTube link are extracted.

    Parameters
    ----------
    names_of_singers : iterable of str
        Names used as file stems; the same name is also recorded once per
        song so rows can later be joined back to the artist/genre table.

    Returns
    -------
    tuple of seven lists
        ``(artist_names, song_titles, song_lyrics, album_names,
        release_dates, youtube_links, names_initial)``. Each element holds
        one sub-list per artist, and all seven sub-lists of an artist have
        one item per song. Songs without an album get ``"No album found"``;
        songs without a YouTube media entry get ``"YouTube link not found"``.

    Raises
    ------
    FileNotFoundError
        If the JSON file of an artist does not exist.
    KeyError
        If a song entry lacks one of the mandatory fields.
    """
    artist_names = []
    song_titles = []
    song_lyrics = []
    album_names = []
    release_dates = []
    youtube_links = []
    names_initial = []

    for name in names_of_singers:
        name_json_file = name + '.json'

        # Explicit encoding so reading does not depend on the platform default
        with open(name_json_file, encoding='utf-8') as f:
            data = json.load(f)

        songs = data['songs']

        artist_names.append([song['artist'] for song in songs])
        song_titles.append([song['title'] for song in songs])
        song_lyrics.append([song['lyrics'] for song in songs])
        album_names.append([
            song['album']['name'] if song.get('album') is not None else "No album found"
            for song in songs
        ])
        release_dates.append([song['release_date_for_display'] for song in songs])
        # Exactly one link per song: taking only the first YouTube entry keeps this
        # column the same length as the others even when a song lists several videos.
        youtube_links.append([_first_youtube_url(song.get('media')) for song in songs])
        names_initial.append([name] * len(songs))

    return artist_names, song_titles, song_lyrics, album_names, release_dates, youtube_links, names_initial

In [None]:
# Request up to 5 songs per artist (sorted by popularity) from Genius and save one
# JSON file per artist. This performs network calls, so it only needs to be run
# when the JSON files are not already available.
download_lyrics(artists, 5)

In [16]:
# Load the saved per-artist JSON files; every returned value is a list holding one sub-list per artist
artist_names, song_titles, song_lyrics, album_names, release_dates, youtube_links, names_initial = get_lyrics(artists)

In [17]:
# Flatten the per-artist sub-lists into one flat column per field.
# NOTE: this rebinds `song_lyrics` from the nested list to the DataFrame, so the
# get_lyrics(...) cell has to be re-run before this cell can be executed again.
nested_columns = {
    'artist_name': artist_names,
    'song_title': song_titles,
    'album_name': album_names,
    'release_date': release_dates,
    'youtube_link': youtube_links,
    'song_lyrics': song_lyrics,
    'artists_initial': names_initial,
}

# Single-pass flattening; sum(nested, []) would copy the growing list on every addition
song_lyrics = pd.DataFrame({
    column: [value for per_artist in nested for value in per_artist]
    for column, nested in nested_columns.items()
})

print(song_lyrics.shape)

(600, 7)


In [18]:
# Final song lyrics dataset: attach each song's genre through the shared 'artists_initial' key
song_lyrics_final = song_lyrics.merge(singer_genre, on="artists_initial", how="inner")
song_lyrics_final.head(1)

Unnamed: 0,artist_name,song_title,album_name,release_date,youtube_link,song_lyrics,artists_initial,genre
0,Michael Jackson,Billie Jean,Thriller,"November 30, 1982",http://www.youtube.com/watch?v=Zi_XLOBDo_Y,"She was more like a beauty queen from a movie scene\nI said, ""Don't mind, but what do you mean, I am the one\nWho will dance on the floor in the round?""\nShe said I am the one\nWho will dance on the floor in the round\nShe told me her name was Billie Jean\nAs she caused a scene\nThen every head turned with eyes that dreamed of bein' the one\nWho will dance on the floor in the round\n\nPeople always told me, ""Be careful of what you do\nDon't go around breakin' young girls' hearts"" (Hee-hee)\nAnd mother always told me, ""Be careful of who you love\nAnd be careful of what you do (Oh-oh)\n'Cause the lie becomes the truth"" (Oh-oh)\nHey-ay\n\nBillie Jean is not my lover\nShe's just a girl who claims that I am the one (Oh, baby)\nBut the kid is not my son, hoo!\nShe says I am the one (Oh, baby)\nBut the kid is not my son (Hee-hee-hee, no-no-no, hee-hee-hee)\nHoo!\n\nFor forty days and for forty nights, the law was on her side\nBut who can stand when she's in demand?\nHer schemes and plans\n'Cause we danced on the floor in the round, hee!\nSo take my strong advice, just remember to always think twice\n(Don't think twice) Do think twice! (A-hoo!)\nShe told my baby we'd danced 'til three, then she looked at me\nThen showed a photo of a baby cryin', his eyes were like mine (Oh, no)\n'Cause we danced on the floor in the round, baby\n(Ooh, hee-hee-hee)\n\nPeople always told me, ""Be careful of what you do\nAnd don't go around breakin' young girls' hearts""\n(Don't break no hearts!) 
(Hee-hee)\nBut she came and stood right by me\nJust the smell of sweet perfume (Ha-oh)\nThis happened much too soon (Ha-oh, ha-ooh)\nShe called me to her room (Ha-oh, hoo!)\nHey-ay\n\nBillie Jean is not my lover (Hoo!)\nShe's just a girl who claims that I am the one\nBut the kid is not my son\nNo-no-no, no-no-no-no-no-no (Hoo!)\nBillie Jean is not my lover\nShe's just a girl who claims that I am the one (Oh baby)\nBut the kid is not my son (Oh, no, no)\nShe says I am the one (Oh baby)\nBut the kid is not my son (No, hee-hee!)\n\nAh, hee-hee-hee!\nHee! Hoo!\n\nShe says I am the one, but the kid is not my son\nNo-no-no, hoo! (Ouw!)\nBillie Jean is not my lover\nShe's just a girl who claims that I am the one\n(You know what you did to me, baby)\nBut the kid is not my son\nNo-no-no, no-no-no-no (No-no-no, no-no-no)\nShe says I am the one\nBut the kid is not my son (No-no-no-no)\n\nShe says I am the one\nYou know what you did\nShe says he is my son\nBreakin' my heart, babe\nShe says I am the one\nBillie Jean is not my lover\nBillie Jean is not my lover\nBillie Jean is not my lover (She is just a girl)\nBillie Jean is not my lover (She is just a girl)\n(Don't call me Billie Jean)\nBillie Jean is not my lover (She is just a girl)\n(Hoo! She's not at the scene)\nBillie Jean is not (Hee! Aaow! Ooh!)\nBillie Jean is",Michael Jackson,Pop


In [19]:
# Number of songs per genre in the merged dataset (shows the class balance)
song_lyrics_final['genre'].value_counts()

Pop                       110
Alternative Rock/Indie     80
Hip-hop/Rap                50
Classic Rock               50
Punk Rock                  50
Metal                      50
Hard Rock                  45
Electronic/Dance           35
Folk                       30
Jazz                       25
Country                    20
Blues                      20
Reggae                     20
R&B/Soul                   15
Name: genre, dtype: int64

In [21]:
# Save the unlabeled dataset; with the default settings the DataFrame index is
# written as the first (unnamed) column of the CSV file.
song_lyrics_final.to_csv('song_lyrics_unlabeled.csv')