In [22]:
import random

In [26]:
import os
import re
import requests
import pandas as pd
import urllib.request
from bs4 import BeautifulSoup
#from absl import logging

import numpy as np


class GeniusArtistDataCollect:
    """Collect song titles, URLs, album names and lyrics for one artist from Genius.

    Titles and URLs are read from the Genius REST API; the album name and the
    lyrics are scraped from the HTML of each song's Genius page.
    """

    def __init__(self, client_access_token, artist_name, SONGS_PER_ARTIST=25):
        """
        Instantiate a GeniusArtistDataCollect object
        :param client_access_token: str - Token to access the Genius API. Create one at https://genius.com/developers
        :param artist_name: str - The name of the artist of interest
        :param SONGS_PER_ARTIST: int - Maximum number of songs to scrape. When the artist has more songs than
            this, a random sample of exactly this many songs is kept.
        """

        self.client_access_token = client_access_token
        self.artist_name = artist_name
        self.SONGS_PER_ARTIST = SONGS_PER_ARTIST

        self.base_url = 'https://api.genius.com/'
        self.headers = {'Authorization': 'Bearer ' + self.client_access_token}

        # Filled in by get_artist_songs(); None until that method has run.
        self.artist_songs = None

    def search(self, query):
        """Makes a search request in the Genius API based on the query parameter. Returns a JSON response.

        :param query: str - The search term
        :return: The decoded JSON body of the API response
        """

        request_url = self.base_url + 'search'
        # A GET request carries its arguments in the URL query string (``params``),
        # not in a request body (``data``).
        response = requests.get(request_url, params={'q': query}, headers=self.headers)

        return response.json()

    def get_artist_songs(self):
        """Gets the songs of self.artist_name and places them in a pandas.DataFrame.

        The artist id is taken from the primary artist of the first search hit. Only
        songs whose primary artist has that id are kept. If more than
        ``self.SONGS_PER_ARTIST`` songs are found, a random sample of that size is
        scraped instead of the full list.

        :return: pandas.DataFrame with the columns Title, URL, Artist, Album, Lyrics
                 (also stored in ``self.artist_songs``)
        :raises IndexError: if the search for the artist returns no hits
        """

        # Search for the artist and get their id
        search_artist = self.search(self.artist_name)
        hits = search_artist['response']['hits']
        if not hits:
            raise IndexError('No Genius search results for artist: ' + str(self.artist_name))
        artist_id = str(hits[0]['result']['primary_artist']['id'])

        print("ID: " + artist_id)

        # Collect plain records first and build the DataFrame once at the end;
        # appending to a DataFrame row by row re-allocates on every insert.
        records = []

        # Iterate through all the pages of the artist's songs
        page = 1
        while True:

            print("page: " + str(page))

            # Make a request to get the songs of an artist on a given page
            request_url = self.base_url + 'artists/' + artist_id + '/songs' + '?per_page=50&page=' + str(page)
            response = requests.get(request_url, headers=self.headers).json()

            # Keep only the songs for which the given artist is the primary artist
            for song in response['response']['songs']:
                if str(song['primary_artist']['id']) == artist_id:
                    records.append({'Title': song['title'], 'URL': song['url']})

            if response['response']['next_page'] is None:
                break
            page += 1

        df = pd.DataFrame(records, columns=['Title', 'URL'])

        # TODO (original author's note): "need add some separator" - intent unclear, confirm.
        ixs = df.index.to_list()
        if len(ixs) > self.SONGS_PER_ARTIST:
            new_ixs = random.sample(ixs, self.SONGS_PER_ARTIST)
            # .copy() so the column assignments below write to an independent frame
            # rather than to a slice of the full song list.
            df = df.iloc[new_ixs].copy()

        # Get the HTML, Album Name, and Song Lyrics from helper methods in the class
        df['Artist'] = self.artist_name
        df['html'] = df['URL'].apply(self.get_song_html)
        df['Album'] = df['html'].apply(self.get_album_from_html)
        # Element-wise Series.apply also works when there are zero rows; a row-wise
        # DataFrame.apply returns a DataFrame for an empty frame and breaks the assignment.
        df['Lyrics'] = df['html'].apply(self.get_lyrics)

        # The parsed pages were only needed to derive Album and Lyrics.
        del df['html']

        self.artist_songs = df

        return self.artist_songs

    def get_song_html(self, url):
        """Downloads the page at the url parameter and returns it parsed as a BeautifulSoup object.

        :param url: str - Address of the Genius song page
        :return: bs4.BeautifulSoup - The parsed page
        """

        request = urllib.request.Request(url)
        request.add_header("Authorization", "Bearer " + self.client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")

        # The context manager closes the HTTP response once it has been parsed.
        with urllib.request.urlopen(request) as page:
            # Name the parser explicitly so the result does not depend on which
            # optional parser libraries happen to be installed.
            html = BeautifulSoup(page, "html.parser")

        print("Scraped: " + url)
        return html

    def get_lyrics(self, html):
        """Scrapes the html parameter to get the song lyrics on a Genius page in one, large String object.

        :param html: Parsed page as returned by get_song_html
        :return: str - The lyrics on a single line, or '' when the page has no ``div`` with class ``lyrics``
        """

        lyrics = html.find("div", class_="lyrics")
        if not lyrics:
            # NOTE(review): pages without such a div yield no lyrics at all -
            # presumably they use a different page layout; confirm.
            return ''

        all_words = lyrics.get_text()

        # Remove identifiers like chorus, verse, etc
        all_words = re.sub(r'[\(\[].*?[\)\]]', '', all_words)

        # Drop empty lines and put the remaining lines on one line
        all_words = ' '.join(line for line in all_words.splitlines() if line)

        # Collapse every run of spaces to a single space
        all_words = re.sub(r' {2,}', ' ', all_words)

        return all_words

    def get_album_from_html(self, html):
        """Scrapes the html parameter to get the album name of the song on a Genius page.

        The album name is the stripped text of the ``span`` that directly follows the
        first ``span`` whose text is exactly 'Album'.

        :param html: Parsed page as returned by get_song_html
        :return: str - The album name, or '' when there is no 'Album' span or nothing follows it
        """

        spans = html.find_all("span")

        # Pair every span with its successor; the final span has no successor and
        # is therefore never treated as a label.
        for label, value in zip(spans, spans[1:]):
            if label.text == 'Album':
                return value.text.strip()

        return ''

In [24]:
# Read the Genius API token from the environment so the secret is never stored in the notebook.
# NOTE(review): the token that was previously committed here should be revoked and replaced.
# When GENIUS_ACCESS_TOKEN is unset the token is an empty string - presumably the API
# then rejects every request; verify.
token = os.environ.get('GENIUS_ACCESS_TOKEN', '')


In [6]:
# Load the artist spreadsheet (first row is the header) and pull the
# 'men' and 'women' columns out as plain Python lists.
df_artists = pd.read_excel('eng_artists.xlsx', header=0)
m_art_list, f_art_list = (df_artists[column].to_list() for column in ('men', 'women'))

In [7]:
# Display the list built from the 'men' column for inspection.
m_art_list

['system of a down',
 'primus',
 'the national',
 'arctic monkeys',
 'sum 41',
 'nickelback',
 'linkin park',
 'idles',
 'louis armstrong',
 'lee hazlewood',
 'isaac hayes',
 "Rag'n'Bone Man",
 'ed sheeran',
 'justin timberlake',
 'justin bieber',
 'hozier',
 'imagine dragons',
 'bruno mars',
 'the weeknd',
 'twenty one pilots',
 'john maus',
 'king gizzard & the lizard wizard',
 'death grips',
 'anderson .paak',
 'zebra katz',
 'tyler, the creator',
 'kendrick lamar']

In [27]:
# Run the collector for a single artist ('hozier') with the default song limit
# and display the resulting DataFrame.
g = GeniusArtistDataCollect(token, 'hozier')
face = g.get_artist_songs()
face

ID: 73910
page: 1


KeyboardInterrupt: 

In [13]:
# Single combined list: the 'men' artists first, then the 'women' artists.
all_artists = [*m_art_list, *f_art_list]

In [11]:
# Show only the rows of `face` whose Lyrics column is not the empty string.
face.loc[face['Lyrics'] != '']

Unnamed: 0,Title,URL,Artist,Album,Lyrics
1,Angel of Small Death & the Codeine Scene,https://genius.com/Hozier-angel-of-small-death...,hozier,Hozier,"I watch the work of my kin, bold and boyful To..."
2,Arsonist’s Lullabye,https://genius.com/Hozier-arsonists-lullabye-l...,hozier,Hozier,"When I was a child, I heard voices Some would ..."
3,As It Was,https://genius.com/Hozier-as-it-was-lyrics,hozier,"Wasteland, Baby!","There is a roadway, muddy and foxgloved Whenev..."
6,Blood,https://genius.com/Hozier-blood-lyrics,hozier,,Tryin' hard to recognise Some pure motive insi...
7,Bridge Over Troubled Water,https://genius.com/Hozier-bridge-over-troubled...,hozier,,"When you're weary, feeling small When tears ar..."
9,Cherry Wine (Live),https://genius.com/Hozier-cherry-wine-live-lyrics,hozier,Hozier,"Her eyes and words are so icy Oh, but she burn..."
12,Dinner & Diatribes,https://genius.com/Hozier-dinner-and-diatribes...,hozier,"Wasteland, Baby!","Honey, this club here is stuck up Dinner and d..."
14,Foreigner’s God,https://genius.com/Hozier-foreigners-god-lyrics,hozier,Hozier,She moved with shameless wonder The perfect cr...
15,From Eden,https://genius.com/Hozier-from-eden-lyrics,hozier,Hozier,Babe There's something tragic about you Someth...
17,Get Away*,https://genius.com/Hozier-get-away-lyrics,hozier,,Lyrics will follow upon release...


In [14]:
# Number of artists in the combined list.
len(all_artists)

54

In [28]:
# Maps each artist name to a list of song records (one dict per DataFrame row).
all_songs_dict = {}

for artist in all_artists:
    g = GeniusArtistDataCollect(token, artist)
    face = g.get_artist_songs()

    # Rows whose Lyrics value is the empty string are left out of the records.
    all_songs_dict[artist] = face[face['Lyrics'].ne('')].to_dict(orient='records')

ID: 16259
page: 1
page: 2
page: 3
Scraped: https://genius.com/System-of-a-down-ddevil-lyrics
Scraped: https://genius.com/System-of-a-down-side-of-the-freeway-lyrics
Scraped: https://genius.com/System-of-a-down-deer-dance-lyrics
Scraped: https://genius.com/System-of-a-down-darts-lyrics
Scraped: https://genius.com/System-of-a-down-daron-malakian-thanks-annotated
Scraped: https://genius.com/System-of-a-down-sugar-lyrics
Scraped: https://genius.com/System-of-a-down-jet-pilot-lyrics
Scraped: https://genius.com/System-of-a-down-ginger-lyrics
Scraped: https://genius.com/System-of-a-down-cigaro-lyrics
Scraped: https://genius.com/System-of-a-down-waiting-for-you-version-2-lyrics
Scraped: https://genius.com/System-of-a-down-nuguns-lyrics
Scraped: https://genius.com/System-of-a-down-nations-lyrics
Scraped: https://genius.com/System-of-a-down-needles-lyrics
Scraped: https://genius.com/System-of-a-down-fuck-the-system-lyrics
Scraped: https://genius.com/System-of-a-down-soldier-side-intro-lyrics
Scr

ID: 8358
page: 1
page: 2
page: 3
Scraped: https://genius.com/Nickelback-edge-of-a-revolution-lyrics
Scraped: https://genius.com/Nickelback-nickelback-leader-of-men-lyrics
Scraped: https://genius.com/Nickelback-follow-you-home-lyrics
Scraped: https://genius.com/Nickelback-satellite-lyrics
Scraped: https://genius.com/Nickelback-cowboy-hat-lyrics
Scraped: https://genius.com/Nickelback-left-lyrics
Scraped: https://genius.com/Nickelback-rockstar-lyrics
Scraped: https://genius.com/Nickelback-dont-ever-let-it-end-lyrics
Scraped: https://genius.com/Nickelback-this-means-war-lyrics
Scraped: https://genius.com/Nickelback-kiss-it-goodbye-lyrics
Scraped: https://genius.com/Nickelback-how-you-remind-me-lyrics
Scraped: https://genius.com/Nickelback-shouldve-listened-lyrics
Scraped: https://genius.com/Nickelback-because-of-you-lyrics
Scraped: https://genius.com/Nickelback-never-again-album-version-lyrics
Scraped: https://genius.com/Nickelback-if-today-was-your-last-day-lyrics
Scraped: https://genius.

Scraped: https://genius.com/Lee-hazlewood-poet-fool-or-bum-lyrics
Scraped: https://genius.com/Lee-hazlewood-for-a-day-like-today-lyrics
Scraped: https://genius.com/Lee-hazlewood-im-blue-lyrics
ID: 369
page: 1
page: 2
page: 3
page: 4
page: 5
page: 6
page: 7
page: 8
page: 9
page: 10
page: 11
page: 12
page: 13
page: 14
Scraped: https://genius.com/Isaac-hayes-if-loving-you-is-wrong-i-dont-want-to-be-right-lyrics
Scraped: https://genius.com/Isaac-hayes-title-theme-lyrics
Scraped: https://genius.com/Isaac-hayes-wonderful-lyrics
Scraped: https://genius.com/Isaac-hayes-pursuit-of-the-pimpmobile-lyrics
Scraped: https://genius.com/Isaac-hayes-the-405-lyrics
Scraped: https://genius.com/Isaac-hayes-the-feeling-that-keeps-on-coming-lyrics
Scraped: https://genius.com/Isaac-hayes-theme-from-the-men-instrumental-lyrics
Scraped: https://genius.com/Isaac-hayes-precious-precious-lyrics
Scraped: https://genius.com/Isaac-hayes-baby-im-a-want-you-lyrics
Scraped: https://genius.com/Isaac-hayes-hot-lava-annot

Scraped: https://genius.com/Justin-bieber-patient-lyrics
Scraped: https://genius.com/Justin-bieber-what-do-you-mean-the-roots-remix-lyrics
Scraped: https://genius.com/Justin-bieber-forever-lyrics
Scraped: https://genius.com/Justin-bieber-hailey-lyrics
Scraped: https://genius.com/Justin-bieber-love-yourself-single-version-lyrics
Scraped: https://genius.com/Justin-bieber-im-sorry-for-my-racist-joke-lyrics
Scraped: https://genius.com/Justin-bieber-confirmation-lyrics
Scraped: https://genius.com/Justin-bieber-stress-lyrics
Scraped: https://genius.com/Justin-bieber-love-you-different-lyrics
Scraped: https://genius.com/Justin-bieber-die-in-your-arms-lyrics
Scraped: https://genius.com/Justin-bieber-where-are-you-now-lyrics
Scraped: https://genius.com/Justin-bieber-mlk-interlude-lyrics
Scraped: https://genius.com/Justin-bieber-miracle-lyrics
Scraped: https://genius.com/Justin-bieber-idea-1-lyrics
Scraped: https://genius.com/Justin-bieber-celebrities-counting-down-to-what-do-you-mean-annotated


Scraped: https://genius.com/Twenty-one-pilots-stressed-out-ajr-remix-lyrics
Scraped: https://genius.com/Twenty-one-pilots-fake-you-out-lyrics
Scraped: https://genius.com/Twenty-one-pilots-time-to-say-goodbye-lyrics
Scraped: https://genius.com/Twenty-one-pilots-we-dont-believe-whats-on-tv-lyrics
Scraped: https://genius.com/Twenty-one-pilots-classicmp3-lyrics
Scraped: https://genius.com/Twenty-one-pilots-my-blood-bbc-radio-1-live-lounge-lyrics
Scraped: https://genius.com/Twenty-one-pilots-polarize-lyrics
Scraped: https://genius.com/Twenty-one-pilots-holding-on-to-you-live-at-the-lc-pavilion-lyrics
Scraped: https://genius.com/Twenty-one-pilots-oh-ms-believer-lyrics
Scraped: https://genius.com/Twenty-one-pilots-jar-of-hearts-lyrics
Scraped: https://genius.com/Twenty-one-pilots-ode-to-sleep-live-from-sxsw-annotated
Scraped: https://genius.com/Twenty-one-pilots-twenty-one-pilots-morph-turkce-ceviri-lyrics
Scraped: https://genius.com/Twenty-one-pilots-ode-to-sleep-only-skeleton-bones-remain-v

Scraped: https://genius.com/Anderson-paak-put-me-thru-lyrics
Scraped: https://genius.com/Anderson-paak-shine-on-lyrics
Scraped: https://genius.com/Anderson-paak-bubblin-remix-lyrics
Scraped: https://genius.com/Anderson-paak-saviers-road-lyrics
Scraped: https://genius.com/Anderson-paak-left-to-right-lyrics
ID: 16863
page: 1
Scraped: https://genius.com/Zebra-katz-pulla-stunt-lyrics
Scraped: https://genius.com/Zebra-katz-necklace-lyrics
Scraped: https://genius.com/Zebra-katz-lousy-lyrics
Scraped: https://genius.com/Zebra-katz-3rd-dgre-lyrics
Scraped: https://genius.com/Zebra-katz-winter-titty-lyrics
Scraped: https://genius.com/Zebra-katz-sleepn-lyrics
Scraped: https://genius.com/Zebra-katz-hello-hi-lyrics
Scraped: https://genius.com/Zebra-katz-w8wtf-lyrics
Scraped: https://genius.com/Zebra-katz-lick-it-n-split-lyrics
Scraped: https://genius.com/Zebra-katz-you-tell-em-lyrics
Scraped: https://genius.com/Zebra-katz-ima-read-le1f-remix-lyrics
Scraped: https://genius.com/Zebra-katz-intro-to-le

Scraped: https://genius.com/Tash-sultana-sweet-and-dandy-lyrics
Scraped: https://genius.com/Tash-sultana-greed-official-remix-lyrics
Scraped: https://genius.com/Tash-sultana-blackbird-lyrics
Scraped: https://genius.com/Tash-sultana-coma-lyrics
Scraped: https://genius.com/Tash-sultana-vanilla-honey-lyrics
Scraped: https://genius.com/Tash-sultana-pretty-lady-lyrics
Scraped: https://genius.com/Tash-sultana-greed-lyrics
Scraped: https://genius.com/Tash-sultana-brain-flower-livesessions-lyrics
Scraped: https://genius.com/Tash-sultana-higher-lyrics
Scraped: https://genius.com/Tash-sultana-pink-moon-lyrics
Scraped: https://genius.com/Tash-sultana-harvest-love-lyrics
Scraped: https://genius.com/Tash-sultana-big-smoke-pt-1-live-lyrics
Scraped: https://genius.com/Tash-sultana-willow-tree-lyrics
Scraped: https://genius.com/Tash-sultana-cant-buy-happiness-lyrics
Scraped: https://genius.com/Tash-sultana-jungle-lyrics
Scraped: https://genius.com/Tash-sultana-beyond-the-pine-lyrics
Scraped: https://g

Scraped: https://genius.com/Cocteau-twins-calfskin-smack-lyrics
Scraped: https://genius.com/Cocteau-twins-pitch-the-baby-lyrics
Scraped: https://genius.com/Cocteau-twins-frou-frou-foxes-in-midsummer-fires-lyrics
Scraped: https://genius.com/Cocteau-twins-itis-all-but-an-ark-lark-lyrics
Scraped: https://genius.com/Cocteau-twins-sea-swallow-me-lyrics
Scraped: https://genius.com/Cocteau-twins-bloody-and-blunt-lyrics
Scraped: https://genius.com/Cocteau-twins-pepper-tree-lyrics
Scraped: https://genius.com/Cocteau-twins-fifty-fifty-clown-lyrics
Scraped: https://genius.com/Cocteau-twins-summer-blink-lyrics
Scraped: https://genius.com/Cocteau-twins-need-fire-lyrics
Scraped: https://genius.com/Cocteau-twins-rococo-lyrics
Scraped: https://genius.com/Cocteau-twins-sugar-hiccup-lyrics
Scraped: https://genius.com/Cocteau-twins-the-tinderbox-of-a-heart-lyrics
Scraped: https://genius.com/Cocteau-twins-how-to-bring-a-blush-to-the-snow-lyrics
Scraped: https://genius.com/Cocteau-twins-great-spangled-frit

Scraped: https://genius.com/Adele-many-shades-of-black-lyrics
Scraped: https://genius.com/Adele-never-tear-us-apart-lyrics
ID: 1177
page: 1
page: 2
page: 3
page: 4
page: 5
page: 6
page: 7
page: 8
page: 9
page: 10
page: 11
page: 12
page: 13
page: 14
page: 15
Scraped: https://genius.com/Taylor-swift-your-face-lyrics
Scraped: https://genius.com/Taylor-swift-teardrops-on-my-guitar-lyrics
Scraped: https://genius.com/Taylor-swift-cardigan-voice-memo-lyrics
Scraped: https://genius.com/Taylor-swift-closure-lyrics
Scraped: https://genius.com/Taylor-swift-this-is-why-we-cant-have-nice-things-lyrics
Scraped: https://genius.com/Taylor-swift-last-kiss-live-2011-lyrics
Scraped: https://genius.com/Taylor-swift-christmas-tree-farm-recorded-live-at-the-2019-iheartradio-jingle-ball-lyrics
Scraped: https://genius.com/Taylor-swift-dark-blue-tennessee-lyrics
Scraped: https://genius.com/Taylor-swift-bad-blood-lyrics
Scraped: https://genius.com/Taylor-swift-amas-artist-of-the-decade-performance-lyrics
Scrape

Scraped: https://genius.com/Ariana-grande-the-honeymoon-tour-special-guests-annotated
Scraped: https://genius.com/Ariana-grande-i-dont-want-to-be-alone-for-christmas-lyrics
Scraped: https://genius.com/Ariana-grande-bad-decisions-dwt-version-lyrics
Scraped: https://genius.com/Ariana-grande-sit-it-upon-it-lyrics
Scraped: https://genius.com/Ariana-grande-the-wizard-and-i-live-lyrics
Scraped: https://genius.com/Ariana-grande-into-you-dangerous-woman-medley-lyrics
Scraped: https://genius.com/Ariana-grande-put-your-hearts-up-lyrics
Scraped: https://genius.com/Ariana-grande-blazed-lyrics
Scraped: https://genius.com/Ariana-grande-sweetener-world-tour-setlist-lyrics
Scraped: https://genius.com/Ariana-grande-pink-champagne-lyrics
Scraped: https://genius.com/Ariana-grande-rem-lyrics
Scraped: https://genius.com/Ariana-grande-pov-lyrics
Scraped: https://genius.com/Ariana-grande-voodoo-love-lyrics
Scraped: https://genius.com/Ariana-grande-be-alright-justin-bieber-cover-lyrics
Scraped: https://genius

Scraped: https://genius.com/Ashnikko-build-a-babe-lyrics
Scraped: https://genius.com/Ashnikko-daisy-30-lyrics
Scraped: https://genius.com/Ashnikko-vanilla-lyrics
Scraped: https://genius.com/Ashnikko-clitoris-the-musical-lyrics
Scraped: https://genius.com/Ashnikko-invitation-lyrics
Scraped: https://genius.com/Ashnikko-wow-cover-lyrics
Scraped: https://genius.com/Ashnikko-nice-girl-lyrics
Scraped: https://genius.com/Ashnikko-little-boy-lyrics
ID: 578324
page: 1
page: 2
page: 3
page: 4
page: 5
page: 6
Scraped: https://genius.com/Nosfe-praf-repack-lyrics
Scraped: https://genius.com/Nosfe-airsoft-20-lyrics
Scraped: https://genius.com/Nosfe-condimente-lyrics
Scraped: https://genius.com/Nosfe-n-a-plecat-lyrics
Scraped: https://genius.com/Nosfe-miau-lyrics
Scraped: https://genius.com/Nosfe-bine-boss-lyrics
Scraped: https://genius.com/Nosfe-paris-lyrics
Scraped: https://genius.com/Nosfe-sunt-bine-boss-lyrics
Scraped: https://genius.com/Nosfe-prototip-lyrics
Scraped: https://genius.com/Nosfe-ban

In [29]:
# Number of artists that have an entry in the scraped-songs dictionary.
len(all_songs_dict)

54

In [31]:
import json

# Write the scraped songs to disk as UTF-8 JSON, indented by four spaces and
# with non-ASCII characters stored as-is rather than escaped.
with open('all_songs_dictlist.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(all_songs_dict, ensure_ascii=False, indent=4))

https://drive.google.com/drive/u/0/folders/0BxlA8wH3PTUfV1F1UTBwVTJPd3c
https://towardsdatascience.com/using-python-to-analyze-the-brutal-lyrics-of-the-black-dahlia-murder-with-genius-api-spacy-bfc7e0e8577f
https://github.com/bendgame/lyrics-analysis/blob/master/TBDManalysis.ipynb

Lemmas


In [134]:
# Apply `lemmatize` to every value of the Lyrics column and store the result in a new 'processed' column.
# NOTE(review): neither `df` nor `lemmatize` is defined in the visible cells - confirm where
# they come from before re-running this notebook from a fresh kernel.
df['processed'] = df['Lyrics'].apply(lemmatize)

In [None]:
# Count the distinct Lyrics values for each Artist.
df.groupby('Artist')['Lyrics'].nunique()