In [2]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from string import punctuation
import re
from nltk.stem import SnowballStemmer
from autocorrect import Speller
from collections import Counter 
import spacy
from datetime import datetime
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import unicodedata

In [3]:
df = pd.DataFrame(pd.read_excel("../data/Hot 100 Audio Features.xlsx"))
df_hotstuff = pd.DataFrame(pd.read_csv("../data/Hot Stuff.csv"))

# drop songs without genres
df.dropna(subset=['spotify_genre'], inplace=True)

# get songs with rap genre
df_rap = pd.DataFrame()
for index, row in df.iterrows():
    genres = df.spotify_genre.squeeze()[index]
    if 'rap' in genres:
        df_rap = df_rap.append(row)

# drop duplicate songs (songs with same songID)
df_rap = df_rap.drop_duplicates(subset=['SongID'], keep='first')

# merge df_rap and df_hotstuff to get weekID
df_merge = pd.merge(df_rap, df_hotstuff, how='left')

# drop songs with no weekID
df_merge.dropna(subset=['WeekID'], inplace=True)

# drop duplicate songs (songs with same songID)
df_merge = df_merge.drop_duplicates(subset=['SongID'],keep='first')

# get years
years = []
for index, row in df_merge.iterrows():
    weekID = df_merge.WeekID[index]
    year = datetime.strptime(weekID, "%m/%d/%Y").year
    years.append(year)
df_merge['Year'] = years

# set up final dataframe with year, performer, and song
df_final = df_merge[['Year', 'Performer', 'Song']]
df_final = df_final.reset_index(drop=True)

df_final

Unnamed: 0,Year,Performer,Song
0,2019,Post Malone Featuring DaBaby,Enemies
1,2019,"Yella Beezy, Gucci Mane & Quavo",Bacc At It Again
2,2019,DaBaby,VIBEZ
3,2019,NF,When I Grow Up
4,2019,Post Malone,Hollywood's Bleeding
...,...,...,...
3589,2016,Chris Brown,Zero
3590,2018,Kodak Black Featuring Travis Scott & Offset,ZEZE
3591,2017,Future,Zoom
3592,2006,Lil' Boosie Featuring Yung Joc,Zoom


In [30]:
# df_final[df_final['Song'].str.contains(r".*[*].*[*]")]

In [4]:
# strip accents from text
# ex. beyoncé --> beyonce
def strip_accents(text):
    try:
        text = unicode(text, 'utf-8')
    except NameError:
        pass
    text = unicodedata.normalize('NFD', text)\
           .encode('ascii', 'ignore')\
           .decode("utf-8")
    return str(text)

In [31]:
def get_url(song_title, artist_name):
    print("Searching for: ", song_title, "-", artist_name)
    
    headers = {'Authorization': 'Bearer ' + 'zZ6YtjOlYsm1o5Me_vIO6MczexIf6k5PGlgiMHi4aO6bnZmsyVdG7J7YQ0VXIOHE'}
    data = {'q': song_title + ' ' + artist_name}
    base_url = 'https://api.genius.com'
    search_url = base_url + '/search'
    
    current_page = 1 # page number of results
    next_page = True
    
    title = re.sub(r'[^a-zA-Z0-9 ]', '', song_title.lower())
    # print("Title:", title)
    
    artist = re.sub(r'[^a-zA-Z0-9 ]', '', artist_name.lower())
    # print("Artist:", artist)
    
    while next_page:
        params = {'page': current_page} # set page number
        response = requests.get(search_url, data=data, headers=headers, params=params)
        d = response.json()
        page_hits = d['response']['hits']
        
        if page_hits:
            for hit in page_hits:
                res = hit['result']
                full_title = res['full_title']
                full_title = strip_accents(full_title)
                # convert full_title to lowercase and remove non-alphanumeric characters
                full_title = re.sub(r'[^a-zA-Z0-9 ]', '', full_title.lower())
                # print("Full Title:", full_title)
                
                if (
                    'lyrics' in res['url'] and
                    title in full_title and 
                    (artist.split()[0] in full_title or
                     artist.split()[1] in full_title) and
                    # song is not a translation
                    'espanol' not in full_title and
                    'nederlandse' not in full_title and
                    'polskie' not in full_title and
                    'portugues' not in full_title and
                    'francaise' not in full_title and
                    'deutsche' not in full_title and
                    'oversttelse' not in full_title and
                    'traduzione' not in full_title and
                    'ceviri' not in full_title and
                    'translation' not in full_title and
                    # song is not a review by rap critic
                    'rap critic' not in full_title and
                    # song is not instrumental
                    'instrumental' not in full_title
                ):
                    url = res['url']
                    print("URL found: ", url)
                    return url
                    
            # increment current_page value for next loop
            current_page += 1
            print("Finished scraping page {}".format(current_page))
        else:
            # if page_hits is empty, stop
            next_page = False
        
    return 0

In [20]:
def get_lyrics(song_title, artist_name):
    url = get_url(song_title, artist_name)
    if url == 0:
        #print("Lyrics not found!")
        return np.NaN
    else:
        page = requests.get(url)
        html = BeautifulSoup(page.text, 'html.parser')
        lyrics = html.find('div', class_='lyrics').get_text()
        return lyrics

In [None]:
lyrics_list = []
numMissing = 0

for i, row in df_final.iterrows():
    if (i % 10 == 0): 
        print ('*******************************', i/len(df_final), '% done')
    artist = row['Performer']
    song = row['Song']
    lyrics = get_lyrics(song, artist)
    if lyrics == np.NaN:
        numMissing += 1
    #print(lyrics)
    lyrics_list.append(lyrics)

******************************* 0.0 % done
--- Searching for:  Enemies - Post Malone Featuring DaBaby
Finished scraping page 2
URL found:  https://genius.com/Post-malone-enemies-lyrics
--- Searching for:  Bacc At It Again - Yella Beezy, Gucci Mane & Quavo
URL found:  https://genius.com/Yella-beezy-quavo-and-gucci-mane-bacc-at-it-again-lyrics
--- Searching for:  VIBEZ - DaBaby
URL found:  https://genius.com/Dababy-vibez-lyrics
--- Searching for:  When I Grow Up - NF
URL found:  https://genius.com/Nf-when-i-grow-up-lyrics
--- Searching for:  Hollywood's Bleeding - Post Malone
URL found:  https://genius.com/Post-malone-hollywoods-bleeding-lyrics
--- Searching for:  Heat - Chris Brown Featuring Gunna
URL found:  https://genius.com/Chris-brown-heat-lyrics
--- Searching for:  Time - NF
URL found:  https://genius.com/Nf-time-lyrics
--- Searching for:  Cuban Links - Rod Wave & Kevin Gates
URL found:  https://genius.com/Rod-wave-cuban-links-lyrics
--- Searching for:  Peta - Roddy Ricch Featurin

URL found:  https://genius.com/Trippie-redd-who-needs-love-lyrics
--- Searching for:  Clout - Offset Featuring Cardi B
URL found:  https://genius.com/Offset-clout-lyrics
--- Searching for:  Candy - Doja Cat
URL found:  https://genius.com/Doja-cat-candy-lyrics
--- Searching for:  Please Me - Cardi B & Bruno Mars
URL found:  https://genius.com/Cardi-b-and-bruno-mars-please-me-lyrics
--- Searching for:  Easy - DaniLeigh Featuring Chris Brown
URL found:  https://genius.com/Danileigh-easy-remix-lyrics
--- Searching for:  Worth It - YK Osiris
URL found:  https://genius.com/Yk-osiris-worth-it-lyrics
******************************* 0.019476905954368393 % done
--- Searching for:  Brown Skin Girl - Beyonce, SAINt JHN & Wizkid Featuring Blue Ivy Carter
Finished scraping page 2
--- Searching for:  OFF THE RIP - DaBaby
URL found:  https://genius.com/Dababy-off-the-rip-lyrics
--- Searching for:  24/7 - Meek Mill Featuring Ella Mai
URL found:  https://genius.com/Meek-mill-24-7-lyrics
--- Searching fo

URL found:  https://genius.com/Summer-walker-potential-lyrics
--- Searching for:  Middle Child - J. Cole
URL found:  https://genius.com/J-cole-middle-child-lyrics
--- Searching for:  Mac 10 - Trippie Redd Featuring Lil Baby & Lil Duke
URL found:  https://genius.com/Trippie-redd-mac-10-lyrics
--- Searching for:  No Guidance - Chris Brown Featuring Drake
URL found:  https://genius.com/Devvon-terrell-no-guidance-chris-brown-ft-drake-devvon-terrell-cover-lyrics
--- Searching for:  Snake Skin - Trippie Redd
URL found:  https://genius.com/Trippie-redd-snake-skin-lyrics
--- Searching for:  Futsal Shuffle 2020 - Lil Uzi Vert
URL found:  https://genius.com/Lil-uzi-vert-futsal-shuffle-2020-lyrics
--- Searching for:  Bandit - Juice WRLD & YoungBoy Never Broke Again
URL found:  https://genius.com/Juice-wrld-and-youngboy-never-broke-again-bandit-lyrics
--- Searching for:  Goodbyes - Post Malone Featuring Young Thug
URL found:  https://genius.com/Genius-traduzioni-italiane-post-malone-goodbyes-ft-yo

URL found:  https://genius.com/Lil-nas-x-and-billy-ray-cyrus-old-town-road-lyrics
--- Searching for:  The Take - Tory Lanez Featuring Chris Brown
URL found:  https://genius.com/Tory-lanez-the-take-lyrics
--- Searching for:  On God - Kanye West
URL found:  https://genius.com/Kanye-west-on-god-lyrics
--- Searching for:  Reply - A Boogie Wit da Hoodie Featuring Lil Uzi Vert
URL found:  https://genius.com/A-boogie-wit-da-hoodie-reply-lyrics
--- Searching for:  How About Now - Drake
Finished scraping page 2
Finished scraping page 3
Finished scraping page 4
Finished scraping page 5
Finished scraping page 6
Finished scraping page 7
Finished scraping page 8
Finished scraping page 9
Finished scraping page 10
Finished scraping page 11
Finished scraping page 12
Finished scraping page 13
Finished scraping page 14
Finished scraping page 15
Finished scraping page 16
Finished scraping page 17
Finished scraping page 18
Finished scraping page 19
Finished scraping page 20
Finished scraping page 21
Finis

Finished scraping page 39
Finished scraping page 40
Finished scraping page 41
Finished scraping page 42
Finished scraping page 43
Finished scraping page 44
Finished scraping page 45
Finished scraping page 46
Finished scraping page 47
Finished scraping page 48
Finished scraping page 49
Finished scraping page 50
Finished scraping page 51
Finished scraping page 52
Finished scraping page 53
Finished scraping page 54
Finished scraping page 55
Finished scraping page 56
Finished scraping page 57
Finished scraping page 58
Finished scraping page 59
Finished scraping page 60
Finished scraping page 61
Finished scraping page 62
Finished scraping page 63
Finished scraping page 64
--- Searching for:  Rap Devil - Machine Gun Kelly
URL found:  https://genius.com/Machine-gun-kelly-rap-devil-lyrics
--- Searching for:  The First Night - Monica
URL found:  https://genius.com/Monica-the-first-night-lyrics
--- Searching for:  Bartier Cardi - Cardi B Featuring 21 Savage
URL found:  https://genius.com/Cardi-b

URL found:  https://genius.com/Lil-jon-and-the-east-side-boyz-get-low-lyrics
--- Searching for:  Holla Holla - Ja Rule
URL found:  https://genius.com/Ja-rule-holla-holla-lyrics
--- Searching for:  Enough Of No Love - Keyshia Cole Featuring Lil Wayne
URL found:  https://genius.com/Keyshia-cole-enough-of-no-love-feat-lil-wayne-lyrics
--- Searching for:  Hear Me Calling - Juice WRLD
URL found:  https://genius.com/Juice-wrld-hear-me-calling-lyrics
--- Searching for:  My Homies Still - Lil Wayne Featuring Big Sean
URL found:  https://genius.com/Lil-wayne-my-homies-still-lyrics
--- Searching for:  Mentirosa - Mellow Man Ace
URL found:  https://genius.com/Mellow-man-ace-mentirosa-lyrics
--- Searching for:  Mama Said Knock You Out - LL Cool J
URL found:  https://genius.com/Ll-cool-j-mama-said-knock-you-out-lyrics
--- Searching for:  Headsprung - LL Cool J
URL found:  https://genius.com/Ll-cool-j-headsprung-lyrics
******************************* 0.08347245409015025 % done
--- Searching for:  Lo

URL found:  https://genius.com/Nicki-minaj-barbie-dreams-lyrics
--- Searching for:  Beautiful - Akon Featuring Colby O'Donis & Kardinal Offishall
URL found:  https://genius.com/Akon-beautiful-lyrics
******************************* 0.1001669449081803 % done
--- Searching for:  DJ Got Us Fallin' In Love - Usher Featuring Pitbull
URL found:  https://genius.com/Usher-dj-got-us-fallin-in-love-lyrics
--- Searching for:  Anaconda - Nicki Minaj
URL found:  https://genius.com/Nicki-minaj-anaconda-lyrics
--- Searching for:  All Me - Drake Featuring 2 Chainz & Big Sean
URL found:  https://genius.com/Allame-all-me-tribute-to-drake-2-chainz-and-big-sean-lyrics
--- Searching for:  1985 (Intro To The Fall Off) - J. Cole
URL found:  https://genius.com/Honr258r-j-cole-interpretation-by-katherine-sebina-1985-intro-to-the-fall-off-lyrics
--- Searching for:  Same Bitches - Post Malone Featuring G-Eazy & YG
URL found:  https://genius.com/Post-malone-same-bitches-lyrics
--- Searching for:  Talk Up - Drake F

Finished scraping page 84
Finished scraping page 85
Finished scraping page 86
Finished scraping page 87
Finished scraping page 88
Finished scraping page 89
Finished scraping page 90
Finished scraping page 91
Finished scraping page 92
Finished scraping page 93
Finished scraping page 94
Finished scraping page 95
Finished scraping page 96
Finished scraping page 97
Finished scraping page 98
Finished scraping page 99
Finished scraping page 100
Finished scraping page 101
--- Searching for:  Burn It Down - Linkin Park
URL found:  https://genius.com/Linkin-park-burn-it-down-lyrics
--- Searching for:  Brackets - J. Cole
URL found:  https://genius.com/J-cole-brackets-lyrics
--- Searching for:  Blue Tint - Drake
URL found:  https://genius.com/Drake-blue-tint-lyrics
--- Searching for:  BEBE - 6ix9ine Featuring Anuel AA
URL found:  https://genius.com/6ix9ine-bebe-lyrics
******************************* 0.11407902058987202 % done
--- Searching for:  Baby By Me - 50 Cent Featuring Ne-Yo
URL found:  ht

Finished scraping page 36
Finished scraping page 37
Finished scraping page 38
Finished scraping page 39
Finished scraping page 40
Finished scraping page 41
Finished scraping page 42
Finished scraping page 43
Finished scraping page 44
Finished scraping page 45
Finished scraping page 46
Finished scraping page 47
Finished scraping page 48
Finished scraping page 49
Finished scraping page 50
Finished scraping page 51
Finished scraping page 52
Finished scraping page 53
Finished scraping page 54
Finished scraping page 55
Finished scraping page 56
Finished scraping page 57
Finished scraping page 58
Finished scraping page 59
Finished scraping page 60
Finished scraping page 61
Finished scraping page 62
Finished scraping page 63
Finished scraping page 64
Finished scraping page 65
Finished scraping page 66
Finished scraping page 67
Finished scraping page 68
Finished scraping page 69
Finished scraping page 70
Finished scraping page 71
Finished scraping page 72
Finished scraping page 73
Finished scr

"\n\n[Intro: Jungkook]\n'Cause I, I, I'm in the stars tonight\nSo watch me bring the fire and set\u2005the\u2005night alight\n\n[Verse 1: Jungkook]\nShoes on,\u2005get up in the morn'\nCup of\u2005milk, let's rock and roll\nKing Kong, kick the drum\nRolling on\u205flike\u205fa\u205fRolling Stone\nSing song\u205fwhen I'm walkin'\u205fhome\nJump up to the top, LeBron\nDing-dong, call me on my phone\nIce tea and a game of ping pong\n\n[Pre-Chorus: RM, j-hope]\nThis is gettin' heavy, can you hear the bass boom? I'm ready (Woo-hoo)\nLife is sweet as honey, yeah, this beat cha-ching like money, huh\nDisco overload, I'm into that, I'm good to go\nI'm diamond, you know I glow up\nHey, so let's go\n\n[Chorus: Jungkook, Jimin]\n'Cause I, I, I'm in the stars tonight\nSo watch me bring the fire and set the night alight (Hey)\nShinin' through the city with a little funk and soul\nSo I'ma light it up like dynamite, woah-oh-oh\n\n[Verse 2: V, RM]\nBring a friend, join the crowd, whoever wanna come al

In [None]:
lyrics

In [None]:
# drop rows with no lyrics
df_final.dropna(subset=['Lyrics'], inplace=True)

# export to csv
df_final.to_csv("Hot100Data3483.csv")