In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import secrets
import re
import seaborn as sns
import matplotlib.pyplot as plt
import lyricsgenius
from datetime import datetime

In [2]:
def request_song_info(song_title, artist_name):
    """Search the Genius API for a song by title and artist.

    Args:
        song_title: Song title to search for.
        artist_name: Artist credit; appended to the title (space separated)
            to form the search query.

    Returns:
        The ``requests.Response`` of ``GET https://api.genius.com/search``.
        The HTTP status is not checked here; callers decode the body with
        ``response.json()``.

    Raises:
        requests.exceptions.RequestException: if the connection fails or the
            request times out.
    """
    base_url = 'https://api.genius.com'
    search_url = base_url + '/search'
    # NOTE(review): `secrets` must resolve to a project-local module that
    # defines GENIUS_ACCESS_TOKEN; the standard-library `secrets` module has
    # no such attribute -- confirm the import is not shadowed the wrong way.
    headers = {'Authorization': 'Bearer ' + secrets.GENIUS_ACCESS_TOKEN}
    # Send the query as a URL parameter (?q=...) instead of a GET request body.
    query = {'q': song_title + ' ' + artist_name}
    # The timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(search_url, params=query, headers=headers, timeout=30)

    return response

In [3]:
def is_artists_matching(billboard, genius_hit):
    """Check whether a Billboard artist credit matches a Genius search hit.

    The credit is lower-cased and split at the first featuring marker found
    (checked in the order "featuring", "ft.", "feat."). The part before the
    marker is split on "& " / ", " into primary artists; the parts after it
    are treated as featured artists.

    Args:
        billboard: Artist credit string, e.g. "Lil Nas X Featuring Billy Ray Cyrus".
        genius_hit: One search hit; ``genius_hit['result']['primary_artist']['name']``
            is always read, ``genius_hit['result']['full_title']`` only when the
            credit has featured artists.

    Returns:
        True when every primary artist is a substring of the hit's primary
        artist name and every featured artist is a substring of the hit's
        full title (both compared case-insensitively); False otherwise.
    """
    billboard = billboard.lower()

    lead_credit = billboard
    featured_artists = []
    for marker in ("featuring", "ft.", "feat."):
        if marker in billboard:
            # Literal split: the markers contain "." which would match any
            # character if the marker were used as a regular expression.
            pieces = billboard.split(marker + " ")
            lead_credit = pieces[0]
            featured_artists = pieces[1:]
            break

    hit_primary_name = genius_hit['result']['primary_artist']['name'].lower()
    for artist in re.split("& |, ", lead_credit):
        if artist.strip() not in hit_primary_name:
            return False

    if featured_artists:
        hit_full_title = str(genius_hit['result']['full_title']).lower()
        for featured in featured_artists:
            if featured.strip() not in hit_full_title:
                return False

    return True

In [4]:
def get_best_lyrics(song, response_hits):
    """Return the Genius lyrics URL of the first hit whose artists match.

    Args:
        song: Dict with at least an "artist" key (the Billboard credit).
        response_hits: Iterable of Genius search hits; each matching hit must
            provide ``hit['result']['path']``.

    Returns:
        "https://www.genius.com" followed by the hit's path for the first hit
        accepted by ``is_artists_matching``, or the string "not found" when
        no hit matches.
    """
    matching_urls = (
        "https://www.genius.com" + candidate['result']['path']
        for candidate in response_hits
        if is_artists_matching(song['artist'].lower(), candidate)
    )
    # Hits are scanned in order; the first match wins.
    return next(matching_urls, "not found")

In [5]:
def remove_para(title):
    """Strip parenthesised text (and the parentheses themselves) from a title.

    Every "(" starts skipping characters and every ")" stops it; nesting
    depth is not tracked, and surrounding whitespace is left untouched
    (so "Sunflower (X)" becomes "Sunflower ").

    Args:
        title: The song title.

    Returns:
        The title with parentheses and the characters between them removed.
    """
    kept_chars = []
    skipping = False
    for ch in title:
        if ch == "(":
            skipping = True
        elif ch == ")":
            skipping = False
        elif not skipping:
            kept_chars.append(ch)
    return "".join(kept_chars)

def get_primary_artist(artist):
    """Return the first primary artist from an artist credit string.

    The credit is cut at the first marker found, checked in the order
    "featuring", "ft.", "feat.", " x ". The part before the marker is then
    split on "& " / ", " and its first element is returned.

    The input is not lower-cased here, so markers only match in the case
    shown; the callers in this notebook pass a lower-cased credit.

    NOTE(review): the " x " marker also fires on names that end in a
    standalone "x" followed by more text (e.g. "lil nas x & ..."), which
    truncates the name -- confirm this is intended.

    Args:
        artist: Artist credit string.

    Returns:
        The first primary artist. Whitespace is not stripped, so a trailing
        space may remain (e.g. "lil nas x ").
    """
    # (marker that must be present, literal separator to split on)
    separators = (
        ("featuring", "featuring "),
        ("ft.", "ft. "),
        ("feat.", "feat. "),
        (" x ", " x "),
    )
    lead_credit = artist
    for marker, separator in separators:
        if marker in artist:
            # Literal split: "." in the markers must not act as a regex
            # wildcard, which would cut names like "lefty ..." in the middle.
            lead_credit = artist.split(separator)[0]
            break
    return re.split("& |, ", lead_credit)[0]

In [6]:
def get_genius_urls(top_songs):
    """Look up a Genius lyrics URL for each song and collect the ones found.

    Each song is searched with its title (parenthesised text removed) and
    its full lower-cased artist credit. If no hit matches, the search is
    repeated with only the primary artist. Songs that still have no match
    are left out of the result.

    Args:
        top_songs: Iterable of dicts with "title" and "artist" keys.

    Returns:
        List of lyrics URLs, in input order, for the songs that were found.
    """
    found_urls = []
    for song in top_songs:
        clean_title = remove_para(song["title"])
        credit = song["artist"].lower()

        print("--> searching: " + clean_title + " " + credit)
        hits = request_song_info(clean_title, credit).json()['response']['hits']
        lyric_url = get_best_lyrics(song, hits)

        if lyric_url == "not found":
            # Retry with only the primary artist.
            lead_artist = get_primary_artist(credit)
            print("--> searching again: " + clean_title, lead_artist)
            hits = request_song_info(clean_title, lead_artist).json()['response']['hits']
            lyric_url = get_best_lyrics(song, hits)

        # Only keep songs for which one of the two searches matched.
        if lyric_url != "not found":
            found_urls.append(lyric_url)
    return found_urls

In [7]:

# Download the Billboard 2019 year-end Hot 100 page and parse it.
url = "https://www.billboard.com/charts/year-end/2019/hot-100-songs"
billboard_page = requests.get(url)
soup = BeautifulSoup(billboard_page.text, "html.parser")

# Text of every chart-item <div>, one list per field.
titles = [tag.text for tag in soup.find_all("div", "ye-chart-item__title")]
ranks = [tag.text for tag in soup.find_all("div", "ye-chart-item__rank")]
artists = [tag.text for tag in soup.find_all("div", "ye-chart-item__artist")]

# Build one {"title", "artist"} record per rank entry, with newlines removed
# (other surrounding whitespace is kept), echoing each record as it is added.
top_songs = []
for i in range(len(ranks)):
    song_entry = {
        "title": titles[i].replace("\n", ""),
        "artist": artists[i].replace("\n", ""),
    }
    top_songs.append(song_entry)
    print(top_songs[i])



{'title': 'Old Town Road', 'artist': 'Lil Nas X Featuring Billy Ray Cyrus'}
{'title': 'Sunflower (Spider-Man: Into The Spider-Verse)', 'artist': 'Post Malone & Swae Lee'}
{'title': 'Without Me', 'artist': 'Halsey'}
{'title': 'Bad Guy', 'artist': 'Billie Eilish'}
{'title': 'Wow.', 'artist': 'Post Malone'}
{'title': 'Happier', 'artist': 'Marshmello & Bastille'}
{'title': '7 Rings', 'artist': 'Ariana Grande'}
{'title': 'Talk', 'artist': ' Khalid'}
{'title': 'Sicko Mode', 'artist': 'Travis Scott'}
{'title': 'Sucker', 'artist': 'Jonas Brothers'}
{'title': 'High Hopes', 'artist': 'Panic! At The Disco'}
{'title': 'Thank U, Next', 'artist': 'Ariana Grande'}
{'title': 'Truth Hurts', 'artist': 'Lizzo'}
{'title': 'Dancing With A Stranger', 'artist': 'Sam Smith & Normani'}
{'title': 'Senorita', 'artist': 'Shawn Mendes & Camila Cabello'}
{'title': "I Don't Care", 'artist': 'Ed Sheeran & Justin Bieber'}
{'title': 'Eastside', 'artist': 'benny blanco, Halsey & Khalid'}
{'title': 'Going Bad', 'artist':

In [8]:
# Dry run of the lookup: print the lyrics URL (or "not found") for every song.
for song in top_songs:
    clean_title = remove_para(song["title"])
    credit = song["artist"].lower()

    print("--> searching: " + clean_title + " " + credit)
    response = request_song_info(clean_title, credit)
    json = response.json()
    lyric_url = get_best_lyrics(song, json['response']['hits'])

    if lyric_url == "not found":
        # Retry with only the primary artist.
        lead_artist = get_primary_artist(credit)
        print("--> searching again: " + clean_title, lead_artist)
        response = request_song_info(clean_title, lead_artist)
        json = response.json()
        lyric_url = get_best_lyrics(song, json['response']['hits'])

    print(lyric_url)

--> searching: Old Town Road lil nas x featuring billy ray cyrus
https://www.genius.com/Lil-nas-x-billy-ray-cyrus-and-diplo-old-town-road-diplo-remix-lyrics
--> searching: Sunflower  post malone & swae lee
https://www.genius.com/Post-malone-and-swae-lee-sunflower-lyrics
--> searching: Without Me halsey
https://www.genius.com/Halsey-without-me-lyrics
--> searching: Bad Guy billie eilish
https://www.genius.com/Billie-eilish-bad-guy-lyrics
--> searching: Wow. post malone
https://www.genius.com/Post-malone-wow-lyrics
--> searching: Happier marshmello & bastille
https://www.genius.com/Marshmello-and-bastille-happier-lyrics
--> searching: 7 Rings ariana grande
https://www.genius.com/Ariana-grande-7-rings-lyrics
--> searching: Talk  khalid
https://www.genius.com/Khalid-talk-lyrics
--> searching: Sicko Mode travis scott
https://www.genius.com/Travis-scott-sicko-mode-lyrics
--> searching: Sucker jonas brothers
https://www.genius.com/Jonas-brothers-sucker-lyrics
--> searching: High Hopes panic! 

KeyboardInterrupt: 