In [75]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup as bs
import os
import re
from api_keys import *

In [74]:
# get artist object from genius api
def request_artist_info(artist_name, page):
    base_url = 'https://api.genius.com'
    headers = {'Authorization': 'Bearer ' + GENIUS_API_TOKEN}
    search_url = base_url + '/search?per_page=10&page=' + str(page)
    data = {'q': artist_name}
    response = requests.get(search_url, data=data, headers=headers)
    return response

def request_song_url(artist_name, song_cap):
    page = 1
    songs = []
    while True:
        response = request_artist_info(artist_name, page)
        json = response.json()
        song_info = []
        for hit in json['response']['hits']:
            if artist_name.lower() in hit['result']['primary_artist']['name'].lower():
                song_info.append(hit)
        for song in song_info:
            if (len(songs) < song_cap):
                url = song['result']['url']
                songs.append(url)
            
        if (len(songs) == song_cap):
            break
        else:
            page += 1
        
    print('found {} songs by {}'.format(len(songs), artist_name))
    return songs

request_song_url('luke bryan', 20)

found 20 songs by luke bryan


['https://genius.com/Luke-bryan-most-people-are-good-lyrics',
 'https://genius.com/Luke-bryan-rain-is-a-good-thing-lyrics',
 'https://genius.com/Luke-bryan-thats-my-kind-of-night-lyrics',
 'https://genius.com/Luke-bryan-what-she-wants-tonight-lyrics',
 'https://genius.com/Luke-bryan-play-it-again-lyrics',
 'https://genius.com/Luke-bryan-light-it-up-lyrics',
 'https://genius.com/Luke-bryan-country-girl-shake-it-for-me-lyrics',
 'https://genius.com/Luke-bryan-strip-it-down-lyrics',
 'https://genius.com/Luke-bryan-drunk-on-you-lyrics',
 'https://genius.com/Luke-bryan-huntin-fishin-and-lovin-every-day-lyrics',
 'https://genius.com/Luke-bryan-home-alone-tonight-lyrics',
 'https://genius.com/Luke-bryan-fast-lyrics',
 'https://genius.com/Luke-bryan-drink-a-beer-lyrics',
 'https://genius.com/Luke-bryan-build-me-a-daddy-lyrics',
 'https://genius.com/Luke-bryan-knockin-boots-lyrics',
 'https://genius.com/Luke-bryan-kick-the-dust-up-lyrics',
 'https://genius.com/Luke-bryan-what-makes-you-country-

In [51]:
def get_attrs(html):
  script = html.find('script', string=re.compile(r'var targeting_list ='))
  if script:
    script_string = script.string
    match = re.search(r'var targeting_list = (\[.*?\]);', script_string)
    if match:
      json_string = match.group(1)
      targeting_list = json.loads(json_string) # dict
    else:
      print('no targeting_list')
  else:
    print('script not found')
  targeting_list = {item['name']: item['values'] for item in targeting_list} # simplify dict
  return targeting_list

In [67]:
def scrape(url):
    page = requests.get(url)
    html = bs(page.text, 'html.parser')

    targeting_list = get_attrs(html)
    title = targeting_list['song_title'][0]
    tag = targeting_list['primary_tag'][0]
    artist = targeting_list['artist_name'][0]
    year = targeting_list['release_year'][0]
    views = targeting_list['pageviews'][0]

    lyrics = html.find(attrs={"data-lyrics-container": "true"}).get_text(' ')
    lyrics = os.linesep.join([s for s in lyrics.splitlines() if s])
    lyrics = re.sub(r'[\(\[].*?[\)\]]', '', lyrics)
    return title, tag, artist, year, views, lyrics

scrape('https://genius.com/Luke-bryan-sunrise-sunburn-sunset-lyrics')

('Sunrise Sunburn Sunset',
 'country',
 'Luke Bryan',
 '2017',
 '21347',
 " Your daddy had a lakehouse He had me paint it that summer sophomore year Said I could stay for free A couple buddies, a couple weeks You rode in in a little red Honda And headed straight for the pier Tank top and cutoff jeans A little PacSun underneath Who knew it was 'bout to be  Sunrise, sunburn, sunset, repeat Moonlight, all night, crashing into me Nothing will ever be easy as you and me Tangled up with nowhere to be Just sunrise, sunburn, sunset, repeat Sunrise, sunburn, sunset, repeat, ayy  I turned those shutters the color of your eyes 'Til the day turned into night And took turns with a jar Turn that boat dock into a bar And then we passed the guitar around the fire to each other And watched all of our friends pair off with one another But we felt like the stars we were under And everybody knew that that was our summer")

In [72]:
super_list = []
for url in request_song_url('john mayer', 20):
    try:
        super_list.append(scrape(url))
    except:
        print('error')
df = pd.DataFrame(super_list, columns=['title', 'tag', 'artist', 'year', 'views', 'lyrics'])
df

Found 20 songs by john mayer


In [73]:
df

Unnamed: 0,title,tag,artist,year,views,lyrics
0,New Light,pop,John Mayer,2018,675535,"Ah , a h , ah Ah... I'm the boy in your oth..."
1,Gravity,rock,John Mayer,2007,499112,Gravity is working against me And gravity wan...
2,Slow Dancing in a Burning Room,rock,John Mayer,2006,442736,It's not a silly little moment It's not the s...
3,You’re Gonna Live Forever in Me,pop,John Mayer,2017,202157,A great big bang and dinosaurs Fiery raining...
4,Free Fallin’ Live,rock,John Mayer,2008,197838,"She's a good girl, loves her mama Loves Jesus..."
5,In the Blood,rock,John Mayer,2017,196332,How much of my mother has my mother left in m...
6,In Your Atmosphere Live,rock,John Mayer,2008,158081,I don't think I'm going to go to L.A. anymore...
7,Daughters,rock,John Mayer,2004,209097,I know a girl She puts the color inside of my...
8,Paper Doll,country,John Mayer,2013,175164,"Paper doll, come try it on Step out of that b..."
9,Waiting on the World to Change,rock,John Mayer,2006,165670,"One, two One, two, three Me and all my frien..."
