# Using Genius APIs to Pull out Lyrics for Top Songs Based on Streaming Popularity

In [24]:
# Import dependencies
import requests
import numpy as np
import pandas as pd
from config import auth, genius_bearer_token
import json
from bs4 import BeautifulSoup
import re

## Pulling in data from playlist csvs

In [27]:
# create base url
base_dataframe_path = "../../Data_Files/Play_Lists/"

# create list of all of the dataframe names
playlists_names = ["today_top_hits", "rap_caviar", "mint", "rock_this", "are_and_be", 
             "viva_latino", "hot_country", "new_music_friday_malaysia",
             "viral_hits", "fresh_finds", "global_top_50", "united_states_top_50",
             "global_viral_50", "united_states_viral_50"]

In [28]:
# loop through all of the playlist names and pull in data into the appropriate dataframes
# Dataframes by playlist
today_top_hits = pd.read_csv(base_dataframe_path + "today_top_hits.csv")
rap_caviar = pd.read_csv(base_dataframe_path + "rap_caviar.csv")
mint = pd.read_csv(base_dataframe_path + "mint.csv")
rock_this = pd.read_csv(base_dataframe_path + "rock_this.csv")
are_and_be = pd.read_csv(base_dataframe_path + "are_and_be.csv")
viva_latino = pd.read_csv(base_dataframe_path + "viva_latino.csv")
hot_country = pd.read_csv(base_dataframe_path + "hot_country.csv")
new_music_friday_malaysia = pd.read_csv(base_dataframe_path + "new_music_friday_malaysia.csv")
viral_hits = pd.read_csv(base_dataframe_path + "viral_hits.csv")
fresh_finds = pd.read_csv(base_dataframe_path + "fresh_finds.csv")
global_top_50 = pd.read_csv(base_dataframe_path + "global_top_50.csv")
united_states_top_50 = pd.read_csv(base_dataframe_path + "united_states_top_50.csv")
global_viral_50 = pd.read_csv(base_dataframe_path + "global_viral_50.csv")
united_states_viral_50 = pd.read_csv(base_dataframe_path + "united_states_viral_50.csv")

In [29]:
today_top_hits.head()

Unnamed: 0.1,Unnamed: 0,Playlist,Track Name,Artist Name,Duration (ms),Popularity,Instrumentalness,Lyrics Path
0,0,Today's Top Hits,Happier,Marshmello,214289,89,0.0,../Data_Files/Song_Lyrics/today_top_hits/Happi...
1,1,Today's Top Hits,Eastside (with Halsey & Khalid),benny blanco,173799,95,0.0,../Data_Files/Song_Lyrics/today_top_hits/Easts...
2,2,Today's Top Hits,Be Alright,Dean Lewis,196373,90,0.0,../Data_Files/Song_Lyrics/today_top_hits/BeAlr...
3,3,Today's Top Hits,Whenever (feat. Conor Maynard),Kris Kross Amsterdam,163636,69,0.0,../Data_Files/Song_Lyrics/today_top_hits/Whene...
4,4,Today's Top Hits,Promises (with Sam Smith),Calvin Harris,213309,90,5e-06,../Data_Files/Song_Lyrics/today_top_hits/Promi...


## Working with Genius API

In [20]:
# define the url parameters for the API request
genius_base_url = "http://api.genius.com"
genius_headers = {'Authorization': 'Bearer ' + genius_bearer_token}
genius_search_url = genius_base_url + "/search"

# define path to store lyrics at
lyric_base_path = "../Data_Files/Song_Lyrics"

# create list to store all dataframes of playlists
playlists = [today_top_hits, rap_caviar, mint, rock_this, are_and_be, 
             viva_latino, hot_country, new_music_friday_malaysia,
             viral_hits, fresh_finds, global_top_50, united_states_top_50,
             global_viral_50, united_states_viral_50]

playlist_counter = 0

In [21]:
# loop through all dataframes of playlists to store song lyrics
for playlist in playlists:

    # create new column in play list data frame for link to song lyric .txt file
    playlist["Lyrics Path"] = ""
    # define storage path to lyric .txt files
    playlist_lyrics_path = lyric_base_path + "/" + str(playlists_names[playlist_counter]) + "/"

    # loop through each row in the data frame
    for index, row in playlist.iterrows():

        # search for a test song
        song_title = row["Track Name"]
        artist_name = row["Artist Name"]
        genius_data = {'q': song_title + ' ' + artist_name}
        genius_response = requests.get(genius_search_url, params=genius_data, headers=genius_headers)
        genius_json = genius_response.json()

        # loop through hit in json request to pull out song url
        for hit in genius_json["response"]["hits"]:
            if hit["result"]["primary_artist"]["name"] == artist_name:
                song_url = hit["result"]["url"]
                break

        # use beautiful soup to crawl found song url and store the lyrics
        song_page = requests.get(song_url)
        song_html = BeautifulSoup(song_page.text, 'html.parser')
        song_lyrics = song_html.find('div', class_='lyrics').get_text()

        # store the lyrics in a .txt file and save to the project folder
        text_file_name = (song_title + "_" + artist_name)
        text_file_name = re.sub(r"[^a-zA-Z0-9_]+", '', text_file_name)
        txt_file = open(playlist_lyrics_path + text_file_name + ".txt", "w") 
        txt_file.write(song_lyrics) 
        txt_file.close()

        # store the path to the file in the dataframe
        playlist["Lyrics Path"][index] = (playlist_lyrics_path + text_file_name + ".txt")

    # save the dataframe as a csv file
    file_name = str(playlists_names[playlist_counter])
    file_name = re.sub(r"[^a-zA-Z0-9_]+", '', file_name)
    playlist.to_csv("../Data_Files/Play_Lists/" + file_name + ".csv")
    
    playlist_counter += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
