# Lyrics of Beyoncé's Songs From the Genius API
I followed this tutorial from Melanie Walsh's Intro to Cultural Analytics & Python book:
https://melaniewalsh.github.io/Intro-Cultural-Analytics/04-Data-Collection/08-Collect-Genius-Lyrics.html

In [1]:
# Import Dependencies
from bs4 import BeautifulSoup
import re
import lyricsgenius
import requests
from pathlib import Path
import pandas as pd

In [2]:
def clean_up(song_title):
    """Normalize a song title scraped from a Genius album page.

    If the title carries a "(Ft. ...)" featured-artist credit, everything
    from that credit onward is dropped; otherwise the word "Lyrics" is
    removed. The result is stripped of surrounding whitespace and any "/"
    is replaced with "-".

    Args:
        song_title: Raw title text.

    Returns:
        The cleaned title string.
    """
    # Decide on the regex match itself rather than a bare `"Ft" in title`
    # test: a title that merely contains "Ft" (no "(Ft" credit) used to reach
    # `.group()` on a failed search and raise AttributeError.
    featured_credit = re.search(r".*(?=\(Ft)", song_title)

    if featured_credit is not None:
        title = featured_credit.group(0)
    else:
        title = song_title.replace("Lyrics", "")

    return title.strip().replace("/", "-")

def get_all_songs_from_album(artist, album_name):
    """Return the cleaned song titles listed on a Genius album page.

    Spaces in ``artist`` and ``album_name`` are replaced with hyphens to
    build the ``https://genius.com/albums/<artist>/<album>`` URL. The text of
    every ``<h3 class="chart_row-content-title">`` element on that page is
    passed through ``clean_up``.

    Args:
        artist: Artist name.
        album_name: Album title.

    Returns:
        A list of cleaned song titles, in page order.
    """
    artist_slug = artist.replace(" ", "-")
    album_slug = album_name.replace(" ", "-")

    page = requests.get(f"https://genius.com/albums/{artist_slug}/{album_slug}")
    soup = BeautifulSoup(page.text, "html.parser")

    title_tags = soup.find_all("h3", attrs={"class": "chart_row-content-title"})
    return [clean_up(tag.text) for tag in title_tags]

def download_album_lyrics(artist, album_name, *, client_access_token=None):
    """Download the lyrics of every song on an album into a local folder.

    The track list comes from ``get_all_songs_from_album``. Each song is
    looked up with ``lyricsgenius`` and, when found, saved through
    ``save_lyrics`` (extension ``'txt'``) under a folder named
    ``<artist>_<album>`` with spaces replaced by hyphens. Songs that cannot
    be found print ``No lyrics`` and are skipped.

    Args:
        artist: Artist name as used on Genius.
        album_name: Album title as used on Genius.
        client_access_token: Genius API client access token. When omitted,
            the token embedded below is used.
    """
    if client_access_token is None:
        # You will need to go to Genius Developers to get your own client access token.
        # NOTE(review): this credential is committed in source; it should be
        # rotated and passed in by the caller instead.
        client_access_token = 'OOhXRVg8i53XE0YfrXnEYsMhwOONMAxkagskbY-L9WZXLeOHDXeSF7XE8UbHGkLI'

    genius = lyricsgenius.Genius(client_access_token)
    genius.remove_section_headers = True

    # The target folder is the same for every song, so build its name once.
    artist_title = artist.replace(" ", "-")
    album_title = album_name.replace(" ", "-")
    album_folder = f"{artist_title}_{album_title}"

    for song in get_all_songs_from_album(artist, album_name):
        song_object = genius.search_song(song, artist)

        if song_object is None:
            print('No lyrics')
            continue

        # Chain both replacements; previously the second assignment started
        # again from `song` and threw away the "/" replacement.
        song_title = song.replace("/", "-").replace(" ", "-")

        # Created lazily so an album with no matches leaves no empty folder.
        Path(album_folder).mkdir(parents=True, exist_ok=True)

        song_object.save_lyrics(
            filename=f"{album_folder}/{song_title}",
            extension='txt',
            sanitize=False,
        )

## Specify Your Artists + Albums of Interest

In [3]:
# Download the lyrics for each album of interest, one album at a time.
for album_title in (
    "Dangerously in Love",
    "B'Day",
    "I Am...Sasha Fierce",
    "4",
    "Lemonade",
    "The Lion King: The Gift",
    "Renaissance",
):
    download_album_lyrics("Beyoncé", album_title)

In [4]:
def clean_text(text):
    """Remove Genius page boilerplate from a lyrics string.

    Some light data cleaning - you will need to adjust based on your data.
    Two kinds of residue are each replaced by a single space:

    * the ticket advertisement "See Beyonce LiveGet tickets as low as $<N>"
      (any whole-dollar amount, not only $270), and
    * the trailing "<digits>Embed" marker (the digits are optional).

    Args:
        text: Lyrics text.

    Returns:
        The text with the boilerplate replaced by spaces.
    """
    # Raw strings keep `\$` and `\d` as regex escapes instead of invalid
    # string escapes.
    text = re.sub(r'See Beyonce LiveGet tickets as low as \$\d+', ' ', text)  # remove ad
    text = re.sub(r'\d*Embed', ' ', text)  # remove ending text with number + Embed

    return text

In [5]:
# Specify the folder names with the lyric data from Genius.
# These follow the naming used by download_album_lyrics: "<artist>_<album>"
# with spaces replaced by hyphens. The previous entries (including the
# misspelt 'Lemonaade/') did not match any folder the download step creates.
# NOTE(review): if the folders were renamed by hand after downloading,
# list the names as they exist on disk instead.
directory_paths = ["Beyoncé_Dangerously-in-Love/",
                   "Beyoncé_B'Day/",
                   "Beyoncé_I-Am...Sasha-Fierce/",
                   "Beyoncé_4/",
                   "Beyoncé_Lemonade/",
                   "Beyoncé_The-Lion-King:-The-Gift/",
                   "Beyoncé_Renaissance/"]

In [6]:
pd.options.display.max_rows = 500
pd.set_option('display.max_colwidth', 0)

# Build one [album, song, lyrics] row per .txt file found in the folders
# listed in directory_paths, then create the DataFrame in a single step
# instead of growing it row by row with .loc.
artist_prefix = "Beyoncé_"  # folder prefix written by download_album_lyrics
lyric_rows = []

for album in directory_paths:

    album_dir = Path(album)
    if not album_dir.is_dir():
        # A missing folder would otherwise silently contribute zero rows.
        print(f"Skipping missing lyrics folder: {album}")
        continue

    # Derive the album name from the folder name itself. The old fixed
    # slice album[13:-1] cut names at an arbitrary offset and returned an
    # empty string for short names such as 'BDay/' or '4/'.
    folder_name = album_dir.name
    if folder_name.startswith(artist_prefix):
        folder_name = folder_name[len(artist_prefix):]
    album_name = folder_name.replace("-", " ")

    # Sorted so the row order is reproducible between runs.
    for song in sorted(album_dir.glob('*.txt')):

        # File name without its extension, independent of the path separator.
        song_name = song.stem.replace("-", " ")

        with open(song, encoding="utf-8") as lyrics_file:
            # The first line of each file is dropped.
            # NOTE(review): presumably a title/header line - confirm.
            lyric_lines = lyrics_file.readlines()[1:]

        lyrics = ' '.join(line.replace("\n", "") for line in lyric_lines)
        lyrics = clean_text(lyrics)

        lyric_rows.append([album_name, song_name, lyrics])

beyonce_lyrics = pd.DataFrame(lyric_rows, columns=["Album", "Song Name", "Lyrics"])

In [7]:
# Bare expression: displays the assembled DataFrame as the notebook cell's output.
beyonce_lyrics

Unnamed: 0,Album,Song Name,Lyrics


## Step 4: Export the Lyrics as a CSV File

In [8]:
#beyonce_lyrics.to_csv("beyonce_genius_data.csv", index=False)