In [1]:
import pandas as pd 
import lyricsgenius as lg
import os 
import re
from dotenv import load_dotenv

import requests

In [2]:
df = pd.read_csv('datasets/muse_v3.csv')
df.head()

Unnamed: 0,lastfm_url,track,artist,seeds,number_of_emotion_tags,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
0,https://www.last.fm/music/eminem/_/%2527till%2...,'Till I Collapse,Eminem,['aggressive'],6,4.55,5.273125,5.690625,cab93def-26c5-4fb0-bedd-26ec4c1619e1,4xkOaSrkexMciUUogZKVTS,rap
1,https://www.last.fm/music/metallica/_/st.%2banger,St. Anger,Metallica,['aggressive'],8,3.71,5.833,5.42725,727a2529-7ee8-4860-aef6-7959884895cb,3fOc9x06lKJBhz435mInlH,metal
2,https://www.last.fm/music/rick%2bross/_/speedi...,Speedin',Rick Ross,['aggressive'],1,3.08,5.87,5.49,,3Y96xd4Ce0J47dcalLrEC8,rap
3,https://www.last.fm/music/m.i.a./_/bamboo%2bbanga,Bamboo Banga,M.I.A.,"['aggressive', 'fun', 'sexy', 'energetic']",13,6.555071,5.537214,5.691357,99dd2c8c-e7c1-413e-8ea4-4497a00ffa18,6tqFC1DIOphJkCwrjVzPmg,hip-hop
4,https://www.last.fm/music/dope/_/die%2bmf%2bdie,Die MF Die,Dope,['aggressive'],7,3.771176,5.348235,5.441765,b9eb3484-5e0e-4690-ab5a-ca91937032a5,5bU4KX47KqtDKKaLM4QCzh,metal


In [20]:
# Get the .env variables
load_dotenv()

# Get API TOKEN
def get_genius_access_token():
    return os.getenv("GENIUS_ACCESS")

# Search for song on genius
def search_song_on_genius(lookup_terms):
    base_url = "https://api.genius.com"
    headers = {
        "Authorization": "Bearer " + get_genius_access_token()
    }
    search_url = base_url + "/search"
    data = {
        "q": lookup_terms
    }
    response = requests.get(search_url, data=data, headers=headers)
    return response.json()

# From all song hits, get artist's names
def get_artists(lookup_terms):
    songs = search_song_on_genius(lookup_terms)['response']['hits']
    try:
        artists = [song['result']['artist_names'] for song in songs]
    except Exception as e:
        artists = None

    return artists

# Access Genius API
GENIUS = lg.Genius(get_genius_access_token(), remove_section_headers=True, timeout=120)

# Get lyrics of song
def get_lyrics(song, artist):
    try: 
        song_title = re.sub(r'\(.*\)', '', song)
        print(song_title)
        song = clean_lyrics(GENIUS.search_song(song_title, artist).lyrics)
        # artists = get_artists(' '.join(song[0:20]))
        # print(artists)
        return song
    except:
        return None

# Gets lyrics given df entry
def create_lyrics(row):
    lyrics = get_lyrics(row['track'], row['artist'])
    if(row.name % 10 == 0):
        print(row.name)
    if(lyrics):
        return lyrics
    return None

# Cleans lyric data (NEEDS MORE WORK)
def clean_lyrics(lyrics):
    lyrics = ' '.join(lyrics.split('\n')[1:]) # Remove the user who provided lyrics & song title 
    lyrics = lyrics.lower() 

    lyrics = lyrics[:-5].split(" ") # Remove embedding identifier at end of song
    return lyrics[:-1] + [re.sub(r'\d', '', lyrics[-1])]

In [22]:
# Test create / clean_lyrics
# temp_songs = []
# for i in range(1):
#     current_song = df.iloc[i]
#     temp_songs.append(clean_lyrics(get_lyrics(current_song['track'], current_song['artist'])))


def test_sampling(n, df):
    sampled_df = df.sample(n=n)
    sampled_df['lyrics'] = sampled_df.apply(create_lyrics, axis=1)
    return sampled_df

# test_string = "Rudy (Live)"
# result = re.sub(r'\(.*\)', '', test_string)
# print(result)

sampled_df = test_sampling(10, df)
sampled_df['lyrics'].head()


Twisted Mind
Searching for "Twisted Mind" by HighWay17...
Done.
Ordo Fratrum Minorum 
Searching for "Ordo Fratrum Minorum " by Verney 1826...
No results found for: 'Ordo Fratrum Minorum  Verney 1826'
The talk of all the U.S.A.
Searching for "The talk of all the U.S.A." by Middle Of The Road...
Done.
Yoke
Searching for "Yoke" by Basement...
Done.
Aluminum Unicorn
Searching for "Aluminum Unicorn" by Jack Simons...
No results found for: 'Aluminum Unicorn Jack Simons'
Slow Rider
Searching for "Slow Rider" by Johnny Cash...
Done.
Fed Up
Searching for "Fed Up" by Grief...
Done.
The Last Strike Of Heroes
Searching for "The Last Strike Of Heroes" by March of Heroes...
Done.
Washington Square
Searching for "Washington Square" by Counting Crows...
Done.
Spring, Summer, Winter and Fall
Searching for "Spring, Summer, Winter and Fall" by Aphrodite's Child...
Done.


30614    [living, life, like, a, pirate,, took, it,, na...
59157                                                 None
74285    [she's, a, woman, yes,, i, know, she's, a, wom...
71442    [on, my, indecision, hide, me, in, my, room, a...
30675                                                 None
Name: lyrics, dtype: object

In [54]:
print(get_lyrics("Hemorrhage", "Fuel"))

Searching for "Hemorrhage" by Fuel...
Done.
None


In [23]:
sampled_df.to_csv("./datasets/music_data.csv")

In [14]:
df['seeds'].value_counts()[0:50]

seeds
['sleazy']                              938
['lazy']                                835
['martial']                             799
['exotic']                              799
['fierce']                              792
['organic']                             784
['lyrical']                             775
['gritty']                              770
['erotic']                              766
['technical']                           763
['cheerful']                            761
['optimistic']                          759
['lonely']                              754
['aggressive']                          742
['serious']                             732
['positive']                            732
['light']                               725
['thoughtful']                          725
['gloomy']                              715
['sacred']                              685
['angry']                               683
['whimsical']                           680
['sexy']                  