In [12]:
import pandas as pd 
import lyricsgenius as lg
import os 
import re
from dotenv import load_dotenv

import requests

In [3]:
df = pd.read_csv('datasets/muse_v3.csv')
df.head()

Unnamed: 0,lastfm_url,track,artist,seeds,number_of_emotion_tags,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
0,https://www.last.fm/music/eminem/_/%2527till%2...,'Till I Collapse,Eminem,['aggressive'],6,4.55,5.273125,5.690625,cab93def-26c5-4fb0-bedd-26ec4c1619e1,4xkOaSrkexMciUUogZKVTS,rap
1,https://www.last.fm/music/metallica/_/st.%2banger,St. Anger,Metallica,['aggressive'],8,3.71,5.833,5.42725,727a2529-7ee8-4860-aef6-7959884895cb,3fOc9x06lKJBhz435mInlH,metal
2,https://www.last.fm/music/rick%2bross/_/speedi...,Speedin',Rick Ross,['aggressive'],1,3.08,5.87,5.49,,3Y96xd4Ce0J47dcalLrEC8,rap
3,https://www.last.fm/music/m.i.a./_/bamboo%2bbanga,Bamboo Banga,M.I.A.,"['aggressive', 'fun', 'sexy', 'energetic']",13,6.555071,5.537214,5.691357,99dd2c8c-e7c1-413e-8ea4-4497a00ffa18,6tqFC1DIOphJkCwrjVzPmg,hip-hop
4,https://www.last.fm/music/dope/_/die%2bmf%2bdie,Die MF Die,Dope,['aggressive'],7,3.771176,5.348235,5.441765,b9eb3484-5e0e-4690-ab5a-ca91937032a5,5bU4KX47KqtDKKaLM4QCzh,metal


In [73]:
# Get the .env variables
load_dotenv()

# Get API TOKEN
def get_genius_access_token():
    return os.getenv("GENIUS_ACCESS")

def search_song_on_genius(lookup_terms):
    base_url = "https://api.genius.com"
    headers = {
        "Authorization": "Bearer " + get_genius_access_token()
    }
    search_url = base_url + "/search"
    data = {
        "q": lookup_terms
    }
    response = requests.get(search_url, data=data, headers=headers)
    return response.json()

def get_artists(lookup_terms):
    songs = search_song_on_genius(lookup_terms)
    try:
        artists = [song['result']['artist_names'] for song in songs]
    except Exception as E:
        artists = None
    return artists

# Access Genius API
GENIUS = lg.Genius(os.getenv("GENIUS_ACCESS"), remove_section_headers=True, timeout=120)

# Get lyrics of song
def get_lyrics(song, artist):
    try: 
        song = clean_lyrics(GENIUS.search_song(song, artist).lyrics)
        artists = get_artists(' '.join(song[0:5]))
        print(artists)
        # print(song)
        return clean_lyrics(song.lyrics)
    except:
        return None

# Gets lyrics given df entry
def create_lyrics(row):
    lyrics = get_lyrics(row['track'], row['artist'])
    if(row.name % 10 == 0):
        print(row.name)
    if(lyrics):
        return clean_lyrics(lyrics)
    return None

# Cleans lyric data (NEEDS MORE WORK)
def clean_lyrics(lyrics):
    lyrics = ' '.join(lyrics.split('\n')[1:]) # Remove the user who provided lyrics & song title 
    lyrics = lyrics.lower() 

    lyrics = lyrics[:-5].split(" ") # Remove embedding identifier at end of song
    return lyrics[:-1] + [re.sub(r'\d', '', lyrics[-1])]

In [75]:
# Test create / clean_lyrics
# temp_songs = []
# for i in range(1):
#     current_song = df.iloc[i]
#     temp_songs.append(clean_lyrics(get_lyrics(current_song['track'], current_song['artist'])))

import json

get_artists("Last Christmas")

{'meta': {'status': 200}, 'response': {'hits': []}}


TypeError: string indices must be integers

In [7]:
df['lyrics'] = df.apply(create_lyrics, axis=1)

Searching for "'Till I Collapse" by Eminem...
Done.
0
Searching for "St. Anger" by Metallica...
Done.
Searching for "Speedin'" by Rick Ross...
Done.
Searching for "Bamboo Banga" by M.I.A....
Done.
Searching for "Die MF Die" by Dope...
Done.
Searching for "Step Up" by Drowning Pool...
Done.
Searching for "Feedback" by Kanye West...
Done.
Searching for "7 Words" by Deftones...
Done.
Searching for "Limp" by Fiona Apple...
Done.
Searching for "Sweet Amber" by Metallica...
Done.
Searching for "Depression" by Black Flag...
Done.
10
Searching for "Comprachicos" by Pendulum...
Done.
Searching for "When Girls Telephone Boys" by Deftones...
Done.
Searching for "Two Words" by Kanye West...
Done.
Searching for "What I See" by Black Flag...
Done.
Searching for "Requiem" by Lamb of God...
Done.
Searching for "Room 13" by Black Flag...
Done.
Searching for "Shake Ya Ass" by Mystikal...
Done.
Searching for "Combat" by Deftones...
Done.
Searching for "Glue Man" by Fugazi...
Done.
Searching for "Boom Boo

In [11]:
df.to_csv("./datasets/music_data.csv")

In [14]:
df['seeds'].value_counts()[0:50]

seeds
['sleazy']                              938
['lazy']                                835
['martial']                             799
['exotic']                              799
['fierce']                              792
['organic']                             784
['lyrical']                             775
['gritty']                              770
['erotic']                              766
['technical']                           763
['cheerful']                            761
['optimistic']                          759
['lonely']                              754
['aggressive']                          742
['serious']                             732
['positive']                            732
['light']                               725
['thoughtful']                          725
['gloomy']                              715
['sacred']                              685
['angry']                               683
['whimsical']                           680
['sexy']                  