In [1]:
import requests

session = requests.Session()
session.headers = {
    'application': 'LyricRec',
    'User-Agent': 'https://github.com/frankpacini/LyricRec'
}

access_token = 'Bearer ' + open("token2.txt", "r").read()
authorization_header = {'Authorization': access_token}

def getArtistId(artist_name):
    search_response = session.request('GET', 'https://api.genius.com/search', params={'q': artist_name, "per_page": 20}, headers=authorization_header)
    
    for i in range(50):
        search_result = search_response.json()['response']['hits'][i]['result']['primary_artist']
        search_result_name = search_result['name'].replace(u'\u200b', '')
        if search_result_name == artist_name:
            return search_result['id']
        else:
            print("    Incorrect artist name '{}', expected '{}'".format(search_result_name, artist_name))
    return -1

def getArtistSongs(artist_name, artist_id=None):
    if artist_id is None:
        artist_id = getArtistId(artist_name)
    uri = 'https://api.genius.com/artists/{}/songs'.format(artist_id)
    next_page = 1
    songs = []
    while next_page:
        params = {'sort': 'popularity', 'per_page': 50, 'page': next_page}
        response = session.request('GET', uri, params=params, headers=authorization_header)
        try:
            next_page = response.json()['response']['next_page']
        except KeyError:
            print(response.json())
        
        for song in response.json()['response']['songs']:
            if song['primary_artist']['name'] != artist_name or song['lyrics_state'] != 'complete':
                continue
            if 'pageviews' not in song['stats']:
                next_page = None
                break
            songs.append([song['id'], song['title'], artist_name, song['stats']['pageviews'], song['pyongs_count'], song['annotation_count']])
    return songs

In [7]:
with open("artists.txt", encoding="utf-8") as file:
    artists = {line.rstrip() for line in file.readlines()}
artists = list(artists)

In [8]:
ids = []
for artist in artists:
    print(artist)
    ids.append(getArtistId(artist))
artists += ['Prince', 'R.E.M.']
ids += [660, 8243]

Radiohead
Barbra Streisand
Bee Gees
Nirvana
Sly and the Family Stone
The Temptations
JAY-Z
Fleetwood Mac
Arcade Fire
King Crimson
Eagles
Swans
Simon & Garfunkel
Chris Brown
Taylor Swift
Marvin Gaye
    Incorrect artist name 'Charlie Puth', expected 'Marvin Gaye'
Otis Redding
Madvillain
James Brown
Eminem
Madonna
    Incorrect artist name 'Drake', expected 'Madonna'
Céline Dion
MF DOOM
    Incorrect artist name 'Madvillain', expected 'MF DOOM'
Animal Collective
Linkin Park
The Band
Katy Perry
Al Green
The Clash
Mariah Carey
Lou Reed
Patti Smith
    Incorrect artist name 'Patti Smith Group', expected 'Patti Smith'
Chuck Berry
Nick Drake
Buddy Holly
    Incorrect artist name 'Weezer', expected 'Buddy Holly'
Led Zeppelin
Stevie Wonder
Billy Joel
Björk
Van Morrison
Neil Young
Joy Division
The Doors
Metallica
Kate Bush
Tina Turner
    Incorrect artist name 'Ike & Tina Turner', expected 'Tina Turner'
Tool
The Who
Death Grips
The Jimi Hendrix Experience
OutKast
Bruno Mars
Elvis Presley
Oasis
T

In [14]:
import pandas as pd
pd.DataFrame({'Artist': artists, 'Id': ids}).to_csv("artists.csv")

In [16]:
artist_df = pd.read_csv("artists.csv")
songs = []
for _,row in artist_df.iterrows():
    print(row['Artist'])
    songs += getArtistSongs(row['Artist'], row['Id'])

Radiohead
Barbra Streisand
Bee Gees
Nirvana
Sly and the Family Stone
The Temptations
JAY-Z
Fleetwood Mac
Arcade Fire
King Crimson
Eagles
Swans
Simon & Garfunkel
Chris Brown
Taylor Swift
Marvin Gaye
Otis Redding
Madvillain
James Brown
Eminem
Madonna
Céline Dion
MF DOOM
Animal Collective
Linkin Park
The Band
Katy Perry
Al Green
The Clash
Mariah Carey
Lou Reed
Patti Smith
Chuck Berry
Nick Drake
Buddy Holly
Led Zeppelin
Stevie Wonder
Billy Joel
Björk
Van Morrison
Neil Young
Joy Division
The Doors
Metallica
Kate Bush
Tina Turner
Tool
The Who
Death Grips
The Jimi Hendrix Experience
OutKast
Bruno Mars
Elvis Presley
Oasis
The Rolling Stones
U2
Nick Cave & The Bad Seeds
Johnny Cash
Aerosmith
Ray Charles
Eric Clapton
A Tribe Called Quest
Maroon 5
Frank Zappa
Whitney Houston
Curtis Mayfield
Pink Floyd
The Police
Beastie Boys
The Beatles
James Taylor
The Byrds
Black Sabbath
Queen
Adele
my bloody valentine
Coldplay
Joni Mitchell
Lady Gaga
The Velvet Underground
Rihanna
The Cure
Lil Wayne
Aretha Fra

In [17]:
pd.DataFrame(songs, columns=['Id', 'Title', 'Artist', 'Page Views', 'Pyongs Count', 'Annotation Count']).to_csv('songs.csv')

In [136]:
import lyricsgenius as lg
import pandas as pd
TOKEN = open("token.txt", "r").read()
genius = lg.Genius(TOKEN, skip_non_songs=True, 
                   excluded_terms=["(Remix)", "(Live)", "- Live", "- Remix"], remove_section_headers=True)

In [137]:
df = pd.read_csv('songs_with_lyrics.csv').drop('Unnamed: 0', axis=1)
# start = 12571
# for i, row in list(df.iterrows())[start:]:
for i in list(df.loc[df['Lyrics'] == "''"].index):
    row = df.iloc[i]
    for j in range(5):
        try:
            lyrics = genius.lyrics(song_id=row['Id'])
            break
        except requests.Timeout:
            print("Timeout")
            # df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
            df.to_csv('songs_with_lyrics.csv')
            df = pd.read_csv('songs_with_lyrics.csv').drop('Unnamed: 0', axis=1)
            continue
    if j == 5:
        break
    if lyrics:
        lyrics_end = len(lyrics) - len("EmbedShare URLCopyEmbedCopy")
        while lyrics_end > 0:
            if not lyrics[lyrics_end-1].isdigit():
                break
            lyrics_end-=1
        df.at[i, 'Lyrics'] = repr(lyrics[:lyrics_end])
    print(i, row['Title'], row['Artist'])
df.to_csv('songs_with_lyrics.csv')
df.head()

12571 This Isn’t the Place Nine Inch Nails
12572 The Only Time Nine Inch Nails
12573 And All That Could Have Been Nine Inch Nails
12574 The Background World Nine Inch Nails
12575 The Lovers Nine Inch Nails
12576 The Day the World Went Away Nine Inch Nails
12577 La Mer Nine Inch Nails
12578 Sin Nine Inch Nails
12579 The Perfect Drug Nine Inch Nails
12580 Sanctified Nine Inch Nails
12581 Last Nine Inch Nails
12582 The Wretched Nine Inch Nails
12583 Shit Mirror Nine Inch Nails
12584 God Break Down the Door Nine Inch Nails
12585 Gave Up Nine Inch Nails
12586 The Great Below Nine Inch Nails
12587 All Time Low Nine Inch Nails
12588 Burning Bright (Field on Fire) Nine Inch Nails
12589 Into the Void Nine Inch Nails
12590 She’s Gone Away Nine Inch Nails
12591 Ahead of Ourselves Nine Inch Nails
12592 With Teeth Nine Inch Nails
12593 Happiness in Slavery Nine Inch Nails
12594 You Know What You Are? Nine Inch Nails
12595 Branches/Bones Nine Inch Nails
12596 Survivalism Nine Inch Nails
Couldn't fin

12735 Enchanting Ghost Sufjan Stevens
12736 Lamentations Sufjan Stevens
12737 In the Devil’s Territory Sufjan Stevens
12738 Gilgamesh Sufjan Stevens
12739 The Upper Peninsula Sufjan Stevens
12740 Abraham Sufjan Stevens
12741 Exploding Whale Sufjan Stevens
12742 He Woke Me Up Again Sufjan Stevens
12743 Ursa Major Sufjan Stevens
12744 Goodbye to All That Sufjan Stevens
12745 One Last “Whoo-Hoo!” for the Pullman Sufjan Stevens
12746 Landslide Sufjan Stevens
12747 Die Happy Sufjan Stevens
12748 Tonya Harding (in Eb major) Sufjan Stevens
12749 Bad Communication Sufjan Stevens
12750 All Good Naysayers, Speak Up! Or Forever Hold Your Peace! Sufjan Stevens
12751 With My Whole Heart Sufjan Stevens
12752 Say Yes! to M!ch!gan! Sufjan Stevens
12753 A Conjunction of Drones Simulating the Way in Which Sufjan Stevens Has an Existential Crisis in the Great Godfrey Maze Sufjan Stevens
12754 You Are the Blood Sufjan Stevens
12755 Did I Make You Cry on Christmas Day? (Well, You Deserved It!) Sufjan Steve

12928 Don’t Stop ’Til You Get Enough Michael Jackson


ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [138]:
df.to_csv('songs_with_lyrics.csv')

In [39]:
res = session.request('GET', 'https://api.genius.com/songs/2843755', headers=authorization_header).json()

In [41]:
res['response']['song']

{'annotation_count': 3,
 'api_path': '/songs/2843755',
 'apple_music_id': None,
 'apple_music_player_url': 'https://genius.com/songs/2843755/apple_music_player',
 'artist_names': 'Frank Ocean',
 'description': {'dom': {'tag': 'root',
   'children': [{'tag': 'p', 'children': ['?']}]}},
 'embed_content': "<div id='rg_embed_link_2843755' class='rg_embed_link' data-song-id='2843755'>Read <a href='https://genius.com/Frank-ocean-godspeed-screenplay-episode-1-scene-1-annotated'>“Godspeed Screenplay (Episode 1, Scene 1)” by Frank\xa0Ocean</a> on Genius</div> <script crossorigin src='//genius.com/songs/2843755/embed.js'></script>",
 'featured_video': False,
 'full_title': 'Godspeed Screenplay (Episode 1, Scene 1) by\xa0Frank\xa0Ocean',
 'header_image_thumbnail_url': 'https://images.genius.com/9491d0ed6473080fd7b62da1568da37b.300x300x1.jpg',
 'header_image_url': 'https://images.genius.com/9491d0ed6473080fd7b62da1568da37b.700x700x1.jpg',
 'id': 2843755,
 'lyrics_owner_id': 104344,
 'lyrics_placeh

In [73]:
session.request('GET', 'https://api.genius.com/search/', params={'q': "1999 prince"}, headers=authorization_header).json()

{'meta': {'status': 200},
 'response': {'hits': [{'highlights': [],
    'index': 'song',
    'type': 'song',
    'result': {'annotation_count': 18,
     'api_path': '/songs/2770',
     'artist_names': 'Prince',
     'full_title': '1999 by\xa0Prince',
     'header_image_thumbnail_url': 'https://images.genius.com/f9d763e693ae440f5cc5a95ba74a6809.300x300x1.jpg',
     'header_image_url': 'https://images.genius.com/f9d763e693ae440f5cc5a95ba74a6809.1000x1000x1.jpg',
     'id': 2770,
     'lyrics_owner_id': 7,
     'lyrics_state': 'complete',
     'path': '/Prince-1999-lyrics',
     'pyongs_count': 14,
     'song_art_image_thumbnail_url': 'https://images.genius.com/05106ba984ad1810e9043d575d8d5d95.300x292x1.jpg',
     'song_art_image_url': 'https://images.genius.com/05106ba984ad1810e9043d575d8d5d95.600x583x1.jpg',
     'stats': {'unreviewed_annotations': 3, 'hot': False, 'pageviews': 240733},
     'title': '1999',
     'title_with_featured': '1999',
     'url': 'https://genius.com/Prince-1999

In [175]:
print(getArtistId('David Bowie'))
uri = 'https://api.genius.com/artists/{}/songs'.format(getArtistId('David Bowie'))
params = {'sort': 'popularity', 'per_page': 50, 'page': 1}
response = session.request('GET', uri, params=params, headers=authorization_header)
print([song for song in response.json()['response']['songs'] if song['primary_artist']['name'] == "David Bowie"][0])

-1


KeyError: 'response'

In [166]:
print([song for song in response.json()['response']['songs'] if song['primary_artist']['name'] == "Aphex Twin"][0])

{'annotation_count': 2, 'api_path': '/songs/128396', 'artist_names': 'Aphex Twin', 'full_title': 'Avril 14th by\xa0Aphex\xa0Twin', 'header_image_thumbnail_url': 'https://images.genius.com/768f27a906ebe53e4f98398167fb84f0.300x300x1.png', 'header_image_url': 'https://images.genius.com/768f27a906ebe53e4f98398167fb84f0.1000x1000x1.png', 'id': 128396, 'lyrics_owner_id': 116340, 'lyrics_state': 'complete', 'path': '/Aphex-twin-avril-14th-lyrics', 'pyongs_count': 4, 'song_art_image_thumbnail_url': 'https://images.genius.com/768f27a906ebe53e4f98398167fb84f0.300x300x1.png', 'song_art_image_url': 'https://images.genius.com/768f27a906ebe53e4f98398167fb84f0.1000x1000x1.png', 'stats': {'unreviewed_annotations': 0, 'hot': False, 'pageviews': 15070}, 'title': 'Avril 14th', 'title_with_featured': 'Avril 14th', 'url': 'https://genius.com/Aphex-twin-avril-14th-lyrics', 'primary_artist': {'api_path': '/artists/38515', 'header_image_url': 'https://images.genius.com/b2d3bde9ef1f7f9b72f3b74f4ebb069f.1000x42

In [82]:
len(current_df.Artist.unique())

113

In [83]:
current_df.drop_duplicates(['Title', 'Artist']).to_csv('songs2.csv')

In [71]:
df.loc[:, ~df.columns.str.contains('^Unnamed')].to_csv('songs_with_lyrics.csv')

In [73]:
df = pd.read_csv('songs_with_lyrics.csv')
df.drop('Unnamed: 0', axis=1).columns

Index(['Id', 'Title', 'Artist', 'Page Views', 'Pyongs Count',
       'Annotation Count', 'Lyrics'],
      dtype='object')

In [131]:
df = pd.read_csv('songs_with_lyrics.csv')
df['Lyrics'] = df['Lyrics'].fillna('').map(repr)
df.to_csv('songs_with_lyrics.csv')

In [129]:
from ast import literal_eval
df['Lyrics'].map(literal_eval)

0        When you were here before\nCouldn't look you i...
1        Karma police, arrest this man\nHe talks in mat...
2        A heart that's full up like a landfill\nA job ...
3        Please, could you stop the noise?\nI'm trying ...
4        Wake from your sleep\nThe drying of your tears...
                               ...                        
13752                                                     
13753                                                     
13754                                                     
13755                                                     
13756                                                     
Name: Lyrics, Length: 13757, dtype: object

In [139]:
df = pd.read_csv('songs_with_lyrics.csv').drop('Unnamed: 0', axis=1)

In [144]:
list(df.loc[df['Lyrics'] == "''"].index)

[91,
 106,
 110,
 137,
 144,
 603,
 765,
 794,
 799,
 800,
 920,
 931,
 936,
 940,
 948,
 954,
 1640,
 1690,
 1854,
 1857,
 2220,
 2263,
 2267,
 2268,
 2271,
 2552,
 2561,
 2732,
 2734,
 2736,
 2740,
 2745,
 2758,
 3233,
 3248,
 3299,
 3323,
 3329,
 3554,
 3563,
 3564,
 3578,
 3835,
 3854,
 3927,
 4064,
 4070,
 4075,
 4076,
 4078,
 4082,
 4083,
 4084,
 4086,
 4088,
 4089,
 4090,
 4138,
 4140,
 4157,
 4169,
 4269,
 4274,
 4314,
 4424,
 5264,
 5407,
 5411,
 5522,
 5551,
 5587,
 5588,
 5602,
 5623,
 5624,
 5625,
 5628,
 5629,
 5632,
 5664,
 5923,
 6041,
 6064,
 6071,
 6085,
 6174,
 6209,
 6240,
 6313,
 6314,
 6316,
 6388,
 6438,
 6443,
 6451,
 6464,
 6465,
 6466,
 6568,
 6579,
 6592,
 6610,
 7652,
 7655,
 7657,
 8170,
 8247,
 8327,
 8911,
 8920,
 8950,
 8953,
 9011,
 9017,
 9520,
 9639,
 9646,
 9664,
 9680,
 9945,
 9949,
 9952,
 9963,
 9978,
 9982,
 9999,
 10012,
 10068,
 10330,
 10418,
 10422,
 10430,
 10525,
 10534,
 10831,
 11898,
 12033,
 12037,
 12349,
 12450,
 12451,
 12452,
 12453,