In [2]:
import json
import os
from pprint import pprint
from urllib.parse import quote, urljoin

import nltk
import requests
from pymongo import MongoClient

# Retrieve Lyrics Via API

In [3]:
# MongoDB connection settings.
# NOTE(review): 'mongo' is presumably a container/service hostname — confirm
# it resolves in the environment where this notebook runs.
COLLECTION = 'hot-100'
client = MongoClient(host='mongo', port=27017)
db = client['music_db']
songs_collection = db[COLLECTION]

In [4]:
# Pull all songs from DB.
# find() is called without a filter and list() drains the cursor, so every
# document in the collection is held in memory as a plain list.
songs = list(songs_collection.find())

In [5]:
# API credentials are read from the environment instead of being committed
# with the notebook. Export MUSICMATCH_API and APISEED_API before starting the
# kernel; an unset variable yields an empty string.
# NOTE(review): the keys that were previously hardcoded here have been exposed
# in the notebook's history and should be rotated.
MUSICMATCH_API = os.environ.get('MUSICMATCH_API', '')
APISEED_API = os.environ.get('APISEED_API', '')

if not APISEED_API:
    # Fail loudly-but-softly: the lyric client below is built from this key.
    print('Warning: APISEED_API is not set; lyric requests will be sent without a key.')

In [6]:
class ApiSeed:
    """Small client for the apiseeds lyric endpoint.

    Requests are built as ``BASE_URL + <artist>/<track>?apikey=<key>`` and
    issued with ``requests.get``.
    """

    BASE_URL = 'https://orion.apiseeds.com/api/music/lyric/'
    # Seconds to wait for the server before requests raises a timeout error,
    # so one stalled request cannot hang a long batch run forever.
    TIMEOUT = 10

    def __init__(self, api_key):
        """Create a client.

        Args:
            api_key: key sent as the ``apikey`` query parameter on every request.
        """
        self.api_key = api_key
        # When True, _retrieve() decodes the response body from JSON and
        # returns the decoded object; when False it returns the raw
        # requests.Response.
        self.load = True

    def _api_url(self, url):
        """Return the absolute request URL for a path relative to BASE_URL.

        Args:
            url: relative path, expected to be already percent-encoded
                (get_track() takes care of that).

        Returns:
            The full URL including the ``apikey`` query parameter, built from
            the key this instance was constructed with.
        """
        url = urljoin(self.BASE_URL, url)
        return f"{url}?apikey={quote(str(self.api_key), safe='')}"

    def _retrieve(self, url):
        """GET a path relative to BASE_URL.

        Returns:
            The JSON-decoded body when ``self.load`` is true, otherwise the
            raw ``requests.Response``.

        Raises:
            json.JSONDecodeError: if ``self.load`` is true and the body is not
                valid JSON.
            requests.exceptions.RequestException: on connection failure or
                timeout.
        """
        response = requests.get(self._api_url(url), timeout=self.TIMEOUT)
        if self.load:
            response = json.loads(response.content)
        return response

    def get_track(self, artist, track):
        """Fetch the lyric record for ``track`` by ``artist``.

        Both values are percent-encoded as single path segments so that
        characters such as ``/``, ``?`` or ``#`` in a name cannot add a path
        segment or truncate the request path.
        """
        artist_segment = quote(str(artist), safe='')
        track_segment = quote(str(track), safe='')
        return self._retrieve(f'{artist_segment}/{track_segment}')
    

# Module-level client; retrieve_lyrics() further down looks it up by this name.
apiseed = ApiSeed(APISEED_API)
# One sample request (appears to be a manual smoke test); the response is kept
# in `res` for inspection. This performs a live network call when the cell runs.
res = apiseed.get_track('Kanye West', 'Everything I am')

# Insert Lyrics into DB

In [9]:
import time
def insert_into_db():
    """Placeholder: iterates the global ``songs`` list without doing any work yet."""
    for _song in songs:
        continue
    
def update_db(document_id, lyrics: str):
    """Set the ``lyrics`` field on the song document with the given ``_id``.

    Args:
        document_id: value matched against the document's ``_id``.
        lyrics: value stored under ``lyrics``. The fetch loop in this notebook
            passes either the API response or the string ``'null'``.

    Returns:
        Whatever ``find_one_and_update`` returns. With pymongo's default
        settings that is the document as it was *before* the update, or
        ``None`` when no document matched.
    """
    selector = {"_id": document_id}
    change = {"$set": {"lyrics": lyrics}}
    return songs_collection.find_one_and_update(selector, change)

def retrieve_lyrics(track: dict):
    """Look up lyrics for a song document through the shared ``apiseed`` client.

    Reads the ``artist`` and ``title`` keys of ``track`` and returns the
    client's response unchanged.
    """
    return apiseed.get_track(artist=track['artist'], track=track['title'])

def find_and_update_lyrics(track: dict):
    """Fetch lyrics for ``track`` and attach them under its ``lyrics`` key.

    The dict is modified in place and also returned. Nothing is written to the
    database here; persisting is left to the caller.
    """
    track['lyrics'] = retrieve_lyrics(track)
    return track

        
# Index into `songs` at which to start.
# NOTE(review): presumably the resume point of an earlier, interrupted run — confirm.
start_index = 4838
all_tracks = []  # not used anywhere in the visible notebook

for i, track in enumerate(songs[start_index:], start_index):
    try:
        track = find_and_update_lyrics(track)
        # The API response carries an 'error' entry when no lyric was found.
        api_error = track.get('lyrics').get('error')
        status = ("Error", api_error) if api_error else ("Success",)
        print(*status, end=" ")
    except Exception as e:
        # Best effort: any failure (bad JSON, network, unexpected payload) is
        # logged and the song is marked with the 'null' sentinel that
        # has_lyrics() later checks for.
        print(e)
        track['lyrics'] = 'null'

    # Persist the outcome (response or sentinel) before moving on.
    update_db(track['_id'], track['lyrics'])
    print(i, track['title'], end = " | ")
    # Pause between requests to stay gentle on the API.
    time.sleep(.33)



Success 4838 You Gonna Fly | Success 4839 Losing My Religion | Success 4840 Shake | Success 4841 Valerie | Success 4842 Marijuana | Success 4843 Dear John | Error Lyric no found, try again later. 4844 Move That Body | Success 4845 Baby One More Time | Success 4846 Kissin U | Success 4847 Bad Romance | Error Lyric no found, try again later. 4848 Sex Therapy | Error Lyric no found, try again later. 4849 Set The Fire To The Third Bar | Success 4850 Red Light | Error Lyric no found, try again later. 4851 Hell Of A Life | Error Lyric no found, try again later. 4852 Jump | Success 4853 Always Strapped | Success 4854 She Got Her Own | Success 4855 Troubadour | Success 4856 Put A Girl In It | Success 4857 Teenage Love Affair | Success 4858 What Kinda Gone | Success 4859 crushcrushcrush | Success 4860 Taking Chances | Success 4861 4 In The Morning | Success 4862 Settlin' | Success 4863 Tarantula | Success 4864 Doe Boy Fresh | Error Lyric no found, try again later. 4865 King Kong | Success 4866 

In [10]:
# Re-bind the collection handle (same db/COLLECTION as the DB config cell).
songs_collection = db[COLLECTION]

def has_lyrics(song):
    """Tell whether a song document carries usable lyrics.

    Returns False when the ``lyrics`` field is missing or falsy, equals the
    ``'null'`` sentinel, or is a mapping with a truthy ``error`` entry;
    returns True otherwise.
    """
    lyrics = song.get('lyrics')
    if not lyrics or lyrics == 'null':
        return False
    return not lyrics.get('error')


In [11]:
# Re-read the collection and keep only the documents with usable lyrics.
# Note: this re-binds `songs`, replacing the unfiltered list loaded earlier.
songs = list(filter(has_lyrics, songs_collection.find()))

In [12]:
# Number of songs that passed the has_lyrics() filter.
len(songs)

5889