1. Get Audio Information from Spotify
2. Insert into DB


In [17]:
import requests
from urllib.parse import urljoin

In [18]:
from pymongo import MongoClient
from pprint import pprint
from typing import List

# DB CONFIG
client = MongoClient('mongo', 27017)
db = client.music_db
songs_collection = db.top_songs

In [19]:
# Pull all songs from DB
SONGS = list(songs_collection.find())
len(SONGS)

8124

In [4]:
def has_lyrics(song):
    if not song.get('lyrics'):
        return False
    
    if song.get('lyrics') == 'null':
        return False

    if song.get('lyrics').get('error'):
        return False
        
    return True

songs = [song for song in SONGS]

In [5]:
class Song:
    def __init__(self, song: dict):
        self.data = song
        self.spotify = {}    # Should be assined with the spotify api
    
    @property
    def title(self):
        return self.data['title']
    
    @property
    def artist(self):
        return self.data['artist']
        
    @property
    def lyrics(self):
        return self.data['lyrics']['result']['track']['text']
    
    @property
    def search_phrase(self):
        return self.data['title'] + " by " + self.data['artist']
    
    def __str__(self):
        return str(self.data)
    
    def __repr__(self):
        return self.__str__()
    
song_list = list(map(Song, songs))

In [6]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

CLIENT_SECRET_KEY = 'c784fff30881479abc02c128d341efe2'
CLIENT_ID = '299217ad61af41beb3f025a8b9bf0d99'


client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET_KEY)
spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [7]:
from typing import Iterable

def retrieve_track_id(name):
    results = spotify.search(q='track:' + name, type='track')
    return results

def chunk(it: Iterable, size: int):
    i = 0
    for j in range(size, len(it), size):
        yield it[i:j]
        i = j

In [8]:
from typing import List
from IPython.display import clear_output

    
def retrieve_and_assign_features(songs: List):
    song_ids = [song.spotify['id'] for song in songs]
    features = spotify.audio_features(tracks=song_ids)
    for song, feature in zip(songs, features):
        song.spotify['audio_features'] = feature
    return songs

def retrieve_and_assign_ids(song_list):
    i = 0
    for song in song_list:
        i += 1
        clear_output(wait=True)
        print(f"Retreiving ID: {i} for {song.search_phrase}\r")
        try:
            response = retrieve_track_id(song.search_phrase)
            song_id = response['tracks']['items'][0]['id']
            song.spotify['id'] = song_id
            yield song
        except IndexError:
            print("Couldn't retrieve ID for", song.search_phrase, response)
            continue
    
#song_list = list(retrieve_and_assign_ids(song_list))

In [None]:
len(song_list)

In [14]:
i=0
for lst in chunk(song_list, 50):
    retrieve_and_assign_features(lst)
    print(i)
    clear_output(wait=True)
    i+=1

91


In [15]:
len(song_list)

4646

In [16]:
def update_db(document_id, spotify_data: dict):
    document = songs_collection.find_one_and_update({"_id": document_id}, 
                                         {"$set": {"spotify": spotify_data}})
    return document

def update_db_from_songs(songs):
    for song in songs:
        update_db(song.data['_id'], song.spotify)

update_db_from_songs(song_list)

In [21]:
# Get album IDs
SONGS = list(songs_collection.find())

In [23]:
songs = [song for song in SONGS if song.get('spotify', {}).get('id')]
len(songs)

4646

In [29]:
i = 0
def retrieve_genres(ids: str):
    global i
    i += 1    
    track = spotify.track(ids)

    artist_id = track['artists'][0]['id']
    album_id = track['album']['id']

    genres = spotify.album(album_id)['genres']
    
    if not genres:
        artist = spotify.artist(artist_id)
        print(artist)
        genres = artist['genres']

    return genres

def update_db(document_id, genre: List):
    document = songs_collection.find_one_and_update({"_id": document_id}, 
                                         {"$set": {"genre": genre}})
    return document

def update_db_from_songs(songs):
    for song in songs:
        genres = retrieve_genres(song['spotify']['id'])
        if genres:
            clear_output(wait=True)
            print("Successfully got genre")
            update_db(song['_id'], genres)

songs = reversed(songs[0:])
update_db_from_songs(songs)

Successfully got genre
{'external_urls': {'spotify': 'https://open.spotify.com/artist/2BgLyUKS3eyXxuPD8CwflY'}, 'followers': {'href': None, 'total': 27394}, 'genres': [], 'href': 'https://api.spotify.com/v1/artists/2BgLyUKS3eyXxuPD8CwflY', 'id': '2BgLyUKS3eyXxuPD8CwflY', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/69ec0fce548de3c199f5d967063984a8dfeb4ba1', 'width': 640}, {'height': 300, 'url': 'https://i.scdn.co/image/f5d64713229b9ca621c52ed5e69d6d06903fe273', 'width': 300}, {'height': 64, 'url': 'https://i.scdn.co/image/5607f507d48936d30ac00de66b105d179a1cd6e4', 'width': 64}], 'name': 'American Country Hits', 'popularity': 26, 'type': 'artist', 'uri': 'spotify:artist:2BgLyUKS3eyXxuPD8CwflY'}
{'external_urls': {'spotify': 'https://open.spotify.com/artist/7twNB7T3Jszx4WIQIbwM7p'}, 'followers': {'href': None, 'total': 49}, 'genres': [], 'href': 'https://api.spotify.com/v1/artists/7twNB7T3Jszx4WIQIbwM7p', 'id': '7twNB7T3Jszx4WIQIbwM7p', 'images': [{'height': 640, 'url': 'ht

In [38]:
ids

[]