### Spotify Login - For One Hour
After running this cell, click Sign-In and log in to Spotify. If the session expires, run the cell again and repeat the process.

In [None]:
from ipyauth import ParamsSpotify, Auth

# Create the ipyauth sign-in object configured for Spotify. The helper
# functions in this notebook read `auth.access_token` from it.
auth = Auth(
    ParamsSpotify(
        client_id="9e4657eefbac41afa98c61f590d8fd51",
        redirect_uri='http://localhost:8888/callback',
    )
)
# Leave the object as the cell's last expression so the notebook displays it.
auth

### Common Stuff and Imports


In [None]:
import requests
from IPython.display import Image
from pandas.io.json import json_normalize
from pandas import DataFrame,read_pickle
import logging
import os

# Root logger. The helpers below log through the module-level `logging.*`
# functions, which also write to the root logger.
logger = logging.getLogger()
# Uncomment to see the INFO-level progress messages emitted by fetchAllIds.
#logger.setLevel(logging.INFO)

def fetch( path, url=None ):
    """Send an authenticated GET request to the Spotify Web API and return the JSON body.

    Args:
        path: API path appended to ``https://api.spotify.com``. Ignored when
            ``url`` is given.
        url: Optional absolute URL to request instead of ``path`` (used for
            the ``next`` links of paged responses).

    Returns:
        The response body decoded from JSON. A non-200 response is logged as
        a warning (with status code and URL) but its body is still decoded
        and returned, so callers see whatever the server sent.
    """
    callPath = url if url is not None else "https://api.spotify.com" + path
    # The bearer token comes from the notebook-level `auth` object.
    response = requests.get(callPath, headers={"Authorization": "Bearer " + auth.access_token})
    if response.status_code != 200:
        # Include the status and URL so a failed call can actually be diagnosed.
        logging.warning("error %s for %s", response.status_code, callPath)
    return response.json()

def fetchPage( path, offset, limit):
    """Fetch one page of a paged endpoint.

    Appends ``offset`` and ``limit`` as query parameters to ``path`` and
    returns the decoded JSON response from ``fetch``.
    """
    query = "?offset={0!s}&limit={1!s}".format(offset, limit)
    return fetch(path + query)

def fetchAll( path ):
    """Collect the ``items`` of every page of a paged endpoint into one list.

    The first page is requested with offset 0 and limit 50; further pages are
    loaded by following each response's ``next`` URL until the reported
    ``total`` has been consumed or no ``next`` link remains.
    """
    page = fetchPage(path, 0, 50)
    pageLimit = page["limit"]
    remaining = page["total"] - pageLimit
    collected = page["items"]
    while remaining > 0 and page["next"] is not None:
        page = fetch(None, url=page["next"])
        batch = page["items"]
        collected.extend(batch)
        remaining -= len(batch)
    return collected

def fetchPageIds( path, ids):
    """Fetch the objects for a batch of IDs.

    Joins ``ids`` with commas into an ``ids`` query parameter on ``path`` and
    returns the decoded JSON response from ``fetch``.
    """
    joinedIds = ",".join(ids)
    return fetch("{0}?ids={1}".format(path, joinedIds))

def fetchAllIds( path, resultField, ids, pageSize=50, existingDf=None):
    """Fetch the objects for ``ids`` in pages, reusing rows from a cached frame.

    Args:
        path: API path of an endpoint that accepts an ``ids`` query parameter.
        resultField: Key in the JSON response that holds the list of objects.
        ids: IDs to load.
        pageSize: Maximum number of IDs sent per request.
        existingDf: Optional cached DataFrame indexed by ID. Its rows are kept
            and any ID already in its index is not requested again.

    Returns:
        A list of row dicts (``DataFrame.to_dict(orient="records")``): the
        cached rows followed by the newly fetched ones. Returns an empty list
        when there is neither a cache nor anything to fetch.
    """
    # Local import: the file only imports DataFrame/read_pickle from pandas.
    # concat replaces DataFrame.append, which was removed in pandas 2.0.
    from pandas import concat

    ids = list(ids)
    frames = []
    if existingDf is not None:
        # A set gives O(1) membership tests instead of scanning the index array per ID.
        known = set(existingDf.index.values)
        ids = [candidate for candidate in ids if candidate not in known]
        logging.info("Filtering out {0} IDs. Now {1}".format(len(existingDf.index), len(ids)))
        # Move the ID index back into a column so cached and new rows line up.
        frames.append(existingDf.reset_index())
    total = len(ids)
    logging.info("Requesting {0} rows. {1} ... {2}".format(total, path, resultField))
    # Step by the number of IDs requested, not by the number of rows returned,
    # so a short or empty page can never stall the loop.
    for start in range(0, total, pageSize):
        result = fetchPageIds(path, ids[start:start + pageSize])
        # NOTE(review): the API may return null placeholders for unknown IDs;
        # they are skipped so json_normalize only sees real objects. Confirm.
        rows = [row for row in result[resultField] if row is not None]
        if not rows:
            continue
        items = json_normalize(rows)
        if not frames:
            logging.info("Creating new DF {0}".format(len(items)))
        frames.append(items)

    if not frames:
        return []
    # One concat at the end avoids re-copying the accumulated frame per page.
    combined = concat(frames, ignore_index=True, sort=False)
    return combined.to_dict(orient="records")

# Load the signed-in user's profile and show the first profile image.
user = fetch("/v1/me")
# Indexing "images"[0] unconditionally raises when the list is missing or
# empty, so display nothing in that case.
Image(url=user["images"][0]["url"], width=100) if user.get("images") else None

### Read The User Library
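This process may take a little while. The library tracks are cached locally in `mytracks.pkl`, so this step can be skipped once the cache exists. Note that the artist and album cells further down read the in-memory `data` list created here, so they still need this cell to have been run in the current session.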

In [None]:
# Download every saved track, following the paging links.
data = fetchAll("/v1/me/tracks")
# Flatten the nested JSON into columns joined with "_" and index by track id.
tracksDf = json_normalize(data, sep="_")
tracksDf = tracksDf.set_index("track_id")
# Persist the table so it can be reloaded from disk later.
tracksDf.to_pickle("mytracks.pkl")
tracksDf.head()

### Verify the library cache exists

In [None]:
# Reload the pickled library and show a compact view of it.
tracksDf = read_pickle("mytracks.pkl")
tracksCache = tracksDf.loc[:, ["added_at", "track_name", "track_album_name"]]
tracksCache

### Create a Track to Artist Table
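Pick out the `track.artists` array for each library track record. The meta fields (taken from the parent record) are `track.id` and `track.name`. The artist records get the prefix `artist_`, while the meta fields keep their `track_` path prefix; this keeps the columns apart, because both levels have `id` and `name` fields.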


In [None]:
# Tip: inspect data[0] to see the raw shape of one library entry.
# One output row per (track, artist) pair: the records come from
# track.artists, the meta columns from the enclosing track.
artist_and_track = json_normalize(
    data=data,
    record_path=['track', 'artists'],
    meta=[["track", "id"], ["track", "name"]],
    record_prefix='artist_',
    sep="_",
)
# Keep only the id/name columns, in a fixed order.
artist_and_track = artist_and_track.loc[:, ['track_id', 'track_name', 'artist_id', 'artist_name']]
artist_and_track

### Read the Artists
Load the artists in batches (up to 50 IDs per request). Use the Pickle File **artists.pkl** as a cache.
* Create a DataFrame using the cache file if one exists (otherwise None)
* Get the list of all unique artist_ids from the previous DF
* Call `fetchAllIds` - using the artists path, the `artists` JSON field path and the cache DF
* Rebuild `artistsDf` from the returned list of record dicts
* Save the file back to **artists.pkl**

In [None]:
# Use the pickled artist table as a cache when it exists.
if os.path.isfile("artists.pkl"):
    artistsPickle = read_pickle("artists.pkl")
else:
    artistsPickle = None
# Unique artist ids referenced by the library tracks.
artistIds = list(set(artist_and_track["artist_id"].values))
# Only ids missing from the cache are requested; cached rows are returned too.
artists = fetchAllIds("/v1/artists", "artists", artistIds, existingDf=artistsPickle)
artistsDf = json_normalize(artists)
artistsDf = artistsDf.set_index("id")
# Write the merged table back so the next run can reuse it.
artistsDf.to_pickle("artists.pkl")
artistsDf

### Read the Albums

In [None]:
# Use the pickled album table as a cache when it exists.
if os.path.isfile("albums.pkl"):
    albumsPickle = read_pickle("albums.pkl")
else:
    albumsPickle = None
# Unique album ids referenced by the library tracks.
album_ids = list(set(json_normalize(data=data, sep="_")["track_album_id"].values))
# Albums are requested 20 ids at a time (presumably the endpoint's per-call limit; confirm).
albums = fetchAllIds("/v1/albums", "albums", album_ids, pageSize=20, existingDf=albumsPickle)
albumsDf = json_normalize(albums, sep="_")
albumsDf = albumsDf.set_index("id")
# Write the merged table back so the next run can reuse it.
albumsDf.to_pickle("albums.pkl")
# Tip: inspect albumsDf.columns to see every available field.
albumsDf.loc[:, ["name", "release_date", "tracks.total"]]
