In [1]:
import requests
from last_fm_secrets import *
import time
import pandas as pd
import math

class Function_Dictionary_Class:
    """Central lookup table of the string constants used throughout the notebook.

    Every attribute is a plain class-level value (API settings, JSON key names,
    DataFrame column names) so the rest of the code can refer to
    ``func_dict.<name>`` instead of repeating string literals.

    ``api_key``, ``secret`` and ``user`` are copied from module-level names
    (``api_key``, ``secret``, ``username``) that must already exist when the
    class body runs; in this notebook they are expected to come from
    ``from last_fm_secrets import *``.
    """

    # --- Request settings -------------------------------------------------
    # Value sent as the "user-agent" request header.
    user_agent = "Halcyon"
    # Credentials picked up from the enclosing module namespace (see docstring).
    api_key    = api_key
    secret     = secret
    # Endpoint and query parameters for the recent-tracks request.
    api_root_url        = "http://ws.audioscrobbler.com/2.0"
    method              = "user.getRecentTracks"
    user                = username
    json                = "json"

    # --- Keys used to walk the recent-tracks JSON response -----------------
    root_key            = "recenttracks"
    attributes          = "@attr"
    tracks              = "track"
    mb_id               = "mbid"
    artist              = "artist"
    artists             = "artists"
    name                = "name"
    album               = "album"
    date                = "date"
    uts                 = "uts"
    text                = "#text"

    # --- Additional key names ---------------------------------------------
    # None of the names below are referenced by the cells shown in this
    # notebook; NOTE(review): they look like track/album metadata and
    # audio-feature field names for a later enrichment step — confirm.
    idd                 = "id"
    release_date        = "release_date"
    total_tracks        = "total_tracks"
    duration            = "duration_ms"
    explicit            = "explicit"
    popularity          = "popularity"
    danceability        = "danceability"
    energy              = "energy"
    key                 = "key"
    loudness            = "loudness"
    mode                = "mode"
    speechiness         = "speechiness"
    acousticness        = "acousticness"
    instrumentalness    = "instrumentalness"
    liveness            = "liveness"
    valence             = "valence"
    tempo               = "tempo"
    time_signature      = "time_signature"

    class DataFrame_Columns_Class:
        """Column names of the DataFrames built from the retrieved data."""

        # Columns filled in by parse_page.
        track_id        = "track_id"
        artist_id       = "artist_id"
        album_id        = "album_id"
        track_name      = "track_name"
        artist_name     = "artist_name"
        album_name      = "album_name"
        date            = "date"
        # Columns not populated by any cell shown in this notebook.
        album_release   = "album_release_date"
        album_tracks    = "album_total_tracks"
        duration        = "track_duration"
        explicit        = "explicit"
        popularity      = "track_popularity"
    
    def __init__(self):
        """Attach a ``DataFrame_Columns_Class`` instance as ``self.df_columns``."""
        self.df_columns = self.DataFrame_Columns_Class()

# Single shared instance of the constants table; every function in this
# notebook reads its API settings, JSON keys and column names through it.
func_dict = Function_Dictionary_Class()

# HTTP headers attached to every API request (only the user agent is set).
headers = {"user-agent": func_dict.user_agent}

In [2]:
def retrieve_page(user_name, page, timeout=30):
    """Fetch one page of a user's recent tracks from the API.

    Parameters
    ----------
    user_name : str
        Account whose listening history is requested (sent as ``user``).
    page : int
        Page number to request; callers in this notebook start at 1.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        connection would block the whole download indefinitely. Pass
        ``None`` to restore the old wait-forever behaviour.

    Returns
    -------
    The value stored under ``recenttracks -> track`` in the response JSON,
    ready to be handed to ``parse_page``.

    Raises
    ------
    KeyError
        If the response JSON lacks the expected ``recenttracks`` / ``@attr`` /
        ``track`` keys. ``retrieve_all_played_tracks`` relies on this to
        detect a failed page and retry it, so it is deliberately not wrapped.
    ValueError
        If ``page`` is greater than the ``totalPages`` value reported by the
        response.
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    # Query-string parameters for the request: 200 tracks per page, extended
    # track information enabled.
    payload = {
        "api_key":  func_dict.api_key,
        "method":   func_dict.method,
        "format":   func_dict.json,
        "limit":    200,
        "user":     user_name,
        "extended": 1,
        "page":     page
    }

    # Send the request and decode the JSON body of the response.
    r = requests.get(
        func_dict.api_root_url,
        headers=headers,
        params=payload,
        timeout=timeout,
    )
    retrieved_json = r.json()

    # A missing key here raises KeyError, which is the caller's retry signal.
    recent_tracks = retrieved_json[func_dict.root_key]
    attributes = recent_tracks[func_dict.attributes]

    # Generate an error if the requested page exceeds the number of total
    # pages reported by the response.
    if page > int(attributes['totalPages']):
        raise ValueError("Exceeded total number of pages")

    # Return the tracks value from the JSON for processing.
    return recent_tracks[func_dict.tracks]

def parse_page(page_data):
    """Convert one page of track entries into a DataFrame.

    Parameters
    ----------
    page_data : iterable of dict
        Track entries as returned by ``retrieve_page``. Each entry is read
        through the keys ``name``, ``mbid``, ``artist`` (``name``, ``mbid``),
        ``album`` (``#text``, ``mbid``) and ``date`` (``uts``).

    Returns
    -------
    pandas.DataFrame
        One row per dated entry with the columns track_name, track_id,
        artist_name, artist_id, album_name, album_id and date (in that order).
        ``date`` is converted from Unix seconds to a pandas datetime. An empty
        ``page_data`` yields an empty frame that still has all seven columns.

    Notes
    -----
    Entries without a ``date`` key are skipped instead of raising
    ``KeyError``. NOTE(review): per the Last.fm API documentation the track
    currently being played is reported with a ``nowplaying`` flag and no
    date; it is not a completed listen, so dropping it is assumed to be the
    desired behaviour — confirm.
    """
    cols = func_dict.df_columns
    column_order = [
        cols.track_name,
        cols.track_id,
        cols.artist_name,
        cols.artist_id,
        cols.album_name,
        cols.album_id,
        cols.date,
    ]

    # Collect one plain dict per entry; the DataFrame is built once at the end.
    records = []
    for entry in page_data:
        # No timestamp means there is nothing to put in the date column.
        if func_dict.date not in entry:
            continue

        artist = entry[func_dict.artist]
        album = entry[func_dict.album]
        records.append({
            cols.track_name:  entry[func_dict.name],
            cols.track_id:    entry[func_dict.mb_id],
            cols.artist_name: artist[func_dict.name],
            cols.artist_id:   artist[func_dict.mb_id],
            cols.album_name:  album[func_dict.text],
            cols.album_id:    album[func_dict.mb_id],
            # Convert to int here: pandas deprecates parsing *strings* with
            # ``unit=`` in to_datetime, so hand it real numbers.
            cols.date:        int(entry[func_dict.date][func_dict.uts]),
        })

    # Passing ``columns`` keeps the schema stable even when ``records`` is
    # empty, so the date conversion below never hits a missing column.
    return_df = pd.DataFrame(records, columns=column_order)
    return_df[cols.date] = pd.to_datetime(return_df[cols.date], unit="s")
    return return_df

In [3]:
def retrieve_all_played_tracks(user_name, max_retries=10, retry_delay=1.0):
    """Download every page of a user's recent tracks into one DataFrame.

    Parameters
    ----------
    user_name : str
        Account whose listening history is downloaded.
    max_retries : int or None, optional
        Maximum number of attempts per page when ``retrieve_page`` raises
        ``KeyError`` (the signal that the response did not contain the
        expected JSON). ``None`` retries forever, as the previous
        implementation did.
    retry_delay : float, optional
        Base delay in seconds between attempts; the wait grows linearly with
        the attempt number so a struggling or rate-limiting server is not
        hit in a tight loop.

    Returns
    -------
    pandas.DataFrame
        Concatenation of ``parse_page`` for every page, with a fresh integer
        index. If the API reports zero pages, an empty frame with the seven
        ``parse_page`` columns is returned.

    Raises
    ------
    KeyError
        If the initial request (used only to read ``totalPages``) does not
        contain the expected keys.
    RuntimeError
        If a page still fails with ``KeyError`` after ``max_retries``
        attempts.
    """
    # Minimum seconds per page, i.e. at most 4 requests per second. Going
    # faster carries a heavy risk of getting the API key or IP banned.
    min_seconds_per_page = 0.25

    # Retrieve first page, just for attribute examination.
    payload = {
        "api_key": func_dict.api_key,
        "method": func_dict.method,
        "format": func_dict.json,
        "limit": 200,
        "user": user_name,
        "extended": 1,
    }
    # The timeout keeps a stalled connection from hanging the notebook.
    r = requests.get(
        func_dict.api_root_url, headers=headers, params=payload, timeout=30
    )
    retrieved_json = r.json()
    attributes = retrieved_json[func_dict.root_key][func_dict.attributes]
    total_pages = int(attributes['totalPages'])

    # Holds the DataFrame produced by parse_page for each page; they are
    # concatenated once at the end.
    list_of_dataframes = []

    # Page 1 is requested again here rather than reusing the response above.
    for page_number in range(1, total_pages + 1):
        # \r at the beginning is solely for compatibility with VSCode's
        # Python Interactive.
        print(f"\rPage {page_number} of {total_pages}", end="")
        time_start = time.time()

        # A KeyError means the response lacked the expected JSON, so the same
        # page is requested again (no page is ever skipped). Retries are
        # spaced out and bounded so a persistent failure cannot turn into an
        # endless burst of requests.
        attempt = 0
        while True:
            attempt += 1
            try:
                page_data = retrieve_page(user_name, page_number)
                break
            except KeyError as exc:
                if max_retries is not None and attempt >= max_retries:
                    raise RuntimeError(
                        f"Page {page_number} of {total_pages} could not be "
                        f"retrieved after {attempt} attempts"
                    ) from exc
                time.sleep(retry_delay * attempt)

        # Parse the page, then note how long the whole step took.
        list_of_dataframes.append(parse_page(page_data))
        time_taken = time.time() - time_start

        # Debounce: wait out the remainder of the per-page time budget.
        if time_taken < min_seconds_per_page:
            time.sleep(min_seconds_per_page - time_taken)

    # pd.concat raises ValueError on an empty list, so a history with zero
    # pages is returned as an empty frame with the parse_page columns.
    if not list_of_dataframes:
        cols = func_dict.df_columns
        return pd.DataFrame(columns=[
            cols.track_name,
            cols.track_id,
            cols.artist_name,
            cols.artist_id,
            cols.album_name,
            cols.album_id,
            cols.date,
        ])

    # Concatenate all pages and rebuild the index. Rows keep the order in
    # which the API returned them; the frame displayed later in this notebook
    # is newest-first (most recent listen at index 0), not time-ascending.
    return pd.concat(list_of_dataframes, ignore_index=True)


In [4]:
# Load the previously saved listening history from disk instead of hitting
# the API again. No cell shown in this notebook writes listener_df.h5, so a
# fresh "Restart & Run All" needs that file to exist already.
# NOTE(review): parse_dates is not a documented pd.read_hdf parameter; it is
# presumably forwarded as an extra keyword and has no effect here — confirm
# and drop it if so.
df = pd.read_hdf("listener_df.h5", key="df", parse_dates="date")

In [5]:
df

Unnamed: 0,track_name,track_id,artist_name,artist_id,album_name,album_id,date
0,Can't Help,2e585c77-48e8-420d-9649-28c188e9fc0f,Parachute,,Can't Help,75fd3dcb-62e2-4a86-b479-c14903d5f57a,2020-12-17 00:55:08
1,Canned Heat,045ef838-e886-4155-ad57-116bda32b97b,Jamiroquai,,Synkronized,2ab9c2ba-8026-4f80-ae12-56450b1165fb,2020-12-17 00:49:36
2,Beach Bones (Feat. Ryan Ross),,More Amor,,Beach Bones (feat. Ryan Ross),,2020-12-17 00:46:10
3,Push Push (Lady Lightning),63aea131-a5f2-41c3-b689-aad85f9c46ec,Bang Camaro,,Bang Camaro,2defe6da-a0d0-419d-83a7-1709e1f3fc62,2020-12-17 00:41:28
4,Procrastinating,4f90aefc-acda-3807-8ba7-4bb829e1936e,Stellar Kart,,Life Is Good: The Best of Stellar Kart,,2020-12-17 00:32:43
...,...,...,...,...,...,...,...
128343,Cupid Shuffle,2907feb9-c3b0-4b03-94af-ba2b38e7ae93,Cupid,,Time For A Change,4bbbe6a2-64a9-4550-8c1d-c9590591b4d7,2013-09-07 11:05:10
128344,Out of My Head,21195bf8-14f7-3601-8064-2026d46ffc28,Theory of a Deadman,,The Truth Is...,4f1279aa-98af-43dd-9b90-8d0d3d00c12d,2013-09-07 11:02:49
128345,Painkiller,039cd1bb-8fc9-3fcc-a902-0af24a1e05cc,Judas Priest,,Judas Priest - The Essential,,2013-09-07 10:56:31
128346,The Anthem,17e96d04-f759-3125-865e-ec4b19f81ba8,Good Charlotte,,The Anthem,29b20ee4-d4df-40d6-98da-7c1d229ee1da,2013-09-07 10:53:36
