# tutorial

In [1]:
import requests
import json

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

def lastfm_get(payload):
    # define headers and URL
    headers = {'user-agent': USER_AGENT}
    url = 'http://ws.audioscrobbler.com/2.0/'

    # Add API key and format to the payload
    payload['api_key'] = API_KEY
    payload['format'] = 'json'

    response = requests.get(url, headers=headers, params=payload)
    return response

In [2]:
API_KEY = '77fdfb965fa75a138d0e93d71038b7b0'
USER_AGENT = 'gnaprs'

r = lastfm_get({
    'method': 'chart.gettopartists'
})

r.status_code
jprint(r.json())

{
    "artists": {
        "@attr": {
            "page": "1",
            "perPage": "50",
            "total": "3822155",
            "totalPages": "76444"
        },
        "artist": [
            {
                "image": [
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png",
                        "size": "small"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png",
                        "size": "medium"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png",
                        "size": "large"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png",
                        "si

In [9]:
import requests_cache
import time
from IPython.core.display import clear_output

requests_cache.install_cache()
responses = []

page = 1
total_pages = 99999 # this is just a dummy number so the loop starts

while page <= total_pages:
    payload = {
        'method': 'chart.gettopartists',
        'limit': 500,
        'page': page
    }

    # print some output so we can see the status
    print("Requesting page {}/{}".format(page, total_pages))
    # clear the output to make things neater
    clear_output(wait = True)

    # make the API call
    response = lastfm_get(payload)

    # if we get an error, print the response and halt the loop
    if response.status_code != 200:
        print(response.text)
        break

    # extract pagination info
    page = int(response.json()['artists']['@attr']['page'])
    total_pages = int(response.json()['artists']['@attr']['totalPages'])

    # append response
    responses.append(response)

    # if it's not a cached result, sleep
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)

    # increment the page number
    page += 1

Requesting page 7645/7645


## data processing

In [13]:
import pickle

filepath_logs='lastfm_topartist.pkl'
with open(filepath_logs, 'wb') as f:
    pickle.dump(responses, f)

In [14]:
import pandas as pd

r0 = responses[0]
r0_json = r0.json()
r0_artists = r0_json['artists']['artist']
r0_df = pd.DataFrame(r0_artists)
# r0_df.head()

In [15]:
frames = [pd.DataFrame(r.json()['artists']['artist']) for r in responses]
artists = pd.concat(frames)
artists.head()

artists.info()
artists.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12000 entries, 0 to 499
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        12000 non-null  object
 1   playcount   12000 non-null  object
 2   listeners   12000 non-null  object
 3   mbid        12000 non-null  object
 4   url         12000 non-null  object
 5   streamable  12000 non-null  object
 6   image       12000 non-null  object
dtypes: object(7)
memory usage: 750.0+ KB


Unnamed: 0,name,playcount,listeners,mbid,url,streamable,image
count,12000,12000,12000,12000.0,12000,12000,12000
unique,9984,9978,9866,6577.0,9984,1,2
top,The Story So Far,803418,105261,,https://www.last.fm/music/Far+East+Movement,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
freq,2,3,4,3907.0,2,12000,11860


In [16]:
artist_counts = [len(r.json()['artists']['artist']) for r in responses]
print(artist_counts[:50])
pd.Series(artist_counts).value_counts()

[500, 1000, 500, 1000, 500, 1000, 500, 1000, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


0       7625
500       16
1000       4
dtype: int64

In [17]:
artists = artists.drop_duplicates().reset_index(drop=True)
artists.describe()

TypeError: unhashable type: 'list'

## second end point

In [None]:
r = lastfm_get({
    'method': 'artist.getTopTags',
    'artist':  'Lana Del Rey'
})

jprint(r.json())

tags = [t['name'] for t in r.json()['toptags']['tag'][:3]]
tags

In [None]:
', '.join(tags)

def lookup_tags(artist):
    response = lastfm_get({
        'method': 'artist.getTopTags',
        'artist':  artist
    })

    # if there's an error, just return nothing
    if response.status_code != 200:
        return None

    # extract the top three tags and turn them into a string
    tags = [t['name'] for t in response.json()['toptags']['tag'][:3]]
    tags_str = ', '.join(tags)

    # rate limiting
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)
    return tags_str

lookup_tags("Billie Eilish")

In [None]:
from tqdm import tqdm
tqdm.pandas()

artists['tags'] = artists['name'].progress_apply(lookup_tags)

# me playing around

In [3]:
u = lastfm_get({
    'method': 'user.getRecentTracks',
    'user': 'gnaprs',
    'from': 0
})

u.status_code
jprint(u.json())

{
    "recenttracks": {
        "@attr": {
            "page": "1",
            "perPage": "50",
            "total": "23528",
            "totalPages": "471",
            "user": "gnaprs"
        },
        "track": [
            {
                "album": {
                    "#text": "Take",
                    "mbid": ""
                },
                "artist": {
                    "#text": "Shaun",
                    "mbid": "b9c8f785-0ef8-4f88-922c-64e9e71a58de"
                },
                "date": {
                    "#text": "09 Dec 2020, 08:22",
                    "uts": "1607502147"
                },
                "image": [
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/34s/2a4ff59dc08ffe9247f94da59ac1c7f9.jpg",
                        "size": "small"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/64s/2a4ff59dc08ffe9247f94da59ac1c7f9.jpg",
 

In [6]:
jprint(u.json()['recenttracks']['@attr'])

rt = u.json()['recenttracks']['@attr']
jprint(rt)

{
    "page": "1",
    "perPage": "50",
    "total": "23528",
    "totalPages": "471",
    "user": "gnaprs"
}
{
    "page": "1",
    "perPage": "50",
    "total": "23528",
    "totalPages": "471",
    "user": "gnaprs"
}
