In [4]:
import requests
import json
import requests_cache
import time
from IPython.core.display import clear_output
import pandas as pd
import numpy as np

requests_cache.install_cache()

In [5]:
API_KEY = 'a2ba28906c69f6372a0e34f7b3c5f6d3'
USER_AGENT = 'infoquest'

In [6]:
def lastfm_get(payload):
    # define headers and URL
    headers = {
        'user-agent': USER_AGENT
    }
    url = 'http://ws.audioscrobbler.com/2.0/'

    # Add API key and format to the payload
    payload['api_key'] = API_KEY
    payload['format'] = 'json'

    response = requests.get(url, headers=headers, params=payload)
    return response

def to_json(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    return text

## Chart

### 1.1 Get Top Artists

In [8]:
responses = []
page = 1
total_pages = 99999

while page <= total_pages:
    payload = {
        'method': 'chart.gettopartists',
        'limit': 500,
        'page': page
    }    
    print("Requesting page {}/{}".format(page, total_pages))
    clear_output(wait = True)
    
    response = lastfm_get(payload)
    
    if response.status_code != 200:
        print(response.text)
        break

    page = int(response.json()['artists']['@attr']['page'])
    total_pages = int(response.json()['artists']['@attr']['totalPages'])
    responses.append(response)

    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)        
    page += 1

Requesting page 7922/7922


In [9]:
def lookup_tags(artist):
    response = lastfm_get({
        'method': 'artist.getTopTags',
        'artist':  artist
    })

    if response.status_code != 200:
        return None

    # extract the top three tags and turn them into a string
    tags = [t['name'] for t in response.json()['toptags']['tag'][:3]]
    tags_str = ', '.join(tags)

    # rate limiting
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)
    return tags_str

In [10]:
frames = [pd.DataFrame(r.json()['artists']['artist']) for r in responses]
artists = pd.concat(frames)

In [None]:
from tqdm import tqdm
tqdm.pandas()

artists['tags'] = artists['name'].progress_apply(lookup_tags)

  from pandas import Panel
  7%|█████▍                                                                      | 717/10000 [05:10<1:05:29,  2.36it/s]

In [11]:
artists

Unnamed: 0,name,playcount,listeners,mbid,url,streamable,image
0,The Weeknd,162970576,1885916,c8b03190-306c-4120-bb0b-6f2ebfc06ea9,https://www.last.fm/music/The+Weeknd,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
1,Ariana Grande,229585236,1627166,f4fdbb4c-e4b7-47a0-b83b-d91bbfcfa387,https://www.last.fm/music/Ariana+Grande,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
2,Taylor Swift,378463801,2724891,20244d07-534f-4eff-b4d4-930878889970,https://www.last.fm/music/Taylor+Swift,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
3,Kanye West,343740236,4840590,164f0d73-1234-4e2c-8743-d77bf2191051,https://www.last.fm/music/Kanye+West,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
4,Billie Eilish,88485731,1097242,,https://www.last.fm/music/Billie+Eilish,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
...,...,...,...,...,...,...,...
495,EOB,364143,49473,,https://www.last.fm/music/EOB,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
496,piXy,64510,3887,,https://www.last.fm/music/piXy,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
497,Anvil,1715095,92674,07a85e96-bb72-4930-b41d-24853f4a4ede,https://www.last.fm/music/Anvil,0,[{'#text': 'https://lastfm.freetls.fastly.net/...
498,Kove,572904,64055,3ccd8183-f0bc-4acd-a4e7-3b1d226d36fd,https://www.last.fm/music/Kove,0,[{'#text': 'https://lastfm.freetls.fastly.net/...


In [10]:
artists.to_csv('artists.csv')

### 1.2 Get Top Tags

In [None]:
r = lastfm_get({
    'method': 'artist.getTopTags',
    'artist':  'Lana Del Rey'
})

In [16]:
r = lastfm_get({
    'method': 'user.getWeeklyAlbumChart',
    'user': 'Efoja_87',
    'from': '1212321600',
    'to': '1212926400'
})

r.json()

{'weeklyalbumchart': {'album': [{'artist': {'mbid': '',
     '#text': 'The Chordettes'},
    '@attr': {'rank': '1'},
    'mbid': '',
    'playcount': '39',
    'name': 'Golden Classics',
    'url': 'https://www.last.fm/music/The+Chordettes/Golden+Classics'},
   {'artist': {'mbid': '', '#text': 'Thin Lizzy'},
    '@attr': {'rank': '2'},
    'mbid': '',
    'playcount': '37',
    'name': 'Thin Lizzy: Greatest Hits (disc 1)',
    'url': 'https://www.last.fm/music/Thin+Lizzy/Thin+Lizzy:+Greatest+Hits+(disc+1)'},
   {'artist': {'mbid': '3bcff06f-675a-451f-9075-99e8657047e8',
     '#text': 'Four Tet'},
    '@attr': {'rank': '3'},
    'mbid': 'ccdecba9-1b74-4a17-95f3-c5debc52d9a8',
    'playcount': '19',
    'name': 'Everything Ecstatic',
    'url': 'https://www.last.fm/music/Four+Tet/Everything+Ecstatic'},
   {'artist': {'mbid': '997117eb-5a4c-4a37-99f9-f2515e2d9739',
     '#text': 'The Chordettes'},
    '@attr': {'rank': '4'},
    'mbid': '82a144b9-2e42-4ebc-bac4-d9303f09f51c',
    'playcou

In [17]:
r = lastfm_get({
    'method': 'user.getWeeklyAlbumChart',
    'user': 'bisa',
    'from': '1212321600',
    'to': '1212926400'
})

r.json()

{'weeklyalbumchart': {'album': [{'artist': {'mbid': '6aa40207-fec8-43a7-991d-b872a42def05',
     '#text': 'Amy Macdonald'},
    '@attr': {'rank': '1'},
    'mbid': '1b30254e-9523-4f85-ba31-2a1ead9b07c9',
    'playcount': '88',
    'name': 'This Is the Life',
    'url': 'https://www.last.fm/music/Amy+Macdonald/This+Is+the+Life'},
   {'artist': {'mbid': '8c5755b4-2de6-4158-bca9-e354cbf07437',
     '#text': 'Delta Goodrem'},
    '@attr': {'rank': '2'},
    'mbid': '1243071c-ce1a-44fc-877c-0d8a8da87564',
    'playcount': '84',
    'name': 'Innocent Eyes',
    'url': 'https://www.last.fm/music/Delta+Goodrem/Innocent+Eyes'},
   {'artist': {'mbid': '8c5755b4-2de6-4158-bca9-e354cbf07437',
     '#text': 'Delta Goodrem'},
    '@attr': {'rank': '3'},
    'mbid': '13993a98-67e7-4d14-ba2b-d92f00b7417b',
    'playcount': '84',
    'name': 'Mistaken Identity',
    'url': 'https://www.last.fm/music/Delta+Goodrem/Mistaken+Identity'},
   {'artist': {'mbid': '', '#text': 'Sparks'},
    '@attr': {'rank': 

In [18]:
r = lastfm_get({
    'method': 'user.getWeeklyChartList',
    'user': 'bisa',
})

r.json()

{'weeklychartlist': {'chart': [{'#text': '',
    'from': '1108296000',
    'to': '1108900800'},
   {'#text': '', 'from': '1108900800', 'to': '1109505600'},
   {'#text': '', 'from': '1109505600', 'to': '1110110400'},
   {'#text': '', 'from': '1110110400', 'to': '1110715200'},
   {'#text': '', 'from': '1110715200', 'to': '1111320000'},
   {'#text': '', 'from': '1111320000', 'to': '1111924800'},
   {'#text': '', 'from': '1111924800', 'to': '1112529600'},
   {'#text': '', 'from': '1112529600', 'to': '1113134400'},
   {'#text': '', 'from': '1113134400', 'to': '1113739200'},
   {'#text': '', 'from': '1113739200', 'to': '1114344000'},
   {'#text': '', 'from': '1114344000', 'to': '1114948800'},
   {'#text': '', 'from': '1114948800', 'to': '1115553600'},
   {'#text': '', 'from': '1115553600', 'to': '1116158400'},
   {'#text': '', 'from': '1116158400', 'to': '1116763200'},
   {'#text': '', 'from': '1116763200', 'to': '1117368000'},
   {'#text': '', 'from': '1117368000', 'to': '1117972800'},
   {