In [7]:
import requests
import json
import requests_cache
import time
from IPython.core.display import clear_output
import pandas as pd
import numpy as np
from tqdm import tqdm

requests_cache.install_cache()

In [8]:
API_KEY = 'a2ba28906c69f6372a0e34f7b3c5f6d3'
USER_AGENT = 'infoquest'

In [9]:
def lastfm_get(payload):
    # define headers and URL
    headers = {
        'user-agent': USER_AGENT
    }
    url = 'http://ws.audioscrobbler.com/2.0/'

    # Add API key and format to the payload
    payload['api_key'] = API_KEY
    payload['format'] = 'json'

    response = requests.get(url, headers=headers, params=payload)
    return response

def to_json(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    return text

## Chart

### 1.1 Get Top Artists

In [None]:
responses = []
page = 1
total_pages = 99999

while page <= total_pages:
    payload = {
        'method': 'chart.gettopartists',
        'limit': 500,
        'page': page
    }    
    print("Requesting page {}/{}".format(page, total_pages))
    clear_output(wait = True)
    
    response = lastfm_get(payload)
    
    if response.status_code != 200:
        print(response.text)
        break

    page = int(response.json()['artists']['@attr']['page'])
    total_pages = int(response.json()['artists']['@attr']['totalPages'])
    responses.append(response)

    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)        
    page += 1

In [10]:
def lookup_tags(artist):
    response = lastfm_get({
        'method': 'artist.getTopTags',
        'artist':  artist
    })

    if response.status_code != 200:
        return None

    # extract the top three tags and turn them into a string
    tags = [t['name'] for t in response.json()['toptags']['tag'][:3]]
    tags_str = ', '.join(tags)

    # rate limiting
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)
    return tags_str

In [None]:
frames = [pd.DataFrame(r.json()['artists']['artist']) for r in responses]
artists = pd.concat(frames)
tqdm.pandas()
artists['tags'] = artists['name'].progress_apply(lookup_tags)

In [None]:
artists

In [None]:
artists.to_csv('./app/data/artists.csv')

### 1.2 Get Top Tags

### Usernames 

In [12]:
usernames = pd.read_csv('./app/data/usernames_index.csv')
usernames

Unnamed: 0,username
0,Vic_Andi
1,Mrtn_
2,Slycton-archive
3,dylstonee
4,angie26111975
...,...
24996,LadyMando
24997,fokrym
24998,kanyy
24999,GedeonJerubbaal


In [15]:
r = lastfm_get({
    'method': 'user.getWeeklyChartList',
    'user':  'Vic_Andi'
})
r.json()

{'weeklychartlist': {'chart': [{'#text': '',
    'from': '1108296000',
    'to': '1108900800'},
   {'#text': '', 'from': '1108900800', 'to': '1109505600'},
   {'#text': '', 'from': '1109505600', 'to': '1110110400'},
   {'#text': '', 'from': '1110110400', 'to': '1110715200'},
   {'#text': '', 'from': '1110715200', 'to': '1111320000'},
   {'#text': '', 'from': '1111320000', 'to': '1111924800'},
   {'#text': '', 'from': '1111924800', 'to': '1112529600'},
   {'#text': '', 'from': '1112529600', 'to': '1113134400'},
   {'#text': '', 'from': '1113134400', 'to': '1113739200'},
   {'#text': '', 'from': '1113739200', 'to': '1114344000'},
   {'#text': '', 'from': '1114344000', 'to': '1114948800'},
   {'#text': '', 'from': '1114948800', 'to': '1115553600'},
   {'#text': '', 'from': '1115553600', 'to': '1116158400'},
   {'#text': '', 'from': '1116158400', 'to': '1116763200'},
   {'#text': '', 'from': '1116763200', 'to': '1117368000'},
   {'#text': '', 'from': '1117368000', 'to': '1117972800'},
   {

In [23]:
r = lastfm_get({
    'method': 'user.getWeeklyChartList',
    'user':  'Mrtn_'
})
r.json()

{'weeklychartlist': {'chart': [{'#text': '',
    'from': '1108296000',
    'to': '1108900800'},
   {'#text': '', 'from': '1108900800', 'to': '1109505600'},
   {'#text': '', 'from': '1109505600', 'to': '1110110400'},
   {'#text': '', 'from': '1110110400', 'to': '1110715200'},
   {'#text': '', 'from': '1110715200', 'to': '1111320000'},
   {'#text': '', 'from': '1111320000', 'to': '1111924800'},
   {'#text': '', 'from': '1111924800', 'to': '1112529600'},
   {'#text': '', 'from': '1112529600', 'to': '1113134400'},
   {'#text': '', 'from': '1113134400', 'to': '1113739200'},
   {'#text': '', 'from': '1113739200', 'to': '1114344000'},
   {'#text': '', 'from': '1114344000', 'to': '1114948800'},
   {'#text': '', 'from': '1114948800', 'to': '1115553600'},
   {'#text': '', 'from': '1115553600', 'to': '1116158400'},
   {'#text': '', 'from': '1116158400', 'to': '1116763200'},
   {'#text': '', 'from': '1116763200', 'to': '1117368000'},
   {'#text': '', 'from': '1117368000', 'to': '1117972800'},
   {

In [17]:
import datetime
timestamp = datetime.datetime.fromtimestamp(1108900800)
print(timestamp.strftime('%Y-%m-%d %H:%M:%S'))
timestamp = datetime.datetime.fromtimestamp(1615723200)
print(timestamp.strftime('%Y-%m-%d %H:%M:%S'))

2005-02-20 13:00:00
2021-03-14 13:00:00


In [22]:
r = lastfm_get({
    'method': 'user.getWeeklyAlbumChart',
    'user': 'Vic_Andi',
    'from': '1108900800',
    'to': '1356868800'
})

r.json()

{'weeklyalbumchart': {'album': [{'artist': {'mbid': '', '#text': 'Kamelot'},
    '@attr': {'rank': '1'},
    'mbid': '0654834f-a3a3-44a8-8442-1ad217dc23f9',
    'playcount': '335',
    'name': 'Silverthorn',
    'url': 'https://www.last.fm/music/Kamelot/Silverthorn'},
   {'artist': {'mbid': '1d692d71-715c-43fa-b802-aa8001a2cbb8',
     '#text': 'Lumen'},
    '@attr': {'rank': '2'},
    'mbid': 'd391aa9a-a06d-4061-8b85-bde5f16ca299',
    'playcount': '331',
    'name': 'Правда?',
    'url': 'https://www.last.fm/music/Lumen/%D0%9F%D1%80%D0%B0%D0%B2%D0%B4%D0%B0%3F'},
   {'artist': {'mbid': 'a0c5737e-da6d-468f-a78e-19146494f1db',
     '#text': 'Noize MC'},
    '@attr': {'rank': '3'},
    'mbid': '8faa0140-377f-453d-9e47-b08244ef48ba',
    'playcount': '329',
    'name': 'Новый альбом',
    'url': 'https://www.last.fm/music/Noize+MC/%D0%9D%D0%BE%D0%B2%D1%8B%D0%B9+%D0%B0%D0%BB%D1%8C%D0%B1%D0%BE%D0%BC'},
   {'artist': {'mbid': '', '#text': 'Lumen'},
    '@attr': {'rank': '4'},
    'mbid': '',


In [None]:
r = lastfm_get({
    'method': 'artist.getTopTags',
    'artist':  'Lana Del Rey'
})

{'weeklyalbumchart': {'album': [{'artist': {'mbid': '',
     '#text': 'The Chordettes'},
    '@attr': {'rank': '1'},
    'mbid': '',
    'playcount': '39',
    'name': 'Golden Classics',
    'url': 'https://www.last.fm/music/The+Chordettes/Golden+Classics'},
   {'artist': {'mbid': '', '#text': 'Thin Lizzy'},
    '@attr': {'rank': '2'},
    'mbid': '',
    'playcount': '37',
    'name': 'Thin Lizzy: Greatest Hits (disc 1)',
    'url': 'https://www.last.fm/music/Thin+Lizzy/Thin+Lizzy:+Greatest+Hits+(disc+1)'},
   {'artist': {'mbid': '3bcff06f-675a-451f-9075-99e8657047e8',
     '#text': 'Four Tet'},
    '@attr': {'rank': '3'},
    'mbid': 'ccdecba9-1b74-4a17-95f3-c5debc52d9a8',
    'playcount': '19',
    'name': 'Everything Ecstatic',
    'url': 'https://www.last.fm/music/Four+Tet/Everything+Ecstatic'},
   {'artist': {'mbid': '997117eb-5a4c-4a37-99f9-f2515e2d9739',
     '#text': 'The Chordettes'},
    '@attr': {'rank': '4'},
    'mbid': '82a144b9-2e42-4ebc-bac4-d9303f09f51c',
    'playcou

In [None]:
r = lastfm_get({
    'method': 'user.getWeeklyAlbumChart',
    'user': 'bisa',
    'from': '1212321600',
    'to': '1212926400'
})

r.json()

In [None]:
r = lastfm_get({
    'method': 'user.getWeeklyChartList',
    'user': 'bisa',
})

r.json()

### Countries

In [24]:
countries = pd.read_csv('./app/data/countries.csv')
countries

Unnamed: 0,Name,Code
0,Afghanistan,AF
1,Åland Islands,AX
2,Albania,AL
3,Algeria,DZ
4,American Samoa,AS
...,...,...
244,Wallis and Futuna,WF
245,Western Sahara,EH
246,Yemen,YE
247,Zambia,ZM


In [30]:
r = lastfm_get({
    'method': 'geo.getTopArtists',
    'country': 'Spain',
    'limit': 10,
    'page': 1
})
r.json()



{'topartists': {'artist': [{'name': 'David Bowie',
    'listeners': '3653210',
    'mbid': '5441c29d-3602-4898-b1a1-b77fa23b8e50',
    'url': 'https://www.last.fm/music/David+Bowie',
    'streamable': '0',
    'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'small'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'medium'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'large'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'extralarge'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'mega'}]},
   {'name': 'Radiohead',
    'listeners': '5055140',
    'mbid': 'a74b1b7f-71a5-4011-9441-d0b5e4122711',
    'url': 'https://www.last.fm/music/Radiohead',
    'stre

In [34]:
r = lastfm_get({
    'method': 'geo.getTopArtists',
    'country': 'Colombia',
    'limit': 20,
    'page': 1
})
r.json()

{'topartists': {'artist': [{'name': 'Radiohead',
    'listeners': '5055140',
    'mbid': 'a74b1b7f-71a5-4011-9441-d0b5e4122711',
    'url': 'https://www.last.fm/music/Radiohead',
    'streamable': '0',
    'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'small'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'medium'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'large'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'extralarge'},
     {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
      'size': 'mega'}]},
   {'name': 'Queen',
    'listeners': '4416688',
    'mbid': '420ca290-76c5-41af-999e-564d7c71f1a7',
    'url': 'https://www.last.fm/music/Queen',
    'streamable': '0'

In [41]:
i = 0
huge_list = []
for country, code in countries.values:
#     if i >= 1:
#         break;
    i+=1
    response = lastfm_get({
        'method': 'geo.getTopArtists',
        'country': country,
        'limit': 20,
        'page': 1
    })
    
    if response.status_code != 200:        
        break

    json_list = response.json()['topartists']['artist']
    top_artists = {'country': country}
    for index, artist in enumerate(json_list):
        name = artist['name']
        top_artists[f'artist_{index}'] = name
#         top_artists.append(name)
    huge_list.append(top_artists)
        
#     total_pages = int(response.json()['artists']['@attr']['totalPages'])
#     responses.append(response)
 

KeyError: 'topartists'

In [43]:
pd.DataFrame(data=huge_list)

Unnamed: 0,country,artist_0,artist_1,artist_2,artist_3,artist_4,artist_5,artist_6,artist_7,artist_8,...,artist_10,artist_11,artist_12,artist_13,artist_14,artist_15,artist_16,artist_17,artist_18,artist_19
0,Afghanistan,The xx,The Beatles,Radiohead,Lana Del Rey,Pink Floyd,Metallica,Drake,Nirvana,Katy Perry,...,Led Zeppelin,blink-182,Depeche Mode,The Rolling Stones,Oasis,Michael Jackson,Massive Attack,Deftones,Sia,Disturbed
1,Åland Islands,Coldplay,The Weeknd,Ed Sheeran,Sia,Calvin Harris,Kent,David Bowie,Queen,Avicii,...,Kygo,The Beatles,Lana Del Rey,Ariana Grande,Eminem,Johnny Cash,David Guetta,Bruce Springsteen,Stevie Wonder,Major Lazer
2,Albania,The Weeknd,Coldplay,Daft Punk,The Beatles,Radiohead,Arctic Monkeys,David Bowie,Lana Del Rey,Red Hot Chili Peppers,...,Gorillaz,Moby,Kanye West,Pink Floyd,Rihanna,The Rolling Stones,The xx,Florence + the Machine,Tame Impala,Drake
3,Algeria,Coldplay,The Weeknd,Ed Sheeran,Pink Floyd,Metallica,Rihanna,Radiohead,Muse,Lana Del Rey,...,Michael Jackson,Drake,Queen,Nirvana,The Rolling Stones,Sia,David Guetta,Avicii,Led Zeppelin,Imagine Dragons
4,American Samoa,The Beatles,Radiohead,Kanye West,Lana Del Rey,Pink Floyd,Ariana Grande,Muse,Drake,Rihanna,...,David Bowie,Daft Punk,The Weeknd,Queen,Beyoncé,Frank Ocean,Kendrick Lamar,The Strokes,Arcade Fire,The Smiths
5,Andorra,Radiohead,Tame Impala,The xx,The Beatles,Coldplay,Red Hot Chili Peppers,Nirvana,David Bowie,The Weeknd,...,alt-J,Moby,Foals,LCD Soundsystem,Nina Simone,Arctic Monkeys,Queen,Gorillaz,The Cure,The Rolling Stones
6,Angola,Lady Gaga,Rihanna,The Weeknd,Beyoncé,Sia,Ariana Grande,Katy Perry,Adele,Michael Jackson,...,Justin Bieber,The Beatles,Lana Del Rey,Florence + the Machine,Ed Sheeran,Imagine Dragons,Ellie Goulding,Amy Winehouse,Bruno Mars,Avicii
7,Anguilla,The Weeknd,Bon Iver,Selena Gomez,Sia,Post Malone,Ellie Goulding,Justin Bieber,Shawn Mendes,Years & Years,...,Hailee Steinfeld,Sam Feldt,Radiohead,Lady Gaga,Kanye West,Coldplay,Ariana Grande,Drake,Rihanna,Beyoncé
8,Antarctica,,,,,,,,,,...,,,,,,,,,,
9,Antigua and Barbuda,Lady Gaga,Rihanna,Britney Spears,The Weeknd,Adele,Maroon 5,David Guetta,Major Lazer,Kanye West,...,Coldplay,Ariana Grande,Drake,Eminem,Beyoncé,Katy Perry,Florence + the Machine,Madonna,Depeche Mode,Tame Impala
