# **JSON practice on musicbrainz.org Web Service**


In [2]:
import json
import requests

In [23]:
# Search the MusicBrainz web service for artists matching "Eminem"
BASE_URL = 'https://musicbrainz.org/ws/2/'

# resources on the web service which represent core entities of the database. see link for more details:
# https://musicbrainz.org/doc/MusicBrainz_Entity
entity = 'artist/'

# build the URL from `entity` instead of repeating the literal, so the path is defined once
r = requests.get(BASE_URL + entity, params={'query': 'Eminem', 'fmt': 'json'})

# display the parsed JSON search response
r.json()

{'created': '2020-03-14T23:37:32.123Z',
 'count': 10,
 'offset': 0,
 'artists': [{'id': 'b95ce3ff-3d05-4e87-9e01-c97b66af13d4',
   'type': 'Person',
   'type-id': 'b6e035f4-3ce9-331c-97df-83397230b0df',
   'score': 100,
   'name': 'Eminem',
   'sort-name': 'Eminem',
   'gender': 'male',
   'area': {'id': 'b03ff310-d8e2-45cf-9455-769f76641eb2',
    'type': 'City',
    'type-id': '6fd8f29a-3d0a-32fc-980d-ea697b69da78',
    'name': 'Detroit',
    'sort-name': 'Detroit',
    'life-span': {'ended': None}},
   'begin-area': {'id': 'bfae8151-2ba2-4f8f-bf2e-8fc16ce352c6',
    'type': 'City',
    'type-id': '6fd8f29a-3d0a-32fc-980d-ea697b69da78',
    'name': 'St. Joseph',
    'sort-name': 'St. Joseph',
    'life-span': {'ended': None}},
   'ipis': ['00354929235', '00354929431'],
   'isnis': ['0000000117699370', '0000000368611900'],
   'life-span': {'begin': '1972-10-17', 'ended': None},
   'aliases': [{'sort-name': 'Ominem',
     'name': 'Ominem',
     'locale': None,
     'type': None,
     'p

In [24]:
# list every alias recorded for the first artist in the search response
top_hit = r.json()['artists'][0]
for entry in top_hit['aliases']:
    print(entry['name'])

Ominem
Marshall B. Mathers
Marshall Mathers
EminemMusic
Em
Marshall Bruce Mathers III
Slim Shady
M. Mathers
Eminem
Marshall B. Mathers III
The Real Slim Shady
Eminem
EMINƎM
M&M
Marshall Bruce Mathers


In [25]:
# pull the musicbrainz.org artist ID out of the first search result
first_artist = r.json()['artists'][0]
id_ = first_artist['id']
id_

'b95ce3ff-3d05-4e87-9e01-c97b66af13d4'

In [19]:
# use a browse request to count the unique release titles in the database for this artist
entity = 'release?artist='

# first request (default page size) is only used to read the overall total
r = requests.get(BASE_URL + entity + id_, params={'fmt': 'json'})

# how many releases are included in the database
release_total = r.json()['release-count']

# holds set of unique release titles; a set discards repeated titles on its own
releases = set()

# keeps track of offset parameter in the query
count = 0

while count < release_total:
    # use `id_` (the artist ID string); the builtin `id` is a function and cannot be
    # concatenated into the URL
    r = requests.get(BASE_URL + entity + id_, params={'fmt': 'json', 'limit': 100, 'offset': count})
    count += 100

    # no explicit duplicate check needed: adding an existing title to a set is a no-op
    releases.update(release['title'] for release in r.json()['releases'])

len(releases)

198

In [20]:
# show each unique release title on its own line
for title in releases:
    print(title)

The Monster
Straight From the Lab EP
Guts Over Fear
Collision Course 3
Sing for the Moment
The Story of Marshall Mathers
Mathers Massacre
Cleanin’ Out My Closet
Campaign Speech
Dreams on Shady Park
Best of Slim Shady
All Eyes on Me (The Eminem Files)
C’Mon Let Me Ride
After Collision
The Freestyle Show
Unmastered Sequence
The Eminem Show
Before The Relapse 2
Kings Never Die
Straight From the Lab
Drop the World
The Freestyle Manual
Berzerk
Relapse: Refill
Remember the Name
The Eminem Show (Edited Version)
Off the Wall
Relapse (Deluxe Version)
Conscience
Superman
I Need a Doctor
Eminem presented by DJ R Dub L
America's Nightmare
Throw That
Curtain Call: The Hits
Return of the Psycho
Quitter / Just Rhymin' With Proof
Freestyles 2
Lyrical Invasion
Wordap.net Presents: The Bassmint Files
Get the Guns
Revival
Freestyles
Greatest Hits
E
Straight from the Lab Part II
Scary Movies (Stand Well Back radio edit)
Infinite
Things Get Worse
Stan
Like Toy Soldiers
Diss Me, Diss You
Maximum Eminem (The

In [29]:
# second browse query: ask the recording endpoint for this artist's tracks
entity = 'recording?artist='

browse_url = BASE_URL + entity + id_
r = requests.get(browse_url, params={'fmt': 'json'})
r.json()

{'recording-count': 3179,
 'recordings': [{'title': "(God Is) Cleanin' Out My Closet",
   'id': '06562717-5266-464a-b69a-79051af8c2f1',
   'video': False,
   'length': 385000,
   'disambiguation': ''},
  {'video': False,
   'disambiguation': '',
   'length': 47306,
   'title': "'Entertainment'(interlude)",
   'id': '09713d9a-dfe4-4c8b-a70e-055d1731ec00'},
  {'id': '24a4a114-d8bd-4b36-8b51-51e94e73b780',
   'title': '’Till I Collapse',
   'length': 297907,
   'disambiguation': 'clean',
   'video': False},
  {'id': '2e8677c2-bbd6-429c-97ec-6a3ac736d733',
   'title': "'Retarded Kid' Freestyle",
   'length': 53093,
   'disambiguation': '',
   'video': False},
  {'title': '[unknown]',
   'id': '45396c00-1e18-4cfd-b722-225421e6e8fb',
   'video': False,
   'disambiguation': '',
   'length': 6334},
  {'title': '’Till I Collapse',
   'id': '58ba910b-4762-4c76-99bf-02031e11dd94',
   'video': False,
   'length': 297893,
   'disambiguation': 'explicit'},
  {'length': 271000,
   'disambiguation': '

In [32]:
# total number of recordings reported by the browse response
recording_page = r.json()
recordings_total = recording_page['recording-count']
recordings_total

3179

In [36]:
# need time.sleep to stay within musicbrainz's rate limit; the one-second pause used below
# matches the documented per-IP guidance of roughly one request per second
# (NOTE(review): confirm against the current MusicBrainz rate-limiting page)
import time

# keeps track of offset parameter
count = 0

# holds set of unique recording titles
recordings = set()

while count < recordings_total:

    r = requests.get(BASE_URL + entity + id_, params={'fmt': 'json', 'limit': 100, 'offset': count})
    count += 100

    # skip music video clips; the set itself discards repeated titles
    recordings.update(rec['title'] for rec in r.json()['recordings'] if not rec['video'])

    # wait 1 sec before the next page; no wait is needed after the final page
    if count < recordings_total:
        time.sleep(1)

len(recordings)

1964