The purpose of this notebook is to explore the AcousticBrainz dataset and get acquainted with it.  If you use this notebook, we recommend that you **make your own copy** so that the original code stays clean.

## Using the API

In [None]:
import requests

In [None]:

def get_musicbrainz_recordings(genre, limit=25, timeout=10):
    '''
    This function queries the MusicBrainz search API to retrieve a list of recordings tagged with a given genre.

    The genre is placed after ``tag:`` in the search query as-is; it is not quoted or
    escaped as a search term, only percent-encoded for transport.
    Args:
      genre (str): The genre (tag) to search for.
      limit (int): The maximum number of recordings to return.
      timeout (float): Seconds to wait for the server before giving up.
    Returns:
      list: A list of recording dictionaries (the 'recordings' entry of the JSON
        response).  An empty list is returned if the request fails or the server
        does not answer with status 200.
    '''
    if limit > 100:
        print("Note: The MusicBrainz API limits queries to 100 recordings")

    url = 'https://musicbrainz.org/ws/2/recording'
    # Passing the query through `params` lets requests percent-encode it, so a
    # genre containing '&', '#' or '+' (e.g. 'r&b') cannot break the query string.
    params = {'query': f'tag:{genre}', 'fmt': 'json', 'limit': limit}
    try:
        # Without a timeout a stalled connection would block the notebook cell forever.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Same best-effort contract as for a bad status code: report and return nothing.
        print(f"Error fetching MusicBrainz data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching MusicBrainz data: {response.status_code}")
        return []

    return response.json().get('recordings', [])

In [None]:
def get_acousticbrainz_data(mbid, level='low', timeout=10):
    '''
    This function queries the AcousticBrainz API to retrieve acoustic brainz data for a given recording.
      Args:
        mbid (str): The MusicBrainz ID of the recording.
        level (str): The level of data to retrieve. Default is 'low'. Other option is 'high'.
        timeout (float): Seconds to wait for the server before giving up.
      Returns:
        dict: A dictionary of acoustic brainz data, or None if the request fails
          or the server does not answer with status 200.
    '''
    url = f'https://acousticbrainz.org/api/v1/{mbid}/{level}-level'
    try:
        # Without a timeout a stalled connection would block the notebook cell forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Same best-effort contract as for a bad status code: report and return None.
        print(f"Error fetching AcousticBrainz data: {exc}")
        return None

    if response.status_code != 200:  # Some MBIDs are not in the AcousticBrainz data set.
        print(f"Error fetching AcousticBrainz data: {response.status_code}")
        return None

    return response.json()

In [None]:
## Example
## This gets a list of the first 10 records tagged with 'house'
house_recordings = get_musicbrainz_recordings('house', limit=10)
## Not every MBID is in AcousticBrainz, and the search may return few or no results,
## so instead of hard-coding an index we keep the first recording that has data.
house_mbid0 = None
house_acoustic_data = None
for recording in house_recordings:
    ## Feed the MBID into get_acousticbrainz_data to get the low-level data.
    house_acoustic_data = get_acousticbrainz_data(recording['id'], level='low')
    if house_acoustic_data is not None:
        house_mbid0 = recording['id']
        break
## Displays the data that was found (nothing is shown if no recording had any).
house_acoustic_data

## Using Files from the Drive

In [None]:
# Standard-library modules used by the file-loading cells.
import json
import os

# Colab helper that gives this notebook access to the Drive.
from google.colab import drive

# Mount the Drive at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Path to a folder containing the .json files.  If you want to change folders for the other .json files,
# you can manually change /ff/f to the desired folder.

directory = '/content/drive/My Drive/may-summer-2024-music-subgenre-classification/AcousticBrainz/low_level/ff/f/'
# os.listdir returns entries in arbitrary order; sorting makes a given index
# refer to the same file on every run.
file_names = sorted(os.listdir(directory))
len(file_names)

102

In [None]:
# This code block will load one .json file and save its parsed contents in a variable called data.
# To change which file, just change the value of file_index.

file_index = 0 # The file we are accessing, based on its index in file_names.  You can change this.

# Index with file_index (not a literal 0) so that changing it above actually selects another file.
file_path = os.path.join(directory, file_names[file_index])
with open(file_path, 'r', encoding='utf-8') as file:
  data = json.load(file)

In [None]:
# Show the object parsed from the .json file as this cell's output.
data