In [16]:
import json
import requests
import requests_cache 
from ratelimit import limits, sleep_and_retry

# Input variables
label_id = "771357"   # label whose releases are fetched
min_average = 4.7     # minimum community rating average a release must reach

# Cache HTTP responses (sqlite backend, cache name "discogs_cache") for 24 hours
requests_cache.install_cache(
    cache_name="discogs_cache",
    backend="sqlite",
    expire_after=24 * 60 * 60,
)

In [17]:
# Throttle for outgoing API calls: at most CALLS invocations every RATE_LIMIT seconds
# (25 calls per minute)
CALLS = 25
RATE_LIMIT = 60

@sleep_and_retry
@limits(calls=CALLS, period=RATE_LIMIT)
def check_limit():
    """Do nothing; exists only so the decorators can throttle each API call site."""

In [18]:
# Define function to get all of a label's releases, page by page
def get_label_recursively(label_id, page, all_releases=None):
    """Collect a label's releases from the Discogs API, starting at `page`.

    Pages are fetched one after another for as long as the response advertises
    a "next" page. The name is kept for compatibility, but the pages are walked
    with a loop rather than recursion, so the number of pages is not bounded by
    the interpreter's recursion limit.

    Args:
        label_id: Label identifier inserted into the request URL.
        page: Number of the first page to request.
        all_releases: Optional aggregate dict with "pagination" and "releases"
            keys to append to. It is mutated in place; a fresh one is created
            when omitted.

    Returns:
        The aggregate dict: "releases" holds the releases of every page fetched
        successfully, "pagination" holds the pagination block of the last
        successful response. If a request does not return status 200, an error
        is printed and whatever was collected so far is returned.
    """
    # Initialize all_releases if not provided
    if all_releases is None:
        all_releases = {"pagination": {"page": 0, "pages": 0, "per_page": 25, "items": 0}, "releases": []}

    while True:
        label_url = "https://api.discogs.com/labels/{}/releases?page={}&per_page=25".format(label_id, page)

        # Respect the API rate limit before every request
        check_limit()
        response = requests.get(label_url)

        if response.status_code != 200:
            # Stop paging on the first failed request; keep what we already have
            print(f"Error: Unable to fetch data. Status code: {response.status_code}")
            break

        response_dict = response.json()

        # Add this page's releases to the aggregate dictionary
        all_releases["releases"].extend(response_dict["releases"])
        all_releases["pagination"] = response_dict["pagination"]

        # A missing or empty "urls" block is treated the same as "no next page"
        page_urls = response_dict["pagination"].get("urls") or {}
        if not page_urls.get("next"):
            break
        page += 1

    return all_releases

# Define function to get community release rating
def get_rating(release_url):
    """Request the "/rating" resource of a release and return its parsed JSON.

    Args:
        release_url: URL of the release; "/rating" is appended to it.

    Returns:
        The decoded JSON body when the response status is 200. For any other
        status an error message is printed and None is returned.
    """
    endpoint = release_url + "/rating"

    # Respect the API rate limit before the request
    check_limit()
    reply = requests.get(endpoint)

    # Guard clause: report anything other than a 200 and give up
    if reply.status_code != 200:
        print(f"Error: Unable to fetch data. Status code: {reply.status_code}")
        return None

    return reply.json()

# Define function to remove duplicates from an input_list and key_path
def remove_duplicates(input_list, key_path):
    """Return the items of input_list with later duplicates dropped.

    Two items count as duplicates when get_nested_value yields the same value
    for key_path. The first occurrence is kept and input order is preserved.

    Args:
        input_list: Iterable of items to de-duplicate.
        key_path: Sequence of keys passed to get_nested_value for each item.

    Returns:
        A new list holding the first item seen for each distinct value.
    """
    kept = []
    known_ids = set()

    for entry in input_list:
        entry_id = get_nested_value(entry, key_path)
        if entry_id in known_ids:
            # Already have an item with this identifier
            continue
        known_ids.add(entry_id)
        kept.append(entry)

    return kept

# Handle nested keys
def get_nested_value(dictionary, key_path):
    """Follow key_path through nested dictionaries and return the value found.

    Args:
        dictionary: Outermost mapping to start from.
        key_path: Sequence of keys, applied one level at a time.

    Returns:
        The value at the end of the path. An empty dict is returned when a key
        is missing, or when the path continues past a value that has no `get`
        method (for example None or a number). An empty key_path returns
        `dictionary` itself.
    """
    current_value = dictionary
    for key in key_path:
        lookup = getattr(current_value, "get", None)
        if lookup is None:
            # Path goes deeper than the data: same result as a missing key
            return {}
        current_value = lookup(key, {})
    return current_value

# Define function to match releases with filtered_rating_list
def create_matching_releases(response_dict, filtered_rating_list):
    """Pair releases with their ratings, keeping only releases that have one.

    Args:
        response_dict: Dict whose "releases" list holds release dicts with an
            "id" key.
        filtered_rating_list: List of dicts with "release_id" and "rating" keys.

    Returns:
        A list, in the order of response_dict["releases"], of
        {"release_info": <release>, "rating_info": <rating>} dicts for every
        release whose id appears in filtered_rating_list. A release listed
        more than once in response_dict appears more than once in the result.
    """
    # Look-up table: release_id -> rating
    ratings_by_id = {}
    for entry in filtered_rating_list:
        release_id = entry["release_id"]
        ratings_by_id[release_id] = entry["rating"]

    paired = []
    for release in response_dict["releases"]:
        release_id = release["id"]
        if release_id in ratings_by_id:
            paired.append({
                "release_info": release,
                "rating_info": ratings_by_id[release_id],
            })

    return paired

# Define function to create output list
def create_output_list(unique_matching_list):
    """Flatten matched releases into the report rows.

    Args:
        unique_matching_list: Iterable of dicts with a "release_info" dict
            (keys "catno", "artist", "title", "year", "format", "id") and a
            "rating_info" value.

    Returns:
        A list with one dict per input entry holding "catno", "artist",
        "title", "year", "format", a "url" built from the release id, and
        "rating" (the entry's "rating_info").
    """
    def _to_row(entry):
        # Build a single report row from one matched release
        info = entry["release_info"]
        return {
            "catno": info["catno"],
            "artist": info["artist"],
            "title": info["title"],
            "year": info["year"],
            "format": info["format"],
            "url": "https://www.discogs.com/es/release/{}".format(info["id"]),
            "rating": entry["rating_info"],
        }

    return [_to_row(entry) for entry in unique_matching_list]



In [19]:
# Fetch every page of the label's releases, starting from page 1
response_label_dict = get_label_recursively(label_id, page=1)

# Debug: show the result type and the full aggregated payload
print("##### DEBUG #####")
print(type(response_label_dict))
print(json.dumps(response_label_dict, indent=4))

##### DEBUG #####
<class 'dict'>
{
    "pagination": {
        "page": 6,
        "pages": 6,
        "per_page": 25,
        "items": 137,
        "urls": {
            "first": "https://api.discogs.com/labels/771357/releases?page=1&per_page=25",
            "prev": "https://api.discogs.com/labels/771357/releases?page=5&per_page=25"
        }
    },
    "releases": [
        {
            "status": "Accepted",
            "format": "2x12\", Comp",
            "catno": "TRP006",
            "thumb": "",
            "resource_url": "https://api.discogs.com/releases/8544555",
            "title": "When I Was 14",
            "id": 8544555,
            "year": 2016,
            "artist": "Various"
        },
        {
            "status": "Accepted",
            "format": "2x12\", Comp",
            "catno": "TRP006",
            "thumb": "",
            "resource_url": "https://api.discogs.com/releases/8544555",
            "title": "When I Was 14",
            "id": 8544555,
          

In [20]:
# Get community release rating
response_rating_list = []
for release in response_label_dict["releases"]:
    release_url = release["resource_url"]
    response_rating_dict = get_rating(release_url)
    # get_rating returns None (after printing an error) when the request fails.
    # Skip those so that later steps, which index into every entry, do not
    # crash on a None.
    if response_rating_dict is None:
        continue
    response_rating_list.append(response_rating_dict)

# Debug
print("##### DEBUG #####")
print("length response_rating_list: " + str(len(response_rating_list)))
for item in response_rating_list:
    print(item)

##### DEBUG #####
length response_rating_list: 137
{'release_id': 8544555, 'rating': {'count': 105, 'average': 4.34}}
{'release_id': 8544555, 'rating': {'count': 105, 'average': 4.34}}
{'release_id': 15781803, 'rating': {'count': 82, 'average': 4.61}}
{'release_id': 15781803, 'rating': {'count': 82, 'average': 4.61}}
{'release_id': 12957162, 'rating': {'count': 117, 'average': 4.51}}
{'release_id': 12957162, 'rating': {'count': 117, 'average': 4.51}}
{'release_id': 7865479, 'rating': {'count': 17, 'average': 3.88}}
{'release_id': 7865479, 'rating': {'count': 17, 'average': 3.88}}
{'release_id': 27097950, 'rating': {'count': 0, 'average': 0.0}}
{'release_id': 27097950, 'rating': {'count': 0, 'average': 0.0}}
{'release_id': 8922665, 'rating': {'count': 15, 'average': 4.47}}
{'release_id': 10123074, 'rating': {'count': 45, 'average': 4.27}}
{'release_id': 10123074, 'rating': {'count': 45, 'average': 4.27}}
{'release_id': 8922665, 'rating': {'count': 15, 'average': 4.47}}
{'release_id': 27

In [21]:
# Collapse repeated ratings so that each "release_id" appears only once
unique_rating_list = remove_duplicates(response_rating_list, key_path=["release_id"])

# Debug: size of the de-duplicated list, then one rating per line
print("##### DEBUG #####")
print(f"length unique_rating_list: {len(unique_rating_list)}")
for item in unique_rating_list:
    print(item)

##### DEBUG #####
length unique_rating_list: 119
{'release_id': 8544555, 'rating': {'count': 105, 'average': 4.34}}
{'release_id': 15781803, 'rating': {'count': 82, 'average': 4.61}}
{'release_id': 12957162, 'rating': {'count': 117, 'average': 4.51}}
{'release_id': 7865479, 'rating': {'count': 17, 'average': 3.88}}
{'release_id': 27097950, 'rating': {'count': 0, 'average': 0.0}}
{'release_id': 8922665, 'rating': {'count': 15, 'average': 4.47}}
{'release_id': 10123074, 'rating': {'count': 45, 'average': 4.27}}
{'release_id': 27081873, 'rating': {'count': 4, 'average': 3.5}}
{'release_id': 15415420, 'rating': {'count': 0, 'average': 0.0}}
{'release_id': 27244578, 'rating': {'count': 2, 'average': 3.5}}
{'release_id': 23474363, 'rating': {'count': 5, 'average': 4.8}}
{'release_id': 11281777, 'rating': {'count': 100, 'average': 4.82}}
{'release_id': 14459372, 'rating': {'count': 54, 'average': 4.76}}
{'release_id': 14541303, 'rating': {'count': 104, 'average': 4.52}}
{'release_id': 1932785

In [22]:
# Keep only the ratings whose average reaches the min_average threshold
filtered_rating_list = [
    rating_entry
    for rating_entry in unique_rating_list
    if rating_entry["rating"]["average"] >= min_average
]

# Debug: one surviving rating per line
print("##### DEBUG #####")
for item in filtered_rating_list:
    print(item)

##### DEBUG #####
{'release_id': 23474363, 'rating': {'count': 5, 'average': 4.8}}
{'release_id': 11281777, 'rating': {'count': 100, 'average': 4.82}}
{'release_id': 14459372, 'rating': {'count': 54, 'average': 4.76}}
{'release_id': 13280954, 'rating': {'count': 1, 'average': 5.0}}
{'release_id': 11030620, 'rating': {'count': 37, 'average': 4.84}}
{'release_id': 20481352, 'rating': {'count': 3, 'average': 5.0}}
{'release_id': 21454171, 'rating': {'count': 4, 'average': 5.0}}
{'release_id': 18370783, 'rating': {'count': 1, 'average': 5.0}}
{'release_id': 11143870, 'rating': {'count': 77, 'average': 4.77}}
{'release_id': 22088110, 'rating': {'count': 5, 'average': 5.0}}
{'release_id': 14703483, 'rating': {'count': 27, 'average': 4.7}}
{'release_id': 18019900, 'rating': {'count': 42, 'average': 4.76}}
{'release_id': 23100734, 'rating': {'count': 10, 'average': 4.8}}
{'release_id': 25237951, 'rating': {'count': 5, 'average': 5.0}}
{'release_id': 12703540, 'rating': {'count': 12, 'average':

In [23]:
# Pair the label's releases with their ratings, keeping only those that passed the filter
matching_releases_list = create_matching_releases(
    response_dict=response_label_dict,
    filtered_rating_list=filtered_rating_list,
)

# Debug: number of pairs, then each pair as indented JSON
print("##### DEBUG #####")
print(f"length matching_releases: {len(matching_releases_list)}")
for item in matching_releases_list:
    print(json.dumps(item, indent=4))

##### DEBUG #####
length matching_releases: 18
{
    "release_info": {
        "status": "Accepted",
        "format": "5xFile, FLAC, EP",
        "catno": "none",
        "thumb": "",
        "resource_url": "https://api.discogs.com/releases/23474363",
        "title": "The Botox Queen",
        "id": 23474363,
        "year": 2022,
        "artist": "Vladimir Dubyshkin"
    },
    "rating_info": {
        "count": 5,
        "average": 4.8
    }
}
{
    "release_info": {
        "status": "Accepted",
        "format": "2x12\", Album",
        "catno": "TRP019, trip 19",
        "thumb": "",
        "resource_url": "https://api.discogs.com/releases/11281777",
        "title": "\u041f\u0430\u0440\u0430\u0434\u043e\u043a\u0441 = Paradox",
        "id": 11281777,
        "year": 2017,
        "artist": "Aleksi Per\u00e4l\u00e4"
    },
    "rating_info": {
        "count": 100,
        "average": 4.82
    }
}
{
    "release_info": {
        "status": "Accepted",
        "format": "2x12\",

In [24]:
# De-duplicate matching_releases_list on the nested release "id"
unique_matching_list = remove_duplicates(matching_releases_list, key_path=["release_info", "id"])

# Debug: number of unique matches, then release and rating JSON for each one
print("##### DEBUG #####")
print(f"length unique_matching_list: {len(unique_matching_list)}")
for release in unique_matching_list:
    print(
        "Release Info:",
        json.dumps(release["release_info"], indent=4),
        "Rating Info:",
        json.dumps(release["rating_info"], indent=4),
        "",  # blank separator line between releases
        sep="\n",
    )

##### DEBUG #####
length unique_matching_list: 16
Release Info:
{
    "status": "Accepted",
    "format": "5xFile, FLAC, EP",
    "catno": "none",
    "thumb": "",
    "resource_url": "https://api.discogs.com/releases/23474363",
    "title": "The Botox Queen",
    "id": 23474363,
    "year": 2022,
    "artist": "Vladimir Dubyshkin"
}
Rating Info:
{
    "count": 5,
    "average": 4.8
}

Release Info:
{
    "status": "Accepted",
    "format": "2x12\", Album",
    "catno": "TRP019, trip 19",
    "thumb": "",
    "resource_url": "https://api.discogs.com/releases/11281777",
    "title": "\u041f\u0430\u0440\u0430\u0434\u043e\u043a\u0441 = Paradox",
    "id": 11281777,
    "year": 2017,
    "artist": "Aleksi Per\u00e4l\u00e4"
}
Rating Info:
{
    "count": 100,
    "average": 4.82
}

Release Info:
{
    "status": "Accepted",
    "format": "2x12\", Album",
    "catno": "TRP028, trip 28",
    "thumb": "",
    "resource_url": "https://api.discogs.com/releases/14459372",
    "title": "\u0420\u0435

In [25]:
# Create specific output
output_list = create_output_list(unique_matching_list)

# ensure_ascii=False prints non-ASCII artists and titles as readable text
# instead of \uXXXX escape sequences
print(json.dumps(output_list, indent=4, ensure_ascii=False))

[
    {
        "catno": "none",
        "artist": "Vladimir Dubyshkin",
        "title": "The Botox Queen",
        "year": 2022,
        "format": "5xFile, FLAC, EP",
        "url": "https://www.discogs.com/es/release/23474363",
        "rating": {
            "count": 5,
            "average": 4.8
        }
    },
    {
        "catno": "TRP019, trip 19",
        "artist": "Aleksi Per\u00e4l\u00e4",
        "title": "\u041f\u0430\u0440\u0430\u0434\u043e\u043a\u0441 = Paradox",
        "year": 2017,
        "format": "2x12\", Album",
        "url": "https://www.discogs.com/es/release/11281777",
        "rating": {
            "count": 100,
            "average": 4.82
        }
    },
    {
        "catno": "TRP028, trip 28",
        "artist": "Aleksi Per\u00e4l\u00e4",
        "title": "\u0420\u0435\u0437\u043e\u043d\u0430\u043d\u0441 = Resonance",
        "year": 2019,
        "format": "2x12\", Album",
        "url": "https://www.discogs.com/es/release/14459372",
        "rating": 