In [14]:
import json
import time
from datetime import datetime, timezone

import requests

In [7]:
# Base URL of the Reverb site; API paths are appended to it.
BASE = "https://reverb.com"

# Headers sent with every API request.
HEADERS = {
    "Accept": "application/hal+json",
    "Accept-Version": "3.0",
    "User-Agent": "perpage-tester/0.1",
    "Accept-Language": "it-IT"
}

# Listings path with the category/product-type filters; paging parameters are appended below.
href = "/api/listings?category=tuners&product_type=accessories"

# Page sizes to probe, and the timeout (seconds) after which a stalled request
# raises instead of blocking the notebook until it is interrupted by hand.
PER_PAGE_CANDIDATES = [24, 50, 100, 200, 500]
REQUEST_TIMEOUT_S = 30

# Probe how many listings the API actually returns for each requested page size.
for per_page in PER_PAGE_CANDIDATES:
    url = f"{BASE}{href}&per_page={per_page}&page=1"
    start = time.perf_counter()
    try:
        r = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT_S)
    except requests.exceptions.RequestException as exc:
        # Report the failure and keep probing the remaining page sizes.
        elapsed = time.perf_counter() - start
        print(per_page, "request failed:", type(exc).__name__, f"{elapsed:.2f}s")
        continue
    elapsed = time.perf_counter() - start
    try:
        n_listings = len(r.json().get("listings", []))
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page); still report the status code.
        n_listings = None
    print(per_page, r.status_code, f"{elapsed:.2f}s", "len(listings):", n_listings)

    
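# FINDING: MAX LISTINGS PER PAGE = 50 (per_page=24 and 50 returned full pages; the per_page=100 request was interrupted before it returned)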

24 200 1.27s len(listings): 24
50 200 1.64s len(listings): 50


KeyboardInterrupt: 

# Filtering function

In [11]:
# Endpoint configuration used by the filtering section.
BASE = "https://reverb.com"

# Listings path with the category/product-type filters; paging parameters get appended to it.
href = "/api/listings?category=tuners&product_type=accessories"

# HTTP headers passed to requests.get for the API calls.
HEADERS = dict(
    [
        ("Accept", "application/hal+json"),
        ("Accept-Version", "3.0"),
        ("User-Agent", "perpage-tester/0.1"),
        ("Accept-Language", "it-IT"),
    ]
)

In [None]:
#INIT====================================
# Single timezone-aware UTC timestamp for this run; process_page stamps it on every row
# as snap_valid_from. `datetime` here is the class, so the UTC tzinfo comes from `timezone`.
snapshot_ts = datetime.now(timezone.utc)

#FILTERING AND FIELD SELECTION/CLEANING====================================
def _nested_get(obj, key, subkey, default=''):
    """Return obj[key][subkey], or `default` when obj[key] is missing, null or not a dict."""
    parent = obj.get(key)
    if isinstance(parent, dict):
        return parent.get(subkey, default)
    return default


def _condition_slug(obj):
    """Return the condition slug, accepting either a dict with 'slug' or a plain string."""
    # 'condition' is tried first, then the 'condition_slug' key the original code read.
    # NOTE(review): confirm against the Reverb API which key actually carries the slug.
    for key in ('condition', 'condition_slug'):
        value = obj.get(key)
        if isinstance(value, dict) and value.get('slug'):
            return value['slug']
        if isinstance(value, str) and value:
            return value
    return ''


def _self_href(obj):
    """Return the URL of the listing's 'self' link, or '' when it is absent."""
    links = obj.get('_links')
    self_link = links.get('self', '') if isinstance(links, dict) else ''
    # HAL link objects look like {"href": "..."}; unwrap so the row stays flat.
    if isinstance(self_link, dict):
        return self_link.get('href', '')
    return self_link


def filter_listing(x):
    """
    Flatten a single raw listing into a row of selected fields.

    Input: a single listing (dict)
    Output: a dict with selected, flattened fields plus SCD2 placeholder columns

    Raises:
        KeyError: if the listing has no 'id'.

    Nested objects (shop, state, price, buyer_price, _links) that are missing,
    null or not a dict fall back to the field default instead of raising.
    """
    return {
        #flat fields
        "id": x['id'],
        "make": x.get('make', ""),
        'model': x.get('model', ''),
        'finish': x.get('finish', ''),
        'year': x.get('year', ''),
        'sku': x.get('sku', ''),
        'title': x.get('title', ''),
        'created_at': x.get('created_at', ''),
        'shop_slug': _nested_get(x, 'shop', 'slug'),
        'preferred_seller': _nested_get(x, 'shop', 'preferred_seller'),
        'condition': _condition_slug(x),
        'offers_enabled': x.get('offers_enabled', ''),
        'has_inventory': x.get('has_inventory'),
        'inventory': x.get('inventory', None),
        'published_at': x.get('published_at', ''),
        'state': _nested_get(x, 'state', 'slug'),
        'auction': x.get('auction', ''),
        'permalink': _self_href(x),
        #flattened fields
        'price': _nested_get(x, 'price', 'amount'),
        'price_currency': _nested_get(x, 'price', 'currency'),
        'price_taxIncluded': _nested_get(x, 'price', 'tax_included'),
        'buyer_price': _nested_get(x, 'buyer_price', 'amount'),
        'buyer_price_currency': _nested_get(x, 'buyer_price', 'currency'),
        'buyer_price_taxIncluded': _nested_get(x, 'buyer_price', 'tax_included'),
        #calculated fields
        #description
        #'description': clean_description(x.get('description', ''))
        #shipping: to define based on modeling perspective. Option if modeled in respect to IT: if IT is present, take that price, otherwise region "XX" price
        # SCD 2 Fields
        'snap_valid_from': None,   # snapshot_ts at which the row enters this version
        'snap_valid_to': None,     # NULL while this version is the current one
        'snap_is_current': None
    }

def clean_description(s: str) -> str:
    """Strip the most common HTML tags from a description.

    Line-break and paragraph-closing tags become newlines, the other listed
    tags are dropped, and the result is trimmed. Falsy input yields "".
    HTML entities are left untouched (the unescape step is disabled).
    """
    if not s:
        return ""
    #s = unescape(s)
    # quick & dirty tag strip; substitutions are applied one after another, in this order
    substitutions = (
        ("<br>", "\n"),
        ("<br/>", "\n"),
        ("<br />", "\n"),
        ("</p>", "\n"),
        ("<p>", ""),
        ("<b>", ""),
        ("</b>", ""),
        ("<i>", ""),
        ("</i>", ""),
    )
    text = s
    for tag, replacement in substitutions:
        text = text.replace(tag, replacement)
    return text.strip()

#CYCLES============================================
def process_page(listings: list):
    """Returns the page listings as a list of key-value dictionaries.

    Each listing is flattened with filter_listing and stamped with the SCD2
    fields for a freshly captured row: valid from the module-level
    snapshot_ts, open-ended, and marked as current.

    Returns an empty list when `listings` is empty or None.
    """
    rows = []
    for listing in listings or []:
        row = filter_listing(listing)
        row.update({
            "snap_valid_from": snapshot_ts,
            "snap_valid_to": None,
            "snap_is_current": True,
        })
        rows.append(row)
    # Return only after the whole page has been processed (not after the first listing).
    return rows
    



In [13]:
# Build the URL here so this cell does not depend on `url` left over from the probe loop.
# per_page=50 is the largest page size that returned results in the probe.
url = f"{BASE}{href}&per_page=50&page=1"
r = requests.get(url, headers=HEADERS, timeout=30)
r.raise_for_status()  # fail here on an HTTP error rather than on a confusing KeyError below

# Flatten the first listing of the page as a smoke test of filter_listing.
resp = r.json()
listing = resp['listings'][0]
filtered_listing = filter_listing(listing)
print(filtered_listing)

{'id': 92036178, 'make': 'Fender', 'model': '? Bullet Tuner, Black', 'finish': '', 'year': '', 'sku': '0239979002', 'title': 'NEW Fender Bullet Tuner', 'created_at': '2025-09-01T19:02:10+09:00', 'shop_slug': 'mountain-music-exchange', 'preferred_seller': True, 'condition': '', 'offers_enabled': False, 'has_inventory': True, 'inventory': 1, 'published_at': '2025-09-01T19:02:13+09:00', 'state': 'live', 'auction': False, 'price': '29.99', 'price_currency': 'USD', 'price_taxIncluded': False, 'buyer_price': '29.99', 'buyer_price_currency': 'USD', 'buyer_price_taxIncluded': False}


In [None]:
# Display the id of the sample listing taken from the response above.
listing.get('id')