In [54]:
import json
import pandas as pd
import requests
import time
import os

In [55]:
# Location of the Reverb categories JSON file. The default is the original
# machine-specific path; set REVERB_CATEGORIES_JSON to run the notebook
# elsewhere without editing this cell.
path = os.getenv(
    "REVERB_CATEGORIES_JSON",
    "/Users/andrea/Progetti/strumenti_usato/data/reverb_categories.json",
)

In [56]:
# Read the categories file. JSON is decoded explicitly as UTF-8 so the
# non-ASCII category names do not depend on the platform's default encoding.
with open(path, encoding="utf-8") as f:
    raw = json.load(f)

In [57]:
# Value stored under the 'categories' key of the loaded JSON; the cells below
# iterate over it as one record per top-level category.
data = raw['categories']

In [15]:
# Names of the top-level categories, in the order they appear in `data`.
categories = [entry['name'] for entry in data]


In [None]:
# Top-level category name -> list of the names of its subcategories.
categories_taxonomy = {
    top['name']: [child['name'] for child in top['subcategories']]
    for top in data
}

categories_taxonomy

{'Accessori': ['Accordatori',
  'Alimentatori',
  'Altre corde',
  'Borse per basso',
  'Borse per chitarra',
  'Capotasti',
  'Case candy',
  'Cavi',
  'Corde per banjo',
  'Corde per basso',
  'Corde per chitarra',
  'Corde per mandolino',
  'Corde per ukulele',
  'Cuffie',
  'Custodie per amplificatori',
  'Custodie per basso',
  'Custodie per chitarra',
  'Libri e DVD',
  'Merchandise',
  'Metronomo',
  'Plettri',
  'Sdoppiatori e adattatori per cavo',
  'Slide',
  'Strumenti',
  'Supporti',
  'Tracolle',
  'Umidificatori'],
 'Amplificatori': ['Amplificatori Bass per Cuffie',
  'Amplificatori Chitarra per Pedaliere',
  'Amplificatori boutique',
  'Amplificatori di potenza per chitarra',
  'Amplificatori per Bass Modeling',
  'Amplificatori per Basso a Pedale',
  'Amplificatori per chitarra a modellazione',
  'Amplificatori per chitarra acustica',
  'Amplificatori per cuffie per chitarra',
  'Amplificatori per tastiera',
  'Attenuatori per amplificatore',
  'Cabinet per basso',
  'C

AttributeError: 'dict' object has no attribute 'categories'

# Stima numerosità categorie

In [38]:
# NOTE(review): the hrefs collected below start with "/", so a plain
# BASE_URL + href concatenation yields "https://reverb.com//api/...".
BASE_URL = "https://reverb.com/"

# Listings API link of every top-level category.
cat_links = [entry["_links"]['listings']['href'] for entry in data]

print(len(cat_links))
cat_links

14


['/api/listings?product_type=accessories',
 '/api/listings?product_type=amps',
 '/api/listings?product_type=dj-and-lighting-gear',
 '/api/listings?product_type=band-and-orchestra',
 '/api/listings?product_type=bass-guitars',
 '/api/listings?product_type=drums-and-percussion',
 '/api/listings?product_type=acoustic-guitars',
 '/api/listings?product_type=electric-guitars',
 '/api/listings?product_type=effects-and-pedals',
 '/api/listings?product_type=home-audio',
 '/api/listings?product_type=parts',
 '/api/listings?product_type=pro-audio',
 '/api/listings?product_type=folk-instruments',
 '/api/listings?product_type=keyboards-and-synths']

In [None]:
results = []  # one record per category link is accumulated here

# Request headers. The Authorization header is added only when a token is
# configured: with REVERB_TOKEN unset, the f-string would otherwise send the
# literal value "Bearer None".
headers = {"Accept-Version": "3.0"}
token = os.getenv("REVERB_TOKEN")
if token:
    headers["Authorization"] = f"Bearer {token}"

for link in cat_links:
    # The hrefs already start with "/", so drop the base's trailing slash to
    # avoid requesting "https://reverb.com//api/...".
    url = BASE_URL.rstrip("/") + link
    try:
        # Send the headers (they were previously built but never passed) and
        # bound the wait so one stalled request cannot hang the whole loop.
        http_response = requests.get(url, headers=headers, timeout=20)
        # Surface HTTP errors as such instead of as a KeyError on 'total'.
        http_response.raise_for_status()
        response = http_response.json()
        total, cat = response['total'], response['humanized_params']

        # append one dictionary per category to the list
        results.append({
            "link": link,
            "total": total,
            "params": cat
        })

        time.sleep(0.01)
    except (requests.RequestException, KeyError, ValueError) as e:
        # Best-effort: report the failing link and continue with the others.
        print(f"Errore con {link}: {e}")

# build the dataframe
df = pd.DataFrame(results)

Errore con /api/listings?product_type=electric-guitars: 'total'


In [43]:
# Display the current `df` (at this point expected to be the frame built from `results`).
df

Unnamed: 0,link,total,params
0,/api/listings?product_type=accessories,690515,Accessories
1,/api/listings?product_type=amps,70382,Amps
2,/api/listings?product_type=dj-and-lighting-gear,32522,DJ and Lighting Gear
3,/api/listings?product_type=band-and-orchestra,84904,Band & Orchestra
4,/api/listings?product_type=bass-guitars,52697,Bass Guitars
5,/api/listings?product_type=drums-and-percussion,355343,Drums and Percussion
6,/api/listings?product_type=acoustic-guitars,102342,Acoustic Guitars
7,/api/listings?product_type=effects-and-pedals,215765,Guitar Pedals and Effects
8,/api/listings?product_type=home-audio,42844,Home Audio
9,/api/listings?product_type=parts,385884,Parts


## Script che usa la tabella

In [None]:
import time, random
import pandas as pd
import requests
from tqdm import tqdm

# Root of the API; listing hrefs are appended to it by with_per_page_1.
BASE = "https://api.reverb.com"

# Headers passed to every request issued by fetch_total.
HEADERS = dict([
    ("Accept", "application/hal+json"),
    ("Accept-Version", "3.0"),
    ("Accept-Language", "it-IT"),
    ("X-Display-Currency", "EUR"),
    ("X-Shipping-Region", "IT"),
    # ("Authorization", f"Bearer {TOKEN}"),
    ("User-Agent", "reverb-category-counter/0.1"),
])

# Query parameters passed to every request issued by fetch_total.
params = dict(condition="used")

def with_per_page_1(href: str) -> str:
    """Build an absolute URL from ``href`` that requests page 1 with one item per page.

    The caller only reads the ``total`` field of the response, so the smallest
    possible page is requested.

    Args:
        href: Path (optionally with a query string) relative to ``BASE``,
            e.g. ``"/api/listings?category=tuners"``.

    Returns:
        ``BASE + href`` with ``per_page=1`` and ``page=1`` appended, each only
        when that query parameter is not already present.
    """
    from urllib.parse import parse_qsl, urlsplit

    url = f"{BASE}{href}"
    # Compare real query-parameter names: a substring test for "page=" is
    # always satisfied by "per_page=", which meant page=1 was never added.
    present = {
        name for name, _ in parse_qsl(urlsplit(url).query, keep_blank_values=True)
    }
    sep = "&" if "?" in href else "?"
    for name in ("per_page", "page"):
        if name not in present:
            url += f"{sep}{name}=1"
            sep = "&"
    return url

def _retry_after_seconds(value, fallback: float) -> float:
    """Convert a ``Retry-After`` header value into a non-negative wait in seconds.

    The header may carry either a number of seconds or an HTTP-date. Returns
    ``fallback`` when the header is missing or cannot be interpreted.
    """
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    if value is None:
        return fallback
    try:
        # Clamp at zero: time.sleep() rejects negative durations.
        return max(0.0, float(value))
    except (TypeError, ValueError):
        pass
    try:
        when = parsedate_to_datetime(value)
    except (TypeError, ValueError, IndexError):
        return fallback
    if when.tzinfo is None:
        # A date without zone information is treated as UTC.
        when = when.replace(tzinfo=timezone.utc)
    return max(0.0, (when - datetime.now(timezone.utc)).total_seconds())


def fetch_total(url: str, max_retries=6, base_delay=1.0, timeout=20.0):
    """GET ``url`` and return the ``total`` field of its JSON body with the request time.

    Responses with status 429 or 5xx are retried after a sleep; the backoff
    delay doubles after each such response, capped at 60 seconds, and a random
    sub-second jitter is added to every sleep.

    Args:
        url: Absolute URL to request (sent with the module-level ``HEADERS``
            and ``params``).
        max_retries: Maximum number of requests to attempt.
        base_delay: Initial backoff delay in seconds.
        timeout: Timeout passed to ``requests.get``.

    Returns:
        ``(total, elapsed)`` where ``total`` is ``payload.get("total")`` and
        ``elapsed`` is the duration in seconds of the successful request, or
        ``(None, None)`` when every attempt hit a 429/5xx response.

    Raises:
        requests.HTTPError: for any other error status (via ``raise_for_status``).
    """
    delay = base_delay
    for _ in range(max_retries):
        start = time.perf_counter()
        r = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        elapsed = time.perf_counter() - start

        # Rate limit: honour Retry-After when usable, otherwise use the backoff delay.
        if r.status_code == 429:
            wait = _retry_after_seconds(r.headers.get("Retry-After"), delay)
            time.sleep(wait + random.random())
            delay = min(delay * 2, 60)
            continue
        # Server hiccup: back off and try again.
        if 500 <= r.status_code < 600:
            time.sleep(delay + random.random())
            delay = min(delay * 2, 60)
            continue

        r.raise_for_status()
        payload = r.json()
        return payload.get("total"), elapsed
    return None, None

# ---- Example run ----
# Reads the href table, asks the API for the listing total of every row and
# records how long each request took.
# NOTE(review): the notebook's last cell writes "data/cat-subcat-href_table.csv",
# not the file read here — confirm this is the intended input.
df = pd.read_csv("data/product_type-cat-href_table.csv")

totals, times = [], []
# Only the href column is needed, so iterate over it directly instead of iterrows().
for href in tqdm(df["listings_href"], total=len(df)):
    total, elapsed = fetch_total(with_per_page_1(href))
    totals.append(total)
    times.append(elapsed)
    time.sleep(0.2)  # polite delay

df["total"] = totals
df["response_time"] = times
# Timestamp.utcnow() is deprecated in recent pandas; now(tz="UTC") gives the
# same timezone-aware UTC timestamp.
df["fetched_at"] = pd.Timestamp.now(tz="UTC")

print(f"Average response time: {pd.Series(times).mean():.2f} sec")
print(f"Median response time: {pd.Series(times).median():.2f} sec")
print(df.head())


  0%|          | 0/306 [00:00<?, ?it/s]

100%|██████████| 306/306 [05:04<00:00,  1.00it/s]

Average response time: 0.79 sec
Median response time: 0.63 sec
  product_type         category  \
0  accessories           tuners   
1  accessories   power-supplies   
2  accessories    other-strings   
3  accessories    bass-gig-bags   
4  accessories  guitar-gig-bags   

                                       listings_href  total  response_time  \
0  /api/listings?category=tuners&product_type=acc...    836       0.568521   
1  /api/listings?category=power-supplies&product_...    939       0.580544   
2  /api/listings?category=other-strings&product_t...    154       0.548458   
3  /api/listings?category=bass-gig-bags&product_t...    161       0.606473   
4  /api/listings?category=guitar-gig-bags&product...    690       0.569765   

                        fetched_at  
0 2025-09-01 13:20:16.063227+00:00  
1 2025-09-01 13:20:16.063227+00:00  
2 2025-09-01 13:20:16.063227+00:00  
3 2025-09-01 13:20:16.063227+00:00  
4 2025-09-01 13:20:16.063227+00:00  





In [6]:

# Save the counts table as CSV (index column omitted).
df.to_csv("data/category_counts_used.csv", index=False)

# The Parquet export below is disabled: it sits inside a bare string literal,
# so it is never executed.
"""
try:
    import pyarrow as pa, pyarrow.parquet as pq
    df.to_parquet("category_counts.parquet", index=False)
except Exception:
    pass
"""

# Print the first ten rows as a quick check of what was written.
print(df.head(10))


  product_type         category  \
0  accessories           tuners   
1  accessories   power-supplies   
2  accessories    other-strings   
3  accessories    bass-gig-bags   
4  accessories  guitar-gig-bags   
5  accessories            capos   
6  accessories       case-candy   
7  accessories           cables   
8  accessories    banjo-strings   
9  accessories     bass-strings   

                                       listings_href  total  response_time  \
0  /api/listings?category=tuners&product_type=acc...    836       0.568521   
1  /api/listings?category=power-supplies&product_...    939       0.580544   
2  /api/listings?category=other-strings&product_t...    154       0.548458   
3  /api/listings?category=bass-gig-bags&product_t...    161       0.606473   
4  /api/listings?category=guitar-gig-bags&product...    690       0.569765   
5  /api/listings?category=capos&product_type=acce...    206       0.585126   
6  /api/listings?category=case-candy&product_type...   5780       0.

# Mapping and Product taxonomy table

In [None]:
# Category mapping: top-level slug -> category record, where each record also
# gets a "subcat_mapping" entry (subcategory slug -> subcategory record).
cat_mapping = {}

for el in data:
    # Shallow copy, so adding "subcat_mapping" below does not modify `data`.
    cat_mapping[el['slug']] = dict(el)

for key in cat_mapping.keys():
    subcat_mapping = {}
    for subcat in cat_mapping[key]['subcategories']:
        subcat_mapping[subcat['slug']] = subcat
    # Store the finished mapping under a string key; indexing with the dict
    # itself (as before) raises "TypeError: unhashable type: 'dict'".
    cat_mapping[key]['subcat_mapping'] = subcat_mapping

# Last expression of the cell, so the mapping is displayed.
cat_mapping


{'accessories': {'name': 'Accessori',
  'description': 'Whether your favorite cable has worn out or you need a snazzy new strap, Reverb has all of the accessories you could ever need to deck out your gear or rig. Hundreds of listings are posted on Reverb daily for brand new cables, strings, picks, cases, gig bags, straps, and a whole host of other music gear accessories.',
  'subcategories': [{'name': 'Accordatori',
    'description': '',
    'id': 147,
    'uuid': 'aff19d6a-ad5e-4b3b-b21c-8aa71ae834c6',
    'full_name': 'Accessori / Accordatori',
    'slug': 'tuners',
    'product_type_slug': 'accessories',
    'root_uuid': '62835d2e-ac92-41fc-9b8d-4aba8c1c25d5',
    '_links': {'image': {'href': 'https://static.reverb-assets.com/assets/products/blank_medium-8120dba69a21448fa293fc2df8f8ce0265daa804f2be9a7413732a58d1b833f8.jpg'},
     'self': {'href': '/api/categories/accessories/tuners'},
     'listings': {'href': '/api/listings?category=tuners&product_type=accessories'},
     'follow'

In [53]:
# Index the subcategories of 'bass-guitars' by their slug.
subcat_list = cat_mapping['bass-guitars']['subcategories']
subcat_mapping = {entry['slug']: entry for entry in subcat_list}

subcat_mapping


{'4-string': {'name': '4 corde',
  'description': '',
  'id': 25,
  'uuid': 'ac571749-28c7-4eec-a1d9-09dca3cf3e5f',
  'full_name': 'Bassi / 4 corde',
  'slug': '4-string',
  'product_type_slug': 'bass-guitars',
  'root_uuid': '53a9c7d7-d73d-4e7f-905c-553503e50a90',
  '_links': {'image': {'href': 'https://static.reverb-assets.com/assets/products/blank_medium-8120dba69a21448fa293fc2df8f8ce0265daa804f2be9a7413732a58d1b833f8.jpg'},
   'self': {'href': '/api/categories/bass-guitars/4-string'},
   'listings': {'href': '/api/listings?category=4-string&product_type=bass-guitars'},
   'follow': {'href': '/api/my/follows/categories/bass-guitars/4-string'}}},
 '5-string-or-more': {'name': 'A 5 corde o più',
  'description': '',
  'id': 26,
  'uuid': '3178be7d-f1cd-4da5-a606-bf3c1b8e834d',
  'full_name': 'Bassi / A 5 corde o più',
  'slug': '5-string-or-more',
  'product_type_slug': 'bass-guitars',
  'root_uuid': '53a9c7d7-d73d-4e7f-905c-553503e50a90',
  '_links': {'image': {'href': 'https://stati

In [62]:
# Top-level key -> copy of the category record extended with "subcat_mapping"
# (subcategory slug -> subcategory record).
cat_mapping = {}

for el in data:  # data = raw["categories"]
    # a category without a "subcategories" entry gets an empty mapping
    subcat_mapping = dict((sub["slug"], sub) for sub in el.get("subcategories", []))

    # key by the slug when the record has one, otherwise by its name
    top_key = el["slug"] if "slug" in el else el["name"]
    cat_mapping[top_key] = dict(el, subcat_mapping=subcat_mapping)

In [65]:
# Spot check: listings href of the '4-string' subcategory of 'bass-guitars'.
cat_mapping['bass-guitars']['subcat_mapping']['4-string']['_links']['listings']['href']


'/api/listings?category=4-string&product_type=bass-guitars'

In [79]:
# One (product type slug, subcategory slug, listings href) tuple per subcategory.
rows = [
    (product_type, slug, record['_links']['listings']['href'])
    for product_type, entry in cat_mapping.items()
    for slug, record in entry['subcat_mapping'].items()
]

df = pd.DataFrame(rows, columns=['product_type', 'category', 'listings_href'])

In [80]:
# Display the product_type / category / listings_href table.
df

Unnamed: 0,product_type,category,listings_href
0,accessories,tuners,/api/listings?category=tuners&product_type=acc...
1,accessories,power-supplies,/api/listings?category=power-supplies&product_...
2,accessories,other-strings,/api/listings?category=other-strings&product_t...
3,accessories,bass-gig-bags,/api/listings?category=bass-gig-bags&product_t...
4,accessories,guitar-gig-bags,/api/listings?category=guitar-gig-bags&product...
...,...,...,...
301,keyboards-and-synths,keyboard-synths,/api/listings?category=keyboard-synths&product...
302,keyboards-and-synths,arranger-keyboards,/api/listings?category=arranger-keyboards&prod...
303,keyboards-and-synths,workstation-keyboards,/api/listings?category=workstation-keyboards&p...
304,keyboards-and-synths,portable-keyboards,/api/listings?category=portable-keyboards&prod...


In [81]:
# Write the table to CSV without the index column.
# NOTE(review): the counting script reads "data/product_type-cat-href_table.csv",
# which is a different file name from the one written here — confirm which is intended.
df.to_csv("data/cat-subcat-href_table.csv", index=False)