In [94]:
# list categories available in the API

import requests
import pandas as pd


cats = requests.get('https://api.mercadolibre.com/sites/MCO/categories')
cats.json()

[{'id': 'MCO1747', 'name': 'Accesorios para Vehículos'},
 {'id': 'MCO441917', 'name': 'Agro'},
 {'id': 'MCO1403', 'name': 'Alimentos y Bebidas'},
 {'id': 'MCO1071', 'name': 'Animales y Mascotas'},
 {'id': 'MCO1367', 'name': 'Antigüedades y Colecciones'},
 {'id': 'MCO1368', 'name': 'Arte, Papelería y Mercería'},
 {'id': 'MCO1384', 'name': 'Bebés'},
 {'id': 'MCO1246', 'name': 'Belleza y Cuidado Personal'},
 {'id': 'MCO40433', 'name': 'Boletas para Espectáculos'},
 {'id': 'MCO1039', 'name': 'Cámaras y Accesorios'},
 {'id': 'MCO1743', 'name': 'Carros, Motos y Otros'},
 {'id': 'MCO1051', 'name': 'Celulares y Teléfonos'},
 {'id': 'MCO1648', 'name': 'Computación'},
 {'id': 'MCO1144', 'name': 'Consolas y Videojuegos'},
 {'id': 'MCO172890', 'name': 'Construcción'},
 {'id': 'MCO1276', 'name': 'Deportes y Fitness'},
 {'id': 'MCO5726', 'name': 'Electrodomésticos'},
 {'id': 'MCO1000', 'name': 'Electrónica, Audio y Video'},
 {'id': 'MCO175794', 'name': 'Herramientas'},
 {'id': 'MCO1574', 'name': 'Ho

In [95]:
# list children category for a given category

cat_id = 'MCO1168'

subcats = requests.get('https://api.mercadolibre.com/categories/' + cat_id)
subcats.json()

{'id': 'MCO1168',
 'name': 'Música, Películas y Series',
 'picture': 'https://http2.mlstatic.com/storage/categories-api/images/05ed692c-24f0-4f53-b1eb-8e41c321b00b.png',
 'permalink': 'https://www.mercadolibre.com.co/c/musica-peliculas-y-series',
 'total_items_in_this_category': 175543,
 'path_from_root': [{'id': 'MCO1168', 'name': 'Música, Películas y Series'}],
 'children_categories': [{'id': 'MCO456481',
   'name': 'Contenido Deportivo',
   'total_items_in_this_category': 0},
  {'id': 'MCO445795', 'name': 'Cursos', 'total_items_in_this_category': 225},
  {'id': 'MCO1176', 'name': 'Música', 'total_items_in_this_category': 152942},
  {'id': 'MCO456083',
   'name': 'Peliculas Online',
   'total_items_in_this_category': 12},
  {'id': 'MCO1169',
   'name': 'Películas Físicas',
   'total_items_in_this_category': 18326},
  {'id': 'MCO5633',
   'name': 'Series de TV',
   'total_items_in_this_category': 2491},
  {'id': 'MCO456084',
   'name': 'Series y Tv Shows Online',
   'total_items_in_th

In [96]:
# get total number of items in a category

cat_id = 'MCO1176' # category 'Música'

items = requests.get('https://api.mercadolibre.com/sites/MCO/search?category=' + cat_id)

items.json()['paging']['total']

142643

In [97]:
def download_category(cat_id, limit=50, offset=0):
    url = f'https://api.mercadolibre.com/sites/MCO/search?category={cat_id}&limit={limit}&offset={offset}'
    request = requests.get(url)
    items = request.json()
    df = pd.DataFrame()
    for item in items['results']:
        thumbnail = item['thumbnail']
        request = requests.get(thumbnail)
        thumbnail_id = thumbnail.split('/')[-1].split('.')[0]
        with open(f'thumbnails/{thumbnail_id}.jpg', 'wb') as f:
            f.write(request.content)
        df = df.append({
            'title': item['title'], 
            'price': item['price'], 
            'currency_id': item['currency_id'],
            'thumbnail_id': thumbnail_id}, 
            ignore_index=True)
    return df

In [98]:
# download items in the category Música

cat_id = 'MCO1176'

num_items_to_retrieve = 1000 # we will retrieve 1000 items as it is the maximum number of items we can retrieve with public API

offset_list = list(range(0, num_items_to_retrieve, 50))

offset_list

df = pd.DataFrame()

for offset in offset_list:
    print(f'Downloading items from offset {offset}')
    new_df = download_category(cat_id, offset=offset)
    df = pd.concat([df, new_df])

df.to_csv('items.csv', index=False)

Downloading items from offset 0
Downloading items from offset 50
Downloading items from offset 100
Downloading items from offset 150
Downloading items from offset 200
Downloading items from offset 250
Downloading items from offset 300
Downloading items from offset 350
Downloading items from offset 400
Downloading items from offset 450
Downloading items from offset 500
Downloading items from offset 550
Downloading items from offset 600
Downloading items from offset 650
Downloading items from offset 700
Downloading items from offset 750
Downloading items from offset 800
Downloading items from offset 850
Downloading items from offset 900
Downloading items from offset 950


In [99]:
df

Unnamed: 0,title,price,currency_id,thumbnail_id
0,Twice 11th Mini Album - Between 1&2,189905.0,COP,D_856090-MLU70140317571_062023-I
1,Cd - Amor Amarillo - Gustavo Cerati,70000.0,COP,D_735151-MLU73725568023_122023-I
2,Arctic Monkeys - Am,70000.0,COP,D_890982-MLU74223263112_022024-I
3,Arctic Monkeys - Favourite Worst Nightmare,50000.0,COP,D_610357-MLU69233737949_052023-I
4,Cd - Blue Banisters - Lana Del Rey,70000.0,COP,D_957581-MLA49063252094_022022-I
...,...,...,...,...
45,Honeymoon - Lana Del Rey - Disco Cd - (14 Canc...,80000.0,COP,D_908661-MLU69216199114_052023-I
46,Somewhere In Time / Deluxe - Iron Maiden - Bo...,265000.0,COP,D_628661-MLM31493624340_072019-I
47,Disco De Vinilo Harry Styles,203550.0,COP,D_885855-MCO31085169356_062019-I
48,Linkin Park Hybrid Theory 20th Anniversary Edi...,437900.0,COP,D_943407-MLA43825909245_102020-I
