In [1]:
# Standard library imports.
import os
import requests
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Third party imports.
import pandas as pd
from tqdm import tqdm

In [40]:
class MELIData:
    """Small client for the public Mercado Libre REST API plus CSV helpers.

    The class bundles two concerns used throughout the notebook:

    * fetching JSON documents from the API (sites, categories, search
      results and single items), and
    * writing / reading those documents as CSV files below ``dataset_path``.

    Attributes:
        meli_url: Base URL of the API (kept with its trailing slash; URL
            construction normalises slashes, see ``_build_url``).
        dataset_path: Directory in which the CSV files are stored.
        timeout: Seconds to wait for an HTTP response; ``None`` waits forever.
    """

    def __init__(self, timeout: float = 30.0) -> None:
        """Configure the API base URL, the CSV directory and the HTTP timeout.

        Args:
            timeout: Seconds each HTTP request may take before ``requests``
                raises a timeout error. Pass ``None`` to disable the limit.
        """
        self.meli_url = "https://api.mercadolibre.com/"
        self.dataset_path = "../data/raw"
        self.timeout = timeout

    def _build_url(self, path: str) -> str:
        """Join ``path`` onto the base URL with exactly one separating slash."""
        # The base URL ends with '/', so naive f-string joining produced
        # 'https://api.mercadolibre.com//sites'. Strip both sides first.
        return f"{self.meli_url.rstrip('/')}/{path.lstrip('/')}"

    def _get_json(self, path: str, params=None):
        """Send a GET request to ``path`` and return the decoded JSON body.

        The HTTP status is deliberately not checked: the decoded body is
        returned as-is, so error payloads still reach the caller.
        """
        response = requests.get(
            self._build_url(path), params=params, timeout=self.timeout
        )
        return response.json()

    def _csv_path(self, df_name: str) -> str:
        """Return the CSV file path that belongs to the dataset ``df_name``."""
        return f"{self.dataset_path}/{df_name}.csv"

    def get_sites(self):
        """Return the decoded JSON answer of the ``/sites`` endpoint."""
        return self._get_json("sites")

    def get_categories(self, site_id):
        """Return the decoded JSON answer of ``/sites/<site_id>/categories``.

        Args:
            site_id: Identifier of the site, e.g. ``'MCO'``.
        """
        return self._get_json(f"sites/{site_id}/categories")

    def search_item_by_category(self, site_id, cat_id, offset=50):
        """Return one page of ``/sites/<site_id>/search`` for a category.

        Args:
            site_id: Identifier of the site to search in.
            cat_id: Category identifier, sent as the ``category`` parameter.
            offset: Value sent as the ``offset`` query parameter.
                NOTE(review): the default of 50 presumably skips the first
                page of results; it is kept for backward compatibility, so
                pass ``offset=0`` explicitly to start at the beginning.
        """
        params = {'category': cat_id, 'offset': offset}
        return self._get_json(f"sites/{site_id}/search", params=params)

    def get_item_features(self, item_id):
        """Return the decoded JSON answer of ``/items/<item_id>``."""
        return self._get_json(f"items/{item_id}")

    def convert_and_save_dataframe(self, arr, df_name):
        """Convert ``arr`` to a DataFrame and write it as ``<df_name>.csv``.

        Args:
            arr: Anything accepted by ``pd.DataFrame`` (the notebook passes
                lists of dicts and ready-made DataFrames).
            df_name: File name without extension, relative to
                ``dataset_path``.
        """
        # Create the target directory on first use so that a fresh checkout
        # does not fail with "No such file or directory".
        if self.dataset_path:
            os.makedirs(self.dataset_path, exist_ok=True)
        dataframe = pd.DataFrame(arr)
        dataframe.to_csv(self._csv_path(df_name), index=False)

    def read_df(self, df_name):
        """Read ``<df_name>.csv`` from ``dataset_path`` and return a DataFrame."""
        return pd.read_csv(self._csv_path(df_name))

In [41]:
# Create the API client / CSV helper instance that the following cells use.
melidataset = MELIData()

In [42]:
# Fetch the list of sites (each country has its own ID) and report the type
# of the returned container and of its first element.
sites = melidataset.get_sites()
print(f"sites: {type(sites)}")
print(f"sites[0]: {type(sites[0])}")

sites: <class 'list'>
sites[0]: <class 'dict'>


In [43]:
# Persist the fetched sites as "sites.csv" inside melidataset.dataset_path
# (written without the index column).
melidataset.convert_and_save_dataframe(sites, "sites")

In [44]:
# Fetch the item categories of one marketplace site and report the type of
# the returned container and of its first element.
site_id = "MCO"
categories = melidataset.get_categories(site_id)
print(f"categories: {type(categories)}")
print(f"categories[0]: {type(categories[0])}")

categories: <class 'list'>
categories[0]: <class 'dict'>


In [45]:
# Persist the categories as "categories_<site_id>.csv". The name is derived
# from site_id (instead of a hard-coded "categories_MCO") so it always matches
# the file name the item-collection cell builds when it reads the data back.
melidataset.convert_and_save_dataframe(categories, f"categories_{site_id}")

In [None]:
# Get items
#
# For every category of the site, page through the search endpoint, flatten
# each search hit into one row, collect its attributes as separate rows and
# fetch the full item document. Rows are accumulated in plain lists and turned
# into DataFrames once at the end: growing a DataFrame with pd.concat inside
# the loop re-copies every previously collected row on each iteration.

PAGE_SIZE = 50                  # step between two consecutive search offsets
MAX_ITEMS_PER_CATEGORY = 1000   # exclusive upper bound for the search offset


def _flatten_search_item(raw_item):
    """Split one search hit into a flat item row and its attribute rows.

    Args:
        raw_item: One element of the ``'results'`` list of a search response.

    Returns:
        A ``(item_row, attribute_rows)`` tuple. ``item_row`` is a copy of
        ``raw_item`` in which the nested ``'shipping'`` and ``'installments'``
        dicts are merged into the top level, ``'seller'`` is replaced by
        ``'seller_id'`` and ``'attributes'`` is removed. ``attribute_rows``
        holds one dict per attribute, each prefixed with the item's id.
        Missing or ``None`` fields are tolerated instead of raising.
    """
    record = dict(raw_item)  # shallow copy: the API payload itself stays untouched

    # NOTE(review): dict.update lets keys of the nested dict overwrite item
    # keys of the same name; this mirrors the original flattening.
    for nested_key in ('shipping', 'installments'):
        nested = record.pop(nested_key, None)
        if isinstance(nested, dict):
            record.update(nested)

    seller = record.pop('seller', None)
    record['seller_id'] = seller.get('id') if isinstance(seller, dict) else None

    attributes = record.pop('attributes', None) or []
    attribute_rows = [{'item_id': record.get('id'), **attr} for attr in attributes]
    return record, attribute_rows


site_id = 'MCO'
df_name = f"categories_{site_id}"
categories = melidataset.read_df(df_name)

# Get the first 1000 items per search
# for each category in the marketplace.
offsets = list(range(0, MAX_ITEMS_PER_CATEGORY, PAGE_SIZE))

item_records = []
attribute_records = []
feature_records = []

for category_id in tqdm(categories['id'], total=len(categories), desc="categories"):
    # leave=False removes the finished per-category bar, which keeps the cell
    # output short without shelling out to `clear` after every page.
    for offset in tqdm(offsets, total=len(offsets), desc=str(category_id), leave=False):
        page = melidataset.search_item_by_category(site_id, category_id, offset)
        # Error answers carry no usable 'results'; skip them and keep going.
        results = page.get('results') if isinstance(page, dict) else None
        if not results:
            continue

        for raw_item in results:
            item_row, attribute_rows = _flatten_search_item(raw_item)
            item_records.append(item_row)
            attribute_records.extend(attribute_rows)

            item_id = item_row.get('id')
            if item_id is not None:
                feature_records.append(melidataset.get_item_features(item_id))

# Build each table exactly once from the collected records.
items = pd.DataFrame(item_records)
item_attributes = pd.DataFrame(attribute_records)
item_features = pd.DataFrame(feature_records)

melidataset.convert_and_save_dataframe(items, "items_MCO")
melidataset.convert_and_save_dataframe(item_attributes, "item_attributes_MCO")
melidataset.convert_and_save_dataframe(item_features, "item_features_MCO")
