In [None]:
# @title Angel functions
def utc_now(type='utc', date_format='%Y%m%d%H%M%S'):
    """Return the current UTC time, truncated to whole seconds.

    Args:
        type (str): Output flavour (the name shadows the builtin, but it is
            part of the public signature and is kept for callers):
            ``'utc'`` returns a naive ``datetime``;
            ``'utc_isoformat'`` returns ``'YYYY-MM-DD HH:MM:SS'``;
            ``'utc_custom'`` returns the time formatted with ``date_format``.
        date_format (str): ``strftime`` pattern, only used by ``'utc_custom'``.

    Returns:
        datetime | str: The current UTC time in the requested representation.

    Raises:
        ValueError: If ``type`` is not one of the supported values.
    """

    from datetime import datetime, timezone

    # TODO: add a timezone-aware variant

    # datetime.utcnow() is deprecated since Python 3.12; build the same naive
    # UTC value from an aware "now" and strip the tzinfo afterwards.
    value = datetime.now(timezone.utc).replace(tzinfo=None, microsecond=0)

    if type == 'utc':
        return value

    if type == 'utc_isoformat':
        return value.isoformat(sep=' ')

    if type == 'utc_custom':
        return value.strftime(date_format)

    # An unknown flavour used to fall through and silently return None.
    raise ValueError(
        f"Unsupported type {type!r}; expected 'utc', 'utc_isoformat' or 'utc_custom'"
    )


def measure_execution_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func (callable): The function to time.

    Returns:
        callable: A wrapper that forwards all arguments to ``func``, prints
        the elapsed seconds (two decimals) and returns ``func``'s result.
        If ``func`` raises, the exception propagates and nothing is printed.
    """

    from functools import wraps
    from time import perf_counter

    # wraps() keeps func's __name__/__doc__ on the wrapper, so the decorated
    # function still identifies itself correctly.
    @wraps(func)
    def wrapper(*args, **kwargs):

        # perf_counter() is a monotonic clock meant for measuring intervals;
        # it is not affected by system clock adjustments.
        start_time = perf_counter()
        result = func(*args, **kwargs)
        execution_time = perf_counter() - start_time
        print(f"Execution time of {func.__name__}: {execution_time:.2f} seconds")

        return result

    return wrapper

def create_directory(directory_path):
    """
    Create a directory (and any missing parents) if it doesn't already exist.

    Args:
    directory_path (str): The path of the directory to be created.

    Returns:
    bool: True if the directory was successfully created or already exists, False otherwise.
    """

    import os

    try:
        # Remember whether it was already there so the message below is
        # accurate; makedirs(exist_ok=True) succeeds silently in both cases.
        already_exists = os.path.isdir(directory_path)
        os.makedirs(directory_path, exist_ok=True)

    # OSError covers filesystem failures; TypeError/ValueError cover invalid
    # path arguments (e.g. None or embedded NUL), keeping the bool contract.
    except (OSError, TypeError, ValueError) as e:
        print(f"Error creating directory '{directory_path}': {e}")
        return False

    if already_exists:
        print(f"Directory '{directory_path}' already exists.")
    else:
        print(f"Directory '{directory_path}' created successfully.")

    return True


# Enclave Functions

In [None]:
# @title Evitar que nos detenga el API
def avoid_rate_limit(url, url_params, retry_attempts=5, base_delay=1, max_delay=32, timeout=30):
    """GET ``url`` and return its JSON body, backing off on HTTP 429.

    Args:
        url (str): Endpoint to request.
        url_params (dict): Query-string parameters passed to ``requests.get``.
        retry_attempts (int): Maximum number of requests to send.
        base_delay (float): Delay in seconds before the first retry; it
            doubles on every further retry.
        max_delay (float): Upper bound in seconds for the delay before jitter.
        timeout (float): Seconds to wait for the server before giving up on a
            single request.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` when a non-200/non-429
        status is received or when every attempt was answered with 429.

    Raises:
        requests.RequestException: Network failures and timeouts are not
        caught here and propagate to the caller.
    """

    import requests
    from time import sleep
    from random import random

    for attempt in range(retry_attempts):

        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, params=url_params, timeout=timeout)

        if response.status_code == 200:
            return response.json()

        if response.status_code != 429:
            # Any other status is treated as a non-retryable failure.
            print(f"Error: {response.status_code}")
            return None

        if attempt == retry_attempts - 1:
            # No request follows the last attempt, so waiting would be wasted.
            break

        # Too many requests: exponential backoff, capped at max_delay, with
        # random jitter so the delay is between 1x and 2x the capped value.
        delay = min(base_delay * (2 ** attempt), max_delay)
        delay_with_jitter = delay * (1 + random())
        print(f"Too many requests, retrying in {delay_with_jitter:.2f} seconds...")
        sleep(delay_with_jitter)

    return None

In [None]:
# @title Curado de json en los articulos
def cured_json_sku(dict_value):
    """Flatten one raw item dict into an ordered list of the fields we keep.

    The order is: item id, seller nickname, catalog product id, inventory id,
    category id, domain id, title, listing type id, available quantity
    (stock), price and shipping logistic type.

    Args:
        dict_value (dict): One raw item; must contain every key read below,
            including the nested ``seller.nickname`` and
            ``shipping.logistic_type``.

    Returns:
        list: The eleven selected values, in the order listed above.

    Raises:
        KeyError: If any of the expected keys is missing.
    """

    # Top-level fields that follow the seller nickname, in output order.
    flat_keys = (
        'catalog_product_id',
        'inventory_id',
        'category_id',
        'domain_id',
        'title',
        'listing_type_id',
        'available_quantity',
        'price',
    )

    row = [dict_value['id'], dict_value['seller']['nickname']]
    row.extend(dict_value[key] for key in flat_keys)
    row.append(dict_value['shipping']['logistic_type'])

    return row

In [None]:
# @title Curado de listas para dataframe
def seller_skus_to_df(seller_skus):
    """Build a DataFrame from a list of per-item rows.

    Args:
        seller_skus (list): Rows of eleven values each, in the column order
            defined below.

    Returns:
        pandas.DataFrame: One row per item with named columns.
    """

    import pandas as pd

    sku_columns = [
        'item_id',
        'seller_nickname',
        'catalog_product_id',
        'inventory_id',
        'category_id',
        'domain_id',
        'item_title',
        'listing_type_id',
        'stock',
        'price',
        'logistic_type',
    ]

    return pd.DataFrame.from_records(seller_skus, columns=sku_columns)


def seller_agg_values_to_df(seller_agg_values):
    """Build a DataFrame of per-seller aggregate values.

    Args:
        seller_agg_values (list): Rows of ``[seller_nickname, total_items]``.

    Returns:
        pandas.DataFrame: Columns ``seller_nickname`` and ``total_items``.
    """

    import pandas as pd

    return pd.DataFrame.from_records(
        seller_agg_values,
        columns=['seller_nickname', 'total_items'],
    )

# Main

In [None]:
# @title Parametros
# Root of the sites API and the site segment appended to it (MLM).
URL_BASE = 'https://api.mercadolibre.com/sites'
SITE_ID = '/MLM'

# Endpoint catalogue: for each process name, the path appended to
# URL_BASE + SITE_ID and a default set of query-string parameters.
d_api_meli = {
    "categorias_meli": {
        "path": "/categories",
        "query": {},
    },
    "items_meli": {
        "path": "/search",
        "query": {
            "category": "MLM1000",
            "offset": 1,
            "limit": 1,
        },
    },
    "user_meli": {
        "path": "/search",
        "query": {
            "nickname": "GAAN3088776",
            "offset": 0,
            "limit": 50,
        },
    },
    "category_fee": {
        "path": "/listing_prices",
        "query": {},
    },
}

In [None]:
#@title Obtener lista de vendedores que participen en la categoria de MLM1000
def get_nicknames(magic_number=20, category_id="MLM1000", page_size=50):
    """Collect the unique seller nicknames found in a category's search results.

    Args:
        magic_number (int): Maximum number of result pages to request.
        category_id (str): Category whose items are searched.
        page_size (int): Items requested per page; also the offset step.

    Returns:
        list: Unique seller nicknames, in no particular order. If a request
        fails, the nicknames gathered up to that point are returned.

    Raises:
        KeyError: If an item in the results lacks ``seller.nickname``.
    """

    from time import sleep

    seller_set = set()

    # The 'user_meli' entry is used only for its path, which is the search endpoint.
    url = URL_BASE + SITE_ID + d_api_meli['user_meli']['path']

    for page in range(magic_number):

        if page:
            # Pause between consecutive requests to stay under the rate limit.
            sleep(1)

        url_params = {
            "category": category_id,
            "offset": page_size * page,
            "limit": page_size,
        }

        response_value = avoid_rate_limit(url, url_params)

        # avoid_rate_limit returns None on failure; indexing it would raise
        # TypeError and discard everything collected so far.
        if response_value is None:
            print(f"No response for offset {url_params['offset']}; stopping early.")
            break

        list_of_products = response_value['results']

        # An empty page means there is nothing further to read.
        if not list_of_products:
            break

        seller_set.update(
            product_data_raw['seller']['nickname']
            for product_data_raw in list_of_products
        )

    return list(seller_set)


In [None]:
# List of unique sellers found in category MLM1000
seller_list = get_nicknames(1) # number of result pages requested while looking for new sellers

In [None]:
# Report how many unique sellers were found and preview the first five.
print(f"Cantidad de vendedores unicos: {len(seller_list)}")
seller_list[0:5]

In [None]:
# @title montar nuestro google drive en caso de que utilicemos Google Colab
def prep_path(project_path='/content/drive/MyDrive/meli_challenge_202404-main',
              mount_point='/content/drive'):
    """Mount Google Drive (Colab only) and prepare the project folders.

    Mounts Drive, makes ``project_path`` the working directory and ensures
    the ``seller_skus`` and ``seller_agg`` output folders exist inside it.

    Args:
        project_path (str): Existing project folder on the mounted Drive.
            The default is where the project lands when it is uploaded to
            the Drive's top-level folder.
        mount_point (str): Where Google Drive is mounted.

    Returns:
        str: The literal status message ``'directorios listo'``.

    Raises:
        ImportError: When not running inside Google Colab.
        OSError: If ``project_path`` cannot be entered (e.g. it does not exist).
    """

    import os

    from google.colab import drive
    drive.mount(mount_point)

    # All relative paths used later resolve against the project folder.
    os.chdir(project_path)

    # Output folders for the per-seller CSV files.
    create_directory('seller_skus')
    create_directory('seller_agg')

    return 'directorios listo'


In [None]:
# Prepare the working directory on Google Drive
prep_path()

In [None]:
!pwd
!ls

In [None]:
# With 402 sellers the run takes 1928.54 seconds

In [None]:
#@title get_seller_skus
@measure_execution_time
def get_seller_skus(unique_seller_list, max_pages=20, page_size=50):
    """Download every seller's items and save them as per-seller CSV files.

    For each nickname, pages through the search endpoint, writes the items to
    ``seller_skus/<nickname>.csv`` and the reported total item count to
    ``seller_agg/<nickname>.csv``, and prints one progress row.

    Args:
        unique_seller_list (list): Seller nicknames to process.
        max_pages (int): Maximum number of pages requested per seller.
        page_size (int): Items requested per page; also the offset step.

    Returns:
        str: The literal status message ``'terminado'``.

    Raises:
        KeyError: If a response or item lacks one of the expected keys.
    """

    from time import time, sleep  # time() measures each seller's duration

    url = URL_BASE + SITE_ID + d_api_meli['user_meli']['path']

    # Header of the progress table
    print(f"{'i':<6}{'seller_nickname':<40}{'#items':<8}Execution time")

    for index_value, seller_nickname in enumerate(unique_seller_list, start=1):

        start_time = time()

        seller_skus = []
        seller_total_items = None  # stays None until a page is fetched

        for page in range(max_pages):

            url_params = {
                "nickname": seller_nickname,
                "offset": page_size * page,
                "limit": page_size,
            }

            get_user = avoid_rate_limit(url, url_params)

            # avoid_rate_limit returns None on failure; keep what was already
            # fetched for this seller instead of crashing the whole run.
            if get_user is None:
                print(f"No response for seller '{seller_nickname}' at offset {url_params['offset']}.")
                break

            seller_total_items = get_user['paging']['total']
            list_of_products = get_user['results']

            # Reduce each raw item to the fields we keep
            seller_skus.extend(
                cured_json_sku(product_data_raw)
                for product_data_raw in list_of_products
            )

            # Wait so the API is not exhausted
            sleep(0.5)

            # A page that is not exactly full is the seller's last one
            if len(list_of_products) != page_size:
                break

        if seller_total_items is None:
            # Not a single page could be fetched: nothing to save.
            print(f"{index_value:<6}{seller_nickname:<40}skipped")
            continue

        # Save this seller's items
        seller_skus_to_df(seller_skus).to_csv(f"seller_skus/{seller_nickname}.csv", index=False)

        # Save this seller's total item count as reported by the API
        seller_agg_values_to_df([[seller_nickname, seller_total_items]]).to_csv(f"seller_agg/{seller_nickname}.csv", index=False)
        print(f"{index_value:<6}{seller_nickname:<40}{seller_total_items:<8}{(time() - start_time):.1f} seconds")

    return 'terminado'


In [None]:
# obtener los productos de los vendedores
# Fetch the products of every seller (results are written to CSV files)
# seller_skus, seller_agg_values = get_seller_skus(seller_list)
get_seller_skus(seller_list)
# Quick check with two sellers only:
# get_seller_skus(['GAAN3088776', 'TRONSMARTOFFICIALSTOREMX'])

In [None]:
!pwd

In [None]:
import os

def get_files_in_folder(folder_path):
    """Return the names of the entries directly inside ``folder_path`` that are files.

    Sub-directories are skipped and the listing is not recursive. Names are
    returned in the order ``os.scandir`` yields them.
    """
    with os.scandir(folder_path) as listing:
        return [item.name for item in listing if item.is_file()]

# The per-seller CSV files live in "seller_skus", relative to the current
# working directory (alternative: f"{project_path}/seller_skus").
folder_path = "seller_skus"
files = get_files_in_folder(folder_path)
print("Files in folder:", len(files))

In [None]:
import pandas as pd

# Stack every per-seller CSV into a single table. ignore_index=True gives the
# result a fresh 0..n-1 index; without it each file's own 0-based row labels
# are kept, so the combined frame has duplicate index values.
# NOTE: pd.concat raises ValueError when `files` is empty.
df_seller_skus = pd.concat(
    [pd.read_csv(f"seller_skus/{f}") for f in files],
    ignore_index=True,
)
df_seller_skus

In [None]:
# Save the combined table next to the per-seller folders, without the index column.
df_seller_skus.to_csv("seller_skus.csv", index=False)

In [None]:
!ls

In [None]:
# Distinct category ids present in the combined items table, and how many there are.
unique_category = list(df_seller_skus['category_id'].unique())
print(len(unique_category))

In [None]:
from time import sleep

# Categories to query. For a quick test, swap in a short list such as
# ['MLM168467'].
category_list = unique_category

process_name = 'category_fee'

url = URL_BASE + SITE_ID + d_api_meli[process_name]['path']

# Rows of [category_id, listing_type_id, fee as a fraction]
category_list_all = []

# conteo_ronda counts the categories processed so far (1-based).
for conteo_ronda, category_name in enumerate(category_list, start=1):

    URL_PARAMS = {
        "category_id": category_name
        , "price": 1000  # reference price sent with every fee request
        }

    reponse_value = avoid_rate_limit(url, URL_PARAMS)

    # avoid_rate_limit returns None on failure; iterating None would raise
    # TypeError and lose every fee collected so far.
    if reponse_value is None:
        print(f"No fee data for category {category_name}; skipping.")
        reponse_value = []

    for listing_price in reponse_value:

        percentage_fee = listing_price['sale_fee_details']['percentage_fee']

        # Only listing types with a non-zero fee are kept.
        if percentage_fee != 0:

            category_list_all.append([category_name, listing_price['listing_type_id'], percentage_fee / 100])

    # Pause between requests to stay under the rate limit.
    sleep(0.25)

    # Progress marker every 500 categories processed.
    if conteo_ronda % 500 == 0:

        print('+500')

# https://api.mercadolibre.com/sites/MLM/listing_prices?category_id=MLM168467&price=3999
# https://api.mercadolibre.com/sites/MLM/listing_prices?category_id=MLM168467&price=1000
# https://api.mercadolibre.com/sites/MLM/listing_prices?category=MLM168467&price=1000

In [None]:
# @title Curado de listas para dataframe
def category_fee_to_df(category_fee):
    """Build a DataFrame of listing fees per category.

    Args:
        category_fee (list): Rows of
            ``[category_id, listing_type_id, listing_type_fee]``.

    Returns:
        pandas.DataFrame: One row per input row with those three columns.
    """

    import pandas as pd

    return pd.DataFrame.from_records(
        category_fee,
        columns=['category_id', 'listing_type_id', 'listing_type_fee'],
    )


In [None]:
# Turn the collected [category, listing type, fee] rows into a DataFrame.
df_category = category_fee_to_df(category_list_all)

In [None]:
# NOTE(review): this bare expression is not the cell's last statement, so it
# is presumably never displayed — confirm whether a preview was intended.
df_category
# Save the fee table, without the index column.
df_category.to_csv("category_fee.csv", index=False)

In [None]:
# Average fee (as a fraction) over all collected category / listing-type rows.
df_category['listing_type_fee'].mean()