In [None]:
!pip install requests

In [1]:
import sys
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
from tqdm.notebook import tqdm as tqdm
import requests

# Extracción

## Búsqueda de items

<a href="https://developers.mercadolibre.com.ar/es_ar/items-y-busquedas#Resumen-de-los-recursos-disponibles">https://developers.mercadolibre.com.ar/es_ar/items-y-busquedas#Resumen-de-los-recursos-disponibles</a>

In [4]:
# URL template for the search resource of site MLA: `query_term` fills the
# `q` parameter and `page` fills the `offset` parameter.
search_endpoint = (
    'https://api.mercadolibre.com/sites/MLA/search?q={query_term}&offset={page}'
)

In [5]:
# Walk the search results of every query term and keep one row per
# (query, result) pair with the id of the item that was returned.
query_terms = ['Google Home', 'Apple TV', 'Amazon Fire TV']
offset_step = 50  # amount the offset advances after each successfully read page
search_data = []
for query_term in query_terms:
    # Every term starts again from the first result.
    offset = 0
    while True:
        response = requests.get(search_endpoint.format(query_term = query_term, page = offset))
        if response.status_code != 200:
            # A failed request used to be retried forever with the same offset
            # (the loop had no exit for this case). Report it and move on to
            # the next search term instead.
            print('Error HTTP {0} al buscar "{1}" con offset {2}; se omite el resto de este termino'.format(
                response.status_code, query_term, offset))
            break

        payload = response.json()

        # No more items: continue with the next search term.
        if len(payload['results']) == 0:
            break

        for result in payload['results']:
            search_data.append({
                'q' : payload['query'],
                'total_items' : payload['paging']['total'],
                # The column is called 'page' but it stores the offset that was requested.
                'page' : offset,
                'item_id' : result['id']
            })
        offset = offset + offset_step

search_df = pd.DataFrame(search_data)
print('{0:,} registros a las {1}'.format(len(search_df), datetime.now()))
search_df.head()

392 registros a las 2020-08-30 17:19:53.342744


Unnamed: 0,q,total_items,page,item_id
0,Google Home,96,0,MLA746509501
1,Google Home,96,0,MLA868448532
2,Google Home,96,0,MLA747593028
3,Google Home,96,0,MLA721141246
4,Google Home,96,0,MLA763380385


Guardamos los datos en un CSV

In [6]:
# Persist the search rows to disk without writing the DataFrame index.
search_df.to_csv(path_or_buf='search.csv', index=False)

## Data de items

In [7]:
# URL template for the detail of a single item, addressed by `item_id`.
item_endpoint = (
    'https://api.mercadolibre.com/items/{item_id}'
)

In [8]:
def _item_record(payload):
    """Build the row kept for one item from its decoded JSON payload.

    Most values are copied as-is; `pictures_quantity` and
    `attributes_quantity` are the lengths of the `pictures` and
    `attributes` entries of the payload.
    """
    return {
        'id' : payload['id'],
        'site_id' : payload['site_id'],
        'title' : payload['title'],
        'seller_id' : payload['seller_id'],
        'category_id' : payload['category_id'],
        'price' : payload['price'],
        'initial_quantity' : payload['initial_quantity'],
        'available_quantity' : payload['available_quantity'],
        'sold_quantity' : payload['sold_quantity'],
        'buying_mode' : payload['buying_mode'],
        'condition' : payload['condition'],
        'pictures_quantity' : len(payload['pictures']),
        'accepts_mercadopago' : payload['accepts_mercadopago'],
        'domain_id' : payload['domain_id'],
        'attributes_quantity' : len(payload['attributes']),
    }


# One request per distinct item id found by the search; a response whose
# status is not 200 produces no row.
items_data = []
for item_id in search_df['item_id'].unique():
    item_response = requests.get(item_endpoint.format(item_id = item_id))
    if item_response.status_code != 200:
        continue
    items_data.append(_item_record(item_response.json()))

items_df = pd.DataFrame(items_data)
items_count = len(items_df)
print('{0:,} registros a las {1}'.format(items_count, datetime.now()))
items_df.head()

392 registros a las 2020-08-30 17:21:38.674886


Unnamed: 0,id,site_id,title,seller_id,category_id,price,initial_quantity,available_quantity,sold_quantity,buying_mode,condition,pictures_quantity,accepts_mercadopago,domain_id,attributes_quantity
0,MLA746509501,MLA,Google Home Mini Parlante Original Español Ok ...,91988078,MLA409415,5499.0,142,1,50,buy_it_now,new,6,True,MLA-SMART_SPEAKERS,14
1,MLA868448532,MLA,Google Home Mini Asistente Virtual,557639592,MLA409415,3890.0,43,1,25,buy_it_now,new,3,True,MLA-SMART_SPEAKERS,13
2,MLA747593028,MLA,Google Home Mini Asistente Virtual En Español ...,114495635,MLA409415,4870.0,926,2,450,buy_it_now,new,7,True,MLA-SMART_SPEAKERS,14
3,MLA721141246,MLA,Google Home Parlante Manos Libres Nuevo Origin...,309453406,MLA409415,10999.0,1031,500,25,buy_it_now,new,9,True,MLA-SMART_SPEAKERS,16
4,MLA763380385,MLA,Google Home Mini Asistente Virtual Español You...,309453406,MLA409415,4499.0,256,1,150,buy_it_now,new,18,True,MLA-SMART_SPEAKERS,13


Guardamos los datos en un CSV

In [9]:
# Persist the item rows to disk without writing the DataFrame index.
items_df.to_csv(path_or_buf='items.csv', index=False)

## Data de sellers

In [10]:
# URL template for the public data of a single user, addressed by `seller_id`.
seller_endpoint = (
    'https://api.mercadolibre.com/users/{seller_id}'
)

In [11]:
# One request per distinct seller id present in the items table; a response
# whose status is not 200 produces no row.
sellers_data = []
for seller_id in items_df['seller_id'].unique():
    seller_response = requests.get(seller_endpoint.format(seller_id = seller_id))
    if seller_response.status_code != 200:
        continue

    payload = seller_response.json()

    # Top-level attributes of the user.
    record = {
        'id' : payload['id'],
        'nickname' : payload['nickname'],
        'user_type' : payload['user_type'],
        'points' : payload['points'],
    }

    # Reputation values live under nested objects; flatten them into columns.
    reputation = payload['seller_reputation']
    record['seller_reputation_level_id'] = reputation['level_id']
    record['seller_reputation_power_status'] = reputation['power_seller_status']

    transactions = reputation['transactions']
    record['total_transactions'] = transactions['total']
    record['canceled_transactions'] = transactions['canceled']
    record['completed_transactions'] = transactions['completed']

    ratings = transactions['ratings']
    record['positive_rating'] = ratings['positive']
    record['negative_rating'] = ratings['negative']
    record['neutral_rating'] = ratings['neutral']

    sellers_data.append(record)

sellers_df = pd.DataFrame(sellers_data)
sellers_count = len(sellers_df)
print('{0:,} registros a las {1}'.format(sellers_count, datetime.now()))
sellers_df.head()

295 registros a las 2020-08-30 17:22:56.389600


Unnamed: 0,id,nickname,user_type,points,seller_reputation_level_id,seller_reputation_power_status,total_transactions,canceled_transactions,completed_transactions,positive_rating,negative_rating,neutral_rating
0,91988078,GAMING-CITY,normal,67677,5_green,platinum,145203,9805,135398,0.96,0.02,0.02
1,557639592,TECHSHOPFULL,normal,9,5_green,gold,206,10,196,1.0,0.0,0.0
2,114495635,GRYTECNO,normal,8031,5_green,platinum,24678,640,24038,0.98,0.01,0.01
3,309453406,BAZA 10,normal,6147,5_green,platinum,32061,2634,29427,0.94,0.04,0.02
4,284824585,SKY.VISION ELECTRONICA,normal,3687,5_green,platinum,19123,1500,17623,0.93,0.05,0.02


Guardamos los datos en un CSV

In [12]:
# Persist the seller rows to disk without writing the DataFrame index.
sellers_df.to_csv(path_or_buf='sellers.csv', index=False)

## Data de las categorías

<a href="https://developers.mercadolibre.com.ar/es_ar/categorias-y-publicaciones">https://developers.mercadolibre.com.ar/es_ar/categorias-y-publicaciones</a>

In [13]:
# URL template for the detail of a single category, addressed by `category_id`.
category_endpoint = (
    'https://api.mercadolibre.com/categories/{category_id}'
)

In [14]:
# One request per distinct category id present in the items table; a response
# whose status is not 200 produces no row.
categories_data = []
category_ids = items_df['category_id'].unique()
for category_id in category_ids:
    category_response = requests.get(category_endpoint.format(category_id = category_id))
    if category_response.status_code != 200:
        continue
    payload = category_response.json()
    categories_data.append(dict(
        id = payload['id'],
        name = payload['name'],
        # Stored under the shorter column name 'total_items'.
        total_items = payload['total_items_in_this_category'],
    ))

categories_df = pd.DataFrame(categories_data)
categories_count = len(categories_df)
print('{0:,} registros a las {1}'.format(categories_count, datetime.now()))
categories_df.head()

2 registros a las 2020-08-30 17:22:57.261856


Unnamed: 0,id,name,total_items
0,MLA409415,Asistentes Virtuales,345
1,MLA352001,Media Streaming,1610


Guardamos los datos en un CSV

In [15]:
# Persist the category rows to disk without writing the DataFrame index.
categories_df.to_csv(path_or_buf='categories.csv', index=False)

## Data de las visitas de los sellers en los últimos 30 días

<a href="https://developers.mercadolibre.com.ar/es_ar/metricas">https://developers.mercadolibre.com.ar/es_ar/metricas</a>

In [16]:
# URL template for the visits to a seller's items, requested with
# `last=30` and `unit=day`, addressed by `seller_id`.
seller_visits_by_day_endpoint = (
    'https://api.mercadolibre.com/users/{seller_id}/items_visits/time_window?last=30&unit=day'
)

In [17]:
# One request per seller; every entry of the response's `results` list
# becomes one row. A response whose status is not 200 produces no rows.
seller_visits_data = []
for seller_id in sellers_df['id'].unique():
    visits_response = requests.get(seller_visits_by_day_endpoint.format(seller_id = seller_id))
    if visits_response.status_code != 200:
        continue

    payload = visits_response.json()
    seller_visits_data.extend(
        {
            'seller_id' : seller_id,
            # Total of the whole response, repeated on each of its rows.
            'total_visits' : payload['total_visits'],
            # Keep only the first 10 characters of the date string.
            'day' : result['date'][:10],
            'total_in_day' : result['total']
        }
        for result in payload['results']
    )

seller_visits_df = pd.DataFrame(seller_visits_data)
seller_visits_count = len(seller_visits_df)
print('{0:,} registros a las {1}'.format(seller_visits_count, datetime.now()))
seller_visits_df.head()

8,850 registros a las 2020-08-30 17:46:51.505870


Unnamed: 0,seller_id,total_visits,day,total_in_day
0,91988078,932765,2020-07-31,41720
1,91988078,932765,2020-08-01,31217
2,91988078,932765,2020-08-02,30380
3,91988078,932765,2020-08-03,34533
4,91988078,932765,2020-08-04,29593


Guardamos los datos en un CSV

In [18]:
# Persist the seller visit rows to disk without writing the DataFrame index.
seller_visits_df.to_csv(path_or_buf='seller_visits.csv', index=False)

## Data de las visitas de los items en los últimos 30 días

<a href="https://developers.mercadolibre.com.ar/es_ar/metricas">https://developers.mercadolibre.com.ar/es_ar/metricas</a>

In [20]:
# URL template for the visits to a single item, requested with
# `last=30` and `unit=day`, addressed by `item_id`.
item_visits_by_day_endpoint = (
    'https://api.mercadolibre.com/items/{item_id}/visits/time_window?last=30&unit=day'
)

In [21]:
# One request per item; every entry of the response's `results` list becomes
# one row. A response whose status is not 200 produces no rows.
item_visits_data = []
for item_id in items_df['id'].unique():
    visits_response = requests.get(item_visits_by_day_endpoint.format(item_id = item_id))
    if visits_response.status_code != 200:
        continue

    payload = visits_response.json()
    daily_rows = [
        {
            'item_id' : item_id,
            # Total of the whole response, repeated on each of its rows.
            'total_visits' : payload['total_visits'],
            # Keep only the first 10 characters of the date string.
            'day' : result['date'][:10],
            'total_in_day' : result['total']
        }
        for result in payload['results']
    ]
    item_visits_data += daily_rows

item_visits_df = pd.DataFrame(item_visits_data)
item_visits_count = len(item_visits_df)
print('{0:,} registros a las {1}'.format(item_visits_count, datetime.now()))
item_visits_df.head()

11,760 registros a las 2020-08-30 17:54:13.527279


Unnamed: 0,item_id,total_visits,day,total_in_day
0,MLA746509501,8424,2020-07-31,358
1,MLA746509501,8424,2020-08-01,253
2,MLA746509501,8424,2020-08-02,335
3,MLA746509501,8424,2020-08-03,430
4,MLA746509501,8424,2020-08-04,403


Guardamos los datos en un CSV

In [22]:
# Persist the item visit rows to disk without writing the DataFrame index.
item_visits_df.to_csv(path_or_buf='item_visits.csv', index=False)