# MercadoLibre web scraping

Imports

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

Request

In [None]:
def fetch_page_data(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser`` when the server
        answers with status 200. ``None`` when the request fails or another
        status code is returned; a message is printed in both cases.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Report network-level failures the same way as bad status codes so
        # callers only ever have to check for ``None``.
        print(f"Failed to retrieve page: {error}")
        return None
    if response.status_code != 200:
        print(f"Failed to retrieve page with status code {response.status_code}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

Page list

In [None]:
def _text_or_empty(node, selector):
    """Return the stripped text of the first match for ``selector``, or ''."""
    match = node.select_one(selector)
    if match is None:
        return ''
    return match.get_text(strip=True)


def get_page_list(data):
    """Turn listing elements into a list of plain dictionaries.

    Args:
        data: Iterable of elements exposing ``select_one`` and ``select``
            (for example the tags returned by ``BeautifulSoup.find_all``).

    Returns:
        One dict per element with the keys ``title``, ``currency``, ``price``,
        ``location``, ``rooms``, ``bathrooms`` and ``size``. All values are
        strings; a field whose element is not present is the empty string, so
        a single incomplete listing does not abort the whole page.
    """
    page_list = []
    for page_item in data:
        rooms, bathrooms, size = '', '', ''
        # The attribute list is unordered free text, so each entry is
        # classified by the keyword it contains.
        for attributes_item in page_item.select('li.poly-attributes-list__item'):
            item_text = attributes_item.get_text(strip=True)
            if 'dormitorio' in item_text:
                rooms = item_text
            elif 'baño' in item_text:
                bathrooms = item_text
            elif 'cubierto' in item_text:
                size = item_text
        page_list.append({
            'title': _text_or_empty(page_item, 'h2.poly-box'),
            'currency': _text_or_empty(page_item, 'span.andes-money-amount__currency-symbol'),
            'price': _text_or_empty(page_item, 'span.andes-money-amount__fraction'),
            'location': _text_or_empty(page_item, 'span.poly-component__location'),
            'rooms': rooms,
            'bathrooms': bathrooms,
            'size': size,
        })
    return page_list

Pages

In [None]:
def scrape_all_pages(
    base_url="https://listado.mercadolibre.com.uy/inmuebles/casas/venta/",
    page_size=48,
):
    """Collect the listings from consecutive result pages.

    Pages are requested as ``{base_url}_Desde_{offset}_NoIndex_True`` with the
    offset starting at 0 and growing by ``page_size`` after each page.

    Args:
        base_url: Prefix of the search URL the offset suffix is appended to.
        page_size: Amount added to the offset after every page.

    Returns:
        A list with the dictionaries produced by ``get_page_list`` for every
        page visited. Scraping stops at the first page that cannot be fetched
        or that contains no ``div.ui-search-result__wrapper`` elements; the
        listings gathered up to that point are returned.
    """
    all_pages = []
    items_count = 0
    while True:
        url = f"{base_url}_Desde_{items_count}_NoIndex_True"
        soup = fetch_page_data(url)
        if soup is None:
            # fetch_page_data signals a failed request with None; stop here
            # instead of crashing so earlier pages are not lost.
            break
        page_data = soup.find_all("div", {"class": 'ui-search-result__wrapper'})
        if not page_data:
            break
        all_pages.extend(get_page_list(page_data))
        items_count += page_size
    return all_pages

Export Excel file

In [None]:
def export_dataframe(data, output_path='mercadolibre_listings.xlsx'):
    """Write ``data`` to an Excel workbook and print a confirmation.

    Args:
        data: Anything ``pandas.DataFrame`` accepts, e.g. the list of
            dictionaries returned by ``scrape_all_pages``.
        output_path: Destination file. Defaults to
            ``mercadolibre_listings.xlsx`` in the current working directory.
    """
    dataframe = pd.DataFrame(data)
    dataframe.to_excel(output_path)
    print('File exported')

Execute function

In [None]:
# Scrape every results page, then write the collected listings to Excel.
listings = scrape_all_pages()
export_dataframe(listings)
