# MercadoLibre web scraping

Imports

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

Request

In [None]:
# Fetch page data
def fetch_page_data(url, timeout=30):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection, which would hang the whole scrape.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend when
        the server answers with status 200; otherwise ``None`` (a message
        containing the status code is printed).

    Raises:
        requests.RequestException: Network-level failures, including a
            timeout, are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve page with status code {response.status_code}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

Page list

In [None]:
# Get products data from page
def _select_text(node, selector):
    """Return the stripped text of the first ``selector`` match, or '' if none."""
    match = node.select_one(selector)
    return match.get_text(strip=True) if match is not None else ''


def get_page_list(data):
    """Extract one record per listing element.

    Args:
        data: Iterable of parsed listing elements; each must provide
            ``select_one`` and ``select`` (CSS-selector lookups).

    Returns:
        A list of dicts with the keys ``title``, ``currency``, ``price``,
        ``location``, ``rooms``, ``bathrooms``, ``size`` and ``url``. All
        values are the raw strings found in the markup. A field whose element
        is absent from a listing is stored as ``''`` instead of aborting the
        whole scrape with ``AttributeError``.
    """
    page_list = []
    for page_item in data:
        link = page_item.select_one('a.poly-component__title')

        # The attribute list mixes rooms, bathrooms and covered area; tell
        # them apart by the keyword each entry contains.
        rooms, bathrooms, size = '', '', ''
        for attributes_item in page_item.select('li.poly-attributes-list__item'):
            item_text = attributes_item.get_text(strip=True)
            if 'dormitorio' in item_text:
                rooms = item_text
            elif 'baño' in item_text:
                bathrooms = item_text
            elif 'cubierto' in item_text:
                size = item_text

        page_list.append({
            'title': _select_text(page_item, 'h2.poly-box'),
            'currency': _select_text(page_item, 'span.andes-money-amount__currency-symbol'),
            'price': _select_text(page_item, 'span.andes-money-amount__fraction'),
            'location': _select_text(page_item, 'span.poly-component__location'),
            'rooms': rooms,
            'bathrooms': bathrooms,
            'size': size,
            'url': link.get('href') if link is not None else '',
        })
    return page_list

Pages

In [None]:
# Scrape all pages
def scrape_all_pages():
    """Collect the listings from every result page of the house-sale search.

    Pages are requested one after another, advancing the ``_Desde_`` offset
    in the URL by 48 after each page that yields results. The loop stops at
    the first page that could not be fetched or that contains no
    ``ui-search-result__wrapper`` elements.

    Returns:
        A list with the records produced by ``get_page_list`` for all pages
        visited before the loop stopped.
    """
    base_url = "https://listado.mercadolibre.com.uy/inmuebles/casas/venta/"
    offset_step = 48  # presumably the number of results per page -- confirm
    all_pages = []
    items_count = 0
    while True:
        url = f"{base_url}_Desde_{items_count}_NoIndex_True"
        soup = fetch_page_data(url)
        if soup is None:
            # The fetch failed (fetch_page_data already printed the status);
            # stop here and keep what was collected instead of crashing.
            break
        page_data = soup.find_all("div", {"class": 'ui-search-result__wrapper'})
        if not page_data:
            break
        all_pages.extend(get_page_list(page_data))
        items_count += offset_step
    return all_pages

Export CSV and Excel files

In [None]:
# Export dataframe to CSV and XLSX file
def export_dataframe(data):
    """Write ``data`` to ``./data`` as both an XLSX and a CSV file.

    Args:
        data: Anything accepted by the ``pd.DataFrame`` constructor, e.g. a
            list of dicts with one dict per listing.

    The ``./data`` directory is created when it does not exist yet, so a
    finished scrape is not lost to a missing-directory error. Both files are
    written without the index column.
    """
    from pathlib import Path

    Path("./data").mkdir(parents=True, exist_ok=True)

    dataframe = pd.DataFrame(data)
    dataframe.to_excel("./data/mercadolibre_listings.xlsx", index=False)
    dataframe.to_csv("./data/mercadolibre_listings.csv", index=False)
    print("File exported")

Execute function

In [None]:
# Scrape every result page and write the collected listings to the XLSX and CSV files
export_dataframe(scrape_all_pages())


Prepare data

In [87]:
# Read the scraped listings from the CSV file.
# 'price' is forced to text: its values use "." as a thousands separator
# (stripped below), and a file where every price looks like "505.000" would
# otherwise be parsed as floats, breaking the .str accessor used below.
ml_df = pd.read_csv('mercadolibre_listings.csv', dtype={'price': str})
ml_df.head()

# Replace US$ and $ for USD and UYU
ml_df['currency'] = ml_df['currency'].replace({'US$': 'USD', '$': 'UYU'})

# Strip the "." separators and convert the price text to numbers
ml_df['price'] = pd.to_numeric(ml_df['price'].str.replace('.', '', regex=False))

# Split columns in minimum and maximum values
def create_maxmin(df, colname, cmin, cmax, r1, r2):
    """Split a text column holding a value or a range into numeric min/max columns.

    The text in ``colname`` is cleaned by removing ``r1`` and then every
    remaining "s" character, split at most once on ``r2``, and each side is
    converted to a number after removing "." characters. When only one side
    is present, the missing one is filled with the other, so a single value
    yields ``cmin == cmax``.

    Args:
        df: Frame containing ``colname`` as a text column.
        colname: Name of the column to split; it is dropped from the result.
        cmin: Name of the new column for the lower end.
        cmax: Name of the new column for the upper end.
        r1: Literal text (e.g. a unit suffix) removed before splitting.
        r2: Separator between the two ends of a range.

    Returns:
        A new frame without ``colname`` and with ``cmin``/``cmax`` added.
        Note that ``df`` itself is also modified: its ``colname`` is
        overwritten with the cleaned text and the two new columns are added.
    """
    cleaned = df[colname].str.replace(r1, "", regex=False)
    cleaned = cleaned.str.replace("s", "", regex=False)
    df[colname] = cleaned

    # ``n`` must be passed by keyword: the positional form is deprecated and
    # rejected by newer pandas. ``reindex`` guarantees both columns exist even
    # when no row contains the separator (the split then yields one column).
    parts = cleaned.str.split(r2, n=1, expand=True).reindex(columns=[0, 1])
    for target, position in ((cmin, 0), (cmax, 1)):
        # Cast to object so the .str accessor also works on an all-NaN column.
        digits = parts[position].astype(object).str.replace('.', '', regex=False)
        df[target] = pd.to_numeric(digits)

    df = df.drop(colname, axis=1)
    df[cmin] = df[cmin].fillna(df[cmax])
    df[cmax] = df[cmax].fillna(df[cmin])
    return df

# Replace each text column with a pair of numeric min/max columns; the last two
# arguments are the text to strip from the values and the range separator.
ml_df = create_maxmin(ml_df, "rooms", 'rmin', 'rmax', " dormitorio", " a ") # Rooms -> rmin / rmax
ml_df = create_maxmin(ml_df, "bathrooms", 'bmin', 'bmax', " baño", " a ") # Bathrooms -> bmin / bmax
ml_df = create_maxmin(ml_df, "size", 'smin', 'smax', " m² cubierto", " - ") # Size -> smin / smax

# Preview the first rows of the transformed frame
ml_df.head()

  df[[cmin, cmax]] = df[colname].str.split(r2, 1, expand=True)
  df[[cmin, cmax]] = df[colname].str.split(r2, 1, expand=True)
  df[[cmin, cmax]] = df[colname].str.split(r2, 1, expand=True)


Unnamed: 0,title,currency,price,location,url,rmin,rmax,bmin,bmax,smin,smax
0,Gardens View Solanas 100% Financiado,USD,505000,"4XC2+FXF, Calandria, 20003 Punta Ballena, Depa...",https://casa.mercadolibre.com.uy/MLU-691254456...,2.0,3.0,3.0,3.0,160.0,191.0
1,Venta Casas 3 Y 4 Dormitorios A Estrenar. Pina...,USD,210000,"Av Gral Leandro Gómez Esquina, Pinares, Maldonado",https://casa.mercadolibre.com.uy/MLU-640742423...,3.0,4.0,3.0,3.0,130.0,150.0
2,Venta Casas 3 Y 4 Dormitorios. Proyecto Calypt...,USD,350000,"Niteroi 952, Barra De Carrasco, Canelones",https://casa.mercadolibre.com.uy/MLU-687629166...,3.0,4.0,2.0,3.0,102.0,189.0
3,Casas Premium A Construir Donde Quieras. Espec...,USD,267100,"La Tahona Departamento De Canelones, Uruguay, ...",https://casa.mercadolibre.com.uy/MLU-640993147...,3.0,5.0,3.0,5.0,157.0,313.0
4,"Living Golf, Barrio Cerrado En Punta Ballena",USD,310000,"4WMW+3C Punta Ballena, Maldonado Department, U...",https://casa.mercadolibre.com.uy/MLU-673203584...,2.0,2.0,2.0,2.0,700.0,700.0
