In [10]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

# Configurar o driver do Selenium
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # Executar sem abrir o navegador
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Função para extrair latitude e longitude da URL do mapa
def extract_lat_long(map_url):
    try:
        start = map_url.find("markers=") + 25
        end = map_url.find("&", start)
        lat_long = map_url[start:end]
        latitude, longitude = lat_long.split("%2C")
        return latitude, longitude
    except Exception as e:
        print(f"Erro ao extrair latitude/longitude: {e}")
        return None, None

# Função para raspar dados de uma URL
def scrape_data(url):
    driver.get(url)
    time.sleep(3)  # Aguarda o carregamento da página

    try:
        # Raspar os preços (aluguel, condomínio, IPTU, seguro incêndio, taxa de serviço, total)
        aluguel = driver.find_element(By.XPATH, "//span[text()='Aluguel']/following::p").text
        condominio = driver.find_element(By.XPATH, "//span[text()='Condomínio']/following::p").text
        iptu = driver.find_element(By.XPATH, "//span[text()='IPTU']/following::p").text
        seguro_incendio = driver.find_element(By.XPATH, "//span[text()='Seguro incêndio']/following::p").text
        taxa_servico = driver.find_element(By.XPATH, "//span[text()='Taxa de serviço']/following::p").text
        total = driver.find_element(By.XPATH, "//span[text()='Total']/following::h4").text

        # Raspar o bairro
        bairro = driver.find_element(By.XPATH, "//small[contains(@class, 'pwAPLE')]").text

        # Raspar informações adicionais (metragem, quartos, banheiros, vagas, andar, aceita pet, sem mobília)
        metragem = driver.find_element(By.XPATH, "//p[contains(text(),'m²')]").text
        quartos = driver.find_element(By.XPATH, "//p[contains(text(),'quarto')]").text
        banheiros = driver.find_element(By.XPATH, "//p[contains(text(),'banheiro')]").text
        vagas = driver.find_element(By.XPATH, "//p[contains(text(),'vaga')]").text
        andar = driver.find_element(By.XPATH, "//p[contains(text(),'andar')]").text
        aceita_pet = driver.find_element(By.XPATH, "//p[contains(text(),'Aceita pet')]").text
        sem_mobilia = driver.find_element(By.XPATH, "//p[contains(text(),'Sem mobília')]").text

        # Clicar no botão do mapa e extrair latitude e longitude
        try:
            map_button = driver.find_element(By.XPATH, "//button[@aria-label='Abrir mapa']")
            map_button.click()
            time.sleep(6)  # Espera o mapa carregar

            # Captura o URL da imagem do mapa contendo as coordenadas
            map_image = driver.find_element(By.XPATH, "//img[contains(@src, 'maps.googleapis.com')]")
            map_url = map_image.get_attribute('src')

            # Extrai latitude e longitude do URL do mapa
            latitude, longitude = extract_lat_long(map_url)
        except Exception as e:
            print(f"Erro ao capturar coordenadas do mapa: {e}")
            latitude, longitude = None, None

        # Retornar todas as informações
        return {
            'aluguel': aluguel,
            'condominio': condominio,
            'iptu': iptu,
            'seguro_incendio': seguro_incendio,
            'taxa_servico': taxa_servico,
            'total': total,
            'bairro': bairro,
            'metragem': metragem,
            'quartos': quartos,
            'banheiros': banheiros,
            'vagas': vagas,
            'andar': andar,
            'aceita_pet': aceita_pet,
            'sem_mobilia': sem_mobilia,
            'latitude': latitude,
            'longitude': longitude
        }
    except Exception as e:
        print(f"Erro ao raspar dados da URL {url}: {e}")
        return None

# Carregar o CSV com as URLs
df = pd.read_csv('urls.csv')

# Lista para armazenar os dados raspados
data_list = []

# Iterar sobre as URLs e raspar os dados
for index, row in df.iterrows():
    url = row['urls']
    print(f"Raspando dados da URL: {url}")
    data = scrape_data(url)
    if data:
        data_list.append(data)

# Criar um DataFrame com os dados raspados
df_scraped = pd.DataFrame(data_list)

# Salvar os dados raspados em um novo arquivo CSV
df_scraped.to_csv('imoveis_detalhados.csv', index=False)

# Finalizar o driver do Selenium
driver.quit()


Raspando dados da URL: https://www.quintoandar.com.br/imovel/892992336/alugar/casa-2-quartos-setor-habitacional-jardim-botanico-lago-sul-brasilia?from_route=%22search_results%22&house_tags=exclusivity&search_id=%225fb08272-6d0a-49f6-8bff-97d4dca5dcb9%22&search_rank=%7B%22sortMode%22%3A%22relevance%22%2C%22searchMode%22%3A%22list%22%2C%22resultsOrigin%22%3A%22search%22%2C%22rank%22%3A0%2C%22personalization%22%3Atrue%7D
Erro ao raspar dados da URL https://www.quintoandar.com.br/imovel/892992336/alugar/casa-2-quartos-setor-habitacional-jardim-botanico-lago-sul-brasilia?from_route=%22search_results%22&house_tags=exclusivity&search_id=%225fb08272-6d0a-49f6-8bff-97d4dca5dcb9%22&search_rank=%7B%22sortMode%22%3A%22relevance%22%2C%22searchMode%22%3A%22list%22%2C%22resultsOrigin%22%3A%22search%22%2C%22rank%22%3A0%2C%22personalization%22%3Atrue%7D: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//span[text()='Seguro incêndio']/following::p"}
  (Session info: chr

WebDriverException: Message: disconnected: not connected to DevTools
  (failed to check if window was closed: disconnected: not connected to DevTools)
  (Session info: chrome=128.0.6613.138)
Stacktrace:
0   chromedriver                        0x000000010f181338 chromedriver + 5096248
1   chromedriver                        0x000000010f178b6a chromedriver + 5061482
2   chromedriver                        0x000000010ed03fd0 chromedriver + 389072
3   chromedriver                        0x000000010eceb466 chromedriver + 287846
4   chromedriver                        0x000000010eceb363 chromedriver + 287587
5   chromedriver                        0x000000010ed06302 chromedriver + 398082
6   chromedriver                        0x000000010ed91c73 chromedriver + 969843
7   chromedriver                        0x000000010ed74c93 chromedriver + 851091
8   chromedriver                        0x000000010ed43c79 chromedriver + 650361
9   chromedriver                        0x000000010ed4449e chromedriver + 652446
10  chromedriver                        0x000000010f1440b0 chromedriver + 4845744
11  chromedriver                        0x000000010f148fc8 chromedriver + 4865992
12  chromedriver                        0x000000010f149695 chromedriver + 4867733
13  chromedriver                        0x000000010f126ce9 chromedriver + 4725993
14  chromedriver                        0x000000010f149989 chromedriver + 4868489
15  chromedriver                        0x000000010f118c04 chromedriver + 4668420
16  chromedriver                        0x000000010f168e68 chromedriver + 4996712
17  chromedriver                        0x000000010f169067 chromedriver + 4997223
18  chromedriver                        0x000000010f17876e chromedriver + 5060462
19  libsystem_pthread.dylib             0x00007ff81ec734e1 _pthread_start + 125
20  libsystem_pthread.dylib             0x00007ff81ec6ef6b thread_start + 15


In [11]:
# Criar um DataFrame com os dados raspados
df_scraped = pd.DataFrame(data_list)

# Salvar os dados raspados em um novo arquivo CSV
df_scraped.to_csv('imoveis_detalhados.csv', index=False)

In [6]:
%pip install webdriver-manager

Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv, webdriver-manager
Successfully installed python-dotenv-1.0.1 webdriver-manager-4.0.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
