In [None]:
import pandas as pd
import time
import random
from bs4 import BeautifulSoup as bs
import undetected_chromedriver as uc

# Read the property IDs from disk; the 'id' column is consumed further down.
ids_casas = pd.read_csv(filepath_or_buffer='ids_casas.csv')

# One browser session, created through undetected_chromedriver, is shared by
# every page load in this script.
browser = uc.Chrome()

def _extraer_campo(etiqueta, nombre_error, extraer):
    """Run one field extractor on a best-effort basis.

    Calls ``extraer()``, prints ``"<etiqueta>: <value>"`` and returns the
    value. If the extraction (or the print itself) raises, prints
    ``"Error al extraer <nombre_error>: <exception>"`` and returns ``None``
    so that one broken field never discards the rest of the row.
    """
    try:
        valor = extraer()
        print(f"{etiqueta}: {valor}")
    except Exception as e:
        valor = None
        print(f"Error al extraer {nombre_error}: {e}")
    return valor


def _leer_titulo(soup):
    """Return the text of the main title span, or None if it is absent."""
    tag = soup.find('span', {'class': 'main-info__title-main'})
    return tag.text if tag is not None else None


def _leer_localizacion(soup):
    """Return the part of the minor title span before the first comma, or None."""
    tag = soup.find('span', {'class': 'main-info__title-minor'})
    return tag.text.split(',')[0] if tag is not None else None


def _leer_precio(soup):
    """Return the first 'txt-bold' span parsed as an int, or None if absent.

    Dots and the euro sign are removed before conversion; text that is still
    not a plain integer raises ValueError, which the caller reports.
    """
    tag = soup.find('span', {'class': 'txt-bold'})
    if tag is None:
        return None
    return int(tag.text.replace('.', '').replace('€', '').strip())


def _leer_caracteristicas(soup, clase_css):
    """Return the stripped <li> texts inside div.<clase_css> joined by '; '.

    Returns None when the div is missing or contains no <li> items.
    """
    contenedor = soup.find('div', {'class': clase_css})
    if contenedor is None:
        return None
    items = [li.text.strip() for li in contenedor.find_all('li')]
    return '; '.join(items) if items else None


def parsear_inmueble(id_inmueble):
    """Load one property page and extract its data into a one-row DataFrame.

    The page ``https://www.idealista.com/inmueble/<id_inmueble>/`` is opened
    with the module-level ``browser``; after a random 4-6 second pause the
    rendered HTML is parsed. Progress and per-field results are printed.

    Args:
        id_inmueble: Property identifier; converted with ``str`` to build the
            URL, so numeric IDs are accepted.

    Returns:
        A ``pandas.DataFrame`` with a single row and the columns ``Titulo``,
        ``Localizacion``, ``Precio``, ``Caracteristicas_basicas`` and
        ``Caracteristicas_extra``. A field that is missing or fails to parse
        is ``None``. If loading or parsing the page itself fails, the error is
        printed and ``None`` is returned instead of a DataFrame.
    """
    try:
        print('\nCasa numero: ' + str(id_inmueble))
        url = f"https://www.idealista.com/inmueble/{id_inmueble}/"
        browser.get(url)
        time.sleep(random.randint(4, 6))  # Wait for the page to load completely

        soup = bs(browser.page_source, 'lxml')

        # Dict values are evaluated top to bottom, so the per-field output is
        # printed in the same order as the columns.
        casas = {
            'Titulo': _extraer_campo(
                'Título', 'el título', lambda: _leer_titulo(soup)),
            'Localizacion': _extraer_campo(
                'Localización', 'la localización',
                lambda: _leer_localizacion(soup)),
            'Precio': _extraer_campo(
                'Precio', 'el precio', lambda: _leer_precio(soup)),
            'Caracteristicas_basicas': _extraer_campo(
                'Características básicas', 'las características básicas',
                lambda: _leer_caracteristicas(
                    soup, 'details-property-feature-one')),
            'Caracteristicas_extra': _extraer_campo(
                'Características extra', 'las características extra',
                lambda: _leer_caracteristicas(
                    soup, 'details-property-feature-two')),
        }

        return pd.DataFrame([casas])  # One row per property

    except Exception as e:
        # Best effort: report the failure and let the caller skip this property.
        print(f"Error al procesar el inmueble {id_inmueble}: {e}")
        return None

# Scrape every ID in the 'id' column, keeping one single-row DataFrame per
# property that was parsed successfully.
filas = []
try:
    for posicion, id_inmueble in enumerate(ids_casas['id']):
        if posicion:
            # Pause before every request except the first, so the wait really
            # falls between consecutive requests.
            time.sleep(random.randint(4, 8))

        df_inmueble = parsear_inmueble(id_inmueble)

        # parsear_inmueble returns None when a property could not be processed.
        if df_inmueble is not None:
            filas.append(df_inmueble)
finally:
    # Close the browser even if the loop is interrupted, so no browser
    # process is left running.
    browser.quit()

if filas:
    # Concatenate once at the end instead of growing the DataFrame per row.
    df_casas = pd.concat(filas, ignore_index=True)

    # Save DataFrame to CSV; the file name carries the number of rows saved.
    df_casas.to_csv(f'casas_hasta_{len(df_casas)}.csv', index=False, sep=';', encoding='utf-16')
else:
    # Empty ID list or every property failed: there is nothing to write.
    df_casas = None
    print("No se extrajo ningún inmueble; no se guarda ningún CSV.")
