In [1]:
import os
import zipfile
from urllib.parse import urlsplit

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

def obtain_webpage(species_type):
    """Return the atlas listing URL for a vertebrate group, or None if unknown.

    Recognised groups are "mamiferos", "aves", "reptiles", "anfibios" and
    "peces"; any other value yields None.
    """
    base_webpage = "https://www.miteco.gob.es/es/biodiversidad/temas/inventarios-nacionales/inventario-especies-terrestres/inventario-nacional-de-biodiversidad/bdn-ieet-atlas-vert-"

    # (group name, page suffix) pairs; only mammals use an abbreviated suffix.
    page_suffixes = (
        ("mamiferos", "mamif"),
        ("aves", "aves"),
        ("reptiles", "reptiles"),
        ("anfibios", "anfibios"),
        ("peces", "peces"),
    )

    for group_name, suffix in page_suffixes:
        if species_type == group_name:
            return base_webpage + suffix + ".html"
    return None
    
# Function to download a file
def download_file(url, folder, timeout=60):
    """Download ``url`` into ``folder``, named after the last segment of the URL path.

    Args:
        url: Absolute URL of the file to fetch.
        folder: Directory the file is written into (must already exist).
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot hang the run indefinitely.

    Raises:
        ValueError: If ``url`` is empty/None or its path ends without a file name.
        requests.RequestException: On connection errors or timeouts.

    A non-200 response is reported on stdout and nothing is written.
    """
    if not url:
        raise ValueError("download_file() needs a non-empty URL")

    # Use only the URL *path* for the name, so a query string or fragment
    # ("file.zip?v=2") does not leak into the file name on disk.
    name = os.path.basename(urlsplit(url).path)
    if not name:
        raise ValueError(f"Cannot derive a file name from URL: {url}")
    filename = os.path.join(folder, name)

    # With stream=True the connection is only released once the body is fully
    # read or the response is closed; the context manager guarantees the close
    # even on a non-200 reply or a write error.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download: {url}")
            return
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

In [None]:
# HTML id of the listing table, used on both the group pages and the sub-pages.
TABLE_ID = "xsltExecuted"


def _first_href(element):
    """Return the href of the first <a> inside ``element``, or None if there is none."""
    anchors = element.find_elements(By.TAG_NAME, "a")
    return anchors[0].get_attribute("href") if anchors else None


def _try_download(url, label, species, group, download_dir):
    """Download one file for a species; report failures instead of raising."""
    if not url:
        print(f"Failed to download {species} from group {group}: no {label} link in the table")
        return
    try:
        download_file(url, download_dir)
    except Exception as error:
        # Best effort: one bad file must not stop the crawl. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f"Failed to download {species} from group {group}: {label}: {error}")


def _extract_archives(download_dir):
    """Unpack every .zip in ``download_dir`` in place and delete it once extracted."""
    for entry in os.listdir(download_dir):
        if os.path.splitext(entry)[1] != ".zip":
            continue
        archive = os.path.join(download_dir, entry)
        try:
            with zipfile.ZipFile(archive, "r") as zip_ref:
                zip_ref.extractall(download_dir)
        except zipfile.BadZipFile:
            # Leave the file on disk so it can be inspected, and carry on.
            print(f"Not a valid zip archive, left in place: {archive}")
            continue
        os.remove(archive)


driver = webdriver.Chrome()
try:
    for group in ["mamiferos", "aves", "reptiles", "anfibios", "peces"]:

        os.makedirs(f"Datos IEET - {group}", exist_ok=True)
        driver.get(obtain_webpage(group))

        # Collect every linked sub-page from the group table. The hrefs are
        # read into plain strings before navigating, because the row elements
        # are not reused once the driver loads another page. Keyed by href
        # (insertion-ordered) so links sharing the same text are not dropped.
        table = driver.find_element(By.ID, TABLE_ID)
        listing_urls = {}
        for row in table.find_elements(By.TAG_NAME, "tr"):
            for cell in row.find_elements(By.TAG_NAME, "td"):
                href = _first_href(cell)
                if href:
                    listing_urls[href] = None

        for listing_url in listing_urls:
            driver.get(listing_url)

            # Map scientific name -> (zip link, pdf link); either may be None.
            species_links = {}
            table = driver.find_element(By.ID, TABLE_ID)

            # Skip the header row.
            for row in table.find_elements(By.TAG_NAME, "tr")[1:]:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < 4:  # not a data row
                    continue
                scientific_name = cells[0].text.strip()
                species_links[scientific_name] = (
                    _first_href(cells[2]),
                    _first_href(cells[3]),
                )

            # Download both files for each species, then unpack the archive.
            for species, (zip_link, pdf_link) in species_links.items():
                download_dir = f"Datos IEET - {group}/{species}"
                os.makedirs(download_dir, exist_ok=True)
                _try_download(zip_link, "zip", species, group, download_dir)
                _try_download(pdf_link, "pdf", species, group, download_dir)
                _extract_archives(download_dir)
finally:
    # Always close the browser, even if the crawl fails or is interrupted.
    driver.quit()

Failed to download Delichon urbica from group aves
Failed to download Gelochelidon nilotica from group aves
Failed to download Hirundo daurica from group aves
Failed to download Lagopus mutus from group aves
Failed to download Lanius meridionalis from group aves
Failed to download Miliaria calandra from group aves
Failed to download Tachymarptis melba from group aves
