In [4]:
# List the internal and external links found on https://nazaret.eus/
# and report the HTTP status code (e.g. 200 or 404) for each link.

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def _probe_status(link_url, timeout):
    """Return the final HTTP status code of *link_url*, or an ``"Error: ..."`` string."""
    try:
        # HEAD avoids downloading the body; redirects are followed so the
        # reported status is the one of the final destination.
        status = requests.head(
            link_url, allow_redirects=True, timeout=timeout
        ).status_code
        if status in (405, 501):
            # The server refuses HEAD: retry with a streamed GET so that only
            # the headers are read, and release the connection right away.
            with requests.get(
                link_url, allow_redirects=True, timeout=timeout, stream=True
            ) as fallback:
                status = fallback.status_code
        return status
    except requests.exceptions.RequestException as e:
        return "Error: " + str(e)


def check_links(url, timeout=10):
    """Collect the ``<a href>`` links of a page and probe each one's HTTP status.

    Args:
        url: Address of the page whose links are checked.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(internal_links, external_links)``. Each element is a list
        of ``(absolute_url, status)`` pairs in document order, where
        ``status`` is the integer status code or a string starting with
        ``"Error: "`` when the request itself failed. A link is internal when
        its host equals the host of *url*. Both lists are empty when *url*
        cannot be fetched (the error is printed).

    Links whose scheme is not ``http``/``https`` (``mailto:``, ``tel:``,
    ``javascript:`` ...) are skipped because they cannot be probed over HTTP.
    """
    # Only the page fetch can raise here; per-link failures are handled in
    # _probe_status, so the try body is kept to the two lines that matter.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Treat 4xx/5xx on the page itself as a failure
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {url}: {e}")
        return [], []

    soup = BeautifulSoup(response.content, "html.parser")
    base_netloc = urlparse(url).netloc  # loop-invariant, computed once

    internal_links = []
    external_links = []
    # Status per requested URL, so a page linked many times is probed once.
    status_by_url = {}

    for link in soup.find_all("a", href=True):
        absolute_url = urljoin(url, link["href"])
        parsed_url = urlparse(absolute_url)

        if parsed_url.scheme not in ("http", "https"):
            continue

        # The fragment is never sent to the server, so "#a" and "#b" variants
        # of one page share a single request.
        request_url = parsed_url._replace(fragment="").geturl()
        if request_url not in status_by_url:
            status_by_url[request_url] = _probe_status(request_url, timeout)
        status = status_by_url[request_url]

        if parsed_url.netloc == base_netloc or not parsed_url.netloc:
            internal_links.append((absolute_url, status))
        else:
            external_links.append((absolute_url, status))

    return internal_links, external_links


if __name__ == "__main__":
    # Check every link on the target page, then print one section per
    # category with a "<url> - <status>" line for each link.
    target_url = "https://nazaret.eus/"
    internal, external = check_links(target_url)

    sections = (
        ("Internal Links:", internal),
        ("\nExternal Links:", external),
    )
    for heading, results in sections:
        print(heading)
        for link, status in results:
            print(f"{link} - {status}")

Internal Links:
https://nazaret.eus/#content - 200
https://nazaret.eus/ - 200
https://nazaret.eus/ - 200
https://nazaret.eus/ - 200
https://nazaret.eus/nazaret/ - 200
https://nazaret.eus/#popmake-5086 - 200
https://nazaret.eus/contacto/ - 200
https://nazaret.eus/inika - 200
https://nazaret.eus/#pll_switcher - 200
https://nazaret.eus/ - 200
https://nazaret.eus/eu/nazaret-2/ - 200
https://nazaret.eus/ - 200
https://nazaret.eus/bachillerato-humanidades/ - 200
https://nazaret.eus/bachillerato-ciencias/ - 200
https://nazaret.eus/bachillerato-ebbd/ - 200
https://nazaret.eus/bachillerato-general/ - 200
https://nazaret.eus/ - 200
https://nazaret.eus/ - 200
https://nazaret.eus/administracion-finanzas/ - 200
https://nazaret.eus/asistencia-direccion/ - 200
https://nazaret.eus/nba/ - 200
https://nazaret.eus/doble-ciclo/ - 200
https://nazaret.eus/desarrollo-web/ - 200
https://nazaret.eus/marketing/ - 200
https://nazaret.eus/integracion-social/ - 200
https://nazaret.eus/documentacion-sanitaria/ - 20