In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Request headers passed to every requests.get call below.
# The User-Agent imitates a desktop Chrome browser; presumably this is to
# avoid servers that reject the default python-requests agent -- TODO confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}

def is_valid_url(url):
    """Check if a URL is reachable and answers with a 200 status code.

    Args:
        url: Absolute URL to probe.

    Returns:
        True if the GET request completes with status 200, False for any
        other status or if the request fails (connection error, timeout,
        invalid URL, ...).
    """
    try:
        # stream=True defers the body download: only the status line and
        # headers are needed here. The context manager releases the
        # connection even though the body is never read.
        with requests.get(url, headers=headers, timeout=5, stream=True) as response:
            # Progress output: printed only for URLs that produced a response.
            print(url)
            return response.status_code == 200
    except requests.RequestException:
        return False

def get_all_valid_subpages(url):
    """Fetch all valid subpages from the same domain as the provided URL.

    Downloads ``url``, resolves every ``<a href>`` on that page to an absolute
    URL, and keeps those that share the page's network location and for which
    ``is_valid_url`` returns True. Only the given page is scanned; the links
    found on it are not crawled further.

    Args:
        url: Absolute URL of the page to scan.

    Returns:
        A set of absolute URLs with any ``#fragment`` removed. An empty set
        if the page itself could not be fetched.
    """
    # Imported locally so this function does not depend on the file-level
    # import line being extended.
    from urllib.parse import urldefrag

    try:
        # Without a timeout requests may wait indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return set()

    soup = BeautifulSoup(response.text, 'html.parser')
    base_domain = urlparse(url).netloc
    links = set()
    visited = set()

    for link in soup.find_all('a', href=True):
        # Build the absolute URL and drop the fragment: "/page#a" and
        # "/page#b" are the same resource and should be checked only once.
        full_url, _fragment = urldefrag(urljoin(url, link['href']))
        link_domain = urlparse(full_url).netloc

        # Only check subpages within the same domain and avoid rechecking
        if link_domain != base_domain or full_url in visited:
            continue
        visited.add(full_url)  # Mark as visited to avoid repeats

        # Keep the URL only if it is actually reachable
        if is_valid_url(full_url):
            links.add(full_url)

    return links

# Example usage:
# Scan the site's landing page and print the set of same-domain links that
# responded with status 200. This runs at cell execution time and performs
# one HTTP request per distinct link found on the page.
url = "https://www.medulinriviera.info"
subpages = get_all_valid_subpages(url)
print(subpages)


www.medulinriviera.info
https://www.medulinriviera.info
https://www.medulinriviera.info/de/
https://www.medulinriviera.info/fr/
https://www.medulinriviera.info/hr/
https://www.medulinriviera.info/it/
https://www.medulinriviera.info/sl/
https://www.medulinriviera.info/arrival-in-medulin/
https://www.medulinriviera.info/tourist-agencies/
https://www.medulinriviera.info/getting-around/
https://www.medulinriviera.info/climate-and-weather/
https://www.medulinriviera.info/pet-friendly/
https://www.medulinriviera.info/banks-money/
https://www.medulinriviera.info/health-and-health-care-services/
https://www.medulinriviera.info/wi-fi-points/
https://www.medulinriviera.info/useful-phone-numbers/
https://www.medulinriviera.info/home/accessible-tourism/
https://www.medulinriviera.info/accommodation/hotels/
https://www.medulinriviera.info/accommodation/camping/
https://www.medulinriviera.info/accommodation/mobile-homes/
https://www.medulinriviera.info/accommodation/resorts/
https://www.medulinrivie