# 1. Generate file with nearby cities
Set the start city and the minimum and maximum search radius (in km) in the parameters below.

In [None]:
import pandas as pd
from math import radians, cos, sin, sqrt, atan2

# Parameters
city = "Groningen"    # Start city; looked up by name in the GeoNames data
min_radius_km = 200   # Cities closer than this are excluded
max_radius_km = 800   # Cities farther than this are excluded

# Load the GeoNames data
file_path = "cities15000.txt"  # Update with the actual file path
columns = [
    "geonameid", "name", "asciiname", "alternatenames", "latitude", "longitude", 
    "feature class", "feature code", "country code", "cc2", "admin1 code", 
    "admin2 code", "admin3 code", "admin4 code", "population", "elevation", 
    "dem", "timezone", "modification date"
]

# Load the file into a pandas DataFrame.
# The file is tab-separated and has no header row, so the column names are
# supplied explicitly. pandas' default missing-value markers include the
# literal string "NA", which would turn Namibia's country code into NaN;
# only empty fields are treated as missing here.
cities = pd.read_csv(
    file_path,
    sep='\t',
    names=columns,
    low_memory=False,
    keep_default_na=False,
    na_values=[''],
)

# Haversine function to calculate distance between two lat/lon pairs
def haversine(lat1, lon1, lat2, lon2):
    """
    Returns the great-circle distance between two points on Earth.

    Parameters:
    - lat1, lon1 (float): Latitude and longitude of the first point, in degrees.
    - lat2, lon2 (float): Latitude and longitude of the second point, in degrees.

    Returns:
    - float: Distance in kilometers, using a spherical Earth of radius 6371 km.
    """
    R = 6371  # Radius of Earth in kilometers
    phi1, lam1, phi2, lam2 = map(radians, (lat1, lon1, lat2, lon2))
    half_dlat = (phi2 - phi1) / 2
    half_dlon = (lam2 - lam1) / 2
    a = sin(half_dlat)**2 + cos(phi1) * cos(phi2) * sin(half_dlon)**2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise ValueError. A plain comparison is used
    # instead of min()/max() so that NaN inputs still propagate as NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

# Function to find cities within a radius
def find_cities_within_radius(lat, lon, min_radius_km, max_radius_km, min_population=0):
    """
    Returns the cities whose distance from (lat, lon) lies within a ring.

    Side effect: writes a 'distance_km' column onto the module-level
    `cities` DataFrame, overwriting any previous values.

    Parameters:
    - lat, lon (float): Origin coordinates in degrees.
    - min_radius_km, max_radius_km (float): Inclusive distance bounds in km.
    - min_population (int): Minimum population a city must have (inclusive).

    Returns:
    - DataFrame with the columns name, latitude, longitude, population,
      distance_km and country code for every matching city.
    """
    def distance_from_origin(row):
        return haversine(lat, lon, row['latitude'], row['longitude'])

    cities['distance_km'] = cities.apply(distance_from_origin, axis=1)

    not_too_far = cities['distance_km'] <= max_radius_km
    not_too_close = cities['distance_km'] >= min_radius_km
    large_enough = cities['population'] >= min_population
    selected = cities[not_too_far & not_too_close & large_enough]

    output_columns = ['name', 'latitude', 'longitude', 'population', 'distance_km', 'country code']
    return selected[output_columns]

def get_coordinates_by_city_name(city_name, country_code=None):
    """
    Looks up a city by name in the module-level `cities` DataFrame.

    The name comparison is case-insensitive. When several rows match, the
    first one in the DataFrame is used.

    Parameters:
    - city_name (str): The name of the city to search for.
    - country_code (str): Optional. ISO 3166-1 alpha-2 country code used to
      narrow down the search (compared case-insensitively).

    Returns:
    - (float, float): A tuple containing (latitude, longitude) if found.
    - None: If no row matches.
    """
    name_matches = cities['name'].str.lower() == city_name.lower()
    candidates = cities[name_matches]

    # Narrow down by country only when a code was supplied
    if country_code:
        same_country = candidates['country code'].str.upper() == country_code.upper()
        candidates = candidates[same_country]

    if candidates.empty:
        return None

    first_match = candidates.iloc[0]
    return first_match['latitude'], first_match['longitude']
    
# Look up the start city. The lookup returns None for an unknown name, so
# check before unpacking to fail with a clear message instead of a TypeError.
coordinates = get_coordinates_by_city_name(city)
if coordinates is None:
    raise ValueError(f"City '{city}' was not found in {file_path}")
latitude, longitude = coordinates

# Keep only cities inside the search ring with at least 100,000 inhabitants
nearby_cities = find_cities_within_radius(latitude, longitude, min_radius_km, max_radius_km, 100_000)

# Display the result
print(nearby_cities)
# Save the nearby cities to a tab-separated text file
output_file_path = "nearby_cities.csv"
nearby_cities.to_csv(output_file_path, sep='\t', index=False)
print(f"Nearby cities saved to {output_file_path}")


# 2. Filter the generated list.

In [None]:
import pandas as pd

# Read back the tab-separated city list written by step 1
nearby_cities_df = pd.read_csv('nearby_cities.csv', sep='\t')

# Country codes to keep; rows from any other country are dropped
filtered_countries = ['FR', 'DE', 'BE', 'DK','PL']
in_selected_country = nearby_cities_df['country code'].isin(filtered_countries)
filtered_df = nearby_cities_df.loc[in_selected_country]

# Show the first rows and the (rows, columns) shape as a sanity check
print(filtered_df.head())
print(f"Size of filtered_df: {filtered_df.shape}")


# 3. Fetching data

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def _build_search_url(city, group_adults, checkin, checkout, breakfast_included):
    """Builds the Booking.com search-results URL (ordered by price) for one city."""
    from urllib.parse import quote_plus

    # City names can contain spaces, accents or characters such as '&' that
    # would otherwise corrupt the query string.
    encoded_city = quote_plus(str(city))
    url = (
        "https://www.booking.com/searchresults.en-gb.html?"
        f"&ss={encoded_city}&ssne={encoded_city}&ssne_untouched={encoded_city}"
        "&lang=en-gb&sb=1&src_elem=sb&dest_type=city"
        f"&checkin={checkin}&checkout={checkout}&group_adults={group_adults}"
        "&no_rooms=1&group_children=0&sr_view=list&order=price"
    )
    if breakfast_included:
        url += "&nflt=mealplan%3D1"
    return url


def fetch_hotel_prices_and_links(city, max_price=1000, group_adults=24, checkin="2025-03-07", checkout="2025-03-09", breakfast_included=False, save_file = False):
    """
    Scrapes the Booking.com search results for one city with headless Firefox.

    Parameters:
    - city (str): City name used as the search term.
    - max_price (float): Only listings priced strictly below this are returned.
    - group_adults (int): Number of adults passed to the search.
    - checkin, checkout (str): Dates in YYYY-MM-DD format.
    - breakfast_included (bool): If True, adds the meal-plan filter to the search.
    - save_file (bool): If True, saves the page HTML to ./saved_pages/<city>.html.

    Returns:
    - list of dict: One {"price": float, "link": str} entry per kept listing;
      "link" is "No link available" when no availability button was found.
    """
    import os

    price_selector = 'span[data-testid="price-and-discounted-price"]'

    # or use your browser of choice
    options = webdriver.FirefoxOptions()
    options.add_argument('--headless')
    driver = webdriver.Firefox(options=options)
    hotels = []

    # try/finally guarantees the browser is closed even if scraping fails;
    # otherwise every error would leave a headless Firefox process behind.
    try:
        driver.get(_build_search_url(city, group_adults, checkin, checkout, breakfast_included))

        if save_file:
            os.makedirs("./saved_pages", exist_ok=True)
            with open(f"./saved_pages/{city}.html", "w", encoding='utf-8') as f:
                f.write(driver.page_source)

        listing_count = len(driver.find_elements(By.CSS_SELECTOR, price_selector))

        for i in range(listing_count):
            # Reset per listing so the timeout handler can never see the
            # previous listing's price (or an unbound name).
            price_value = None
            retry_count = 3  # Retry a few times in case of a stale element
            while retry_count > 0:
                try:
                    # Re-query on every attempt: references obtained earlier
                    # go stale when the page re-renders.
                    prices = driver.find_elements(By.CSS_SELECTOR, price_selector)
                    if i >= len(prices):
                        break  # The page now shows fewer listings than before
                    price = prices[i]

                    price_text = price.text.replace("€", "").replace(",", "").strip()
                    try:
                        price_value = float(price_text)
                    except ValueError:
                        print(f"Could not parse price {price_text!r} for listing {i}; skipping.")
                        break

                    if price_value < max_price:
                        try:
                            # 6 divs higher should always still be in the selected hotel's div
                            parent_div = price.find_element(By.XPATH, './ancestor::div[6]')
                            availability_button = parent_div.find_element(By.CSS_SELECTOR, 'a[data-testid="availability-cta-btn"]')

                            # If button is found, retrieve the link
                            link = availability_button.get_attribute('href')
                            hotels.append({"price": price_value, "link": link})

                        except NoSuchElementException:
                            print("Availability button not found for this listing.")
                            hotels.append({"price": price_value, "link": "No link available"})

                    break  # Break out of the retry loop if successful
                except StaleElementReferenceException:
                    retry_count -= 1
                    if retry_count == 0:
                        print(f"Failed to process price due to stale element: {i}")
                except TimeoutException:
                    retry_count -= 1
                    if retry_count == 0:
                        print("Availability button did not appear in time.")
                        # Keep the listing only if its price was actually read
                        # and passes the same price filter as above.
                        if price_value is not None and price_value < max_price:
                            hotels.append({"price": price_value, "link": "No link available"})
    finally:
        driver.quit()

    return hotels


# 4. Multithreaded search
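Be careful when using this: it runs several browser sessions in parallel, so the scraping is likely to be detected sooner than with the single-thread search below.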

In [None]:
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

# Parameters for the multithreaded run; passed to fetch_and_save further down
max_price = 1300             # Only listings priced below this value are saved
breakfast_included = True    # Adds the meal-plan filter; results go to hotels_breakfast.csv
checkin = "2025-03-07"   # Format: YYYY-MM-DD
checkout = "2025-03-09"    # Format: YYYY-MM-DD
people_count = 24            # Passed to the search as the number of adults

# Wrapper function for fetching data
def fetch_and_save(city_list, max_price=1000, group_adults=24, checkin="2025-03-07", checkout="2025-03-09", breakfast_included=False, save_file = False, max_workers=5):
    """
    Fetches hotel prices for several cities in parallel and appends them to a file.

    Results go to 'hotels_breakfast.csv' when breakfast_included is True and
    to 'hotels.csv' otherwise. The file is tab-separated with the header
    City / Price / Link; it is created if missing and appended to otherwise.

    Parameters:
    - city_list (iterable of str): City names to search.
    - max_price, group_adults, checkin, checkout, breakfast_included, save_file:
      Passed through unchanged to fetch_hotel_prices_and_links.
    - max_workers (int): Number of cities fetched concurrently.

    Returns:
    - None. Errors for a single city are printed and do not stop the others.
    """
    save_file_path = 'hotels_breakfast.csv' if breakfast_included else 'hotels.csv'

    # Ensure the output file exists and has the correct headers. Written as
    # UTF-8 so the header matches the encoding of the rows appended below.
    if not os.path.exists(save_file_path):
        with open(save_file_path, 'w', encoding='utf-8') as file:
            file.write("City\tPrice\tLink\n")

    def worker(city):
        # Fetch hotel prices and links for a single city
        print(f"Fetching data for {city}...")
        return fetch_hotel_prices_and_links(city, max_price, group_adults, checkin, checkout, breakfast_included, save_file)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all city requests
        future_to_city = {executor.submit(worker, city): city for city in city_list}

        # Process results as they complete. Only this thread writes to the
        # file, so rows from different cities are never interleaved.
        for future in as_completed(future_to_city):
            city = future_to_city[future]
            try:
                hotels = future.result()  # Re-raises any exception from the worker

                # Append results to the file
                with open(save_file_path, 'a', encoding='utf-8') as file:
                    for hotel in hotels:
                        file.write(f"{city}\t{hotel['price']}\t{hotel['link']}\n")

            except Exception as e:
                print(f"Error fetching data for {city}: {e}")

# Search every city that survived the country filter
city_list = filtered_df['name']

print("Starting parallel hotel data fetching...")
fetch_and_save(
    city_list,
    max_price=max_price,
    group_adults=people_count,
    checkin=checkin,
    checkout=checkout,
    breakfast_included=breakfast_included,
)

# Same output-file choice as inside fetch_and_save, repeated for the message
save_file_path = 'hotels_breakfast.csv' if breakfast_included else 'hotels.csv'

print(f"Data fetching complete. Check '{save_file_path}' for results.")


# 5. Single thread search
This is slower, but is likely to go undetected for longer.

In [None]:
import os

# Parameters
max_price = 1000             # Only listings priced below this value are saved
breakfast_included = False   # Adds the meal-plan filter to the search when True
checkin = "2025-03-07"   # Format: YYYY-MM-DD
checkout = "2025-03-09"    # Format: YYYY-MM-DD
people_count = 24            # Passed to the search as the number of adults

# Use the same file names as the multithreaded search so breakfast-only
# results are never mixed into the regular results file.
save_file_path = 'hotels_breakfast.csv' if breakfast_included else 'hotels.csv'

# Create the output file with its header on first use. Written as UTF-8 to
# match the encoding of the rows appended below.
if not os.path.exists(save_file_path):
    with open(save_file_path, 'w', encoding='utf-8') as file:
        file.write("City\tPrice\tLink\n")

for city in filtered_df['name']:
    print(f"Fetching prices for {city}")
    try:
        hotels = fetch_hotel_prices_and_links(city, max_price=max_price, group_adults=people_count, checkin=checkin, checkout=checkout, breakfast_included=breakfast_included)
    except Exception as e:
        # One failing city should not abort the remaining ones; this mirrors
        # the per-city error handling of the multithreaded search.
        print(f"Error fetching data for {city}: {e}")
        continue

    # Append after every city so partial results survive an interrupted run
    with open(save_file_path, 'a', encoding='utf-8') as file:
        for hotel in hotels:
            file.write(f"{city}\t{hotel['price']}\t{hotel['link']}\n")


# 6. Create list of unique cities
The resulting `unique_cities.csv` can be imported into [Google Maps](https://www.google.com/maps/d).

In [None]:
# NOTE(review): this reads a comma-separated 'hotels_info.txt', while the
# search steps write tab-separated 'hotels.csv' / 'hotels_breakfast.csv' —
# confirm which input file is intended. Nothing happens if the file is missing.
if os.path.exists('hotels_info.txt'):
    hotels_df = pd.read_csv('hotels_info.txt', sep=',')
    print(hotels_df.head())

    # Distinct values of the City column
    unique_cities = hotels_df['City'].unique()
    print(unique_cities)

    # Write them as a one-column CSV with the header "City"
    unique_cities_df = pd.DataFrame({'City': unique_cities})
    unique_cities_df.to_csv('unique_cities.csv', index=False)