In [None]:
import time
from pathlib import Path

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def scrape_city_coordinates_robust(df):
    """
    Scrape decimal latitude and longitude for every city in ``df`` from Wikipedia.

    For each row the browser is sent to the Wikipedia Main Page, the query
    "<City>, <Country>" is typed into the search box, and — if a search-results
    page appears instead of an article — the first result is opened. The first
    ``<span class="geo">`` element on the resulting page is then parsed; its
    text is expected to look like "lat; lon" in decimal degrees.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'City' and 'Country' columns. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with 'Latitude' and 'Longitude' columns added.
        Rows whose coordinates could not be scraped keep ``None`` in both.

    Notes
    -----
    Requires Chrome plus a matching driver. Failures for an individual city
    are printed and skipped so one bad page does not abort the whole run.
    The browser is always closed, even if the loop is interrupted or raises.
    """
    base_url = "https://en.wikipedia.org/wiki/Main_Page"

    # Create a copy for results and initialize coordinate columns
    df_result = df.copy()
    df_result['Latitude'] = None
    df_result['Longitude'] = None

    # Initialize the WebDriver (assuming Chrome)
    # NOTE: You must have the corresponding driver installed for this to work.
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless") # Uncomment to run without opening the browser GUI
    driver = webdriver.Chrome(options=options)

    print("Starting Web Scraping process...")

    # try/finally guarantees the browser process is released even when
    # something escapes the per-city handler (missing column, Ctrl-C, ...).
    try:
        for index, row in df_result.iterrows():
            city = row['City']
            country = row['Country']
            query = f"{city}, {country}" # Search format: "City, Country"

            try:
                driver.get(base_url)

                # Use WebDriverWait to ensure the search bar is clickable
                search_input = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.NAME, "search"))
                )
                search_input.clear()
                search_input.send_keys(query)
                search_input.send_keys(Keys.RETURN)

                # Handle potential Search Results page (disambiguation)
                try:
                    # We wait briefly for a search results indicator
                    WebDriverWait(driver, 3).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "mw-search-results"))
                    )
                    # If found, click the very first result
                    first_result = driver.find_element(By.CSS_SELECTOR, "ul.mw-search-results li a")
                    first_result.click()
                except WebDriverException:
                    # Timeout / element not found: we assume the direct article
                    # page loaded successfully. Only Selenium errors are
                    # swallowed here so Ctrl-C still stops the run.
                    pass

                # Parse the final page source to find the decimal coordinates
                time.sleep(1) # Small buffer
                soup = BeautifulSoup(driver.page_source, 'html.parser')

                # Coordinates are usually in the span with class "geo" (decimal format: lat; lon)
                geo_tag = soup.find("span", {"class": "geo"})

                if geo_tag:
                    parts = [part.strip() for part in geo_tag.get_text().split(';')]
                    if len(parts) == 2:
                        lat, lon = parts
                        # Convert both values before storing either, so a bad
                        # longitude cannot leave a half-filled row behind.
                        lat_value = float(lat)
                        lon_value = float(lon)
                        df_result.at[index, 'Latitude'] = lat_value
                        df_result.at[index, 'Longitude'] = lon_value
                        print(f"✅ Found {city}: {lat}, {lon}")
                    else:
                        print(f"⚠️ Format issue for {city}: Text found but parsing failed.")
                else:
                    print(f"❌ Coordinates not found for {city}")

            except Exception as e:
                print(f"❌ General error processing {city}: {e}")
    finally:
        # Close the browser instance
        driver.quit()

    return df_result

In [None]:
# Input for the scraper and the cached, coordinate-enriched output.
CLEANED_CSV = Path('city_data_cleaned.csv')
COORDS_CSV = Path('city_data_with_coords.csv')
# Set to True to ignore the cached file and scrape Wikipedia again
# (e.g. after the cleaned data changed or some cities failed to resolve).
FORCE_RESCRAPE = False

# 1. Load the cleaned data (or use the in-memory 'city_data')
city_data_from_cleaned = pd.read_csv(CLEANED_CSV)

if COORDS_CSV.exists() and not FORCE_RESCRAPE:
    # 2. Reuse the previously scraped coordinates.
    # This prevents needing to re-scrape all 84 cities every time the notebook is run!
    city_data_with_coords = pd.read_csv(COORDS_CSV)
    print(f"\n--- Loaded cached coordinates from '{COORDS_CSV}' (set FORCE_RESCRAPE = True to refresh) ---")
else:
    # 2. Execute the scraping function
    city_data_with_coords = scrape_city_coordinates_robust(city_data_from_cleaned)

    # 3. Save the enriched data to a new CSV file so later runs can skip the scrape
    city_data_with_coords.to_csv(COORDS_CSV, index=False)
    print("\n--- Data successfully scraped and saved to 'city_data_with_coords.csv' ---")

### 3. Interactive Map Visualization

Using the newly acquired **Latitude** and **Longitude** data, we construct an interactive map with **Plotly Express**. This map provides a visual interface for comparing cities across Europe.

**Map Features (as required):**

* **Marker Size:** Scales by **Population**.
* **Marker Color:** Scales by **Average Monthly Salary** (for visual insight).
* **Hover Text (Tooltip):** Displays **Country**, **Population**, **Average Monthly Salary**, and **Average Cost of Living**.

In [None]:
# Load the data, ensuring coordinates are included (using the file saved above)
city_data_final = pd.read_csv('city_data_with_coords.csv')

# Keep only rows that can actually be drawn: cities whose scraping failed have
# no Latitude/Longitude, and a missing Population cannot be used as marker size.
map_df = city_data_final.dropna(subset=['Latitude', 'Longitude', 'Population'])

if not map_df.empty:
    fig_map = px.scatter_mapbox(
        map_df,
        lat="Latitude",
        lon="Longitude",
        hover_name="City",
        # Custom tooltip content. Values are d3-format specifiers, which cannot
        # carry a literal suffix such as " €"; the unit is put in `labels` instead.
        hover_data={
            "Latitude": False,
            "Longitude": False,
            "Country": True,
            "Population": ':,',
            "Average Monthly Salary": ':,.0f',
            "Average Cost of Living": ':,.0f',
        },
        # Display names used in the tooltip and the colour-bar title.
        labels={
            "Average Monthly Salary": "Average Monthly Salary (€)",
            "Average Cost of Living": "Average Cost of Living (€)",
        },
        color="Average Monthly Salary",
        size="Population",
        color_continuous_scale="Viridis",
        zoom=3, # Initial zoom level for Europe
        height=700,
        title="Interactive Map of European Cities"
    )

    # Set map style (OpenStreetMap is free and detailed) and trim the margins,
    # leaving room at the top for the title.
    fig_map.update_layout(
        mapbox_style="open-street-map",
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
    )
    fig_map.show()
else:
    print("Map generation skipped: Coordinate data is missing. Please run the scraping cell above.")