In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Scrape the list of countries (display name + link) from the World Bank
# country index page and store it in ``worldbank_countries.csv``.

# Set up Selenium WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run in headless mode for faster execution
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Initialize WebDriver (webdriver_manager resolves the chromedriver binary)
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

url = "https://data.worldbank.org/country"
country_data = []

# Everything that talks to the browser lives inside try/finally so the Chrome
# process is always shut down, even if loading or element access raises.
try:
    # Open the World Bank Country Data page
    driver.get(url)
    time.sleep(5)  # Allow page to load

    # Fetch all country links
    countries = driver.find_elements(By.CSS_SELECTOR, "a[href^='/country/']")

    # Extract country names and URLs; element properties must be read while
    # the browser session is still alive.
    for country in countries:
        name = country.text.strip()
        link = country.get_attribute("href")
        if name:  # Ensure non-empty names
            country_data.append({"Country": name, "URL": link})
finally:
    # Close the driver
    driver.quit()

# Convert to DataFrame; explicit columns keep the schema (and the CSV header)
# stable even when no links were found.
df_countries = pd.DataFrame(country_data, columns=["Country", "URL"])

# Save to CSV
df_countries.to_csv("worldbank_countries.csv", index=False)

# Display the first few rows
print(df_countries.head())


          Country                                                URL
0     Afghanistan  https://data.worldbank.org/country/afghanistan...
1         Albania  https://data.worldbank.org/country/albania?vie...
2         Algeria  https://data.worldbank.org/country/algeria?vie...
3  American Samoa  https://data.worldbank.org/country/american-sa...
4         Andorra  https://data.worldbank.org/country/andorra?vie...
