In [3]:
# Import libraries and modules 
import csv, time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Target page to scrape and location of the local ChromeDriver executable
url = 'https://kmpdc.go.ke/Registers/H-Facilities.php'
driver_path = r"C:\chromedriver-win64\chromedriver.exe"

# Command-line switches passed to Chrome when the browser is launched
chrome_options = Options()
for flag in ("--start-maximized", "--disable-notifications"):
    chrome_options.add_argument(flag)

In [None]:

# Scrape the paginated facilities table at `url` into a CSV file.
#
# Steps: open the page, switch the table to 100 rows per page, derive the
# number of pages from the table's info text, then walk the pages and write
# selected columns of every row to the CSV. The browser is always closed in
# the `finally` clause.

# Initialize driver
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Bound before the try block so the outer error handler can always report a
# page number, even when the failure happens before the scraping loop starts
# (0 means "no page processed yet").
page = 0

try:
    driver.get(url)

    # Explicit wait helper (10 second timeout)
    wait = WebDriverWait(driver, 10)

    # Wait until the table body contains at least one row
    wait.until(EC.presence_of_all_elements_located(
        (By.CSS_SELECTOR, "#DataTables_Table_0 tbody tr")))

    # Set number of records displayed per page
    items = Select(driver.find_element(By.XPATH, '//label/select'))
    items.select_by_visible_text('100')

    # Determine total pages from the info text: the second-to-last
    # space-separated token is parsed as the total record count
    # (thousands separators removed).
    pages_text = driver.find_element(By.ID, "DataTables_Table_0_info").text
    total_records = int(pages_text.split(' ')[-2].replace(',', ''))
    # Ceiling division: an exact multiple of 100 must not yield an extra page,
    # otherwise the last page would be scraped twice.
    pages = -(-total_records // 100)

    # Create CSV file
    with open("kmpdc_registered_facilities_0927.csv", 'w', newline="",
              encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['facility_name', 'reg_no', 'facility_type', 'level',
                         'county', 'status'])

        # Scrape data
        for page in range(1, pages + 1):
            rows = driver.find_elements(By.CSS_SELECTOR,
                                        "#DataTables_Table_0 > tbody > tr")
            for row in rows:
                cols = row.find_elements(By.TAG_NAME, "td")
                data = [col.text.strip() for col in cols]
                # Skip rows that do not carry the full set of cells (for
                # example a single-cell placeholder row) instead of failing
                # with an IndexError and aborting the whole scrape.
                if len(data) < 7:
                    continue
                # Column index 2 is intentionally not exported.
                facility_name, reg_no = data[0], data[1]
                facility_type, level, county, status = data[3:7]
                writer.writerow([facility_name, reg_no, facility_type, level,
                                 county, status])

            # Nothing left to navigate to after the final page
            if page == pages:
                break

            # Navigate to the next page
            try:
                # NOTE(review): positional XPath; presumably targets the
                # "Next" pagination control -- confirm against the page markup.
                next_button = driver.find_element(By.XPATH, "//li[7]/a")
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                next_button.click()
                # Fixed pause to give the table time to redraw
                time.sleep(2)
            except Exception as e:
                print(f"Error clicking next on page {page}: {e}")
                break

except Exception as e:
    print(f"Error on {page}:", e)

finally:
    # Always release the browser, whether scraping succeeded or not
    driver.quit()


