In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urlparse, parse_qs
from time import sleep
import requests, csv

# Timeout handed to every WebDriverWait in this notebook (Selenium interprets it as seconds).
WAIT_TIMEOUT = 20

## 1. [PlacidWay](https://www.placidway.com/article/3207/Best-Autism-Treatment-Centers-in-the-World)

Best Autism Treatment Centers in the World

In [None]:
url = 'https://www.placidway.com/article/3207/Best-Autism-Treatment-Centers-in-the-World'

# Download the article that lists the treatment centers.
# - timeout: without it requests waits indefinitely on a stalled connection.
# - raise_for_status(): stop here on an HTTP error instead of silently parsing
#   an error page and failing later with a confusing "no table" error.
response = requests.get(url, timeout=WAIT_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Displayed as the cell output: a quick check that the expected page was loaded.
soup.find('title')

In [None]:
# Every <tr> of the first <table> on the page except the first one
# (presumably the header row — confirm against the page markup).
rows = soup.find('table').find_all('tr')[1:]

# Displayed as the cell output: number of rows found.
len(rows)

In [None]:
# Build one (name, country, link) tuple per table row whose first cell links
# to a clinic detail page; rows without a usable link are skipped.
rows_infos = []
for row in rows:
    cells = row.find_all('td')
    # Rows with fewer than two <td> cells (spacers, header-only rows) cannot
    # provide both a link and a country; skip them rather than raise IndexError.
    if len(cells) < 2:
        continue
    link_element = cells[0].find('a')
    # find() returns None when the cell contains no <a>; an <a> without href
    # has nothing to follow either.
    if link_element is None or 'href' not in link_element.attrs:
        continue
    link = link_element['href']
    name = link_element.text.strip()
    country = cells[1].text.strip()
    rows_infos.append((name, country, link))

# Displayed as the cell output: count plus first and last entry as a sanity check.
len(rows_infos), rows_infos[0], rows_infos[-1]

In [None]:
# Visit each clinic page and collect
# (name, second_name, country, address, latitude, longitude).
# A clinic whose page cannot be fetched or parsed is reported and skipped.
data = []
for name, country, row_url in rows_infos:
    try:
        # timeout: a single stalled page must not hang the whole scrape.
        row_response = requests.get(row_url, timeout=WAIT_TIMEOUT)
        row_soup = BeautifulSoup(row_response.text, 'html.parser')

        second_name = row_soup.find('div', class_='heading-thumb-wrapper').find('h1').text.strip()
        address_text = row_soup.find('address').text.strip()

        # Coordinates are read from the hidden inputs of the #location section;
        # pages without that section get empty coordinates.
        location_element = row_soup.find('section', id='location')
        if location_element:
            latitude = location_element.find('input', id='latitude')['value'].strip()
            longitude = location_element.find('input', id='longitude')['value'].strip()
        else:
            latitude = longitude = ''

        data.append((name, second_name, country, address_text, latitude, longitude))
    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the loop; the error itself is shown to make failures diagnosable.
        print('Error in Link:', row_url, '-', repr(error))

# Displayed as the cell output: count plus first and last record as a sanity check.
len(data), data[0], data[-1]

In [None]:
# Save the PlacidWay records: one header row followed by one row per clinic.
with open('autism_treatment_centers_placidway.csv', mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows([
        ['name', 'second_name', 'country', 'address', 'latitude', 'longitude'],
        *data,
    ])

## 2. [Bookimed](https://us-uk.bookimed.com/clinics/illness=autism/)

16 Verified Autism Treatment Clinics Globally

In [None]:
url = 'https://us-uk.bookimed.com/clinics/illness=autism/'
driver = webdriver.Chrome()

driver.get(url)
driver.maximize_window()

# Click the button inside the cookie banner once it becomes clickable.
WebDriverWait(driver, WAIT_TIMEOUT).until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '.cookie-button.cookie-solution button')
    )
).click()

# Delete the `.clinics-sticky` element from the DOM
# (presumably because it overlaps the cards that are clicked later — confirm).
driver.execute_script(
    'arguments[0].remove()',
    WebDriverWait(driver, WAIT_TIMEOUT).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'clinics-sticky'))
    ),
)

# Displayed as the cell output: a quick check that the expected page was loaded.
driver.title

In [None]:
# Wait until the clinic cards (the `div[index]` elements inside
# `#clinics.clinics-list`) are present in the DOM, then collect all of them.
cards = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#clinics.clinics-list div[index]')))

# Displayed as the cell output: number of cards found.
len(cards)

In [None]:
# One record per clinic card: [title, link, address, info, free_quote_link].
# `address` here is the text of the card's `.clinic-card__country` element.
data: list[list[str]] = []
for card in cards:
    # Read up front so the error message never has to touch the (possibly
    # stale) element again inside the exception handler.
    card_index = '<unknown>'
    try:
        card_index = card.get_attribute('index')

        content_element = WebDriverWait(card, WAIT_TIMEOUT).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.clinic-card > .clinic-card__content-container > .clinic-card__content')))
        header_element = content_element.find_element(By.CSS_SELECTOR, '.clinic-card__title')
        title_element = header_element.find_element(By.CSS_SELECTOR, 'div:first-child > h2 > a')
        title = title_element.text.strip()
        link = title_element.get_attribute('href')
        address = header_element.find_element(By.CSS_SELECTOR, '.clinic-card__country ').text.strip()

        # Open the card's "show text" modal, read the active info block and the
        # quote link from it, then close the modal again.
        WebDriverWait(card, WAIT_TIMEOUT).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.clinic-card__text-container .clinic-card__show-text'))).click()
        modal_wrapper = WebDriverWait(card, WAIT_TIMEOUT).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.clinic-card__modal .card-info__wrapper')))
        modal_info = modal_wrapper.find_element(By.CSS_SELECTOR, '.card-info')
        modal_info_block = modal_info.find_element(By.CSS_SELECTOR, '.card-info__container > .card-info__content > .card-info__content-item.active > .card-info__info-block')
        modal_info_block_content = [el.text.strip() for el in modal_info_block.find_elements(By.CLASS_NAME, 'card-info__info-clinic')]
        free_quote_link = modal_info.find_element(By.CSS_SELECTOR, '.card-info__buttons > a').get_attribute('href')
        WebDriverWait(modal_wrapper, WAIT_TIMEOUT).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.card-info__close'))).click()

        data.append([title, link, address, ', '.join(modal_info_block_content), free_quote_link])
    except Exception as error:
        # `Exception` (not a bare except) so a kernel interrupt still stops the
        # loop. The message avoids nesting the same quote type inside the
        # f-string, which is a SyntaxError on Python versions before 3.12.
        print(f'Error in Card: {card_index} ({error!r})')

# Displayed as the cell output: count plus first and last record as a sanity check.
len(data), data[0], data[-1]

In [None]:
# Enrich every record with data from the clinic's own page.
#   before: [title, link, country, info, free_quote_link]            (5 fields)
#   after:  [title, country, address, info, free_quote_link, latitude, longitude]
# The row always ends up in the 7-field layout, even when the page could not
# be scraped, so the CSV columns stay aligned for every clinic.
for clinic_data in data:
    # Rows that are not in the 5-field layout were already converted by an
    # earlier run of this cell and no longer hold the page link; skip them so
    # re-running the cell is harmless.
    if len(clinic_data) != 5:
        continue

    clinic_link, country = clinic_data[1], clinic_data[2]
    address = latitude = longitude = ''
    try:
        driver.get(clinic_link)
        clinic_map = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.clinic-page__map')))
        address = clinic_map.find_element(By.CLASS_NAME, 'clinic-page__map-title').text.strip()
        # Scroll to the map section; the iframe sits inside a `.lazy-component`
        # wrapper, so it is presumably only inserted once scrolled into view.
        driver.execute_script('arguments[0].scrollIntoView(true);', clinic_map)

        clinic_map_iframe = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'section.clinic-page__map .lazy-component > iframe')))
        # The coordinates are taken from the `ll=<lat>,<lng>` query parameter
        # of the embedded map URL.
        map_parsed_url = urlparse(clinic_map_iframe.get_attribute('src'))
        map_query = parse_qs(map_parsed_url.query)
        latitude, longitude = map_query['ll'][0].split(',')
    except Exception as error:
        # `Exception` (not a bare except) so a kernel interrupt still stops the
        # loop. Whatever was read before the failure (e.g. the address) is kept;
        # the coordinates are left blank.
        print(f'Error in Clinic: {clinic_data[0]} ({error!r})')
        latitude = longitude = ''

    clinic_data[1] = country
    clinic_data[2] = address
    clinic_data.extend([latitude, longitude])

# Displayed as the cell output: first and last record as a sanity check.
data[0], data[-1]

In [None]:
# Build the rows to write without modifying `data`, so this cell can be re-run
# and `data` still shows what was scraped.
csv_rows = []
for clinic_data in data:
    if len(clinic_data) == 5:
        # Record still in the card layout [name, link, country, info, quote_link]
        # (its detail page was never scraped): move the country into place and
        # leave address and coordinates blank instead of raising IndexError.
        name, _link, country, info, quote_link = clinic_data
        row = [name, country, '', info, quote_link, '', '']
    else:
        row = list(clinic_data)
    # A coordinate of exactly '0' is written as blank
    # (presumably the map's placeholder for "unknown" — confirm).
    row[5] = '' if row[5] == '0' else row[5]
    row[6] = '' if row[6] == '0' else row[6]
    csv_rows.append(row)

with open('autism_treatment_centers_bookimed.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['name', 'country', 'address', 'info', 'free_quote_link', 'latitude', 'longitude'])
    writer.writerows(csv_rows)

# The Bookimed scrape is finished; close the browser.
driver.quit()

## 3. [Autism Now](https://autismnow.org/map/)

A growing collection of agencies across the United States that offer services and resources centered on autism and other developmental disabilities

In [11]:
# Page whose per-state links are followed below, and a new Chrome session
# (the session used for the previous section was closed with driver.quit()).
url = 'https://autismnow.org/map/'
driver = webdriver.Chrome()

In [12]:
# Index of the next state link that click_link() will open.
# NOTE(review): this starts at 47 rather than 0, which looks like a resume
# point left over from an interrupted run — a fresh "Restart & Run All" only
# visits the states from this index onwards. Confirm before a full re-scrape.
i = 47
def click_link() -> str | None:
    """Open the next state's listing and return that state's name.

    Reloads the map page, clicks the table-of-contents tab (`li#toc-nav`),
    removes `div#screenOptions` from the DOM (presumably because it gets in
    the way of the links — confirm), and clicks the state link at position
    `i`. The module-level counter `i` is then advanced by one.

    Returns:
        The stripped link text of the state that was clicked, or None when
        `i` is past the last state link (nothing is clicked in that case).
    """
    global i
    wait = WebDriverWait(driver, WAIT_TIMEOUT)

    # The page is reloaded on every call, so the links are looked up again
    # each time instead of being reused from a previous call.
    driver.get(url)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'li#toc-nav'))).click()
    ele_to_remove = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div#screenOptions')))
    driver.execute_script('arguments[0].remove()', ele_to_remove)

    states_links = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div#stateTOC li > a')))
    if i >= len(states_links):
        return None

    state_link = states_links[i]
    # Read the name before clicking: the click changes what the page shows.
    state_name = state_link.text.strip()
    wait.until(EC.element_to_be_clickable(state_link)).click()
    i += 1
    return state_name

In [13]:
# One record per organisation: [name, state, city, county, contact info].
clinics_data = []
while state_name := click_link():
    clinics_rows = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.state-list tbody > tr')))

    # Copy everything needed out of the listing first: the loop below
    # navigates away from this page, so the row elements cannot be used later.
    iter_list = []
    for clinic_row in clinics_rows:
        cells = clinic_row.find_elements(By.CSS_SELECTOR, 'td')
        anchor = cells[0].find_element(By.CSS_SELECTOR, 'a')
        iter_list.append((
            anchor.text.strip(),
            anchor.get_attribute('href'),
            cells[1].text.strip(),
            cells[2].text.strip(),
        ))

    # NOTE(review): `iter_list[9:] if i == 48` skips the first 9 entries of the
    # state opened while the counter was 47. Together with the counter starting
    # at 47, this looks like a resume hack from an interrupted run — confirm
    # and remove both before doing a full re-scrape.
    for name, link, city, county in iter_list[9:] if i == 48 else iter_list:
        try:
            driver.get(link)
            contact_element = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.presence_of_element_located((By.XPATH, "//h4[contains(text(), 'Contact information')]/following-sibling::p[1]")))
            contact_info = contact_element.text.strip()
            print(name)
        except Exception as error:
            # A page without the expected contact block must not abort the
            # whole scrape; keep the listing data and leave the contact blank.
            print(f'Error in Clinic: {name} ({error!r})')
            contact_info = ''
        clinics_data.append([name, state_name, city, county, contact_info])

The Arc of Augusta
The Arc of Central Virginia
The Arc of Greater Prince William/INSIGHT
The Arc of Greater Williamsburg
The Arc of Harrisonburg and Rockingham
The Arc of Loudoun
The Arc of North Central Virginia
The Arc of Northern Shenandoah Valley
The Arc of Northern Virginia
The Arc of Rockbridge
The Arc of South Hampton Roads
The Arc of Southern Virginia
The Arc of Southside
The Arc of the Piedmont
The Arc of the Roanoke Valley
The Arc of the Virginia Peninsula
The Arc of Virginia
The Arc of Warren County
The Arc South of the James
The Greater Richmond ARC
AdultASD.org
Autism Guidebook for Washington State
Autism Resources – Washington State Dept. Of Health
Autism Society of Washington
Easter Seals of Washington
Lake Washington School District Transition Academy
Northwest Autism Foundation
The Arc of Grays Harbor
The Arc of Island and Skagit
The Arc of King County
The Arc of Kitsap & Jefferson Counties
The Arc of Snohomish County
The Arc of Southwest Washington
The Arc of Spokane


In [14]:
# The file is opened in append mode, so rows from earlier (partial) runs are
# kept and the new rows are added after them.
with open('autism_treatment_centers_autism_now.csv', 'a', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # In append mode the stream starts at the end of the file, so a position
    # of 0 means the file is new or empty. Only then is the header written;
    # otherwise every run would insert another header row in the middle of the data.
    if f.tell() == 0:
        writer.writerow(['name', 'state', 'city', 'county', 'contact info'])
    writer.writerows(clinics_data)

# The Autism Now scrape is finished; close the browser.
driver.quit()