# Purpose

Figure out how to pull location IDs for only DC fast charging (DCFC) stations in the US (non-Tesla to start with). We need the location IDs to efficiently scrape PlugShare with some of our other code.

# Imports

In [1]:

%load_ext autoreload
%autoreload 2

import numpy as np
from rich import print
import os
import pandas as pd
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm
from typing import List, Union, Set

from evlens.data.plugshare import Scraper

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException, ElementNotInteractableException

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

from dotenv import load_dotenv
load_dotenv(override=True)

from evlens.logs import setup_logger
logger = setup_logger("Notebook-0.2")
logger.info("TEST!")

2024-06-22_T19_58_32EDT: INFO (Notebook-0.2:L31) - TEST!


# Set up our constants

In [2]:
# Plug types to keep enabled in the map's plug-type filter. Entries are compared
# against the visible text of each filter option, so the spelling must match exactly.
# Commented-out entries are plug types that are deliberately left disabled for now.
ALLOWABLE_PLUG_TYPES = [
    # 'Tesla Supercharger',
    'SAE Combo DC CCS',
    # 'J-1772'
]

# Grabbing the data from the first page

First let's focus on simply pulling location IDs from the pins we see on the default map view before we get into the details of rastering across the US.

## Helper Functions

In [3]:
URL = "https://developer.plugshare.com/embed"

class SeleniumDriver:
    """
    Bundles a configured Chrome WebDriver with a WebDriverWait bound to it.

    Attributes
    ----------
    timeout : int
        Seconds given to the WebDriverWait
    chrome_options : Options
        Chrome options used to launch the browser
    driver : webdriver.Chrome
        The launched (and maximized) Chrome driver
    wait : WebDriverWait
        Explicit-wait helper tied to ``driver`` and ``timeout``
    """

    def __init__(self, timeout: int = 3):
        options = Options()

        # Removes automation infobar
        options.add_experimental_option("excludeSwitches", ["enable-automation"])

        # Get rid of cruft that will slow us down
        for flag in ("--disable-extensions", "--disable-notifications"):
            options.add_argument(flag)

        # Turn off geolocation to speed things up
        options.add_experimental_option(
            "prefs",
            {"profile.default_content_setting_values.geolocation": 2},
        )

        self.timeout = timeout
        self.chrome_options = options

        # Launching the browser is a side effect of constructing this object
        self.driver = webdriver.Chrome(options=options)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, timeout)

In [4]:
def get_elements(driver, criterion, xpath: str):
    """
    Find every element matching a locator and log how many were found.

    Parameters
    ----------
    driver
        Object exposing ``find_elements(criterion, value)`` (e.g. a WebDriver)
    criterion
        Locator strategy passed straight to ``find_elements`` (e.g. ``By.XPATH``)
    xpath : str
        Locator value passed straight to ``find_elements``; despite the name it
        is used with whatever strategy ``criterion`` selects

    Returns
    -------
    None if nothing matched, the bare element if exactly one matched,
    otherwise the full list of matches.
    """
    matches = driver.find_elements(criterion, xpath)
    match_count = len(matches)

    if match_count == 0:
        logger.error("Found no elements")
        return None

    if match_count == 1:
        logger.info("Found only one element")
        return matches[0]

    logger.info(f"Found {match_count} elements")
    return matches

In [5]:
def pick_plug_filters(driver: webdriver, wait: WebDriverWait, plugs_to_use: List[str] = ALLOWABLE_PLUG_TYPES):
    """
    Restrict the map's plug-type filter to the requested plug types.

    Parameters
    ----------
    driver : webdriver
        Driver currently showing the page with the plug-type filter
    wait : WebDriverWait
        Wait helper used until the "outlet_off" control is clickable
    plugs_to_use : List[str]
        Plug-type labels to enable; matched exactly against each option's text
    """
    # Start from a clean slate: click the control that turns every plug filter off
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="outlet_off"]'))).click()

    # Direct children of the "outlets" container are the individual filter options
    outlet_container = driver.find_element(By.XPATH, '//*[@id="outlets"]')
    filter_options = outlet_container.find_elements(By.XPATH, './child::*')

    # Decide which options to enable before clicking anything
    wanted_options = []
    for option in filter_options:
        if option.text in plugs_to_use:
            wanted_options.append(option)

    # Tick the checkbox inside each wanted option
    for option in wanted_options:
        option.find_element(By.CSS_SELECTOR, 'input[type="checkbox"]').click()

In [6]:
from urllib.parse import urlparse

def parse_location_link(wait, pin_element):
    """
    Click a map pin and pull the location ID out of the popup's footer link.

    Parameters
    ----------
    wait
        WebDriverWait-like object; used to wait for the footer link to be visible
    pin_element
        Clickable element for the pin

    Returns
    -------
    str
        Everything after the last "/" in the path of the footer link's href
    """
    pin_element.click()

    footer_link_locator = (By.XPATH, '//*[@id="charger_info_footer"]/a')
    footer_link = wait.until(EC.visibility_of_element_located(footer_link_locator))

    href = footer_link.get_attribute('href')
    url_path = urlparse(href).path

    # The ID is the final path segment of the link
    return url_path.rsplit("/", 1)[-1]

In [7]:
from typing import List, Union
from selenium.webdriver.remote.webelement import WebElement
from time import sleep

def scroll_back_to_map_view(driver: webdriver, map_iframe: WebElement):
    '''
    Scrolls the page so the map iframe (and its pins) sit in the viewport.

    Parameters
    ----------
    driver : webdriver
        Driver for the page that contains the iframe
    map_iframe : WebElement
        WebElement for the iframe; its "height" attribute is parsed as an integer
    '''
    # Read the iframe height first; it is the distance we scroll back up by
    frame_height = int(map_iframe.get_attribute("height"))

    # Bring the iframe into view
    scroll_action = ActionChains(driver)
    scroll_action.scroll_to_element(map_iframe)
    scroll_action.perform()

    # Where did that leave the window?
    scroll_x, scroll_y = driver.execute_script("return [window.pageXOffset, window.pageYOffset]")

    # y is 0 at the top of the page and grows downward, so subtracting the
    # iframe height moves the window back up by one full iframe height
    target_y = scroll_y - frame_height
    driver.execute_script(f"window.scrollTo({scroll_x}, {target_y})")

In [8]:
class SearchCriterion:
    """
    Plain container describing one map search.

    Attributes
    ----------
    latitude : float
        Latitude of the search center
    longitude : float
        Longitude of the search center
    radius : float
        Search radius in miles (from ``radius_in_miles``)
    time_to_pan : float
        Time to wait for the map to pan after searching
        (from ``wait_time_for_map_pan``)
    """

    def __init__(
        self,
        latitude: float,
        longitude: float,
        radius_in_miles: float,
        wait_time_for_map_pan: float
    ):
        self.latitude, self.longitude = latitude, longitude
        self.radius, self.time_to_pan = radius_in_miles, wait_time_for_map_pan

In [9]:
def search_location(
    driver: webdriver,
    wait,
    search_criterion: SearchCriterion
    ):
    """
    Run a coordinate + radius search on the page and pause while the map pans.

    Parameters
    ----------
    driver : webdriver
        Driver for the page holding the search controls
    wait
        WebDriverWait-like object; used to wait for the search box to be visible
    search_criterion : SearchCriterion
        Supplies latitude, longitude, radius and the pause length
    """
    # The search controls live in the top-level document, not the map iframe
    driver.switch_to.default_content()

    # Coordinates are entered as "lat,long"
    coords_text = f"{search_criterion.latitude},{search_criterion.longitude}"
    search_box = wait.until(
        EC.visibility_of_element_located((By.XPATH, '//*[@id="search"]'))
    )
    search_box.clear()
    search_box.send_keys(coords_text)

    radius_box = driver.find_element(By.XPATH, '//*[@id="radius"]')
    radius_box.clear()
    radius_box.send_keys(search_criterion.radius)

    driver.find_element(By.XPATH, '//*[@id="geocode"]').click()

    # Give the iframe a moment to pan
    sleep(search_criterion.time_to_pan)
    
    

In [10]:
def grab_location_ids(driver, wait, search_criterion, location_ids: Union[None, Set[str]] = None) -> Set[str]:
    '''
    Clicks every pin visible on the map and collects the location IDs found.

    Clicking a pin moves the viewport and invalidates the other pin elements,
    so the search is re-run and the pins are re-queried before each pin after
    the first. Failures on an individual pin are logged and skipped.

    Parameters
    ----------
    driver
        Driver for the page holding the map widget
    wait
        WebDriverWait-like object used for the pin and link lookups
    search_criterion : SearchCriterion
        Search to repeat before each pin after the first
    location_ids : Union[None, Set[str]]
        Set to add IDs to (mutated in place); a new set is created when None

    Returns
    -------
    Set[str]
        The set of location IDs, including any that were passed in

    Raises
    ------
    TimeoutException
        If no pins become visible on the initial lookup or after a re-search
    '''
    # Imported locally because it is not among the notebook's top-level imports
    from selenium.common.exceptions import StaleElementReferenceException

    # The annotation allows None, so honor it instead of failing on .add()
    if location_ids is None:
        location_ids = set()

    pins = _enter_map_and_find_pins(driver, wait)

    num_pins_in_view = len(pins)
    for i in tqdm(range(num_pins_in_view), desc='Parsing pins'):
        # Do another search if it's not the first time
        if i != 0:
            search_location(driver, wait, search_criterion)
            pins = _enter_map_and_find_pins(driver, wait)

        # The re-run search may show fewer pins than the first pass did
        if i >= len(pins):
            logger.error(f"Pin {i} no longer present after re-running the search")
            continue

        try:
            location_ids.add(parse_location_link(wait, pins[i]))
        except (ElementClickInterceptedException, ElementNotInteractableException):
            logger.error(f"Pin {i} not clickable")
        except NoSuchElementException:
            logger.error(f"Pin {i} not found weirdly...")
        except StaleElementReferenceException:
            logger.error(f"Pin {i} went stale before it could be clicked")
        except TimeoutException:
            logger.error(f"Pin {i} timed out waiting for its location link")

    return location_ids


def _enter_map_and_find_pins(driver, wait):
    '''Scrolls to the map iframe, switches into it and returns the visible pin elements.'''
    map_iframe = driver.find_element(By.XPATH, '//*[@id="widget"]/iframe')
    scroll_back_to_map_view(driver, map_iframe)
    driver.switch_to.frame(map_iframe)

    return wait.until(EC.visibility_of_all_elements_located((
        By.CSS_SELECTOR,
        'img[src="https://maps.gstatic.com/mapfiles/transparent.png"]'
    )))

## Parsing It

In [12]:
# Close any browser left over from a previous run of the cell below.
# Guarded because on a fresh kernel `s` does not exist yet and an
# unconditional call would raise NameError during Restart & Run All.
if "s" in globals():
    s.driver.quit()

In [13]:
from time import time
start_time = time()

# Tests with speedtest.net results of 117.55 down/35.83 up (Mbps)
# indicate that a safe value is 1.5 seconds, can go as low as 1.3 seconds
SLEEP_FOR_IFRAME_PAN = 1.5

# Moynihan Train Station - small search area with very few CCS pins
# (the recorded run below found two)
TEST_COORDS = (40.7525834,-73.9999498) # Lat, long
RADIUS = 1 # miles

search_criterion = SearchCriterion(TEST_COORDS[0], TEST_COORDS[1], RADIUS, SLEEP_FOR_IFRAME_PAN)
location_ids = set()

# Launch the browser; everything after this runs inside try/finally so the
# Chrome process is always closed, even when a scraping step raises
s = SeleniumDriver()
try:
    s.driver.get(URL)

    pick_plug_filters(s.driver, s.wait)

    search_location(s.driver, s.wait, search_criterion)

    grab_location_ids(s.driver, s.wait, search_criterion, location_ids)
    s.driver.switch_to.default_content()
finally:
    s.driver.quit()

print(f"Took {time() - start_time} seconds to execute")

# Should return {'563873', '574882'}
location_ids

Parsing pins: 100%|██████████| 2/2 [00:02<00:00,  1.39s/it]


{'563873', '574882'}

The current problem: the pin popup shoves the viewport so I lose track of my pin elements and they go stale. BUT if I zoom out to make room, the pin elements *also* go stale. 

* I think I have to:
    1. Click the first pin and record its info
    2. Re-query the map (UGH time consuming)
    3. Click the next pin and record
    4. Rinse and repeat for all pins I originally found...

In [240]:
# Exploratory cell illustrating the stale-element problem described above.
# It needs a live browser session (`s`) and a `pins` list from a previous
# query, so it cannot run after the driver has been quit.
from tqdm import tqdm

location_ids = set()
# Wrap the pins themselves (not the enumerate object) so tqdm knows the total
for i, pin in enumerate(tqdm(pins, desc="Parsing pins")):
    try:
        # parse_location_link expects the WebDriverWait, not the raw driver
        location_ids.add(parse_location_link(s.wait, pin))
    except (ElementClickInterceptedException, ElementNotInteractableException):
        logger.error(f"Pin {i} not clickable")
    except (NoSuchElementException):
        logger.error(f"Pin {i} not found weirdly...")
        
location_ids

StaleElementReferenceException: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=126.0.6478.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
0   chromedriver                        0x0000000104b67a20 chromedriver + 4389408
1   chromedriver                        0x0000000104b6032c chromedriver + 4358956
2   chromedriver                        0x000000010477cafc chromedriver + 281340
3   chromedriver                        0x0000000104780fcc chromedriver + 298956
4   chromedriver                        0x0000000104782e44 chromedriver + 306756
5   chromedriver                        0x0000000104782ebc chromedriver + 306876
6   chromedriver                        0x00000001047c0bec chromedriver + 560108
7   chromedriver                        0x00000001047b5bac chromedriver + 514988
8   chromedriver                        0x00000001047b5718 chromedriver + 513816
9   chromedriver                        0x00000001047f7cec chromedriver + 785644
10  chromedriver                        0x00000001047b3ed0 chromedriver + 507600
11  chromedriver                        0x00000001047b48a8 chromedriver + 510120
12  chromedriver                        0x0000000104b2f3a4 chromedriver + 4158372
13  chromedriver                        0x0000000104b33e08 chromedriver + 4177416
14  chromedriver                        0x0000000104b15064 chromedriver + 4051044
15  chromedriver                        0x0000000104b346f4 chromedriver + 4179700
16  chromedriver                        0x0000000104b08064 chromedriver + 3997796
17  chromedriver                        0x0000000104b520bc chromedriver + 4300988
18  chromedriver                        0x0000000104b52238 chromedriver + 4301368
19  chromedriver                        0x0000000104b5ff24 chromedriver + 4357924
20  libsystem_pthread.dylib             0x000000019f3e2f94 _pthread_start + 136
21  libsystem_pthread.dylib             0x000000019f3ddd34 thread_start + 8


In [190]:
# Try switching out of (likely stale) iframe and re-switching to it
s.driver.switch_to.default_content()

map_iframe = s.driver.find_element(By.XPATH, '//*[@id="widget"]/iframe')
s.driver.switch_to.frame(map_iframe)

# Expect one element
pins = get_elements(s.driver, By.CSS_SELECTOR, 'img[src="https://maps.gstatic.com/mapfiles/transparent.png"]')

2024-06-21_T23_46_57EDT: INFO (Notebook-0.2:L10) - Found 3 elements


In [81]:
location_ids

['141640', '141640']

In [None]:
s.driver.switch_to.default_content()

In [31]:
s.driver.quit()