# Purpose

Figure out pulling location IDs for only DCFC US (non-Tesla to start with) in the US. Need location IDs to efficiently scrape Plugshare with some of our other code.

# Imports

In [1]:

%load_ext autoreload
%autoreload 2

import numpy as np
from rich import print
import os
import pandas as pd
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm
from typing import List, Union, Set

from evlens.data.plugshare import Scraper

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException, ElementNotInteractableException

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

from dotenv import load_dotenv
load_dotenv(override=True)

from evlens.logs import setup_logger
logger = setup_logger("Notebook-0.2")
logger.info("TEST!")

2024-06-22_T19_58_32EDT: INFO (Notebook-0.2:L31) - TEST!


# Set up our constants

# Grabbing the data from the first page

First let's focus on simply pulling location IDs from the pins we see on the default map view before we get into the details of rastering across the US.

## Helper Functions

In [None]:
URL = "https://developer.plugshare.com/embed"

In [3]:


class SeleniumDriver():
    
    def __init__(self, timeout: int = 3):
        self.timeout = timeout
        self.chrome_options = Options()
            
        # Removes automation infobar
        self.chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])


        # Get rid of kruft that will slow us down
        self.chrome_options.add_argument("--disable-extensions")
        self.chrome_options.add_argument("--disable-notifications")

        # Turn off geolocation to speed things up
        prefs = {"profile.default_content_setting_values.geolocation":2} 
        self.chrome_options.add_experimental_option("prefs", prefs)
        

        self.driver = webdriver.Chrome(options=self.chrome_options)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, timeout)

In [4]:
def get_elements(driver, criterion, xpath: str):
    elements = driver.find_elements(criterion, xpath)
    if len(elements) == 1:
        logger.info("Found only one element")
        return elements[0]
    elif len(elements) == 0:
        logger.error("Found no elements")
        return None
    
    logger.info(f"Found {len(elements)} elements")
    return elements

## Parsing It

In [23]:
from time import time
from evlens.data.plugshare import LocationIDScraper, SearchCriterion

from datetime import date
TODAY_STRING = date.today().strftime("%m-%d-%Y")

# Moynihan Train Station - should have only one pin for CCS
TEST_COORDS = (40.7525834,-73.9999498) # Lat, long
RADIUS = 1 # miles
SLEEP_FOR_IFRAME_PAN = 1.5

start_time = time()
lis = LocationIDScraper(
    f"../data/external/plugshare/{TODAY_STRING}/",
    timeout=3,
    headless=False
)

sc = SearchCriterion(
    TEST_COORDS[0],
    TEST_COORDS[1],
    RADIUS,
    SLEEP_FOR_IFRAME_PAN
)
df_location_ids = lis.run([sc])

print(f"Took {time() - start_time} seconds to execute")

# Should return [563873, 574882]
df_location_ids

2024-06-23_T17_30_38EDT: INFO (evlens.data.plugshare:L563) - Beginning location ID scraping!
Parsing pins: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]
Searching map tiles: 100%|██████████| 1/1 [00:06<00:00,  6.06s/it]
2024-06-23_T17_30_46EDT: INFO (evlens.data.plugshare:L330) - Saving checkpoint 'df_location_ids'...
2024-06-23_T17_30_46EDT: INFO (evlens.data.plugshare:L338) - Save complete!
2024-06-23_T17_30_46EDT: INFO (evlens.data.plugshare:L604) - All location IDs scraped (that we could)!


Unnamed: 0,parsed_datetime,plug_types_searched,location_id,search_cell_latitude,search_cell_longitude
0,06-23-2024_T21_30_44,[SAE Combo DC CCS],563873,40.752583,-73.99995
1,06-23-2024_T21_30_44,[SAE Combo DC CCS],574882,40.752583,-73.99995


The current problem: the pin popup shoves the viewport so I lose track of my pin elements and they go stale. BUT if I zoom out to make room, the pin elements *also* go stale. 

* I think I have to:
    1. Click the first pin and record its info
    2. Re-query the map (UGH time consuming)
    3. Click the next pin and record
    4. Rinse and repeat for all pins I originally found...

In [240]:
from tqdm import tqdm

location_ids = set()
for i, pin in tqdm(enumerate(pins), desc="Parsing pins"):
    try:
        location_ids.add(parse_location_link(s.driver, pin))
    except (ElementClickInterceptedException, ElementNotInteractableException):
        logger.error(f"Pin {i} not clickable")
    except (NoSuchElementException):
        logger.error(f"Pin {i} not found weirdly...")
        
location_ids

StaleElementReferenceException: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=126.0.6478.114); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
0   chromedriver                        0x0000000104b67a20 chromedriver + 4389408
1   chromedriver                        0x0000000104b6032c chromedriver + 4358956
2   chromedriver                        0x000000010477cafc chromedriver + 281340
3   chromedriver                        0x0000000104780fcc chromedriver + 298956
4   chromedriver                        0x0000000104782e44 chromedriver + 306756
5   chromedriver                        0x0000000104782ebc chromedriver + 306876
6   chromedriver                        0x00000001047c0bec chromedriver + 560108
7   chromedriver                        0x00000001047b5bac chromedriver + 514988
8   chromedriver                        0x00000001047b5718 chromedriver + 513816
9   chromedriver                        0x00000001047f7cec chromedriver + 785644
10  chromedriver                        0x00000001047b3ed0 chromedriver + 507600
11  chromedriver                        0x00000001047b48a8 chromedriver + 510120
12  chromedriver                        0x0000000104b2f3a4 chromedriver + 4158372
13  chromedriver                        0x0000000104b33e08 chromedriver + 4177416
14  chromedriver                        0x0000000104b15064 chromedriver + 4051044
15  chromedriver                        0x0000000104b346f4 chromedriver + 4179700
16  chromedriver                        0x0000000104b08064 chromedriver + 3997796
17  chromedriver                        0x0000000104b520bc chromedriver + 4300988
18  chromedriver                        0x0000000104b52238 chromedriver + 4301368
19  chromedriver                        0x0000000104b5ff24 chromedriver + 4357924
20  libsystem_pthread.dylib             0x000000019f3e2f94 _pthread_start + 136
21  libsystem_pthread.dylib             0x000000019f3ddd34 thread_start + 8


In [190]:
# Try switching out of (likely stale) iframe and re-switching to it
s.driver.switch_to.default_content()

map_iframe = s.driver.find_element(By.XPATH, '//*[@id="widget"]/iframe')
s.driver.switch_to.frame(map_iframe)

# Expect one element
pins = get_elements(s.driver, By.CSS_SELECTOR, 'img[src="https://maps.gstatic.com/mapfiles/transparent.png"]')

2024-06-21_T23_46_57EDT: INFO (Notebook-0.2:L10) - Found 3 elements


In [81]:
location_ids

['141640', '141640']

In [None]:
s.driver.switch_to.default_content()

In [31]:
s.driver.quit()