# Purpose

Find a way to get data from PlugShare.com, since they're not responding to my API access request. The comments and metadata from stations across different networks should be extremely useful in diagnosing both electrical and non-electrical customer-experience issues.

# Imports

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from rich import print
import os
import pandas as pd
from bs4 import BeautifulSoup
import requests

from evlens.data.plugshare import Scraper

from dotenv import load_dotenv
# Presumably loads environment variables from a local .env file; override=True
# is passed explicitly — confirm the intended precedence against python-dotenv docs.
load_dotenv(override=True)

from evlens.logs import setup_logger
# Notebook-scoped logger; the test message below confirms logging is wired up.
logger = setup_logger("Notebook-0.1")
logger.info("TEST!")

2024-06-16_T22_10_52EDT: INFO (Notebook-0.1:L18) - TEST!


In [2]:
# PlugShare location ID used for manual testing:
# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

# Testing our custom scraper

In [78]:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784
url = f"https://www.plugshare.com/location/{TEST_LOCATION}"

# Browser configuration. Headless mode is left disabled so the page can be
# watched while experimenting; uncomment the next line to enable it.
chrome_options = Options()
# chrome_options.add_argument('--headless=new')
chrome_options.add_argument("--disable-extensions")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

# Launch Chrome, maximise the window and load the station page.
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()
driver.get(url)

# Shared explicit wait (5 second timeout) used by the lookups that follow.
wait = WebDriverWait(driver, 5)

# The cookie consent notice is located by ID and treated as an iframe: once it
# is visible, focus is switched into it. If it never shows up we just carry on.
try:
    iframe = wait.until(
        EC.visibility_of_element_located((By.ID, "global-consent-notice"))
    )
    logger.info("Found the banner!")
    driver.switch_to.frame(iframe)
except (NoSuchElementException, TimeoutException):
    logger.warning("No cookie dialog iframe found, moving on!")

2024-06-16_T16_03_39EDT: INFO (Notebook-0.1:L31) - Found the banner!


*Note that the only button is the Accept one, so we need another mechanism for finding the Manage Settings link...*

In [79]:
# Locate a link inside the cookie dialog by its absolute XPath and wait until it
# is clickable. NOTE(review): presumably this is the "Manage Settings" link; an
# absolute path like this will break if the dialog's markup changes.
manage_cookies_link = wait.until(EC.element_to_be_clickable((
        By.XPATH,
        "/html/body/app-root/app-theme/div/div/app-notice/app-theme/div/div/app-home/div/div[2]/app-footer/div/div/app-section-links/span/a"
    )))
manage_cookies_link

<selenium.webdriver.remote.webelement.WebElement (session="5fefa6b7c37ee906f2cff31d1ecc7a9e", element="f.E6EB0A15BAB1CC4BE0B30E3921F6534E.d.99CDEDC2E7F962853D2B80E20931F10A.e.8")>

In [80]:
# Click the link located in the previous cell (expected to open the cookie settings view).
manage_cookies_link.click()

In [81]:
# Wait for the element with id "denyAll" to become clickable, then echo the
# resulting WebElement as the cell output.
reject_all_button = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="denyAll"]'))
)
reject_all_button

<selenium.webdriver.remote.webelement.WebElement (session="5fefa6b7c37ee906f2cff31d1ecc7a9e", element="f.E6EB0A15BAB1CC4BE0B30E3921F6534E.d.99CDEDC2E7F962853D2B80E20931F10A.e.20")>

In [82]:
# Click the "denyAll" button located in the previous cell.
reject_all_button.click()

In [83]:
# Wait for the second button inside the "mat-dialog-0" dialog to become
# clickable, then echo the resulting WebElement as the cell output.
reject_all_button_confirm = wait.until(
    EC.element_to_be_clickable((
        By.XPATH,
        '//*[@id="mat-dialog-0"]/ng-component/app-theme/div/div/div[2]/button[2]',
    ))
)
reject_all_button_confirm

<selenium.webdriver.remote.webelement.WebElement (session="5fefa6b7c37ee906f2cff31d1ecc7a9e", element="f.E6EB0A15BAB1CC4BE0B30E3921F6534E.d.99CDEDC2E7F962853D2B80E20931F10A.e.580")>

In [84]:
# Click the confirmation button located in the previous cell.
reject_all_button_confirm.click()

In [85]:
# Switch back to main frame
driver.switch_to.default_content()

# Exit login dialog
try:
    # Use a dedicated short-timeout wait for the exit button rather than
    # rebinding the shared `wait`; rebinding would silently shorten the timeout
    # of every later cell that relies on `wait`.
    login_wait = WebDriverWait(driver, 1)
    esc_button = login_wait.until(EC.visibility_of_element_located((
        By.XPATH,
        # Selector copied from Chrome; an earlier attempt targeted the nested
        # md-icon element instead of the button itself.
        "//*[@id=\"dialogContent_authenticate\"]/button"
    )))
    esc_button.click()
    logger.info("Found the login escape button and clicked it!")

except (NoSuchElementException, TimeoutException):
    # The dialog did not show up within the timeout; report it and carry on.
    logger.error("Login dialog exit button not found.")

except Exception as e:
    # Anything else is unexpected: keep the notebook running, but record the
    # traceback so the failure can be diagnosed.
    logger.error(f"Unknown error trying to exit login dialog: {e}", exc_info=True)

2024-06-16_T16_05_14EDT: INFO (Notebook-0.1:L14) - Found the login escape button and clicked it!


In [86]:
# Close the browser session.
# NOTE(review): cells further down still call `wait.until(...)` and
# `driver.find_element(...)`; on a top-to-bottom run they execute after this quit.
driver.quit()

In [None]:
# NOTE(review): scratch cell with no execution count — looks like leftover
# debugging; consider removing it.
print('a')

In [29]:
# Do a single element scrape: the station name.
try:
    # wait.until returns the element once it is visible, so read its text
    # directly instead of looking the same XPath up a second time.
    name = wait.until(EC.visibility_of_element_located((
        By.XPATH,
        "//*[@id=\"display-name\"]/div/h1"
    ))).text
except Exception:
    # Best-effort scrape: log the traceback and fall back to NaN. Catching
    # Exception rather than using a bare `except:` lets KeyboardInterrupt and
    # SystemExit propagate, so the cell can still be interrupted.
    logger.error("Station name error", exc_info=True)
    name = np.nan

name

2024-06-14_T09_24_24EDT: ERROR (Notebook-0.1:L12) - Station name error
Traceback (most recent call last):
  File "/var/folders/98/fhwnl49n19l_xywxzghbm4jm0000gn/T/ipykernel_58677/2682536154.py", line 3, in <module>
    wait.until(EC.visibility_of_element_located((
  File "/Users/davemcrench/Documents/Projects/evlens/.venv/lib/python3.11/site-packages/selenium/webdriver/support/wait.py", line 105, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: 



nan

In [25]:
# Close the browser session after the single-element scrape.
driver.quit()

In [13]:
# NOTE(review): scratch check of the modulo operator (evaluates to 0); not used
# by any other cell in view — consider removing it.
100 % 100

0

## Using the Scraper class

In [5]:
# Open question: why is there such a long delay before the scraper even
# *starts* trying to exit the login dialog?!
s = Scraper(
    "../data/external/plugshare/06-16-2024/",
    timeout=3,
    headless=False,
)

# Restrict the run to a single location that can be checked by hand in a browser.
df = s.run(1, 1)
df.info()
df.head()

2024-06-16_T22_16_16EDT: INFO (evlens.data.plugshare:L201) - Beginning scraping!
Parsing stations:   0%|          | 0/1 [00:00<?, ?it/s]2024-06-16_T22_16_19EDT: INFO (evlens.data.plugshare:L81) - Found the cookie banner!
2024-06-16_T22_16_19EDT: INFO (evlens.data.plugshare:L85) - Switching to cookie dialog iframe...
2024-06-16_T22_16_19EDT: INFO (evlens.data.plugshare:L88) - Selecting 'Manage Settings' link...
2024-06-16_T22_16_20EDT: INFO (evlens.data.plugshare:L95) - Clicking 'Reject All' button...
2024-06-16_T22_16_22EDT: INFO (evlens.data.plugshare:L102) - Confirming rejection...
2024-06-16_T22_16_22EDT: INFO (evlens.data.plugshare:L110) - Switching back to main page content...
2024-06-16_T22_16_22EDT: INFO (evlens.data.plugshare:L58) - Attempting to exit login dialog...
2024-06-16_T22_16_25EDT: ERROR (evlens.data.plugshare:L69) - Login dialog exit button not found.
2024-06-16_T22_16_25EDT: INFO (evlens.data.plugshare:L118) - Starting page scrape...
2024-06-16_T22_16_28EDT: ERROR (

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      0 non-null      float64
 1   Address   1 non-null      object 
 2   Rating    1 non-null      object 
 3   Wattage   1 non-null      object 
 4   Hours     1 non-null      object 
 5   Checkins  1 non-null      object 
 6   Comments  1 non-null      object 
 7   Car       1 non-null      object 
dtypes: float64(1), object(7)
memory usage: 196.0+ bytes


Unnamed: 0,Name,Address,Rating,Wattage,Hours,Checkins,Comments,Car
0,,,,,,,", , ,",


In [9]:
# Inspect the raw text stored in the 'Comments' column for the first scraped row.
df.loc[0, 'Comments']

'J-1772 1 Plug 7 kW\n1 Station\nElectrify America, CCS/SAE 3 Plugs 350 kW\n3 Stations\nElectrify America, , Jun 16, 2024\ncheck_circleTim\nHyundai Ioniq 5 2024242 Kilowatts, Jun 13, 2024\ncheck_circleKMac\nKia EV6 2022215 Kilowatts\nStill no screen on charger 1, but works in the app., Jun 13, 2024\ncheck_circleShane\nNissan LEAF 2010\nSadly they removed the chademo charger. Note there already to be free j1772 chargers in front of target, Jun 12, 2024\ncheck_circleBennett Morrow\nHyundai Ioniq 5 2022, Jun 7, 2024\ncheck_circleJ C\nBMW iX 2024, 0.08 mi\n9.3 Springfield Town Center - Target - East Lot (2)\nJ-1772, 0.18 mi\n4.8 Springfield Town Center - LA Fitness\nJ-1772, 0.22 mi\n1 Springfield Town Center - Frontier Garage\nJ-1772, 0.22 mi\n10 Springfield Town Center - Target - West Lot\nNACS (Tesla), 0.23 mi\n4.7 Springfield Town Center - Maggianos\nJ-1772, , , '