# Purpose

Find a way to get data from Plugshare.com since they're not responding to my API access request. The comments and metadata from stations across different networks should be extremely useful in diagnosing electrical and non-electrical customer experience issues.

# Imports

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from rich import print
import os
import pandas as pd
from bs4 import BeautifulSoup
import requests

from evlens.data.plugshare import Scraper

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

from dotenv import load_dotenv
load_dotenv(override=True)

from evlens.logs import setup_logger
logger = setup_logger("Notebook-0.1")
logger.info("TEST!")

2024-06-17_T23_33_38EDT: INFO (Notebook-0.1:L25) - TEST!


# Testing our custom scraper

## Using the Scraper class

In [2]:
# Use the scraper to get a copy of the driver that will work easily

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

s = Scraper("../data/external/plugshare/06-17-2024/", timeout=3, headless=False)
driver = s.driver

TEST_URL = f"https://www.plugshare.com/location/{TEST_LOCATION}"
s.driver.maximize_window()
s.driver.get(TEST_URL)
s.reject_all_cookies_dialog()
s.exit_login_dialog()

2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L85) - Found the cookie banner!
2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L89) - Switching to cookie dialog iframe...
2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L92) - Selecting 'Manage Settings' link...
2024-06-17_T23_33_47EDT: INFO (evlens.data.plugshare:L99) - Clicking 'Reject All' button...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L106) - Confirming rejection...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L114) - Switching back to main page content...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L62) - Attempting to exit login dialog...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L70) - Successfully exited the login dialog!


In [3]:
more_comments_link = driver.find_element(
    By.XPATH,
    "//*[@id=\"checkins\"]/div[2]/span[3]"
)
more_comments_link.click()

detailed_checkins = driver.find_element(
    By.XPATH,
    "//*[@id=\"dialogContent_reviews\"]/div/div"
).find_elements(By.XPATH, "./child::*")

# checkins = pd.Series([d.text for d in detailed_checkins])\
#     .str.replace("check_circle", "")\
#     .replace({"": np.nan})\
#     .dropna()
    
# checkins

Hierarchy of a check-in:

1. What I call `detailed_checkins` is the set of check-in objects
    1. `class="details"` is the check-in stripped of profile picture
        1. `class="date ng-binding"` is useful for timestamping
        2. `class="user"` contains user data (that I will likely ignore)
            1. `class="name ng-binding"` is username
        2. `class="car ng-binding"` gets me car info (USEFUL)
        3. `class="additional"` provides even more info
            1. `class="problem ng-scope"` is useful if they complain of a problem and it's tracked (but will often be missing I imagine)
            2. `class="connector ng-binding"` gives connector info (e.g. CCS/SAE)
            3. `class="kilowatts ng-scope"` gives the kW charging level observed
            4. `class="comment ng-binding"` is the money, free-text comments!

In [16]:
from selenium.webdriver.remote.webelement import WebElement

class CheckIn:
    '''
    Tracks all the different components of a single check-in and can return as a single-row pandas DataFrame to be used elsewhere.
    '''
    def __init__(self, checkin_element: WebElement):
        self.element = checkin_element
        
    def parse(self) -> pd.DataFrame:
        
        output = dict()
        
        # Details part
        details_element = self.element.find_element(By.CLASS_NAME, "details")
        details_children = details_element.find_elements(By.XPATH, "./child::*")
        for d in details_children:
            if d.get_attribute("class") == 'date ng-binding':
                output['date'] = pd.to_datetime(d.text)
            elif d.get_attribute("class") == 'car ng-binding':
                output['car'] = d.text
            elif d.get_attribute("class") == 'additional':
                self.additional_children = d.find_elements(By.XPATH, "./child::*")
        
        # "Additional" part
        print([d.get_attribute("class") for d in self.additional_children])
        for d in self.additional_children:
            if d.get_attribute("class") == 'problem ng-scope':
                output['problem'] = d.text
            elif d.get_attribute("class") == 'connector ng-binding':
                output['connector_type'] = d.text
            elif d.get_attribute("class") == 'kilowatts ng-scope':
                output['charge_power_kilowatts'] = d.text
            elif d.get_attribute("class") == 'comment ng-binding':
                output['comment'] = d.text
                
        
        # Check what columns we're missing and fill with null
        expected_columns = [
            'date',
            'car',
            'problem',
            'connector_type',
            'charge_power_kilowatts',
            'comment'
        ]
        for c in expected_columns:
            if c not in output.keys():
                output[c] = np.nan
        
        
        return pd.DataFrame(output, index=[0])
    

In [17]:
# Should be a safe and simple check-in
test_checkin = detailed_checkins[2]

c = CheckIn(test_checkin)
c.parse()

Unnamed: 0,date,car,connector_type,charge_power_kilowatts,comment,problem
0,2024-06-13,Kia EV6 2022,CCS/SAE,215 Kilowatts,"Still no screen on charger 1, but works in the...",


In [13]:
c.additional_children

[<selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db24d92841db0a1", element="f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.477")>,
 <selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db24d92841db0a1", element="f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.478")>,
 <selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db24d92841db0a1", element="f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.479")>,
 <selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db24d92841db0a1", element="f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.480")>,
 <selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db24d92841db0a1", element="f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.481")>,
 <selenium.webdriver.remote.webelement.WebElement (session="29236a6b2f9db3863db2

In [11]:
details_children = test_checkin.find_element(By.CLASS_NAME, "details").find_elements(By.XPATH, "./child::*")

[i.text for i in details_children]

['Jun 13, 2024',
 'check_circleKMac',
 'Kia EV6 2022',
 'CCS/SAE 215 Kilowatts\nStill no screen on charger 1, but works in the app.',
 '']

In [13]:
[i.get_attribute("class") for i in details_children]

['date ng-binding',
 'user',
 'car ng-binding',
 'additional',
 'official-response ng-hide']

In [15]:
test_checkin.find_element(By.CLASS_NAME, "details").find_element(By.CLASS_NAME, "date").text





MaxRetryError: HTTPConnectionPool(host='localhost', port=52214): Max retries exceeded with url: /session/80d92420fc8e7d1002b54c00e56c127d/element/f.C17C1A056E39A8EC1C4BE1EB535DD6C7.d.42E124C63874B577E951AB840C3232A0.e.376/element (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x11c04e9d0>: Failed to establish a new connection: [Errno 61] Connection refused'))

In [None]:
try: # SCRAPE CAR
    
    carList = []
    cars = driver.find_elements(By.CLASS_NAME, "car ng-binding") # PRINTS TYPE OF CAR FOR EACH PERSON
    for car in cars:
        carList.append(car)
    # cars = ', '.join(carList)
except (NoSuchElementException, TimeoutException):
    logger.error("Car details error", exc_info=True)
    
carList

In [19]:
# Why is it taking so long to even *start* trying to exit the login dialog?!
s = Scraper("../data/external/plugshare/06-17-2024/", timeout=3, headless=True)

# Scrape only one location that I can test via browser
df = s.run(TEST_LOCATION, TEST_LOCATION)
df.info()
df.head()

2024-06-17_T23_56_26EDT: INFO (evlens.data.plugshare:L287) - Beginning scraping!
Parsing stations:   0%|          | 0/1 [00:00<?, ?it/s]2024-06-17_T23_56_28EDT: INFO (evlens.data.plugshare:L147) - Found the cookie banner!
2024-06-17_T23_56_28EDT: INFO (evlens.data.plugshare:L151) - Switching to cookie dialog iframe...
2024-06-17_T23_56_28EDT: INFO (evlens.data.plugshare:L154) - Selecting 'Manage Settings' link...
2024-06-17_T23_56_28EDT: INFO (evlens.data.plugshare:L161) - Clicking 'Reject All' button...
2024-06-17_T23_56_31EDT: INFO (evlens.data.plugshare:L168) - Confirming rejection...
2024-06-17_T23_56_31EDT: INFO (evlens.data.plugshare:L176) - Switching back to main page content...
2024-06-17_T23_56_31EDT: INFO (evlens.data.plugshare:L124) - Attempting to exit login dialog...
2024-06-17_T23_56_34EDT: ERROR (evlens.data.plugshare:L135) - Login dialog exit button not found.
2024-06-17_T23_56_34EDT: INFO (evlens.data.plugshare:L193) - Starting page scrape...
2024-06-17_T23_56_37EDT: E

TypeError: cannot unpack non-iterable NoneType object

Parse the results and figure out which station IDs we should put on our do-not-fly list and which to keep

1. Ones that are fully null somehow should be discarded entirely
2. Parse the remaining ones' addresses so they can be binned by country
    * Note but remove the ones outside the US for now