# Purpose

Find a way to get data from Plugshare.com since they're not responding to my API access request. The comments and metadata from stations across different networks should be extremely useful in diagnosing electrical and non-electrical customer experience issues.

# Imports

In [1]:

%load_ext autoreload
%autoreload 2

import numpy as np
from rich import print
import os
import pandas as pd
from bs4 import BeautifulSoup
import requests

from evlens.data.plugshare import MainMapScraper

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

from dotenv import load_dotenv
load_dotenv(override=True)

from evlens.logs import setup_logger
logger = setup_logger("Notebook-0.1")
logger.info("TEST!")

2024-07-19_T23_11_51EDT: INFO (Notebook-0.1:L28) - TEST!


In [18]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

# Testing our custom scraper

## Using the Scraper class

In [2]:
# Use the scraper to get a copy of the driver that will work easily

# Electrify America in Springfield, VA mall parking lot
TEST_LOCATION = 252784

s = Scraper("../data/external/plugshare/06-17-2024/", timeout=3, headless=False)
driver = s.driver

TEST_URL = f"https://www.plugshare.com/location/{TEST_LOCATION}"
s.driver.maximize_window()
s.driver.get(TEST_URL)
s.reject_all_cookies_dialog()
s.exit_login_dialog()

2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L85) - Found the cookie banner!
2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L89) - Switching to cookie dialog iframe...
2024-06-17_T23_33_46EDT: INFO (evlens.data.plugshare:L92) - Selecting 'Manage Settings' link...
2024-06-17_T23_33_47EDT: INFO (evlens.data.plugshare:L99) - Clicking 'Reject All' button...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L106) - Confirming rejection...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L114) - Switching back to main page content...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L62) - Attempting to exit login dialog...
2024-06-17_T23_33_49EDT: INFO (evlens.data.plugshare:L70) - Successfully exited the login dialog!


In [3]:
more_comments_link = driver.find_element(
    By.XPATH,
    "//*[@id=\"checkins\"]/div[2]/span[3]"
)
more_comments_link.click()

detailed_checkins = driver.find_element(
    By.XPATH,
    "//*[@id=\"dialogContent_reviews\"]/div/div"
).find_elements(By.XPATH, "./child::*")

# checkins = pd.Series([d.text for d in detailed_checkins])\
#     .str.replace("check_circle", "")\
#     .replace({"": np.nan})\
#     .dropna()
    
# checkins

Hierarchy of a check-in:

1. What I call `detailed_checkins` is the set of check-in objects
    1. `class="details"` is the check-in stripped of profile picture
        1. `class="date ng-binding"` is useful for timestamping
        2. `class="user"` contains user data (that I will likely ignore)
            1. `class="name ng-binding"` is username
        2. `class="car ng-binding"` gets me car info (USEFUL)
        3. `class="additional"` provides even more info
            1. `class="problem ng-scope"` is useful if they complain of a problem and it's tracked (but will often be missing I imagine)
            2. `class="connector ng-binding"` gives connector info (e.g. CCS/SAE)
            3. `class="kilowatts ng-scope"` gives the kW charging level observed
            4. `class="comment ng-binding"` is the money, free-text comments!

In [23]:
detailed_checkins[0].text



MaxRetryError: HTTPConnectionPool(host='localhost', port=53400): Max retries exceeded with url: /session/29236a6b2f9db3863db24d92841db0a1/element/f.678FA61FCB3468775B78587BAC86C68E.d.AEF94DC063D3A5EA3913D847140D63F2.e.387/text (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x1131bbc10>: Failed to establish a new connection: [Errno 61] Connection refused'))

In [6]:
s.driver.quit()

In [21]:
# Why is it taking so long to even *start* trying to exit the login dialog?!
s = Scraper("../data/external/plugshare/06-17-2024/", timeout=3, headless=True)

# Scrape only one location that I can test via browser
df_locations, df_checkins = s.run(TEST_LOCATION, TEST_LOCATION)
df_locations.info()
df_checkins.info()
df_locations.head()

2024-06-18_T16_23_42EDT: INFO (evlens.data.plugshare:L293) - Beginning scraping!
Parsing stations:   0%|          | 0/1 [00:00<?, ?it/s]2024-06-18_T16_25_06EDT: ERROR (evlens.data.plugshare:L312) - Cookie banner or 'Manage Settings' link not found. Assuming cookies are not rejected.
2024-06-18_T16_25_06EDT: INFO (evlens.data.plugshare:L128) - Attempting to exit login dialog...
2024-06-18_T16_25_09EDT: ERROR (evlens.data.plugshare:L139) - Login dialog exit button not found.
2024-06-18_T16_25_09EDT: INFO (evlens.data.plugshare:L197) - Starting page scrape...
2024-06-18_T16_25_12EDT: ERROR (evlens.data.plugshare:L205) - Station name error, skipping...
Parsing stations:   0%|          | 0/1 [01:30<?, ?it/s]


TypeError: cannot unpack non-iterable NoneType object

In [32]:
# Why is it taking so long to even *start* trying to exit the login dialog?!
s = Scraper("../data/external/plugshare/06-17-2024/", timeout=3, headless=True)

# Scrape only one location that I can test via browser
df_locations, df_checkins = s.run(TEST_LOCATION, TEST_LOCATION)
df_locations.info()
df_checkins.info()
df_locations.head()

2024-06-19_T08_09_43EDT: INFO (evlens.data.plugshare:L296) - Beginning scraping!
Parsing stations:   0%|          | 0/1 [00:00<?, ?it/s]2024-06-19_T08_09_47EDT: ERROR (evlens.data.plugshare:L316) - Cookie banner or 'Manage Settings' link not found. Assuming cookies are not rejected.
2024-06-19_T08_09_47EDT: INFO (evlens.data.plugshare:L128) - Attempting to exit login dialog...
2024-06-19_T08_09_50EDT: ERROR (evlens.data.plugshare:L139) - Login dialog exit button not found.
2024-06-19_T08_09_51EDT: INFO (evlens.data.plugshare:L198) - Starting page scrape...
2024-06-19_T08_09_54EDT: ERROR (evlens.data.plugshare:L206) - Station name error, skipping...
Parsing stations:   0%|          | 0/1 [00:10<?, ?it/s]


TypeError: cannot unpack non-iterable NoneType object

In [25]:
df_checkins[df_checkins['date'].isnull()]

Unnamed: 0,date,car,problem,connector_type,charge_power_kilowatts,comment,location_id
0,NaT,,,,242 Kilowatts,,252784


Parse the results and figure out which station IDs we should put on our do-not-fly list and which to keep

1. Ones that are fully null somehow should be discarded entirely
2. Parse the remaining ones' addresses so they can be binned by country
    * Note but remove the ones outside the US for now

# Adding Ability to Capture Data via Network Traffic

Leveraging what we've learned with selenium-wire for location ID scraping.

In [6]:
s.driver.quit()

2024-07-19_T23_20_32EDT: INFO (mitmproxy.proxy.mode_servers:L154) - HTTP(S) proxy at 127.0.0.1:63562 stopped.


In [204]:
df_checkins.info()

<class 'pandas.core.frame.DataFrame'>
Index: 41 entries, 0 to 49
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      41 non-null     int64              
 1   evse_id                 26 non-null     float64            
 2   comment                 41 non-null     object             
 3   created_at              41 non-null     datetime64[ns, UTC]
 4   finished                30 non-null     datetime64[ns, UTC]
 5   charging_time           30 non-null     timedelta64[ns]    
 6   connector_type          25 non-null     float64            
 7   charge_power_kilowatts  30 non-null     float64            
 8   problem                 41 non-null     int64              
 9   problem                 41 non-null     object             
 10  rating                  41 non-null     int64              
 11  vehicle_name            41 non-null     object      

In [None]:
import nest_asyncio
nest_asyncio.apply()

s = MainMapScraper(
    f"../../data/external/plugshare/07-19-2024/",
    timeout=3,
    progress_bars=True,
    headless=False,
    save_every=100
)

location_id = '252784'
# s.driver.get(f"https://www.plugshare.com/location/{location_id}")
# s.reject_all_cookies_dialog()
# s.exit_login_dialog()

df_station, df_checkins, df_evses = s.run([location_id])
df_station

# df = s._catch_api_response(location_id)
# df.info()
# df.head()

2024-07-23_T09_16_54EDT: INFO (mitmproxy.proxy.mode_servers:L139) - HTTP(S) proxy listening at 127.0.0.1:60608.
2024-07-23_T09_16_55EDT: INFO (evlens.data.plugshare:L567) - Beginning scraping!
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - client connect
Parsing stations:   0%|          | 0/1 [00:00<?, ?it/s]2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - client connect
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - client connect
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - server connect www.plugshare.com:443 (104.17.95.180:443)
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - server connect www.plugshare.com:443 (104.17.95.180:443)
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - server connect accounts.google.com:443 (142.251.163.84:443)
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - client connect
2024-07-23_T09_16_55EDT: INFO (mitmproxy.proxy.server:L372) - client connect
2024-07

Unnamed: 0,location_id,name,description,amenities,photos,plugscore,evse_count,access,phone,address,location_type,service_hours,open247,coming_soon,parking,parking_level,overhead_clearance_meters,checkin_count,kilowatts_max,network,id,last_scraped
0,252784,Springfield Town Center - Target - East Lot (1),Three 150kW DC Fast Chargers and one J1772 cha...,3;9;4;8;2,https://photos.plugshare.com/photos/1176040.pn...,10.0,4,1,18336322778,"6600 Springfield Mall, Springfield, Virginia, ...",Shopping Center,,True,False,PULL_IN,,,461,350.0,Electrify America,1e6a48ec-3dd9-499c-9f15-d7f30a7c3733,2024-07-23 13:17:47.773695+00:00


2024-07-23_T09_27_40EDT: INFO (mitmproxy.proxy.server:L372) - Closing connection due to inactivity: Client(127.0.0.1:61650, state=can_write)
2024-07-23_T09_27_40EDT: INFO (mitmproxy.proxy.server:L372) - client disconnect
2024-07-23_T09_27_40EDT: INFO (mitmproxy.proxy.server:L372) - server disconnect ad.mrtnsvr.com:443 (34.102.163.6:443)


In [205]:
df_evses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             4 non-null      int64  
 1   name           4 non-null      object 
 2   network_names  4 non-null      object 
 3   kilowatts      4 non-null      float64
 4   manufacturer   4 non-null      object 
 5   model          4 non-null      object 
 6   station_id     4 non-null      int64  
 7   available      4 non-null      int64  
dtypes: float64(1), int64(3), object(4)
memory usage: 388.0+ bytes


In [196]:
s.save_to_bigquery(df_checkins, 'checkins')

2024-07-22_T23_26_05EDT: INFO (evlens.data.plugshare:L553) - Saving 41 rows to BigQuery...


ValueError: Duplicate column names found: ['id', 'evse_id', 'comment', 'created_at', 'finished', 'charging_time', 'connector_type', 'charge_power_kilowatts', 'problem', 'problem', 'rating', 'vehicle_name', 'vehicle_year']

In [195]:
df_checkins.info()
df_checkins.head()

<class 'pandas.core.frame.DataFrame'>
Index: 41 entries, 0 to 49
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      41 non-null     int64              
 1   evse_id                 26 non-null     float64            
 2   comment                 41 non-null     object             
 3   created_at              41 non-null     datetime64[ns, UTC]
 4   finished                30 non-null     datetime64[ns, UTC]
 5   charging_time           30 non-null     timedelta64[ns]    
 6   connector_type          25 non-null     float64            
 7   charge_power_kilowatts  30 non-null     float64            
 8   problem                 41 non-null     int64              
 9   problem                 41 non-null     object             
 10  rating                  41 non-null     int64              
 11  vehicle_name            41 non-null     object      

Unnamed: 0,id,evse_id,comment,created_at,finished,charging_time,connector_type,charge_power_kilowatts,problem,problem.1,rating,vehicle_name,vehicle_year
0,9623943,,,2024-07-21 19:51:48+00:00,2024-07-21 20:51:49+00:00,0 days 01:00:01,13.0,153.0,0,Not specified,1,Rivian R1S 2024,2024.0
1,9622144,554351.0,"Smooth, quick, rapid charging",2024-07-21 12:38:08+00:00,2024-07-21 13:38:09+00:00,0 days 01:00:01,13.0,130.0,0,Not specified,1,Nissan Ariya 2024,2024.0
2,9619854,,,2024-07-20 23:50:43+00:00,NaT,NaT,13.0,205.0,0,Not specified,1,BMW i4 M50 2024,2024.0
3,9607081,554351.0,,2024-07-18 12:33:01+00:00,2024-07-18 13:03:00+00:00,0 days 00:29:59,13.0,0.0,0,Not specified,1,Hyundai Ioniq Electric 2019,2019.0
5,9575818,,Charger 3 good to go!,2024-07-11 14:27:16+00:00,NaT,NaT,,,0,Not specified,1,BMW i4 eDrive40 2023,2023.0


In [190]:
df_station.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   location_id                1 non-null      object             
 1   name                       1 non-null      object             
 2   description                1 non-null      object             
 3   amenities                  1 non-null      object             
 4   photos                     1 non-null      object             
 5   plugscore                  1 non-null      float64            
 6   evse_count                 1 non-null      int64              
 7   access                     1 non-null      int64              
 8   phone                      1 non-null      object             
 9   address                    1 non-null      object             
 10  location_type              1 non-null      object             
 11  service_ho

In [188]:
s.driver.quit()

2024-07-22_T22_35_53EDT: INFO (mitmproxy.proxy.mode_servers:L154) - HTTP(S) proxy at 127.0.0.1:63474 stopped.


In [187]:
df_checkins

Unnamed: 0,id,evse_id,comment,created_at,finished,charging_time,connector_type,charge_power_kilowatts,problem,problem.1,rating,vehicle_name,vehicle_year
0,9623943,,,2024-07-21 19:51:48+00:00,2024-07-21 20:51:49+00:00,0 days 01:00:01,13.0,153.0,0,Not specified,1,Rivian R1S 2024,2024.0
1,9622144,554351.0,"Smooth, quick, rapid charging",2024-07-21 12:38:08+00:00,2024-07-21 13:38:09+00:00,0 days 01:00:01,13.0,130.0,0,Not specified,1,Nissan Ariya 2024,2024.0
2,9619854,,,2024-07-20 23:50:43+00:00,NaT,NaT,13.0,205.0,0,Not specified,1,BMW i4 M50 2024,2024.0
3,9607081,554351.0,,2024-07-18 12:33:01+00:00,2024-07-18 13:03:00+00:00,0 days 00:29:59,13.0,0.0,0,Not specified,1,Hyundai Ioniq Electric 2019,2019.0
5,9575818,,Charger 3 good to go!,2024-07-11 14:27:16+00:00,NaT,NaT,,,0,Not specified,1,BMW i4 eDrive40 2023,2023.0
6,9565533,554346.0,,2024-07-08 21:26:37+00:00,2024-07-08 22:26:36+00:00,0 days 00:59:59,2.0,0.0,0,Not specified,1,Hyundai Ioniq Electric 2019,2019.0
7,9560329,,,2024-07-07 18:32:11+00:00,2024-07-07 19:32:11+00:00,0 days 01:00:00,13.0,,0,Not specified,1,Ford Mustang Mach-E 2021,2021.0
8,9539670,554352.0,,2024-07-03 17:25:05+00:00,2024-07-03 17:55:04+00:00,0 days 00:29:59,,0.0,0,Not specified,1,Mercedes EQE 350,
9,9526795,554352.0,,2024-06-30 17:10:06+00:00,2024-06-30 17:25:10+00:00,0 days 00:15:04,13.0,245.0,0,Not specified,1,Genesis GV60 2023,2023.0
11,9489888,,,2024-06-22 17:33:17+00:00,2024-06-22 18:33:17+00:00,0 days 01:00:00,13.0,230.0,0,Not specified,1,Kia EV6 2022,2022.0


In [9]:
from seleniumwire2.utils import decode

body = decode(r.response.body, r.response.headers.get("Content-Encoding", "identity"))
body

b'{"access":1,"access_restriction":null,"access_restriction_description":null,"access_restriction_descriptions":[],"access_restriction_items":[],"access_restrictions":[],"address":"6600 Springfield Mall, Springfield, Virginia, 22150","all_promos":[],"amenities":[{"location_id":252784,"type":2},{"location_id":252784,"type":8},{"location_id":252784,"type":3},{"location_id":252784,"type":9},{"location_id":252784,"type":4}],"available_station_count":null,"coming_soon":false,"confidence":2,"connector_types":["CCS/SAE","J-1772"],"cost":true,"cost_description":"Please refer to station details for up to date pricing info.","cpo_id":3,"created_at":"2020-07-27T20:24:59Z","custom_ports":"","datasources":[],"description":"Three 150kW DC Fast Chargers and one J1772 charging station. The extra wide spot has the CHAdeMO connector.","e164_phone_number":"+18336322778","enabled":true,"entrance_latitude":null,"entrance_longitude":null,"formatted_phone_number":"+1 833-632-2778","has_dynamic_pricing":false

In [20]:
from json import loads

df_station = pd.json_normalize(loads(body))
# df_checkins = df_station['rev']
df_station.connector_types = df_station.connector_types.str.join(";")
df_station

Unnamed: 0,access,access_restriction,access_restriction_description,access_restriction_descriptions,access_restriction_items,access_restrictions,address,all_promos,amenities,available_station_count,coming_soon,confidence,connector_types,cost,cost_description,cpo_id,created_at,custom_ports,datasources,description,e164_phone_number,enabled,entrance_latitude,entrance_longitude,formatted_phone_number,has_dynamic_pricing,hours,icon,icon_type,id,in_use_station_count,is_fast_charger,latitude,locale,locale_v2,locked,longitude,majority_network_id,meta_description,name,nissan_nctc,ocpi_ids,open247,opened_at,opening_date,overhead_clearance_meters,parking_attributes,parking_level,parking_type_name,payment_enabled,phone,photos,poi_name,promos,pwps_action,pwps_version,reverse_geocoded_address,reviews,score,station_count,stations,thumbnail_url,title_description,total_photos,total_reviews,under_repair,updated_at,url,valid_outlets,opening_times.exceptional_closings,opening_times.exceptional_openings,opening_times.regular_hours,opening_times.twenty_four_seven,opening_times.twentyfourseven,reverse_geocoded_address_components.administrative_area_1,reverse_geocoded_address_components.administrative_area_2,reverse_geocoded_address_components.administrative_area_3,reverse_geocoded_address_components.country_code,reverse_geocoded_address_components.locality,reverse_geocoded_address_components.postal_code,reverse_geocoded_address_components.route,reverse_geocoded_address_components.street_number,reverse_geocoded_address_components.sublocality_1,reverse_geocoded_address_components.sublocality_2,reverse_geocoded_address_components.sublocality_3
0,1,,,[],[],[],"6600 Springfield Mall, Springfield, Virginia, ...",[],"[{'location_id': 252784, 'type': 2}, {'locatio...",,False,2,CCS/SAE;J-1772,True,Please refer to station details for up to date...,3,2020-07-27T20:24:59Z,,[],Three 150kW DC Fast Chargers and one J1772 cha...,18336322778,True,,,+1 833-632-2778,False,,https://assets.plugshare.com/icons/Y.png,Y,252784,,True,38.775891,US,US,True,-77.171858,47,4 Electric Vehicle (EV) Charging Stations at S...,Springfield Town Center - Target - East Lot (1),False,[200224],True,,,,[PULL_IN],,Free,,18336322778,"[{'caption': '', 'created_at': '2023-07-26T13:...",Shopping Center,[],NO_DISPLAY,,"6600 Springfield Mall, Springfield, VA 22150, USA","[{'amps': None, 'comment': '', 'connector_type...",10.0,4,"[{'amps': None, 'available': 0, 'available_cha...",https://assets.plugshare.com/network-images/el...,Springfield Town Center - Target - East Lot (1...,29,458,False,2024-07-18T12:33:01Z,https://www.plugshare.com/location/252784,"[{'connector': 26, 'image': 'https://assets.pl...",,,,True,True,VA,Fairfax County,Lee,US,Springfield,22150,Springfield Mall,6600,,,


In [83]:
test = df_station.copy(deep=True)
test['photos'] = ';'.join([p['url'] for p in df_station.loc[0, 'photos']])
test

Unnamed: 0,access,access_restriction,access_restriction_description,access_restriction_descriptions,access_restriction_items,access_restrictions,address,all_promos,amenities,available_station_count,coming_soon,confidence,connector_types,cost,cost_description,cpo_id,created_at,custom_ports,datasources,description,e164_phone_number,enabled,entrance_latitude,entrance_longitude,formatted_phone_number,has_dynamic_pricing,hours,icon,icon_type,id,in_use_station_count,is_fast_charger,latitude,locale,locale_v2,locked,longitude,majority_network_id,meta_description,name,nissan_nctc,ocpi_ids,open247,opened_at,opening_date,overhead_clearance_meters,parking_attributes,parking_level,parking_type_name,payment_enabled,phone,photos,poi_name,promos,pwps_action,pwps_version,reverse_geocoded_address,reviews,score,station_count,stations,thumbnail_url,title_description,total_photos,total_reviews,under_repair,updated_at,url,valid_outlets,opening_times.exceptional_closings,opening_times.exceptional_openings,opening_times.regular_hours,opening_times.twenty_four_seven,opening_times.twentyfourseven,reverse_geocoded_address_components.administrative_area_1,reverse_geocoded_address_components.administrative_area_2,reverse_geocoded_address_components.administrative_area_3,reverse_geocoded_address_components.country_code,reverse_geocoded_address_components.locality,reverse_geocoded_address_components.postal_code,reverse_geocoded_address_components.route,reverse_geocoded_address_components.street_number,reverse_geocoded_address_components.sublocality_1,reverse_geocoded_address_components.sublocality_2,reverse_geocoded_address_components.sublocality_3
0,1,,,[],[],[],"6600 Springfield Mall, Springfield, Virginia, ...",[],"[{'location_id': 252784, 'type': 2}, {'locatio...",,False,2,CCS/SAE;J-1772,True,Please refer to station details for up to date...,3,2020-07-27T20:24:59Z,,[],Three 150kW DC Fast Chargers and one J1772 cha...,18336322778,True,,,+1 833-632-2778,False,,https://assets.plugshare.com/icons/Y.png,Y,252784,,True,38.775891,US,US,True,-77.171858,47,4 Electric Vehicle (EV) Charging Stations at S...,Springfield Town Center - Target - East Lot (1),False,[200224],True,,,,[PULL_IN],,Free,,18336322778,https://photos.plugshare.com/photos/1176040.pn...,Shopping Center,[],NO_DISPLAY,,"6600 Springfield Mall, Springfield, VA 22150, USA","[{'amps': None, 'comment': '', 'connector_type...",10.0,4,"[{'amps': None, 'available': 0, 'available_cha...",https://assets.plugshare.com/network-images/el...,Springfield Town Center - Target - East Lot (1...,29,458,False,2024-07-18T12:33:01Z,https://www.plugshare.com/location/252784,"[{'connector': 26, 'image': 'https://assets.pl...",,,,True,True,VA,Fairfax County,Lee,US,Springfield,22150,Springfield Mall,6600,,,


In [None]:
df_station.loc[0, '']

There's a lot of data here that we've intercepted! Way more than I think they probably show on the website itself. API docs were taken offline for some reason, but [here](https://web.archive.org/web/20220727185118/https://developer.plugshare.com/docs/#introduction) is an archive that may help. Some notes on *station* data:

1. What is `amenities` showing us? Seems to be a listing of nice things located at the station (no idea why we have a repeat of the location ID over and over, but [here](https://web.archive.org/web/20220727185118/https://developer.plugshare.com/docs/#amenities-list) is the mapping of type numbers to plaintext descriptions)
2. Useful columns (cross-reference with what we already have in `locationID` table so we don't repeat unnecessarily):
    1. `amenities`
    2. `name`
    3. `description`
    4. `stations`
        * Has a LOT of data about each plug (I think), including things like max and min power output and even the make and model of the EVSE!
        * Also captures via the `available` enum the real-time state of the plug:
            * 0 = Unknown
            * 1 = Available
            * 2 = In Use
            * 3 = Offline
            * 4 = Being repaired
    6. `photos`: I wasn't sure about this one initially, but I like it for the potential to use as visual input data to a model later on if we deem it useful (within reason and legal bounds of course)
    7. `score` AKA PlugScore
    8. `cost_description`: likely a handy thing to know and be able to account for in the data
    9. `access`: mainly because we want to filter for values of `1` which means "open for public use".
    10. `phone`
    11. `address`
    12. `poi_name` AKA `location_type` in our BQ table
    13. `hours`
    14. `open247` flag which will be useful for computational stuff without having to parse `hours`
    15. `coming_soon` flag so we don't route there on accident
    16. `parking_attributes` which include things like 'PULL_THROUGH' and 'PULL_IN' as well as 'TRAILER_FRIENDLY', which could be handy for ideal charger routing
    17. `parking_level` since Z coordinates can matter too!
    18. `overhead_clearance_meters`: I doubt this is non-null very often, but useful if we know vehicle being driven by user and want to warn about overhead issues/not route them there

In [53]:

df_evses = pd.DataFrame(df_station.loc[0, 'stations'])
df_evses.info()
df_evses

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 32 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   amps                     0 non-null      object 
 1   available                4 non-null      int64  
 2   available_changed_at     4 non-null      object 
 3   cost                     4 non-null      int64  
 4   cost_description         4 non-null      object 
 5   cpo_id                   4 non-null      int64  
 6   cpo_name                 4 non-null      object 
 7   created_at               4 non-null      object 
 8   hours                    4 non-null      object 
 9   id                       4 non-null      int64  
 10  kilowatts                4 non-null      float64
 11  latitude                 4 non-null      float64
 12  location_id              4 non-null      int64  
 13  longitude                4 non-null      float64
 14  manufacturer             4 non

Unnamed: 0,amps,available,available_changed_at,cost,cost_description,cpo_id,cpo_name,created_at,hours,id,kilowatts,latitude,location_id,longitude,manufacturer,model,name,network,network_ext_id,network_id,nissan_nctc,ocpi_ids,ocpp_version,outlets,payment_enabled,pre_charge_instructions,preferred_feed_id,promos,pwps_version,qr_enabled,requiresAccessCard,volts
0,,0,2024-07-18T10:45:25Z,2,"$0.48 per kWh, 1-350 kWh\n\nParking Info\n$0.4...",3,Electrify America,2020-07-27T20:25:01Z,,554346,7.0,38.77602,252784,-77.172,BTC,EVP-2001-30,200224-50,"{'description': None, 'e164_phone_number': '+1...",200224-50,47,False,[EA0139],,"[{'amps': 30, 'available': 0, 'available_chang...",,,,[],,,False,
1,,0,2024-07-18T10:45:36Z,2,"$0.48 per kWh, 1-350 kWh\n\nParking Info\n$0.4...",3,Electrify America,2020-07-27T20:43:20Z,,554351,350.0,38.77602,252784,-77.172,BTC,HPCD6-500-05-005,200224-01,"{'description': None, 'e164_phone_number': '+1...",200224-01,47,False,"[BT112236L0572, BT08200221020]",,"[{'amps': 350, 'available': 0, 'available_chan...",,,,[],,,False,
2,,0,2024-07-18T10:45:36Z,2,"$0.48 per kWh, 1-350 kWh\n\nParking Info\n$0.4...",3,Electrify America,2020-07-27T20:43:23Z,,554352,350.0,38.77602,252784,-77.172,BTC,HPCD6-500-05-005,200224-02,"{'description': None, 'e164_phone_number': '+1...",200224-02,47,False,"[BT08200221021, BT112248L0613]",,"[{'amps': 350, 'available': 0, 'available_chan...",,,,[],,,False,
3,,0,2024-07-18T10:45:38Z,0,"$0.48 per kWh, 1-350 kWh\n\nParking Info\n$0.4...",3,Electrify America,2020-07-27T20:43:26Z,,554353,350.0,38.77602,252784,-77.172,BTC,HPCD6-500-05-005,200224-03,"{'description': None, 'e164_phone_number': '+1...",200224-03,47,False,"[BT112318D0184, BT07192920811]",,"[{'amps': 350, 'available': 0, 'available_chan...",,,,[],,,False,


"stations" as Plugshare calls them, are what I would call "EVSEs" or "charging units". Basically it's the tower with a single screen that can be operated to charge a vehicle. Often at Tesla Supercharger sites there's only one plug per EVSE but for others there are usually two (and if one of the two plugs is being used, the other can't be typically). 

Columns of interest:

1. `id` of course
1. `name`: (apparently it's the thing the customer sees on physical machine AKA 'the pump')
2. `network`['name']: we really don't need the other network info so may as well just capture the name
2. `kilowatts` will be useful for determining max possible charging
3. `manufacturer`
4. `model` along with manufacturer could be useful for getting a good idea of likely charging speed when paired with knowledge of driver's vehicle
5. `location_id` to make it easy to match to what we call a station
6. `available` since we may as well, even if it's not likely to be populated often.

In [60]:
# Explode to grab EVSE ID for mapping
df_plugs = pd.DataFrame(df_evses['outlets'].explode().tolist())
df_plugs['evse_id'] = df_evses.explode('outlets')['id'].values
df_plugs

Unnamed: 0,amps,available,available_changed_at,connector,connector_name,connector_type,description,evse_ext_id,id,kilowatts,network_ext_id,ocpi_id,outlet_index,power,prices,status,status_changed_at,volts,evse_id
0,30,0,,2,J-1772,2,,EA0139,3598478,7.0,,1,,0,"[{'cpo_changed_at': None, 'created_at': '2024-...",UNKNOWN,2024-05-10T10:45:21Z,240,554346
1,350,0,,13,CCS/SAE,13,,BT112236L0572,3594949,350.0,,1,,0,"[{'cpo_changed_at': None, 'created_at': '2024-...",UNKNOWN,2024-05-08T12:02:48Z,1000,554351
2,350,0,,13,CCS/SAE,13,,BT112248L0613,3596163,350.0,,1,,0,"[{'cpo_changed_at': None, 'created_at': '2024-...",UNKNOWN,2024-05-09T00:01:55Z,1000,554352
3,350,0,,13,CCS/SAE,13,,BT112318D0184,3598485,350.0,,1,,0,"[{'cpo_changed_at': None, 'created_at': '2024-...",UNKNOWN,2024-05-10T10:45:23Z,1000,554353
4,350,0,,13,CCS/SAE,13,,BT07192920811,3708954,350.0,,1,,0,[],UNKNOWN,2024-06-26T00:01:45Z,1000,554353


In [86]:
from pprint import pprint

df_checkins = pd.DataFrame(df_station.loc[0, 'reviews'])
df_checkins.info()
df_checkins

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 26 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   amps                            0 non-null      object 
 1   comment                         50 non-null     object 
 2   connector_type                  29 non-null     float64
 3   created_at                      50 non-null     object 
 4   finished                        33 non-null     object 
 5   id                              50 non-null     int64  
 6   is_visible                      50 non-null     bool   
 7   kilowatts                       36 non-null     float64
 8   language                        24 non-null     object 
 9   problem                         50 non-null     int64  
 10  problem_description             50 non-null     object 
 11  rating                          50 non-null     int64  
 12  response                        0 non-

Unnamed: 0,amps,comment,connector_type,created_at,finished,id,is_visible,kilowatts,language,problem,problem_description,rating,response,station_id,user,vehicle_default_img,vehicle_make,vehicle_make_image_url,vehicle_make_profile_image_url,vehicle_name,vehicle_type,volts,waiting,spam_category,spam_category_description,outlet_id
0,,,13.0,2024-07-18T12:33:01Z,2024-07-18T13:03:00Z,9607081,True,0.0,,0,Not specified,1,,554351.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Hyundai,https://assets.plugshare.com/vehicles/makes/im...,https://assets.plugshare.com/vehicles/makes/pr...,Hyundai Ioniq Electric 2019,391,,False,,,
1,,This was the first time I had charged my new c...,13.0,2024-07-16T11:33:47Z,,9599015,True,238.0,eng,0,Not specified,1,,554353.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Hyundai,,,Hyundai Ioniq 6 2024,1250,,False,100.0,GeoDiscrepancy,
2,,Charger 3 good to go!,,2024-07-11T14:27:16Z,,9575818,True,,eng,0,Not specified,1,,,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,BMW,,,BMW i4 eDrive40 2023,788,,False,,,
3,,,2.0,2024-07-08T21:26:37Z,2024-07-08T22:26:36Z,9565533,True,0.0,,0,Not specified,1,,554346.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Hyundai,https://assets.plugshare.com/vehicles/makes/im...,https://assets.plugshare.com/vehicles/makes/pr...,Hyundai Ioniq Electric 2019,391,,False,,,3598478.0
4,,,13.0,2024-07-07T18:32:11Z,2024-07-07T19:32:11Z,9560329,True,,,0,Not specified,1,,,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Ford,https://assets.plugshare.com/vehicles/makes/im...,https://assets.plugshare.com/vehicles/makes/pr...,Ford Mustang Mach-E 2021,398,,False,,,
5,,,,2024-07-03T17:25:05Z,2024-07-03T17:55:04Z,9539670,True,0.0,,0,Not specified,1,,554352.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Mercedes,,,Mercedes EQE 350,817,,False,,,
6,,,13.0,2024-06-30T17:10:06Z,2024-06-30T17:25:10Z,9526795,True,245.0,,0,Not specified,1,,554352.0,"{'about': '', 'allow_notifications': None, 'al...",,Genesis,,,Genesis GV60 2023,994,,False,,,
7,,,13.0,2024-06-30T00:14:27Z,,9523462,True,242.0,,0,Not specified,1,,554352.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Kia,,,Kia EV6 2024,1279,,False,100.0,GeoDiscrepancy,
8,,,13.0,2024-06-22T17:33:17Z,2024-06-22T18:33:17Z,9489888,True,230.0,,0,Not specified,1,,,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Kia,,,Kia EV6 2022,653,,False,,,
9,,,2.0,2024-06-21T14:30:50Z,2024-06-21T17:30:50Z,9483716,True,,,0,Not specified,1,,554346.0,"{'about': '', 'allow_notifications': None, 'al...",https://assets.plugshare.com/vehicles/makes/mo...,Chevrolet,https://assets.plugshare.com/vehicles/makes/im...,https://assets.plugshare.com/vehicles/makes/pr...,Chevrolet Bolt EV 2017,63,,False,,,3598478.0


Now some notes on checkins data:
    
1. `comment` is of course critical
2. `created_at` is handy to know when the data were generated
3. `finished`: useful for timedelta on how long the charge took *BUT* problem is that this may be simply the user's estimate, not the actual time it took thema
    * Could have utility in terms of weighting how seriously we take a comment though, with unrealistic estimates suggesting someone is a n00b EV driver and comments should be assessed as such?
4. `id` may be useful for later referencing the checkin data but not critical
5. `connector_type` for the plug they used (need to get the Enum values for this)
6. `kilowatts`: obviously useful, likely a maximum often but sometimes max observed during actual charge
7. `problem_description`: will be "Not specified" if there was no problem
8. `rating`
    * 1 = positive/successful
    * 0 = Neutral/providing tips to other drivers
    * -1 = Trouble charging or other problem
10. `station_id` AKA `evse_id`. May be useful as a correlate to sentiment in `rating` (e.g. a certain plug is known to be bad over long time periods and thus should drag down our reliability score for the entire station)
11. `vehicle_name` for user/driver
12. `vehicle_type` may be useful to understand how long they should vs. did charge, but it's an integer so I need to figure out where the category mapping is...
13. `spam_category` simply for checking if it's not null and downgrading (or filtering out) the reviews that may be spam

In [148]:
df_evses.columns

Index(['id', 'name', 'network_names', 'kilowatts', 'manufacturer', 'model',
       'location_id', 'available'],
      dtype='object')

In [153]:
body = decode(r.response.body, r.response.headers.get("Content-Encoding", "identity"))
df_station = pd.json_normalize(loads(body))
df_station.loc[0]

access                                                                                                       1
access_restriction                                                                                        None
access_restriction_description                                                                            None
access_restriction_descriptions                                                                             []
access_restriction_items                                                                                    []
access_restrictions                                                                                         []
address                                                      6600 Springfield Mall, Springfield, Virginia, ...
all_promos                                                                                                  []
amenities                                                    [{'location_id': 252784, 'type': 2}, {'locatio...
a

In [182]:
df_station.parking.str.join(';')

0    PULL_IN
Name: parking, dtype: object

In [183]:
df_station, df_checkins, df_evses = s._parse_api_response(r)
print(df_station.shape)
df_station

Unnamed: 0,location_id,name,description,amenities,photos,plugscore,evse_count,access,phone,address,location_type,service_hours,open247,coming_soon,parking,parking_level,overhead_clearance_meters,checkin_count,kilowatts_max,network
0,252784,Springfield Town Center - Target - East Lot (1),Three 150kW DC Fast Chargers and one J1772 cha...,2;8;3;9;4,https://photos.plugshare.com/photos/1176040.pn...,10.0,4,1,18336322778,"6600 Springfield Mall, Springfield, Virginia, ...",Shopping Center,,True,False,PULL_IN,,,458,350.0,Electrify America


In [168]:
df_evses.head()

Unnamed: 0,id,name,network_names,kilowatts,manufacturer,model,station_id,available
0,554346,200224-50,Electrify America,7.0,BTC,EVP-2001-30,252784,0
1,554351,200224-01,Electrify America,350.0,BTC,HPCD6-500-05-005,252784,0
2,554352,200224-02,Electrify America,350.0,BTC,HPCD6-500-05-005,252784,0
3,554353,200224-03,Electrify America,350.0,BTC,HPCD6-500-05-005,252784,0


In [38]:
s.driver.quit()

2024-07-19_T23_57_48EDT: INFO (mitmproxy.proxy.mode_servers:L154) - HTTP(S) proxy at 127.0.0.1:49732 stopped.
