In [1]:
import json
import pandas as pd
import requests
import numpy as np
import os

from pathlib import Path
from typing import Union
from collections import defaultdict
from sqlalchemy import create_engine
import sqlite3
import hvplot.pandas
import holoviews as hv
import geoviews as gv
gv.extension('bokeh')
from scipy.stats import linregress
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, output_file, show 
from citipy import citipy
import geopandas as gpd

In [2]:
# see website for information:
# https://developer.nrel.gov/docs/transportation/alt-fuel-stations-v1/all/

api_key = 'AWMepE53xJxr8Qu8yP3w6J6PLAf3ye6sO7Fsdvlc'
base_url = 'https://developer.nrel.gov/api/alt-fuel-stations/v1.json'

# List of state codes
state_codes = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA',
               'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK',
               'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']

# Variable to store the total number of electric vehicle stations in the US
total_stations_us = 0

# Dictionary to store the totals for each state
state_totals = {}

for state_code in state_codes:
    query_params = {
        'fuel_type_code': 'ELEC',
        'state': state_code,
        'country': 'US',
        'api_key': api_key
    }

    # Make the API request
    response = requests.get(base_url, params=query_params)

    # Parse the JSON response
    data = response.json()

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Get the total number of electric vehicle stations for the specified state
        total_stations = data.get('total_results', 0)

        # Increment the total stations in the US variable
        total_stations_us += total_stations

        # Store the result in the dictionary
        state_totals[state_code] = total_stations

    else:
        # Print an error message if the request was not successful
        print(f'Error: {response.status_code} - {response.text}')

# Sort the dictionary by state in decreasing order based on total stations
sorted_totals = dict(sorted(state_totals.items(), key=lambda item: item[1], reverse=True))

# Create a folder named 'data_query' if it doesn't exist
output_folder = 'data_query'
os.makedirs(output_folder, exist_ok=True)

# Save the sorted totals to a JSON file in the 'data_query' folder
state_data_file_path = os.path.join(output_folder, 'electric_stations_by_state.json')
with open(state_data_file_path, 'w', encoding='utf-8') as state_data_file:
    json.dump(sorted_totals, state_data_file, ensure_ascii=False, indent=4)

# Print the total number of electric vehicle stations in the US
print(f'Total electric vehicle stations in the US for fuel type ELEC: {total_stations_us}')

print(f'The sorted totals have been saved to: {state_data_file_path}')


Total electric vehicle stations in the US for fuel type ELEC: 79633
The sorted totals have been saved to: data_query\electric_stations_by_state.json


In [3]:
api_key = 'AWMepE53xJxr8Qu8yP3w6J6PLAf3ye6sO7Fsdvlc'
base_url = 'https://developer.nrel.gov/api/alt-fuel-stations/v1.json'

# List of all US state codes
state_codes = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA',
    'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK',
    'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
]

# List of attributes to retrieve, including the new ones
attributes_to_retrieve = [
    'id', 'station_name', 'street_address', 'city', 'state', 'zip',
    'latitude', 'longitude', 'owner_type_code', 'ev_connector_types', 'ev_pricing',
    'geocode_status', 'access_code', 'ev_level1_evse_num', 'ev_level2_evse_num', 'ev_dc_fast_num', 'ev_other_evse'
]

# List to store detailed information for stations meeting the criteria
filtered_stations = []

for state_code in state_codes:
    query_params = {
        'fuel_type_code': 'ELEC',
        'state': state_code,
        'country': 'US',
        'restricted_access': 'false',  # Add restricted_access parameter
        'api_key': api_key
    }

    # Make the API request
    response = requests.get(base_url, params=query_params)

    # Parse the JSON response
    data = response.json()

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Add detailed information for each station meeting the criteria
        filtered_stations.extend([
            {attr: station[attr] for attr in attributes_to_retrieve}
            for station in data.get('fuel_stations', [])
        ])

    else:
        # Print an error message if the request was not successful
        print(f'Error: {response.status_code} - {response.text}')

# Print the total count of electric vehicle stations in the US after filtering
total_us_stations = sum(len(data.get('fuel_stations', [])) for state_code in state_codes)
print(f'Total electric vehicle stations in the US after filtering: {total_us_stations}')

# Create a folder named 'data_query' if it doesn't exist
output_folder = 'data_query'
os.makedirs(output_folder, exist_ok=True)

# Save the filtered stations to a JSON file in the 'data_query' folder
output_file_path = os.path.join(output_folder, 'filtered_stations_attributes.json')
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    json.dump(filtered_stations, output_file, ensure_ascii=False, indent=4)
    
    
print(f'The filtered stations with specified attributes have been saved to: {output_file_path}')
        

Total electric vehicle stations in the US after filtering: 7100
The filtered stations with specified attributes have been saved to: data_query\filtered_stations_attributes.json


In [4]:
# Read in JSON previously created from API request
output_file_path = Path("data_query/filtered_stations_attributes.json")

# Load data from the JSON file
if output_file_path.exists():
    with output_file_path.open("r", encoding="utf8") as f:
        station_data = json.load(f)

# Create a DataFrame from the loaded data
station_df = pd.DataFrame(station_data)

# Organize the columns in the specified sequence
station_df = station_df[['id', 'station_name', 'street_address', 'city', 'state', 'zip',
         'latitude', 'longitude', 'owner_type_code', 'ev_connector_types', 'ev_pricing',
         'geocode_status', 'access_code', 'ev_level1_evse_num', 'ev_level2_evse_num', 'ev_dc_fast_num', 'ev_other_evse']]

# Display the DataFrame
# print(station_df.columns)

station_df.head()


Unnamed: 0,id,station_name,street_address,city,state,zip,latitude,longitude,owner_type_code,ev_connector_types,ev_pricing,geocode_status,access_code,ev_level1_evse_num,ev_level2_evse_num,ev_dc_fast_num,ev_other_evse
0,17,Spire - Montgomery Operations Center,2951 Chestnut St,Montgomery,AL,36107,32.367916,-86.267021,T,,,200-9,private,,,,
1,597,Spire,2828 Dauphin St,Mobile,AL,36606,30.689832,-88.108861,T,,,200-9,private,,,,
2,13712,Gala Gas Co Inc,54 Old US Highway 82,Eufaula,AL,36027,31.912022,-85.150318,P,,,GPS,private,,,,
3,13723,Superior Gas,421 Noble St,Anniston,AL,36201,33.642079,-85.828466,P,,,GPS,public,,,,
4,13725,Superior Gas,702 Memorial Dr,Piedmont,AL,36272,33.921589,-85.620314,P,,,200-9,public,,,,


In [5]:
api_key = 'AWMepE53xJxr8Qu8yP3w6J6PLAf3ye6sO7Fsdvlc'
base_url = 'https://developer.nrel.gov/api/alt-fuel-stations/v1.json'

# Set the state parameter to 'CA' for California
state_code = 'CA'

# List of attributes to retrieve for map plotting
attributes_to_retrieve = [
    'id', 'station_name', 'street_address', 'city', 'state', 'zip',
    'latitude', 'longitude', 'owner_type_code', 'ev_connector_types', 'ev_pricing',
    'geocode_status', 'access_code'
]

# List to store detailed information for stations meeting the criteria
filtered_stations = []

query_params = {
    'fuel_type_code': 'ELEC',
    'state': state_code,
    'country': 'US',
    'restricted_access': 'false',  # Add restricted_access parameter
    'api_key': api_key
}

# Make the API request
response = requests.get(base_url, params=query_params)

# Parse the JSON response
data = response.json()

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Add detailed information for each station meeting the criteria
    for station in data.get('fuel_stations', []):
        # Check if all essential attributes have usable values
        if all(
            station.get(attr) is not None and station.get(attr) != ''
            for attr in attributes_to_retrieve
        ):
            # Combine 'ev_level1_evse_num', 'ev_level2_evse_num', 'ev_dc_fast_num', 'ev_other_evse'
            evse_combined = {
                'evse_combined': {
                    'ev_level1': station.get('ev_level1_evse_num', 0),
                    'ev_level2': station.get('ev_level2_evse_num', 0),
                    'ev_dc_fast': station.get('ev_dc_fast_num', 0),
                    'ev_other': station.get('ev_other_evse', 0)
                }
            }
            station.update(evse_combined)

            # Remove the individual evse attributes from the station
            for evse_attr in ['ev_level1_evse_num', 'ev_level2_evse_num', 'ev_dc_fast_num', 'ev_other_evse']:
                del station[evse_attr]

            filtered_stations.append({attr: station[attr] for attr in attributes_to_retrieve})

else:
    # Print an error message if the request was not successful
    print(f'Error: {response.status_code} - {response.text}')

# Create a DataFrame from the loaded data
df = pd.DataFrame(filtered_stations)

# Display the DataFrame
print(df)

# Print the total count of electric vehicle stations in California after filtering
total_ca_stations = len(df)
print(f'Total electric vehicle stations in California after filtering: {total_ca_stations}')

# Create a folder named 'data_query' if it doesn't exist
output_folder = 'data_query'
os.makedirs(output_folder, exist_ok=True)

# Save the filtered stations to a JSON file in the 'data_query' folder
output_file_path = os.path.join(output_folder, 'filtered_stations_combined_evse_CA.json')
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    json.dump(filtered_stations, output_file, ensure_ascii=False, indent=4)

print(f'The filtered stations with combined EVSE attributes in California have been saved to: {output_file_path}')


          id                                   station_name  \
0       1523                  Los Angeles Convention Center   
1       1556                            LADWP - Sylmar West   
2       1573                         LADWP - Fairfax Center   
3       1583                 California Air Resources Board   
4       6355                         Scripps Green Hospital   
...      ...                                            ...   
1763  323695  El Dorado County Department of Transportation   
1764  323696                             Georgetown Library   
1765  323697                 El Dorado County Senior Center   
1766  323703                                 7Charge - Napa   
1767  323706                         7Charge - Walnut Creek   

               street_address          city state    zip   latitude  \
0          1201 S Figueroa St   Los Angeles    CA  90015  34.040539   
1        13201 Sepulveda Blvd        Sylmar    CA  91342  34.303090   
2          2311 S Fairfax Ave 

In [6]:
df.dtypes

id                      int64
station_name           object
street_address         object
city                   object
state                  object
zip                    object
latitude              float64
longitude             float64
owner_type_code        object
ev_connector_types     object
ev_pricing             object
geocode_status         object
access_code            object
dtype: object

In [7]:
df.head(30)


Unnamed: 0,id,station_name,street_address,city,state,zip,latitude,longitude,owner_type_code,ev_connector_types,ev_pricing,geocode_status,access_code
0,1523,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,34.040539,-118.271387,P,[J1772],Free; parking fee,GPS,public
1,1556,LADWP - Sylmar West,13201 Sepulveda Blvd,Sylmar,CA,91342,34.30309,-118.480505,LG,[J1772],Free,200-8,private
2,1573,LADWP - Fairfax Center,2311 S Fairfax Ave,Los Angeles,CA,90016,34.036777,-118.368841,LG,[J1772],Free,200-8,private
3,1583,California Air Resources Board,9530 Telstar Ave,El Monte,CA,91731,34.06872,-118.064,SG,[J1772],Free,GPS,public
4,6355,Scripps Green Hospital,10666 N Torrey Pines Rd,La Jolla,CA,92037,32.89947,-117.243,P,[J1772],Free; parking fee,GPS,public
5,6425,Galleria at Tyler,1299 Galleria at Tyler,Riverside,CA,92503,33.909914,-117.459053,P,[J1772],Free,GPS,public
6,6505,City of Pasadena - Holly Street Garage,150 E Holly St,Pasadena,CA,91103,34.14762,-118.147111,LG,[J1772],Free,200-8,public
7,6506,City of Pasadena - De Lacey Garage,45 De Lacey Ave,Pasadena,CA,91105,34.145138,-118.152655,LG,[J1772],Free; parking fee,GPS,public
8,6507,City of Pasadena - Schoolhouse Garage,33 E Green St,Pasadena,CA,91105,34.145119,-118.150133,LG,[J1772],"$1 for the first two hours, $2 each additional...",GPS,public
9,7172,Hyatt Regency Long Beach,200 S Pine Ave,Long Beach,CA,90802,33.7637,-118.192,P,[J1772],Free,GPS,public


In [8]:
df.columns


Index(['id', 'station_name', 'street_address', 'city', 'state', 'zip',
       'latitude', 'longitude', 'owner_type_code', 'ev_connector_types',
       'ev_pricing', 'geocode_status', 'access_code'],
      dtype='object')

In [9]:
df['owner_type_code']

0        P
1       LG
2       LG
3       SG
4        P
        ..
1763    LG
1764     P
1765    LG
1766     P
1767     P
Name: owner_type_code, Length: 1768, dtype: object

In [10]:
#  create ColumnDataSource from data frame

source = ColumnDataSource(df)

In [11]:
hover = HoverTool(tooltips=[('zip', '@zip')])

map_plot_3 = df.hvplot.points(
    'longitude',
    'latitude',
    geo=True,
    tiles='OSM',
    frame_width=800,
    frame_height=600,
    size=1.0,
    scale=1.0,
    color='city',
   
 
)

# # map_plot_3=df.hvplot.points(
# #    'longitude',
# #     'latitude',
# #     geo = True,
# #     tiles = 'OSM',
# #     frame_width = 800,
# #     frame_height = 600,
# #     size = 0.5,
# #     scale = 0.5,
# #     color = 'city',
# #     tools = [hover],
# # #     hover_cols='all'
# )

# Display the map
map_plot_3    

In [13]:
df.dtypes()


TypeError: 'Series' object is not callable

In [14]:
df

Unnamed: 0,id,station_name,street_address,city,state,zip,latitude,longitude,owner_type_code,ev_connector_types,ev_pricing,geocode_status,access_code
0,1523,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,34.040539,-118.271387,P,[J1772],Free; parking fee,GPS,public
1,1556,LADWP - Sylmar West,13201 Sepulveda Blvd,Sylmar,CA,91342,34.303090,-118.480505,LG,[J1772],Free,200-8,private
2,1573,LADWP - Fairfax Center,2311 S Fairfax Ave,Los Angeles,CA,90016,34.036777,-118.368841,LG,[J1772],Free,200-8,private
3,1583,California Air Resources Board,9530 Telstar Ave,El Monte,CA,91731,34.068720,-118.064000,SG,[J1772],Free,GPS,public
4,6355,Scripps Green Hospital,10666 N Torrey Pines Rd,La Jolla,CA,92037,32.899470,-117.243000,P,[J1772],Free; parking fee,GPS,public
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1763,323695,El Dorado County Department of Transportation,2441 Headington Rd,Placerville,CA,95667,38.715353,-120.842135,LG,[J1772],Free,GPS,public
1764,323696,Georgetown Library,6680 Orleans St,Georgetown,CA,95634,38.906376,-120.837532,P,[J1772],Free,GPS,public
1765,323697,El Dorado County Senior Center,937 Spring St,Placerville,CA,95667,38.734068,-120.803026,LG,[J1772],24 hours daily,GPS,public
1766,323703,7Charge - Napa,490 Devlin Rd,Napa,CA,94558,38.222550,-122.261940,P,"[CHADEMO, J1772COMBO]",$0.46 per kWh; $1 per minute idle fee after a ...,200-8,public


In [15]:
df2 = df[['id', 'station_name', 'street_address', 'city', 'state', 'zip',
         'latitude', 'longitude', 'owner_type_code']]
df2

Unnamed: 0,id,station_name,street_address,city,state,zip,latitude,longitude,owner_type_code
0,1523,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,34.040539,-118.271387,P
1,1556,LADWP - Sylmar West,13201 Sepulveda Blvd,Sylmar,CA,91342,34.303090,-118.480505,LG
2,1573,LADWP - Fairfax Center,2311 S Fairfax Ave,Los Angeles,CA,90016,34.036777,-118.368841,LG
3,1583,California Air Resources Board,9530 Telstar Ave,El Monte,CA,91731,34.068720,-118.064000,SG
4,6355,Scripps Green Hospital,10666 N Torrey Pines Rd,La Jolla,CA,92037,32.899470,-117.243000,P
...,...,...,...,...,...,...,...,...,...
1763,323695,El Dorado County Department of Transportation,2441 Headington Rd,Placerville,CA,95667,38.715353,-120.842135,LG
1764,323696,Georgetown Library,6680 Orleans St,Georgetown,CA,95634,38.906376,-120.837532,P
1765,323697,El Dorado County Senior Center,937 Spring St,Placerville,CA,95667,38.734068,-120.803026,LG
1766,323703,7Charge - Napa,490 Devlin Rd,Napa,CA,94558,38.222550,-122.261940,P


In [17]:
df2.dtypes

id                   int64
station_name        object
street_address      object
city                object
state               object
zip                 object
latitude           float64
longitude          float64
owner_type_code     object
dtype: object

In [19]:
df2['station_name'] = df2['station_name'].astype("string")
df2['street_address'] = df2['street_address'].astype("string")
df2['city'] = df2['city'].astype("string")
df2['state'] = df2['state'].astype("string")
df2['zip'] = df2['zip'].astype(np.int64)
df2['owner_type_code'] = df2['owner_type_code'].astype("string")
df2.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['station_name'] = df2['station_name'].astype("string")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['street_address'] = df2['street_address'].astype("string")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['city'] = df2['city'].astype("string")
A value is trying to be set on a copy of

id                          int64
station_name       string[python]
street_address     string[python]
city               string[python]
state              string[python]
zip                         int64
latitude                  float64
longitude                 float64
owner_type_code    string[python]
dtype: object

In [20]:
df2

Unnamed: 0,id,station_name,street_address,city,state,zip,latitude,longitude,owner_type_code
0,1523,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,34.040539,-118.271387,P
1,1556,LADWP - Sylmar West,13201 Sepulveda Blvd,Sylmar,CA,91342,34.303090,-118.480505,LG
2,1573,LADWP - Fairfax Center,2311 S Fairfax Ave,Los Angeles,CA,90016,34.036777,-118.368841,LG
3,1583,California Air Resources Board,9530 Telstar Ave,El Monte,CA,91731,34.068720,-118.064000,SG
4,6355,Scripps Green Hospital,10666 N Torrey Pines Rd,La Jolla,CA,92037,32.899470,-117.243000,P
...,...,...,...,...,...,...,...,...,...
1763,323695,El Dorado County Department of Transportation,2441 Headington Rd,Placerville,CA,95667,38.715353,-120.842135,LG
1764,323696,Georgetown Library,6680 Orleans St,Georgetown,CA,95634,38.906376,-120.837532,P
1765,323697,El Dorado County Senior Center,937 Spring St,Placerville,CA,95667,38.734068,-120.803026,LG
1766,323703,7Charge - Napa,490 Devlin Rd,Napa,CA,94558,38.222550,-122.261940,P


In [21]:
# hover = HoverTool(tooltips=[('owner_type_code', '@owner_type_code')])

map_plot_4 = df2.hvplot.points(
    'longitude',
    'latitude',
    geo=True,
    tiles='OSM',
    frame_width=800,
    frame_height=600,
    size=0.5,
    scale=0.5,
    color='city',
    hover_cols='all'
)
    
    # Display the map
map_plot_4   

In [22]:
#  export to csvd

df2.to_csv("data_query/California.csv", index_label="Index")

In [29]:
conn = sqlite3.connect('california2.db')

In [26]:
# create_sql = "CREATE TABLE IF NOT EXISTS California (id INTEGER, station_name TEXT, street_address TEXT, city TEXT, state TEXT, zip INTEGER, latitude INTEGER, longitude INTEGER, owner_type_code TEXT)"

# cursor = conn.cursor()

# cursor.execute(create_sql)


<sqlite3.Cursor at 0x270d3a367c0>

In [27]:
# for row in df2.itertuples():
#     insert_sql = f"INSERT INTO California (id, station_name, street_address, city, state, zip, latitude, longitude, owner_type_code) VALUES ({row[1]}, '{row[2]}', '{row[3]}', '{row[4]}', '{row[5]}', {row[6]}, {row[7]}, {row[8]}, '{row[9]}')"
#     cursor.execute(insert_sql)
    


OperationalError: near "s": syntax error

In [28]:
# for row in df2.itertuples():
#     insert_sql = f"INSERT INTO California (id, station_name, street_address, city, state, zip, latitude, longitude, owner_type_code) VALUES ({row[1]}, '{row[2]}', '{row[3]}', '{row[4]}', '{row[5]}', {row[6]}, {row[7]}, {row[8]}, '{row[9]}')"
#     cursor.execute(insert_sql)

OperationalError: near "s": syntax error

In [30]:
#  DataFrame all at once method 
df2.to_sql(name="California", con=conn, if_exists='replace', index=False)

1768

In [31]:
conn.commit()