# URBN PL 213 - Final Project: Data Request
### Kongpob Leemingsawat
### 5/13/2023

# Library

In [2]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd 
import numpy as np 
import json
import requests
from IPython.display import display

# Pull Data

#### Description: 
This section obtains electric vehicle (EV) charging station data for Los Angeles County from the National Renewable Energy Laboratory (NREL) (1) in four steps:

1. Import the CalEnviroScreen dataset (2) and extract a list containing only the zip codes in Los Angeles County.
2. Develop a function that includes the necessary parameters to request data from the National Renewable Energy Laboratory.
3. Loop over the zip codes, request the stations for each one from the National Renewable Energy Laboratory, and save one CSV file per zip code.
4. Compile a master dataset that combines all the datasets obtained in step 3.


#### Reference: 
1. National Renewable Energy Laboratory: [Documentation](https://developer.nrel.gov/docs/transportation/alt-fuel-stations-v1/all/#request-parameters) 
2. CalEnviroScreen: [Documentation](https://oehha.ca.gov/calenviroscreen)

#### STEP 1:

In [3]:
# geopandas provides gpd.read_file below but is not loaded by the notebook's import cell,
# so this cell raised NameError on a fresh kernel. Importing it here keeps the cell runnable
# on its own (ideally this line lives in the import cell at the top).
import geopandas as gpd

# import data - CalEnviroScreen
calenviroscreen = gpd.read_file("Data/CalEnviroScreen/CES4 Final Shapefile.shp")

# filter the data to cover only Los Angeles
# (.copy() gives an independent frame instead of a slice of the full table)
calenviroscreen_LA = calenviroscreen[calenviroscreen['County'] == 'Los Angeles'].copy()

# count the rows per zip code; value_counts() drops missing zips and orders the
# result from most to least frequent, which is the order the requests are sent in
cal_zip = calenviroscreen_LA['ZIP'].value_counts()
cal_zip_df = pd.DataFrame({'zip': cal_zip.index, 'count': cal_zip.values})

# convert to list of strings (one entry per distinct zip code)
ziplist = cal_zip_df["zip"].astype(str).tolist()

# fail early with a clear message rather than silently requesting nothing in STEP 3
if not ziplist:
    raise ValueError("No zip codes found for County == 'Los Angeles'; check the shapefile and the county label")

#### STEP 2:

In [None]:
def get_EVS(zip):
    """Request the electric charging stations registered under one zip code.

    Sends a GET request to the NREL alternative-fuel-stations endpoint, restricted to
    electric stations ("fuel_type" = "ELEC") in California ("state" = "CA") for the
    given zip code, and converts the "fuel_stations" list of the JSON answer to a table.

    Parameters
    ----------
    zip : str or int
        Zip code to query. The name shadows the ``zip`` builtin inside this function;
        it is kept unchanged so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per element of the response's "fuel_stations" list (empty if the list is empty).

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    ValueError
        If the response body contains no "fuel_stations" entry.
    """
    import os  # function-scope import: the notebook's import cell does not load `os`

    url = "https://developer.nrel.gov/api/alt-fuel-stations/v1.json"
    # Prefer a key supplied through the NREL_API_KEY environment variable.
    # NOTE(review): the literal fallback keeps the notebook running as before, but a key
    # committed to a notebook is effectively public -- rotate it and drop the fallback.
    APIkey = os.environ.get("NREL_API_KEY") or "vgWG8OuzEuSucgBMCVkvH2oBDGkOD62MVH7s97qJ"
    params ={
        "api_key" : APIkey,
        "status" : "all", # E = Available / P = Planned / T = Temporarily Unavailable / all = All 
        "access" : "all", # public = Public / private = Private
        "fuel_type" : "ELEC", # ELEC = Electric
        "cards_accepted" : "all", 
        "owner_type" : "all",
        "federal_agency" : "all",
        "federal_agency_id" : "all",
        "cng_fill_type" : "all",
        "cng_vehicle_class" : "all",
        "ev_network" : "all",
        "ev_charging_level" : "all",
        "ev_connector_type" : "all",
        "state" : "CA", 
        "zip" : zip, # LA zipcode 90001 to 90899 and 91001 to 93599
        "country" : "US",
        # Cap on the number of stations returned by this single request.
        # NOTE(review): a zip code with more than 200 stations would be cut off here --
        # confirm in the API documentation whether a larger value is accepted.
        "limit" : "200",
    }

    # Request (the timeout stops one stalled connection from hanging the whole loop)
    r = requests.get(url, params = params, timeout = 30)
    # Surface HTTP failures (bad key, rate limit, ...) as HTTP errors instead of a
    # confusing KeyError on "fuel_stations" further down
    r.raise_for_status()
    json_read = r.json()

    if "fuel_stations" not in json_read:
        raise ValueError(
            "NREL response for zip " + str(zip) + " has no 'fuel_stations' entry; "
            "keys received: " + str(sorted(json_read))
        )

    # Return
    return pd.DataFrame(json_read["fuel_stations"])

#### STEP 3:

In [8]:
import os  # cell-local import: `os` is otherwise only imported further down, in STEP 4

# Folder that collects one CSV per zip code; STEP 4 reads every CSV found in it.
station_folder = 'Data/Station_Data_Request'

# True (default) re-requests every zip code, as before. Set to False to resume an
# interrupted run without re-requesting zip codes whose CSV is already saved.
overwrite_existing = True

# to_csv does not create missing folders, so make sure the target exists first
os.makedirs(station_folder, exist_ok=True)

for i in ziplist:
    out_path = os.path.join(station_folder, str(i) + '.csv')
    if not overwrite_existing and os.path.exists(out_path):
        continue
    EV_data = get_EVS(i)
    print(i)
    EV_data.to_csv(out_path, index = True)

# note: all requested datasets are stored in the folder Data/Station_Data_Request

90044
91331
90201
91342
90255
90650
90280
90026
90011
90250
90805
90706
91402
91406
91706
93535
93550
90004
90034
91744
91335
90063
90731
90027
90042
90019
90001
90018
90006
90066
90744
91352
91343
91344
90640
91770
90813
90037
90065
91745
90005
90007
91702
91605
90023
90022
93536
90032
90046
91732
90660
91766
90745
90016
91311
91304
91405
90024
90043
90220
91801
90049
90002
90033
91606
90045
90221
90723
90815
93551
90020
90703
90262
91748
90057
91401
90806
90247
90638
90242
90029
90031
90810
91306
90036
91733
91355
90003
91601
91789
90012
90803
90025
90802
90503
90028
90047
90601
91754
91505
91016
91792
91387
91107
91390
90291
90604
91765
90275
91205
90059
90808
91767
91768
90017
90505
91746
90241
93534
91423
91741
91303
90301
91722
91790
90804
91731
91711
90501
91803
90035
90266
91103
90807
91356
91326
90710
91351
90038
90605
90041
91104
90303
90278
90270
90068
91007
90008
90712
91776
90064
90732
91724
91001
90039
91321
91206
90504
91301
91750
90048
91042
91791
91340
91780
91006
9132

#### STEP 4:

In [9]:
import pandas as pd
import os

# Set the path to the data folder
data_folder = 'Data/Station_Data_Request'

# Collect the CSV files in a fixed (alphabetical) order: os.listdir returns names in
# arbitrary order, which would make the row order of the combined table -- and of the
# CSV exported from it -- differ from one machine or run to the next.
csv_files = sorted(name for name in os.listdir(data_folder) if name.endswith('.csv'))

# Read every per-zip file into its own dataframe
dfs = [pd.read_csv(os.path.join(data_folder, name)) for name in csv_files]

# pd.concat fails with a generic message on an empty list; say what is actually wrong
if not dfs:
    raise ValueError("No CSV files found in " + data_folder + "; run STEP 3 first")

# Concatenate all the dataframes into a single dataframe
df_all = pd.concat(dfs, ignore_index=True)

# Print the shape of the combined dataframe
print(df_all.shape)

(3878, 72)


In [10]:
# Work on an independent (deep) copy so df_all stays available as the raw combined table
ev_df = df_all.copy(deep=True)
# Preview the first five rows
ev_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,access_code,access_days_time,access_detail_code,cards_accepted,date_last_confirmed,expected_date,fuel_type_code,groups_with_access_code,id,...,rd_blended_with_biodiesel,rd_max_biodiesel_level,nps_unit_name,access_days_time_fr,intersection_directions_fr,bd_blends_fr,groups_with_access_code_fr,ev_pricing_fr,ev_network_ids,federal_agency
0,0,public,,,,2023-05-13,,ELEC,Public,117709.0,...,,,,,,,Public,,{'station': ['465ba1dd-d734-8158-93cc-33000041...,
1,1,public,24 hours daily,,,2023-05-14,,ELEC,Public,185373.0,...,,,,,,,Public,,"{'station': ['210019'], 'posts': ['210019-01',...",
2,2,public,Mon 5:00am - 10:59pm; Tue 5:00am - 10:59pm; We...,,,2023-05-14,,ELEC,Public,197430.0,...,,,,,,,Public,,{'station': ['426696ba-2bb3-4da7-9425-22ecfc8c...,
3,3,public,Mon 5:00am - 10:59pm; Tue 5:00am - 10:59pm; We...,,,2023-05-14,,ELEC,Public,212402.0,...,,,,,,,Public,,{'station': ['dc78ae7f-0e96-4030-81a5-6fa8bff6...,
4,0,public,24 hours daily,,,2023-05-14,,ELEC,Public,176659.0,...,,,,,,,Public,,"{'station': ['USCPIL793021'], 'posts': ['12282...",


#### Export Data: 

In [12]:
# Save the combined, still unfiltered station table.
# index=True also writes the row index as an unnamed first column, which comes back
# as an "Unnamed: 0" column when the file is read again without index_col.
completed_path = r'Data/completed_station_dataset.csv'
ev_df.to_csv(completed_path, index=True)

#### Clean data

In [20]:
# Columns kept in the cleaned table, in output order
station_columns = [
    "id", "station_name", "open_date", "status_code", "fuel_type_code",
    "ev_connector_types", "ev_network", "groups_with_access_code", "access_days_time",
    "street_address", "city", "zip", "state",
    "longitude", "latitude",
]

# Reload the combined EV charging station export and keep only the columns listed above
# (a missing column raises KeyError, exactly like a list-based selection)
ev_df = pd.read_csv("Data/completed_station_dataset.csv").loc[:, station_columns]

# Write the trimmed table; the row index is included as the first column
ev_df.to_csv(r'Data/station_dataset.csv', index=True)

#### Note:
- Unfiltered combined dataset (all columns): completed_station_dataset.csv
- Cleaned dataset (selected columns only): station_dataset.csv