In [1]:
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor

import boto3
import pandas as pd
import pyarrow.parquet as pq
import requests
from botocore.client import Config

In [2]:
# Initialize the AWS clients used in this notebook (S3 and Step Functions).
# Every client is pinned to the same region so a fresh kernel does not depend
# on whatever default region happens to be configured on the machine.
AWS_REGION = "us-east-1"

s3 = boto3.client('s3', region_name=AWS_REGION, config=Config(signature_version='s3v4'))
s3_resource = boto3.resource('s3', region_name=AWS_REGION)
sfn = boto3.client('stepfunctions', region_name=AWS_REGION)

# Define S3 bucket name for project
bucket_name = 'final-project-nrel-stations'

# Define folder (key prefix) inside the S3 bucket
folder_name = 'station-parquet-files'

# Check if the S3 bucket exists; if not, create it.
# list_buckets() is called once and compared against bucket_name itself, so the
# existence check cannot drift from the name used for creation and uploads.
existing_bucket_names = {
    bucket_info['Name'] for bucket_info in s3.list_buckets().get('Buckets', [])
}
if bucket_name in existing_bucket_names:
    print('S3 bucket exists')
else:
    # No CreateBucketConfiguration is passed; S3 accepts that only for us-east-1.
    s3.create_bucket(Bucket=bucket_name)
    print('S3 bucket created')

S3 bucket exists


In [3]:
# Define lambda function
NREL_STATIONS_URL = "https://developer.nrel.gov/api/alt-fuel-stations/v1.json"
REQUEST_TIMEOUT_SECONDS = 30


def lambda_handler(event, context):
    """Fetch NREL alternative-fuel stations for one state and store them in S3 as parquet.

    Parameters
    ----------
    event : dict
        Must contain ``'api_key'`` (NREL developer API key) and ``'state'``
        (the state value sent to the API and used in the output file name).
    context : object
        Lambda context object; not used.

    Returns
    -------
    dict
        ``{"statusCode": 200, "body": <json>}`` with ``message`` and
        ``processingTime`` when the parquet file was uploaded;
        ``{"statusCode": 502, "body": <json>}`` with ``message`` and ``error``
        when the NREL request or its payload is unusable;
        ``{"statusCode": 500, "body": <json>}`` with ``message`` and ``error``
        when writing or uploading the parquet file fails.

    Raises
    ------
    KeyError
        If ``event`` lacks ``'api_key'`` or ``'state'``.
    """
    # Define start time
    start_time = time.time()

    # Extract relevant attributes from the event
    api_key = event['api_key']
    state = event['state']

    # Query the API. The key travels in the X-Api-Key header instead of the URL
    # so it cannot end up in exception messages, which are printed and returned
    # below. `params` lets requests URL-encode the state value, and the timeout
    # keeps a stalled connection from hanging the invocation.
    try:
        response = requests.get(
            NREL_STATIONS_URL,
            params={'state': state},
            headers={'X-Api-Key': api_key},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()

        # Extract the list of fuel stations and report how many were returned
        stations = data['fuel_stations']
        print(f"Number of fuel stations in {state}: {data['total_results']}")
    except (requests.RequestException, ValueError, KeyError) as e:
        # ValueError covers a non-JSON body; KeyError covers a JSON body
        # without the expected fields (e.g. an API error document).
        print(e)
        print('Error fetching station data from NREL API')
        return {
            "statusCode": 502,
            "body": json.dumps({
                "message": "Failed to fetch station data from NREL API",
                "error": str(e)
            })
        }

    stations_df = pd.DataFrame(stations)

    # Upload stations dataframe to S3 bucket as parquet with state initials
    local_path = f"/tmp/{state}_stations.parquet"
    object_key = f"{folder_name}/{state}_stations.parquet"
    try:
        stations_df.to_parquet(local_path)
        s3.upload_file(local_path, bucket_name, object_key)
        print(f'parquet file uploaded for state {state} to S3')

        # Define end time
        end_time = time.time()

        # Construct a success response
        return {
            "statusCode": 200,
            "body": json.dumps({
                "message": f"Success! Parquet file uploaded for state {state} to S3",
                "processingTime": f"{end_time - start_time:.2f} seconds"
            })
        }
    except Exception as e:
        print(e)
        print('Error uploading file to S3')
        return {
            "statusCode": 500,
            "body": json.dumps({
                "message": "Failed to upload parquet file to S3",
                "error": str(e)
            })
        }

In [9]:
# Testing lambda locally
# The NREL API key is read from the NREL_API_KEY environment variable so the
# secret is never stored in the notebook. Any key that was previously saved in
# a shared copy of this notebook should be regenerated.
api_key = os.environ['NREL_API_KEY']
state = 'IL'
event = {'api_key': api_key, 'state': state}
lambda_handler(event, None)

Number of fuel stations in IL: 2373
parquet file uploaded for state IL to S3
Time taken to get EV data for IL: 1.9844880104064941 seconds


In [None]:
# Defining step function to invoke lambda concurrently for all states
