# Ingest EIA
This notebook shows how to read electricity price data from the U.S. Energy Information Administration (EIA) API
and save it to a Delta table.  It requires an API key, which is freely available with registration.
API documentation is at https://www.eia.gov/opendata/documentation.php.


In [0]:
# Named inputs for the spark.conf.set() call and the storage URI below.
my_scope = "Fall2025SecretScope"  # Databricks secret scope.
my_key = "assign1store"  # Key vault secret containing the storage account access key.
storage_end_point = "assign1store.dfs.core.windows.net"  # Storage account URI.
container_name = "misc"  # Container name.

# Register the storage account access key (read from the secret scope) with Spark
# so that abfss:// paths on this storage account can be accessed.
spark.conf.set(
    f"fs.azure.account.key.{storage_end_point}",
    dbutils.secrets.get(scope=my_scope, key=my_key),
)

# Base URI used by the cells below: abfss://<container>@<storage endpoint>/
uri = f"abfss://{container_name}@{storage_end_point}/"
print(uri)


In [0]:
# Show which secrets exist in the scope, then fetch the EIA API key from it.
secrets_in_scope = dbutils.secrets.list(scope=my_scope)
print(secrets_in_scope)
eia_key = dbutils.secrets.get(scope=my_scope, key="eia-key")


In [0]:
# Get the average monthly price of electricity for MN since 2019 by sector.
# NOTE(review): no "frequency" parameter is sent, so the endpoint's default frequency
# applies -- confirm that this is monthly.
# NOTE(review): the EIA examples appear to use "facets[stateid][]" (one pair of
# brackets) -- confirm that the extra "[]" below is intended and that the MN filter
# is actually applied.
import requests

request_params = {"api_key" : eia_key,
                    "start" : "2019-01",
                    "data[]" : "price",
                    "facets[stateid][][]" : "MN"}

# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
api_response = requests.get(
    "https://api.eia.gov/v2/electricity/retail-sales/data/",
    params=request_params,
    timeout=30,
)

# Fail here, with a clear HTTP error, instead of letting an error payload reach the
# parsing cell and surface later as a confusing KeyError.
api_response.raise_for_status()

# Show the status and a short preview only; the full payload is parsed in a later cell.
print(api_response.status_code)
print(api_response.text[:500])


In [0]:
from pyspark.sql.types import DoubleType, DateType

# Convert the data to appropriate types.  Strings are OK for all except the following.
price_types_df = price_df.withColumn("price", price_df["price"].cast(DoubleType()))
price_types_df = price_types_df.withColumn("period", price_df["period"].cast(DateType()))

display(price_types_df)


In [0]:
import json

# Parse the result and convert the monthly price list to a  dataframe.
json_data = json.loads(api_response.content)

response_json = json_data['response']  
total_data_points = response_json['total']
price_df = spark.createDataFrame(response_json['data'])

print(total_data_points)
display(price_df)



In [0]:
# Write the typed prices in Delta format to the Bronze/EIA folder under the
# storage URI, overwriting whatever was saved there before.
(
    price_types_df.write
    .format("delta")
    .mode("overwrite")
    .save(uri + "Bronze/EIA")
)

In [0]:
# Load the Delta data just written to Bronze/EIA and display it as a check.
price_read_df = (
    spark.read
    .format("delta")
    .load(uri + "Bronze/EIA")
)

display(price_read_df)