**Date Created: 10/19/2025**

In [2]:
import os
import json
import requests
import pandas as pd
from datetime import datetime
from mysql import connector
from dotenv import load_dotenv



In [3]:
load_dotenv() # this must be run in order to grant getenv function permission to access objects in .env file. 

True

In [4]:
API_KEY = os.getenv("API_KEY")
API_HOST = os.getenv("API_HOST")

## GET Request (Extract)

In [5]:
# API Request
url = f"https://api.eia.gov/v2/electricity/retail-sales/data/?api_key={API_KEY}&frequency=monthly&data[0]=customers&data[1]=price&data[2]=revenue&data[3]=sales&facets[stateid][]=CA&sort[0][column]=period&sort[0][direction]=desc&offset=0&length=5000"
# Don't forget to add "api_key =" and "&" after you insert API KEY variable
# Full documentation here: https://www.eia.gov/opendata/documentation/APIv2.1.0.pdf

In [None]:
# Storing the API response into a variable
response = requests.get(url)
# Printing response would return the 200 code
json_data = response.json() # accessing the json formatted data
json_data # examine what the data hierarchy looks like within the "response" key, where the data we want is imbedded

# there are a total of 1770 records

{'response': {'total': '1770',
  'dateFormat': 'YYYY-MM',
  'frequency': 'monthly',
  'data': [{'period': '2025-07',
    'stateid': 'CA',
    'stateDescription': 'California',
    'sectorid': 'ALL',
    'sectorName': 'all sectors',
    'customers': '16397392',
    'price': '30.04',
    'revenue': '6795.9437',
    'sales': '22624.37124',
    'customers-units': 'number of customers',
    'price-units': 'cents per kilowatt-hour',
    'revenue-units': 'million dollars',
    'sales-units': 'million kilowatt hours'},
   {'period': '2025-07',
    'stateid': 'CA',
    'stateDescription': 'California',
    'sectorid': 'TRA',
    'sectorName': 'transportation',
    'customers': '13',
    'price': '17.38',
    'revenue': '10.4139',
    'sales': '59.92411',
    'customers-units': 'number of customers',
    'price-units': 'cents per kilowatt-hour',
    'revenue-units': 'million dollars',
    'sales-units': 'million kilowatt hours'},
   {'period': '2025-07',
    'stateid': 'CA',
    'stateDescriptio

In [14]:
json_data # this contains a lot of nested data, we need to specify what we want to access
records = json_data["response"]["data"] 
records
# each dictionary is one row of data

[{'period': '2025-07',
  'stateid': 'CA',
  'stateDescription': 'California',
  'sectorid': 'ALL',
  'sectorName': 'all sectors',
  'customers': '16397392',
  'price': '30.04',
  'revenue': '6795.9437',
  'sales': '22624.37124',
  'customers-units': 'number of customers',
  'price-units': 'cents per kilowatt-hour',
  'revenue-units': 'million dollars',
  'sales-units': 'million kilowatt hours'},
 {'period': '2025-07',
  'stateid': 'CA',
  'stateDescription': 'California',
  'sectorid': 'TRA',
  'sectorName': 'transportation',
  'customers': '13',
  'price': '17.38',
  'revenue': '10.4139',
  'sales': '59.92411',
  'customers-units': 'number of customers',
  'price-units': 'cents per kilowatt-hour',
  'revenue-units': 'million dollars',
  'sales-units': 'million kilowatt hours'},
 {'period': '2025-07',
  'stateid': 'CA',
  'stateDescription': 'California',
  'sectorid': 'RES',
  'sectorName': 'residential',
  'customers': '14450126',
  'price': '32.58',
  'revenue': '2693.30255',
  'sal

## Transform

In [26]:
rows = [] # creating an empty list that will hold a row for each item
column_names = ['period', 'stateid', 'sectorid', 'sectorName', 'customers', 'price', 'revenue', 'sales']

In [27]:
# loop through each record in the API response for the fields that are relevant, excluded state and unit measure descriptions
for record in records:
        period = record["period"]
        stateid = record["stateid"]
        sectorid = record["sectorid"]
        sectorName = record["sectorName"]
        customers = record["customers"]
        price = record["price"]
        revenue = record["revenue"]
        sales = record["sales"]

        # putting each row into a tuple (instead of list, for its immutability)
        data_tuple = (period, stateid, sectorid, sectorName, customers, price, revenue, sales)

        # append each row tuple into the rows list
        rows.append(data_tuple)

print(rows)


[('2025-07', 'CA', 'ALL', 'all sectors', '16397392', '30.04', '6795.9437', '22624.37124'), ('2025-07', 'CA', 'TRA', 'transportation', '13', '17.38', '10.4139', '59.92411'), ('2025-07', 'CA', 'RES', 'residential', '14450126', '32.58', '2693.30255', '8266.15262'), ('2025-07', 'CA', 'OTH', 'other', None, None, None, None), ('2025-07', 'CA', 'IND', 'industrial', '150177', '25.31', '1029.65545', '4067.87834'), ('2025-07', 'CA', 'COM', 'commercial', '1797076', '29.94', '3062.5718', '10230.41616'), ('2025-06', 'CA', 'ALL', 'all sectors', '15894182', '28.05', '5685.41465', '20266.76603'), ('2025-06', 'CA', 'TRA', 'transportation', '13', '17.37', '10.73789', '61.826'), ('2025-06', 'CA', 'RES', 'residential', '14001964', '33.52', '2260.57293', '6744.58573'), ('2025-06', 'CA', 'OTH', 'other', None, None, None, None), ('2025-06', 'CA', 'IND', 'industrial', '142008', '22.14', '867.25803', '3916.56689'), ('2025-06', 'CA', 'COM', 'commercial', '1750197', '26.69', '2546.8458', '9543.78741'), ('2025-05

In [None]:
# create pandas dataframe
df = pd.DataFrame(rows, columns=column_names)
df
df.info()

# why is Dtype all objects? Might throw error when loading into SQL.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1770 entries, 0 to 1769
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   period      1770 non-null   object
 1   stateid     1770 non-null   object
 2   sectorid    1770 non-null   object
 3   sectorName  1770 non-null   object
 4   customers   1055 non-null   object
 5   price       1475 non-null   object
 6   revenue     1475 non-null   object
 7   sales       1475 non-null   object
dtypes: object(8)
memory usage: 110.8+ KB
