In [2]:
import pandas as pd
from pathlib import Path

# Explore EIA API extra aggregates (not present in EIA Bulk data)

**Summary:** The API contains additional (more precise) aggregates of some fuel types compared to the bulk data. But only a trivial amount (<1%) of fuel receipts come from these categories (DFO/RFO/WC), so the advantage of the API in fuel precision is small.

The additional aggregates (like "all fossil fuels" or "nat gas plus other gas") could be useful in error checking or possibly for deducing more precise aggregates for redacted items. But that would probably be an involved process.

API query:

(Reproducibility note: replace `<REDACTED>` in `api_key=<REDACTED>`, near the start of the URL, with your own EIA API key.)
```
https://api.eia.gov/v2/electricity/electric-power-operational-data/data/?api_key=<REDACTED>&frequency=quarterly&data[0]=cost-per-btu&data[1]=receipts-btu&facets[fueltypeid][]=ANT&facets[fueltypeid][]=BIS&facets[fueltypeid][]=DFO&facets[fueltypeid][]=FOS&facets[fueltypeid][]=LFG&facets[fueltypeid][]=MLG&facets[fueltypeid][]=NGO&facets[fueltypeid][]=OOG&facets[fueltypeid][]=PET&facets[fueltypeid][]=RC&facets[fueltypeid][]=RFO&facets[fueltypeid][]=WOC&facets[fueltypeid][]=WOO&facets[location][]=90&facets[location][]=AK&facets[location][]=AL&facets[location][]=AR&facets[location][]=AZ&facets[location][]=CA&facets[location][]=CO&facets[location][]=CT&facets[location][]=DC&facets[location][]=DE&facets[location][]=ENC&facets[location][]=ESC&facets[location][]=FL&facets[location][]=GA&facets[location][]=HI&facets[location][]=IA&facets[location][]=ID&facets[location][]=IL&facets[location][]=IN&facets[location][]=KS&facets[location][]=KY&facets[location][]=LA&facets[location][]=MA&facets[location][]=MAT&facets[location][]=MD&facets[location][]=ME&facets[location][]=MI&facets[location][]=MN&facets[location][]=MO&facets[location][]=MS&facets[location][]=MT&facets[location][]=MTN&facets[location][]=NC&facets[location][]=ND&facets[location][]=NE&facets[location][]=NEW&facets[location][]=NH&facets[location][]=NJ&facets[location][]=NM&facets[location][]=NV&facets[location][]=NY&facets[location][]=OH&facets[location][]=OK&facets[location][]=OR&facets[location][]=PA&facets[location][]=PCC&facets[location][]=PCN&facets[location][]=RI&facets[location][]=SAT&facets[location][]=SC&facets[location][]=SD&facets[location][]=TN&facets[location][]=TX&facets[location][]=US&facets[location][]=UT&facets[location][]=VA&facets[location][]=VT&facets[location][]=WA&facets[location][]=WI&facets[location][]=WNC&facets[location][]=WSC&facets[location][]=WV&facets[location][]=WY&facets[sectorid][]=1&facets[sectorid][]=2&facets[sectorid][]=3&facets[sectorid][]=4&facets[sectorid][]=5&facets[sectorid][]=6&
facets[sectorid][]=7&facets[sectorid][]=90&facets[sectorid][]=94&facets[sectorid][]=96&facets[sectorid][]=97&facets[sectorid][]=98&facets[sectorid][]=99&start=2015-Q1&end=2015-Q1&sort[0][column]=period&sort[0][direction]=asc&offset=0&length=5000
```

In [3]:
# The raw API response was edited by hand before loading: the nested metadata
# at the top and bottom was removed and the records were made line-delimited
# for easier parsing.
path_api_stuff = Path('./api.eia.gov2015_other_fuels_etc.json')
# Fail early, and say which file is missing, rather than raising a bare AssertionError.
assert path_api_stuff.exists(), f"API extract not found: {path_api_stuff.resolve()}"

In [5]:
# Load the hand-edited API extract; lines=True parses each line of the file as one JSON record.
api_raw = pd.read_json(path_api_stuff, lines=True)

In [6]:
# Preview the first three records to see the column layout.
api_raw.head(3)

Unnamed: 0,period,location,stateDescription,sectorid,sectorDescription,fueltypeid,fuelTypeDescription,cost-per-btu,cost-per-btu-units,receipts-btu,receipts-btu-units
0,2015-Q1,WSC,West South Central,90,Electric Power Sector Non-CHP,BIS,bituminous coal and synthetic coal,,dollars per million Btu,2264.73334,billion Btu
1,2015-Q1,WSC,West South Central,90,Electric Power Sector Non-CHP,DFO,distillate fuel oil,,dollars per million Btu,610.38749,billion Btu
2,2015-Q1,WSC,West South Central,90,Electric Power Sector Non-CHP,FOS,fossil fuels,,dollars per million Btu,974228.31436,billion Btu


In [7]:
# Summary statistics for the numeric columns; the per-column counts show how many values are non-null.
api_raw.describe()

Unnamed: 0,sectorid,cost-per-btu,receipts-btu
count,4938.0,562.0,4938.0
mean,55.890846,3.594858,26575.28
std,45.685782,5.591763,189805.5
min,1.0,0.0,0.0
25%,4.0,0.0,0.0
50%,90.0,0.0,0.0
75%,97.0,4.191625,1588.159
max,99.0,24.8889,6106581.0


In [8]:
# Keep every record whose receipts are not exactly zero. NaN compares unequal
# to 0, so any rows with missing receipts would also be retained by this mask.
has_receipts = api_raw['receipts-btu'] != 0
nonzero_receipts = api_raw[has_receipts]
nonzero_receipts.shape

(2042, 11)

About 59% of the rows (2,896 of 4,938) report zero receipts...

In [9]:
# Sanity check: shape after excluding rows with missing receipts. If it matches
# the shape of nonzero_receipts, none of the non-zero rows are actually NaN.
receipts_present = nonzero_receipts['receipts-btu'].notna()
nonzero_receipts[receipts_present].shape

(2042, 11)

In [10]:
# Summary statistics of the numeric columns, restricted to records with non-zero receipts.
nonzero_receipts.describe()

Unnamed: 0,sectorid,cost-per-btu,receipts-btu
count,2042.0,302.0,2042.0
mean,61.738002,6.689769,64264.81
std,44.72223,6.124013,291068.2
min,1.0,0.0,0.044
25%,3.0,2.15805,398.3035
50%,90.0,3.8918,3265.067
75%,98.0,12.889,27513.62
max,99.0,24.8889,6106581.0


In [11]:
# List the available column names.
api_raw.columns

Index(['period', 'location', 'stateDescription', 'sectorid',
       'sectorDescription', 'fueltypeid', 'fuelTypeDescription',
       'cost-per-btu', 'cost-per-btu-units', 'receipts-btu',
       'receipts-btu-units'],
      dtype='object')

In [12]:
# Categorical columns used to break the records down in the value counts.
key_columns = [
    'location',
    'sectorid',
    'fueltypeid',
]

In [14]:
# Show how the non-zero records are distributed across each key column.
for column_name in key_columns:
    facet_counts = nonzero_receipts[column_name].value_counts()
    print(facet_counts)

US     70
MAT    60
SAT    59
ENC    53
NY     51
       ..
SD     16
OR     14
NE     12
ID     12
AK      8
Name: location, Length: 61, dtype: int64
99    310
98    300
90    294
1     238
94    228
2     206
7     147
97    147
3     132
5      17
96     17
6       6
Name: sectorid, dtype: int64
FOS    465
NGO    447
PET    340
DFO    333
BIS    290
RFO    102
WOC     65
Name: fueltypeid, dtype: int64


In [15]:
# Per-energy-source totals of received fuel heat content, read from the system
# clipboard. Presumably the clipboard holds the result of the SQL query below,
# run separately and copied by hand; later code relies on an `mmbtu` column.
# NOTE(review): the clipboard is hidden state, so this cell is not reproducible
# under Restart & Run All. Consider saving the query result to a file and
# reading that instead.
#SELECT
#    energy_source_code,
#    sum(fuel_received_units * fuel_mmbtu_per_unit) as mmbtu,
#    count(*) as n
#FROM fuel_receipts_costs_eia923
#where report_date >= date('2013-01-01')
#group by 1
#order by 2 desc;
frc_totals = pd.read_clipboard()

In [17]:
# Express each energy source's heat content as a share of the overall total.
total_mmbtu = frc_totals['mmbtu'].sum()
frc_totals['mmbtu_frac'] = frc_totals['mmbtu'].div(total_mmbtu)
frc_totals

Unnamed: 0,energy_source_code,mmbtu,n,mmbtu_frac
0,NG,95839230000.0,193469,0.4578083
1,SUB,54558620000.0,41837,0.2606176
2,BIT,49500690000.0,60145,0.2364567
3,LIG,6728104000.0,2087,0.03213905
4,PC,902379200.0,1280,0.004310518
5,WC,774321600.0,4789,0.003698807
6,RFO,610400700.0,2392,0.002915784
7,DFO,429707500.0,23477,0.002052642
8,,84378.56,1,4.030625e-07
9,WO,51281.1,24,2.449614e-07


A trivial amount (<1%; about 0.9% of total MMBtu combined, per the table above) of fuel receipts come from DFO/RFO/WC, so the advantage of the API in fuel precision is small.

The additional aggregates (like "all fossil fuels" or "nat gas plus other gas") could be useful in error checking or possibly for deducing more precise aggregates for redacted items. But that would probably be an involved process.