# Update ETL with a new year of LBNL interconnection queue data

Most of the meaningful changes are covered in the Queued Up report tabs that LBNL publishes with the data.

In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each cell
# executes so that edits to local code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import dbcp

In [3]:
# Show every column when a wide frame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

## Load raw data

In [4]:
# Extract the raw "lbnl_iso_queue" table from both archived workbooks: the new 2024
# release and the 2023 release it is compared against.
extract_lbnl = dbcp.extract.lbnl_iso_queue.extract

lbnl_uri = "gs://dgm-archive/lbnl_iso_queue/queues_2024_clean_data.xlsx"
raw_lbnl_24 = extract_lbnl(lbnl_uri)["lbnl_iso_queue"]

lbnl_uri = "gs://dgm-archive/lbnl_iso_queue/queues_2023_clean_data.xlsx"
raw_lbnl_23 = extract_lbnl(lbnl_uri)["lbnl_iso_queue"]

# (rows, columns) of each extract, newest release first.
for raw_frame in (raw_lbnl_24, raw_lbnl_23):
    print(raw_frame.shape)

(36441, 33)
(32513, 29)


In [5]:
# Columns present in the 2024 extract but absent from the 2023 extract.
set(raw_lbnl_24.columns).difference(raw_lbnl_23.columns)

{'cluster', 'county_state_pairs', 'fips_codes', 'project_type'}

In [6]:
# Columns present in the 2023 extract but absent from the 2024 extract.
set(raw_lbnl_23.columns).difference(raw_lbnl_24.columns)

set()

In [7]:
# Preview the four columns that only exist in the 2024 extract.
raw_lbnl_24.loc[:, ["cluster", "county_state_pairs", "fips_codes", "project_type"]]

Unnamed: 0,cluster,county_state_pairs,fips_codes,project_type
0,,"Abbeville County, SC",45001.0,Generation
1,,"Abbeville County, SC",45001.0,Generation
2,,"Abbeville County, SC",45001.0,Generation
3,,"Abbeville County, SC",45001.0,Generation
4,,"Abbeville County, SC",45001.0,Generation
...,...,...,...,...
36436,,"Zavala, TX",48507.0,Generation
36437,,"Zavala, TX",48507.0,Generation
36438,,,,
36439,,,,


# Look at transformed data

In [22]:
from dbcp.transform.lbnl_iso_queue import transform

In [None]:
# Run the LBNL transform on the 2024 extract, keyed under "lbnl_iso_queue".
clean_24 = transform(dict(lbnl_iso_queue=raw_lbnl_24))

In [43]:
# The 2023 extract lacks the "fips_codes" and "county_state_pairs" columns, so pad
# them with placeholder values just to make sure the transform runs (they get dropped
# anyway). `assign` returns a new frame, so `raw_lbnl_23` itself stays exactly as
# extracted and later raw-data lookups never show these placeholders as real values.
clean_23 = transform(
    {
        "lbnl_iso_queue": raw_lbnl_23.assign(
            fips_codes="00000",
            county_state_pairs="county, state",
        )
    }
)

In [46]:
# Take the "iso_projects" table from the 2023 transform output.
proj_23 = clean_23["iso_projects"]

In [47]:
# Take the "iso_projects" table from the 2024 transform output.
proj_24 = clean_24["iso_projects"]

In [48]:
# Tally 2023 rows by queue_status.
proj_23["queue_status"].value_counts()

withdrawn      15120
active         10709
operational     3895
suspended        304
Name: queue_status, dtype: int64

In [49]:
# Tally 2024 rows by queue_status.
proj_24["queue_status"].value_counts()

withdrawn      19105
active          9885
operational     4156
suspended        509
unknown            4
Name: queue_status, dtype: int64

In [50]:
# Fraction of 2023 rows whose queue_status is "withdrawn".
len(proj_23.loc[proj_23["queue_status"] == "withdrawn"]) / len(proj_23)

0.5035300386306114

In [52]:
# Fraction of 2024 rows whose queue_status is "withdrawn".
len(proj_24.loc[proj_24["queue_status"] == "withdrawn"]) / len(proj_24)

0.5676045039959595

In [51]:
# Fraction of all 2023 rows with no withdrawn_date (mean of the null mask).
proj_23["withdrawn_date"].isna().mean()

0.740375649393899

In [53]:
# Fraction of all 2024 rows with no withdrawn_date (mean of the null mask).
proj_24["withdrawn_date"].isna().mean()

0.7133307584895571

# Look at data warehouse tables

Run `make all` to generate the new data warehouse tables before running the cells below.

In [8]:
# Read the iso_projects data warehouse table from its parquet output.
iso_projects = pd.read_parquet("/app/data/output/data_warehouse/iso_projects.parquet")

In [9]:
# Read the iso_resource_capacity data warehouse table from its parquet output.
iso_rc = pd.read_parquet("/app/data/output/data_warehouse/iso_resource_capacity.parquet")

In [10]:
# Keep only the warehouse rows whose queue_status is "withdrawn".
withdrawn = iso_projects.loc[iso_projects["queue_status"] == "withdrawn"]

In [11]:
# Among withdrawn rows only, the fraction that still lack a withdrawn_date.
withdrawn["withdrawn_date"].isna().mean()

0.49604815493326354

In [32]:
# Inspect the warehouse record for a single project (project_id 34253).
iso_projects.loc[iso_projects["project_id"] == 34253]

Unnamed: 0,project_id,date_proposed_raw,developer,entity,interconnection_status_lbnl,interconnection_status_raw,point_of_interconnection,project_name,queue_date,actual_completion_date,withdrawn_date,actual_completion_date_raw,withdrawn_date_raw,queue_id,queue_status,queue_year,region,resource_type_lbnl,utility,year_proposed,date_proposed,interconnection_date,interconnection_date_raw,interconnection_service_type,project_type,cluster,queue_date_raw,is_actionable,is_nearly_certain
31574,34253,47088.0,,BPA,In Progress (unknown study),,Big Eddy-Ostrander No 1 500 kV,,2022-08-18,NaT,NaT,,,G0787,active,2022,West,Battery+Other¬†Storage,Bonneville Power Administration,2028,2028-12-01,NaT,,NRIS/ERIS,Generation,Transition Cluster,44791.0,False,False


In [36]:
# Total capacity_mw across the resource rows of project 34253.
(
    iso_rc.loc[iso_rc["project_id"] == 34253]
    .groupby("project_id")[["capacity_mw"]]
    .sum()
)

Unnamed: 0_level_0,capacity_mw
project_id,Unnamed: 1_level_1
34253,1200.0


In [29]:
# Raw 2024 row(s) for queue_id "G0697".
raw_lbnl_24.loc[raw_lbnl_24["queue_id"] == "G0697"]

Unnamed: 0,queue_id,queue_status,queue_date,date_proposed,date_operational,date_withdrawn,interconnection_date,interconnection_status_raw,interconnection_status_lbnl,county_1,county_2,county_3,state,county_state_pairs,fips_codes,point_of_interconnection,region,project_name,utility,entity,developer,cluster,interconnection_service_type,project_type,resource_type_1,resource_type_2,resource_type_3,capacity_mw_resource_1,capacity_mw_resource_2,capacity_mw_resource_3,resource_type_lbnl,queue_year,year_proposed
7260,G0697,active,44461.0,48549.0,,,,,In Progress (unknown study),Crook,,,OR,"Crook County, OR",41013.0,Ponderosa Substation 500 kV,West,,Bonneville Power Administration,BPA,,Transition Cluster,NRIS/ERIS,Generation,Solar,Battery,Pumped Storage,1200.0,1200.0,1200.0,Solar+Battery+Other¬†Storage,2021.0,2032.0


In [34]:
# Raw 2023 row(s) for the same queue_id, for comparison with the 2024 release.
raw_lbnl_23.loc[raw_lbnl_23["queue_id"] == "G0697"]

Unnamed: 0,queue_id,queue_status,queue_date,queue_year,interconnection_date,date_withdrawn,date_operational,entity,project_name,developer,utility,county_1,county_2,county_3,state,region,interconnection_service_type,point_of_interconnection,date_proposed,year_proposed,interconnection_status_raw,interconnection_status_lbnl,resource_type_lbnl,resource_type_1,resource_type_2,resource_type_3,capacity_mw_resource_1,capacity_mw_resource_2,capacity_mw_resource_3
1781,G0697,active,9/22/2021,2021.0,,,,BPA,,,BPA,crook,,,OR,West (non-ISO),NRIS & ERIS,Ponderosa Substation 500 kV,12/1/2025,2025.0,,In Progress (unknown study),Solar+Battery,Solar,Battery,,650.0,650.0,


# Data Mart Tables
The LBNL data is combined with GridStatus queue data to create the data mart tables. Specifically, the CAISO data from LBNL is used in the data mart tables, and GridStatus provides the remaining ISOs. Refer to the GridStatus data update notebook to investigate changes in the data mart tables.