# Update interconnection FYI data and validate against LBNL + GridStatus data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import dbcp
from dbcp.extract.helpers import cache_gcs_archive_file_locally

In [3]:
pd.set_option('display.max_columns', None)

# Raw Data

In [18]:
old_fyi = dbcp.extract.fyi_queue.extract("gs://dgm-archive/interconnection.fyi/interconnection_fyi_dataset_2025-09-01.csv")
old_fyi = old_fyi["fyi_queue"]

In [19]:
new_fyi = dbcp.extract.fyi_queue.extract("gs://dgm-archive/interconnection.fyi/interconnection_fyi_dataset_2025-10-01.csv")
new_fyi = new_fyi["fyi_queue"]

## Compare max dates of raw data
Print out the latest date a project entered a queue for each ISO in the old and new data. We should expect the latest project date in the new data to be larger than the that of the old data. Notable exceptions:
* PJM: PJM [is working through a backlog of projects](https://www.utilitydive.com/news/pjm-fast-track-reliability-projects-interconnection-queue-invenergy/729311/) and isn't accepting new projects until mid 2026.

In [28]:
for power_market in old_fyi.power_market.unique():
    print(power_market)
    old_df = old_fyi[old_fyi.power_market == power_market]
    new_df = new_fyi[new_fyi.power_market == power_market]
    
    old_df.loc[:, 'queue_date'] = pd.to_datetime(old_df.loc[:, 'queue_date'])
    new_df.loc[:, 'queue_date'] = pd.to_datetime(new_df.loc[:, 'queue_date'])
    
    print(f" - Old max date {old_df['queue_date'].max()}")
    print(f" - New max date {new_df['queue_date'].max()}")
    print()

AESO
 - Old max date 2025-06-26 00:00:00
 - New max date 2025-08-05 00:00:00

West
 - Old max date 2025-08-26 00:00:00
 - New max date 2025-09-10 00:00:00

Southeast
 - Old max date 2025-07-30 00:00:00
 - New max date 2025-08-22 00:00:00

CAISO
 - Old max date 2025-02-12 00:00:00
 - New max date 2025-02-12 00:00:00

ERCOT
 - Old max date 2025-07-29 00:00:00
 - New max date 2025-08-26 00:00:00

ISO-NE
 - Old max date 2025-01-14 00:00:00
 - New max date 2025-01-14 00:00:00

MISO
 - Old max date 2025-08-19 00:00:00
 - New max date 2025-09-18 00:00:00

NYISO
 - Old max date 2025-06-03 00:00:00
 - New max date 2025-06-03 00:00:00

PJM
 - Old max date 2023-07-08 00:00:00
 - New max date 2023-07-08 00:00:00

SPP
 - Old max date 2025-07-01 00:00:00
 - New max date 2025-09-15 00:00:00



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_df.loc[:, 'queue_date'] = pd.to_datetime(old_df.loc[:, 'queue_date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.loc[:, 'queue_date'] = pd.to_datetime(new_df.loc[:, 'queue_date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_df.loc[:, 'queue_date'] = pd.to_datetime(old_df.loc[:

## Compare data mart tables
Compare the old and new total active capacity in regions.

### How to grab the new data
To get the new data, replace the URI in `dbcp.etl.etl_fyi_queue` with the updated GCS URI. Then run `make all`. There might be some data validation errors due to small changes in the expected number of projects. If the changes seem reasonable, just update the expected value in the assertion. If they don't seem reason, do some digging!

Once the ETL successfully finishes the new data is available in the databse.

<!-- - download the `dev` data to compare to
- load the relevent tables

data warehouse
- check the old and new iso have a similar n and capacity
- plot total capacity


data mart:
- total capacity, n_projects and max date have all the same: caiso, ercot, pjm
- total capacity, n_projects and max date have all increased: miso, pjm, spp, nyiso, isone
- withdrawn and in service capacity have increased: miso, pjm, spp, nyiso, isone

- active capacity has changed for isos in GS_REGIONS
- how much has the active capacity changed by? -->

In [4]:
fyi_projects_long = pd.read_parquet("/app/data/output/data_mart/fyi_projects_long_format.parquet")

In [5]:
# filter for active projects
fyi_projects_long = fyi_projects_long[fyi_projects_long.queue_status.isin(["active"])]

### How to grab the old data
The following code grabs the latest version number for data in the development datasets then downloads the parquet file.

In [30]:
from google.cloud import bigquery

def get_bigquery_table_version(dataset_id, table_name, project_id="dbcp-dev-350818"):
    """
    Get the data version of a BigQuery table.

    The dbcp.commands.publish script generates a version number for each data release
    and adds it as a label to the BQ tables.

    Args:
        dataset_id: the BQ dataset ID
        table_name: the name of the table
        project_id: the GCP project id

    Return:
        the current DBCP version number of the requested table
    """
    client = bigquery.Client()

    table_ref = f"{project_id}.{dataset_id}.{table_name}"
    table = client.get_table(table_ref)  # Fetch table metadata

    labels = table.labels  # Get the labels dictionary
    return labels["version"]

# TODO: update this once we figure out where the long format table will land

In [4]:
from dbcp.extract.helpers import cache_gcs_archive_file_locally

table_name = "fyi_projects_long_format"
version = get_bigquery_table_version("data_mart", table_name)
uri = f"gs://dgm-outputs/{version}/data_mart/{table_name}.parquet"
data_cache = "/app/data/gcp_outputs"

fyi_projects_long_format_path = cache_gcs_archive_file_locally(uri, data_cache)
old_fyi_projects_long = pd.read_parquet(iso_projects_long_format_path)

### Compare to LBNL + GridStatus ISO queue data

In [6]:
iso_projects_long = pd.read_parquet("/app/data/output/data_mart/iso_projects_long_format.parquet")

In [7]:
iso_projects_long.queue_status.value_counts()

active    11064
Name: queue_status, dtype: Int64

In [85]:
iso_projects_long.resource_clean.value_counts()

Solar                    5213
Battery Storage          3787
Onshore Wind             1010
Natural Gas               444
Unknown                   289
Offshore Wind              68
Other                      52
Hydro                      47
Oil                        35
Geothermal                 34
Coal                       21
Nuclear                    21
Other Storage              15
Biofuel                    11
Pumped Storage              8
Biomass                     4
Municipal Solid Waste       3
Hydrogen                    1
Steam                       1
Name: resource_clean, dtype: Int64

In [86]:
fyi_projects_long.resource_clean.value_counts()

Solar              4758
Battery Storage    2968
Onshore Wind        944
Natural Gas         576
Other               511
Hydro                47
Nuclear              42
Geothermal           39
Offshore Wind        24
Oil                  23
Coal                 21
Biofuel              20
Pumped Storage       16
Biomass              12
Other Storage        11
Waste Heat            4
Name: resource_clean, dtype: int64

In [32]:
fyi_projects_renewable = fyi_projects_long[fyi_projects_long.resource_class == "renewable"]
iso_projects_renewable = iso_projects_long[iso_projects_long.resource_class == "renewable"]

In [40]:
def agg_iso_projects_long_format(df, iso_col, id_col):
    """Calculate some aggregate metrics for each ISO"""
    agg = df.groupby(iso_col).agg({id_col: "count", "capacity_mw": "sum", "date_entered_queue": "max"})
    agg = agg.rename(columns={id_col: "n_projects", "capacity_mw": "total_capacity_mw", "date_entered_queue": "max_date_entered_queue"})
    return agg

fyi_project_agg = agg_iso_projects_long_format(fyi_projects_long, "power_market", "project_id")
iso_project_agg = agg_iso_projects_long_format(iso_projects_long, "iso_region", "surrogate_id")

In [41]:
fyi_project_agg.max_date_entered_queue

power_market
AESO        2025-08-05
CAISO       2025-02-12
ERCOT       2025-08-26
ISONE       2024-12-12
MISO        2025-09-18
NYISO       2025-06-03
PJM         2023-06-30
SPP         2025-09-15
Southeast   2025-08-22
West        2025-09-10
Name: max_date_entered_queue, dtype: datetime64[ns]

In [42]:
both_project_aggs = fyi_project_agg.merge(iso_project_agg, how="outer", left_index=True, right_index=True, validate="1:1", suffixes=("_fyi", "_iso"))
both_project_aggs

Unnamed: 0,n_projects_fyi,total_capacity_mw_fyi,max_date_entered_queue_fyi,n_projects_iso,total_capacity_mw_iso,max_date_entered_queue_iso
AESO,226,47322.074,2025-08-05,,,NaT
CAISO,658,196593.869732,2025-02-12,900.0,269052.636325,2023-04-17 00:00:00
ERCOT,1850,383975.98,2025-08-26,1813.0,381547.57,2025-06-19 00:00:00
ISONE,157,38651.556822,2024-12-12,184.0,47019.0878,2024-12-12 00:00:00
MISO,1770,331486.43,2025-09-18,2261.0,389585.19,2025-02-19 05:00:00
NYISO,176,28569.12,2025-06-03,366.0,53351.28,2025-06-03 00:00:00
PJM,1796,191922.1128,2023-06-30,1658.0,134888.5638,2023-06-30 00:00:00
SPP,705,152335.537,2025-09-15,858.0,190902.661,2025-06-30 00:00:00
Southeast,794,121365.093,2025-08-22,930.0,128113.102,2024-12-19 00:00:00
West,1884,460295.3609,2025-09-10,2026.0,383993.53,2024-12-30 00:00:00


In [43]:
# Calculate the differences between the old and new
for col in iso_project_agg.columns:
    if pd.api.types.is_datetime64_any_dtype(iso_project_agg[col]):
        continue
    else:
        both_project_aggs[f"{col}_pct_diff"] = (both_project_aggs[f"{col}_fyi"] - both_project_aggs[f"{col}_iso"]) / both_project_aggs[f"{col}_iso"]

Ideally a less than 20% percent change in capacity for each region

In [44]:
iso_project_agg

both_project_aggs.sort_values(by="total_capacity_mw_iso", ascending=False)[["n_projects_pct_diff", "total_capacity_mw_pct_diff"]] * 100

Unnamed: 0,n_projects_pct_diff,total_capacity_mw_pct_diff
MISO,-21.716055,-14.91298
West,-7.008885,19.870603
ERCOT,2.040816,0.636463
CAISO,-26.888889,-26.931075
SPP,-17.832168,-20.202507
PJM,8.323281,42.281975
Southeast,-14.623656,-5.267228
NYISO,-51.912568,-46.450919
ISONE,-14.673913,-17.79603
AESO,,


In [8]:
# dig deeper into PJM and NYISO
def agg_resource_long_format(df, id_col):
    """Calculate some aggregate metrics for each ISO"""
    agg = df.groupby("resource_clean").agg({id_col: "count", "capacity_mw": "sum", "date_entered_queue": "max"})
    agg = agg.rename(columns={id_col: "n_projects", "capacity_mw": "total_capacity_mw", "date_entered_queue": "max_date_entered_queue"})
    return agg
    
pjm_fyi = fyi_projects_long[fyi_projects_long["power_market"] == "PJM"]
pjm_iso = iso_projects_long[iso_projects_long["iso_region"] == "PJM"]
fyi_pjm_project_agg = agg_resource_long_format(pjm_fyi, "project_id")
iso_pjm_project_agg = agg_resource_long_format(pjm_iso, "surrogate_id")

In [9]:
both_pjm_project_aggs = fyi_pjm_project_agg.merge(iso_pjm_project_agg, how="outer", left_index=True, right_index=True, validate="1:1", suffixes=("_fyi", "_iso"))
both_pjm_project_aggs

Unnamed: 0_level_0,n_projects_fyi,total_capacity_mw_fyi,max_date_entered_queue_fyi,n_projects_iso,total_capacity_mw_iso,max_date_entered_queue_iso
resource_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Storage,327.0,35142.99,2023-06-29,330.0,32474.92,2023-06-29
Biomass,3.0,40.07,2022-12-23,3.0,20.07,2022-12-23
Coal,2.0,50.0,2015-10-26,1.0,0.0,NaT
Hydro,2.0,65.0,2021-03-31,2.0,65.0,2021-03-31
Natural Gas,72.0,15004.176,2023-06-30,57.0,13858.333,2023-06-30
Nuclear,6.0,1551.9,2016-04-29,5.0,1502.8,NaT
Offshore Wind,19.0,19366.03,2023-06-02,,,NaT
Oil,,,NaT,1.0,6.6,2022-06-10
Onshore Wind,110.0,11620.7028,2023-03-11,100.0,7196.6828,2023-03-11
Other,119.0,24962.954,2023-02-15,3.0,27.954,2021-10-04


In [10]:
# Calculate the differences between the old and new
for col in iso_pjm_project_agg.columns:
    if pd.api.types.is_datetime64_any_dtype(iso_pjm_project_agg[col]):
        continue
    else:
        both_pjm_project_aggs[f"{col}_pct_diff"] = (both_pjm_project_aggs[f"{col}_fyi"] - both_pjm_project_aggs[f"{col}_iso"]) / both_pjm_project_aggs[f"{col}_iso"]

In [11]:
both_pjm_project_aggs.sort_values(by="total_capacity_mw_iso", ascending=False)[["n_projects_pct_diff", "total_capacity_mw_pct_diff"]] * 100

Unnamed: 0_level_0,n_projects_pct_diff,total_capacity_mw_pct_diff
resource_clean,Unnamed: 1_level_1,Unnamed: 2_level_1
Solar,2.158273,13.72223
Battery Storage,-0.909091,8.215786
Natural Gas,26.315789,8.26826
Onshore Wind,10.0,61.47304
Unknown,,
Nuclear,20.0,3.267234
Hydro,0.0,0.0
Other,3866.666667,89200.11
Biomass,0.0,99.65122
Oil,,


In [27]:
iso_ng_queue_ids = iso_projects_long[(iso_projects_long.iso_region == "PJM") & (iso_projects_long.resource_clean == "Natural Gas")].queue_id.unique()

In [30]:
fyi_projects_long[
    (fyi_projects_long.power_market == "PJM") 
    & (fyi_projects_long.resource_clean == "Natural Gas")
    & (~fyi_projects_long.queue_id.isin(iso_ng_queue_ids))]

Unnamed: 0,state,county,project_id,queue_id,date_proposed_online,developer,power_market,interconnection_status,point_of_interconnection,project_name,date_entered_queue,queue_status,iso_region,utility,is_actionable,is_nearly_certain,actual_completion_date,withdrawn_date,capacity_mw,resource_clean,state_id_fips,county_id_fips,frac_locations_in_county,source,state_permitting_type,co2e_tonnes_per_year,ordinance_earliest_year_mentioned,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance_text,ordinance_via_reldi,ordinance_via_solar_nrel,ordinance_via_wind_nrel,ordinance_via_nrel_is_de_facto,ordinance_via_self_maintained,ordinance_is_restrictive,is_hybrid,resource_class,surrogate_id
21979,New Jersey,Bergen,pjm-ab2-092,AB2-092,2027-06-30,,PJM,IA Executed,Bergen 138kV,Bergen Generating Station,2016-04-26,active,pjm,PSEG,False,True,NaT,NaT,51.1,Natural Gas,34,34003,1.0,fyi,Local,159956.38436,,,,,False,,,,,False,False,fossil,21979
22505,Illinois,Grundy,pjm-ad1-039,AD1-039,2025-02-28,,PJM,IA Executed,Kendall-Tazewell & Dresden-Mole Creek,Three Rivers Energy Center,2017-07-29,active,pjm,ComEd,False,True,NaT,NaT,102.7,Natural Gas,17,17063,1.0,fyi,Local,321477.899682,,,,,False,False,False,False,False,False,False,fossil,22505
22798,Illinois,Will,pjm-ad2-194,AD2-194,2027-01-01,,PJM,IA Executed,Elwood 345 kV,Jackson Generation,2018-03-31,active,pjm,ComEd,False,True,NaT,NaT,120.0,Natural Gas,17,17197,1.0,fyi,Local,66106.189334,,,,,False,False,False,False,False,False,False,fossil,22798
25957,Virginia,Caroline,pjm-ah1-682,AH1-682,2027-11-30,,PJM,System Impact Study,Ladysmith CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,21.0,Natural Gas,51,51033,1.0,fyi,Hybrid,65735.500422,,,,,False,False,False,False,True,True,False,fossil,25957
25958,Virginia,Caroline,pjm-ah1-683,AH1-683,2027-10-15,,PJM,System Impact Study,Ladysmith CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,25.0,Natural Gas,51,51033,1.0,fyi,Hybrid,78256.548121,,,,,False,False,False,False,True,True,False,fossil,25958
25959,Virginia,Fauquier,pjm-ah1-684,AH1-684,2028-04-15,,PJM,System Impact Study,Remington CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,23.0,Natural Gas,51,51061,1.0,fyi,Hybrid,71996.024272,,,,,False,,,,False,False,False,fossil,25959
25960,Virginia,Fauquier,pjm-ah1-685,AH1-685,2028-05-31,,PJM,System Impact Study,Remington CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,26.0,Natural Gas,51,51061,1.0,fyi,Hybrid,81386.810046,,,,,False,,,,False,False,False,fossil,25960
25965,Virginia,Caroline,pjm-ah1-691,AH1-691,2029-05-31,,PJM,System Impact Study,Ladysmith CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,26.0,Natural Gas,51,51033,1.0,fyi,Hybrid,81386.810046,,,,,False,False,False,False,True,True,False,fossil,25965
25966,Virginia,Fauquier,pjm-ah1-693,AH1-693,2029-04-15,,PJM,System Impact Study,Remington CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,20.0,Natural Gas,51,51061,1.0,fyi,Hybrid,62605.238497,,,,,False,,,,False,False,False,fossil,25966
25967,Virginia,Caroline,pjm-ah1-694,AH1-694,2028-10-15,,PJM,System Impact Study,Ladysmith CT 230 kV,,NaT,active,pjm,Dominion,True,False,NaT,NaT,18.0,Natural Gas,51,51033,1.0,fyi,Hybrid,56344.714647,,,,,False,False,False,False,True,True,False,fossil,25967


In [39]:
# TODO: look into these projects and see what their resource is labeled as in raw data
fyi_projects_long[
    (fyi_projects_long.power_market == "PJM") 
    & (fyi_projects_long.resource_clean == "Other")]

Unnamed: 0,state,county,project_id,queue_id,date_proposed_online,developer,power_market,interconnection_status,point_of_interconnection,project_name,date_entered_queue,queue_status,iso_region,utility,is_actionable,is_nearly_certain,actual_completion_date,withdrawn_date,capacity_mw,resource_clean,state_id_fips,county_id_fips,frac_locations_in_county,source,state_permitting_type,co2e_tonnes_per_year,ordinance_earliest_year_mentioned,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance_text,ordinance_via_reldi,ordinance_via_solar_nrel,ordinance_via_wind_nrel,ordinance_via_nrel_is_de_facto,ordinance_via_self_maintained,ordinance_is_restrictive,is_hybrid,resource_class,surrogate_id
270,,,pjm-1119633,1119633,NaT,,PJM,,OVEC-PJM,,2009-03-10,active,pjm,,False,False,NaT,NaT,142.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,270
609,,,pjm-1119653,1119653,NaT,,PJM,,OVEC-PJM,,2009-03-10,active,pjm,,False,False,NaT,NaT,118.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,609
886,,,pjm-1121578,1121578,NaT,,PJM,,PJM-WEC,,2009-03-12,active,pjm,,False,False,NaT,NaT,10.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,886
956,,,pjm-1132597,1132597,NaT,,PJM,,PJM-MECS,,2009-03-25,active,pjm,,False,False,NaT,NaT,25.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,956
1081,,,pjm-1132611,1132611,NaT,,PJM,,PJM-FE,,2009-03-25,active,pjm,,False,False,NaT,NaT,38.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,1081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29562,,,pjm-y3-069-4549810,Y3-069 4549810,NaT,,PJM,System Impact Study,NIPS-PJM,,2013-04-02,active,pjm,,True,False,NaT,NaT,233.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,29562
29626,,,pjm-z1-027-4566961,Z1-027 4566961,NaT,,PJM,System Impact Study,NIPS-PJM,,2013-06-07,active,pjm,,True,False,NaT,NaT,124.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,29626
29640,,,pjm-z1-046-4586438,Z1-046 4586438,NaT,,PJM,System Impact Study,TVA-PJM,,2013-08-27,active,pjm,,True,False,NaT,NaT,240.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,29640
29662,,,pjm-z1-067-4593370,Z1-067 4593370,NaT,,PJM,System Impact Study,DUK-PJM,,2013-10-01,active,pjm,,True,False,NaT,NaT,50.0,Other,,,1.0,fyi,,,,,,,False,False,True,True,,True,False,fossil,29662


In [12]:
nyiso_fyi = fyi_projects_long[fyi_projects_long["power_market"] == "NYISO"]
nyiso_iso = iso_projects_long[iso_projects_long["iso_region"] == "NYISO"]
fyi_nyiso_project_agg = agg_resource_long_format(nyiso_fyi, "project_id")
iso_nyiso_project_agg = agg_resource_long_format(nyiso_iso, "surrogate_id")

In [13]:
both_nyiso_project_aggs = fyi_nyiso_project_agg.merge(iso_nyiso_project_agg, how="outer", left_index=True, right_index=True, validate="1:1", suffixes=("_fyi", "_iso"))
both_nyiso_project_aggs

Unnamed: 0_level_0,n_projects_fyi,total_capacity_mw_fyi,max_date_entered_queue_fyi,n_projects_iso,total_capacity_mw_iso,max_date_entered_queue_iso
resource_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Storage,9.0,629.6,2020-11-18,162.0,21332.56,2024-10-15
Natural Gas,,,NaT,2.0,0.0,2024-10-04
Offshore Wind,5.0,4371.0,2024-08-01,,,NaT
Onshore Wind,24.0,3972.3,2024-10-15,31.0,5612.1,2024-10-15
Other,34.0,9355.1,2025-06-03,,,NaT
Solar,104.0,10241.12,2024-10-15,103.0,10046.12,2024-10-15
Unknown,,,NaT,68.0,16360.5,2025-06-03


In [14]:
# Calculate the differences between the old and new
for col in iso_nyiso_project_agg.columns:
    if pd.api.types.is_datetime64_any_dtype(iso_nyiso_project_agg[col]):
        continue
    else:
        both_nyiso_project_aggs[f"{col}_pct_diff"] = (both_nyiso_project_aggs[f"{col}_fyi"] - both_nyiso_project_aggs[f"{col}_iso"]) / both_nyiso_project_aggs[f"{col}_iso"]

In [15]:
both_nyiso_project_aggs.sort_values(by="total_capacity_mw_iso", ascending=False)[["n_projects_pct_diff", "total_capacity_mw_pct_diff"]] * 100

Unnamed: 0_level_0,n_projects_pct_diff,total_capacity_mw_pct_diff
resource_clean,Unnamed: 1_level_1,Unnamed: 2_level_1
Battery Storage,-94.444444,-97.048643
Unknown,,
Solar,0.970874,1.941048
Onshore Wind,-22.580645,-29.219009
Natural Gas,,
Offshore Wind,,
Other,,


In [46]:
fyi_projects_long[(fyi_projects_long.power_market == "NYISO") & (fyi_projects_long.resource_clean == "Battery Storage")]

Unnamed: 0,state,county,project_id,queue_id,date_proposed_online,developer,power_market,interconnection_status,point_of_interconnection,project_name,date_entered_queue,queue_status,iso_region,utility,is_actionable,is_nearly_certain,actual_completion_date,withdrawn_date,capacity_mw,resource_clean,state_id_fips,county_id_fips,frac_locations_in_county,source,state_permitting_type,co2e_tonnes_per_year,ordinance_earliest_year_mentioned,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance_text,ordinance_via_reldi,ordinance_via_solar_nrel,ordinance_via_wind_nrel,ordinance_via_nrel_is_de_facto,ordinance_via_self_maintained,ordinance_is_restrictive,is_hybrid,resource_class,surrogate_id
18331,New York,Kings,nyiso-0522,522,2027-06-01,,NYISO,Facility Study,Hudson Avenue East 138kV,NYC Energy,2015-12-16,active,nyiso,ConEd,True,False,NaT,NaT,79.9,Battery Storage,36,36047,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18331
18456,New York,Montgomery,nyiso-0683,683,2027-06-01,Key Capture Energy,NYISO,System Impact Study,Coldenham 115kV,KCE NY 2,2018-01-18,active,nyiso,CHG&E,True,False,NaT,NaT,200.0,Battery Storage,36,36057,1.0,fyi,Hybrid,,,,,,False,False,False,False,True,True,False,storage,18456
18515,New York,Ulster,nyiso-0744,744,2028-04-01,,NYISO,Facility Study,East Walden - Modena 115kV,Magruder BESS,2018-07-18,active,nyiso,CHG&E,True,False,NaT,NaT,20.0,Battery Storage,36,36111,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18515
18556,New York,Erie,nyiso-0787,787,2027-01-01,,NYISO,System Impact Study,Gardenville Subsation 115kV,"Levy Grid, LLC",2018-12-20,active,nyiso,NM-NG,True,False,NaT,NaT,150.0,Battery Storage,36,36029,1.0,fyi,Hybrid,,,,,,False,False,False,False,True,True,False,storage,18556
18576,New York,Kings,nyiso-0815,815,2028-03-01,,NYISO,System Impact Study,Gowanus 345kV,Bayonne Energy Center III,2019-04-05,active,nyiso,ConEd,True,False,NaT,NaT,49.8,Battery Storage,36,36047,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18576
18585,New York,Richmond,nyiso-0827,827,NaT,,NYISO,Facility Study,Fresh Kills 13.8kV,Arthur Kill Energy Storage 1,2019-04-22,active,nyiso,ConEd,True,False,NaT,NaT,15.0,Battery Storage,36,36085,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18585
18657,New York,Queens,nyiso-0907,907,2028-01-01,,NYISO,System Impact Study,Hell Gate 138kV,Harlem River Yard,2019-09-04,active,nyiso,NYPA,True,False,NaT,NaT,79.9,Battery Storage,36,36081,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18657
18680,New York,Queens,nyiso-0930,930,2027-03-01,,NYISO,Facility Study,North Queens 27kV,Luyster Creek Energy Storage 3,2019-10-25,active,nyiso,ConEd,True,False,NaT,NaT,15.0,Battery Storage,36,36081,1.0,fyi,Hybrid,,,,,,False,,,,,False,False,storage,18680
18850,New York,Suffolk,nyiso-1113,1113,2027-12-01,Caithness Energy,NYISO,Facility Study,Sills Road 138 kV,CLIES 20 MW,2020-11-18,active,nyiso,LIPA,True,False,NaT,NaT,20.0,Battery Storage,36,36103,1.0,fyi,Hybrid,,2021.0,Riverhead,city,"In October 2021, the Town of Riverhead adopted...",True,True,False,True,,True,False,storage,18850


In [49]:
iso_projects_long[(iso_projects_long.iso_region == "NYISO") & (iso_projects_long.resource_clean == "Battery Storage")]

Unnamed: 0,state,county,county_id_fips,queue_id,resource_clean,project_id,date_proposed_online,developer,entity,interconnection_status,point_of_interconnection,project_name,date_entered_queue,queue_status,iso_region,utility,capacity_mw,state_id_fips,state_permitting_type,co2e_tonnes_per_year,ordinance_earliest_year_mentioned,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance_text,ordinance_via_reldi,ordinance_via_solar_nrel,ordinance_via_wind_nrel,ordinance_via_nrel_is_de_facto,ordinance_via_self_maintained,ordinance_is_restrictive,is_hybrid,is_actionable,is_nearly_certain,resource_class,frac_locations_in_county,source,surrogate_id
552,New York,Suffolk,36103,C24-104,Battery Storage,38920,2028-06-01,,NYISO,SRIS/SIS Pending,West Yaphank 69kV Substation,Horseblock Energy Storage,2024-08-27,active,NYISO,LIPA,100.00,36,Hybrid,,2021.0,Riverhead,city,"In October 2021, the Town of Riverhead adopted...",True,True,False,True,,True,False,False,False,storage,1.0,gridstatus,2519
562,New York,Suffolk,36103,C24-274,Battery Storage,38989,2028-12-01,,NYISO,SRIS/SIS Pending,Southampton 69kV,Southampton Power Station Battery Energy Stora...,2024-09-24,active,NYISO,LIPA,31.96,36,Hybrid,,2021.0,Riverhead,city,"In October 2021, the Town of Riverhead adopted...",True,True,False,True,,True,False,False,False,storage,1.0,gridstatus,2588
567,New York,Ulster,36111,C24-173,Battery Storage,39017,2029-12-01,,NYISO,SRIS/SIS Pending,Saugerties 69 kV,KCE NY 34,2024-09-17,active,NYISO,CHGE,100.00,36,Hybrid,,,,,,False,,,,,False,False,False,False,storage,1.0,gridstatus,2616
573,New York,Suffolk,36103,C24-218,Battery Storage,39044,2029-02-01,,NYISO,SRIS/SIS Pending,Riverhead - Tuthill 69kV,"Poplar Energy Center, LLC",2024-09-20,active,NYISO,LIPA,60.00,36,Hybrid,,2021.0,Riverhead,city,"In October 2021, the Town of Riverhead adopted...",True,True,False,True,,True,False,False,False,storage,1.0,gridstatus,2643
574,New York,Suffolk,36103,C24-164,Battery Storage,39046,2029-12-01,,NYISO,SRIS/SIS Pending,Riverhead - Tuthill 69kV,KCE NY 28,2024-09-16,active,NYISO,LIPA,45.00,36,Hybrid,,2021.0,Riverhead,city,"In October 2021, the Town of Riverhead adopted...",True,True,False,True,,True,False,False,False,storage,1.0,gridstatus,2645
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7008,New York,Jefferson,36045,1237,Battery Storage,56895,2026-12-01,,NYISO,Accepted Cost Allocation/IA in Progress,North Carthage-Taylorville #8 and Black River-...,Sugar Maple Energy Storage,2021-08-24,active,NYISO,NM-NG,20.00,36,Hybrid,,2019.0,Worth,city,"In April 2019, the Worth town board adopted re...",True,,,,True,True,False,True,False,storage,1.0,gridstatus,20327
7014,New York,Niagara,36063,C24-129,Battery Storage,56905,2028-12-01,,NYISO,SRIS/SIS Pending,Dupont to Packard 115 kV Line #184,Buffalo Road Energy Storage,2024-09-05,active,NYISO,NM-NG,100.00,36,Hybrid,,2018.0,multiple,multiple,"Niagara County: In June 2021, Niagara County a...",True,,,,True,True,False,False,False,storage,1.0,gridstatus,20337
7037,New York,Orange,36071,C24-215,Battery Storage,56950,2029-03-01,,NYISO,SRIS/SIS Pending,Cuddebackville-Shoemaker stations 69 kV Line #13,Mount Hope Storage,2024-09-20,active,NYISO,O&R,50.00,36,Hybrid,,,,,,False,False,False,False,,False,False,False,False,storage,1.0,gridstatus,20382
7050,New York,Lewis,36049,C24-263,Battery Storage,56976,2028-07-01,,NYISO,SRIS/SIS Pending,"Austin Rd. - Edic #11, 345 kV",Jefferson Energy Storage,2024-09-23,active,NYISO,NM-NG,100.00,36,Hybrid,,,,,,False,False,False,False,,False,False,False,False,storage,1.0,gridstatus,20408
