# ACP / EIA Combined table

In [1]:
import pandas as pd

In [2]:
from google.cloud import bigquery

def get_bigquery_table_version(dataset_id, table_name, project_id="dbcp-dev-350818"):
    """
    Get the data version of a BigQuery table.

    The dbcp.commands.publish script generates a version number for each data release
    and adds it as a label to the BQ tables.

    Args:
        dataset_id: the BQ dataset ID
        table_name: the name of the table
        project_id: the GCP project id

    Return:
        the current DBCP version number of the requested table
    """
    client = bigquery.Client()

    table_ref = f"{project_id}.{dataset_id}.{table_name}"
    table = client.get_table(table_ref)  # Fetch table metadata

    labels = table.labels  # Get the labels dictionary
    return labels["version"]

## Get old data from published archives

In [3]:
from dbcp.extract.helpers import cache_gcs_archive_file_locally

table_name = "county_concrete_mw"
version = get_bigquery_table_version("data_mart_dev", table_name)
uri = f"gs://dgm-outputs/{version}/data_mart/{table_name}.parquet"
data_cache = "/app/data/gcp_outputs"

county_concrete_mw_path = cache_gcs_archive_file_locally(uri, data_cache)
county_concrete_mw_old = pd.read_parquet(county_concrete_mw_path)



In [4]:
county_concrete_mw_old.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   state_id_fips                    463 non-null    string 
 1   county_id_fips                   463 non-null    string 
 2   state                            463 non-null    string 
 3   county                           463 non-null    string 
 4   iso_region                       463 non-null    string 
 5   resource_clean                   463 non-null    string 
 6   capacity_under_construction_mw   280 non-null    float64
 7   capacity_awaiting_permitting_mw  228 non-null    float64
 8   capacity_total_proposed_mw       463 non-null    float64
dtypes: float64(3), string(6)
memory usage: 32.7 KB


## Get new data from local parquet

In [6]:
county_concrete_mw_new = pd.read_parquet(
    "../../../data/output/data_mart/county_concrete_mw.parquet")
county_concrete_mw_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 427 entries, 0 to 426
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   state_id_fips                    427 non-null    string 
 1   county_id_fips                   427 non-null    string 
 2   state                            427 non-null    string 
 3   county                           427 non-null    string 
 4   iso_region                       427 non-null    string 
 5   resource_clean                   427 non-null    string 
 6   capacity_under_construction_mw   263 non-null    float64
 7   capacity_awaiting_permitting_mw  204 non-null    float64
 8   capacity_total_proposed_mw       427 non-null    float64
dtypes: float64(3), string(6)
memory usage: 30.1 KB


## Sanity check: are old and new tables different

In [8]:
county_concrete_mw_new.equals(county_concrete_mw_old)

False

In [26]:
# pd.testing.assert_frame_equal(
#     capacity_by_iso_region_new,
#     capacity_by_iso_region_old
# )

## Simplify ISO region

In [9]:
GS_REGIONS = ("MISO", "NYISO", "ISONE", "PJM", "ERCOT", "SPP", "CAISO")

In [10]:
county_concrete_mw_new["iso_region_clean"] = county_concrete_mw_new["iso_region"].mask(
    ~county_concrete_mw_new["iso_region"].isin(GS_REGIONS), other="NON-ISO")

county_concrete_mw_old["iso_region_clean"] = county_concrete_mw_old["iso_region"].mask(
    ~county_concrete_mw_old["iso_region"].isin(GS_REGIONS), other="NON-ISO")

## % change in capacity by ISO region

In [11]:
capacity_by_iso_region_new = county_concrete_mw_new.groupby("iso_region_clean").sum()[
['capacity_under_construction_mw', 'capacity_awaiting_permitting_mw', 'capacity_total_proposed_mw']]

capacity_by_iso_region_old = county_concrete_mw_old.groupby("iso_region_clean").sum()[
['capacity_under_construction_mw', 'capacity_awaiting_permitting_mw', 'capacity_total_proposed_mw']]

In [12]:
capacity_by_iso_region_pct_change = (capacity_by_iso_region_new - capacity_by_iso_region_old) / capacity_by_iso_region_old

In [13]:
capacity_by_iso_region_pct_change.sort_values(by="capacity_total_proposed_mw")

Unnamed: 0_level_0,capacity_under_construction_mw,capacity_awaiting_permitting_mw,capacity_total_proposed_mw
iso_region_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ISONE,0.14767,-0.666376,-0.407696
ERCOT,-0.260584,-0.320579,-0.287065
MISO,0.266163,-0.460555,-0.100948
CAISO,0.089109,-0.251107,-0.077473
SPP,0.0,0.0,0.0
NYISO,-0.562084,0.589992,0.177722
NON-ISO,0.307078,0.500378,0.423192
PJM,-0.064461,0.907947,0.544954


## Drill down into individual projects

In order to understand these numbers better, look at individual projects in the old and new data

In [14]:
# Note that this requires comparing old and new results of the query `eia860m_current.sql`
# Until this has been added as an intermediate table, it can be saved as a parquet manually and compared

eia_old = pd.read_parquet("eia860m_current_old.parquet")
eia_new = pd.read_parquet("eia860m_current_new.parquet")

In [15]:
eia_new.head()

Unnamed: 0,report_date,plant_id_eia,plant_name_eia,utility_id_eia,utility_name_eia,generator_id,capacity_mw,state_id_fips,county_id_fips,state,...,operational_status_category,raw_operational_status_code,planned_derate_date,planned_generator_retirement_date,planned_net_summer_capacity_derate_mw,planned_net_summer_capacity_uprate_mw,planned_uprate_date,technology_description,raw_state,raw_county
0,2024-12-01,229,Cow Creek,14328,Pacific Gas & Electric Co.,1,0.7,6,6089,California,...,retired,RE,NaT,NaT,,,NaT,Conventional Hydroelectric,CA,Shasta
1,2024-12-01,229,Cow Creek,14328,Pacific Gas & Electric Co.,2,0.7,6,6089,California,...,retired,RE,NaT,NaT,,,NaT,Conventional Hydroelectric,CA,Shasta
2,2024-12-01,253,Kilarc,14328,Pacific Gas & Electric Co.,1,1.5,6,6089,California,...,retired,RE,NaT,NaT,,,NaT,Conventional Hydroelectric,CA,Shasta
3,2024-12-01,594,Indian River Generating Station,9332,Indian River Operations Inc,4,445.5,10,10005,Delaware,...,existing,OP,NaT,2025-02-01,,,NaT,Conventional Steam Coal,DE,Sussex
4,2024-12-01,645,Big Bend,18454,Tampa Electric Co,ST4,486.0,12,12057,Florida,...,existing,OP,NaT,NaT,37.0,,NaT,Conventional Steam Coal,FL,Hillsborough


In [16]:
eia_old['iso_region_clean'] = eia_old['iso_region'].mask(~eia_old["iso_region"].isin(GS_REGIONS), other="NON-ISO")
eia_new['iso_region_clean'] = eia_new['iso_region'].mask(~eia_new["iso_region"].isin(GS_REGIONS), other="NON-ISO")

In [17]:
eia_new.columns

Index(['report_date', 'plant_id_eia', 'plant_name_eia', 'utility_id_eia',
       'utility_name_eia', 'generator_id', 'capacity_mw', 'state_id_fips',
       'county_id_fips', 'state', 'county', 'iso_region',
       'current_planned_generator_operating_date', 'energy_source_code_1',
       'prime_mover_code', 'energy_storage_capacity_mwh',
       'fuel_type_code_pudl', 'generator_retirement_date', 'latitude',
       'longitude', 'operational_status_code', 'operational_status_category',
       'raw_operational_status_code', 'planned_derate_date',
       'planned_generator_retirement_date',
       'planned_net_summer_capacity_derate_mw',
       'planned_net_summer_capacity_uprate_mw', 'planned_uprate_date',
       'technology_description', 'raw_state', 'raw_county',
       'iso_region_clean'],
      dtype='object')

In [18]:
eia_new.groupby(
    ["operational_status_code", "raw_operational_status_code", "operational_status_category"]
).plant_id_eia.count()

operational_status_code  raw_operational_status_code  operational_status_category
1                        P                            proposed                        75
2                        L                            proposed                        49
3                        T                            proposed                        31
4                        U                            proposed                       120
5                        V                            proposed                       133
6                        TS                           proposed                        81
7                        OA                           existing                        16
                         OP                           existing                       600
                         OS                           existing                         6
                         SB                           existing                         3
8                        RE 

In [19]:
# Combination of plant ID and generator ID is unique
eia_new.groupby(["plant_id_eia", "generator_id"]).count()["plant_name_eia"].max()

1

In [20]:
eia_plant_region_status_capacity_new = eia_new.groupby(
    ['plant_id_eia', 'iso_region_clean', 'operational_status_category'], dropna=False
).agg({"capacity_mw": sum, "plant_id_eia": 'nunique'}).rename(columns={"plant_id_eia": "plant_count"}).unstack()
eia_plant_region_status_capacity_old = eia_old.groupby(
    ['plant_id_eia', 'iso_region_clean', 'operational_status_category'], dropna=False
).agg({"capacity_mw": sum, "plant_id_eia": 'nunique'}).rename(columns={"plant_id_eia": "plant_count"}).unstack()

In [21]:
eia_plant_region_status_capacity_combined = eia_plant_region_status_capacity_old.join(
    eia_plant_region_status_capacity_new, 
    how="outer",
    lsuffix="_old",
    rsuffix="_new",
)

In [45]:
# Step 1: Stack to reshape the columns
stacked = (eia_plant_region_status_capacity_combined
    .reset_index().groupby("iso_region_clean").sum().stack(level=1)
          )

# Step 2: Create percent change for each metric group
for metric in ['capacity_mw', 'plant_count']:
    old = f"{metric}_old"
    new = f"{metric}_new"
    pct_change = f"{metric}_pct_change"

    stacked[pct_change] = (stacked[new] - stacked[old]) / stacked[old] * 100

# Step 3: Unstack back to original column layout
result = stacked.unstack()

  .reset_index().groupby("iso_region_clean").sum().stack(level=1)


In [48]:
result

Unnamed: 0_level_0,capacity_mw_new,capacity_mw_new,capacity_mw_new,capacity_mw_new,capacity_mw_old,capacity_mw_old,capacity_mw_old,capacity_mw_old,plant_count_new,plant_count_new,...,plant_id_eia,plant_id_eia,capacity_mw_pct_change,capacity_mw_pct_change,capacity_mw_pct_change,capacity_mw_pct_change,plant_count_pct_change,plant_count_pct_change,plant_count_pct_change,plant_count_pct_change
operational_status_category,existing,proposed,retired,Unnamed: 4_level_1,existing,proposed,retired,Unnamed: 8_level_1,existing,proposed,...,retired,Unnamed: 13_level_1,existing,proposed,retired,Unnamed: 17_level_1,existing,proposed,retired,Unnamed: 21_level_1
iso_region_clean,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
CAISO,3925.500012,5949.900004,9.6,,956.5,2388.400002,0.0,,26.0,59.0,...,,8777289.0,310.40251,149.116563,inf,,-21.212121,84.375,inf,
ERCOT,4074.900002,19191.499981,19.4,,5840.900045,8317.899995,399.699997,,17.0,89.0,...,,9869606.0,-30.235067,130.725303,-95.14636,,13.333333,81.632653,0.0,
ISONE,216.599999,973.199999,0.0,,180.7,925.8,0.0,,50.0,22.0,...,,7178116.0,19.867183,5.119896,,,47.058824,4.761905,,
MISO,18904.099933,5205.100012,1582.699999,,1126.700004,6303.399992,0.0,,84.0,61.0,...,,10741019.0,1577.829047,-17.42393,inf,,320.0,10.909091,inf,
NON-ISO,9799.300017,14124.69998,586.099998,,6129.30001,8761.500015,272.0,,135.0,98.0,...,,19001212.0,59.876332,61.213262,115.47794,,206.818182,7.692308,400.0,
NYISO,724.699999,1472.500001,0.4,,49.1,325.9,1.8,,53.0,38.0,...,,7260420.0,1375.967408,351.825713,-77.777777,,231.25,90.0,0.0,
PJM,6623.700007,3580.399997,2115.3,,513.800001,5302.399998,37.9,,58.0,54.0,...,,11105965.0,1189.159205,-32.47586,5481.266476,,100.0,-1.818182,350.0,


In [49]:
# (
#     eia_plant_region_status_capacity_combined
#     .reset_index()
#     .groupby("iso_region_clean").sum()
#     .assign(
#         plant_count_pct_change=lambda df: (df.plant_count_new - df.plant_count_old) / df.plant_count_old,
#         capacity_mw_pct_change=lambda df: (df.capacity_mw_new - df.capacity_mw_old) / df.capacity_mw_old,
#     )
# )

In [50]:
eia_combined = pd.merge(
    eia_old, 
    eia_new[
        ['report_date', 
         'plant_id_eia',
         'plant_name_eia',
         'generator_id',
         'utility_id_eia',
         'utility_name_eia',
         'iso_region_clean',
         'capacity_mw', 
         'current_planned_generator_operating_date', 
         'operational_status_code',
         'operational_status_category',
         'raw_operational_status_code',
        ]
    ],
    on=["plant_id_eia", "plant_name_eia", "generator_id",'utility_id_eia', 'utility_name_eia' , "iso_region_clean"],
    how="outer",
    suffixes=("_old", "_new"),
)

#### Check that join did not cause a fan-out

In [51]:
eia_combined.shape

(1712, 38)

In [52]:
eia_new["unique_id"] = eia_new["plant_id_eia"].map(str) + "_" + eia_new["generator_id"].map(str)
eia_old["unique_id"] = eia_old["plant_id_eia"].map(str) + "_" + eia_old["generator_id"].map(str)

In [53]:
len(set(eia_new.unique_id.to_list() + eia_old.unique_id.to_list()))

1698

In [54]:
eia_combined.query("report_date_old.isnull()").iso_region_clean.unique()

array(['CAISO', 'PJM', 'NON-ISO', 'MISO', 'NYISO', 'ISONE', 'ERCOT'],
      dtype=object)

### ERCOT

In [55]:
(
    eia_combined.query("iso_region_clean == 'ERCOT'")
    .groupby(
        ["raw_operational_status_code_old", 
         "operational_status_category_old", 
         "raw_operational_status_code_new",
         "operational_status_category_new",
        ],
        dropna=False
    )
    .agg({"plant_id_eia": "nunique", "capacity_mw_old": sum, "capacity_mw_new": sum})
    .rename(columns={"plant_id_eia": "plant_count"})
    .assign(capacity_pct_change=lambda df: (df.capacity_mw_new - df.capacity_mw_old) / df.capacity_mw_old)
).sort_values(by="capacity_pct_change")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,plant_count,capacity_mw_old,capacity_mw_new,capacity_pct_change
raw_operational_status_code_old,operational_status_category_old,raw_operational_status_code_new,operational_status_category_new,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
L,proposed,,,6,360.900003,0.0,-1.0
V,proposed,,,5,878.0,0.0,-1.0
OP,existing,,,14,5653.400045,0.0,-1.0
U,proposed,,,7,876.499999,0.0,-1.0
P,proposed,,,5,960.099998,0.0,-1.0
RE,retired,,,2,399.699997,0.0,-1.0
T,proposed,,,1,334.299988,0.0,-1.0
TS,proposed,,,9,1546.300006,0.0,-1.0
V,proposed,V,proposed,3,925.0,925.0,0.0
U,proposed,V,proposed,1,200.0,200.0,0.0


In [56]:
old_by_status_category = (
    eia_combined.query("iso_region_clean == 'ERCOT'")
    .groupby(
        [
         "operational_status_category_old", 
        ],
        dropna=False
    )
    .agg({"plant_id_eia": "nunique", "capacity_mw_old": sum})
    .rename(columns={"plant_id_eia": "plant_count"})
)
old_by_status_category

Unnamed: 0_level_0,plant_count,capacity_mw_old
operational_status_category_old,Unnamed: 1_level_1,Unnamed: 2_level_1
existing,15,5840.900045
proposed,49,8317.899995
retired,2,399.699997
,91,0.0


## Focus on ERCOT plants that disappeared

In [57]:
eia_ercot = eia_combined.query("iso_region_clean == 'ERCOT'")

#### % plants ERCOT disappearing

In [58]:
ercot_total_by_status_cat = (
    eia_ercot
    .groupby("operational_status_category_old")
    .agg({"capacity_mw_old": sum, "capacity_mw_new": sum})
)

ercot_disappeared_by_status_cat = (
    eia_ercot.query("operational_status_category_new.isnull()")
    .groupby("operational_status_category_old")
    .agg({"capacity_mw_old": sum, "capacity_mw_new": sum})
)

(ercot_disappeared_by_status_cat - ercot_total_by_status_cat) / ercot_total_by_status_cat

Unnamed: 0_level_0,capacity_mw_old,capacity_mw_new
operational_status_category_old,Unnamed: 1_level_1,Unnamed: 2_level_1
existing,-0.032101,-1.0
proposed,-0.404165,-1.0
retired,0.0,


In [59]:
(
    eia_combined.query("iso_region_clean == 'ERCOT' and operational_status_code_new.isnull()")
    .groupby(
        ["operational_status_code_old", "raw_operational_status_code_old", "operational_status_category_old"],
        dropna=False
    )
    .agg({"plant_id_eia": "nunique", "capacity_mw_old": sum})
).rename(columns={"plant_id_eia": "plant_count"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,plant_count,capacity_mw_old
operational_status_code_old,raw_operational_status_code_old,operational_status_category_old,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,P,proposed,5,960.099998
2.0,L,proposed,6,360.900003
3.0,T,proposed,1,334.299988
4.0,U,proposed,7,876.499999
5.0,V,proposed,5,878.0
6.0,TS,proposed,9,1546.300006
7.0,OP,existing,14,5653.400045
8.0,RE,retired,2,399.699997


In [60]:
eia_combined.query(
    "iso_region_clean == 'ERCOT' " \
    "and operational_status_code_old.isnull() " \
    "and raw_operational_status_code_new == 'OP' " \
)

Unnamed: 0,report_date_old,plant_id_eia,plant_name_eia,utility_id_eia,utility_name_eia,generator_id,capacity_mw_old,state_id_fips,county_id_fips,state,...,technology_description,raw_state,raw_county,iso_region_clean,report_date_new,capacity_mw_new,current_planned_generator_operating_date_new,operational_status_code_new,operational_status_category_new,raw_operational_status_code_new
776,NaT,7512,Arthur Von Rosenberg,16604,City of San Antonio - (TX),2,,,,,...,,,,ERCOT,2024-12-01,187.5,NaT,7.0,existing,OP
804,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN1,,,,,...,,,,ERCOT,2024-12-01,84.900002,NaT,7.0,existing,OP
805,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN2,,,,,...,,,,ERCOT,2024-12-01,84.900002,NaT,7.0,existing,OP
806,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN3,,,,,...,,,,ERCOT,2024-12-01,84.900002,NaT,7.0,existing,OP
807,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN4,,,,,...,,,,ERCOT,2024-12-01,84.900002,NaT,7.0,existing,OP
808,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN5,,,,,...,,,,ERCOT,2024-12-01,84.900002,NaT,7.0,existing,OP
809,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN6,,,,,...,,,,ERCOT,2024-12-01,129.199997,NaT,7.0,existing,OP
810,NaT,50815,"Odyssey Energy Altura Cogen, LLC",55879,Odyssey Operating Services,GEN7,,,,,...,,,,ERCOT,2024-12-01,89.900002,NaT,7.0,existing,OP
861,NaT,55091,Midlothian Energy Facility,12501,Midlothian Energy LLC,STK1,,,,,...,,,,ERCOT,2024-12-01,289.0,NaT,7.0,existing,OP
920,NaT,56754,Goat Wind LP,64644,Goat Wind LLC,1,,,,,...,,,,ERCOT,2024-12-01,80.0,NaT,7.0,existing,OP


In [61]:
eia_combined.query("iso_region_clean == 'ERCOT' and operational_status_code_new.isnull() and raw_operational_status_code_old == 'OP'")

Unnamed: 0,report_date_old,plant_id_eia,plant_name_eia,utility_id_eia,utility_name_eia,generator_id,capacity_mw_old,state_id_fips,county_id_fips,state,...,technology_description,raw_state,raw_county,iso_region_clean,report_date_new,capacity_mw_new,current_planned_generator_operating_date_new,operational_status_code_new,operational_status_category_new,raw_operational_status_code_new
16,2024-09-01,3439,Laredo,16604,City of San Antonio - (TX),4,131.800003,48,48479,Texas,...,Natural Gas Fired Combustion Turbine,TX,Webb,ERCOT,NaT,,NaT,,,
17,2024-09-01,3439,Laredo,16604,City of San Antonio - (TX),5,131.800003,48,48479,Texas,...,Natural Gas Fired Combustion Turbine,TX,Webb,ERCOT,NaT,,NaT,,,
20,2024-09-01,3441,Nueces Bay,16604,City of San Antonio - (TX),7,351.0,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
21,2024-09-01,3441,Nueces Bay,16604,City of San Antonio - (TX),8,189.600006,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
22,2024-09-01,3441,Nueces Bay,16604,City of San Antonio - (TX),9,189.600006,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
23,2024-09-01,4939,Barney M Davis,16604,City of San Antonio - (TX),1,352.0,48,48355,Texas,...,Natural Gas Steam Turbine,TX,Nueces,ERCOT,NaT,,NaT,,,
24,2024-09-01,4939,Barney M Davis,16604,City of San Antonio - (TX),2,351.0,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
25,2024-09-01,4939,Barney M Davis,16604,City of San Antonio - (TX),3,189.600006,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
26,2024-09-01,4939,Barney M Davis,16604,City of San Antonio - (TX),4,189.600006,48,48355,Texas,...,Natural Gas Fired Combined Cycle,TX,Nueces,ERCOT,NaT,,NaT,,,
95,2024-09-01,55137,Rio Nogales Power Project,16604,City of San Antonio - (TX),CTG3,189.0,48,48187,Texas,...,Natural Gas Fired Combined Cycle,TX,Guadalupe,ERCOT,NaT,,NaT,,,


- Compare % change for existing plants to % change in retirement (if existing decreases, expect retirement to increase)
- For all plants make sure status change is sensible (join on table and ID)
- For data integrity, compute % of plants that behave unexpectedly (i.e. disappear from data)
    * On merged table, combine before and after state and group by these "paths" to see how common different changes are
    * Check individual generator capacity changing
    * Generate chart for previous quarterly updates to get historic context

## By State

In [62]:
capacity_by_state_new = county_concrete_mw_new.groupby("state").sum()[
['capacity_under_construction_mw', 'capacity_awaiting_permitting_mw', 'capacity_total_proposed_mw']]

capacity_by_state_old = county_concrete_mw_old.groupby("state").sum()[
['capacity_under_construction_mw', 'capacity_awaiting_permitting_mw', 'capacity_total_proposed_mw']]

capacity_by_state_pct_change = (capacity_by_state_new - capacity_by_state_old) / capacity_by_state_old
capacity_by_state_pct_change.sort_values(by="capacity_total_proposed_mw")

Unnamed: 0_level_0,capacity_under_construction_mw,capacity_awaiting_permitting_mw,capacity_total_proposed_mw
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Missouri,-0.967333,,-0.967333
Montana,-1.0,-0.950943,-0.956347
Hawaii,-1.0,inf,-0.938
Maryland,-0.716358,0.060606,-0.678708
Connecticut,0.004589,-1.0,-0.665416
Wyoming,-0.345279,-0.99825,-0.611689
South Carolina,-0.210526,-1.0,-0.582405
Mississippi,1.0,-0.659013,-0.581103
New Jersey,-0.968182,0.0,-0.560526
Idaho,-1.0,inf,-0.539171
