# ACP / EIA Combined table

In [2]:
import pandas as pd

In [6]:
from google.cloud import bigquery

def get_bigquery_table_version(
    dataset_id: str, table_name: str, project_id: str = "dbcp-dev-350818"
) -> str:
    """
    Get the data version of a BigQuery table.

    The dbcp.commands.publish script generates a version number for each data release
    and adds it as a label to the BQ tables.

    Args:
        dataset_id: the BQ dataset ID
        table_name: the name of the table
        project_id: the GCP project id

    Returns:
        the current DBCP version number of the requested table

    Raises:
        KeyError: if the table carries no "version" label. The message names the
            fully-qualified table and the labels that were found.
    """
    client = bigquery.Client()

    table_ref = f"{project_id}.{dataset_id}.{table_name}"
    table = client.get_table(table_ref)  # Fetch table metadata

    # Treat a missing labels mapping like an empty one so both cases report the same error.
    labels = table.labels or {}
    if "version" not in labels:
        raise KeyError(
            f"BigQuery table {table_ref} has no 'version' label; "
            f"labels found: {sorted(labels)}"
        )
    return labels["version"]

In [13]:
from dbcp.extract.helpers import cache_gcs_archive_file_locally

# Which table to compare, and where the downloaded copy is kept locally.
table_name = "county_concrete_mw"
data_cache = "/app/data/gcp_outputs"

# The version label on the dev data mart table selects the matching folder in the
# outputs bucket.
version = get_bigquery_table_version("data_mart_dev", table_name)
uri = f"gs://dgm-outputs/{version}/data_mart/{table_name}.parquet"

# NOTE(review): presumably downloads the file on first use and returns the local path —
# confirm against dbcp.extract.helpers.
county_concrete_mw_path = cache_gcs_archive_file_locally(uri, data_cache)
county_concrete_mw_old = pd.read_parquet(county_concrete_mw_path)

In [14]:
# Row count, dtypes and non-null counts of the old table: the baseline for the comparison below.
county_concrete_mw_old.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 455 entries, 0 to 454
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   state_id_fips                    455 non-null    string 
 1   county_id_fips                   455 non-null    string 
 2   state                            455 non-null    string 
 3   county                           455 non-null    string 
 4   iso_region                       455 non-null    string 
 5   resource_clean                   455 non-null    string 
 6   capacity_under_construction_mw   295 non-null    float64
 7   capacity_awaiting_permitting_mw  202 non-null    float64
 8   capacity_total_proposed_mw       455 non-null    float64
dtypes: float64(3), string(6)
memory usage: 32.1 KB


In [22]:
# The "new" side of the comparison is read from the local data/output directory
# (relative path), then summarised the same way as the old table.
county_concrete_mw_new_path = "../../../data/output/data_mart/county_concrete_mw.parquet"
county_concrete_mw_new = pd.read_parquet(county_concrete_mw_new_path)
county_concrete_mw_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   state_id_fips                    463 non-null    string 
 1   county_id_fips                   463 non-null    string 
 2   state                            463 non-null    string 
 3   county                           463 non-null    string 
 4   iso_region                       463 non-null    string 
 5   resource_clean                   463 non-null    string 
 6   capacity_under_construction_mw   280 non-null    float64
 7   capacity_awaiting_permitting_mw  228 non-null    float64
 8   capacity_total_proposed_mw       463 non-null    float64
dtypes: float64(3), string(6)
memory usage: 32.7 KB


## Sanity check: are the old and new tables different?

In [24]:
# Compare the loaded tables themselves. The capacity_by_iso_region_* aggregates are only
# built in a later section, so referencing them here raises NameError on a fresh
# Restart & Run All.
county_concrete_mw_new.equals(county_concrete_mw_old)

False

In [26]:
# NOTE(review): disabled check. The `.equals` cell above reports False, so this assertion
# would presumably raise if re-enabled; it also names frames that are only assigned in the
# "% change in capacity by ISO region" section further down. Confirm whether to delete it
# or move it below that section as a diff aid.
# pd.testing.assert_frame_equal(
#     capacity_by_iso_region_new,
#     capacity_by_iso_region_old
# )

## Simplify ISO region

In [33]:
# iso_region values that keep their own name; the cleaning step that follows relabels
# every other value as "NON-ISO".
GS_REGIONS = (
    "MISO",
    "NYISO",
    "ISONE",
    "PJM",
    "ERCOT",
    "SPP",
    "CAISO",
)

In [35]:
# Apply the same simplification to both snapshots so they can be grouped on a common key:
# keep iso_region where it is one of GS_REGIONS, otherwise use "NON-ISO".
for snapshot in (county_concrete_mw_new, county_concrete_mw_old):
    in_gs_regions = snapshot["iso_region"].isin(GS_REGIONS)
    snapshot["iso_region_clean"] = snapshot["iso_region"].where(in_gs_regions, other="NON-ISO")

## % change in capacity by ISO region

In [36]:
# Select the capacity columns *before* aggregating so .sum() never touches the string
# columns (state, county, resource_clean, ...). Summing those is wasted work at best and,
# depending on the pandas version, concatenates text or raises.
capacity_cols = [
    "capacity_under_construction_mw",
    "capacity_awaiting_permitting_mw",
    "capacity_total_proposed_mw",
]

# One row per simplified ISO region, one column per capacity measure (MW totals).
capacity_by_iso_region_new = county_concrete_mw_new.groupby("iso_region_clean")[capacity_cols].sum()
capacity_by_iso_region_old = county_concrete_mw_old.groupby("iso_region_clean")[capacity_cols].sum()

In [37]:
# Relative change per region and capacity column, as a fraction of the old value
# (0.25 means +25%). Both frames are aligned on their iso_region_clean index.
capacity_by_iso_region_delta = capacity_by_iso_region_new - capacity_by_iso_region_old
capacity_by_iso_region_pct_change = capacity_by_iso_region_delta / capacity_by_iso_region_old

In [40]:
# Largest relative drop in total proposed capacity first.
capacity_by_iso_region_pct_change.sort_values("capacity_total_proposed_mw", ascending=True)

Unnamed: 0_level_0,capacity_under_construction_mw,capacity_awaiting_permitting_mw,capacity_total_proposed_mw
iso_region_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MISO,-0.403034,-0.12856,-0.290079
PJM,-0.267633,-0.130375,-0.187237
SPP,0.128253,-0.171739,-0.036461
ISONE,-0.574351,1.355116,-0.03497
CAISO,-0.164055,0.567367,0.083519
NYISO,0.982113,0.023332,0.237549
ERCOT,0.201572,1.508437,0.56039
NON-ISO,0.065555,1.547812,0.637978


## % change in capacity by state

In [41]:
# Select the capacity columns *before* aggregating so .sum() never touches the string
# columns (county, iso_region, resource_clean, ...). Summing those is wasted work at best
# and, depending on the pandas version, concatenates text or raises.
capacity_cols = [
    "capacity_under_construction_mw",
    "capacity_awaiting_permitting_mw",
    "capacity_total_proposed_mw",
]

# One row per state, one column per capacity measure (MW totals).
capacity_by_state_new = county_concrete_mw_new.groupby("state")[capacity_cols].sum()
capacity_by_state_old = county_concrete_mw_old.groupby("state")[capacity_cols].sum()

# Relative change as a fraction of the old value (0.25 means +25%). States present in only
# one snapshot come out as NaN after index alignment; an old total of 0 yields NaN or inf.
capacity_by_state_pct_change = (capacity_by_state_new - capacity_by_state_old) / capacity_by_state_old
capacity_by_state_pct_change.sort_values(by="capacity_total_proposed_mw")

Unnamed: 0_level_0,capacity_under_construction_mw,capacity_awaiting_permitting_mw,capacity_total_proposed_mw
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rhode Island,-0.996573,0.0,-0.983097
Louisiana,-0.865342,-0.571429,-0.74955
North Carolina,-0.973665,0.149715,-0.68576
New Mexico,-0.855556,0.0,-0.642523
Pennsylvania,0.657201,-0.829352,-0.561816
South Carolina,0.452599,-0.732278,-0.529103
Illinois,-0.905673,2.219531,-0.525004
Idaho,0.327217,-1.0,-0.513998
Arkansas,-0.195084,-1.0,-0.508443
North Dakota,-0.504337,,-0.504337
