Skip to content

Commit

Permalink
Merge pull request #2563 from catalyst-cooperative/xbrl_meta_reshape
Browse files Browse the repository at this point in the history
Rename and test XBRL metadata calculations
  • Loading branch information
cmgosnell committed Jun 8, 2023
2 parents 8df08c4 + 0f72803 commit ced19c5
Show file tree
Hide file tree
Showing 10 changed files with 1,436 additions and 308 deletions.
22 changes: 4 additions & 18 deletions devtools/debug-ferc1-etl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,6 @@
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e21fea69-9d14-48d6-b0c9-149c37844e95",
"metadata": {},
"outputs": [],
"source": [
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"handler = logging.StreamHandler(stream=sys.stdout)\n",
"formatter = logging.Formatter(\"%(message)s\")\n",
"handler.setFormatter(formatter)\n",
"logger.handlers = [handler]"
]
},
{
"cell_type": "markdown",
"id": "14145b03-fb79-46a6-a0e6-82da0c549d66",
Expand Down Expand Up @@ -128,10 +113,11 @@
"source": [
"from dagster import build_op_context\n",
"\n",
"from pudl.extract.ferc1 import xbrl_metadata_json\n",
"from pudl.extract.ferc1 import raw_xbrl_metadata_json\n",
"from pudl.transform.ferc1 import clean_xbrl_metadata_json\n",
"\n",
"context = build_op_context()\n",
"xbrl_metadata_json_dict = xbrl_metadata_json(context)"
"xbrl_metadata_json_dict = clean_xbrl_metadata_json(raw_xbrl_metadata_json(context))"
]
},
{
Expand Down Expand Up @@ -386,7 +372,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.11.3"
}
},
"nbformat": 4,
Expand Down
168 changes: 168 additions & 0 deletions migrations/versions/88d9201ae4c4_update_schema_for_ferc1_exploder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""update schema for ferc1 exploder
Revision ID: 88d9201ae4c4
Revises: 92780dd3d879
Create Date: 2023-06-08 11:04:31.244964
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (see the "Revises:" line in the module docstring).
revision = '88d9201ae4c4'
down_revision = '92780dd3d879'
# No branch labels or dependencies on other revisions are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply the FERC1 exploder schema changes.

    For each affected FERC1 table (and its ``denorm_`` counterpart) the
    table-specific value column is dropped and a shared ``dollar_value`` or
    ``ending_balance`` column is added. The electric operating revenues tables
    additionally gain ``ferc_account`` and ``row_type_xbrl``. The nullability
    of three columns on ``denorm_plant_in_service_ferc1`` is also changed.

    Note that columns are added and dropped rather than renamed, so values
    held in the dropped columns are not copied into the new ones.

    The operations were originally auto generated by Alembic; table order and
    the per-table operation order are kept as generated.
    """

    # Column objects cannot be shared between tables, so each helper below
    # builds a fresh Column every time it is called.
    def _dollar_value():
        return sa.Column(
            'dollar_value',
            sa.Float(),
            nullable=True,
            comment='Dollar value of reported income, expense, asset, or liability.',
        )

    def _ending_balance():
        return sa.Column(
            'ending_balance',
            sa.Float(),
            nullable=True,
            comment='Account balance at end of year.',
        )

    def _ferc_account():
        return sa.Column(
            'ferc_account',
            sa.Text(),
            nullable=True,
            comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting.",
        )

    def _row_type_xbrl():
        return sa.Column(
            'row_type_xbrl',
            sa.Enum('calculated_value', 'reported_value', 'correction'),
            nullable=True,
            comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.',
        )

    def _swap_columns(table_name, column_factories, retired_column):
        # Add the new column(s) first, then drop the superseded one, all
        # within a single batch operation on the table.
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            for build_column in column_factories:
                batch_op.add_column(build_column())
            batch_op.drop_column(retired_column)

    revenue_additions = (_dollar_value, _ferc_account, _row_type_xbrl)

    # --- denormalized tables ---
    _swap_columns(
        'denorm_depreciation_amortization_summary_ferc1',
        (_dollar_value,),
        'depreciation_amortization_value',
    )
    _swap_columns(
        'denorm_electric_operating_expenses_ferc1',
        (_dollar_value,),
        'expense',
    )
    _swap_columns(
        'denorm_electric_operating_revenues_ferc1',
        revenue_additions,
        'revenue',
    )
    _swap_columns(
        'denorm_electric_plant_depreciation_functional_ferc1',
        (_ending_balance,),
        'utility_plant_value',
    )
    _swap_columns(
        'denorm_electricity_sales_by_rate_schedule_ferc1',
        (_dollar_value,),
        'sales_revenue',
    )
    _swap_columns(
        'denorm_income_statement_ferc1',
        (_dollar_value,),
        'income',
    )

    # This table only has column nullability changed; no columns are added
    # or dropped.
    with op.batch_alter_table('denorm_plant_in_service_ferc1', schema=None) as batch_op:
        nullability_changes = (
            ('report_year', sa.INTEGER(), False),
            ('utility_id_ferc1', sa.INTEGER(), False),
            ('record_id', sa.TEXT(), True),
        )
        for column_name, current_type, allow_null in nullability_changes:
            batch_op.alter_column(
                column_name,
                existing_type=current_type,
                nullable=allow_null,
            )

    _swap_columns(
        'denorm_utility_plant_summary_ferc1',
        (_ending_balance,),
        'utility_plant_value',
    )

    # --- normalized tables ---
    _swap_columns(
        'depreciation_amortization_summary_ferc1',
        (_dollar_value,),
        'depreciation_amortization_value',
    )
    _swap_columns(
        'electric_operating_expenses_ferc1',
        (_dollar_value,),
        'expense',
    )
    _swap_columns(
        'electric_operating_revenues_ferc1',
        revenue_additions,
        'revenue',
    )
    _swap_columns(
        'electric_plant_depreciation_functional_ferc1',
        (_ending_balance,),
        'utility_plant_value',
    )
    _swap_columns(
        'electricity_sales_by_rate_schedule_ferc1',
        (_dollar_value,),
        'sales_revenue',
    )
    _swap_columns(
        'income_statement_ferc1',
        (_dollar_value,),
        'income',
    )
    _swap_columns(
        'utility_plant_summary_ferc1',
        (_ending_balance,),
        'utility_plant_value',
    )


def downgrade() -> None:
    """Revert the FERC1 exploder schema changes.

    Each affected table gets its original table-specific value column back as
    a nullable ``FLOAT`` (without a column comment), and the columns added by
    ``upgrade`` are dropped. The nullability changes on
    ``denorm_plant_in_service_ferc1`` are reversed.

    Note that columns are added and dropped rather than renamed, so values
    held in the dropped columns are not copied back into the restored ones.

    The operations were originally auto generated by Alembic; table order and
    the per-table operation order are kept as generated.
    """

    def _restore_column(table_name, restored_column, discarded_columns):
        # Re-create the pre-upgrade column, then drop what upgrade() added,
        # all within a single batch operation on the table.
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.add_column(
                sa.Column(restored_column, sa.FLOAT(), nullable=True)
            )
            for discarded in discarded_columns:
                batch_op.drop_column(discarded)

    only_dollar_value = ('dollar_value',)
    only_ending_balance = ('ending_balance',)
    revenue_extras = ('row_type_xbrl', 'ferc_account', 'dollar_value')

    # --- normalized tables ---
    _restore_column(
        'utility_plant_summary_ferc1',
        'utility_plant_value',
        only_ending_balance,
    )
    _restore_column(
        'income_statement_ferc1',
        'income',
        only_dollar_value,
    )
    _restore_column(
        'electricity_sales_by_rate_schedule_ferc1',
        'sales_revenue',
        only_dollar_value,
    )
    _restore_column(
        'electric_plant_depreciation_functional_ferc1',
        'utility_plant_value',
        only_ending_balance,
    )
    _restore_column(
        'electric_operating_revenues_ferc1',
        'revenue',
        revenue_extras,
    )
    _restore_column(
        'electric_operating_expenses_ferc1',
        'expense',
        only_dollar_value,
    )
    _restore_column(
        'depreciation_amortization_summary_ferc1',
        'depreciation_amortization_value',
        only_dollar_value,
    )

    # --- denormalized tables ---
    _restore_column(
        'denorm_utility_plant_summary_ferc1',
        'utility_plant_value',
        only_ending_balance,
    )

    # This table only has column nullability changed back; no columns are
    # added or dropped.
    with op.batch_alter_table('denorm_plant_in_service_ferc1', schema=None) as batch_op:
        nullability_changes = (
            ('record_id', sa.TEXT(), False),
            ('utility_id_ferc1', sa.INTEGER(), True),
            ('report_year', sa.INTEGER(), True),
        )
        for column_name, current_type, allow_null in nullability_changes:
            batch_op.alter_column(
                column_name,
                existing_type=current_type,
                nullable=allow_null,
            )

    _restore_column(
        'denorm_income_statement_ferc1',
        'income',
        only_dollar_value,
    )
    _restore_column(
        'denorm_electricity_sales_by_rate_schedule_ferc1',
        'sales_revenue',
        only_dollar_value,
    )
    _restore_column(
        'denorm_electric_plant_depreciation_functional_ferc1',
        'utility_plant_value',
        only_ending_balance,
    )
    _restore_column(
        'denorm_electric_operating_revenues_ferc1',
        'revenue',
        revenue_extras,
    )
    _restore_column(
        'denorm_electric_operating_expenses_ferc1',
        'expense',
        only_dollar_value,
    )
    _restore_column(
        'denorm_depreciation_amortization_summary_ferc1',
        'depreciation_amortization_value',
        only_dollar_value,
    )
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"fsspec>=2021.7,<2023.5.1", # For caching datastore on GCS
"gcsfs>=2021.7,<2023.5.1", # For caching datastore on GCS
"geopandas>=0.13,<0.14",
"grpcio<1.55.0",
"jinja2>=2,<3.2",
"matplotlib>=3.3,<3.8", # Should make this optional with a "viz" extras
"networkx>=2.2,<3.2",
Expand Down
2 changes: 1 addition & 1 deletion src/pudl/extract/ferc1.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def create_raw_ferc1_assets() -> list[SourceAsset]:
),
},
)
def xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]]]:
def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]]]:
"""Extract the FERC 1 XBRL Taxonomy metadata we've stored as JSON.
Returns:
Expand Down
11 changes: 4 additions & 7 deletions src/pudl/glue/ferc1_eia.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@
from dagster import AssetIn, Definitions, JobDefinition, asset, define_asset_job

import pudl
from pudl.extract.ferc1 import raw_ferc1_assets, xbrl_metadata_json
from pudl.extract.ferc1 import raw_ferc1_assets, raw_xbrl_metadata_json
from pudl.io_managers import ferc1_dbf_sqlite_io_manager, ferc1_xbrl_sqlite_io_manager
from pudl.metadata.fields import apply_pudl_dtypes
from pudl.resources import dataset_settings
from pudl.transform.classes import StringNormalization, normalize_strings_multicol
from pudl.transform.ferc1 import (
Ferc1AbstractTableTransformer,
TableIdFerc1,
clean_xbrl_metadata_json,
ferc1_transform_asset_factory,
)
from pudl.transform.params.ferc1 import FERC1_STRING_NORM
Expand Down Expand Up @@ -289,13 +290,10 @@ def plants_ferc1_raw(**transformed_plant_tables):
)
return all_plants

tfr_mapping = {
table_name: GenericPlantFerc1TableTransformer for table_name in plant_tables
}
transform_assets = [
ferc1_transform_asset_factory(
table_name,
tfr_mapping,
GenericPlantFerc1TableTransformer,
io_manager_key=None,
convert_dtypes=False,
generic=True,
Expand All @@ -306,8 +304,7 @@ def plants_ferc1_raw(**transformed_plant_tables):
return Definitions(
assets=transform_assets
+ raw_ferc1_assets
+ [plants_ferc1_raw]
+ [xbrl_metadata_json],
+ [plants_ferc1_raw, raw_xbrl_metadata_json, clean_xbrl_metadata_json],
resources={
"ferc1_dbf_sqlite_io_manager": ferc1_dbf_sqlite_io_manager,
"ferc1_xbrl_sqlite_io_manager": ferc1_xbrl_sqlite_io_manager,
Expand Down
7 changes: 6 additions & 1 deletion src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,11 @@
"type": "string",
"description": "Unique digitial object identifier of Zenodo archive.",
},
"dollar_value": {
"type": "number",
"description": "Dollar value of reported income, expense, asset, or liability.",
"unit": "USD",
},
"duct_burners": {
"type": "boolean",
"description": "Indicates whether the unit has duct-burners for supplementary firing of the turbine exhaust gas",
Expand Down Expand Up @@ -2113,7 +2118,7 @@
"row_type_xbrl": {
"type": "string",
"description": "Indicates whether the value reported in the row is calculated, or uniquely reported within the table.",
"constraints": {"enum": ["calculated_value", "reported_value"]},
"constraints": {"enum": ["calculated_value", "reported_value", "correction"]},
},
"rto_iso_lmp_node_id": {
"type": "string",
Expand Down

0 comments on commit ced19c5

Please sign in to comment.