Skip to content

Commit

Permalink
Merge pull request #2647 from catalyst-cooperative/spot-fix-ferc-expl…
Browse files Browse the repository at this point in the history
…oder

Spot fix ferc exploder
  • Loading branch information
aesharpe committed Jun 21, 2023
2 parents 852c8ec + 8daa787 commit 8f1e23e
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/release_notes.rst
Expand Up @@ -201,6 +201,8 @@ Data Cleaning
* Added "correction" records to many FERC Form 1 tables where the reported totals do not
match the outcomes of calculations specified in XBRL metadata (even after cleaning up
the often incorrect calculation specifications!). See :issue:`2957` and :pr:`2620`.
* Flip the sign of some erroneous negative values in the :ref:`plant_in_service_ferc1`
and :ref:`utility_plant_summary_ferc1` tables. See :issue:`2599` and :pr:`2647`.

Analysis
^^^^^^^^
Expand Down
182 changes: 181 additions & 1 deletion src/pudl/transform/ferc1.py
Expand Up @@ -3165,8 +3165,22 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame:
"""The main table-specific transformations, affecting contents not structure.
Annotates and alters data based on information from the XBRL taxonomy metadata.
Make all electric_plant_sold balances positive.
"""
return super().transform_main(df).pipe(self.apply_sign_conventions)
df = super().transform_main(df).pipe(self.apply_sign_conventions)
# Make all electric_plant_sold values positive
# This could probably be a FERC transformer class function or in the
# apply_sign_conventions function, but it doesn't seem like the best fit for
# now.
neg_values = (df["ferc_account_label"] == "electric_plant_sold") & (
df["ending_balance"] < 0
)
df.loc[neg_values, "ending_balance"] = abs(df["ending_balance"])
logger.info(
f"{self.table_id.value}: Converted {len(df[neg_values])} negative values to positive."
)
return df


class PlantsSmallFerc1TableTransformer(Ferc1AbstractTableTransformer):
Expand Down Expand Up @@ -4126,6 +4140,172 @@ class UtilityPlantSummaryFerc1TableTransformer(Ferc1AbstractTableTransformer):
table_id: TableIdFerc1 = TableIdFerc1.UTILITY_PLANT_SUMMARY_FERC1
has_unique_record_ids: bool = False

def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame:
    """Spot fix utility plant summary records that were reported with bad signs.

    After running the parent class's ``transform_main``, multiply the
    ``ending_balance`` of a hard-coded set of records by -1. Records are
    identified by the combination of ``report_year``, ``utility_id_ferc1``,
    ``utility_type`` and ``utility_plant_asset_type``. Spot fixes whose
    ``report_year`` does not appear in ``df`` are dropped, and spot fixes whose
    full key is absent from ``df`` are skipped with a warning rather than
    raising a ``KeyError``.

    Args:
        df: The table being transformed. Must contain the four key columns
            listed above and an ``ending_balance`` column.

    Returns:
        The transformed table. When at least one spot fix applies to the years
        present, the index is reset and the key columns come first.

    Raises:
        AssertionError: If any spot-fixed ``ending_balance`` is still negative
            after its sign has been flipped.
    """
    df = super().transform_main(df)

    primary_keys = [
        "report_year",
        "utility_id_ferc1",
        "utility_type",
        "utility_plant_asset_type",
    ]
    # Every spot fix applies to both of these utility types.
    utility_types = ["electric", "total"]

    # Name the asset types once so each long label is only spelled out in a
    # single place.
    accum_provision = "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility"
    amort_other = "amortization_of_other_utility_plant_utility_plant_in_service"
    amort_acq_adj = "amortization_of_plant_acquisition_adjustment"
    depr_amort_depl = (
        "depreciation_amortization_and_depletion_utility_plant_in_service"
    )
    depr = "depreciation_utility_plant_in_service"

    # Each entry is (utility_id_ferc1, report years, asset types to flip).
    # utility_id_ferc1 211 follows the same pattern for several years, so its
    # years are generated rather than written out one by one.
    spot_fix_specs = [
        (156, [2012], [accum_provision, depr_amort_depl, depr]),
        (
            170,
            [2013],
            [accum_provision, amort_other, amort_acq_adj, depr_amort_depl, depr],
        ),
        (393, [2007], [accum_provision, depr_amort_depl, depr]),
        (
            211,
            [2006, *range(2009, 2021)],
            [accum_provision, amort_other, depr_amort_depl, depr],
        ),
    ]

    # Pare down spot fixes to account for fast tests where not all years are used
    df_years = set(df["report_year"].unique().tolist())
    spot_fix_pks = [
        (year, utility_id, utility_type, asset_type)
        for utility_id, years, asset_types in spot_fix_specs
        for year in years
        if year in df_years
        for utility_type in utility_types
        for asset_type in asset_types
    ]
    logger.info(f"{self.table_id.value}: Spotfixing {len(spot_fix_pks)} records.")

    if not spot_fix_pks:
        return df

    # Index both the data and the spot fixes by primary key so the records to
    # fix can be selected by label.
    fix_index = (
        pd.DataFrame(spot_fix_pks, columns=primary_keys)
        .set_index(primary_keys)
        .index
    )
    df = df.set_index(primary_keys)

    # Selecting labels that are missing from the index raises a KeyError, so
    # only keep the spot fixes that actually exist in this (possibly subset)
    # dataframe.
    is_present = fix_index.isin(df.index)
    if not is_present.all():
        logger.warning(
            f"{self.table_id.value}: Skipping {int((~is_present).sum())} spot "
            "fixes whose records were not found in the data."
        )
        fix_index = fix_index[is_present]

    if len(fix_index) > 0:
        # Flip the sign of "ending_balance" for every record being spot fixed.
        df.loc[fix_index, "ending_balance"] = df["ending_balance"] * -1
        # All of these are flipping negative values to positive values,
        # so let's make sure that's what happens
        if (df.loc[fix_index, "ending_balance"] < 0).any():
            raise AssertionError("None of these spot fixes should be negative")

    return df.reset_index()


class BalanceSheetLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransformer):
"""Transformer class for :ref:`balance_sheet_liabilities_ferc1` table."""
Expand Down
4 changes: 2 additions & 2 deletions src/pudl/transform/params/ferc1.py
Expand Up @@ -3199,7 +3199,7 @@
"const_wrk_prgrs": "construction_work_in_progress_ending_balance",
"acqstn_adjstmnt": "utility_plant_acquisition_adjustment_ending_balance",
"tot_utlty_plant": "utility_plant_and_construction_work_in_progress_ending_balance",
"accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance",
"accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_reported_ending_balance",
"net_utlty_plant": "utility_plant_net_ending_balance",
# detail of accum deprish
# in service
Expand All @@ -3219,7 +3219,7 @@
# rest of details of acum deprish
"abndn_leases": "abandonment_of_leases_ending_balance",
"amrtzplnt_acqstn": "amortization_of_plant_acquisition_adjustment_ending_balance",
"tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_detail_ending_balance",
"tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance",
}
},
"xbrl": {
Expand Down

0 comments on commit 8f1e23e

Please sign in to comment.