diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 6bc2998546..3f9b6e33eb 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -201,6 +201,8 @@ Data Cleaning * Added "correction" records to many FERC Form 1 tables where the reported totals do not match the outcomes of calculations specified in XBRL metadata (even after cleaning up the often incorrect calculation specifications!). See :issue:`2957` and :pr:`2620`. +* Flip the sign of some erroneous negative values in the :ref:`plant_in_service_ferc1` + and :ref:`utility_plant_summary_ferc1` tables. See :issue:`2599`, and :pr:`2647`. Analysis ^^^^^^^^ diff --git a/src/pudl/transform/ferc1.py b/src/pudl/transform/ferc1.py index 55b943b2de..184085c0ae 100644 --- a/src/pudl/transform/ferc1.py +++ b/src/pudl/transform/ferc1.py @@ -3165,8 +3165,22 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame: """The main table-specific transformations, affecting contents not structure. Annotates and alters data based on information from the XBRL taxonomy metadata. + + Make all electric_plant_sold balances positive. """ - return super().transform_main(df).pipe(self.apply_sign_conventions) + df = super().transform_main(df).pipe(self.apply_sign_conventions) + # Make all electric_plant_sold values positive + # This could probably be a FERC transformer class function or in the + # apply_sign_conventions function, but it doesn't seem like the best fit for + # now. + neg_values = (df["ferc_account_label"] == "electric_plant_sold") & ( + df["ending_balance"] < 0 + ) + df.loc[neg_values, "ending_balance"] = abs(df["ending_balance"]) + logger.info( + f"{self.table_id.value}: Converted {len(df[neg_values])} negative values to positive." + ) + return df class PlantsSmallFerc1TableTransformer(Ferc1AbstractTableTransformer): @@ -4126,6 +4140,172 @@ class UtilityPlantSummaryFerc1TableTransformer(Ferc1AbstractTableTransformer): table_id: TableIdFerc1 = TableIdFerc1.UTILITY_PLANT_SUMMARY_FERC1 has_unique_record_ids: bool = False + def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame: + """Spot fix depreciation_utility_plant_in_service records with bad signs.""" + df = super().transform_main(df) + + primary_keys = [ + "report_year", + "utility_id_ferc1", + "utility_type", + "utility_plant_asset_type", + ] + + # The utility_id_ferc1 211 follows the same pattern for several years + # instead of writing them all out in spot_fix_pks, we'll create a loop that + # generates all of them and then append them to spot_fix_pks later + spot_fix_211 = [] + for year in np.append(2006, range(2009, 2021)): + for utility_type in ["electric", "total"]: + pks = [ + ( + year, + 211, + utility_type, + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + year, + 211, + utility_type, + "amortization_of_other_utility_plant_utility_plant_in_service", + ), + ( + year, + 211, + utility_type, + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + ( + year, + 211, + utility_type, + "depreciation_utility_plant_in_service", + ), + ] + spot_fix_211 = spot_fix_211 + pks + + spot_fix_pks = [ + ( + 2012, + 156, + "total", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2012, + 156, + "total", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2012, 156, "total", "depreciation_utility_plant_in_service"), + ( + 2012, + 156, + "electric", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2012, + 156, + "electric", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2012, 156, "electric", "depreciation_utility_plant_in_service"), + ( + 2013, + 170, + "total", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2013, + 170, + "total", + "amortization_of_other_utility_plant_utility_plant_in_service", + ), + (2013, 170, "total", "amortization_of_plant_acquisition_adjustment"), + ( + 2013, + 170, + "total", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2013, 170, "total", "depreciation_utility_plant_in_service"), + ( + 2013, + 170, + "electric", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2013, + 170, + "electric", + "amortization_of_other_utility_plant_utility_plant_in_service", + ), + (2013, 170, "electric", "amortization_of_plant_acquisition_adjustment"), + ( + 2013, + 170, + "electric", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2013, 170, "electric", "depreciation_utility_plant_in_service"), + ( + 2007, + 393, + "electric", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2007, + 393, + "electric", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2007, 393, "electric", "depreciation_utility_plant_in_service"), + ( + 2007, + 393, + "total", + "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility", + ), + ( + 2007, + 393, + "total", + "depreciation_amortization_and_depletion_utility_plant_in_service", + ), + (2007, 393, "total", "depreciation_utility_plant_in_service"), + ] + + # Combine bespoke fixes with programatically generated spot fixes + spot_fix_pks = spot_fix_pks + spot_fix_211 + + # Par down spot fixes to account for fast tests where not all years are used + df_years = df.report_year.unique().tolist() + spot_fix_pks = [x for x in spot_fix_pks if x[0] in df_years] + logger.info(f"{self.table_id.value}: Spotfixing {len(spot_fix_pks)} records.") + + if spot_fix_pks: + # Create a df of the primary key of the records you want to fix + df_keys = pd.DataFrame(spot_fix_pks, columns=primary_keys).set_index( + primary_keys + ) + df.set_index(primary_keys, inplace=True) + # Flip the signs for the values in "ending balance" all records in the original + # df that appear in the primary key df + df.loc[df_keys.index, "ending_balance"] = df["ending_balance"] * -1 + # All of these are flipping negative values to positive values, + # so let's make sure that's what happens + flipped_values = df.loc[df_keys.index] + if (flipped_values["ending_balance"] < 0).any(): + raise AssertionError("None of these spot fixes should be negative") + df.reset_index(inplace=True) + + return df + class BalanceSheetLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransformer): """Transformer class for :ref:`balance_sheet_liabilities_ferc1` table.""" diff --git a/src/pudl/transform/params/ferc1.py b/src/pudl/transform/params/ferc1.py index 86019543d5..2a83b6d1d6 100644 --- a/src/pudl/transform/params/ferc1.py +++ b/src/pudl/transform/params/ferc1.py @@ -3199,7 +3199,7 @@ "const_wrk_prgrs": "construction_work_in_progress_ending_balance", "acqstn_adjstmnt": "utility_plant_acquisition_adjustment_ending_balance", "tot_utlty_plant": "utility_plant_and_construction_work_in_progress_ending_balance", - "accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance", + "accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_reported_ending_balance", "net_utlty_plant": "utility_plant_net_ending_balance", # detail of accum deprish # in service @@ -3219,7 +3219,7 @@ # rest of details of acum deprish "abndn_leases": "abandonment_of_leases_ending_balance", "amrtzplnt_acqstn": "amortization_of_plant_acquisition_adjustment_ending_balance", - "tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_detail_ending_balance", + "tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance", } }, "xbrl": {