Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spot fix ferc exploder #2647

Merged
merged 20 commits into from Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
574ec9e
Merge branch 'xbrl_meta_reshape' into spot-fix-ferc-exploder
aesharpe Jun 6, 2023
c9a681a
Add a transform main function with spot fixes for certain negative en…
aesharpe Jun 8, 2023
390530c
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 8, 2023
fad70cd
Add conditionals for spot fixes to accomodate fast tests
aesharpe Jun 13, 2023
ad5a3b0
Switch accum_provision_DAD rows in utility_plant_summary table
aesharpe Jun 13, 2023
2706266
Add spot fix to plant_in_service table that flips certain negative va…
aesharpe Jun 14, 2023
fc2e510
Apply reconcile_table_calculations to a copy of the dataframe in the …
aesharpe Jun 14, 2023
e01b49a
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 14, 2023
8c5650b
Respond to PR comments:
aesharpe Jun 15, 2023
6773520
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 15, 2023
85b8d21
Uncomment line I was using for testing
aesharpe Jun 15, 2023
9c18306
Fix indent issue in spot fixer and remove two spot fix rows that were…
aesharpe Jun 16, 2023
3083254
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 16, 2023
6e8c6cf
Add some docs changes to reflect new code and update assertion error …
aesharpe Jun 16, 2023
cde26b0
Merge branch 'dev' into spot-fix-ferc-exploder
e-belfer Jun 19, 2023
1a939bc
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 20, 2023
0fd7544
Merge branch 'dev' into spot-fix-ferc-exploder
aesharpe Jun 21, 2023
f3c1e22
Merge branch 'spot-fix-ferc-exploder' of https://github.com/catalyst-…
aesharpe Jun 21, 2023
d4b15b8
Add release notes
aesharpe Jun 21, 2023
8daa787
Merge branch 'dev' into spot-fix-ferc-exploder
e-belfer Jun 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
198 changes: 197 additions & 1 deletion src/pudl/transform/ferc1.py
Expand Up @@ -3042,8 +3042,26 @@ def transform_main(self, df: pd.DataFrame) -> pd.DataFrame:
"""The main table-specific transformations, affecting contents not structure.

Annotates and alters data based on information from the XBRL taxonomy metadata.

Fix instances where electric_plant_sold should be positive. This does not cover
all instances where electric_plant_sold is negative, but rather some of the instances
aesharpe marked this conversation as resolved.
Show resolved Hide resolved
where electric_plant_sold is negative AND the calculated value for
electric_plant_in_service (the field that uses electric_plant_sold as a
subcomponent) does not match its calculated value.
"""
return super().transform_main(df).pipe(self.apply_sign_conventions)
df = super().transform_main(df).pipe(self.apply_sign_conventions)
# Make all electric_plant_sold values positive
# This could probably be a FERC transformer class function or in the
# apply_sign_conventions function, but it doesn't seem like the best fit for
# now.
neg_values = (df["ferc_account_label"] == "electric_plant_sold") & (
df["ending_balance"] < 0
)
df.loc[neg_values, "ending_balance"] = abs(df["ending_balance"])
logger.info(
f"{self.table_id.value}: Converted {len(df[neg_values])} negative values to positive."
)
return df


class PlantsSmallFerc1TableTransformer(Ferc1AbstractTableTransformer):
Expand Down Expand Up @@ -4003,6 +4021,184 @@ class UtilityPlantSummaryFerc1TableTransformer(Ferc1AbstractTableTransformer):
table_id: TableIdFerc1 = TableIdFerc1.UTILITY_PLANT_SUMMARY_FERC1
has_unique_record_ids: bool = False

def transform_main(self: Self, df: pd.DataFrame) -> pd.DataFrame:
    """Spot fix depreciation_utility_plant_in_service records with bad signs.

    Runs the parent class's ``transform_main`` and then multiplies
    ``ending_balance`` by -1 for a hard-coded list of records identified by
    ``report_year``, ``utility_id_ferc1``, ``utility_type`` and
    ``utility_plant_asset_type``. Spot fixes for years that are not present in
    ``df`` are skipped so that runs on a subset of years still work.

    Args:
        df: The table to transform. After the parent transform it must contain
            the four primary key columns named above and ``ending_balance``.

    Returns:
        The transformed table with the sign of ``ending_balance`` flipped for
        the spot-fixed records. The primary key columns are moved to the front
        of the frame as a side effect of the set_index/reset_index round trip.

    Raises:
        AssertionError: If more than one spot-fixed record has a negative
            ``ending_balance`` after the flip.
    """
    df = super().transform_main(df)

    primary_keys = [
        "report_year",
        "utility_id_ferc1",
        "utility_type",
        "utility_plant_asset_type",
    ]

    accum_provision = "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility"
    amort_other_in_service = (
        "amortization_of_other_utility_plant_utility_plant_in_service"
    )
    dad_in_service = (
        "depreciation_amortization_and_depletion_utility_plant_in_service"
    )
    depreciation_in_service = "depreciation_utility_plant_in_service"

    # Each group is (report years, utility_id_ferc1, utility types, asset types);
    # every combination within a group is one record whose sign gets flipped.
    spot_fix_groups = [
        (
            [2012],
            156,
            ["total", "electric"],
            [accum_provision, dad_in_service, depreciation_in_service],
        ),
        (
            # Per the original note, this group holds the only record that
            # goes from positive to negative (utility_plant_net).
            [2002],
            170,
            ["other1"],
            [
                "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_reported",
                "utility_plant_net",
            ],
        ),
        (
            [2013],
            170,
            ["total", "electric"],
            [
                accum_provision,
                amort_other_in_service,
                "amortization_of_plant_acquisition_adjustment",
                dad_in_service,
                depreciation_in_service,
            ],
        ),
        (
            [2007],
            393,
            ["electric", "total"],
            [accum_provision, dad_in_service, depreciation_in_service],
        ),
        (
            # The utility_id_ferc1 211 follows the same pattern for several
            # years, so generate those keys programmatically instead of
            # writing them all out.
            [2006, *range(2009, 2021)],
            211,
            ["electric", "total"],
            [
                accum_provision,
                amort_other_in_service,
                dad_in_service,
                depreciation_in_service,
            ],
        ),
    ]

    spot_fix_pks = []
    for years, utility_id, utility_types, asset_types in spot_fix_groups:
        spot_fix_pks.extend(
            (year, utility_id, utility_type, asset_type)
            for year in years
            for utility_type in utility_types
            for asset_type in asset_types
        )

    # Pare down spot fixes to account for fast tests where not all years are used
    df_years = set(df["report_year"].unique().tolist())
    spot_fix_pks = [pk for pk in spot_fix_pks if pk[0] in df_years]
    logger.info(f"{self.table_id.value}: Spotfixing {len(spot_fix_pks)} records.")

    if spot_fix_pks:
        # Build an index out of the primary keys of the records to fix
        fix_index = pd.DataFrame(spot_fix_pks, columns=primary_keys).set_index(
            primary_keys
        ).index
        df = df.set_index(primary_keys)
        # Flip the sign of "ending_balance" for just the selected records.
        # Using the same selection on both sides avoids aligning the whole
        # column against the selection, which fails if any key in the table
        # is duplicated.
        df.loc[fix_index, "ending_balance"] = (
            df.loc[fix_index, "ending_balance"] * -1
        )
        # All of these are flipping negative values to positive values, except
        # one, so let's make sure that's what happens
        still_negative = df.loc[fix_index, "ending_balance"] < 0
        if still_negative.sum() > 1:
            raise AssertionError("Only one of these spot fixes should be negative")
        df = df.reset_index()

    return df


class BalanceSheetLiabilitiesFerc1TableTransformer(Ferc1AbstractTableTransformer):
"""Transformer class for :ref:`balance_sheet_liabilities_ferc1` table."""
Expand Down
4 changes: 2 additions & 2 deletions src/pudl/transform/params/ferc1.py
Expand Up @@ -3199,7 +3199,7 @@
"const_wrk_prgrs": "construction_work_in_progress_ending_balance",
"acqstn_adjstmnt": "utility_plant_acquisition_adjustment_ending_balance",
"tot_utlty_plant": "utility_plant_and_construction_work_in_progress_ending_balance",
"accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance",
"accum_prvsn_dad": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_reported_ending_balance",
"net_utlty_plant": "utility_plant_net_ending_balance",
# detail of accum deprish
# in service
Expand All @@ -3219,7 +3219,7 @@
# rest of details of acum deprish
"abndn_leases": "abandonment_of_leases_ending_balance",
"amrtzplnt_acqstn": "amortization_of_plant_acquisition_adjustment_ending_balance",
"tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_detail_ending_balance",
"tot_accum_prvsn": "accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility_ending_balance",
}
},
"xbrl": {
Expand Down