Skip to content

Commit

Permalink
Subset FERC714 by year
Browse files: browse the repository at this point in the history.
  • Loading branch information
e-belfer committed Jun 9, 2023
1 parent ced19c5 commit 9290ab5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 26 deletions.
18 changes: 12 additions & 6 deletions src/pudl/extract/ferc714.py
Expand Up @@ -65,7 +65,7 @@

@multi_asset(
outs={"raw_" + table_name: AssetOut() for table_name in FERC714_FILES},
required_resource_keys={"datastore"},
required_resource_keys={"datastore", "dataset_settings"},
)
def extract_ferc714(context):
"""Extract the raw FERC Form 714 dataframes from their original CSV files.
Expand All @@ -76,20 +76,26 @@ def extract_ferc714(context):
Returns:
A tuple of extracted FERC-714 dataframes.
"""
logger.warning(
"Note that all years of FERC-714 data are lumped together and will be "
"processed together regardless of what years are requested."
)
ds = context.resources.datastore
ferc714_settings = context.resources.dataset_settings.ferc714
years = ", ".join(map(str, ferc714_settings.years))

raw_dfs: OrderedDict[str, pd.DataFrame] = OrderedDict({})
for table_name in FERC714_FILES:
logger.info(f"Extracting {table_name} from CSV into pandas DataFrame.")
logger.info(
f"Extracting {table_name} from CSV into pandas DataFrame (years: {years})."
)
with ds.get_zipfile_resource("ferc714", name="ferc714.zip").open(
FERC714_FILES[table_name]["name"]
) as f:
raw_dfs[table_name] = pd.read_csv(
f, encoding=FERC714_FILES[table_name]["encoding"]
)
if table_name != "respondent_id_ferc714":
raw_dfs[table_name] = raw_dfs[table_name].query(
"report_yr in @ferc714_settings.years"
)

return (
Output(output_name="raw_" + table_name, value=df)
for table_name, df in raw_dfs.items()
Expand Down
21 changes: 1 addition & 20 deletions src/pudl/package_data/settings/etl_fast.yml
Expand Up @@ -32,26 +32,7 @@ datasets:
ferc1:
years: [2020, 2021]
ferc714:
# Note: ferc714 from 2006-2020 is distributed as monolithic CSV files, so asking
# for a subset of years will not work. You can only get all of them or none.
years:
[
2006,
2007,
2008,
2009,
2010,
2011,
2012,
2013,
2014,
2015,
2016,
2017,
2018,
2019,
2020,
]
years: [2019, 2020]
eia:
eia923:
years: [2020, 2021]
Expand Down

0 comments on commit 9290ab5

Please sign in to comment.