Skip to content

Commit

Permalink
Create exhaustive integration test settings
Browse files Browse the repository at this point in the history
In order to be able to reliably run some multi-year tests (e.g. checking that
the database schemas we are generating from all of the different years of the
FERC Form 1 database are mutually compatible) we do sometimes need to run the
tests against the *full* set of all years and tables of data. So I've added
a settings file specifying the full complement of data, which can be used
via pytest with the --etl-settings flag, or by doing `tox -e full_integration`.
To just check the FERC 1 db schemas, run `tox -e ferc1_schema` -- this does
require you to have all the FERC 1 raw input data (or it'll get downloaded for
you).

Because we can tell the tests to run whatever ETL we want with --etl-settings
I removed the standalone test_ferc1_solo_etl test, and instead specified the
data that it should attempt to load in a ferc1-solo-test.yml file under
test/settings. This is run prior to the main integration tests by Tox.

Added a couple of basic tests to the ferc1_etl and pudl_engine tests, which
had just been `pass` statements. Now they at least check to see that the
fixture is of type `sa.engine.Engine` and check that a couple of tables which
should always be present appear in the engine.table_names() list.

Made some adjustments to which files are being included in coverage to more
accurately reflect how well we're doing.

Made the `ci` testenv the default -- if you just run `tox` that's what will
get run.  You can run `tox -av` to see the list of all the available test
environments with short descriptions of what they do.

Still need to finish documenting these changes via #940

Closes #942
  • Loading branch information
zaneselvans committed Mar 9, 2021
1 parent 2a44cf2 commit 7356f82
Show file tree
Hide file tree
Showing 11 changed files with 357 additions and 255 deletions.
3 changes: 2 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
omit =
*glue/zipper.py
*analysis/analysis.py
*analysis/demand_mapping.py
*epaipm.py
*__main__.py
*__init__.py
*cli.py
*_test.py

[report]
Expand Down
7 changes: 0 additions & 7 deletions docs/api/pudl.analysis.demand_mapping.rst

This file was deleted.

1 change: 0 additions & 1 deletion docs/api/pudl.analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ Submodules
:maxdepth: 4

pudl.analysis.allocate_net_gen
pudl.analysis.demand_mapping
pudl.analysis.mcoe
pudl.analysis.service_territory
pudl.analysis.timeseries_cleaning
Expand Down
16 changes: 8 additions & 8 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@

def pytest_addoption(parser):
"""Add a command line option Requiring fresh data download."""
parser.addoption(
"--live-dbs",
action="store_true",
default=False,
help="Use existing PUDL/FERC1 DBs instead of creating temporary ones."
)
parser.addoption(
"--tmp-data",
action="store_true",
Expand All @@ -29,22 +35,16 @@ def pytest_addoption(parser):
default=False,
help="Path to a non-standard ETL settings file to use."
)
parser.addoption(
"--sandbox",
action="store_true",
default=False,
help="Use raw inputs from the Zenodo sandbox server."
)
parser.addoption(
"--gcs-cache-path",
default=None,
help="If set, use this GCS path as a datastore cache layer."
)
parser.addoption(
"--live-dbs",
"--sandbox",
action="store_true",
default=False,
help="Use existing PUDL/FERC1 DBs instead of creating temporary ones."
help="Use raw inputs from the Zenodo sandbox server."
)


Expand Down
34 changes: 14 additions & 20 deletions test/integration/etl_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import logging
from pathlib import Path

import sqlalchemy as sa
import yaml

import pudl
Expand All @@ -26,7 +27,9 @@ def test_datapkg_bundle(datapkg_bundle):

def test_pudl_engine(pudl_engine):
"""Try creating a pudl_engine...."""
pass
assert isinstance(pudl_engine, sa.engine.Engine)
assert "plants_pudl" in pudl_engine.table_names()
assert "utilities_pudl" in pudl_engine.table_names()


def test_ferc1_etl(ferc1_engine):
Expand All @@ -36,13 +39,16 @@ def test_ferc1_etl(ferc1_engine):
Nothing needs to be in the body of this "test" because the database
connections are created by the ferc1_engine fixture defined in conftest.py
"""
pass
assert isinstance(ferc1_engine, sa.engine.Engine)
assert "f1_respondent_id" in ferc1_engine.table_names()


def test_epacems_to_parquet(datapkg_bundle,
pudl_settings_fixture,
pudl_etl_params,
request):
def test_epacems_to_parquet(
datapkg_bundle,
pudl_settings_fixture,
pudl_etl_params,
request
):
"""Attempt to convert a small amount of EPA CEMS data to parquet format."""
epacems_datapkg_json = Path(
pudl_settings_fixture['datapkg_dir'],
Expand All @@ -64,7 +70,7 @@ def test_epacems_to_parquet(datapkg_bundle,
)


def test_ferc1_lost_data(ferc1_etl_params, pudl_ferc1datastore_fixture):
def test_ferc1_schema(ferc1_etl_params, pudl_ferc1datastore_fixture):
"""
Check to make sure we aren't missing any old FERC Form 1 tables or fields.
Expand Down Expand Up @@ -100,6 +106,7 @@ def test_ferc1_lost_data(ferc1_etl_params, pudl_ferc1datastore_fixture):
f"Long lost FERC1 table: '{table}' found in year {yr}. "
f"Refyear: {refyear}"
)
# Check to make sure there aren't any lost archaic fields:
for field in dbc_maps[yr][table].values():
if field not in current_dbc_map[table].values():
raise AssertionError(
Expand All @@ -109,19 +116,6 @@ def test_ferc1_lost_data(ferc1_etl_params, pudl_ferc1datastore_fixture):
)


def test_ferc1_solo_etl(pudl_settings_fixture, ferc1_engine, test_dir):
"""Verify that a minimal FERC Form 1 can be loaded without other data."""
with open(test_dir / 'settings/ferc1-solo.yml', "r") as f:
datapkg_settings = yaml.safe_load(f)['datapkg_bundle_settings']

pudl.etl.generate_datapkg_bundle(
datapkg_settings,
pudl_settings_fixture,
datapkg_bundle_name='ferc1-solo',
clobber=True,
)


class TestFerc1Datastore:
"""Validate the Ferc1 Datastore and integration functions."""

Expand Down
33 changes: 33 additions & 0 deletions test/settings/ferc1-solo-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
###########################################################################
# FERC FORM 1 DB CLONE SETTINGS
###########################################################################
# if you are loading ferc1, you need to specify a reference year. This is the
# year whose database structure is used as a template.
ferc1_to_sqlite_refyear: 2019
# What years of original FERC data should be cloned into the SQLite DB?
ferc1_to_sqlite_years: [2019]
# A list of tables to be loaded into the local SQLite database. These are
# the table names as they appear in the reference year's FERC Form 1 database
# (see ferc1_to_sqlite_refyear above).
ferc1_to_sqlite_tables:
- f1_respondent_id
- f1_steam
- f1_fuel

datapkg_bundle_name: pudl-ferc1-solo
datapkg_bundle_doi: 10.5072/zenodo.123456 # Sandbox DOI... not real.
datapkg_bundle_settings:
###########################################################################
# FERC FORM 1 SETTINGS
###########################################################################
- name: ferc1-solo
title: FERC Form 1 Solo ETL
description: >
A truly minimal FERC Form 1 ETL, just to demonstrate it can be loaded
independently of all other datasets. One year, fuel and steam tables.
version: 0.1.0
datasets:
- ferc1:
ferc1_tables:
- fuel_ferc1 # requires plants_steam_ferc1 to load properly
- plants_steam_ferc1
ferc1_years: [2019]
18 changes: 0 additions & 18 deletions test/settings/ferc1-solo.yml

This file was deleted.

202 changes: 202 additions & 0 deletions test/settings/full-integration-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
###########################################################################
# FERC FORM 1 DB CLONE SETTINGS
###########################################################################
# if you are loading ferc1, you need to specify a reference year. This is the
# year whose database structure is used as a template.
ferc1_to_sqlite_refyear: 2019
# What years of original FERC data should be cloned into the SQLite DB?
ferc1_to_sqlite_years: [
1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018, 2019
]
# A list of tables to be loaded into the local SQLite database. These are
# the table names as they appear in the reference year's FERC Form 1 database
# (see ferc1_to_sqlite_refyear above).
ferc1_to_sqlite_tables: [
f1_respondent_id,
f1_acb_epda,
f1_accumdepr_prvsn,
f1_accumdfrrdtaxcr,
f1_adit_190_detail,
f1_adit_190_notes,
f1_adit_amrt_prop,
f1_adit_other,
f1_adit_other_prop,
f1_allowances,
f1_bal_sheet_cr,
f1_capital_stock,
f1_cash_flow,
f1_cmmn_utlty_p_e,
f1_comp_balance_db,
f1_construction,
f1_control_respdnt,
f1_co_directors,
f1_cptl_stk_expns,
f1_csscslc_pcsircs,
f1_dacs_epda,
f1_dscnt_cptl_stk,
f1_edcfu_epda,
f1_elctrc_erg_acct,
f1_elctrc_oper_rev,
f1_elc_oper_rev_nb,
f1_elc_op_mnt_expn,
f1_electric,
f1_envrnmntl_expns,
f1_envrnmntl_fclty,
f1_fuel,
f1_general_info,
f1_gnrt_plant,
f1_important_chg,
f1_incm_stmnt_2,
f1_income_stmnt,
f1_miscgen_expnelc,
f1_misc_dfrrd_dr,
f1_mthly_peak_otpt,
f1_mtrl_spply,
f1_nbr_elc_deptemp,
f1_nonutility_prop,
f1_nuclear_fuel,
f1_officers_co,
f1_othr_dfrrd_cr,
f1_othr_pd_in_cptl,
f1_othr_reg_assets,
f1_othr_reg_liab,
f1_overhead,
f1_pccidica,
f1_plant_in_srvce,
f1_pumped_storage,
f1_purchased_pwr,
f1_reconrpt_netinc,
f1_reg_comm_expn,
f1_respdnt_control,
f1_retained_erng,
f1_r_d_demo_actvty,
f1_sales_by_sched,
f1_sale_for_resale,
f1_sbsdry_totals,
f1_schedules_list,
f1_security_holder,
f1_slry_wg_dstrbtn,
f1_substations,
f1_taxacc_ppchrgyr,
f1_unrcvrd_cost,
f1_utltyplnt_smmry,
f1_work,
f1_xmssn_adds,
f1_xmssn_elc_bothr,
f1_xmssn_elc_fothr,
f1_xmssn_line,
f1_xtraordnry_loss,
f1_codes_val,
f1_sched_lit_tbl,
f1_audit_log,
f1_col_lit_tbl,
f1_load_file_names,
f1_privilege,
f1_sys_error_log,
f1_unique_num_val,
f1_row_lit_tbl,
f1_hydro,
f1_ident_attsttn,
f1_steam,
f1_leased,
f1_sbsdry_detail,
f1_plant,
f1_long_term_debt,
f1_106_2009,
f1_106a_2009,
f1_106b_2009,
f1_208_elc_dep,
f1_231_trn_stdycst,
f1_324_elc_expns,
f1_325_elc_cust,
f1_331_transiso,
f1_338_dep_depl,
f1_397_isorto_stl,
f1_398_ancl_ps,
f1_399_mth_peak,
f1_400_sys_peak,
f1_400a_iso_peak,
f1_429_trans_aff,
f1_allowances_nox,
f1_cmpinc_hedge_a,
f1_cmpinc_hedge,
f1_email,
f1_rg_trn_srv_rev,
f1_s0_checks,
f1_s0_filing_log,
f1_security,
# f1_note_fin_stmnt, # Huge junk table, 52% of the data by MB
# f1_footnote_tbl, # Huge junk table, 37% of DB
# f1_footnote_data, # Only useful with f1_footnote_tbl
# f1_pins, # private database table, not publicly distributed
# f1_freeze, # private database table, not publicly distributed
]

datapkg_bundle_name: pudl-full-integration-test
datapkg_bundle_doi: 10.5072/zenodo.123456 # Sandbox DOI... not real.
datapkg_bundle_settings:
###########################################################################
# FERC FORM 1 SETTINGS
###########################################################################
- name: ferc1-test
title: FERC Form 1
description: All integrated tables for the available FERC Form 1 years.
version: 0.1.0
datasets:
- ferc1:
ferc1_tables:
- fuel_ferc1 # requires plants_steam_ferc1 to load properly
- plants_steam_ferc1
- plants_small_ferc1
- plants_hydro_ferc1
- plants_pumped_storage_ferc1
- plant_in_service_ferc1
- purchased_power_ferc1
ferc1_years: [
1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018, 2019
]

###########################################################################
# EPA CEMS HOURLY SETTINGS
###########################################################################
- name: epacems-eia-test
title: EIA Forms 860 and 923 combined with EPA CEMS Hourly Emissions.
description: >
All years of epacems data for the smallest state (Idaho), combined with
the EIA Forms 860 and 923, to supply plant latitudes and longitudes,
from which timezones are inferred.
version: 0.1.0
datasets:
- eia:
eia923_tables:
- generation_fuel_eia923
- boiler_fuel_eia923
- generation_eia923
- coalmine_eia923 # REQUIRES fuel_receipts_costs_eia923
- fuel_receipts_costs_eia923
eia923_years: [
2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2019
]
eia860_tables:
- boiler_generator_assn_eia860
- utilities_eia860
- plants_eia860
- generators_eia860
- ownership_eia860
eia860_years: [
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2018, 2019
]
eia860_ytd: True
- epacems:
# Just Idaho, because it is tiny:
epacems_states: [ID]
epacems_years: [
1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2018, 2019,
]

0 comments on commit 7356f82

Please sign in to comment.