# Working with the FERC Form 1 Extract / Transform
This notebook walks through PUDL's extract and transform steps for FERC Form 1, to make it easier to test and add new years of data, or new tables from the various spreadsheets that haven't been integrated yet.

In [None]:
# Enable IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
# NOTE(review): autoreload mode 3 is only available in newer IPython releases --
# confirm against the environment this notebook is run in.
%load_ext autoreload
%autoreload 3
import pudl
import logging
import sys
from pathlib import Path
import pandas as pd
# Show every column when a DataFrame is rendered instead of eliding wide tables.
pd.options.display.max_columns = None

In [None]:
# Send log output to stdout as bare messages so it shows up in the cell output.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger (getLogger() with no name) at INFO level.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Replace the handler list instead of appending to it, so re-running this cell
# does not produce duplicated log lines.
logger.handlers = [handler]

In [None]:
# Settings returned by pudl.workspace.setup.get_defaults(); they are passed as
# `pudl_settings` to the extract calls later in this notebook.
# NOTE(review): presumably these describe the local PUDL workspace -- confirm.
pudl_settings = pudl.workspace.setup.get_defaults()

## Pick the tables you want to load

In [None]:
# FERC Form 1 tables to extract and transform. Every name listed here is also
# used as a key into the XBRL metadata when the transformers are built in the
# "Transform FERC 1 Tables" section, so removing an entry makes that cell fail.
tables = [
    "balance_sheet_assets_ferc1",
    "balance_sheet_liabilities_ferc1",
    "depreciation_amortization_summary_ferc1",
    "electric_energy_dispositions_ferc1",
    "electric_energy_sources_ferc1",
    "electric_opex_ferc1",
    "electric_plant_depreciation_changes_ferc1",
    "fuel_ferc1",
    "income_statement_ferc1",
    "plants_hydro_ferc1",
    "plants_pumped_storage_ferc1",
    "plants_small_ferc1",
    "plants_steam_ferc1",
    "plant_in_service_ferc1",
    "purchased_power_ferc1",
    "retained_earnings_ferc1",
    "transmission_statistics_ferc1",
    "utility_plant_summary_ferc1",
]

In [None]:
# Extra tables to pull in while exploring. They are merged into the selection
# made in the previous cell rather than replacing it: the transformers cell
# further down looks up XBRL metadata for every table in that selection, so
# overwriting `tables` with a two-item list makes it fail with a KeyError.
debug_tables = ["income_statement_ferc1", "electricity_sales_by_rate_schedule"]
# dict.fromkeys de-duplicates while preserving order, which keeps this cell
# idempotent when it is re-run.
tables = list(dict.fromkeys([*tables, *debug_tables]))

In [None]:
# Wrap the selected table names in a Ferc1Settings object; the extract calls
# later in this notebook take it as their `ferc1_settings` argument.
ferc1_settings = pudl.settings.Ferc1Settings(tables=tables)

## Extract DBF and XBRL Data:

In [None]:
# Extract old FERC Form 1 data from DBF (2020 and earlier)
ferc1_dbf_raw_dfs = pudl.extract.ferc1.extract_dbf(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract new FERC Form 1 data from XBRL (2021 and later)
ferc1_xbrl_raw_dfs = pudl.extract.ferc1.extract_xbrl(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract the XBRL metadata once and then pick out the entry for each selected
# table. Calling extract_xbrl_metadata inside the comprehension would repeat
# the whole extraction for every table.
xbrl_metadata_all = pudl.extract.ferc1.extract_xbrl_metadata(
    ferc1_settings, pudl_settings
)
xbrl_metadata_json_dict = {table: xbrl_metadata_all[table] for table in tables}

In [None]:
# Display the raw XBRL "instant" table extracted for
# "electricity_sales_by_rate_schedule". This only works if that table name was
# included in `tables` before the extract cell was run.
ferc1_xbrl_raw_dfs["electricity_sales_by_rate_schedule"]["instant"]

## Transform FERC 1 Tables:

In [None]:
from pudl.transform.ferc1 import *
from pudl.transform.params import *


def _make_transformer(transformer_cls, table_name):
    """Instantiate one table transformer with this notebook's shared options.

    Args:
        transformer_cls: The table transformer class to instantiate.
        table_name: Key used to look up the table's entry in
            ``xbrl_metadata_json_dict``.

    Returns:
        An instance of ``transformer_cls`` built with ``cache_dfs=True`` and
        ``clear_cached_dfs=False``.
    """
    return transformer_cls(
        xbrl_metadata_json=xbrl_metadata_json_dict[table_name],
        cache_dfs=True,
        clear_cached_dfs=False,
    )


# One short alias per table so individual transformers can be picked out in
# later cells (e.g. ``TRANSFORMER = psm``).
bsa = _make_transformer(
    BalanceSheetAssetsFerc1TableTransformer, "balance_sheet_assets_ferc1"
)
bsl = _make_transformer(
    BalanceSheetLiabilitiesFerc1TableTransformer, "balance_sheet_liabilities_ferc1"
)
das = _make_transformer(
    DepreciationAmortizationSummaryFerc1TableTransformer,
    "depreciation_amortization_summary_ferc1",
)
eed = _make_transformer(
    ElectricEnergyDispositionsFerc1TableTransformer,
    "electric_energy_dispositions_ferc1",
)
ees = _make_transformer(
    ElectricEnergySourcesFerc1TableTransformer, "electric_energy_sources_ferc1"
)
eo = _make_transformer(ElectricOpexFerc1TableTransformer, "electric_opex_ferc1")
epdc = _make_transformer(
    ElectricPlantDepreciationChangesFerc1TableTransformer,
    "electric_plant_depreciation_changes_ferc1",
)
ff = _make_transformer(FuelFerc1TableTransformer, "fuel_ferc1")
ins = _make_transformer(IncomeStatementFerc1TableTransformer, "income_statement_ferc1")
ph = _make_transformer(PlantsHydroFerc1TableTransformer, "plants_hydro_ferc1")
pps = _make_transformer(
    PlantsPumpedStorageFerc1TableTransformer, "plants_pumped_storage_ferc1"
)
psm = _make_transformer(PlantsSmallFerc1TableTransformer, "plants_small_ferc1")
pst = _make_transformer(PlantsSteamFerc1TableTransformer, "plants_steam_ferc1")
pis = _make_transformer(PlantInServiceFerc1TableTransformer, "plant_in_service_ferc1")
pp = _make_transformer(PurchasedPowerFerc1TableTransformer, "purchased_power_ferc1")
re = _make_transformer(
    RetainedEarningsFerc1TableTransformer, "retained_earnings_ferc1"
)
ts = _make_transformer(
    TransmissionStatisticsFerc1TableTransformer, "transmission_statistics_ferc1"
)
ups = _make_transformer(
    UtilityPlantSummaryFerc1TableTransformer, "utility_plant_summary_ferc1"
)

# Same order as the aliases were created above.
transformers = [
    bsa,
    bsl,
    das,
    eed,
    ees,
    eo,
    epdc,
    ff,
    ins,
    ph,
    pps,
    psm,
    pst,
    pis,
    pp,
    re,
    ts,
    ups,
]

### Transform Individual Tables

In [None]:
# Pick one table to transform
# Pick one table to transform.
# `psm` is the PlantsSmallFerc1TableTransformer alias created in the
# transformers cell; swap in any other alias from that cell (e.g. `ff`, `pst`).
TRANSFORMER = psm

#### Transform Step-by-Step

In [None]:
# Look up the raw inputs for the selected transformer's table once, then feed
# them to the first transform stage.
selected_table = TRANSFORMER.table_id.value
selected_raw_dbf = ferc1_dbf_raw_dfs[selected_table]
selected_raw_xbrl = ferc1_xbrl_raw_dfs[selected_table]

start = TRANSFORMER.transform_start(
    raw_dbf=selected_raw_dbf,
    raw_xbrl_instant=selected_raw_xbrl["instant"],
    raw_xbrl_duration=selected_raw_xbrl["duration"],
)

In [None]:
# Second stage: run transform_main on the output of transform_start.
main = TRANSFORMER.transform_main(start)

In [None]:
# Final stage: run transform_end on the output of transform_main.
end = TRANSFORMER.transform_end(main)

#### Transform All Steps Together

In [None]:
# Run the selected transformer's transform() on the raw DBF and XBRL inputs
# for its table in a single call.
selected_table = TRANSFORMER.table_id.value
selected_raw_dbf = ferc1_dbf_raw_dfs[selected_table]
selected_raw_xbrl = ferc1_xbrl_raw_dfs[selected_table]

full = TRANSFORMER.transform(
    raw_dbf=selected_raw_dbf,
    raw_xbrl_instant=selected_raw_xbrl["instant"],
    raw_xbrl_duration=selected_raw_xbrl["duration"],
)

### Transform All Tables

In [None]:
# Transform every table, keyed by table name. The dict is filled incrementally
# so tables that finished are still available if a later transformer raises.
transformed_tables = {}

for transformer in transformers:
    table_name = transformer.table_id.value
    raw_xbrl = ferc1_xbrl_raw_dfs[table_name]
    transformed_tables[table_name] = transformer.transform(
        raw_dbf=ferc1_dbf_raw_dfs[table_name],
        raw_xbrl_instant=raw_xbrl["instant"],
        raw_xbrl_duration=raw_xbrl["duration"],
    )

In [None]:
# Display the dict of transformed tables built in the previous cell.
# (The original referenced `transformed_table`, which is never defined and
# raises a NameError.)
transformed_tables