# Working with the FERC Form 1 Extract / Transform
This notebook walks through PUDL's extract and transform steps for FERC Form 1, making it easier to test and add new years of data, or new tables from the DBF and XBRL sources that haven't been integrated yet.

In [None]:
%load_ext autoreload
%autoreload 3
import pudl
import logging
import sys
from pathlib import Path
import pandas as pd
# Show every column when a dataframe is displayed (None removes the column limit).
pd.set_option("display.max_columns", None)

In [None]:
# Build a handler that writes bare log messages (no level/timestamp prefix) to stdout.
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)

# Configure the root logger. The handler list is replaced rather than appended to,
# so re-running this cell does not produce duplicated log lines.
logger = logging.getLogger()
logger.handlers = [handler]
logger.setLevel(logging.INFO)

In [None]:
# Fetch the defaults from pudl.workspace.setup; this object is passed as
# `pudl_settings` to both extract calls further down.
pudl_settings = pudl.workspace.setup.get_defaults()

## Set the scope for the Extract-Transform:

In [None]:
# FERC Form 1 tables to include in this extract / transform run.
# Add or remove table names here to change the scope.
ferc1_tables = [
    "plants_steam_ferc1",
    "fuel_ferc1",
    "plants_hydro_ferc1",
    "plants_pumped_storage_ferc1",
    "purchased_power_ferc1",
    "plants_small_ferc1",
    "plant_in_service_ferc1",
]
ferc1_settings = pudl.settings.Ferc1Settings(tables=ferc1_tables)

## Extract DBF and XBRL Data:

In [None]:
# Both extractors take the same two settings objects, so bundle them once.
extract_kwargs = {
    "ferc1_settings": ferc1_settings,
    "pudl_settings": pudl_settings,
}

# Older FERC Form 1 data comes from DBF (2020 and earlier).
ferc1_dbf_raw_dfs = pudl.extract.ferc1.extract_dbf(**extract_kwargs)

# Newer FERC Form 1 data comes from XBRL (2021 and later).
ferc1_xbrl_raw_dfs = pudl.extract.ferc1.extract_xbrl(**extract_kwargs)

## Transform FERC 1 Tables:

In [None]:
# Import the table transformers by name rather than with `import *`, so it is clear
# where each class comes from and the notebook namespace is not silently overwritten.
# NOTE(review): the former `from pudl.transform.params import *` was dropped because
# no cell in this notebook uses a name from it; import specific names from
# pudl.transform.params here if you need them.
from pudl.transform.ferc1 import (
    FuelFerc1TableTransformer,
    PlantInServiceFerc1TableTransformer,
    PlantsHydroFerc1TableTransformer,
    PlantsPumpedStorageFerc1TableTransformer,
    PlantsSmallFerc1TableTransformer,
    PlantsSteamFerc1TableTransformer,
    PurchasedPowerTableTransformer,
)

# Constructor options shared by every transformer below.
# NOTE(review): presumably these keep intermediate dataframes cached on the
# transformer so each step can be inspected -- confirm against the class docs.
TRANSFORMER_KWARGS = {"cache_dfs": True, "clear_cached_dfs": False}

# Instantiate the table-specific transformers
sp = PlantsSmallFerc1TableTransformer(**TRANSFORMER_KWARGS)
hp = PlantsHydroFerc1TableTransformer(**TRANSFORMER_KWARGS)
fp = FuelFerc1TableTransformer(**TRANSFORMER_KWARGS)
st = PlantsSteamFerc1TableTransformer(**TRANSFORMER_KWARGS)
ps = PlantsPumpedStorageFerc1TableTransformer(**TRANSFORMER_KWARGS)
pp = PurchasedPowerTableTransformer(**TRANSFORMER_KWARGS)
pis = PlantInServiceFerc1TableTransformer(**TRANSFORMER_KWARGS)

In [None]:
# Pick which table to transform!
# Assign any one of the transformer instances created above (sp, hp, fp, st, ps,
# pp, pis); the transform cells below all operate on whichever one is selected here.
TRANSFORMER = sp

### Transform Step-by-Step

In [None]:
# Look up the selected transformer's raw inputs by its table ID, then run the
# first transform stage on them.
table_name = TRANSFORMER.table_id.value
raw_dbf = ferc1_dbf_raw_dfs[table_name]
raw_xbrl = ferc1_xbrl_raw_dfs[table_name]

start = TRANSFORMER.transform_start(
    raw_dbf=raw_dbf,
    raw_xbrl_instant=raw_xbrl["instant"],
    raw_xbrl_duration=raw_xbrl["duration"],
)

In [None]:
# Second stage: feed the output of transform_start into transform_main.
main = TRANSFORMER.transform_main(start)

In [None]:
# Final stage: feed the output of transform_main into transform_end.
end = TRANSFORMER.transform_end(main)

### Transform All Steps Together

In [None]:
# Run the selected transformer's full transform() in one call, using the same raw
# inputs as the step-by-step cells. The call is the cell's last expression, so its
# result is displayed as the cell output.
table_name = TRANSFORMER.table_id.value
raw_dbf = ferc1_dbf_raw_dfs[table_name]
raw_xbrl = ferc1_xbrl_raw_dfs[table_name]

TRANSFORMER.transform(
    raw_dbf=raw_dbf,
    raw_xbrl_instant=raw_xbrl["instant"],
    raw_xbrl_duration=raw_xbrl["duration"],
)