# Working with the FERC Form 1 Extract / Transform
This notebook walks through PUDL's extract and transform steps for FERC Form 1 to make it easier to test and add new years of data, or new tables from the raw FERC Form 1 DBF and XBRL databases that haven't been integrated yet.

This notebook deviates from other devtool debug notebooks in that it doesn't make use of the most recently created Dagster asset values. Instead, the extraction and transform steps are rerun within the notebook so we can inspect the outputs of lower-level transform functions that don't have their own assets, like `process_xbrl` and `transform_start`.

**Make sure you've created the raw FERC databases using one of the `ferc_to_sqlite` jobs!**

## Setup

In [None]:
%load_ext autoreload
%autoreload 3
import logging
import sys
from pathlib import Path

import pandas as pd

import pudl

# Remove the column display limit so wide dataframes are shown in full.
pd.options.display.max_columns = None

## Extract DBF and XBRL Data:

In [None]:
from dagster import build_init_resource_context

from pudl.resources import dataset_settings

# Years of FERC Form 1 data to extract and transform; add desired years here.
years = [2020, 2021]

# Build a resource-init context carrying the FERC 1 year selection, then use it
# to initialize the dataset settings passed to the extract functions.
dataset_init_context = build_init_resource_context(
    config={"ferc1": {"years": years}}
)
configured_dataset_settings = dataset_settings(dataset_init_context)

In [None]:
from pudl.extract.ferc1 import extract_dbf, extract_xbrl

# Raw DBF data; later cells index this by table ID.
ferc1_dbf_raw_dfs = extract_dbf(configured_dataset_settings)
# Raw XBRL data; later cells index this by table ID and then by "instant" / "duration".
ferc1_xbrl_raw_dfs = extract_xbrl(configured_dataset_settings)

In [None]:
# Spot-check the extraction: display the report_year values of the raw fuel_ferc1 duration data.
ferc1_xbrl_raw_dfs["fuel_ferc1"]["duration"].report_year

In [None]:
from dagster import build_op_context

from pudl.extract.ferc1 import raw_xbrl_metadata_json
from pudl.transform.ferc1 import clean_xbrl_metadata_json

# Load the raw XBRL metadata with a standalone op context, then clean it.
# The cleaned result is indexed by table name when the transformers are built.
context = build_op_context()
raw_xbrl_metadata = raw_xbrl_metadata_json(context)
xbrl_metadata_json_dict = clean_xbrl_metadata_json(raw_xbrl_metadata)

## Transform FERC 1 Tables:

### Build Transformers

In [None]:
# Get table class information
import inspect

from pudl.transform.ferc1 import *
from pudl.transform.params import *


def get_table_classes(module):
    """Return the concrete FERC 1 table transformer classes found in ``module``.

    A class qualifies when its ``__name__`` ends with ``Ferc1TableTransformer``.
    The ``AbstractFerc1TableTransformer`` base class is excluded. Classes are
    returned in the order produced by ``inspect.getmembers``.
    """
    suffix = "Ferc1TableTransformer"
    abstract_name = "AbstractFerc1TableTransformer"
    return [
        cls
        for _, cls in inspect.getmembers(module, inspect.isclass)
        if cls.__name__.endswith(suffix) and cls.__name__ != abstract_name
    ]


# Index every concrete transformer class by the ID of the table it handles.
classes = get_table_classes(pudl.transform.ferc1)
table_id_dict = {cls.table_id.value: cls for cls in classes}

# Instantiate one transformer per table in the name map. Caching is enabled and
# cache clearing disabled, presumably so intermediate dataframes stay available
# for inspection in the cells below.
transformers = {}
for table in TABLE_NAME_MAP_FERC1:
    # retained_earnings_appropriations_ferc1 is in the name map but doesn't
    # have a transform class, so it is left out.
    if table != "retained_earnings_appropriations_ferc1":
        transformer_class = table_id_dict[table]
        transformers[table] = transformer_class(
            xbrl_metadata_json=xbrl_metadata_json_dict[table],
            cache_dfs=True,
            clear_cached_dfs=False,
        )

### Transform Individual Tables

In [None]:
from pprint import pprint

# Show the names of all tables that have a transformer; pick one to transform.
pprint(list(transformers))

In [None]:
table_name = "other_regulatory_liabilities_ferc1"  # set the table to debug here
TRANSFORMER = transformers[table_name]  # transformer used by the step-by-step cells below

#### Test each step of the transform process:

In [None]:
# Run only the XBRL processing step on the selected table's raw instant and
# duration inputs.
selected_raw_xbrl = ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]
xbrl = TRANSFORMER.process_xbrl(
    raw_xbrl_instant=selected_raw_xbrl["instant"],
    raw_xbrl_duration=selected_raw_xbrl["duration"],
)

In [None]:
# Run only the DBF processing step on the selected table's raw DBF input.
dbf = TRANSFORMER.process_dbf(raw_dbf=ferc1_dbf_raw_dfs[TRANSFORMER.table_id.value])

In [None]:
# First transform stage: takes the selected table's raw DBF and raw XBRL
# (instant and duration) inputs. Its output is passed to transform_main below.
selected_table_id = TRANSFORMER.table_id.value
selected_raw_dbf = ferc1_dbf_raw_dfs[selected_table_id]
selected_raw_xbrl = ferc1_xbrl_raw_dfs[selected_table_id]
start = TRANSFORMER.transform_start(
    raw_dbf=selected_raw_dbf,
    raw_xbrl_instant=selected_raw_xbrl["instant"],
    raw_xbrl_duration=selected_raw_xbrl["duration"],
)

In [None]:
# Second transform stage: operates on the output of transform_start.
main = TRANSFORMER.transform_main(start)

In [None]:
# Final transform stage: operates on the output of transform_main.
end = TRANSFORMER.transform_end(main)

#### Test all steps together

In [None]:
# Run the complete transform for the selected table in one call, starting from
# the same raw DBF and XBRL inputs used by the step-by-step cells.
selected_table_id = TRANSFORMER.table_id.value
selected_raw_dbf = ferc1_dbf_raw_dfs[selected_table_id]
selected_raw_xbrl = ferc1_xbrl_raw_dfs[selected_table_id]
full = TRANSFORMER.transform(
    raw_dbf=selected_raw_dbf,
    raw_xbrl_instant=selected_raw_xbrl["instant"],
    raw_xbrl_duration=selected_raw_xbrl["duration"],
)

### Transform All Tables

In [None]:
# Run the full transform for every table except plants_steam_ferc1 and collect
# the results keyed by table ID.
transformed_tables = {}
# The loop key is named ``tbl_name`` rather than ``table_name`` because loop
# targets persist after the loop ends: reusing ``table_name`` would overwrite
# the table chosen in the "Transform Individual Tables" section and leave it
# out of sync with ``TRANSFORMER``.
for tbl_name, transformer in transformers.items():
    if tbl_name == "plants_steam_ferc1":
        # plants_steam_ferc1 is a special case. It depends on the transformed fuel_ferc1 table.
        continue
    tbl_id = transformer.table_id.value
    tbl_raw_dbf = ferc1_dbf_raw_dfs[tbl_id]
    tbl_raw_xbrl = ferc1_xbrl_raw_dfs[tbl_id]
    transformed_tables[tbl_id] = transformer.transform(
        raw_dbf=tbl_raw_dbf,
        raw_xbrl_instant=tbl_raw_xbrl["instant"],
        raw_xbrl_duration=tbl_raw_xbrl["duration"],
    )

In [None]:
# plants_steam_ferc1 takes the already-transformed fuel_ferc1 table as an extra
# input, so it is transformed separately, after fuel_ferc1 is available in
# transformed_tables.
transformer = transformers["plants_steam_ferc1"]
steam_table_id = transformer.table_id.value
steam_raw_dbf = ferc1_dbf_raw_dfs[steam_table_id]
steam_raw_xbrl = ferc1_xbrl_raw_dfs[steam_table_id]
transformed_tables[steam_table_id] = transformer.transform(
    raw_dbf=steam_raw_dbf,
    raw_xbrl_instant=steam_raw_xbrl["instant"],
    raw_xbrl_duration=steam_raw_xbrl["duration"],
    transformed_fuel=transformed_tables["fuel_ferc1"],
)