# Working with the FERC Form 1 Extract / Transform
This notebook walks through PUDL's extract and transform steps for FERC Form 1, one stage at a time. It is meant to make it easier to test and add new years of data, or new tables from the various spreadsheets that haven't been integrated yet.

## Setup

In [None]:
%load_ext autoreload
%autoreload 3
import pudl
import logging
import sys
from pathlib import Path
import pandas as pd
# Show every column when displaying a DataFrame (None removes the column limit)
pd.set_option("display.max_columns", None)

In [None]:
# Build a stdout handler that prints only the log message text
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger to emit INFO and above through that handler.
# The handler list is replaced rather than appended to, so re-running this
# cell does not produce duplicate log lines.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [handler]

In [None]:
# Look up PUDL's default workspace settings; they are passed to every extract call below.
# NOTE(review): presumably this reads the user's PUDL workspace configuration — confirm against pudl.workspace.setup.
pudl_settings = pudl.workspace.setup.get_defaults()

## Pick the tables you want to load

In [None]:
# Choose any tables from the following list and add them to the list of tables you want to transform.
# Iterating the mapping yields its keys, i.e. the available table names.
list(pudl.extract.ferc1.TABLE_NAME_MAP_FERC1)

In [None]:
# Names of the FERC 1 tables to extract and transform. Choose them from the
# keys of pudl.extract.ferc1.TABLE_NAME_MAP_FERC1.
tables = [] # Add tables here

In [None]:
# Wrap the selected tables in the settings object that the extract functions take.
# NOTE(review): only `tables` is set here, so any other settings presumably keep their defaults — confirm.
ferc1_settings = pudl.settings.Ferc1Settings(tables=tables)

## Extract DBF and XBRL Data:

In [None]:
# Extract old FERC Form 1 data from DBF (2020 and earlier)
ferc1_dbf_raw_dfs = pudl.extract.ferc1.extract_dbf(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract new FERC Form 1 data from XBRL (2021 and later)
ferc1_xbrl_raw_dfs = pudl.extract.ferc1.extract_xbrl(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract the XBRL metadata once, then keep only the entries for the selected
# tables. Calling extract_xbrl_metadata() inside the comprehension would repeat
# the whole extraction for every table. Nothing is extracted when no tables
# are selected.
xbrl_metadata_all = (
    pudl.extract.ferc1.extract_xbrl_metadata(ferc1_settings, pudl_settings)
    if tables
    else {}
)
xbrl_metadata_json_dict = {table: xbrl_metadata_all[table] for table in tables}

## Transform FERC 1 Tables:

### Build Transformers

In [None]:
# Get table class information
import inspect
from pudl.transform.ferc1 import *
from pudl.transform.params import *

def get_table_classes(module):
    """Return the concrete FERC 1 table transformer classes found in ``module``.

    Args:
        module: The module (or other object) whose class members are inspected.

    Returns:
        A list of every class member whose name ends with
        ``Ferc1TableTransformer``, excluding ``AbstractFerc1TableTransformer``,
        in the order produced by :func:`inspect.getmembers`.
    """
    return [
        cls
        for _, cls in inspect.getmembers(module, inspect.isclass)
        if cls.__name__.endswith("Ferc1TableTransformer")
        and cls.__name__ != "AbstractFerc1TableTransformer"
    ]

# Index the concrete transformer classes by the value of their table_id
classes = get_table_classes(pudl.transform.ferc1)
table_id_dict = {
    transformer_cls.table_id.value: transformer_cls for transformer_cls in classes
}

# Instantiate one transformer per selected table, handing each its own XBRL metadata.
# NOTE(review): cache_dfs=True / clear_cached_dfs=False presumably keep intermediate
# dataframes around for inspection — confirm against the transformer class.
transformers = {
    table: table_id_dict[table](
        xbrl_metadata_json=xbrl_metadata_json_dict[table],
        cache_dfs=True,
        clear_cached_dfs=False,
    )
    for table in tables
}

### Transform Individual Tables

In [None]:
# Pick one table to transform
# transformers.keys()  # pick a table from this list to focus on
if not transformers:
    raise ValueError(
        "No transformers available: add table names to `tables` and re-run the cells above."
    )
# Defaults to the first transformer that was built; replace with any key of
# `transformers` to focus on a different table.
table_to_transform = next(iter(transformers))
TRANSFORMER = transformers[table_to_transform]

#### Test each step of the transform process:

In [None]:
# Run only the XBRL processing step on the selected table's raw instant and duration data
selected_xbrl = ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]
xbrl = TRANSFORMER.process_xbrl(
    raw_xbrl_instant=selected_xbrl["instant"],
    raw_xbrl_duration=selected_xbrl["duration"],
)

In [None]:
# Run only the DBF processing step on the selected table's raw DBF data
dbf = TRANSFORMER.process_dbf(raw_dbf=ferc1_dbf_raw_dfs[TRANSFORMER.table_id.value])

In [None]:
# Run the first transform stage on the selected table's raw DBF and XBRL inputs
selected_table_id = TRANSFORMER.table_id.value
selected_dbf = ferc1_dbf_raw_dfs[selected_table_id]
selected_xbrl = ferc1_xbrl_raw_dfs[selected_table_id]
start = TRANSFORMER.transform_start(
    raw_dbf=selected_dbf,
    raw_xbrl_instant=selected_xbrl["instant"],
    raw_xbrl_duration=selected_xbrl["duration"],
)

In [None]:
# Feed the output of transform_start() into the main transform stage
main = TRANSFORMER.transform_main(start)

In [None]:
# Feed the output of transform_main() into the final transform stage
end = TRANSFORMER.transform_end(main)

#### Test all steps together

In [None]:
# Run the complete transform on the selected table in a single call
selected_table_id = TRANSFORMER.table_id.value
selected_dbf = ferc1_dbf_raw_dfs[selected_table_id]
selected_xbrl = ferc1_xbrl_raw_dfs[selected_table_id]
full = TRANSFORMER.transform(
    raw_dbf=selected_dbf,
    raw_xbrl_instant=selected_xbrl["instant"],
    raw_xbrl_duration=selected_xbrl["duration"],
)

### Transform All Tables

In [None]:
# Run the complete transform for every transformer that was built, keyed by
# each transformer's own table_id value
transformed_tables = {
    transformer.table_id.value: transformer.transform(
        raw_dbf=ferc1_dbf_raw_dfs[transformer.table_id.value],
        raw_xbrl_instant=ferc1_xbrl_raw_dfs[transformer.table_id.value]["instant"],
        raw_xbrl_duration=ferc1_xbrl_raw_dfs[transformer.table_id.value]["duration"],
    )
    for transformer in transformers.values()
}

### Test the Transform Module

In [None]:
# The transform module has a script you can call to test the module.
# This will test all of the tables.
# The relative path is resolved against the notebook kernel's working directory.
!python ../src/pudl/transform/ferc1.py