# Test ETL for New Years of Data

## Setup

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Standard libraries
import logging
import os
import pathlib
import sys

# 3rd party libraries
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sa
import yaml

# Local libraries
import pudl

In [None]:
sns.set()
%matplotlib inline
mpl.rcParams['figure.figsize'] = (10,4)
mpl.rcParams['figure.dpi'] = 150
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [None]:
# Build a stdout handler that emits only the bare log message text.
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

# Configure the root logger at INFO level and replace whatever handlers it
# already has, so re-running this cell does not produce duplicate output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [handler]

In [None]:
# File name (not a full path) of the YAML ETL settings file to test; it is
# looked up inside the PUDL settings directory when the settings are loaded.
name_of_your_settings_file = 'etl_full_no_cems.yml'

In [None]:
# Fetch the default PUDL settings, which include the 'settings_dir' entry
# used to locate the ETL settings file.
pudl_settings = pudl.workspace.setup.get_defaults()

# Join with pathlib instead of string concatenation so the path is built
# correctly whether 'settings_dir' is a str or a Path, and on any OS.
settings_path = pathlib.Path(pudl_settings['settings_dir']) / name_of_your_settings_file
with settings_path.open() as f:
    script_settings = yaml.safe_load(f)

# Only the first data package bundle in the settings file is exercised here.
etl_settings = script_settings['datapkg_bundle_settings'][0]
#pudl_engine = sa.create_engine(pudl_settings['pudl_db'])

## Test Extract

In [None]:
# Pull the FERC1 section out of the first entry in the bundle's dataset list,
# then unpack the years and tables it requests.
ferc1_inputs = etl_settings['datasets'][0]['ferc1']
ferc1_years, ferc1_tables = (
    ferc1_inputs[key] for key in ('ferc1_years', 'ferc1_tables')
)

# NOTE(review): this only prints a message; it does not prevent the later
# extract/transform cells from running.
if not (ferc1_years and ferc1_tables):
    print('Not loading FERC1')

In [None]:
# Run the FERC1 extract step for the tables and years selected from the
# settings file, using the default PUDL settings.
ferc1_raw_dfs = pudl.extract.ferc1.extract(
    pudl_settings=pudl_settings,
    ferc1_years=ferc1_years,
    ferc1_tables=ferc1_tables,
)

## Test Transform

In [None]:
# Feed the extracted FERC1 dataframes through the FERC1 transform step,
# restricted to the same set of tables.
ferc1_transformed_dfs = pudl.transform.ferc1.transform(
    ferc1_raw_dfs,
    ferc1_tables=ferc1_tables,
)