# Test EIA860 validation for old years

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Standard libraries
import logging
import sys
import os
import pathlib

# 3rd party libraries
import geopandas as gpd
import dask.dataframe as dd
from dask.distributed import Client
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sa

# Local libraries
import pudl

In [3]:
# Enable viewing of logging outputs in the notebook by routing the root
# logger to stdout.
logger=logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
# Emit only the message text (no timestamp, level, or logger name).
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
# Assign the handler list outright (instead of calling addHandler) so any
# handlers already attached to the root logger are replaced, not added to.
logger.handlers = [handler]

In [4]:
# Display settings
sns.set()
%matplotlib inline
mpl.rcParams['figure.dpi'] = 150
pd.options.display.max_columns = 100
pd.options.display.max_rows = 5

In [5]:
# Establish connection to pudl database: fetch the default PUDL workspace
# settings and build a SQLAlchemy engine from their 'pudl_db' entry.
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings['pudl_db'])

In [6]:
# Change root directory to pytest test folder.
# The repository location can be supplied through the PUDL_REPO environment
# variable; the original author's checkout path is kept as the fallback so
# existing setups keep working unchanged.
PUDL_REPO = pathlib.Path(
    os.environ.get(
        "PUDL_REPO",
        "/Users/aesharpe/Desktop/Work/Catalyst_Coop/pudl/",
    )
).expanduser()
pudl_test_dir = PUDL_REPO / "test"
# Fail early with an actionable message instead of a bare chdir error.
if not pudl_test_dir.is_dir():
    raise FileNotFoundError(
        f"PUDL test directory not found: {pudl_test_dir}. "
        "Set the PUDL_REPO environment variable to your local pudl checkout."
    )
os.chdir(pudl_test_dir)

# Import test file. This must run after the chdir above -- presumably the
# `validate` package is resolved via the current working directory.
import validate.eia_test

In [21]:
# Generate pudl_out_eia object -- change freq as desired for tests
# (uncomment the freq line below to pass a frequency explicitly).
pudl_out_eia = pudl.output.pudltabl.PudlTabl(
    pudl_engine=pudl_engine,
    #freq="AS",
    fill_fuel_cost=True,
    roll_fuel_cost=True,
    fill_net_gen=True,
)

In [22]:
# Test parameters -- change for each test function.
# Laid out as a (comma-separated argument names, list of value tuples) pair,
# presumably mirroring the parametrize arguments used in the test module
# (verify against validate/eia_test.py).
row_params = (
    "df_name,cols", [
        ("plants_eia860", "all"),
        ("utils_eia860", "all"),
        ("pu_eia860", "all"),
        ("bga_eia860", "all"),
        ("own_eia860", "all"),
        ("gens_eia860", "all"),
        ("gen_eia923", "all"),
        ("gf_eia923", "all"),
        ("bf_eia923", "all"),
        ("frc_eia923", "all"),
    ]
)

def params_to_args(params):
    """Convert a parametrize-style spec into a list of keyword-argument dicts.

    Args:
        params: A two-item sequence. The first item is a string of
            comma-separated argument names (e.g. ``"df_name,cols"``); the
            second is an iterable of value tuples, one per test case, with
            values in the same order as the names.

    Returns:
        list of dict: One dict per value tuple, mapping each argument name to
        the value in the corresponding position. If a tuple and the name list
        differ in length, the surplus items are ignored (``zip`` semantics).
    """
    # Strip whitespace so "df_name, cols" yields the key "cols" rather than
    # " cols", which could not be unpacked as a keyword argument.
    keys = [name.strip() for name in params[0].split(',')]
    return [dict(zip(keys, args)) for args in params[1]]

# One keyword-argument dict per parameter set in row_params.
row_args = params_to_args(row_params)

In [23]:
# Run test of desired function -- change out function as desired.
# Only the first parameter set (row_args[0]) is exercised here; change the
# index to run the test against a different table.
validate.eia_test.test_no_null_cols_eia(
    pudl_out_eia=pudl_out_eia,
    live_pudl_db=True,
    **row_args[0]
)

### SQL Access

In [24]:
# Build a SQLAlchemy engine for direct SQL access. This uses the same
# "pudl_db" settings entry as the engine created earlier in the notebook.
pudl_engine = sa.create_engine(pudl_settings["pudl_db"])

In [25]:
# Load the entire plants_eia860 table into a DataFrame.
plants = pd.read_sql("plants_eia860", pudl_engine)

In [26]:
# Inspect the distinct values recorded in the datum column.
plants.datum.unique()

array([None, 'UNK', 'NAD83', 'NADS27', 'WGS84', 'NAD27', 'ukn', 'NAA',
       '"NA"', '"UNK"', 'na', 'GPS', 'Pl', 'unk', 'NA', '"NA\'', 'nad83',
       'w', '(UNK)', '646', 'HARNS', 'Unk'], dtype=object)