In [1]:
%run DOVSOutages.ipynb
%run AMINonVee.ipynb
%run AMINonVeeSQL.ipynb
%run AMINonVeeCircuitSQL.ipynb
%run MeterPremise.ipynb

In [2]:
from importlib import reload
#reload(Utilities)

import sys, os
import re

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns
from packaging import version

import itertools
import copy
import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
import Plot_Box_sns
import GrubbsTest

In [3]:
def build_get_circuit_info_mp_sql():
    """
    Build (without executing) the default.meter_premise query which returns the distinct
    (circuit_nb, circuit_nm, station_nb, station_nm) combinations for a set of serial numbers.

    The WHERE clause keeps only rows whose four circuit/station fields are all non-NULL and
    non-empty.  The serial-number condition is left as an unformatted '({})' placeholder, so the
    statement obtained from the returned query must still be completed with str.format
    (see get_circuit_info).

    Returns:
        SQLQuery object holding the SELECT DISTINCT / FROM / WHERE described above.
    """
    # Element 0: placeholder for the list of serial numbers
    where_conditions = [dict(field_desc='mfr_devc_ser_nbr', comparison_operator='IN', value='({})', needs_quotes=False)]
    # Elements 1-8: for each circuit/station field, a NOT NULL condition followed by a non-empty condition
    for circuit_field in ['circuit_nb', 'circuit_nm', 'station_nb', 'station_nm']:
        where_conditions.append(dict(field_desc=circuit_field, comparison_operator='IS NOT', value='NULL', needs_quotes=False))
        where_conditions.append(dict(field_desc=circuit_field, comparison_operator='<>', value='', needs_quotes=True))
    circuit_info_where = SQLWhere(where_conditions, idxs=None, run_check=True)
    #--------------------
    # Pair up the two conditions belonging to each field.
    # Per the original author, this has no practical effect on the query, it only makes the
    # output string look better.
    # Gaps in the keys are only closed on the final call, so the indices used by the
    # earlier calls remain valid.
    idx_pairs = [[1,2], [3,4], [5,6], [7,8]]
    for idx_pair in idx_pairs[:-1]:
        circuit_info_where.combine_where_elements(idx_pair, 'AND', close_gaps_in_keys=False)
    circuit_info_where.combine_where_elements(idx_pairs[-1], 'AND', close_gaps_in_keys=True)
    #--------------------
    return SQLQuery(sql_select = SQLSelect(['DISTINCT circuit_nb,circuit_nm,station_nb,station_nm']), 
                    sql_from = SQLFrom('default', 'meter_premise'), 
                    sql_where = circuit_info_where)


def get_circuit_info(conn_aws, serial_numbers):
    """
    Look up the circuit information shared by the meters in serial_numbers.

    Args:
        conn_aws: connection handed to pd.read_sql.
        serial_numbers: serial numbers, inserted (via Utilities_sql.join_list_w_quotes) into the
                        IN (...) placeholder of the query from build_get_circuit_info_mp_sql.

    Returns:
        dict with keys circuit_nb, circuit_nm, station_nb, station_nm taken from the single
        row returned by the query.

    Raises:
        AssertionError: if the query does not return exactly one row, i.e. the serial numbers
                        do not resolve to exactly one distinct circuit/station combination.
    """
    circuit_query = build_get_circuit_info_mp_sql()
    statement_template = circuit_query.get_sql_statement(insert_n_tabs_to_each_line=1)
    statement = statement_template.format(Utilities_sql.join_list_w_quotes(serial_numbers))
    #--------------------
    circuit_df = pd.read_sql(statement, conn_aws)
    assert(circuit_df.shape[0]==1)
    #--------------------
    only_row = circuit_df.iloc[0]
    info_cols = ('circuit_nb', 'circuit_nm', 'station_nb', 'station_nm')
    return {info_col: only_row[info_col] for info_col in info_cols}

In [4]:
def get_trsf_pole_nbs_on_circuit(conn_aws, circuit_nb, circuit_nm, station_nb, station_nm):
    """
    Return the distinct trsf_pole_nb values found in default.meter_premise for one circuit.

    Args:
        conn_aws: connection handed to pd.read_sql.
        circuit_nb, circuit_nm, station_nb, station_nm: values which the identically named
            columns must equal (all four conditions are supplied to a single SQLWhere).

    Returns:
        list built from the 'trsf_pole_nb' column of the query result.
    """
    circuit_fields = ['circuit_nb', 'circuit_nm', 'station_nb', 'station_nm']
    # One equality condition per field, each with a '{}' placeholder filled by format below
    equality_conditions = [
        dict(field_desc=circuit_field, comparison_operator='=', value='{}', needs_quotes=True) 
        for circuit_field in circuit_fields
    ]
    trsf_query = SQLQuery(sql_select = SQLSelect(['DISTINCT trsf_pole_nb']), 
                          sql_from = SQLFrom('default', 'meter_premise'), 
                          sql_where = SQLWhere(equality_conditions, idxs=None, run_check=True))
    statement = trsf_query.get_sql_statement().format(circuit_nb, circuit_nm, station_nb, station_nm)
    #--------------------
    return pd.read_sql(statement, conn_aws)['trsf_pole_nb'].tolist()

In [5]:
def build_mp_sql_w_circuit_info_or_trsf_pole_nbs():
    """
    Build (without executing) the default.meter_premise query selecting mfr_devc_ser_nbr and
    trsf_pole_nb for meters matched by circuit information or by transformer pole number.

    The WHERE clause holds five unformatted placeholders, in this order:
    circuit_nb, circuit_nm, station_nb, station_nm, and the trsf_pole_nb IN (...) list.
    The four circuit/station conditions are combined with AND, and that combined element is
    then combined with the trsf_pole_nb condition using OR.

    Returns:
        SQLQuery object; its statement must still be completed with str.format.
    """
    circuit_fields = ['circuit_nb', 'circuit_nm', 'station_nb', 'station_nm']
    where_conditions = [
        dict(field_desc=circuit_field, comparison_operator='=', value='{}', needs_quotes=True) 
        for circuit_field in circuit_fields
    ]
    where_conditions.append(dict(field_desc='trsf_pole_nb', comparison_operator='IN', value='({})', needs_quotes=False))
    circuit_or_trsf_where = SQLWhere(where_conditions, idxs=None, run_check=True)
    # Gaps are left open after the first combination so the trsf_pole_nb element is still
    # addressed as index 4 in the second call
    circuit_or_trsf_where.combine_where_elements([0, 1, 2, 3], 'AND', close_gaps_in_keys=False)
    circuit_or_trsf_where.combine_where_elements([0, 4], 'OR', close_gaps_in_keys=True)
    #--------------------
    return SQLQuery(sql_select = SQLSelect(['mfr_devc_ser_nbr', 'trsf_pole_nb']), 
                    sql_from = SQLFrom('default', 'meter_premise'), 
                    sql_where = circuit_or_trsf_where)

def build_mp_sql_statement_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, 
                                                           trsf_pole_nbs, 
                                                           insert_n_tabs_to_each_line=1):
    """
    Return the fully formatted meter_premise SQL statement (a string) selecting meters matched
    by circuit information or by transformer pole number.

    Args:
        circuit_nb, circuit_nm, station_nb, station_nm: values inserted, in this order, into the
            first four placeholders of the query from build_mp_sql_w_circuit_info_or_trsf_pole_nbs.
        trsf_pole_nbs: transformer pole numbers; joined with Utilities_sql.join_list_w_quotes and
            inserted into the final IN (...) placeholder.
        insert_n_tabs_to_each_line: forwarded to SQLQuery.get_sql_statement (default 1).

    Returns:
        str: the formatted SQL statement.
    """
    mp_sql = build_mp_sql_w_circuit_info_or_trsf_pole_nbs()
    # Forward the caller's value; it was previously hard-coded to 1, so the argument was
    # silently ignored.
    mp_sql_template = mp_sql.get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line)
    mp_sql_statement = mp_sql_template.format(circuit_nb, circuit_nm, station_nb, station_nm, 
                                              Utilities_sql.join_list_w_quotes(trsf_pole_nbs))
    return mp_sql_statement

# FINDING OTHERS ON THE CIRCUIT

### OLD ORIGINAL FUNCTIONS

In [6]:
def build_sql_outage_others_on_circuit_1_0(conn_aws, dev_ser_nbrs, date_range, groupby_xfmr=False, verbose=True):
    """
    Return SQL statement to build aggregate of all OTHER meters on the circuit
    (i.e., excluding those from dev_ser_nbrs).

    Step 1: From the serial numbers given in dev_ser_nbrs, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit.
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without
            the transformer numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct
            circuit information OR the correct transformer number.
            NOTE: To be 100% correct, it should probably be meters which have the correct circuit
                  information OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on
            the circuit (i.e., excluding those from dev_ser_nbrs).

    Steps 1 and 2 each execute a query against default.meter_premise through pd.read_sql;
    the final statement is only built, not executed.

    Args:
        conn_aws: connection handed to pd.read_sql.
        dev_ser_nbrs: serial numbers used to find the circuit and excluded from the aggregate.
        date_range: indexable; date_range[0] and date_range[1] become the BETWEEN bounds
                    on aep_usage_dt.
        groupby_xfmr: when True, MP.trsf_pole_nb is added to both the SELECT and GROUP BY lists.
        verbose: when True, print the intermediate statements, the step 1 result and the
                 final statement.

    Returns:
        str: the final SQL statement.

    Raises:
        AssertionError: if step 1 does not return exactly one circuit/station combination.
    """
    def _quote(val):
        # Single-quoted SQL string literal.  Embedded single quotes are doubled so that a value
        # such as O'BRIEN (e.g. a circuit or station name read back from the database) cannot
        # terminate the literal early and corrupt the statement.
        return "'{}'".format(str(val).replace("'", "''"))

    def _quote_join(vals):
        # Comma separated list of quoted literals, suitable for use inside IN (...)
        return ','.join([_quote(x) for x in vals])

    # ********************** Step 1 **********************
    mp_where_str_1 = "mfr_devc_ser_nbr IN ({})".format(_quote_join(dev_ser_nbrs))
    sql_mp_1 = (
    """
    SELECT DISTINCT circuit_nb,circuit_nm,station_nb,station_nm
    FROM default.meter_premise
    WHERE {}
    AND circuit_nb IS NOT NULL AND circuit_nb <> ''
    AND circuit_nm IS NOT NULL AND circuit_nm <> ''
    AND station_nb IS NOT NULL AND station_nb <> ''
    AND station_nm IS NOT NULL AND station_nm <> ''
    """
    ).format(mp_where_str_1)
    #--------------------
    df_mp_1 = pd.read_sql(sql_mp_1, conn_aws)
    # Exactly one row <==> all listed serial numbers resolve to a single circuit
    assert(df_mp_1.shape[0]==1)
    #--------------------
    circuit_nb = df_mp_1.iloc[0]['circuit_nb']
    circuit_nm = df_mp_1.iloc[0]['circuit_nm']
    station_nb = df_mp_1.iloc[0]['station_nb']
    station_nm = df_mp_1.iloc[0]['station_nm']

    # ********************** Step 2 **********************
    mp_where_str_2 = (
    f"""
    circuit_nb = {_quote(circuit_nb)}
    AND circuit_nm = {_quote(circuit_nm)}
    AND station_nb = {_quote(station_nb)}
    AND station_nm = {_quote(station_nm)}
    """
    )

    sql_mp_2 = (
    """
    SELECT DISTINCT trsf_pole_nb
    FROM default.meter_premise
    WHERE {}
    """
    ).format(mp_where_str_2)
    #--------------------
    df_mp_2 = pd.read_sql(sql_mp_2, conn_aws)
    trsf_pole_nbs = df_mp_2['trsf_pole_nb'].tolist()

    # ********************** Step 3 **********************
    mp_where_str_3 = (
    """
    (circuit_nb = {}
    AND circuit_nm = {}
    AND station_nb = {}
    AND station_nm = {})
    OR trsf_pole_nb IN ({})
    """
    ).format(_quote(circuit_nb), _quote(circuit_nm), _quote(station_nb), _quote(station_nm), 
             _quote_join(trsf_pole_nbs))
    #--------------------
    # Inserted at the front of both the SELECT and GROUP BY column lists
    if groupby_xfmr:
        groupby_xfmr_str = 'MP.trsf_pole_nb, '
    else:
        groupby_xfmr_str = ''
    #--------------------
    sql = (
    """
    WITH MP
    AS (
        SELECT mfr_devc_ser_nbr, trsf_pole_nb
        FROM default.meter_premise
        WHERE {0}
    ), 
    U AS (
        SELECT serialnumber, starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
               aep_derived_uom, aep_srvc_qlty_idntfr, aep_usage_dt, value
        FROM usage_nonvee.reading_ivl_nonvee
        WHERE aep_opco = 'oh'
        AND aep_usage_dt BETWEEN {1} AND {2}
    )

    SELECT {3}U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, 
           U.aep_derived_uom, U.aep_srvc_qlty_idntfr, U.aep_usage_dt, 
           SUM(U.value) as value_sum, SUM(POWER(U.value, 2)) as value_sq_sum, 
           AVG(U.value) as value_mean, STDDEV_SAMP(U.value) as value_std, 
           COUNT(U.value) as counts, COUNT(*) as counts_including_null
    FROM MP
    INNER JOIN U
    ON MP.mfr_devc_ser_nbr = U.serialnumber
    WHERE U.serialnumber NOT IN ({4})
    GROUP BY {3}U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, U.aep_derived_uom, U.aep_srvc_qlty_idntfr, U.aep_usage_dt
    """
    ).format(mp_where_str_3, 
             _quote(date_range[0]),  
             _quote(date_range[1]), 
             groupby_xfmr_str, 
             _quote_join(dev_ser_nbrs))
    # ********************** Return **********************
    if verbose:
        print('sql_mp_1:\n', sql_mp_1, '\n\n')
        print('df_mp_1:\n', df_mp_1)
        print('sql_mp_2:\n', sql_mp_2, '\n\n')
        print('Final SQL Statement:\n', sql)
    #--------------------
    return sql

In [7]:
# I believe this aggregates by transformer first, then aggregates all of those...
def build_sql_outage_others_on_circuit_2_0(conn_aws, dev_ser_nbrs, date_range, verbose=True):
    """
    Return SQL statement to build aggregate of all OTHER meters on the circuit
    (i.e., excluding those from dev_ser_nbrs).

    In the final statement, the AGG1 common table expression aggregates the usage per
    transformer (MP.trsf_pole_nb) and time period; the outer SELECT then computes the SUM and
    AVG of each of those per-transformer aggregates for every time period.

    Step 1: From the serial numbers given in dev_ser_nbrs, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit.
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without
            the transformer numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct
            circuit information OR the correct transformer number.
            NOTE: To be 100% correct, it should probably be meters which have the correct circuit
                  information OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on
            the circuit (i.e., excluding those from dev_ser_nbrs).

    Steps 1 and 2 each execute a query against default.meter_premise through pd.read_sql;
    the final statement is only built, not executed.

    Args:
        conn_aws: connection handed to pd.read_sql.
        dev_ser_nbrs: serial numbers used to find the circuit and excluded from the aggregate.
        date_range: indexable; date_range[0] and date_range[1] become the BETWEEN bounds
                    on aep_usage_dt.
        verbose: when True, print the intermediate statements, the step 1 result and the
                 final statement.

    Returns:
        str: the final SQL statement.

    Raises:
        AssertionError: if step 1 does not return exactly one circuit/station combination.
    """
    def _quote(val):
        # Single-quoted SQL string literal.  Embedded single quotes are doubled so that a value
        # such as O'BRIEN (e.g. a circuit or station name read back from the database) cannot
        # terminate the literal early and corrupt the statement.
        return "'{}'".format(str(val).replace("'", "''"))

    def _quote_join(vals):
        # Comma separated list of quoted literals, suitable for use inside IN (...)
        return ','.join([_quote(x) for x in vals])

    # ********************** Step 1 **********************
    mp_where_str_1 = "mfr_devc_ser_nbr IN ({})".format(_quote_join(dev_ser_nbrs))
    sql_mp_1 = (
    """
    SELECT DISTINCT circuit_nb,circuit_nm,station_nb,station_nm
    FROM default.meter_premise
    WHERE {}
    AND circuit_nb IS NOT NULL AND circuit_nb <> ''
    AND circuit_nm IS NOT NULL AND circuit_nm <> ''
    AND station_nb IS NOT NULL AND station_nb <> ''
    AND station_nm IS NOT NULL AND station_nm <> ''
    """
    ).format(mp_where_str_1)
    #--------------------
    df_mp_1 = pd.read_sql(sql_mp_1, conn_aws)
    # Exactly one row <==> all listed serial numbers resolve to a single circuit
    assert(df_mp_1.shape[0]==1)
    #--------------------
    circuit_nb = df_mp_1.iloc[0]['circuit_nb']
    circuit_nm = df_mp_1.iloc[0]['circuit_nm']
    station_nb = df_mp_1.iloc[0]['station_nb']
    station_nm = df_mp_1.iloc[0]['station_nm']

    # ********************** Step 2 **********************
    mp_where_str_2 = (
    f"""
    circuit_nb = {_quote(circuit_nb)}
    AND circuit_nm = {_quote(circuit_nm)}
    AND station_nb = {_quote(station_nb)}
    AND station_nm = {_quote(station_nm)}
    """
    )

    sql_mp_2 = (
    """
    SELECT DISTINCT trsf_pole_nb
    FROM default.meter_premise
    WHERE {}
    """
    ).format(mp_where_str_2)
    #--------------------
    df_mp_2 = pd.read_sql(sql_mp_2, conn_aws)
    trsf_pole_nbs = df_mp_2['trsf_pole_nb'].tolist()

    # ********************** Step 3 **********************
    mp_where_str_3 = (
    """
    (circuit_nb = {}
    AND circuit_nm = {}
    AND station_nb = {}
    AND station_nm = {})
    OR trsf_pole_nb IN ({})
    """
    ).format(_quote(circuit_nb), _quote(circuit_nm), _quote(station_nb), _quote(station_nm), 
             _quote_join(trsf_pole_nbs))
    #--------------------
    sql = (
    """
    WITH MP
    AS (
        SELECT mfr_devc_ser_nbr, trsf_pole_nb
        FROM default.meter_premise
        WHERE {0}
    ), 
    U AS (
        SELECT serialnumber, starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
               aep_derived_uom, aep_srvc_qlty_idntfr, aep_usage_dt, value
        FROM usage_nonvee.reading_ivl_nonvee
        WHERE aep_opco = 'oh'
        AND aep_usage_dt BETWEEN {1} AND {2}
    ), 
    AGG1 AS (
        SELECT MP.trsf_pole_nb, U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, 
               U.aep_derived_uom, U.aep_srvc_qlty_idntfr, U.aep_usage_dt, 
               SUM(U.value) as value_sum, SUM(POWER(U.value, 2)) as value_sq_sum, 
               AVG(U.value) as value_mean, STDDEV_SAMP(U.value) as value_std, 
               COUNT(U.value) as counts, COUNT(*) as counts_including_null
        FROM MP
        INNER JOIN U
        ON MP.mfr_devc_ser_nbr = U.serialnumber
        WHERE U.serialnumber NOT IN ({3})
        GROUP BY MP.trsf_pole_nb, U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, 
                 U.aep_derived_uom, U.aep_srvc_qlty_idntfr, U.aep_usage_dt
    )
    SELECT starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
           aep_derived_uom, aep_srvc_qlty_idntfr, aep_usage_dt, 
           SUM(value_sum) AS sum_value_sum, AVG(value_sum) as mean_value_sum, 
           SUM(value_sq_sum) AS sum_value_sq_sum, AVG(value_sq_sum) as mean_value_sq_sum, 
           SUM(value_mean) AS sum_value_mean, AVG(value_mean) as mean_value_mean, 
           SUM(value_std) AS sum_value_std, AVG(value_std) as mean_value_std, 
           SUM(counts) AS sum_counts, AVG(counts) as mean_counts, 
           SUM(counts_including_null) AS sum_counts_including_null, AVG(counts_including_null) as mean_counts_including_null
    FROM AGG1
    GROUP BY starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
             aep_derived_uom, aep_srvc_qlty_idntfr, aep_usage_dt
    """
    ).format(mp_where_str_3, 
             _quote(date_range[0]),  
             _quote(date_range[1]), 
             _quote_join(dev_ser_nbrs))
    # ********************** Return **********************
    if verbose:
        print('sql_mp_1:\n', sql_mp_1, '\n\n')
        print('df_mp_1:\n', df_mp_1)
        print('sql_mp_2:\n', sql_mp_2, '\n\n')
        print('Final SQL Statement:\n', sql)
    #--------------------
    return sql

In [8]:
# Similar to build_sql_outage_others_on_circuit_2_0, but TOTAL kWh is calculated by first forming a 
# signed_value column which is negative when aep_srvc_qlty_idntfr='RECEIVED' and then aggregating
# FOR NOW, this is also only for aep_derived_uom = 'KWH'
def build_sql_outage_others_on_circuit_3_0(conn_aws, dev_ser_nbrs, date_range, verbose=True):
    """
    Return SQL statement to build aggregate of all OTHER meters on the circuit
    (i.e., excluding those from dev_ser_nbrs), using net (signed) kWh values.

    In the final statement:
      - USG_W_SIGNED_VAL keeps only aep_derived_uom = 'KWH' readings and adds signed_value,
        equal to -1*value when aep_srvc_qlty_idntfr='RECEIVED' and value otherwise.
      - U sums signed_value per serial number and time period.
      - AGG1 aggregates U per transformer (MP.trsf_pole_nb) and time period.
      - The outer SELECT computes the SUM and AVG of each AGG1 aggregate per time period.

    Step 1: From the serial numbers given in dev_ser_nbrs, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit.
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without
            the transformer numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct
            circuit information OR the correct transformer number.
            NOTE: To be 100% correct, it should probably be meters which have the correct circuit
                  information OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on
            the circuit (i.e., excluding those from dev_ser_nbrs).

    Steps 1 and 2 each execute a query against default.meter_premise through pd.read_sql;
    the final statement is only built, not executed.

    Args:
        conn_aws: connection handed to pd.read_sql.
        dev_ser_nbrs: serial numbers used to find the circuit and excluded from the aggregate.
        date_range: indexable; date_range[0] and date_range[1] become the BETWEEN bounds
                    on aep_usage_dt.
        verbose: when True, print the intermediate statements, the step 1 result and the
                 final statement.

    Returns:
        str: the final SQL statement.

    Raises:
        AssertionError: if step 1 does not return exactly one circuit/station combination.
    """
    def _quote(val):
        # Single-quoted SQL string literal.  Embedded single quotes are doubled so that a value
        # such as O'BRIEN (e.g. a circuit or station name read back from the database) cannot
        # terminate the literal early and corrupt the statement.
        return "'{}'".format(str(val).replace("'", "''"))

    def _quote_join(vals):
        # Comma separated list of quoted literals, suitable for use inside IN (...)
        return ','.join([_quote(x) for x in vals])

    # ********************** Step 1 **********************
    mp_where_str_1 = "mfr_devc_ser_nbr IN ({})".format(_quote_join(dev_ser_nbrs))
    sql_mp_1 = (
    """
    SELECT DISTINCT circuit_nb,circuit_nm,station_nb,station_nm
    FROM default.meter_premise
    WHERE {}
    AND circuit_nb IS NOT NULL AND circuit_nb <> ''
    AND circuit_nm IS NOT NULL AND circuit_nm <> ''
    AND station_nb IS NOT NULL AND station_nb <> ''
    AND station_nm IS NOT NULL AND station_nm <> ''
    """
    ).format(mp_where_str_1)
    #--------------------
    df_mp_1 = pd.read_sql(sql_mp_1, conn_aws)
    # Exactly one row <==> all listed serial numbers resolve to a single circuit
    assert(df_mp_1.shape[0]==1)
    #--------------------
    circuit_nb = df_mp_1.iloc[0]['circuit_nb']
    circuit_nm = df_mp_1.iloc[0]['circuit_nm']
    station_nb = df_mp_1.iloc[0]['station_nb']
    station_nm = df_mp_1.iloc[0]['station_nm']

    # ********************** Step 2 **********************
    mp_where_str_2 = (
    f"""
    circuit_nb = {_quote(circuit_nb)}
    AND circuit_nm = {_quote(circuit_nm)}
    AND station_nb = {_quote(station_nb)}
    AND station_nm = {_quote(station_nm)}
    """
    )

    sql_mp_2 = (
    """
    SELECT DISTINCT trsf_pole_nb
    FROM default.meter_premise
    WHERE {}
    """
    ).format(mp_where_str_2)
    #--------------------
    df_mp_2 = pd.read_sql(sql_mp_2, conn_aws)
    trsf_pole_nbs = df_mp_2['trsf_pole_nb'].tolist()

    # ********************** Step 3 **********************
    mp_where_str_3 = (
    """
    (circuit_nb = {}
    AND circuit_nm = {}
    AND station_nb = {}
    AND station_nm = {})
    OR trsf_pole_nb IN ({})
    """
    ).format(_quote(circuit_nb), _quote(circuit_nm), _quote(station_nb), _quote(station_nm), 
             _quote_join(trsf_pole_nbs))
    #--------------------
    sql = (
    """
    WITH MP
    AS (
        SELECT mfr_devc_ser_nbr, trsf_pole_nb
        FROM default.meter_premise
        WHERE {0}
    ), 
    USG_W_SIGNED_VAL AS (
        SELECT serialnumber, starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
               aep_derived_uom, aep_srvc_qlty_idntfr, aep_usage_dt, value, IF(aep_srvc_qlty_idntfr='RECEIVED', -1*value, value) as signed_value
        FROM usage_nonvee.reading_ivl_nonvee
        WHERE aep_opco = 'oh'
        AND aep_derived_uom = 'KWH'
        AND aep_usage_dt BETWEEN {1} AND {2}    
    ), 
    U AS (
        SELECT serialnumber, starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
               aep_derived_uom, aep_usage_dt, SUM(signed_value) as value
        FROM USG_W_SIGNED_VAL
        GROUP BY serialnumber, starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
                 aep_derived_uom, aep_usage_dt
    ), 
    AGG1 AS (
        SELECT MP.trsf_pole_nb, U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, 
               U.aep_derived_uom, U.aep_usage_dt, 
               SUM(U.value) as value_sum, SUM(POWER(U.value, 2)) as value_sq_sum, 
               AVG(U.value) as value_mean, STDDEV_SAMP(U.value) as value_std, 
               COUNT(U.value) as counts, COUNT(*) as counts_including_null
        FROM MP
        INNER JOIN U
        ON MP.mfr_devc_ser_nbr = U.serialnumber
        WHERE U.serialnumber NOT IN ({3})
        GROUP BY MP.trsf_pole_nb, U.starttimeperiod, U.endtimeperiod, U.aep_endtime_utc, U.timezoneoffset, 
                 U.aep_derived_uom, U.aep_usage_dt
    )
    SELECT starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
           aep_derived_uom, aep_usage_dt, 
           SUM(value_sum) AS sum_value_sum, AVG(value_sum) as mean_value_sum, 
           SUM(value_sq_sum) AS sum_value_sq_sum, AVG(value_sq_sum) as mean_value_sq_sum, 
           SUM(value_mean) AS sum_value_mean, AVG(value_mean) as mean_value_mean, 
           SUM(value_std) AS sum_value_std, AVG(value_std) as mean_value_std, 
           SUM(counts) AS sum_counts, AVG(counts) as mean_counts, 
           SUM(counts_including_null) AS sum_counts_including_null, AVG(counts_including_null) as mean_counts_including_null
    FROM AGG1
    GROUP BY starttimeperiod, endtimeperiod, aep_endtime_utc, timezoneoffset, 
             aep_derived_uom, aep_usage_dt
    """
    ).format(mp_where_str_3, 
             _quote(date_range[0]),  
             _quote(date_range[1]), 
             _quote_join(dev_ser_nbrs))
    # ********************** Return **********************
    if verbose:
        print('sql_mp_1:\n', sql_mp_1, '\n\n')
        print('df_mp_1:\n', df_mp_1)
        print('sql_mp_2:\n', sql_mp_2, '\n\n')
        print('Final SQL Statement:\n', sql)
    #--------------------
    return sql

### NEW FUNCTIONS

In [9]:
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#TODO PROBABLY MOVE TO SQLSelect.py
#     OR MAYBE Utilities_sql.py
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
def refine_agg_cols_and_types(agg_cols_and_types, try_to_split_col_strs=True):
    """
    Replace every str key of agg_cols_and_types by an equivalent SQLElement key, so that
    afterwards all keys are of type SQLElement.

    The dict is modified IN PLACE and the same object is returned.  Each converted key is
    removed and re-inserted under its SQLElement replacement, so converted entries end up
    after any entries whose keys were already SQLElement objects.

    Args:
        agg_cols_and_types: dict whose keys are str (simple column names) or SQLElement.
        try_to_split_col_strs: when True, a str key is split with SQLElement.split_field_desc
            into field_desc and table_alias_prefix components
            (e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U');
            when False the whole string is used as field_desc with no table alias prefix.

    Returns:
        dict: agg_cols_and_types, with only SQLElement keys.

    Raises:
        AssertionError: if a key is neither str nor SQLElement, or if the SQLElement built for
                        a str key is already present as a key.
    """
    # A dict cannot change size while it is being iterated over, hence the loop runs over a
    # snapshot of the keys taken up front.
    for orig_key in tuple(agg_cols_and_types):
        assert(isinstance(orig_key, (str, SQLElement)))
        if not isinstance(orig_key, str):
            # Already a SQLElement, nothing to convert
            continue
        #-----
        if try_to_split_col_strs:
            split_components = SQLElement.split_field_desc(orig_key)
            new_field_desc   = split_components['field_desc']
            new_alias_prefix = split_components['table_alias_prefix']
        else:
            new_field_desc, new_alias_prefix = orig_key, None
        #-----
        replacement_key = SQLElement(field_desc=new_field_desc, 
                                     table_alias_prefix=new_alias_prefix)
        assert(replacement_key not in agg_cols_and_types)
        # Move the value from the str key to the SQLElement key
        agg_cols_and_types[replacement_key] = agg_cols_and_types.pop(orig_key)
    return agg_cols_and_types


#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#TODO PROBABLY MOVE TO SQLSelect.py
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
def add_aggregate_elements_to_sql_select(sql_select, agg_cols_and_types, 
                                         try_to_split_col_strs=True, 
                                         include_counts_including_null=True, **kwargs):
    """
    Add aggregate select elements (e.g. SUM(U.value) AS sum_value) to sql_select.

    Args:
        sql_select: SQLSelect object to modify; it is altered and also returned.
        agg_cols_and_types: dict
            keys:   column names OR SQLElement objects (representing column names) to aggregate
            values: list of aggregations to perform on the column.
                    Available: 'sum', 'sq_sum', 'mean', 'std', 'count'
                    NOTE: if more aggregate functions are added, the template dict below
                          must be updated appropriately.
            The dict is passed through refine_agg_cols_and_types, which converts its str keys
            to SQLElement keys in place.
        try_to_split_col_strs: forwarded to refine_agg_cols_and_types; when True a str key such
            as 'U.value' is split into field_desc='value' and table_alias_prefix='U'.
        include_counts_including_null: when True, a final COUNT(*) element with alias
            'counts_including_null' is appended.
        **kwargs: 'comp_alias' (default False) and 'comp_table_alias_prefix' (default True) are
            forwarded to sql_select.find_idx_of_approx_element_in_collection_dict when looking
            for aggregate columns already present in sql_select.

    Returns:
        The sql_select object, with the aggregate columns removed as plain columns and the
        aggregate elements (alias '<agg_type>_<field_desc without table alias>') added.

    Raises:
        KeyError: if an aggregation type is not one of the available ones.
    """
    agg_type_to_sql_template = {'sum':'SUM({})', 
                                'sq_sum':'SUM(POWER({}, 2))', 
                                'mean':'AVG({})', 
                                'std':'STDDEV_SAMP({})', 
                                'count':'COUNT({})'}
    #---------------------
    # From here on every key of agg_cols_and_types is a SQLElement
    agg_cols_and_types = refine_agg_cols_and_types(agg_cols_and_types, try_to_split_col_strs)
    #---------------------
    # A column which is to be aggregated should not also be selected as a plain column,
    # so drop any such column from sql_select.
    find_kwargs = dict(comp_alias=kwargs.get('comp_alias', False), 
                       comp_table_alias_prefix=kwargs.get('comp_table_alias_prefix', True))
    for agg_col in agg_cols_and_types:
        idx_in_select = sql_select.find_idx_of_approx_element_in_collection_dict(agg_col, **find_kwargs)
        if idx_in_select > -1:
            sql_select.remove_single_element_from_collection_at_idx(idx_in_select)
    #---------------------
    # Build one aliased SQLSelectElement per (column, aggregation type) pair
    new_agg_elements = []
    for agg_col, agg_types in agg_cols_and_types.items():
        for agg_type in agg_types:
            sql_template = agg_type_to_sql_template[agg_type]
            agg_field_desc = sql_template.format(agg_col.get_field_desc(include_table_alias_prefix=True))
            agg_alias = "{}_{}".format(agg_type, agg_col.get_field_desc(include_table_alias_prefix=False))
            new_element = SQLSelectElement(field_desc=agg_field_desc, alias=agg_alias, is_agg=True)
            assert(new_element not in new_agg_elements)
            new_agg_elements.append(new_element)
    if include_counts_including_null:
        new_agg_elements.append(SQLSelectElement(field_desc='COUNT(*)', alias='counts_including_null', is_agg=True))
    #---------------------
    sql_select.add_select_elements(new_agg_elements, run_check=True)
    return sql_select


#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#TODO PROBABLY MOVE TO SQLSelect.py
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
def build_aggregate_sql_select(field_descs, agg_cols_and_types, 
                               try_to_split_col_strs=True, 
                               global_table_alias_prefix=None, idxs=None, run_check=False, 
                               include_counts_including_null=True, 
                               **kwargs):
    """
    Build a SQLSelect from field_descs and add the requested aggregate elements to it.

    Args:
        field_descs, global_table_alias_prefix, idxs, run_check:
            passed unchanged to the SQLSelect constructor.
            i.e., field_descs should be a list of column names or a list of dict items, each
            with possible keys 'field_desc', 'alias', 'table_alias_prefix'.
        agg_cols_and_types: dict
            keys:   column names OR SQLElement objects (representing column names) to aggregate
            values: list of aggregations to perform on the column.
                    Available: 'sum', 'sq_sum', 'mean', 'std', 'count'
        try_to_split_col_strs: when True, a str key of agg_cols_and_types such as 'U.value' is
            split into field_desc='value' and table_alias_prefix='U'.
        include_counts_including_null: forwarded to add_aggregate_elements_to_sql_select.
        **kwargs: forwarded to add_aggregate_elements_to_sql_select.

    Returns:
        The SQLSelect object returned by add_aggregate_elements_to_sql_select.
    """
    base_select = SQLSelect(field_descs=field_descs, 
                            global_table_alias_prefix=global_table_alias_prefix, 
                            idxs=idxs, run_check=run_check)
    # All aggregate handling is delegated to add_aggregate_elements_to_sql_select
    return add_aggregate_elements_to_sql_select(sql_select=base_select, 
                                                agg_cols_and_types=agg_cols_and_types, 
                                                try_to_split_col_strs=try_to_split_col_strs, 
                                                include_counts_including_null=include_counts_including_null, 
                                                **kwargs)

In [10]:
#TODO BETTER NAME
# TODO MAYBE RENAME global_table_alias_prefix to e.g. global_usg_table_alias_prefix?
# TODO much other work to do as well here
def build_agg_1_sql(field_descs, agg_cols_and_types, groupby_cols, groupby_xfmr, 
                    try_to_split_col_strs=True, global_table_alias_prefix='U', idxs=None, run_check=True, 
                    include_counts_including_null=True, serial_numbers=None, **kwargs):
    r"""
    Build the first-round aggregation SQLQuery: the table aliased global_table_alias_prefix is 
    INNER JOINed to the table aliased 'MP' (serialnumber = mfr_devc_ser_nbr), the meters listed in 
    serial_numbers are excluded, and the result is grouped by groupby_cols.

    field_descs, agg_cols_and_types, try_to_split_col_strs, idxs, run_check, 
    include_counts_including_null, **kwargs:
      - Forwarded unchanged to build_aggregate_sql_select.
      - kwargs['comp_table_alias_prefix'] is set to False when the caller did not supply it.

    groupby_cols:
      - Handed to SQLGroupBy together with global_table_alias_prefix.

    groupby_xfmr:
      - When truthy, MP.trsf_pole_nb is added (with idx=0) to both the SELECT and the GROUP BY.

    serial_numbers:
      - Serial numbers to exclude through "serialnumber NOT IN (...)".
      - When None (default) the notebook-level variable named serial_numbers is used.  This is 
          what the function silently did before the parameter existed, so existing callers are 
          unaffected; new callers should pass the list explicitly.

    Returns:
      The assembled SQLQuery object.

    Raises:
      NameError if serial_numbers is None and no notebook-level serial_numbers is defined.
    """
    #--------------------
    if serial_numbers is None:
        # Backward-compatible fallback to the (previously implicit) notebook-level variable
        if 'serial_numbers' not in globals():
            raise NameError("build_agg_1_sql: serial_numbers was not supplied and no notebook-level "
                            "'serial_numbers' variable is defined")
        serial_numbers = globals()['serial_numbers']
    #--------------------
    kwargs.setdefault('comp_table_alias_prefix', False)
    #--------------------
    agg_1_sql_select = build_aggregate_sql_select(field_descs=field_descs, 
                                                  agg_cols_and_types=agg_cols_and_types, 
                                                  try_to_split_col_strs=try_to_split_col_strs, 
                                                  global_table_alias_prefix=global_table_alias_prefix, 
                                                  idxs=idxs, run_check=run_check, 
                                                  include_counts_including_null=include_counts_including_null, 
                                                  **kwargs)
    if groupby_xfmr:
        agg_1_sql_select.add_select_element(field_desc='trsf_pole_nb', alias=None, table_alias_prefix='MP', 
                                            idx=0, run_check=True)
    #--------------------
    # NOTE(review): join_table is left empty and only the alias 'MP' is given; presumably MP is 
    #               expected to be a CTE defined by the caller -- confirm
    agg_1_sql_join = SQLJoin(join_type='INNER', 
                             join_table='', 
                             join_table_alias='MP', 
                             orig_table_alias=global_table_alias_prefix, 
                             list_of_columns_to_join=[['serialnumber', 'mfr_devc_ser_nbr']])
    #--------------------
    agg_1_sql_groupby = SQLGroupBy(field_descs=groupby_cols, 
                                   global_table_alias_prefix=global_table_alias_prefix, 
                                   idxs=None, run_check=True)
    if groupby_xfmr:
        agg_1_sql_groupby.add_groupby_statement(field_desc='trsf_pole_nb', table_alias_prefix='MP', idx=0, run_check=True)
    #--------------------
    # The quoted, comma-separated list is placed in the statement as-is (needs_quotes=False)
    excluded_serial_numbers_str = f'({Utilities_sql.join_list_w_quotes(serial_numbers)})'
    agg_1_sql_where = SQLWhere([dict(field_desc='serialnumber', comparison_operator='NOT IN', 
                                     value=excluded_serial_numbers_str, 
                                     needs_quotes=False, table_alias_prefix=global_table_alias_prefix)])
    #--------------------
    agg_1_sql = SQLQuery(sql_select = agg_1_sql_select, 
                         sql_from = SQLFrom(global_table_alias_prefix), 
                         sql_where = agg_1_sql_where, 
                         sql_join_coll = agg_1_sql_join, 
                         sql_groupby=agg_1_sql_groupby)

    return agg_1_sql

In [11]:
def build_agg_rd2_sql(agg_1_sql, groupby_cols, agg_types_rd2=['sum', 'mean'], agg_1_table_alias='AGG1'):
    r"""
    Build the second-round aggregation SQLQuery, which re-aggregates the aggregate columns 
    produced by agg_1_sql, selecting FROM agg_1_table_alias.

    agg_1_sql:
      - The first-round SQLQuery.  It is deep-copied, so it is left unmodified.

    groupby_cols:
      - Handed to SQLGroupBy with no table alias prefix.

    agg_types_rd2:
      - Aggregations applied to every aggregate column (identified by alias) of agg_1_sql.
      - Each column receives its own shallow copy of this object, so neither the default list 
          nor the caller's object is shared between the entries handed downstream.

    agg_1_table_alias:
      - Name placed in the FROM clause.

    Returns:
      SQLQuery with no WHERE clause.

    Raises:
      AssertionError if two aggregate elements of agg_1_sql share the same alias.
    """
    # Below, I will set some of the table_alias_prefix to None
    # So, to maintain the original agg_1_sql, make a deep copy
    agg_1_sql_cpy = copy.deepcopy(agg_1_sql)
    select_elements = agg_1_sql_cpy.sql_select.collection_dict
    #-----
    # Find aggregate elements (to be aggregated again) and "normal elements"
    #   which are all elements which are not aggregate and not equal to 'trsf_pole_nb'
    #   (which should be equal to groupby_cols)
    agg_element_ids = agg_1_sql_cpy.sql_select.get_agg_element_ids()
    field_descs_normal = [sql_el for idx,sql_el in select_elements.items() 
                          if (idx not in agg_element_ids and sql_el.field_desc != 'trsf_pole_nb')]
    #-----
    # The normal elements are now selected from agg_1_table_alias, so drop their original prefixes
    for fd in field_descs_normal:
        fd.table_alias_prefix = None
    #-----
    agg_cols_and_types_rd2 = {}
    for idx in agg_element_ids:
        agg_alias = select_elements[idx].alias
        assert(agg_alias not in agg_cols_and_types_rd2)
        # copy.copy keeps the caller's container type while avoiding one shared object
        agg_cols_and_types_rd2[agg_alias] = copy.copy(agg_types_rd2)
    #-----
    sql_select_final = build_aggregate_sql_select(field_descs=field_descs_normal, 
                                                  agg_cols_and_types=agg_cols_and_types_rd2, 
                                                  include_counts_including_null=False)
    sql_groupby_final = SQLGroupBy(field_descs=groupby_cols, global_table_alias_prefix=None, idxs=None, run_check=True)
    sql_partial = SQLQuery(sql_select=sql_select_final, 
                           sql_from = SQLFrom(agg_1_table_alias), 
                           sql_where = None, 
                           sql_groupby=sql_groupby_final)    
    return sql_partial

In [12]:
def build_usg_w_signed_val_sql(cols_of_interest_usage, date_range=None):
    r"""
    Build the SQLQuery selecting cols_of_interest_usage plus a signed_value column from 
    usage_nonvee.reading_ivl_nonvee, where signed_value = -1*value when 
    aep_srvc_qlty_idntfr='RECEIVED' and value otherwise.
    The query is restricted to aep_opco='oh', aep_derived_uom='KWH' and aep_usage_dt 
    BETWEEN date_range[0] AND date_range[1].

    cols_of_interest_usage:
      - Passed directly to SQLSelect.

    date_range:
      - Two-element indexable whose elements are formatted into the BETWEEN clause.
      - When None (default) the notebook-level variable named date_range is used.  This is what 
          the function silently did before the parameter existed, so existing callers are 
          unaffected; new callers should pass the range explicitly.

    Returns:
      The assembled SQLQuery object.

    Raises:
      NameError if date_range is None and no notebook-level date_range is defined.
    """
    if date_range is None:
        # Backward-compatible fallback to the (previously implicit) notebook-level variable
        if 'date_range' not in globals():
            raise NameError("build_usg_w_signed_val_sql: date_range was not supplied and no notebook-level "
                            "'date_range' variable is defined")
        date_range = globals()['date_range']
    #-----
    usg_w_signed_val_sql_select = SQLSelect(cols_of_interest_usage)
    usg_w_signed_val_sql_select.add_select_element(field_desc="IF(aep_srvc_qlty_idntfr='RECEIVED', -1*value, value)", alias="signed_value")
    #-----
    usg_w_signed_val_sql_where = SQLWhere([dict(field_desc='aep_opco', comparison_operator='=', value='oh', needs_quotes=True), 
                                           dict(field_desc='aep_derived_uom', comparison_operator='=', value='KWH', needs_quotes=True), 
                                           dict(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                                                value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
                                          ], idxs=None, run_check=True)
    #-----
    usg_w_signed_val_sql = SQLQuery(sql_select = usg_w_signed_val_sql_select, 
                                    sql_from = SQLFrom('usage_nonvee', 'reading_ivl_nonvee'), 
                                    sql_where = usg_w_signed_val_sql_where
                                   )
    return usg_w_signed_val_sql

In [13]:
# NOTE: cols_of_interest_usage is not an input to this function; it is derived below from field_descs.
def build_sql_outage_others_on_circuit_1(conn_aws, serial_numbers, date_range, 
                                       field_descs, agg_cols_and_types, groupby_cols, 
                                       try_to_split_col_strs=True, 
                                       groupby_xfmr=False, verbose=True, 
                                       include_counts_including_null=True, 
                                       **kwargs):
    r"""
    Return SQL statement to build aggregate of all OTHER meters on the circuit 
    (i.e., excluding those from serial_numbers)
    ****************************************************
    conn_aws:
      - Connection handed to get_circuit_info and get_trsf_pole_nbs_on_circuit.

    serial_numbers:
      - Serial numbers used to look up the circuit information.

    date_range:
      - Passed to build_sql_usg.

    field_descs:
      This is just as should be input into SQLSelect
      i.e., field_descs should be a list of column names or a list of dict items, each with 
            possible keys 'field_desc', 'alias', 'table_alias_prefix'
      (SQLElement objects are accepted as well)
      'serialnumber' must NOT be included, as it is always prepended to the usage columns.

    agg_cols_and_types:
     keys:
         equal to column names OR SQLElement objects (representing column names) to be aggregated
     values:
         each value should be equal to a list of aggregations to perform on column
         At this time, the available aggregate functions are:
           'sum', 'sq_sum', 'mean', 'std', 'count'
         NOTE: If more aggregate functions are added, to_SQL_dict must be updated appropriately

    try_to_split_col_strs:
      when True and a key in agg_cols_and_types of type str is found, this will attempt to split
      the column name into field_desc and table_alias_prefix components,
      which will then be used when creating the SQLElement replacement key
      e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U'

    groupby_cols:
      should be columns from usage_nonvee.reading_ivl_nonvee.
      SHOULD NOT contain 'trsf_pole_nb', as this will be added where needed.
        (if 'trsf_pole_nb' in groupby_cols, it will simply be removed)
      The caller's list is not modified.

    groupby_xfmr, include_counts_including_null, **kwargs:
      - Forwarded to build_agg_1_sql.

    verbose:
      - When True, the final statement is printed.
    ****************************************************
    Step 1: From the serial numbers given in serial_numbers, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without the transformer
              numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct circuit
              information OR the correct transformer number.
              NOTE: To be 100% correct, it should probably be meters which have the correct circuit information
                    OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on the circuit 
              (i.e., excluding those from serial_numbers)
    ****************************************************
    Returns:
      The full SQL statement (str).

    Raises:
      AssertionError if an element of field_descs has an unsupported type, or if 'serialnumber' 
      is already contained in field_descs.
    """
    # ********************** Step 1 **********************
    circuit_info = get_circuit_info(conn_aws, serial_numbers)
    circuit_nb = circuit_info['circuit_nb']
    circuit_nm = circuit_info['circuit_nm']
    station_nb = circuit_info['station_nb']
    station_nm = circuit_info['station_nm']
    
    # ********************** Step 2 **********************
    trsf_pole_nbs = get_trsf_pole_nbs_on_circuit(conn_aws, circuit_nb, circuit_nm, station_nb, station_nm)
    
    # ********************** Step 3 **********************
    mp_sql_stmnt = build_mp_sql_statement_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, 
                                                                          trsf_pole_nbs, 
                                                                          insert_n_tabs_to_each_line=1)
    #--------------------
    # Reduce each field description to its bare column name
    cols_of_interest_usage = []
    for x in field_descs:
        if isinstance(x, str):
            cols_of_interest_usage.append(x)
        elif isinstance(x, dict):
            # The key is the string 'field_desc' (previously an undefined bare name, i.e. a NameError)
            cols_of_interest_usage.append(x['field_desc'])
        elif isinstance(x, SQLElement):
            cols_of_interest_usage.append(x.field_desc)
        else:
            assert 0, f'Unsupported field_descs element type: {type(x)}'
    assert('serialnumber' not in cols_of_interest_usage)
    cols_of_interest_usage = ['serialnumber'] + cols_of_interest_usage
    # Build the usage query with an empty serial number list, then drop the serialnumber condition 
    # from its WHERE clause
    usg_sql = build_sql_usg(cols_of_interest_usage=cols_of_interest_usage, serial_numbers=[], date_range=date_range)
    usg_sql.sql_where.find_and_remove_approx_element_in_collection_dict(SQLWhereElement('serialnumber', comparison_operator='', value=''))
    usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    # Work on a copy so the caller's groupby_cols list is left untouched
    groupby_cols = list(groupby_cols)
    if 'trsf_pole_nb' in groupby_cols:
        groupby_cols.remove('trsf_pole_nb')
    # NOTE(review): serial_numbers is not forwarded here, so build_agg_1_sql takes the list of meters 
    #               to exclude from notebook-level state -- confirm it agrees with this function's 
    #               serial_numbers argument
    agg_1_sql = build_agg_1_sql(field_descs=field_descs, 
                                agg_cols_and_types=agg_cols_and_types, 
                                groupby_cols=groupby_cols, 
                                groupby_xfmr=groupby_xfmr, 
                                try_to_split_col_strs=try_to_split_col_strs, 
                                global_table_alias_prefix='U', 
                                idxs=None, run_check=True, 
                                include_counts_including_null=include_counts_including_null, 
                                **kwargs)
    agg_1_sql_stmnt = agg_1_sql.get_sql_statement()
    #-------------------------------------
    sql_full_stmnt = f"""
    WITH MP
    AS (
    {mp_sql_stmnt}
    ), 
    U AS (
    {usg_sql_stmnt}
    )

    {agg_1_sql_stmnt}
    """
    # ********************** Return **********************
    if verbose:
        print(sql_full_stmnt)
    #--------------------
    return sql_full_stmnt

In [14]:
# NOTE: cols_of_interest_usage is not an input to this function; it is derived below from field_descs.
# Aggregates per transformer first (AGG1, built with groupby_xfmr=True), then aggregates those 
# per-transformer results in a second round.
def build_sql_outage_others_on_circuit_2(conn_aws, serial_numbers, date_range, 
                                         field_descs, agg_cols_and_types, groupby_cols, 
                                         try_to_split_col_strs=True, 
                                         verbose=True, 
                                         include_counts_including_null=True, 
                                         **kwargs):
    r"""
    Return SQL statement to build aggregate of all OTHER meters on the circuit 
    (i.e., excluding those from serial_numbers)
    ****************************************************
    conn_aws:
      - Connection handed to get_circuit_info and get_trsf_pole_nbs_on_circuit.

    serial_numbers:
      - Serial numbers used to look up the circuit information.

    date_range:
      - Passed to build_sql_usg.

    field_descs:
      This is just as should be input into SQLSelect
      i.e., field_descs should be a list of column names or a list of dict items, each with 
            possible keys 'field_desc', 'alias', 'table_alias_prefix'
      (SQLElement objects are accepted as well)
      'serialnumber' must NOT be included, as it is always prepended to the usage columns.

    agg_cols_and_types:
     keys:
         equal to column names OR SQLElement objects (representing column names) to be aggregated
     values:
         each value should be equal to a list of aggregations to perform on column
         At this time, the available aggregate functions are:
           'sum', 'sq_sum', 'mean', 'std', 'count'
         NOTE: If more aggregate functions are added, to_SQL_dict must be updated appropriately

    try_to_split_col_strs:
      when True and a key in agg_cols_and_types of type str is found, this will attempt to split
      the column name into field_desc and table_alias_prefix components,
      which will then be used when creating the SQLElement replacement key
      e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U'

    groupby_cols:
      should be columns from usage_nonvee.reading_ivl_nonvee.
      SHOULD NOT contain 'trsf_pole_nb', as this will be added where needed.
        (if 'trsf_pole_nb' in groupby_cols, it will simply be removed)
      The caller's list is not modified.

    include_counts_including_null, **kwargs:
      - Forwarded to build_agg_1_sql.

    verbose:
      - When True, the final statement is printed.
    ****************************************************
    Step 1: From the serial numbers given in serial_numbers, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without the transformer
              numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct circuit
              information OR the correct transformer number.
              NOTE: To be 100% correct, it should probably be meters which have the correct circuit information
                    OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on the circuit 
              (i.e., excluding those from serial_numbers)
    ****************************************************
    Returns:
      The full SQL statement (str).

    Raises:
      AssertionError if an element of field_descs has an unsupported type, or if 'serialnumber' 
      is already contained in field_descs.
    """
    # ********************** Step 1 **********************
    circuit_info = get_circuit_info(conn_aws, serial_numbers)
    circuit_nb = circuit_info['circuit_nb']
    circuit_nm = circuit_info['circuit_nm']
    station_nb = circuit_info['station_nb']
    station_nm = circuit_info['station_nm']
    
    # ********************** Step 2 **********************
    trsf_pole_nbs = get_trsf_pole_nbs_on_circuit(conn_aws, circuit_nb, circuit_nm, station_nb, station_nm)
    
    # ********************** Step 3 **********************
    mp_sql_stmnt = build_mp_sql_statement_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, 
                                                                          trsf_pole_nbs, 
                                                                          insert_n_tabs_to_each_line=1)
    #--------------------
    # Reduce each field description to its bare column name
    cols_of_interest_usage = []
    for x in field_descs:
        if isinstance(x, str):
            cols_of_interest_usage.append(x)
        elif isinstance(x, dict):
            # The key is the string 'field_desc' (previously an undefined bare name, i.e. a NameError)
            cols_of_interest_usage.append(x['field_desc'])
        elif isinstance(x, SQLElement):
            cols_of_interest_usage.append(x.field_desc)
        else:
            assert 0, f'Unsupported field_descs element type: {type(x)}'
    assert('serialnumber' not in cols_of_interest_usage)
    cols_of_interest_usage = ['serialnumber'] + cols_of_interest_usage
    # Build the usage query with an empty serial number list, then drop the serialnumber condition 
    # from its WHERE clause
    usg_sql = build_sql_usg(cols_of_interest_usage=cols_of_interest_usage, serial_numbers=[], date_range=date_range)
    usg_sql.sql_where.find_and_remove_approx_element_in_collection_dict(SQLWhereElement('serialnumber', comparison_operator='', value=''))
    usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    # Work on a copy so the caller's groupby_cols list is left untouched
    groupby_cols = list(groupby_cols)
    if 'trsf_pole_nb' in groupby_cols:
        groupby_cols.remove('trsf_pole_nb')
    # NOTE(review): serial_numbers is not forwarded here, so build_agg_1_sql takes the list of meters 
    #               to exclude from notebook-level state -- confirm it agrees with this function's 
    #               serial_numbers argument
    agg_1_sql = build_agg_1_sql(field_descs=field_descs, 
                                agg_cols_and_types=agg_cols_and_types, 
                                groupby_cols=groupby_cols, 
                                groupby_xfmr=True, 
                                try_to_split_col_strs=try_to_split_col_strs, 
                                global_table_alias_prefix='U', 
                                idxs=None, run_check=True, 
                                include_counts_including_null=include_counts_including_null, 
                                **kwargs)
    agg_1_sql_stmnt = agg_1_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    # Second round: re-aggregate the per-transformer aggregates of AGG1
    agg_types_rd2 = ['sum', 'mean']
    agg_rd2_sql = build_agg_rd2_sql(agg_1_sql, groupby_cols, agg_types_rd2=agg_types_rd2, agg_1_table_alias='AGG1')
    agg_rd2_sql_statement = agg_rd2_sql.get_sql_statement()
    #-------------------------------------
    sql_full_stmnt = f"""
    WITH MP
    AS (
    {mp_sql_stmnt}
    ), 
    U AS (
    {usg_sql_stmnt}
    ), 
    AGG1 AS (
    {agg_1_sql_stmnt}
    )

    {agg_rd2_sql_statement}
    """
    # ********************** Return **********************
    if verbose:
        print(sql_full_stmnt)
    #--------------------
    return sql_full_stmnt

In [15]:
# NOTE: cols_of_interest_usage is not an input to this function; it is derived below from field_descs.
# Similar to build_sql_outage_others_on_circuit_3_0, but TOTAL kWh is calculated by first forming a 
# signed_value column which is negative when aep_srvc_qlty_idntfr='RECEIVED' and then aggregating
# FOR NOW, this is also only for aep_derived_uom = 'KWH'
def build_sql_outage_others_on_circuit_3(conn_aws, serial_numbers, date_range, 
                                         field_descs, agg_cols_and_types, groupby_cols, 
                                         try_to_split_col_strs=True, 
                                         verbose=True, 
                                         include_counts_including_null=True, 
                                         **kwargs):
    r"""
    Return SQL statement to build aggregate of all OTHER meters on the circuit 
    (i.e., excluding those from serial_numbers)
    ****************************************************
    conn_aws:
      - Connection handed to get_circuit_info and get_trsf_pole_nbs_on_circuit.

    serial_numbers:
      - Serial numbers used to look up the circuit information.

    date_range:
      - Two-element indexable used for the aep_usage_dt BETWEEN condition of the U table.

    field_descs:
      This is just as should be input into SQLSelect
      i.e., field_descs should be a list of column names or a list of dict items, each with 
            possible keys 'field_desc', 'alias', 'table_alias_prefix'
      (SQLElement objects are accepted as well)
      'serialnumber' must NOT be included, as it is always prepended to the usage columns.

    agg_cols_and_types:
     keys:
         equal to column names OR SQLElement objects (representing column names) to be aggregated
     values:
         each value should be equal to a list of aggregations to perform on column
         At this time, the available aggregate functions are:
           'sum', 'sq_sum', 'mean', 'std', 'count'
         NOTE: If more aggregate functions are added, to_SQL_dict must be updated appropriately

    try_to_split_col_strs:
      when True and a key in agg_cols_and_types of type str is found, this will attempt to split
      the column name into field_desc and table_alias_prefix components,
      which will then be used when creating the SQLElement replacement key
      e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U'

    groupby_cols:
      should be columns from usage_nonvee.reading_ivl_nonvee.
      SHOULD NOT contain 'trsf_pole_nb', as this will be added where needed.
        (if 'trsf_pole_nb' in groupby_cols, it will simply be removed)
      The caller's list is not modified.

    include_counts_including_null, **kwargs:
      - Forwarded to build_agg_1_sql.

    verbose:
      - When True, the final statement is printed.
    ****************************************************
    Step 1: From the serial numbers given in serial_numbers, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without the transformer
              numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct circuit
              information OR the correct transformer number.
              NOTE: To be 100% correct, it should probably be meters which have the correct circuit information
                    OR no circuit information AND the correct transformer number.
    Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on the circuit 
              (i.e., excluding those from serial_numbers)
    ****************************************************
    Returns:
      The full SQL statement (str).

    Raises:
      AssertionError if an element of field_descs has an unsupported type, or if 'serialnumber' 
      is already contained in field_descs.
    """
    # ********************** Step 1 **********************
    circuit_info = get_circuit_info(conn_aws, serial_numbers)
    circuit_nb = circuit_info['circuit_nb']
    circuit_nm = circuit_info['circuit_nm']
    station_nb = circuit_info['station_nb']
    station_nm = circuit_info['station_nm']
    
    # ********************** Step 2 **********************
    trsf_pole_nbs = get_trsf_pole_nbs_on_circuit(conn_aws, circuit_nb, circuit_nm, station_nb, station_nm)
    
    # ********************** Step 3 **********************
    mp_sql_stmnt = build_mp_sql_statement_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, 
                                                                          trsf_pole_nbs, 
                                                                          insert_n_tabs_to_each_line=1)
    #--------------------
    # Reduce each field description to its bare column name
    cols_of_interest_usage = []
    for x in field_descs:
        if isinstance(x, str):
            cols_of_interest_usage.append(x)
        elif isinstance(x, dict):
            # The key is the string 'field_desc' (previously an undefined bare name, i.e. a NameError)
            cols_of_interest_usage.append(x['field_desc'])
        elif isinstance(x, SQLElement):
            cols_of_interest_usage.append(x.field_desc)
        else:
            assert 0, f'Unsupported field_descs element type: {type(x)}'
    assert('serialnumber' not in cols_of_interest_usage)
    cols_of_interest_usage = ['serialnumber'] + cols_of_interest_usage
    #--------------------
    # Work on a copy so the caller's groupby_cols list is left untouched.
    # 'trsf_pole_nb' is removed BEFORE any GROUP BY is built, so that it is excluded from the 
    # U-level GROUP BY as well (groupby_cols should only hold usage columns)
    groupby_cols = list(groupby_cols)
    if 'trsf_pole_nb' in groupby_cols:
        groupby_cols.remove('trsf_pole_nb')
    #--------------------
    # NOTE(review): date_range is not forwarded here, so build_usg_w_signed_val_sql takes its date 
    #               range from notebook-level state -- confirm it agrees with this function's 
    #               date_range argument
    usg_w_signed_val_sql=build_usg_w_signed_val_sql(cols_of_interest_usage)
    usg_w_signed_val_sql_stmnt = usg_w_signed_val_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    usg_sql_select = SQLSelect(cols_of_interest_usage)
    # Instead of value, want SUM(signed_value) AS value
    # (index > -1 is treated as "found", matching the checks further below)
    value_idx = usg_sql_select.find_idx_of_approx_element_in_collection_dict(SQLSelectElement('value'))
    if value_idx > -1:
        usg_sql_select.remove_single_element_from_collection_at_idx(value_idx)
    usg_sql_select.add_select_element(field_desc='SUM(signed_value)', alias='value')
    # Going to sum over aep_srvc_qlty_idntfr, (by excluding from groupby) so don't want it in selection anymore
    aep_srvc_qlty_idntfr_idx = usg_sql_select.find_idx_of_approx_element_in_collection_dict(SQLSelectElement('aep_srvc_qlty_idntfr'))
    if aep_srvc_qlty_idntfr_idx > -1:
        usg_sql_select.remove_single_element_from_collection_at_idx(aep_srvc_qlty_idntfr_idx)
    #-----
    usg_sql_groupby = SQLGroupBy(field_descs=['serialnumber'] + [x for x in groupby_cols if x != 'aep_srvc_qlty_idntfr'], 
                                 global_table_alias_prefix=None, idxs=None, run_check=True)
    #-----
    usg_sql = SQLQuery(sql_select = usg_sql_select, 
                       sql_from = SQLFrom(table_name='USG_W_SIGNED_VAL'), 
                       sql_where = SQLWhere([dict(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                                                  value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
                                            ], idxs=None, run_check=True), 
                       sql_groupby = usg_sql_groupby
                     )
    usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    # NOTE(review): serial_numbers is not forwarded here, so build_agg_1_sql takes the list of meters 
    #               to exclude from notebook-level state -- confirm it agrees with this function's 
    #               serial_numbers argument
    agg_1_sql = build_agg_1_sql(field_descs=field_descs, 
                                agg_cols_and_types=agg_cols_and_types, 
                                groupby_cols=groupby_cols, 
                                groupby_xfmr=True, 
                                try_to_split_col_strs=try_to_split_col_strs, 
                                global_table_alias_prefix='U', 
                                idxs=None, run_check=True, 
                                include_counts_including_null=include_counts_including_null, 
                                **kwargs)
    # aep_srvc_qlty_idntfr was summed over, so should no longer be included in the SELECT...
    aep_srvc_qlty_idntfr_idx = agg_1_sql.sql_select.find_idx_of_approx_element_in_collection_dict(SQLSelectElement('aep_srvc_qlty_idntfr'))
    if aep_srvc_qlty_idntfr_idx > -1:
        agg_1_sql.sql_select.remove_single_element_from_collection_at_idx(aep_srvc_qlty_idntfr_idx)
    # ...nor in the GROUP BY
    aep_srvc_qlty_idntfr_idx = agg_1_sql.sql_groupby.find_idx_of_approx_element_in_collection_dict(SQLGroupByElement('aep_srvc_qlty_idntfr'))
    if aep_srvc_qlty_idntfr_idx > -1:
        agg_1_sql.sql_groupby.remove_single_element_from_collection_at_idx(aep_srvc_qlty_idntfr_idx)
    agg_1_sql_stmnt = agg_1_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    #--------------------
    # Second round: re-aggregate the per-transformer aggregates of AGG1
    agg_types_rd2 = ['sum', 'mean']
    agg_rd2_sql = build_agg_rd2_sql(agg_1_sql, groupby_cols, agg_types_rd2=agg_types_rd2, agg_1_table_alias='AGG1')
    # aep_srvc_qlty_idntfr was summed over, so should no longer be included
    aep_srvc_qlty_idntfr_idx = agg_rd2_sql.sql_groupby.find_idx_of_approx_element_in_collection_dict(SQLGroupByElement('aep_srvc_qlty_idntfr'))
    if aep_srvc_qlty_idntfr_idx > -1:
        agg_rd2_sql.sql_groupby.remove_single_element_from_collection_at_idx(aep_srvc_qlty_idntfr_idx)    
    agg_rd2_sql_statement = agg_rd2_sql.get_sql_statement()
    #-------------------------------------
    sql_full_stmnt = f"""
    WITH MP
    AS (
    {mp_sql_stmnt}
    ), 
    USG_W_SIGNED_VAL AS (
    {usg_w_signed_val_sql_stmnt}
    ), 
    U AS (
    {usg_sql_stmnt}
    ), 
    AGG1 AS (
    {agg_1_sql_stmnt}
    )

    {agg_rd2_sql_statement}
    """
    # ********************** Return **********************
    if verbose:
        print(sql_full_stmnt)
    #--------------------
    return sql_full_stmnt

In [16]:
def assemble_usg_kwh_net_or_total_sql_statementOLD(usg_sql_dict, final_table_alias='USG_KWH', 
                                                insert_n_tabs_to_each_line=1, prepend_with_to_stmnt=False):
    r"""
    Chain the five queries held in usg_sql_dict into a comma-separated sequence of 
    "<alias> AS (\n<statement>\n)" clauses and return the resulting string.

    usg_sql_dict:
      - Must contain the keys 'usg_w_signed_val_sql', 'usg_w_net_value_sql', 
          'usg_w_total_value_sql', 'usg_wnv_union_wtv_sql' and 'usg_sql'.
      - Each value must provide get_sql_statement(insert_n_tabs_to_each_line=...).

    final_table_alias:
      - Alias given to the last clause (built from 'usg_sql').

    insert_n_tabs_to_each_line:
      - Passed through to every get_sql_statement call.

    prepend_with_to_stmnt:
      - When True, the returned string starts with "WITH ".

    Raises AssertionError when a required key is missing.
    """
    # (key in usg_sql_dict, alias used in the assembled statement), in output order
    keys_and_aliases = (
        ('usg_w_signed_val_sql',  'USG_W_SIGNED_VAL'), 
        ('usg_w_net_value_sql',   'USG_W_NET_VAL'), 
        ('usg_w_total_value_sql', 'USG_W_TOTAL_VAL'), 
        ('usg_wnv_union_wtv_sql', 'USG_W_NET_VAL_UNION_USG_W_TOTAL_VAL'), 
        ('usg_sql',               final_table_alias), 
    )
    #-----
    # Validate everything up front, before any statement is generated
    for required_key, _ in keys_and_aliases:
        assert(required_key in usg_sql_dict)
    #-----
    clauses = []
    for dict_key, alias in keys_and_aliases:
        stmnt = usg_sql_dict[dict_key].get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line)
        clauses.append(f"{alias} AS (\n{stmnt}\n)")
    #-----
    prefix = "WITH " if prepend_with_to_stmnt else ""
    return prefix + ", \n".join(clauses)

In [17]:
def build_usg_kwh_net_or_totalOLD(cols_of_interest_usage, 
                               date_range, 
                               serial_numbers, 
                               additional_derived_uoms=None, 
                               run_careful=True, 
                               value_col='value', 
                               aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                               serialnumber_col='serialnumber', 
                               return_statement=True, 
                               final_table_alias='USG_KWH', 
                               insert_n_tabs_to_each_line=1, 
                               prepend_with_to_stmnt=False, 
                               **kwargs):
    r"""
    Build the chain of sub-queries used to obtain kWh usage as either a net (DELIVERED minus RECEIVED)
    or a TOTAL value, optionally unioned with usage of other aep_derived_uoms.

    Sub-queries built (these are the keys of the dict returned when return_statement is False):
      usg_w_signed_val_sql  : built by build_usg_w_signed_val_sql
      usg_w_net_value_sql   : built by build_usg_w_net_value_sql, reading from table USG_W_SIGNED_VAL
      usg_w_total_value_sql : built by build_usg_w_total_val_sql
      usg_wnv_union_wtv_sql : UNION of USG_W_NET_VAL and USG_W_TOTAL_VAL (and the additional usage, if any)
      usg_sql               : final query, reading from table USG_W_NET_VAL_UNION_USG_W_TOTAL_VAL

    cols_of_interest_usage:
      - forwarded to the sub-query builders, and used as the SELECT list on both sides of the UNION
    date_range, serial_numbers:
      - forwarded to the sub-query builders
    run_careful:
      - True:  the final query is built with build_usg_w_net_value_sql on top of the union table, its WHERE
               is replaced by an empty SQLWhere and HAVING COUNT({value_col})=1 is attached
      - False: the final query is simply SELECT * from the union table
    return_statement:
      - True:  return the string from assemble_usg_kwh_net_or_total_sql_statementOLD
      - False: return the dict of sub-query objects described above
    final_table_alias, insert_n_tabs_to_each_line, prepend_with_to_stmnt:
      - forwarded to assemble_usg_kwh_net_or_total_sql_statementOLD (return_statement=True only)
    kwargs:
      - 'aep_opco', 'schema_name', 'table_name', 'aep_derived_uom_col' are read for the build_sql_usg call
      - the full kwargs are forwarded to build_usg_w_net_value_sql when run_careful is True

    additional_derived_uoms:
      - additional_derived_uoms can take on any form acceptable for the aep_derived_uoms input parameter
          in build_sql_usg (reproduced below).
      - ADDITIONALLY, additional_derived_uoms may equal the string 'ALL'
          If additional_derived_uoms=='ALL', then the net kWh table will be combined with
          aep_derived_uoms of all OTHER types (excluding, of course, 'KWH', as this is handled in
          the rest of this function!)
    
    *** from build_sql_usg ***
    aep_derived_uoms should be a list whose elements are of type:
      i.   string, equal to a aep_derived_uom 
               e.g. aep_derived_uoms = ['KVARH', 'KVAH']
      ii.  tuple, equal to [aep_derived_uom, aep_srvc_qlty_idntfr] pair, in that order
               e.g. aep_derived_uoms = [['VOLT', 'AVG']]
      iii. dict with keys equal to aep_derived_uom, aep_srvc_qlty_idntfr
               e.g. aep_derived_uoms = [dict(aep_derived_uom='VOLT', aep_srvc_qlty_idntfr='AVG')]
      iv.  any combination of the above
               e.g. aep_derived_uoms = ['KVARH', ['VOLT', 'AVG'], 
                                        dict(aep_derived_uom='KVAH', aep_srvc_qlty_idntfr='DELIVERED')]
    """
    # Name of the table expression holding the UNION of the net and total tables.
    # This MUST match the name emitted by assemble_usg_kwh_net_or_total_sql_statementOLD for the
    # 'usg_wnv_union_wtv_sql' entry, otherwise the final query selects from a table which is never defined.
    union_table_name = 'USG_W_NET_VAL_UNION_USG_W_TOTAL_VAL'
    #------------------------------------------------------------
    # See build_sql_usg for information about additional_derived_uoms
    # FOR UNION TO WORK, NEED TO BE CAREFUL AND ENSURE COLUMNS ARE EXACTLY THE SAME AS ARE THEIR ORDERS!
    usg_w_signed_val_sql=build_usg_w_signed_val_sql(cols_of_interest_usage=cols_of_interest_usage, 
                                                    date_range=date_range, 
                                                    serial_numbers=serial_numbers)
    #------------------------------------------------------------
    # WHERE statements already handled in usg_w_signed_val_sql
    # Therefore, not needed here in usg_w_net_value_sql
    usg_w_net_value_sql = build_usg_w_net_value_sql(cols_of_interest_usage=cols_of_interest_usage, 
                                                    date_range=None, 
                                                    serial_numbers=None, 
                                                    value_col=value_col, 
                                                    aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
                                                    serialnumber_col=serialnumber_col, 
                                                    usg_w_signed_val_table_name='USG_W_SIGNED_VAL', 
                                                    sum_signed_val_col='signed_value', 
                                                    sum_signed_val_alias=None, 
                                                    new_const_aep_srvc_qlty_idntfr_val='DEL_MINUS_REC')
    #------------------------------------------------------------
    # NOTE(review): date_range is not passed here, unlike for usg_w_signed_val_sql above.
    #               Confirm against the signature of build_usg_w_total_val_sql whether that is intended.
    usg_w_total_value_sql = build_usg_w_total_val_sql(cols_of_interest_usage=cols_of_interest_usage, 
                                                      serial_numbers=serial_numbers)
    #------------------------------------------------------------
    # Create union of usg_w_net_value_sql and usg_w_total_value_sql
    # This will be aggregated to calculate the final net values
    #   - Remember, from what I have seen so far, entries either have
    #      'RECEIVED' and 'DELIVERED' OR 'RECEIVED' and 'TOTAL'
    #   - usg_w_net_value_sql combined any entries with 'RECEIVED' and 'DELIVERED'
    #   - usg_w_total_value_sql kept only 'TOTAL' entries (while discarding 'RECEIVED' for these pairs)
    # So, the procedure here allows for the case where the sample contains entries of both types
    #   'RECEIVED'/'DELIVERED' and 'RECEIVED'/'TOTAL'
    #
    # NOTE: easier to do SELECT *, but better to do it this way
    #       because it is important columns are exactly the same in unions.
    #       Especially important if including any additional unions here
    sub_query_usg_wnv = SQLQuery(sql_select = SQLSelect(cols_of_interest_usage), 
                                 sql_from = SQLFrom(table_name='USG_W_NET_VAL'), 
                                 sql_where = None)

    sub_query_usg_wtv = SQLQuery(sql_select = SQLSelect(cols_of_interest_usage), 
                                 sql_from = SQLFrom(table_name='USG_W_TOTAL_VAL'), 
                                 sql_where = None)
    #-------------------------
    sub_query_stmnt = f"(\n{sub_query_usg_wnv.get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line)}\n)" \
                      f"\nUNION\n" \
                      f"(\n{sub_query_usg_wtv.get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line)}\n)"
    #-------------------------
    # Optional additional (non-kWh) usage to be unioned in.
    #   'ALL'          ==> no restriction on aep_derived_uoms in build_sql_usg, but KWH explicitly excluded afterwards
    #   non-empty list ==> restrict to the given aep_derived_uoms
    #   otherwise      ==> nothing additional
    aep_derived_uom_col    = kwargs.get('aep_derived_uom_col', 'aep_derived_uom')
    include_all_other_uoms = (additional_derived_uoms == 'ALL')
    if include_all_other_uoms or (additional_derived_uoms is not None and len(additional_derived_uoms)>0):
        additional_sql = build_sql_usg(cols_of_interest_usage=cols_of_interest_usage, 
                                       serial_numbers=serial_numbers, 
                                       date_range=date_range, 
                                       aep_derived_uoms=None if include_all_other_uoms else additional_derived_uoms, 
                                       kwh_and_vlt_only=False, 
                                       aep_opco=kwargs.get('aep_opco', None), 
                                       schema_name=kwargs.get('schema_name', 'usage_nonvee'), 
                                       table_name=kwargs.get('table_name', 'reading_ivl_nonvee'), 
                                       serialnumber_col=serialnumber_col, 
                                       aep_derived_uom_col=aep_derived_uom_col, 
                                       aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col)
        if include_all_other_uoms:
            # Use the same column name handed to build_sql_usg, so a caller-supplied
            # aep_derived_uom_col is respected by the KWH exclusion as well
            additional_sql.sql_where.add_where_statement(field_desc=aep_derived_uom_col, 
                                                         comparison_operator='<>', value='KWH', needs_quotes=True)
    else:
        additional_sql = None
    #-------------------------
    if additional_sql is not None:
        sub_query_stmnt +=  f"\nUNION\n" \
                            f"(\n{additional_sql.get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line)}\n)"
    #-------------------------
    usg_wnv_union_wtv_sql = SQLQueryGeneric(sub_query_stmnt)
    #------------------------------------------------------------
    # Now, aggregate table from usg_wnv_union_wtv_sql (similar to usg_w_net_value_sql) to get the final net value
    # TODO
    # Now that I am writing this out and thinking about it, I don't think all of this is necessary.
    # At this point, everything is still being done at the serial number level
    # Therefore, should not come across a case where all 'DELIVERED', 'RECEIVED' and 'TOTAL' are present.
    # IF such an instance exists, then TOTAL better equal DELIVERED-RECEIVED!
    # I'll finish out this effort anyway, but intend to go back to working with non-aggregate data when
    # developing this functionality instead of trying to skip ahead with agg
    if run_careful:
        # This "aggregates" again, but enforces COUNT({value_col})=1
        # Which essentially means make sure there is only one value per group.
        # The reason for this is as follows:
        #   A serial should either have 'DELIVERED'/'RECEIVED' OR 'TOTAL'/'RECEIVED'
        #   Therefore, all entries for a given serial number should be in usg_w_net_value_sql OR usg_w_total_value_sql
        #     but not both.  The code below essentially enforces this.
        usg_sql = build_usg_w_net_value_sql(cols_of_interest_usage=cols_of_interest_usage, 
                                            date_range=date_range, 
                                            serial_numbers=serial_numbers, 
                                            value_col=value_col, 
                                            aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
                                            serialnumber_col=serialnumber_col, 
                                            usg_w_signed_val_table_name=union_table_name, 
                                            sum_signed_val_col=value_col, 
                                            sum_signed_val_alias=None, 
                                            new_const_aep_srvc_qlty_idntfr_val='CALCULATED_NET', 
                                            **kwargs)
        # The date/serial number restrictions were already applied upstream, so drop the WHERE built above
        usg_sql.sql_where=SQLWhere()
        usg_sql.sql_having = SQLHaving([dict(field_desc=f'COUNT({value_col})', comparison_operator='=', 
                                             value='1', needs_quotes=False)
                                       ], idxs=None, run_check=True)
    else:
        usg_sql = SQLQuery(sql_select = SQLSelect(['*']), 
                           sql_from = SQLFrom(union_table_name), 
                           sql_where=None)

    
    usg_sql_dict = {'usg_w_signed_val_sql':usg_w_signed_val_sql, 
                    'usg_w_net_value_sql':usg_w_net_value_sql, 
                    'usg_w_total_value_sql':usg_w_total_value_sql, 
                    'usg_wnv_union_wtv_sql':usg_wnv_union_wtv_sql, 
                    'usg_sql':usg_sql}
    if return_statement:
        return assemble_usg_kwh_net_or_total_sql_statementOLD(usg_sql_dict=usg_sql_dict, 
                                                           final_table_alias=final_table_alias, 
                                                           insert_n_tabs_to_each_line=insert_n_tabs_to_each_line, 
                                                           prepend_with_to_stmnt=prepend_with_to_stmnt)
    else:
        return usg_sql_dict

# ------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------

In [18]:
# Replaced by build_sql_kwh_usg_delrec_w_signed_val
def build_usg_w_signed_val_sqlOLD(cols_of_interest_usage, 
                               date_range, 
                               serial_numbers, 
                               serialnumber_col='serialnumber'):
    r"""
    Build the SQLQuery selecting kWh RECEIVED/DELIVERED entries from usage_nonvee.reading_ivl_nonvee, 
    together with an extra signed_value column (equal to -1*value for RECEIVED entries, value otherwise).

    cols_of_interest_usage:
      - handed directly to SQLSelect
    date_range:
      - date_range[0] and date_range[1] are used as the bounds of a BETWEEN condition on aep_usage_dt
    serial_numbers:
      - if not None, an additional "{serialnumber_col} IN (...)" condition is included
    serialnumber_col:
      - name of the column used for the serial_numbers condition

    The query is restricted to aep_opco='oh' and aep_derived_uom='KWH'.
    """
    #-------------------------
    # SELECT: the requested columns plus signed_value
    select_w_signed_val = SQLSelect(cols_of_interest_usage)
    select_w_signed_val.add_select_element(field_desc="IF(aep_srvc_qlty_idntfr='RECEIVED', -1*value, value)", alias="signed_value")
    #-------------------------
    # WHERE: conditions at positions 2 and 3 are OR'ed together below
    opco_cond      = dict(field_desc='aep_opco', comparison_operator='=', value='oh', needs_quotes=True)
    uom_cond       = dict(field_desc='aep_derived_uom', comparison_operator='=', value='KWH', needs_quotes=True)
    received_cond  = dict(field_desc='aep_srvc_qlty_idntfr', comparison_operator='=', value='RECEIVED', needs_quotes=True)
    delivered_cond = dict(field_desc='aep_srvc_qlty_idntfr', comparison_operator='=', value='DELIVERED', needs_quotes=True)
    date_cond      = dict(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                          value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
    where_delrec = SQLWhere([opco_cond, uom_cond, received_cond, delivered_cond, date_cond], 
                            idxs=None, run_check=True)
    where_delrec.combine_where_elements([2,3], 'OR', close_gaps_in_keys=True)
    if serial_numbers is not None:
        quoted_serial_numbers = Utilities_sql.join_list_w_quotes(serial_numbers)
        where_delrec.add_where_statement(field_desc=serialnumber_col, comparison_operator='IN', 
                                         value=f'({quoted_serial_numbers})', needs_quotes=False)
    #-------------------------
    return SQLQuery(sql_select = select_w_signed_val, 
                    sql_from = SQLFrom('usage_nonvee', 'reading_ivl_nonvee'), 
                    sql_where = where_delrec)

# Replaced by build_sql_kwh_usg_total_only
def build_usg_w_total_val_sqlOLD(cols_of_interest_usage, date_range, serial_numbers, serialnumber_col='serialnumber'):
    r"""
    Build the SQLQuery selecting kWh entries with aep_srvc_qlty_idntfr='TOTAL' from 
    usage_nonvee.reading_ivl_nonvee.

    cols_of_interest_usage:
      - handed directly to SQLSelect
    date_range:
      - date_range[0] and date_range[1] are used as the bounds of a BETWEEN condition on aep_usage_dt
    serial_numbers:
      - if not None, an additional "{serialnumber_col} IN (...)" condition is included
    serialnumber_col:
      - name of the column used for the serial_numbers condition

    The query is restricted to aep_opco='oh' and aep_derived_uom='KWH'.
    """
    #-------------------------
    select_total_only = SQLSelect(cols_of_interest_usage)
    #-------------------------
    opco_cond  = dict(field_desc='aep_opco', comparison_operator='=', value='oh', needs_quotes=True)
    uom_cond   = dict(field_desc='aep_derived_uom', comparison_operator='=', value='KWH', needs_quotes=True)
    total_cond = dict(field_desc='aep_srvc_qlty_idntfr', comparison_operator='=', value='TOTAL', needs_quotes=True)
    date_cond  = dict(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                      value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
    where_total_only = SQLWhere([opco_cond, uom_cond, total_cond, date_cond], idxs=None, run_check=True)
    if serial_numbers is not None:
        quoted_serial_numbers = Utilities_sql.join_list_w_quotes(serial_numbers)
        where_total_only.add_where_statement(field_desc=serialnumber_col, comparison_operator='IN', 
                                             value=f'({quoted_serial_numbers})', needs_quotes=False)
    #-------------------------
    return SQLQuery(sql_select = select_total_only, 
                    sql_from = SQLFrom('usage_nonvee', 'reading_ivl_nonvee'), 
                    sql_where = where_total_only)


#Updated version called build_sql_usg_agg_by_srvc_qlty_idntfr
def build_sql_usg_w_net_valueOLD(cols_of_interest_usage, 
                              date_range, 
                              serial_numbers, 
                              value_col='value', 
                              aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                              serialnumber_col='serialnumber', 
                              usg_w_signed_val_table_name='KWH_USG_DELREC_W_SIGNED_VAL', 
                              **kwargs):
    r"""
    Build the SQLQuery which aggregates the signed-value table (usg_w_signed_val_table_name) over 
    aep_srvc_qlty_idntfr_col, keeping only groups with exactly two entries (HAVING COUNT(...)=2).

    cols_of_interest_usage:
      - columns for the SELECT; must contain value_col, aep_srvc_qlty_idntfr_col and serialnumber_col
      - the GROUP BY columns are derived from these, excluding aep_srvc_qlty_idntfr_col and value_col
    date_range:
      - if not None, a BETWEEN condition on aep_usage_dt is included
    serial_numbers:
      - if not None, a "{serialnumber_col} IN (...)" condition is included

    Recognized kwargs:
      sum_signed_val_col                 : column which is summed/counted (default 'signed_value')
      sum_signed_val_alias               : passed along to adjust_value_to_sum_signed_val_in_sql_select (default None)
      new_const_aep_srvc_qlty_idntfr_val : constant replacing aep_srvc_qlty_idntfr_col in the 
                                             SELECT (default 'CALCULATED_NET')
      aep_opco                           : if not None, an "aep_opco = ..." condition is included (default None)
    """
    # TODO In the future, allow for a groupby_cols input argument, defaulting to 
    #      cols_of_interest_usage - value_col - aep_srvc_qlty_idntfr_col if not input.
    #      Left out for now because a failure is wanted if groupby_cols is included, to ensure everything is working
    # TODO Also in the future maybe(?) make serial_numbers = None by default, but again want to fail if not for now
    # TODO Also, maybe want aep_opco etc in different dict than kwargs...
    #-------------------------
    for required_col in (value_col, aep_srvc_qlty_idntfr_col, serialnumber_col):
        assert(required_col in cols_of_interest_usage)
    #-------------------------
    signed_val_col  = kwargs.get('sum_signed_val_col', 'signed_value')
    summed_alias    = kwargs.get('sum_signed_val_alias', None)
    const_idntfr    = kwargs.get('new_const_aep_srvc_qlty_idntfr_val', 'CALCULATED_NET')
    opco            = kwargs.get('aep_opco', None)
    #-------------------------
    # SELECT
    #   1. value_col is replaced by SUM({signed_val_col})
    #   2. aep_srvc_qlty_idntfr_col is summed over (it is excluded from the GROUP BY), 
    #      so it is replaced by the constant const_idntfr
    net_select = SQLSelect(cols_of_interest_usage)
    net_select = AMINonVeeSQL.adjust_value_to_sum_signed_val_in_sql_select(
        net_select, 
        value_col=value_col, 
        sum_signed_val=f'SUM({signed_val_col})', 
        sum_signed_val_alias=summed_alias
    )
    net_select = AMINonVeeSQL.adjust_aep_srvc_qlty_idntfr_to_const_in_sql_select(
        net_select, 
        aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
        new_const_val=const_idntfr
    )
    #-------------------------
    # WHERE (each condition only included when the corresponding input is supplied)
    net_where = SQLWhere()
    if date_range is not None:
        net_where.add_where_statement(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                                      value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
    if opco is not None:
        net_where.add_where_statement(field_desc='aep_opco', comparison_operator='=', value=f'{opco}', 
                                      needs_quotes=True, idx=0)
    if serial_numbers is not None:
        quoted_serial_numbers = Utilities_sql.join_list_w_quotes(serial_numbers)
        net_where.add_where_statement(field_desc=serialnumber_col, comparison_operator='IN', 
                                      value=f'({quoted_serial_numbers})', needs_quotes=False)
    #-------------------------
    # GROUP BY
    cols_to_group = Utilities.include_at_front_and_exclude_from_list(cols_of_interest_usage, 
                                                                     exclude_from_list=[aep_srvc_qlty_idntfr_col, value_col], 
                                                                     inplace=False)
    net_groupby = SQLGroupBy(field_descs=cols_to_group, 
                             global_table_alias_prefix=None, idxs=None, run_check=True)
    #-------------------------
    # FROM and HAVING
    net_from = SQLFrom(table_name=usg_w_signed_val_table_name)
    having_two_entries = dict(field_desc=f'COUNT({signed_val_col})', comparison_operator='=', 
                              value='2', needs_quotes=False)
    net_having = SQLHaving([having_two_entries], idxs=None, run_check=True)
    #-------------------------
    return SQLQuery(sql_select = net_select, 
                    sql_from = net_from, 
                    sql_where = net_where, 
                    sql_groupby = net_groupby, 
                    sql_having = net_having)


# Previously assemble_usg_kwh_net_or_total_sql_statement
def assemble_net_kwh_usage_sql_statementOLD(usg_sql_dict, final_table_alias='USG_KWH', 
                                         insert_n_tabs_to_each_line=1, prepend_with_to_stmnt=False):
    r"""
    Stitch the net kWh usage sub-queries held in usg_sql_dict into one chain of named table expressions.

    usg_sql_dict:
      - must contain the keys 'sql_kwh_usg_delrec_w_signed_val', 'sql_kwh_usg_delrec_net', 
          'sql_kwh_usg_total_only', 'sql_kwh_usg_delrec_net_union_total_0', 
          'sql_kwh_usg_delrec_net_union_total' and 'additional_sql' (enforced with assert)
      - usg_sql_dict['additional_sql'] may be None; every other value (and additional_sql, when not None) 
          must provide get_sql_statement(insert_n_tabs_to_each_line=...)

    additional_sql is None:
      - the 'sql_kwh_usg_delrec_net_union_total' query becomes the final table, named final_table_alias
    additional_sql is not None:
      - the 'sql_kwh_usg_delrec_net_union_total' query is named KWH_USG_DELREC_NET_UNION_TOTAL, and the final 
          table (final_table_alias) is the UNION of SELECT * from that table with additional_sql

    prepend_with_to_stmnt:
      - when True, the returned string starts with "WITH "

    Returns the assembled statement as a string.
    """
    query_keys = ['sql_kwh_usg_delrec_w_signed_val', 
                  'sql_kwh_usg_delrec_net', 
                  'sql_kwh_usg_total_only', 
                  'sql_kwh_usg_delrec_net_union_total_0', 
                  'sql_kwh_usg_delrec_net_union_total']
    for dict_key in query_keys + ['additional_sql']:
        assert(dict_key in usg_sql_dict)
    #-----
    queries        = [usg_sql_dict[dict_key] for dict_key in query_keys]
    additional_sql = usg_sql_dict['additional_sql']
    #-----
    table_bodies = [query.get_sql_statement(insert_n_tabs_to_each_line=insert_n_tabs_to_each_line) 
                    for query in queries]
    # Names of the first four tables are fixed; what follows depends on additional_sql
    table_names = ['KWH_USG_DELREC_W_SIGNED_VAL', 
                   'KWH_USG_DELREC_NET', 
                   'KWH_USG_TOTAL_VAL', 
                   'KWH_USG_DELREC_NET_UNION_TOTAL_0']
    #-----
    if additional_sql is None:
        table_names.append(final_table_alias)
    else:
        table_names.extend(['KWH_USG_DELREC_NET_UNION_TOTAL', final_table_alias])
        usage_union_sql = SQLQuery(sql_select = SQLSelect(['*']), 
                                   sql_from = SQLFrom(table_name='KWH_USG_DELREC_NET_UNION_TOTAL'), 
                                   sql_where = None)
        # Both halves of the union are nested one level deeper than the other tables
        n_tabs_nested         = insert_n_tabs_to_each_line+1
        usage_union_stmnt     = usage_union_sql.get_sql_statement(insert_n_tabs_to_each_line=n_tabs_nested)
        additional_sql_stmnt  = additional_sql.get_sql_statement(insert_n_tabs_to_each_line=n_tabs_nested)
        table_bodies.append(f"\t(\n\t{usage_union_stmnt}\n\t)\n\tUNION\n\t(\n\t{additional_sql_stmnt}\n\t)")
    #-----
    table_exprs = [f"{table_name} AS (\n{table_body}\n)" 
                   for table_name, table_body in zip(table_names, table_bodies)]
    prefix = "WITH " if prepend_with_to_stmnt else ""
    return prefix + ", \n".join(table_exprs)


#TODO cols_of_interest_usage used!!!! BUT NOT AN INPUT TO FUNCTION!!!!!
# Similar to build_sql_outage_others_on_circuit_3_0, but TOTAL kWh is calculated by first forming a 
# signed_value column which is negative when aep_srvc_qlty_idntfr='RECEIVED' and then aggregating
# FOR NOW, this is also only for aep_derived_uom = 'KWH'
# TODO: calculate_net_kwh is not really correct.  This combines the negative "RECEIVED" value with whatever else is there
#       So, for the case where RECEIVED is accompanied by DELIVERED, this is correct
#       BUT, when RECEIVED is accompanied by TOTAL, this is incorrect, as net should just be total
# TODO Would I ever want groupby_xfmr = False and stop_after_agg_1 = False?
# Write out what all combos mean in description
def build_sql_outage_others_on_circuit_OLD(conn_aws, serial_numbers, date_range, 
                                       field_descs, agg_cols_and_types, groupby_cols, 
                                       calculate_net_kwh, 
                                       groupby_xfmr=True, stop_after_agg_1=False, 
                                       try_to_split_col_strs=True, 
                                       verbose=True, 
                                       include_counts_including_null=True, 
                                       **kwargs):
    """
    Build (and return as a string) the SQL statement intended to aggregate the usage of all OTHER
    meters on the circuit shared by serial_numbers (i.e., per the original author's notes, excluding 
    those from serial_numbers).

    Overall procedure
    -----------------
    Step 1: From the serial numbers given in serial_numbers, first find the circuit information
            and ensure all of the listed serial numbers are on the same circuit
            (delegated to AMINonVeeCircuitSQL.get_circuit_info).
    Step 2: Given the circuit information, find all transformers on the circuit.
            This information is needed because not all meters contain circuit data, and without the 
            transformer numbers on the circuit these meters would be left out.
            In the final query, meters on the circuit will be found which either have the correct circuit
            information OR the correct transformer number.
            NOTE: To be 100% correct, it should probably be meters which have the correct circuit information
                  OR no circuit information AND the correct transformer number.
    Step 3: Put it all together in a single WITH ... statement.

    Parameters
    ----------
    conn_aws:
        Connection object handed to the AMINonVeeCircuitSQL helpers in Steps 1 and 2.
    serial_numbers:
        Serial numbers used (as mfr_devc_ser_nbrs) to look up the circuit information.
    date_range:
        Two-element sequence; date_range[0] and date_range[1] bound aep_usage_dt (BETWEEN) in the 
        net-kWh branch, and the whole object is forwarded to the AMINonVeeSQL usage builders.
    field_descs:
        This is just as should be input into SQLSelect,
        i.e., field_descs should be a list of column names (str), a list of dict items, each with 
        possible keys 'field_desc', 'alias', 'table_alias_prefix', or SQLElement objects.
        Must not contain 'serialnumber' (it is prepended internally).
    agg_cols_and_types:
        keys:
            equal to column names OR SQLElement objects (representing column names) to be aggregated
        values:
            each value should be equal to a list of aggregations to perform on column
            At this time, the available aggregate functions are:
              'sum', 'sq_sum', 'mean', 'std', 'count'
            NOTE: If more aggregate functions are added, to_SQL_dict must be updated appropriately
    groupby_cols:
        should be columns from usage_nonvee.reading_ivl_nonvee.
        SHOULD NOT contain 'trsf_pole_nb', as this will be added where needed.
        (if 'trsf_pole_nb' is in groupby_cols, it is ignored; the caller's list is NOT modified)
    calculate_net_kwh:
        When True, U is built by summing signed_value from the KWH_USG_DELREC_W_SIGNED_VAL CTE 
        (labelled aep_srvc_qlty_idntfr='CALCULATED_NET'); otherwise U comes from AMINonVeeSQL.build_sql_usg.
    groupby_xfmr, stop_after_agg_1:
        Forwarded to / control the aggregation stages.  At least one of them must be truthy.
        When stop_after_agg_1 is True, the statement ends with the first aggregation (no AGG1/round-2 stage).
    try_to_split_col_strs:
        when True and a key in agg_cols_and_types of type str is found, this will attempt to split
        the column name into field_desc and table_alias_prefix components,
        which will then be used when creating the SQLElement replacement key
        e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U'
    verbose:
        When True, the final statement is printed before being returned.
    include_counts_including_null:
        Forwarded to AMINonVeeCircuitSQL.build_agg_1_sql.
    **kwargs:
        Forwarded in full to AMINonVeeCircuitSQL.build_agg_1_sql; 'aep_opco' (if present) is also
        passed to AMINonVeeSQL.build_sql_kwh_usg_delrec_w_signed_val.

    Returns
    -------
    str
        The full SQL statement.

    Raises
    ------
    AssertionError
        If both groupby_xfmr and stop_after_agg_1 are falsy, if field_descs contains an element of an
        unsupported type, or if 'serialnumber' is already present in field_descs.
    """
    # TODO I don't think I'd ever want groupby_xfmr = False and stop_after_agg_1 = False? Correct?
    assert(groupby_xfmr+stop_after_agg_1>0)

    # Work on a filtered copy so the caller's list is never mutated, and so that 'trsf_pole_nb'
    # is excluded everywhere groupby_cols is consumed (including the net-kWh GROUP BY below).
    groupby_cols_no_xfmr = [x for x in groupby_cols if x != 'trsf_pole_nb']

    # ********************** Step 1 **********************
    circuit_info = AMINonVeeCircuitSQL.get_circuit_info(conn_aws, 
                                                        build_mp_kwargs=dict(mfr_devc_ser_nbrs=serial_numbers))
    circuit_nb = circuit_info['circuit_nb']
    circuit_nm = circuit_info['circuit_nm']
    station_nb = circuit_info['station_nb']
    station_nm = circuit_info['station_nm']
    
    # ********************** Step 2 **********************
    trsf_pole_nbs = AMINonVeeCircuitSQL.get_trsf_pole_nbs_on_circuit(conn_aws, 
                                                                     circuit_nb, circuit_nm, 
                                                                     station_nb, station_nm)
    
    # ********************** Step 3 **********************
    mp_sql_stmnt = AMINonVeeCircuitSQL.build_mp_sql_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, 
                                                                                    trsf_pole_nbs, 
                                                                                    return_args = dict(return_statement=True, 
                                                                                                       insert_n_tabs_to_each_line=1))
    #--------------------
    # Reduce every entry of field_descs to its bare column name for the usage sub-queries
    cols_of_interest_usage = []
    for x in field_descs:
        if isinstance(x, str):
            cols_of_interest_usage.append(x)
        elif isinstance(x, dict):
            # The key is the string 'field_desc' (previously an undefined bare name was used here)
            cols_of_interest_usage.append(x['field_desc'])
        elif isinstance(x, SQLElement):
            cols_of_interest_usage.append(x.field_desc)
        else:
            assert 0, f"Unsupported element type in field_descs: {type(x)}"
    assert('serialnumber' not in cols_of_interest_usage)
    cols_of_interest_usage = ['serialnumber'] + cols_of_interest_usage
    #--------------------
    if calculate_net_kwh:
        sql_kwh_usg_delrec_w_signed_val = AMINonVeeSQL.build_sql_kwh_usg_delrec_w_signed_val(cols_of_interest_usage=cols_of_interest_usage, 
                                                                                date_range=date_range, 
                                                                                serial_numbers=None, 
                                                                                aep_opco=kwargs.get('aep_opco', None), 
                                                                                alias='KWH_USG_DELREC_W_SIGNED_VAL')
        #sql_kwh_usg_delrec_w_signed_val.sql_where.remove_single_element_from_collection_at_idx(2) #Old version did not force 'RECEIVED' or 'DELIVERED here'
        sql_kwh_usg_delrec_w_signed_val_stmnt = sql_kwh_usg_delrec_w_signed_val.get_sql_statement(insert_n_tabs_to_each_line=1)
        #--------------------
        usg_sql_select = SQLSelect(cols_of_interest_usage)
        # Instead of value, want SUM(signed_value) AS value
        value_idx = usg_sql_select.find_idx_of_approx_element_in_collection_dict(SQLSelectElement('value'))
        usg_sql_select.remove_single_element_from_collection_at_idx(value_idx)
        usg_sql_select.add_select_element(field_desc='SUM(signed_value)', alias='value')
        # Going to sum over aep_srvc_qlty_idntfr, (by excluding from groupby) so don't want it in selection anymore
        aep_srvc_qlty_idntfr_idx = usg_sql_select.find_idx_of_approx_element_in_collection_dict(SQLSelectElement('aep_srvc_qlty_idntfr'))
        usg_sql_select.remove_single_element_from_collection_at_idx(aep_srvc_qlty_idntfr_idx)
        # But, add in generic aep_srvc_qlty_idntfr with value equal to 'CALCULATED_NET'
        usg_sql_select.add_select_element(field_desc="'CALCULATED_NET'", alias='aep_srvc_qlty_idntfr')
        #-----
        usg_sql_groupby = SQLGroupBy(field_descs=['serialnumber'] + [x for x in groupby_cols_no_xfmr if x != 'aep_srvc_qlty_idntfr'], 
                                     global_table_alias_prefix=None, idxs=None, run_check=True)
        #-----
        usg_sql = SQLQuery(sql_select = usg_sql_select, 
                           sql_from = SQLFrom(table_name='KWH_USG_DELREC_W_SIGNED_VAL'), 
                           sql_where = SQLWhere([dict(field_desc='aep_usage_dt', comparison_operator='BETWEEN', 
                                                      value=[f'{date_range[0]}',f'{date_range[1]}'], needs_quotes=True)
                                                ], idxs=None, run_check=True), 
                           sql_groupby = usg_sql_groupby
                         )
        usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    else:
        # Build the plain usage query, then drop its serialnumber restriction
        usg_sql = AMINonVeeSQL.build_sql_usg(cols_of_interest_usage=cols_of_interest_usage, serial_numbers=[], date_range=date_range)
        usg_sql.sql_where.find_and_remove_approx_element_in_collection_dict(SQLWhereElement('serialnumber', comparison_operator='', value=''))
        usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)        
    #--------------------
    agg_1_sql = AMINonVeeCircuitSQL.build_agg_1_sql(field_descs=field_descs, 
                                                    agg_cols_and_types=agg_cols_and_types, 
                                                    groupby_cols=groupby_cols_no_xfmr, 
                                                    groupby_xfmr=groupby_xfmr, 
                                                    try_to_split_col_strs=try_to_split_col_strs, 
                                                    usg_alias='U', 
                                                    idxs=None, run_check=True, 
                                                    include_counts_including_null=include_counts_including_null, 
                                                    **kwargs)
    agg_1_sql_stmnt = agg_1_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
    if stop_after_agg_1:
        sql_full_stmnt = f"WITH MP AS (\n{mp_sql_stmnt}\n),  "
        if calculate_net_kwh:
            # U selects FROM KWH_USG_DELREC_W_SIGNED_VAL in this mode, so that CTE must be defined here too
            sql_full_stmnt += f"\nKWH_USG_DELREC_W_SIGNED_VAL AS (\n{sql_kwh_usg_delrec_w_signed_val_stmnt}\n),  "
        sql_full_stmnt += f"\nU AS (\n{usg_sql_stmnt}\n) \n\n{agg_1_sql_stmnt}"
        if verbose:
            print(sql_full_stmnt)
        #--------------------
        return sql_full_stmnt
    #--------------------
    agg_types_rd2 = ['sum', 'mean']
    agg_rd2_sql = AMINonVeeCircuitSQL.build_agg_rd2_sql(agg_1_sql, groupby_cols_no_xfmr, 
                                                        agg_types_rd2=agg_types_rd2, agg_1_table_alias='AGG1')  
    agg_rd2_sql_statement = agg_rd2_sql.get_sql_statement()
    #-------------------------------------
    sql_full_stmnt = f"WITH MP AS (\n{mp_sql_stmnt}),  " 
    if calculate_net_kwh:
        sql_full_stmnt += f"\nKWH_USG_DELREC_W_SIGNED_VAL AS (\n{sql_kwh_usg_delrec_w_signed_val_stmnt}\n),  "
    sql_full_stmnt += f"\nU AS (\n{usg_sql_stmnt}\n),  "\
                      f"\nAGG1 AS (\n{agg_1_sql_stmnt}\n) "\
                      f"\n\n{agg_rd2_sql_statement}"
    # ********************** Return **********************
    if verbose:
        print(sql_full_stmnt)
    #--------------------
    return sql_full_stmnt

# ------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------
# NORMAL TESTING FROM ORIGINAL DEVELOPMENT
test_df_3_0/test_df_3 are investigated more in the next section (Looking into the difference...) below

# ------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------

In [None]:
run_test = True
if run_test:
    # Compare the legacy 1_0 builder against the current AMINonVeeCircuitSQL builder
    # (stop after the first aggregation, groupby_xfmr=True).
    groupby_xfmr = True
    test_sql_stmnt_gpd_0 = build_sql_outage_others_on_circuit_1_0(
        conn_aws, serial_numbers, date_range, groupby_xfmr, verbose=False
    )
    test_sql_stmnt_gpd = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        aep_derived_uoms_and_idntfrs=None,
        calculate_net_kwh=False,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        stop_after_agg_1=True,
        groupby_xfmr_in_agg_1=groupby_xfmr,
        include_counts_including_null=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
    )

    # Run both statements, then strip table aliases from the resulting column names
    test_df_gpd_0 = pd.read_sql(test_sql_stmnt_gpd_0, conn_aws)
    test_df_gpd = pd.read_sql(test_sql_stmnt_gpd, conn_aws)
    test_df_gpd_0 = Utilities_df.remove_table_aliases(test_df_gpd_0)
    test_df_gpd = Utilities_df.remove_table_aliases(test_df_gpd)

    # Map the legacy column names onto the current naming scheme
    old_to_new_cols = dict(
        value_mean='mean_value',
        value_std='std_value',
        value_sq_sum='sq_sum_value',
        value_sum='sum_value',
        counts='count_value',
    )
    test_df_gpd_0 = test_df_gpd_0.rename(columns=old_to_new_cols)

    print(f"test_df_gpd_0.shape = {test_df_gpd_0.shape}")
    print(f"test_df_gpd.shape   = {test_df_gpd.shape}")
    print(f"test_df_gpd_0.shape==test_df_gpd.shape?: {test_df_gpd_0.shape==test_df_gpd.shape}")

    # Partition the shared columns into numeric (compared approximately) and the rest (compared exactly)
    overlap_cols = list(set(test_df_gpd_0.columns) & set(test_df_gpd.columns))
    numeric_cols_1 = Utilities_df.get_numeric_columns(test_df_gpd_0)
    numeric_cols_2 = Utilities_df.get_numeric_columns(test_df_gpd)
    cols_compared = list(set(numeric_cols_1) & set(numeric_cols_2))
    cols_not_compared = [col for col in overlap_cols if col not in cols_compared]

    # Columns present in only one of the two frames
    in_gpd_0_not_gpd = list(set(test_df_gpd_0.columns) - set(test_df_gpd.columns))
    in_gpd_not_gpd_0 = list(set(test_df_gpd.columns) - set(test_df_gpd_0.columns))

    print('\nAre shared numeric columns equal?')
    approx_dfs = Utilities_df.get_dfs_diff_approx_ok(
        test_df_gpd, test_df_gpd_0, sort_by=['aep_endtime_utc']
    )
    print('True' if approx_dfs.shape[0]==0 else 'False')

    print('\nAre other shared columns equal?')
    print(
        test_df_gpd.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared].equals(
            test_df_gpd_0.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared]
        )
    )

    print('\nColumns not compared?')
    print(f"in_gpd_0_not_gpd: {in_gpd_0_not_gpd}")
    print(f"in_gpd_not_gpd_0: {in_gpd_not_gpd_0}")

In [None]:
run_test = True
if run_test:
    # Compare the legacy 1_0 builder against the current AMINonVeeCircuitSQL builder
    # (stop after the first aggregation, groupby_xfmr=False).
    groupby_xfmr = False
    test_sql_stmnt_0 = build_sql_outage_others_on_circuit_1_0(
        conn_aws, serial_numbers, date_range, groupby_xfmr, verbose=False
    )
    test_sql_stmnt = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        aep_derived_uoms_and_idntfrs=None,
        calculate_net_kwh=False,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        stop_after_agg_1=True,
        groupby_xfmr_in_agg_1=groupby_xfmr,
        include_counts_including_null=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
    )

    # Run both statements, then strip table aliases from the resulting column names
    test_df_0 = pd.read_sql(test_sql_stmnt_0, conn_aws)
    test_df = pd.read_sql(test_sql_stmnt, conn_aws)
    test_df_0 = Utilities_df.remove_table_aliases(test_df_0)
    test_df = Utilities_df.remove_table_aliases(test_df)

    # Map the legacy column names onto the current naming scheme
    old_to_new_cols = dict(
        value_mean='mean_value',
        value_std='std_value',
        value_sq_sum='sq_sum_value',
        value_sum='sum_value',
        counts='count_value',
    )
    test_df_0 = test_df_0.rename(columns=old_to_new_cols)

    print(f"test_df_0.shape = {test_df_0.shape}")
    print(f"test_df.shape   = {test_df.shape}")
    print(f"test_df_0.shape==test_df.shape?: {test_df_0.shape==test_df.shape}")

    # Partition the shared columns into numeric (compared approximately) and the rest (compared exactly)
    overlap_cols = list(set(test_df_0.columns) & set(test_df.columns))
    numeric_cols_1 = Utilities_df.get_numeric_columns(test_df_0)
    numeric_cols_2 = Utilities_df.get_numeric_columns(test_df)
    cols_compared = list(set(numeric_cols_1) & set(numeric_cols_2))
    cols_not_compared = [col for col in overlap_cols if col not in cols_compared]

    # Columns present in only one of the two frames
    in_0_not_ = list(set(test_df_0.columns) - set(test_df.columns))
    in_not_0 = list(set(test_df.columns) - set(test_df_0.columns))

    print('\nAre shared numeric columns equal?')
    approx_dfs = Utilities_df.get_dfs_diff_approx_ok(
        test_df, test_df_0, sort_by=['aep_endtime_utc']
    )
    print('True' if approx_dfs.shape[0]==0 else 'False')

    print('\nAre other shared columns equal?')
    print(
        test_df.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared].equals(
            test_df_0.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared]
        )
    )

    print('\nColumns not compared?')
    print(f"in_0_not_: {in_0_not_}")
    print(f"in_not_0: {in_not_0}")

In [None]:
run_test = True
if run_test:
    # Compare the legacy 2_0 builder against the current AMINonVeeCircuitSQL builder
    # (both aggregation rounds, groupby_xfmr_in_agg_1=True, no net-kWh calculation).
    test_sql_stmnt_2_0 = build_sql_outage_others_on_circuit_2_0(
        conn_aws, serial_numbers, date_range, verbose=False
    )
    test_sql_stmnt_2 = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        aep_derived_uoms_and_idntfrs=None,
        calculate_net_kwh=False,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        stop_after_agg_1=False,
        groupby_xfmr_in_agg_1=True,
        include_counts_including_null=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
    )

    # Run both statements, then strip table aliases from the resulting column names
    test_df_2_0 = pd.read_sql(test_sql_stmnt_2_0, conn_aws)
    test_df_2 = pd.read_sql(test_sql_stmnt_2, conn_aws)
    test_df_2_0 = Utilities_df.remove_table_aliases(test_df_2_0)
    test_df_2 = Utilities_df.remove_table_aliases(test_df_2)

    # Map the legacy column names onto the current naming scheme
    old_to_new_cols = dict(
        sum_value_sum='sum_sum_value',
        mean_value_sum='mean_sum_value',
        sum_value_sq_sum='sum_sq_sum_value',
        mean_value_sq_sum='mean_sq_sum_value',
        sum_value_mean='sum_mean_value',
        mean_value_mean='mean_mean_value',
        sum_value_std='sum_std_value',
        mean_value_std='mean_std_value',
        sum_counts='sum_count_value',
        mean_counts='mean_count_value',
        sum_counts_including_null='sum_counts_including_null',
        mean_counts_including_null='mean_counts_including_null',
    )
    test_df_2_0 = test_df_2_0.rename(columns=old_to_new_cols)

    print(f"test_df_2_0.shape = {test_df_2_0.shape}")
    print(f"test_df_2.shape   = {test_df_2.shape}")
    print(f"test_df_2_0.shape==test_df_2.shape?: {test_df_2_0.shape==test_df_2.shape}")

    # Partition the shared columns into numeric (compared approximately) and the rest (compared exactly)
    overlap_cols = list(set(test_df_2_0.columns) & set(test_df_2.columns))
    numeric_cols_1 = Utilities_df.get_numeric_columns(test_df_2_0)
    numeric_cols_2 = Utilities_df.get_numeric_columns(test_df_2)
    cols_compared = list(set(numeric_cols_1) & set(numeric_cols_2))
    cols_not_compared = [col for col in overlap_cols if col not in cols_compared]

    # Columns present in only one of the two frames
    in_2_0_not_2 = list(set(test_df_2_0.columns) - set(test_df_2.columns))
    in_2_not_2_0 = list(set(test_df_2.columns) - set(test_df_2_0.columns))

    print('\nAre shared numeric columns equal?')
    approx_dfs = Utilities_df.get_dfs_diff_approx_ok(
        test_df_2, test_df_2_0, sort_by=['aep_endtime_utc']
    )
    print('True' if approx_dfs.shape[0]==0 else 'False')

    print('\nAre other shared columns equal?')
    print(
        test_df_2.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared].equals(
            test_df_2_0.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared]
        )
    )

    print('\nColumns not compared?')
    print(f"in_2_0_not_2: {in_2_0_not_2}")
    print(f"in_2_not_2_0: {in_2_not_2_0}")

In [None]:
#NOTE: Old version did not combine net kwh correctly, so expect some small differences here
run_test = True
if run_test:
    # Inputs shared by the legacy 3_0 builder and the current builder
    cols_of_interest_usage_agg = [
        'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
        'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'value', 'aep_usage_dt',
    ]
    serial_numbers = dev_ser_nbrs
    date_range = ['2021-01-01', '2021-01-02']
    field_descs = cols_of_interest_usage_agg
    agg_cols_and_types = {'U.value': ['sum', 'sq_sum', 'mean', 'std', 'count']}
    groupby_cols = [
        'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
        'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'aep_usage_dt',
    ]
    try_to_split_col_strs = True

    # Compare the legacy 3_0 builder against the current AMINonVeeCircuitSQL builder
    # (both aggregation rounds, KWH only, net-kWh calculation enabled).
    test_sql_stmnt_3_0 = build_sql_outage_others_on_circuit_3_0(
        conn_aws, serial_numbers, date_range, verbose=False
    )
    test_sql_stmnt_3 = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        aep_derived_uoms_and_idntfrs=['KWH'],
        calculate_net_kwh=True,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        stop_after_agg_1=False,
        groupby_xfmr_in_agg_1=True,
        include_counts_including_null=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
    )

    # Run both statements, then strip table aliases from the resulting column names
    test_df_3_0 = pd.read_sql(test_sql_stmnt_3_0, conn_aws)
    test_df_3 = pd.read_sql(test_sql_stmnt_3, conn_aws)
    test_df_3_0 = Utilities_df.remove_table_aliases(test_df_3_0)
    test_df_3 = Utilities_df.remove_table_aliases(test_df_3)

    # Map the legacy column names onto the current naming scheme
    old_to_new_cols = dict(
        sum_value_sum='sum_sum_value',
        mean_value_sum='mean_sum_value',
        sum_value_sq_sum='sum_sq_sum_value',
        mean_value_sq_sum='mean_sq_sum_value',
        sum_value_mean='sum_mean_value',
        mean_value_mean='mean_mean_value',
        sum_value_std='sum_std_value',
        mean_value_std='mean_std_value',
        sum_counts='sum_count_value',
        mean_counts='mean_count_value',
        sum_counts_including_null='sum_counts_including_null',
        mean_counts_including_null='mean_counts_including_null',
    )
    test_df_3_0 = test_df_3_0.rename(columns=old_to_new_cols)

    print(f"test_df_3_0.shape = {test_df_3_0.shape}")
    print(f"test_df_3.shape   = {test_df_3.shape}")
    print(f"test_df_3_0.shape==test_df_3.shape?: {test_df_3_0.shape==test_df_3.shape}")

    # Partition the shared columns into numeric (compared approximately) and the rest (compared exactly)
    overlap_cols = list(set(test_df_3_0.columns) & set(test_df_3.columns))
    numeric_cols_1 = Utilities_df.get_numeric_columns(test_df_3_0)
    numeric_cols_2 = Utilities_df.get_numeric_columns(test_df_3)
    cols_compared = list(set(numeric_cols_1) & set(numeric_cols_2))
    cols_not_compared = [col for col in overlap_cols if col not in cols_compared]

    # Columns present in only one of the two frames
    in_3_0_not_3 = list(set(test_df_3_0.columns) - set(test_df_3.columns))
    in_3_not_3_0 = list(set(test_df_3.columns) - set(test_df_3_0.columns))

    print('\nAre shared numeric columns equal?')
    approx_dfs = Utilities_df.get_dfs_diff_approx_ok(
        test_df_3, test_df_3_0, sort_by=['aep_endtime_utc']
    )
    print('True' if approx_dfs.shape[0]==0 else 'False')

    print('\nAre other shared columns equal?')
    print(
        test_df_3.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared].equals(
            test_df_3_0.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared]
        )
    )

    print('\nColumns not compared?')
    print(f"in_3_0_not_3: {in_3_0_not_3}")
    print(f"in_3_not_3_0: {in_3_not_3_0}")

# Looking into the difference between old (wrong) net method and new
NOT TERRIBLY IMPORTANT, BUT WOULD BE GOOD TO LOOK AT IF POSSIBLE

# Investigation 1

In [None]:
#NOTE: Old version did not combine net kwh correctly, so expect some small differences here
# Regression check: build the same "others on circuit" aggregate query with the old builder
# (build_sql_outage_others_on_circuit_3_0) and with the current
# AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit, run both, and compare the results.
# The names assigned inside the `if` (conn_aws, test_df_3, test_df_3_0, approx_dfs, ...) are
# read by the cells that follow, so those cells only work when run_test is True.
run_test = True
if run_test:
    conn_aws = Utilities.get_athena_prod_aws_connection()
    cols_of_interest_usage_agg = ['starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset', 
                                  'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'value', 'aep_usage_dt']

    # Inputs shared by both builders: meters of interest, usage-date window, and the
    # aggregation spec (groupby_cols is cols_of_interest_usage_agg without 'value').
    serial_numbers = [884390632, 880682782, 880320285, 880076534, 880320207, 889926007, 884390655, 880076535]
    date_range = ['2021-01-01', '2021-01-02']
    field_descs=cols_of_interest_usage_agg
    agg_cols_and_types = {'U.value':['sum', 'sq_sum', 'mean', 'std', 'count']}
    groupby_cols = ['starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset', 
                    'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'aep_usage_dt']
    try_to_split_col_strs = True
    
    
    # Old builder takes only the connection, serial numbers and date range; the new one
    # is given the full field/aggregation spec explicitly.
    test_sql_stmnt_3_0 = build_sql_outage_others_on_circuit_3_0(conn_aws, serial_numbers, date_range, verbose=False)
    test_sql_stmnt_3 = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
        conn_aws, 
        serial_numbers, date_range, 
        field_descs, agg_cols_and_types, groupby_cols, 
        aep_derived_uoms_and_idntfrs=['KWH'], 
        calculate_net_kwh=True, addtnl_build_net_kwh_kwargs = dict(run_careful=False), 
        stop_after_agg_1=False, groupby_xfmr_in_agg_1=True,  
        include_counts_including_null=True, 
        addtnl_build_agg_1_sql_kwargs=dict(try_to_split_col_strs=try_to_split_col_strs),  
        verbose=False
    )
    #--------------
    # Execute both statements against the same connection.
    test_df_3_0 = pd.read_sql(test_sql_stmnt_3_0, conn_aws)
    test_df_3 = pd.read_sql(test_sql_stmnt_3, conn_aws)
    #---------------
    # NOTE(review): presumably strips SQL table-alias prefixes (e.g. "U.") from column names -- confirm in Utilities_df.
    test_df_3_0 = Utilities_df.remove_table_aliases(test_df_3_0)
    test_df_3 = Utilities_df.remove_table_aliases(test_df_3)
    #---------------
    # Map the old builder's aggregate column names onto the new naming so the two frames
    # share column labels (the last two entries map a name to itself, i.e. are no-ops).
    old_to_new_cols = {
        'sum_value_sum':'sum_sum_value', 
        'mean_value_sum':'mean_sum_value', 
        'sum_value_sq_sum':'sum_sq_sum_value',
        'mean_value_sq_sum':'mean_sq_sum_value', 
        'sum_value_mean':'sum_mean_value', 
        'mean_value_mean':'mean_mean_value',
        'sum_value_std':'sum_std_value', 
        'mean_value_std':'mean_std_value', 
        'sum_counts':'sum_count_value', 
        'mean_counts':'mean_count_value',
        'sum_counts_including_null':'sum_counts_including_null', 
        'mean_counts_including_null':'mean_counts_including_null'
    }
    test_df_3_0 = test_df_3_0.rename(columns=old_to_new_cols)
    #---------------        
    print(f"test_df_3_0.shape = {test_df_3_0.shape}")
    print(f"test_df_3.shape   = {test_df_3.shape}")
    print(f"test_df_3_0.shape==test_df_3.shape?: {test_df_3_0.shape==test_df_3.shape}")
    #-----
    # Columns present in both frames.
    overlap_cols = list(set(test_df_3_0.columns).intersection(set(test_df_3.columns)))
    #-----
    # Shared columns that are numeric in both frames vs. the remaining shared columns,
    # which are checked with an exact .equals() further down.
    numeric_cols_1 = Utilities_df.get_numeric_columns(test_df_3_0)
    numeric_cols_2 = Utilities_df.get_numeric_columns(test_df_3)
    cols_compared = list(set(numeric_cols_1).intersection(set(numeric_cols_2)))
    cols_not_compared = [x for x in overlap_cols if x not in cols_compared]
    #-----
    # Columns that exist in only one of the two frames (reported at the end, never compared).
    in_3_0_not_3 = list(set(test_df_3_0.columns).difference(set(test_df_3.columns)))
    in_3_not_3_0 = list(set(test_df_3.columns).difference(set(test_df_3_0.columns)))
    #-----
    print()
    print('Are shared numeric columns equal?')
    # An empty result frame is treated as "equal" below.
    # NOTE(review): the full frames are passed in, not cols_compared; presumably the helper
    # selects the numeric columns itself -- confirm in Utilities_df.get_dfs_diff_approx_ok.
    approx_dfs = Utilities_df.get_dfs_diff_approx_ok(test_df_3, test_df_3_0, 
                                                     sort_by=['aep_endtime_utc'])
    if approx_dfs.shape[0]==0:
        print('True')
    else:
        print('False')
    #-----
    print()
    print('Are other shared columns equal?')
    # Exact comparison of the remaining shared columns after sorting both frames by
    # aep_endtime_utc and resetting the index so rows line up positionally.
    print(test_df_3.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared].equals(
        test_df_3_0.sort_values(by=['aep_endtime_utc'], ignore_index=True)[cols_not_compared])
         )
    #-----
    print()
    print('Columns not compared?')
    print(f"in_3_0_not_3: {in_3_0_not_3}")
    print(f"in_3_not_3_0: {in_3_not_3_0}")

In [None]:
approx_dfs

In [None]:
# Repeat the old-vs-new diff, this time passing precision=1e-02 explicitly (the earlier
# call that produced approx_dfs did not pass `precision`, so it used the helper's default).
# NOTE(review): presumably a looser tolerance than the default -- confirm in Utilities_df.
approx_dfs2 = Utilities_df.get_dfs_diff_approx_ok(test_df_3, test_df_3_0, 
                                                 sort_by=['aep_endtime_utc'], precision=1e-02)

In [None]:
approx_dfs2

In [None]:
# print(test_sql_stmnt_3)

In [None]:
# Differing entries where the df1 value is smaller than the df2 value.
# NOTE(review): df1/df2 presumably follow the argument order of get_dfs_diff_approx_ok
# (df1 = test_df_3 / new, df2 = test_df_3_0 / old) -- confirm.
approx_dfs[approx_dfs['df1_values']<approx_dfs['df2_values']]

In [None]:
# Same filter on the precision=1e-02 diff: entries where the df1 value is below the df2 value.
approx_dfs2[approx_dfs2['df1_values']<approx_dfs2['df2_values']]

In [None]:
# Hand-written query that returns one row per meter and interval (no aggregation, despite
# the CTE being named AGG1) for every meter selected by the MP filter, excluding the eight
# serial numbers of interest. The aggregation is then redone in pandas in the cells below
# so it can be compared against test_df_3.
#
# CTE outline:
#   MP                                -> meter serial number / transformer pole, filtered by circuit or pole list
#   KWH_USG_DELREC_W_SIGNED_VAL       -> KWH DELIVERED/RECEIVED readings, RECEIVED negated into signed_value
#   KWH_USG_DELREC_NET                -> per meter+interval SUM(signed_value), kept only when both readings exist
#   KWH_USG_TOTAL_VAL                 -> KWH TOTAL readings
#   KWH_USG_DELREC_NET_UNION_TOTAL_0  -> UNION of the two sets above
#   U                                 -> same rows relabelled as 'CALCULATED_NET'
#
# FIX: the pole number '1893264692222' was previously split across two source lines inside
# this triple-quoted string, which embedded a newline in the SQL literal so that pole could
# never match. The IN (...) list is now on a single line.
test_3_all_meters_sql = ("""
WITH MP AS (
	SELECT
		mfr_devc_ser_nbr,
		trsf_pole_nb
	FROM default.meter_premise
	WHERE ((circuit_nb = '06' AND circuit_nm = 'F-8906' AND station_nb = '0089' AND station_nm = 'SHANNON') OR trsf_pole_nb IN ('1893161692563','1893355692345','1893751692088','1893772692757','1893899692721','1893951691949','1894151692642','1894214691507','1894621689717','1894659692098','1894664692742','1894678692290','1894768691393','1894943690495','1895049690979','1895178691337','1895190688028','1895221691730','1895235691963','1895246688635','1895255692207','1895272691100','1895277692451','1895297692694','1895370693033','1895419689332','1895538690860','1895591688085','1895653691427','1895669689010','1895673688667','1895705692098','1895765692797','1895787693054','1895802691061','1895967692502','1895973688498','1896012692779','1896047688699','1896061691034','1896071691608','1896142689907','1896309691590','1896315692166','1896334691869','1896447691298','1896518692741','1896666693007','1896729691561','1896762692722','1896787690973','1896900691263','1896951692709','1897157690938','1897169691673','1897408691211','1897446691568','1897490692052','1897699691192','1897720691449','1897741691677','1897865690874','1898038691760','1898058691998','1898320691619','1898348691146','1898361692107','1898448692512','1898542690818','1898602691388','1898660692325','1898754690937','1898929691480','1898946692563','1898957691780','1898973692077','1899011693413','1899106690894','1899182691250','1899186691373','1899251692330','1899262692739','1899306686450','1899330690530','1899371693192','1899546687512','1899603693174','1899614693453','1899649692288','1899871692788','1899904694088','1900092693415','1900189693043','1900446693384','1900478693062','1900535686424','1900618686345','1900748686223','1900748694060','1900817693669','1900894686082','1902544684474','1902790683950','1903623679856','1903780683051','1904182681045','1904650682157','1905083680959','1905578681621','1905640682612','1905661682903','1905687683306','1905736683825','1905757684156','1905864684957','1905982685611','1893219692283','1893264692222','1893536692216','1894149691809','1894280692399','1894403692776','1894449691795','1894481692273','1894560691535','1894742692493','1894906690009','1894969691261','1895207688217','1895226688429','1895304688796','1895333688974','1895360690468','1895581688258','1895635688596','1895638690270','1895648689269','1895663691073','1895680691825','1895722692307','1895755692661','1895767688084','1895864690116','1895886691337','1895899688297','1896050692190','1896074689159','1896105691885','1896194693045','1896209691318','1896220692485','1896263692761','1896274689106','1896306691018','1896429693028','1896437689065','1896452689696','1896539691575','1896556692151','1896563691854','1896594692451','1896682691279','1896700692441','1896755691839','1896806691269','1896893692988','1896896691832','1896918692102','1896933692339','1896967689336','1897146691437','1897178691797','1897186691908','1897227692256','1897279690927','1897317692840','1897328692949','1897466691810','1897624690894','1897762691919','1897796692230','1897854692791','1898025691521','1898058692601','1898077692251','1898120691163','1898340691865','1898434690828','1898619691638','1898629691745','1898649691985','1898906691247','1898955692686','1898963692341','1898975692923','1898990693157','1899245691812','1899246692505','1899276692972','1899367693469','1899471686652','1899526692602','1899566692886','1899637687248','1899833693161','1900175686768','1900213687659','1900318693697','1900512684131','1900539693369','1900609693369','1900833684107','1901141684086','1901816683611','1901933681296','1902975679875','1903015683914','1903015684150','1903140683755','1903272683613','1903445683433','1903718680905','1904750680226','1904973685590','1905424683819','1905632682406','1905741684371','1905774683523','1905785684538','1905800685208','1905835685763','1905912686911','1905954684979','1905956685108','1906067685050'))
),  
KWH_USG_DELREC_W_SIGNED_VAL AS (
	SELECT
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		aep_srvc_qlty_idntfr,
		value,
		aep_usage_dt,
		IF(aep_srvc_qlty_idntfr='RECEIVED', -1*value, value) AS signed_value
	FROM usage_nonvee.reading_ivl_nonvee
	WHERE aep_usage_dt BETWEEN '2021-01-01' AND '2021-01-02'
	AND   ((aep_derived_uom = 'KWH' AND aep_srvc_qlty_idntfr = 'RECEIVED') OR (aep_derived_uom = 'KWH' AND aep_srvc_qlty_idntfr = 'DELIVERED'))
), 

KWH_USG_DELREC_NET AS (
	SELECT
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		'DEL_MINUS_REC' AS aep_srvc_qlty_idntfr,
		SUM(signed_value) AS value,
		aep_usage_dt
	FROM KWH_USG_DELREC_W_SIGNED_VAL
	GROUP BY
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		aep_usage_dt
	HAVING COUNT(signed_value) = 2
), 

KWH_USG_TOTAL_VAL AS (
	SELECT
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		aep_srvc_qlty_idntfr,
		value,
		aep_usage_dt
	FROM usage_nonvee.reading_ivl_nonvee
	WHERE aep_usage_dt BETWEEN '2021-01-01' AND '2021-01-02'
	AND   ((aep_derived_uom = 'KWH' AND aep_srvc_qlty_idntfr = 'TOTAL'))
), 

KWH_USG_DELREC_NET_UNION_TOTAL_0 AS (
	(
	SELECT
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		aep_srvc_qlty_idntfr,
		value,
		aep_usage_dt
	FROM KWH_USG_DELREC_NET
	)
	UNION
	(
	SELECT
		serialnumber,
		starttimeperiod,
		endtimeperiod,
		aep_endtime_utc,
		timezoneoffset,
		aep_derived_uom,
		aep_srvc_qlty_idntfr,
		value,
		aep_usage_dt
	FROM KWH_USG_TOTAL_VAL
	)
), 

U AS (
SELECT
	serialnumber,
	starttimeperiod,
	endtimeperiod,
	aep_endtime_utc,
	timezoneoffset,
	aep_derived_uom,
	'CALCULATED_NET' AS aep_srvc_qlty_idntfr,
	value,
	aep_usage_dt
FROM KWH_USG_DELREC_NET_UNION_TOTAL_0
), 
AGG1 AS (
	SELECT
		MP.trsf_pole_nb,
        U.serialnumber, 
		U.starttimeperiod,
		U.endtimeperiod,
		U.aep_endtime_utc,
		U.timezoneoffset,
		U.aep_derived_uom,
		U.aep_srvc_qlty_idntfr,
        value, 
		U.aep_usage_dt
	FROM U
		INNER JOIN MP ON U.serialnumber=MP.mfr_devc_ser_nbr
	WHERE U.serialnumber NOT IN ('884390632','880682782','880320285','880076534','880320207','889926007','884390655','880076535')
) 

SELECT * FROM AGG1
""")

In [None]:
# Run the hand-written per-meter query and clean up the returned column labels
# in one step, then preview the first rows.
test_3_all_meters_df = Utilities_df.remove_prepend_from_columns_in_df(
    pd.read_sql(test_3_all_meters_sql, conn_aws)
)
test_3_all_meters_df.head()

In [None]:
test_df_3.head()

In [None]:
# Grouping keys for the two-stage aggregation done in pandas below:
#   gp_by_cols_2 -> interval / UOM keys only
#   gp_by_cols_1 -> the same keys, additionally split by transformer pole
gp_by_cols_2 = [
    'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
    'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'aep_usage_dt',
]
gp_by_cols_1 = ['trsf_pole_nb'] + gp_by_cols_2

In [None]:
# First-stage aggregation in pandas: sum / mean / count of the per-meter `value`
# within each group defined by gp_by_cols_1.
gpd_1 = test_3_all_meters_df.groupby(gp_by_cols_1).agg({'value':['sum', 'mean', 'count']})
# Flatten the index, move the group keys back into columns, then flatten the
# two-level (column, aggregation) column labels produced by .agg().
# NOTE(review): the gpd_2 cell refers to the results as 'sum value', 'mean value' and
# 'count value', so flatten_multiindex_columns presumably yields "<agg> <column>" -- confirm.
gpd_1 = Utilities_df.flatten_multiindex_index(gpd_1)
gpd_1 = gpd_1.reset_index()
gpd_1 = Utilities_df.flatten_multiindex_columns(gpd_1)

In [None]:
gpd_1.sort_values(by='aep_endtime_utc')

In [None]:
# Second-stage aggregation: collapse the first-stage results in gpd_1 over the
# gp_by_cols_2 keys, taking sum / mean / count of each first-stage statistic.
gpd_2 = gpd_1.groupby(gp_by_cols_2).agg({'sum value':['sum', 'mean', 'count'], 
                                         'mean value':['sum', 'mean', 'count'], 
                                         'count value':['sum', 'mean', 'count']})
gpd_2 = Utilities_df.flatten_multiindex_index(gpd_2)
# 'aep_endtime_utc' is a group key here (reset_index has not been called yet).
gpd_2 = gpd_2.sort_values(by='aep_endtime_utc')
# NOTE(review): flatten_multiindex_index is applied a second time here; it looks redundant
# unless the helper is not idempotent -- confirm before removing.
gpd_2 = Utilities_df.flatten_multiindex_index(gpd_2)
gpd_2 = gpd_2.reset_index()
gpd_2 = Utilities_df.flatten_multiindex_columns(gpd_2)

In [None]:
gpd_2

In [None]:
# Shapes, one per line: first-stage pandas aggregate, second-stage pandas aggregate,
# and the SQL-side aggregate they are being compared against.
print(gpd_1.shape, gpd_2.shape, test_df_3.shape, sep='\n')

In [None]:
# test_df_3.sort_values(by='aep_endtime_utc')['sum_sum_value']!=gpd_2['sum sum value']

In [None]:
gpd_2.head()

In [None]:
test_df_3.sort_values(by='aep_endtime_utc').head()

# Investigation 2

In [19]:
# Shared inputs for the old / new / all-UOM builder comparison in this section.
conn_aws = Utilities.get_athena_prod_aws_connection()

# Meters of interest and the usage-date window.
serial_numbers = [884390632, 880682782, 880320285, 880076534, 880320207, 889926007, 884390655, 880076535]
date_range = ['2021-01-01', '2021-01-02']

# Fields requested from the usage table; the group-by keys are the same fields minus 'value',
# which is the column being aggregated.
cols_of_interest_usage_agg = [
    'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
    'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'value', 'aep_usage_dt',
]
field_descs = cols_of_interest_usage_agg
groupby_cols = [col for col in cols_of_interest_usage_agg if col != 'value']
agg_cols_and_types = {'U.value': ['sum', 'sq_sum', 'mean', 'std', 'count']}
try_to_split_col_strs = True

In [20]:
# Build the aggregate query with the legacy builder (defined elsewhere in this notebook),
# using the same inputs as the new-builder calls that follow.
test_sql_stmnt_3OLD = build_sql_outage_others_on_circuit_OLD(
    conn_aws, 
    serial_numbers, date_range, 
    field_descs, agg_cols_and_types, groupby_cols, 
    calculate_net_kwh=True, 
    stop_after_agg_1=False, groupby_xfmr=True, 
    include_counts_including_null=True, 
    try_to_split_col_strs=try_to_split_col_strs, 
    verbose=False
)

In [21]:
# Build the aggregate query with the current builder, restricted to KWH via
# aep_derived_uoms_and_idntfrs=['KWH'] and with net-kWh calculation switched on.
test_sql_stmnt_3NEW = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
    conn_aws, 
    serial_numbers, date_range, 
    field_descs, agg_cols_and_types, groupby_cols, 
    aep_derived_uoms_and_idntfrs=['KWH'], 
    calculate_net_kwh=True, addtnl_build_net_kwh_kwargs = dict(run_careful=False), 
    stop_after_agg_1=False, groupby_xfmr_in_agg_1=True,  
    include_counts_including_null=True, 
    addtnl_build_agg_1_sql_kwargs=dict(try_to_split_col_strs=try_to_split_col_strs), 
    verbose=False
)

In [22]:
# Same call as for test_sql_stmnt_3NEW except aep_derived_uoms_and_idntfrs=None.
# NOTE(review): None presumably means "no UOM/identifier restriction" -- the resulting
# frame is later filtered down to aep_derived_uom == 'KWH'; confirm in AMINonVeeCircuitSQL.
test_sql_stmnt_3ALL = AMINonVeeCircuitSQL.build_sql_outage_others_on_circuit(
    conn_aws, 
    serial_numbers, date_range, 
    field_descs, agg_cols_and_types, groupby_cols, 
    aep_derived_uoms_and_idntfrs=None, 
    calculate_net_kwh=True, addtnl_build_net_kwh_kwargs = dict(run_careful=False), 
    stop_after_agg_1=False, groupby_xfmr_in_agg_1=True, 
    include_counts_including_null=True, 
    addtnl_build_agg_1_sql_kwargs=dict(try_to_split_col_strs=try_to_split_col_strs), 
    verbose=False
) 

In [23]:
# Execute the legacy-builder statement and clean the returned column labels.
df_old = Utilities_df.remove_table_aliases(
    pd.read_sql(test_sql_stmnt_3OLD, conn_aws)
)

In [24]:
# Execute the current-builder (KWH only) statement, clean the returned column labels,
# and preview the first rows.
df_new = Utilities_df.remove_table_aliases(
    pd.read_sql(test_sql_stmnt_3NEW, conn_aws)
)
df_new.head()

Unnamed: 0,starttimeperiod,endtimeperiod,aep_endtime_utc,timezoneoffset,aep_derived_uom,aep_srvc_qlty_idntfr,aep_usage_dt,sum_sum_value,mean_sum_value,sum_sq_sum_value,mean_sq_sum_value,sum_mean_value,mean_mean_value,sum_std_value,mean_std_value,sum_count_value,mean_count_value,sum_counts_including_null,mean_counts_including_null
0,2021-01-01T00:00:00-05:00,2021-01-01T00:15:00-05:00,1609478100,-05:00,KWH,CALCULATED_NET,2021-01-01,463.613398,2.024513,2384.408151,10.412263,172.160459,0.751792,46.190085,0.228664,1362,5.947598,1377,6.0131
1,2021-01-01T00:15:00-05:00,2021-01-01T00:30:00-05:00,1609479000,-05:00,KWH,CALCULATED_NET,2021-01-01,456.909598,1.995238,2250.712478,9.828439,169.752305,0.741276,44.888048,0.222218,1362,5.947598,1377,6.0131
2,2021-01-01T00:30:00-05:00,2021-01-01T00:45:00-05:00,1609479900,-05:00,KWH,CALCULATED_NET,2021-01-01,471.755002,2.060066,3083.941783,13.466995,188.032186,0.821101,44.812879,0.221846,1362,5.947598,1377,6.0131
3,2021-01-01T00:45:00-05:00,2021-01-01T01:00:00-05:00,1609480800,-05:00,KWH,CALCULATED_NET,2021-01-01,469.834198,2.051678,3513.580693,15.343147,197.367526,0.861867,41.015583,0.203047,1362,5.947598,1377,6.0131
4,2021-01-01T01:00:00-05:00,2021-01-01T01:15:00-05:00,1609481700,-05:00,KWH,CALCULATED_NET,2021-01-01,470.665401,2.055307,3834.462165,16.744376,200.61151,0.876033,41.073059,0.203332,1362,5.947598,1377,6.0131


In [25]:
# Execute the unrestricted statement and clean the returned column labels.
df_all = Utilities_df.remove_table_aliases(
    pd.read_sql(test_sql_stmnt_3ALL, conn_aws)
)
# Independent copy of just the KWH rows, for a like-for-like comparison with df_old / df_new.
df_all_sub = df_all.loc[df_all['aep_derived_uom']=='KWH'].copy()

In [26]:
# Shapes, one per line: legacy result, new KWH-only result, unrestricted result,
# and the KWH-only slice of the unrestricted result.
print(df_old.shape, df_new.shape, df_all.shape, df_all_sub.shape, sep='\n')

(192, 19)
(192, 19)
(1920, 19)
(192, 19)


In [27]:
# Look for KWH rows labelled RECEIVED whose summed value is positive, ordered by interval end.
# The recorded output below is empty.
df_all_sub[(df_all_sub['aep_srvc_qlty_idntfr']=='RECEIVED') & (df_all_sub['sum_sum_value']>0)].sort_values(by=['aep_endtime_utc'])

Unnamed: 0,starttimeperiod,endtimeperiod,aep_endtime_utc,timezoneoffset,aep_derived_uom,aep_srvc_qlty_idntfr,aep_usage_dt,sum_sum_value,mean_sum_value,sum_sq_sum_value,mean_sq_sum_value,sum_mean_value,mean_mean_value,sum_std_value,mean_std_value,sum_count_value,mean_count_value,sum_counts_including_null,mean_counts_including_null


In [28]:
# Single interval to inspect across df_old / df_new / df_all_sub: the aep_endtime_utc of the
# first row shown in df_new.head() (2021-01-01T00:00:00-05:00 to 00:15:00-05:00).
# Kept as a string because it is compared with == against the aep_endtime_utc column below.
utc_time = '1609478100'

In [29]:
df_old[df_old['aep_endtime_utc']==utc_time]

Unnamed: 0,starttimeperiod,endtimeperiod,aep_endtime_utc,timezoneoffset,aep_derived_uom,aep_srvc_qlty_idntfr,aep_usage_dt,sum_sum_value,mean_sum_value,sum_sq_sum_value,mean_sq_sum_value,sum_mean_value,mean_mean_value,sum_std_value,mean_std_value,sum_count_value,mean_count_value,sum_counts_including_null,mean_counts_including_null
0,2021-01-01T00:00:00-05:00,2021-01-01T00:15:00-05:00,1609478100,-05:00,KWH,CALCULATED_NET,2021-01-01,112.230398,0.490089,2190.807903,9.566847,106.327982,0.464314,3.31023,0.016387,1362,5.87069,1383,5.961207


In [30]:
df_new[df_new['aep_endtime_utc']==utc_time]

Unnamed: 0,starttimeperiod,endtimeperiod,aep_endtime_utc,timezoneoffset,aep_derived_uom,aep_srvc_qlty_idntfr,aep_usage_dt,sum_sum_value,mean_sum_value,sum_sq_sum_value,mean_sq_sum_value,sum_mean_value,mean_mean_value,sum_std_value,mean_std_value,sum_count_value,mean_count_value,sum_counts_including_null,mean_counts_including_null
0,2021-01-01T00:00:00-05:00,2021-01-01T00:15:00-05:00,1609478100,-05:00,KWH,CALCULATED_NET,2021-01-01,463.613398,2.024513,2384.408151,10.412263,172.160459,0.751792,46.190085,0.228664,1362,5.947598,1377,6.0131


In [31]:
df_all_sub[df_all_sub['aep_endtime_utc']==utc_time]

Unnamed: 0,starttimeperiod,endtimeperiod,aep_endtime_utc,timezoneoffset,aep_derived_uom,aep_srvc_qlty_idntfr,aep_usage_dt,sum_sum_value,mean_sum_value,sum_sq_sum_value,mean_sq_sum_value,sum_mean_value,mean_mean_value,sum_std_value,mean_std_value,sum_count_value,mean_count_value,sum_counts_including_null,mean_counts_including_null
2,2021-01-01T00:00:00-05:00,2021-01-01T00:15:00-05:00,1609478100,-05:00,KWH,CALCULATED_NET,2021-01-01,463.613398,2.024513,2384.408151,10.412263,172.160459,0.751792,46.190085,0.228664,1362,5.947598,1377,6.0131


In [32]:
# Total sum_sum_value at the chosen interval, one line per frame
# (legacy, new KWH-only, KWH slice of the unrestricted result).
print(
    *(
        frame.loc[frame['aep_endtime_utc']==utc_time, 'sum_sum_value'].sum()
        for frame in (df_old, df_new, df_all_sub)
    ),
    sep='\n',
)

112.23039838299155
463.6133982941974
463.6133982941974
