In [1]:
from importlib import reload
#reload(Utilities)
#reload(clm)

import sys, os
import re

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns
from packaging import version

import itertools
import copy
import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df

In [6]:
class AMINonVeeCircuit_SQL:
    def __init__(self):
        r"""
        Create an instance whose sql_query attribute starts out as None.
        """
        # The methods visible in this class are all static helpers; none of them 
        # assigns sql_query after construction.
        self.sql_query = None
        
    #****************************************************************************************************
    @staticmethod
    def build_mp_sql_circuit_info(build_mp_kwargs={}):
        r"""
        Build (but do not run) the SQLQuery object used to pull the distinct circuit/station 
        identifiers out of the default.meter_premise data.

        build_mp_kwargs:
          - dict of key/value pairs forwarded as keyword arguments to 
            MeterPremise.build_sql_meter_premise; they identify the subset of premises from which 
            the circuit information is ultimately extracted.
          - See MeterPremise.build_sql_meter_premise for the accepted key/value pairs.
          - Originally designed for the case build_mp_kwargs = dict(mfr_devc_ser_nbrs=serial_numbers)
            ==> from the serial numbers given in serial_numbers, find the circuit information
                (the caller can then check that all serial numbers sit on the same circuit)

        Returns:
          - the query object produced by MeterPremise.build_sql_meter_premise, with its SELECT replaced 
            by the DISTINCT circuit/station columns and, for each of those four columns, an 
            (IS NOT NULL, <> '') pair of WHERE statements appended.
        """
        circuit_query = MeterPremise.build_sql_meter_premise(cols_of_interest=[], **build_mp_kwargs)
        #-----
        circuit_query.sql_select = SQLSelect(['DISTINCT circuit_nb,circuit_nm,station_nb,station_nm'])
        #-----
        # Every identifying column must be populated, i.e., neither NULL nor an empty string.
        # Note: Combining each pair of where elements has no practical effect on the query, it is 
        #       done only because it makes the output string look better.
        for column in ('circuit_nb', 'circuit_nm', 'station_nb', 'station_nm'):
            is_not_null  = dict(field_desc=column, comparison_operator='IS NOT', value='NULL', needs_quotes=False)
            is_not_empty = dict(field_desc=column, comparison_operator='<>', value='', needs_quotes=True)
            circuit_query.sql_where.add_where_statements([is_not_null, is_not_empty])
            circuit_query.sql_where.combine_last_n_where_elements(last_n=2, join_operator='AND', close_gaps_in_keys=True)
        #--------------------
        return circuit_query
    
    #****************************************************************************************************
    @staticmethod
    def get_circuit_infos_df(conn_aws, build_mp_kwargs={}):
        r"""
        - Build a SQLQuery object using AMINonVeeCircuit_SQL.build_mp_sql_circuit_info, run the query 
          with conn_aws, and return a pd.DataFrame with information about the circuits found. 
          - columns = ['circuit_nb', 'circuit_nm', 'station_nb', 'station_nm']

        build_mp_kwargs:
          - a dict containing key/value pairs which are essentially used to identify the subset of 
            premises from which to ultimately extract the circuit information.
          - See MeterPremise.build_sql_meter_premise for possible build_mp_kwargs key/value pairs.
          - Original version designed for case build_mp_kwargs = dict(mfr_devc_ser_nbrs=serial_numbers)
            ==> from the serial numbers given in serial_numbers, return the unique circuit informations
        """
        mp_sql = AMINonVeeCircuit_SQL.build_mp_sql_circuit_info(build_mp_kwargs=build_mp_kwargs)
        mp_sql_stmnt = mp_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
        #--------------------
        df_mp = pd.read_sql(mp_sql_stmnt, conn_aws)
        return df_mp    
    
    #****************************************************************************************************
    @staticmethod
    def get_circuit_info(conn_aws, build_mp_kwargs={}):
        r"""
        - Build a SQLQuery object using AMINonVeeCircuit_SQL.build_mp_sql_circuit_info, run the query 
          with conn_aws, and return a dict with information about the circut 
          - keys = ['circuit_nb', 'circuit_nm', 'station_nb', 'station_nm']
        - This assumes (and enforces) that all premises found using build_mp_kwargs are on the same circuit.
          - An assert(0) will be thrown otherwise.
        - If multiple circuits are possible, use AMINonVeeCircuit_SQL.get_circuit_infos_df instead.

        build_mp_kwargs:
          - a dict containing key/value pairs which are essentially used to identify the subset of 
            premises from which to ultimately extract the circuit information.
          - See MeterPremise.build_sql_meter_premise for possible build_mp_kwargs key/value pairs.
          - Original version designed for case build_mp_kwargs = dict(mfr_devc_ser_nbrs=serial_numbers)
            ==> from the serial numbers given in serial_numbers, first find the circuit information
                and ensure all of the listed serial numbers are on the same circuit
        """
        #--------------------
        df_mp = AMINonVeeCircuit_SQL.get_circuit_infos_df(conn_aws, build_mp_kwargs)
        assert(df_mp.shape[0]==1)
        #--------------------
        circuit_nb = df_mp.iloc[0]['circuit_nb']
        circuit_nm = df_mp.iloc[0]['circuit_nm']
        station_nb = df_mp.iloc[0]['station_nb']
        station_nm = df_mp.iloc[0]['station_nm']
        return dict(circuit_nb=circuit_nb, 
                    circuit_nm=circuit_nm, 
                    station_nb=station_nb, 
                    station_nm=station_nm)    
    
    #****************************************************************************************************
    @staticmethod
    def get_trsf_pole_nbs_on_circuit(conn_aws, circuit_nb, circuit_nm, station_nb, station_nm):
        r"""
        Query default.meter_premise over conn_aws for the distinct transformer pole numbers (trsf_pole_nb) 
        on the circuit defined by the inputs, and return them as a list.

        circuit_nb, circuit_nm, station_nb, station_nm:
          - each is compared for equality (as a quoted value) against the column of the same name.
          - NOTE(review): the four conditions are presumably joined with AND by SQLWhere - confirm.
        """
        circuit_id = [('circuit_nb', circuit_nb), 
                      ('circuit_nm', circuit_nm), 
                      ('station_nb', station_nb), 
                      ('station_nm', station_nm)]
        where_statements = [dict(field_desc=column, comparison_operator='=', value=wanted, needs_quotes=True) 
                            for column, wanted in circuit_id]
        #--------------------
        pole_query = SQLQuery(sql_select = SQLSelect(['DISTINCT trsf_pole_nb']), 
                              sql_from = SQLFrom('default', 'meter_premise'), 
                              sql_where = SQLWhere(where_statements, idxs=None, run_check=True))
        #--------------------
        poles_df = pd.read_sql(pole_query.get_sql_statement(), conn_aws)
        return poles_df['trsf_pole_nb'].tolist()
    
    #****************************************************************************************************
    @staticmethod
    def build_mp_sql_w_circuit_info_or_trsf_pole_nbs(circuit_nb, circuit_nm, station_nb, station_nm, trsf_pole_nbs, 
                                                     return_args = dict(return_statement=True, 
                                                                        insert_n_tabs_to_each_line=0)):
        r"""
        Build the default.meter_premise query selecting mfr_devc_ser_nbr and trsf_pole_nb for meters 
        which either match the given circuit information or sit on one of the given transformers.

        circuit_nb, circuit_nm, station_nb, station_nm:
          - each becomes an equality condition (quoted value) on the column of the same name; the four 
            conditions are combined into a single element with 'AND'.

        trsf_pole_nbs:
          - collection of transformer pole numbers, rendered through Utilities_sql.join_list_w_quotes 
            into a trsf_pole_nb IN (...) condition, which is then combined with the circuit element 
            using 'OR'.

        return_args:
          - dict, read only through .get
          - return_statement (False if key absent): when truthy, the SQL statement is returned 
            (via get_sql_statement), otherwise the SQLQuery object itself is returned.
          - insert_n_tabs_to_each_line (0 if key absent): forwarded to get_sql_statement; only 
            consulted when the statement is returned.
        """
        circuit_id = (('circuit_nb', circuit_nb), 
                      ('circuit_nm', circuit_nm), 
                      ('station_nb', station_nb), 
                      ('station_nm', station_nm))
        where_statements = [dict(field_desc=column, comparison_operator='=', value=wanted, needs_quotes=True) 
                            for column, wanted in circuit_id]
        where_statements.append(dict(field_desc='trsf_pole_nb', comparison_operator='IN', 
                                     value=f'({Utilities_sql.join_list_w_quotes(trsf_pole_nbs)})', needs_quotes=False))
        circuit_or_pole_where = SQLWhere(where_statements, idxs=None, run_check=True)
        # Elements 0-3 (circuit information) are merged first, leaving the gap in the keys open so 
        # that the trsf_pole_nb element can still be addressed as key 4 in the second merge.
        circuit_or_pole_where.combine_where_elements([0, 1, 2, 3], 'AND', close_gaps_in_keys=False)
        circuit_or_pole_where.combine_where_elements([0, 4], 'OR', close_gaps_in_keys=True)
        #--------------------
        meters_query = SQLQuery(sql_select = SQLSelect(['mfr_devc_ser_nbr', 'trsf_pole_nb']), 
                                sql_from = SQLFrom('default', 'meter_premise'), 
                                sql_where = circuit_or_pole_where)
        #--------------------
        if not return_args.get('return_statement', False):
            return meters_query
        n_tabs = return_args.get('insert_n_tabs_to_each_line', 0)
        return meters_query.get_sql_statement(n_tabs)
    
    #****************************************************************************************************
    @staticmethod
    # TODO BETTER NAME
    def build_agg_1_sql(field_descs, 
                        agg_cols_and_types, 
                        groupby_cols, 
                        groupby_xfmr, 
                        exclude_serial_numbers, 
                        try_to_split_col_strs=True, 
                        usg_alias='U', mp_alias='MP', 
                        idxs=None, 
                        run_check=True, 
                        include_counts_including_null=True, 
                        **kwargs):
        r"""
        Build the SQLQuery for the first round of aggregation: usage rows (table alias usg_alias) 
        inner-joined to the meter premise rows (table alias mp_alias) on serialnumber = mfr_devc_ser_nbr, 
        excluding exclude_serial_numbers, aggregated per groupby_cols (and per transformer if requested).

        field_descs, agg_cols_and_types, try_to_split_col_strs, idxs, run_check, 
        include_counts_including_null:
          - forwarded unchanged to SQLSelect.build_aggregate_sql_select.

        groupby_cols:
          - columns for the GROUP BY, prefixed with usg_alias.

        groupby_xfmr:
          - when truthy, trsf_pole_nb (prefixed with mp_alias) is inserted at position 0 of both the 
            SELECT and the GROUP BY.

        exclude_serial_numbers:
          - serial numbers rendered through Utilities_sql.join_list_w_quotes into a 
            serialnumber NOT IN (...) condition.

        kwargs:
          - forwarded to SQLSelect.build_aggregate_sql_select; per the original author's note they are 
            ultimately used in add_aggregate_elements_to_sql_select, with keys/standard values
              comp_alias              = False
              comp_table_alias_prefix = True
          - here comp_table_alias_prefix is set to False unless the caller supplied it explicitly.
        """
        kwargs.setdefault('comp_table_alias_prefix', False)
        #--------------------
        rd1_select = SQLSelect.build_aggregate_sql_select(field_descs=field_descs, 
                                                          agg_cols_and_types=agg_cols_and_types, 
                                                          try_to_split_col_strs=try_to_split_col_strs, 
                                                          global_table_alias_prefix=usg_alias, 
                                                          idxs=idxs, run_check=run_check, 
                                                          include_counts_including_null=include_counts_including_null, 
                                                          **kwargs)
        if groupby_xfmr:
            rd1_select.add_select_element(field_desc='trsf_pole_nb', alias=None, table_alias_prefix=mp_alias, 
                                          idx=0, run_check=True)
        #--------------------
        # join_table is intentionally left as an empty string here, only the aliases are supplied
        rd1_join = SQLJoin(join_type='INNER', 
                           join_table='', 
                           join_table_alias=mp_alias, 
                           orig_table_alias=usg_alias, 
                           list_of_columns_to_join=[['serialnumber', 'mfr_devc_ser_nbr']])
        #--------------------
        rd1_groupby = SQLGroupBy(field_descs=groupby_cols, 
                                 global_table_alias_prefix=usg_alias, 
                                 idxs=None, run_check=True)
        if groupby_xfmr:
            rd1_groupby.add_groupby_statement(field_desc='trsf_pole_nb', table_alias_prefix=mp_alias, idx=0, run_check=True)
        #--------------------
        rd1_from = SQLFrom(usg_alias)
        excluded_serials = f'({Utilities_sql.join_list_w_quotes(exclude_serial_numbers)})'
        rd1_where = SQLWhere([dict(field_desc='serialnumber', comparison_operator='NOT IN', 
                                   value=excluded_serials, 
                                   needs_quotes=False, table_alias_prefix=usg_alias)])
        #--------------------
        return SQLQuery(sql_select = rd1_select, 
                        sql_from = rd1_from, 
                        sql_where = rd1_where, 
                        sql_join_coll = rd1_join, 
                        sql_groupby=rd1_groupby)
    
    #****************************************************************************************************
    @staticmethod
    # TODO BETTER NAME
    def build_agg_rd2_sql(agg_1_sql, groupby_cols, agg_types_rd2=['sum', 'mean'], agg_1_table_alias='AGG1'):
        r"""
        Build the SQLQuery for the second round of aggregation, which aggregates again every aggregate 
        column produced by agg_1_sql, reading FROM agg_1_table_alias.

        agg_1_sql:
          - the first round query (see build_agg_1_sql).  It is deep-copied first, so the object passed 
            in is left untouched even though table_alias_prefix is reset on some select elements.

        groupby_cols:
          - columns for the GROUP BY (no table alias prefix is applied).

        agg_types_rd2:
          - list of aggregations applied to each first round aggregate column (identified by its alias).

        agg_1_table_alias:
          - name handed to SQLFrom, i.e., the alias under which the first round result is available.

        An AssertionError is raised if two first round aggregate elements share the same alias.
        """
        rd1_select = copy.deepcopy(agg_1_sql).sql_select
        #-----
        # Split the first round select elements into aggregate elements (to be aggregated again) and 
        #   "normal" elements, which are those neither aggregate nor equal to 'trsf_pole_nb'
        #   (and which should be equal to groupby_cols)
        agg_ids = rd1_select.get_agg_element_ids()
        rd1_elements = rd1_select.collection_dict
        normal_elements = []
        for idx, element in rd1_elements.items():
            if idx in agg_ids or element.field_desc == 'trsf_pole_nb':
                continue
            normal_elements.append(element)
        #-----
        # In round two everything comes from the single round one table, so no alias prefix is wanted
        for element in normal_elements:
            element.table_alias_prefix = None
        #-----
        rd2_aggs = {}
        for idx in agg_ids:
            rd1_alias = rd1_elements[idx].alias
            assert(rd1_alias not in rd2_aggs)
            rd2_aggs[rd1_alias] = agg_types_rd2
        #-----
        rd2_select = SQLSelect.build_aggregate_sql_select(field_descs=normal_elements, 
                                                          agg_cols_and_types=rd2_aggs, 
                                                          include_counts_including_null=False)
        rd2_groupby = SQLGroupBy(field_descs=groupby_cols, global_table_alias_prefix=None, idxs=None, run_check=True)
        return SQLQuery(sql_select=rd2_select, 
                        sql_from = SQLFrom(agg_1_table_alias), 
                        sql_where = None, 
                        sql_groupby=rd2_groupby)
    
    #****************************************************************************************************

    @staticmethod
    def build_sql_outage_others_on_circuit(conn_aws, 
                                           serial_numbers, date_range, 
                                           field_descs, agg_cols_and_types, groupby_cols, 
                                           aep_derived_uoms_and_idntfrs=None, 
                                           calculate_net_kwh=False, addtnl_build_net_kwh_kwargs=None, 
                                           stop_after_agg_1=False, groupby_xfmr_in_agg_1=True, 
                                           include_counts_including_null=True, 
                                           usg_alias='U', mp_alias='MP', agg_1_alias='AGG1', 
                                           addtnl_build_agg_1_sql_kwargs=None, 
                                           verbose=True, 
                                           **kwargs):
        r"""
        Builds and returns a SQL statement string for an aggregate of meters all on a common circuit excluding
        those with serial numbers contained in serial_numbers.

        This function was designed to help build a baseline dataset against which to compare the meters in serial_numbers.
        Basically, serial_numbers contains meters from a transformer which suffered an outage, and the desire is to compare those
        to others on the circuit to see if abnormalities exist.

        TODO: In the future, it may be beneficial to include a switch exclude_serial_numbers_from_circuit.  This would allow this
              function to also be used to gather information about a circuit as a whole, given serial_numbers of some meters
              on the circuit.

        TODO: In future, maybe automatically change groupby_xfmr_in_agg_1 to True when stop_after_agg_1 == False and output
              a warning message (which can be turned off with verbose call)

        ****************************************************************************************************
        Qualitative Procedure:
        ****************************************************************************************************
        Step 1: From the serial numbers given in serial_numbers, first find the circuit information
                and ensure all of the listed serial numbers are on the same circuit
        Step 2: Given the circuit information, find all transformers on the circuit.
                This information is needed because not all meters contain circuit data, and without the transformer
                  numbers on the circuit these meters would be left out.
                In the final query, meters on the circuit will be found which either have the correct circuit
                  information OR the correct transformer number.
                  NOTE: To be 100% correct, it should probably be meters which have the correct circuit information
                        OR no circuit information AND the correct transformer number.
        Step 3: Put it all together.  Return SQL statement to build aggregate of all OTHER meters on the circuit 
                  (i.e., excluding those from serial_numbers)
        ****************************************************************************************************

        ****************************************************************************************************
        Input Argument Descriptions:
        ****************************************************************************************************
        ---------------------------------------------------------------------------
        conn_aws:
        ---------
          Connection to EMR Prod database (see Utilities.get_athena_prod_aws_connection())
        ---------------------------------------------------------------------------
        serial_numbers:
        ---------------
          - List of strings or ints
          - Serial numbers of meters on the circuit of interest.  
          - The functionality here REQUIRES all serial numbers come from the same circuit.  
            It should fail if this is not so.
          - By default, these serial numbers are used to find the circuit information, but will be
            excluded from the circuit aggregation.
            - As described elsewhere, this function was designed to help build a baseline dataset against 
              which to compare the meters in serial_numbers.  So, one would not want serial_numbers in 
              the baseline, as this would induce an auto-correlation effect

        ---------------------------------------------------------------------------
        date_range:
        -----------
          - A tuple containing the minimum and maximum date to query.

        ---------------------------------------------------------------------------
        field_descs:
        ------------
          This is just as should be input into SQLSelect
          i.e., field_descs should be a list of column names or a list of dict items, each with 
                possible keys 'field_desc', 'alias', 'table_alias_prefix'

        ---------------------------------------------------------------------------
        agg_cols_and_types:
        ------------------------
         keys:
             equal to column names OR SQLElement objects (representing column names) to be aggregated
         values:
             each value should be equal to a list of aggregations to perform on column
             At this time, the available aggregate functions are:
               'sum', 'sq_sum', 'mean', 'std', 'count'
             NOTE: If more aggregate functions are added, to_SQL_dict must be updated appropriately

        ---------------------------------------------------------------------------
        groupby_cols:
        -------------
          should be columns from usage_nonvee.reading_ivl_nonvee.
          SHOULD NOT contain 'trsf_pole_nb', as this will be added where needed.
            (if 'trsf_pole_nb' in groupby_cols, it will simply be removed)    

        ---------------------------------------------------------------------------
        calculate_net_kwh:
        ------------------
          - FOR NOW, THIS IS ONLY FOR aep_derived_uom = 'KWH', but it should be possible to expand
              the functionality without too much difficulty to accommodate other instances.
          - If one wants ONLY net kWh, then aep_derived_uoms_and_idntfrs must be set equal to ['KWH']
          - The net kWh for each meter is calculated first before any aggregation.
          - From what I have seen, for a specific time and serial number and when aep_derived_uom = KWH
              there are two entries, either DELIVERED/RECEIVED or TOTAL/RECEIVED.
          - For the case of DELIVERED/RECEIVED, the net is calculated as DELIVERED-RECEIVED
          - For the case of TOTAL/RECEIVED, the net is taken to be TOTAL
          - It is best to do this calculation at the meter level because combining after aggregation
              is approximately correct at best (when done carefully) and confusing.

        ---------------------------------------------------------------------------
        addtnl_build_net_kwh_kwargs:
        ----------------------------         


        ---------------------------------------------------------------------------            
        aep_derived_uoms_and_idntfrs:
        -----------------
        - If left as None or set equal to empty list, this will not impose any selection WHERE on aep_derived_uom,
            and therefore all aep_derived_uoms_and_idntfrs will be returned.
            - When calculate_net_kwh == True, net kWh will be calculated and included (obviously) and all 
              aep_derived_uoms_and_idntfrs other than = 'KWH' will be included as well
        - See AMINonVee_SQL.standardize_aep_derived_uoms_and_srvc_qlty_idntfrs for more information
        - aep_derived_uoms_and_idntfrs should be a list whose elements are of type:
            i.   string, equal to a aep_derived_uom 
                     e.g. aep_derived_uoms_and_idntfrs = ['KVARH', 'KVAH']
            ii.  tuple, equal to [aep_derived_uom, aep_srvc_qlty_idntfr] pair, in that order
                     e.g. aep_derived_uoms_and_idntfrs = [['VOLT', 'AVG']]
            iii. dict with keys equal to aep_derived_uom, aep_srvc_qlty_idntfr
                     e.g. aep_derived_uoms_and_idntfrs = [dict(aep_derived_uom='VOLT', aep_srvc_qlty_idntfr='AVG')]
            iv.  any combination of the above
                     e.g. aep_derived_uoms_and_idntfrs = ['KVARH', ['VOLT', 'AVG'], 
                                                     dict(aep_derived_uom='KVAH', aep_srvc_qlty_idntfr='DELIVERED')]    

        ---------------------------------------------------------------------------
        stop_after_agg_1 and groupby_xfmr_in_agg_1:
        -------------------------------------------
          - If stop_after_agg_1==False, groupby_xfmr_in_agg_1 must be True, i.e., when groupby_xfmr_in_agg_1==False and stop_after_agg_1==False
              an assertion error will be thrown (see ***Additional Notes*** below)
          - stop_after_agg_1 determines whether one or two rounds of aggregation occur.
          - groupby_xfmr_in_agg_1 affects the first round of aggregation when stop_after_agg_1==True
          - Possible combinations:
              i.    stop_after_agg_1      = True
                    groupby_xfmr_in_agg_1 = False

              ii.   stop_after_agg_1      = True
                    groupby_xfmr_in_agg_1 = True

              iii.  stop_after_agg_1      = False
                    groupby_xfmr_in_agg_1 = True
        ---------------------------------------------------------------------------
        stop_after_agg_1:
        -----------------
          The default value is set to False because the main intent of this function is to perform the double aggregation.
          -------------------------
          stop_after_agg_1 == False:
          -------------------------
            *** groupby_xfmr_in_agg_1 must be set to True!
            This aggregates first at the transformer level and then over all transformers in the circuit.  The point is to
            extract information about e.g., how average looking transformers on the circuit look.
            The two rounds of aggregation occur (assuming value of interest = kWh):
              Round 1: Aggregate at the transformer level, i.e., aggregate all meters on each transformer individually.
                       This leaves a collection of transformer level metrics.
                         e.g, sum_value = total kWh for all meters on transformer, representing the usage of the transformer
                              mean_value = average kWh of meters on transformer, representing what an average looking
                                           meter looks like for each transformer
              Round 2: Aggregate all transformers on circuit.  
                       This leaves a collection of circuit level metrics.
                         e.g., mean_sum_value = average kWh of transformers on the circuit, representing what an average
                                                looking transformer on the circuit looks like.
                               sum_sum_value = sum of all meters on the circuit
                                               NOTE: This will equal sum_value when stop_after_agg_1=True 
                                                     and groupby_xfmr_in_agg_1=False
          -------------------------
          stop_after_agg_1 == True:
          -------------------------
            As hinted in the name, this causes the aggregation to stop after the first round.
            One may use this functionality to extract circuit level aggregates of all meters pooled together
              or to extract transformer level aggregates for all transformers on circuit.
            ----------
            groupby_xfmr_in_agg_1 == True 
                                     (& stop_after_agg_1 == True):
              Transformer level aggregates for all transformers on circuit
            ----------
            groupby_xfmr_in_agg_1 == False 
                                     (& stop_after_agg_1 == True):
              Circuit level aggregates of all meters pooled together

        ---------------------------------------------------------------------------
        try_to_split_col_strs:
        ----------------------
          when True and a key in agg_cols_and_types of type str is found, this will attempt to split
          the column name into field_desc and table_alias_prefix components,
          which will then be used when creating the SQLElement replacement key
          e.g. 'U.value' --> field_desc='value' and table_alias_prefix='U'

        ---------------------------------------------------------------------------
        verbose:
        --------

        ---------------------------------------------------------------------------
        include_counts_including_null: 
        ------------------------------


        ---------------------------------------------------------------------------
        kwargs:
        -------

        ****************************************************************************************************
        Additional Notes:
        ****************************************************************************************************
          ---------------------------------------------------------------------------      
          NOTE: groupby_xfmr_in_agg_1=False and stop_after_agg_1=False
            -------------------------
            Having groupby_xfmr_in_agg_1=False and stop_after_agg_1=False would not, by itself, break the query construction
            (which is why this combination is rejected explicitly by an assertion).
            However, there is no instance where this would be the desired functionality, as when these are both false
            the effect is to repeat the same aggregation with the same exact GROUP BY columns twice.
            Thus, after the first aggregation, each group in the second aggregation will only have one member, thus
            negating any further aggregation.
            Consider the normal case where:
              - column to aggregate is 'value' and one is aggregating 
              - agg_types_rd1 = ['sum', 'sq_sum', 'mean', 'std', 'count']
              - and agg_types_rd2 = ['sum', 'mean']
              After the first aggregation, the columns will be: 
                [..., 'sum_value', 'sq_sum_value', 'mean_value', 'std_value', 'count_value']
              After the second aggregation, these columns become:
                [..., 'sum_sum_value', 'mean_sum_value', 'sum_sq_sum_value',
                 'mean_sq_sum_value', 'sum_mean_value', 'mean_mean_value',
                 'sum_std_value', 'mean_std_value', 'sum_count_value',
                 'mean_count_value', 'sum_counts_including_null',
                 'mean_counts_including_null']
              Due to the fact that each group in the second aggregation has only one member, as described above,
              the following pairs will be exactly the same (except, possibly one being int whereas other is float):
                agg_2_col_a               == agg_2_col_b                == agg_1_col_c
                sum_sum_value             == mean_sum_value             == sum_value
                sum_sq_sum_value          == mean_sq_sum_value          == sq_sum_value
                sum_mean_value            == mean_mean_value            == mean_value
                sum_std_value             == mean_std_value             == std_value
                sum_count_value           == mean_count_value           == count_value
                sum_counts_including_null == mean_counts_including_null == counts_including_null


        """
        kwargs['serialnumber_col']         = kwargs.get('serialnumber_col', 'serialnumber')
        kwargs['aep_srvc_qlty_idntfr_col'] = kwargs.get('aep_srvc_qlty_idntfr_col', 'aep_srvc_qlty_idntfr')
        #----------------------------------------------------------------------------------------------------
        assert(groupby_xfmr_in_agg_1+stop_after_agg_1>0)
        if aep_derived_uoms_and_idntfrs is None:
            aep_derived_uoms_and_idntfrs = []
        #--------------------------------------------------
        # Make sure date_range in kwargs matches that explicitly supplied in input argument
        kwargs['date_range'] = date_range
        # serial_numbers used in Step 1, but not elsewhere.
        # Therefore, actually want kwargs['serial_numbers'] = None
        kwargs['serial_numbers'] = None
        #--------------------------------------------------
        # Arguments for build_net_kwh_usage_sql_statement other than:
        #   cols_of_interest_usage, additional_derived_uoms, final_table_alias, and **kwargs
        default_addtnl_build_net_kwh_kwargs = dict(run_careful=True, 
                                                   value_col='value', 
                                                   return_statement=True, 
                                                   final_table_alias=usg_alias, 
                                                   insert_n_tabs_to_each_line=0, 
                                                   prepend_with_to_stmnt=False, 
                                                   join_mp_args=True)
        if addtnl_build_net_kwh_kwargs is None:
            addtnl_build_net_kwh_kwargs = default_addtnl_build_net_kwh_kwargs
        else:
            addtnl_build_net_kwh_kwargs = Utilities_sql.supplement_dict_with_default_values(addtnl_build_net_kwh_kwargs, 
                                                                                            default_addtnl_build_net_kwh_kwargs)
        #--------------------------------------------------
        default_addtnl_build_agg_1_sql_kwargs = dict(comp_alias=False, 
                                                     comp_table_alias_prefix=True, 
                                                     try_to_split_col_strs=True, 
                                                     idxs=None, 
                                                     run_check=True)
        if addtnl_build_agg_1_sql_kwargs is None:
            addtnl_build_agg_1_sql_kwargs = default_addtnl_build_agg_1_sql_kwargs
        else:
            addtnl_build_agg_1_sql_kwargs = Utilities_sql.supplement_dict_with_default_values(addtnl_build_agg_1_sql_kwargs, 
                                                                                              default_addtnl_build_agg_1_sql_kwargs)
        #----------------------------------------------------------------------------------------------------
        # ********************** Step 1 **********************
        circuit_info = AMINonVeeCircuit_SQL.get_circuit_info(
            conn_aws, 
            build_mp_kwargs=dict(mfr_devc_ser_nbrs=serial_numbers)
        ) 
        circuit_nb = circuit_info['circuit_nb']
        circuit_nm = circuit_info['circuit_nm']
        station_nb = circuit_info['station_nb']
        station_nm = circuit_info['station_nm']

        # ********************** Step 2 **********************
        trsf_pole_nbs = AMINonVeeCircuit_SQL.get_trsf_pole_nbs_on_circuit(
            conn_aws, 
            circuit_nb, circuit_nm, 
            station_nb, station_nm
        )

        # ********************** Step 3 **********************
        mp_sql_stmnt = AMINonVeeCircuit_SQL.build_mp_sql_w_circuit_info_or_trsf_pole_nbs(
            circuit_nb, circuit_nm, station_nb, station_nm, 
            trsf_pole_nbs, 
            return_args = dict(return_statement=True, 
                               insert_n_tabs_to_each_line=1)
        )
        #--------------------        
        cols_of_interest_usage = SQLElementsCollection.extract_field_descs_from_elements_list(field_descs)
        assert('serialnumber' not in cols_of_interest_usage)
        cols_of_interest_usage= ['serialnumber'] + cols_of_interest_usage
        #--------------------
        if calculate_net_kwh:
            if aep_derived_uoms_and_idntfrs is None or len(aep_derived_uoms_and_idntfrs)==0:
                additional_derived_uoms = 'ALL'
            else:
                # Separate 'KWH' from additional derived uoms in aep_derived_uoms_and_idntfrs
                aep_derived_uoms_and_idntfrs_std = AMINonVee_SQL.standardize_aep_derived_uoms_and_srvc_qlty_idntfrs(aep_derived_uoms_and_idntfrs)
                additional_derived_uoms = [x for x in aep_derived_uoms_and_idntfrs_std if x['aep_derived_uom'].value!='KWH']
            #-----
            usg_kwh_net_or_total_sql_stmnt = AMINonVee_SQL.build_net_kwh_usage_sql_statement(
                cols_of_interest_usage, 
                additional_derived_uoms=additional_derived_uoms, 
                **addtnl_build_net_kwh_kwargs, 
                **kwargs
            )
        else:
            usg_sql = AMINonVee_SQL.build_sql_usg(
                cols_of_interest_usage=cols_of_interest_usage, 
                aep_derived_uoms_and_idntfrs=aep_derived_uoms_and_idntfrs, 
                kwh_and_vlt_only=False, 
                from_table_alias='USG_i', 
                **kwargs
            )
            usg_sql.build_and_add_join(join_type='INNER', join_table=None, 
                                       join_table_alias=mp_alias, orig_table_alias='USG_i', 
                                       list_of_columns_to_join=[[kwargs['serialnumber_col'], 'mfr_devc_ser_nbr']], 
                                       idx=None, run_check=True)
            usg_sql_stmnt = usg_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
        #--------------------
        if 'trsf_pole_nb' in groupby_cols:
            _ = groupby_cols.pop(groupby_cols.index('trsf_pole_nb'))
        agg_1_sql = AMINonVeeCircuit_SQL.build_agg_1_sql(
            field_descs=field_descs, 
            agg_cols_and_types=agg_cols_and_types, 
            groupby_cols=groupby_cols, 
            groupby_xfmr=groupby_xfmr_in_agg_1, 
            exclude_serial_numbers=serial_numbers,  
            usg_alias=usg_alias, 
            mp_alias=mp_alias, 
            include_counts_including_null=include_counts_including_null, 
            **addtnl_build_agg_1_sql_kwargs
        )
        agg_1_sql_stmnt = agg_1_sql.get_sql_statement(insert_n_tabs_to_each_line=1)
        #-------------------------
        sql_full_stmnt = f"WITH {mp_alias} AS (\n{mp_sql_stmnt}\n),  "
        #-----
        if calculate_net_kwh:
            sql_full_stmnt += f"\n{usg_kwh_net_or_total_sql_stmnt}"
        else:
            sql_full_stmnt += f"\n{usg_alias} AS (\n{usg_sql_stmnt}\n)"
        #-----
        if stop_after_agg_1:
            sql_full_stmnt += f" \n\n{agg_1_sql_stmnt}"
            if verbose:
                print(sql_full_stmnt)
            return sql_full_stmnt
        else:
            sql_full_stmnt += ", "
        #--------------------
        agg_types_rd2 = ['sum', 'mean']
        agg_rd2_sql = AMINonVeeCircuit_SQL.build_agg_rd2_sql(
            agg_1_sql, 
            groupby_cols, 
            agg_types_rd2=agg_types_rd2, 
            agg_1_table_alias=agg_1_alias
        )  
        agg_rd2_sql_statement = agg_rd2_sql.get_sql_statement()
        #-------------------------------------
        sql_full_stmnt += f"\n{agg_1_alias} AS (\n{agg_1_sql_stmnt}\n) "\
                          f"\n\n{agg_rd2_sql_statement}"
        # ********************** Return **********************
        if verbose:
            print(sql_full_stmnt)
        #--------------------
        return sql_full_stmnt

In [14]:
# Switch for the `if run_testing:` cells below; while False, those cells are skipped.
run_testing = False

In [15]:
if run_testing:
    # Connection object handed as the first argument to
    # AMINonVeeCircuit_SQL.build_sql_outage_others_on_circuit in the test cells that follow.
    # Only created when testing is enabled.
    conn_aws = Utilities.get_athena_prod_aws_connection()

In [16]:
if run_testing:
    # Build (and print) the full two-stage aggregation statement (stop_after_agg_1=False)
    # with the net-kWh calculation enabled.
    #-----
    # Usage fields requested from the query; 'value' is the column that gets aggregated.
    cols_of_interest_usage_agg = [
        'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
        'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'value', 'aep_usage_dt'
    ]
    field_descs = cols_of_interest_usage_agg
    # Group by every requested field except the aggregated 'value' column
    groupby_cols = [col for col in cols_of_interest_usage_agg if col != 'value']
    agg_cols_and_types = {'U.value': ['sum', 'sq_sum', 'mean', 'std', 'count']}
    #-----
    serial_numbers = [
        884390632, 880682782, 880320285, 880076534,
        880320207, 889926007, 884390655, 880076535
    ]
    date_range = ['2021-01-01', '2021-01-02']
    try_to_split_col_strs = True
    #-----
    # NOTE(review): the function body works with `aep_derived_uoms_and_idntfrs`, whereas this call
    # passes `aep_derived_uoms` (presumably collected into **kwargs and forwarded) -- confirm that
    # this is the intended keyword.
    test_sql_stmnt_3 = AMINonVeeCircuit_SQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        calculate_net_kwh=True,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        aep_derived_uoms=['KWH'],
        stop_after_agg_1=False,
        groupby_xfmr_in_agg_1=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
        include_counts_including_null=True
    )
    print(test_sql_stmnt_3)

In [17]:
if run_testing:
    # Same call as the previous test cell, except that aep_derived_uoms is None.
    # serial_numbers, date_range, field_descs, agg_cols_and_types, groupby_cols and
    # try_to_split_col_strs are not defined here; they must already exist from an earlier cell.
    test_sql_stmnt_3ALL = AMINonVeeCircuit_SQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        calculate_net_kwh=True,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        aep_derived_uoms=None,
        stop_after_agg_1=False,
        groupby_xfmr_in_agg_1=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False,
        include_counts_including_null=True
    )
    print(test_sql_stmnt_3ALL)

In [18]:
if run_testing:
    # Build the full two-stage aggregation statement, explicitly passing
    # aep_derived_uoms_and_idntfrs=None.  The result is stored but not printed.
    #-----
    # Usage fields requested from the query; 'value' is the column that gets aggregated.
    cols_of_interest_usage_agg = [
        'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
        'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'value', 'aep_usage_dt'
    ]
    field_descs = cols_of_interest_usage_agg
    # Group by every requested field except the aggregated 'value' column
    groupby_cols = [col for col in cols_of_interest_usage_agg if col != 'value']
    agg_cols_and_types = {'U.value': ['sum', 'sq_sum', 'mean', 'std', 'count']}
    #-----
    serial_numbers = [
        884390632, 880682782, 880320285, 880076534,
        880320207, 889926007, 884390655, 880076535
    ]
    date_range = ['2021-01-01', '2021-01-02']
    try_to_split_col_strs = True
    #-----
    test_sql_stmnt = AMINonVeeCircuit_SQL.build_sql_outage_others_on_circuit(
        conn_aws,
        serial_numbers,
        date_range,
        field_descs,
        agg_cols_and_types,
        groupby_cols,
        aep_derived_uoms_and_idntfrs=None,
        calculate_net_kwh=True,
        addtnl_build_net_kwh_kwargs={'run_careful': False},
        stop_after_agg_1=False,
        groupby_xfmr_in_agg_1=True,
        include_counts_including_null=True,
        addtnl_build_agg_1_sql_kwargs={'try_to_split_col_strs': try_to_split_col_strs},
        verbose=False
    )

In [19]:
# Print the circuit-info query built with the default (empty) build_mp_kwargs.
# Unlike the test cells above, this cell is not gated by run_testing.
AMINonVeeCircuit_SQL.build_mp_sql_circuit_info().print()

SELECT
	DISTINCT circuit_nb,circuit_nm,station_nb,station_nm
FROM default.meter_premise
WHERE (
	circuit_nb IS NOT NULL AND 
	circuit_nb <> ''
)
AND   (
	circuit_nm IS NOT NULL AND 
	circuit_nm <> ''
)
AND   (
	station_nb IS NOT NULL AND 
	station_nb <> ''
)
AND   (
	station_nm IS NOT NULL AND 
	station_nm <> ''
)


In [20]:
# if run_testing:

In [None]:
# if run_testing:

In [None]:
# if run_testing:

In [None]:
# if run_testing:

In [None]:
# if run_testing:

In [None]:
# if run_testing: