# SEE 'Directions for running' below!

In [None]:
from importlib import reload
#reload(Utilities)
#reload(clm)
# NOTE: To reload a class that was imported as, e.g.,
#   from module import SomeClass
# one must call, in order:
#   1. import module
#   2. reload(module)
#   3. from module import SomeClass

import sys, os
import re
from pathlib import Path
import json
import pickle
import joblib

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version
import copy

import itertools

import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
from EEMSP import EEMSP
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from MECPODf import MECPODf
from MECPOAn import MECPOAn
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
#sys.path.insert(0, os.path.join(os.path.realpath('..'), 'Utilities'))
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import Plot_General
import Plot_Box_sns
import Plot_Hist
import Plot_Bar
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer

# Directions for running:

For the purposes of this demonstration, let's assume your AEP User ID is s123456, and your local Documents directory
is located at C:\Users\s123456\Documents

1. If not already done, clone the Analysis GitHub repo (https://github.aepsc.com/s346557/Analysis).
<br>- I will assume the repo was cloned into the Documents directory, i.e. I assume your local copy of the repo is located at C:\Users\s123456\Documents\Analysis (and therefore, this Jupyter notebook should be located at C:\Users\s123456\Documents\Analysis\JupyterNbs\IT_Demo.ipynb)
<br><br>

2. Create a simple text file containing your AEP passwords.
- I suggest you use the file pwd_file_template.txt in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file_template.txt) to create your own password file.
    - DO NOT ALTER the pwd_file_template.txt file; instead, create a new pwd_file.txt file!
- I further suggest you name your password file pwd_file.txt and place it in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file.txt).
    - The Git repo is set up to ignore pwd_file.txt in the Analysis directory, so your information will not be pushed up to the repo if saved in this manner.
- NOTE: At one point, my Athena and Oracle passwords were different, which is why there is a 'Main' and 'Oracle' entry in the password file.  Likely you will put the same password for both entries.
<br><br>

3. IF NOT ALREADY DONE, run the method Utilities_config.generate_initial_config_file to initiate your config.yaml file
- I suggest you input arguments for all three parameters (aep_user_id, pwd_file_path, and local_data_dir)
    - If no aep_user_id is given, the code will attempt to determine your AEP User ID from the contents of your C:\Users directory
    - If no pwd_file_path is given, it is assumed to exist, be named pwd_file.txt, and be located in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file.txt)
    - If local_data_dir is not None, it should point to a directory where you plan to store any results (my personal local_data_dir is located at C:\Users\s346557\Documents\LocalData\).
        - If you are not planning to save or load any files locally, I believe this can be kept as None

# ----------------------------------------------------------------------------------------------------
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ONLY NEED TO RUN ONCE!
So, if you have already run Utilities_config.generate_initial_config_file (and your configuration has not changed since), there is no need to run again

In [None]:
# Set to True only when the config-generation cell that checks this flag should run.
run_config = False

In [None]:
if run_config:
    # One-time setup: hand the three user-specific settings to
    # Utilities_config.generate_initial_config_file.
    # REPLACE VALUES BELOW WITH YOUR OWN
    # NOTE: the user ID embedded in both paths should agree with aep_user_id.
    aep_user_id = 's123456'
    pwd_file_path = r'C:\Users\s123456\Documents\Analysis\pwd_file.txt'
    local_data_dir = r'C:\Users\s123456\Documents\LocalData'

    Utilities_config.generate_initial_config_file(
        aep_user_id=aep_user_id,
        pwd_file_path=pwd_file_path,
        local_data_dir=local_data_dir,
    )

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ----------------------------------------------------------------------------------------------------

In [None]:
# Standard column selections for the two tables queried below, as defined in TableInfos.
cols_of_interest_end_dev_event = TableInfos.AMIEndEvents_TI.std_columns_of_interest
cols_of_interest_met_prem      = TableInfos.MeterPremise_TI.std_columns_of_interest

# Regex rules later passed to AMIEndEvents.reduce_end_event_reasons_in_df to shorten
# the free-text 'reason' strings.
# Entries come in three shapes: a bare pattern, a (pattern, replacement) pair, and a
# (pattern, 'AMIEndEvents.<func name>') pair.
# NOTE(review): the per-entry notes below read each pair as a regex substitution, a bare
#   pattern as "remove the match", and a function name as "that function builds the
#   replacement" -- confirm against reduce_end_event_reasons_in_df.
patterns_to_replace=[
    # Optional colon/whitespace followed by two or more colon-separated groups of
    # 1-2 alphanumerics (presumably MAC-style identifiers -- TODO confirm)
    r'\:?\s*([0-9a-zA-Z]{1,2})(\:[0-9a-zA-Z]{1,2})+', 
    
    # 'Under Voltage ... for meter ...' messages, with optional colon-separated ID,
    # optional phase list, and optional 'Voltage out of tolerance' suffix
    (
        (
            r'(Under Voltage)\s*'\
            r'([0-9a-zA-Z]*)?\s*'\
            r'(\([0-9a-zA-Z\s]*\))\s*'\
            r'([0-9a-zA-Z]*)?\s?'\
            r'(for meter\:?\s*)'\
            r'(?:(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+)?[\s:,.]*'\
            r'(?:Phase\s{1,2}[ABC](?:(?:\s*and\s*[ABC])|(?:,\s*[ABC])*))?\s*'\
            r'(Voltage out of tolerance)?'
        ), 
        'AMIEndEvents.under_voltage_match_func'
    ), 
    
    # 'Last Gasp - ...' messages ending in 'Fail Reason: ...'
    (
        (
            r'(Last Gasp\s*-\s*[0-9a-zA-Z\s]*)[\s\:,.]*'\
            r'.*'\
            r'(Fail Reason: .*)$'
        ), 
        'AMIEndEvents.last_gasp_reduce_func'
    ), 
    
    # Keep 'Angle out of tolerance', drop the bracketed payload after it
    (r'(Angle out of tolerance) \[.*\]', r'\1'), 
    
    # Keep only the leading phrase
    (r'(NIC Power Restore Trap Received from device).*', r'\1'), #TODO This has much info, like last gasp
    
    # Keep the 'Requested operation ...' sentence, drop the device type / firmware tail
    (
        (
            r'(Requested operation .* could not be applied to the given device type and firmware version.) '\
            r'Device, DeviceType: .*, Firmware Version: .*$'
        ), 
        r'\1'
    ), 
    
    # Literal clean-up: 'meterN/A' -> 'meter'
    ('meterN/A', 'meter'),
    
    # Drop the drift value and the trailing ', Meter_Time'; keep the sentence and the
    # 'Encountered Problems' part
    (r'(Meter needs explicit time sync.) Drift: -?\d* s, (Encountered Problems:\s*.*), Meter_Time', r'\1 \2'), 
    
    # Keep the message, drop the '[Device ID, MAC Id] = ...' tail
    (r'(Meter Program Seal mismatch for Device) \[Device ID, MAC Id\] = .*', r'\1'), 
    
    # Keep only the text captured after 'Failed Device Reason:'
    (r'Device Time: .* Failed Device Reason: (.*) Reboot Counter: .* Refresh Counter: .*', r'\1'), 
    
    # Keep the sentence, drop whatever follows it
    (r'(Ignoring (?:Interval|Register) Read data for device as it has time in the future) .*', r'\1'), 
    
    # Keep the sentence (through the 4-character token), drop whatever follows it
    (r'(Secure association operation failed consecutively for 1 times for [0-9a-zA-Z]{4}.) .*', r'\1'), 
    
    # Keep the two state fields, drop the leading 'Device, ' and the reboot count
    (r'Device, (Last Gasp State: .*), (Detector State: .*), Reboot Count: \d*', r'\1, \2'), 
    
    # Keep only the leading phrase
    (r'(Detected end of voltage sag on meter).*', r'\1'), 
    
    # Keep only the leading phrase
    (r'(Detected end of voltage swell on meter).*', r'\1'), 
    
    # Bare pattern: any remaining 'N/A'
    r'N/A', 
    
    # Collapse runs of two or more whitespace characters into a single space
    (r'\s{2,}', ' ')
]

# NOTE:
This is a somewhat simple-minded example, as I am naively joining the meter_events.end_device_event table with default.meter_premise table.
<br>As we have discussed, to correctly join with meter_premise, one must determine which meters were active AT THE TIME IN QUESTION (i.e., on 2022-10-01), rather than simply using whichever meters are currently in default.meter_premise.
<br>Thus, to be completely correct, one should use methods contained in the MeterPremise class.
<br>However, for the purposes here, this simple-minded join is acceptable.

In [None]:
# Construct the AMIEndEvents object from the meter_events.end_device_event table:
# events for aep_opco='oh' on the single date 2022-10-01, LEFT-joined (through a CTE)
# to meter premise on (serialnumber, mfr_devc_ser_nbr) and (aep_premise_nb, prem_nb).
ami_ede = AMIEndEvents(
    df_construct_type=DFConstructType.kRunSqlQuery,
    contstruct_df_args=None,
    init_df_in_constructor=True,
    build_sql_function=None,
    build_sql_function_kwargs={
        'cols_of_interest': cols_of_interest_end_dev_event,
        'opco': 'oh',
        'date_range': ['2022-10-01', '2022-10-01'],
        'join_mp_args': {
            'join_with_CTE': True,
            'build_mp_kwargs': {'cols_of_interest': cols_of_interest_met_prem},
            'join_type': 'LEFT',
            'list_of_columns_to_join': [
                ['serialnumber', 'mfr_devc_ser_nbr'],
                ['aep_premise_nb', 'prem_nb'],
            ],
        },
    },
    save_args=False,
)

In [None]:
# Print the SQL statement held by ami_ede, i.e. the query run when it was constructed
print(ami_ede.get_sql_statement())

In [None]:
# Take a copy of the pandas.DataFrame (DF) held by ami_ede so the original stays untouched
end_events_df = ami_ede.df.copy()
# Every distinct (unreduced) reason string, in sorted order
reasons_full = end_events_df['reason'].unique().tolist()
reasons_full.sort()

In [None]:
# Print some basic info about the DF: its shape and the number of distinct raw reasons
print(f'end_events_df.shape = {end_events_df.shape}')
print(f"Number of unique reasons = {end_events_df['reason'].nunique()}")
# Trailing expression: the notebook renders the first rows of the DF
end_events_df.head()

# -------------------------

In [None]:
# Below, when running reduce_end_event_reasons_in_df, the default behavior (placement_col=None) is to simply replace
#   the entries in the 'reason' column by their reduced versions.
# For purposes here, I will keep both the full reason column (renamed to 'reason_full') and the reduced reason column (named 'reason')

print(f'end_events_df.shape[0]                          = {end_events_df.shape[0]}')
print(f"end_events_df['serialnumber'].nunique()         = {end_events_df['serialnumber'].nunique()}")
print(f"end_events_df['enddeviceeventtypeid'].nunique() = {end_events_df['enddeviceeventtypeid'].nunique()}")
print('\n\n')

# Move the raw text to 'reason_full'; the reduced text is then written to 'reason'.
end_events_df = end_events_df.rename(columns={'reason': 'reason_full'})
end_events_df = AMIEndEvents.reduce_end_event_reasons_in_df(
    df=end_events_df,
    patterns_to_replace=patterns_to_replace,
    reason_col='reason_full',
    placement_col='reason',
)

print('BEFORE REASON REDUCTION')
# The "before" count is taken on the raw column, so the label names 'reason_full'.
print(f"end_events_df['reason_full'].nunique()          = {end_events_df['reason_full'].nunique()}")
print('AFTER REASON REDUCTION')
print(f"end_events_df['reason'].nunique()               = {end_events_df['reason'].nunique()}")

In [None]:
# Distinct reasons left after the reduction, in sorted order (rdcd == 'reduced')
reasons_rdcd_1 = end_events_df['reason'].unique().tolist()
reasons_rdcd_1.sort()

In [None]:
# Display the sorted list of reduced reasons
reasons_rdcd_1

# -------------------------

In [None]:
# For each reduced reason, collect the distinct full reasons that were mapped onto it
reasons_to_full_mapping = (
    end_events_df
    .groupby('reason')['reason_full']
    .unique()
    .to_frame()
)
# How many distinct full reasons sit behind each reduced reason
reasons_to_full_mapping['n_full_in_reason'] = reasons_to_full_mapping['reason_full'].apply(len)
reasons_to_full_mapping

In [None]:
# e.g., all of the full reasons grouped under the reduced reason
#   'Access Point has lost connectivity with FHSS 900 MHz band'
reasons_to_full_mapping.loc['Access Point has lost connectivity with FHSS 900 MHz band', 'reason_full']

# -------------------------

In [None]:
# Map every enddeviceeventtypeid to the (reduced) reasons observed for it
ede_typeid_to_reason_df = AMIEndEvents.build_ede_typeid_to_reason_df(
    end_events_df=end_events_df,
    reason_col='reason',
    ede_typeid_col='enddeviceeventtypeid',
)
# Size of the 'reason' entry stored for each type id
ede_typeid_to_reason_df['n_reasons_in_id'] = ede_typeid_to_reason_df['reason'].apply(len)
ede_typeid_to_reason_df

In [None]:
# e.g., all of the (reduced) reasons recorded for enddeviceeventtypeid='3.12.136.85'
ede_typeid_to_reason_df.loc['3.12.136.85', 'reason']

In [None]:
# The reduced reason 'failed consecutively for 1 times for' looks odd;
# list the full reasons that were mapped onto it to see where it originated.
reasons_to_full_mapping.loc['failed consecutively for 1 times for', 'reason_full']

In [None]:
# Display the shape of the full events DF (the cells below only use its first 1000 rows)
end_events_df.shape

# -------------------------
# NOTE:
Below, I am only using the first 1000 entries in end_events_df.
<br>Running AMIEndEvents.get_reason_counts_per_group with the full DF takes a significant amount of time, and doesn't really add to the purpose here.

### Build the reason_counts_per_group DF grouped by trsf_pole_nb, rcpo_by_xfmr

In [None]:
# Reason counts per transformer (trsf_pole_nb), built from the first 1000 event rows only.
# NOTE(review): the include_* flags presumably add the '_SNs', '_nSNs', '_nprem_nbs' and
#   '_prem_nbs' bookkeeping columns -- confirm in get_reason_counts_per_group.
# Keyword spellings (e.g. 'inclue_zero_counts') are kept exactly as used in this notebook.
rcpo_by_xfmr = AMIEndEvents.get_reason_counts_per_group(
    end_events_df=end_events_df.iloc[:1000],
    group_cols=['trsf_pole_nb'],
    group_freq=None,
    serial_number_col='serialnumber',
    reason_col='reason',
    include_normalize_by_nSNs=False,
    inclue_zero_counts=True,
    possible_reasons=None,
    include_nSNs=True,
    include_SNs=True,
    prem_nb_col='aep_premise_nb',
    include_nprem_nbs=True,
    include_prem_nbs=True,
    return_form={
        'return_multiindex_outg_reason': False,
        'return_normalized_separately': False,
    },
)

In [None]:
# Columns of rcpo_by_xfmr that are excluded from the per-transformer event totals
non_counts_cols = ['_SNs', '_nSNs', '_nprem_nbs', '_prem_nbs']
print(f'nXfmrs:                {rcpo_by_xfmr.shape[0]}')
# Total number of events per transformer, summed over the remaining columns
counts_cols = [col for col in rcpo_by_xfmr.columns.tolist() if col not in non_counts_cols]
n_events_per_xfmr = rcpo_by_xfmr[counts_cols].sum(axis=1)
print(f'nXfmrs w. n_events==1: {(n_events_per_xfmr == 1).sum()}')
print(f'nXfmrs w. n_events>1:  {(n_events_per_xfmr > 1).sum()}')
rcpo_by_xfmr

In [None]:
# Display the shape of the full events DF again for reference
end_events_df.shape

### Build the reason_counts_per_group DF grouped by serial number, rcpo_by_SN

In [None]:
# Reason counts per meter serial number, built from the first 1000 event rows only.
# All include_* options are switched off here.
# Keyword spellings (e.g. 'inclue_zero_counts') are kept exactly as used in this notebook.
rcpo_by_SN = AMIEndEvents.get_reason_counts_per_group(
    end_events_df=end_events_df.iloc[:1000],
    group_cols=['serialnumber'],
    group_freq=None,
    serial_number_col='serialnumber',
    reason_col='reason',
    include_normalize_by_nSNs=False,
    inclue_zero_counts=True,
    possible_reasons=None,
    include_nSNs=False,
    include_SNs=False,
    prem_nb_col='aep_premise_nb',
    include_nprem_nbs=False,
    include_prem_nbs=False,
    return_form={
        'return_multiindex_outg_reason': False,
        'return_normalized_separately': False,
    },
)

In [None]:
print(f'nSNs:                {rcpo_by_SN.shape[0]}')
# Row-wise total over all columns of rcpo_by_SN, i.e. events per serial number
n_events_per_SN = rcpo_by_SN.sum(axis=1)
print(f'nSNs w. n_events==1: {(n_events_per_SN == 1).sum()}')
print(f'nSNs w. n_events>1:  {(n_events_per_SN > 1).sum()}')
rcpo_by_SN

### Build the reason_counts_per_group DF grouped by serial number, and including the number of serial numbers connected to the transformer
### This is a closer mock-up (although still not exactly what we want) of what we're ultimately looking for from IT

In [None]:
# Query meter premise for the transformers that appear in the first 1000 event rows
mp = MeterPremise(
    df_construct_type=DFConstructType.kRunSqlQuery,
    contstruct_df_args=None,
    init_df_in_constructor=True,
    build_sql_function=None,
    build_sql_function_kwargs={
        'cols_of_interest': cols_of_interest_met_prem,
        'trsf_pole_nbs': end_events_df.iloc[:1000]['trsf_pole_nb'].unique().tolist(),
    },
    save_args=False,
)

In [None]:
# Work on a copy of the DataFrame held by the MeterPremise object
mp_df = mp.df.copy()

In [None]:
# Row count of the meter-premise DF versus its number of distinct serial numbers
print(mp_df.shape[0])
print(mp_df['mfr_devc_ser_nbr'].nunique())

In [None]:
# Distinct 'mfr_devc_ser_nbr' values among the first 1000 event rows, for comparison
# with the meter-premise counts printed above
end_events_df.iloc[:1000]['mfr_devc_ser_nbr'].nunique()

In [None]:
# Reason counts per (serial number, premise, transformer) for the first 1000 event rows,
# extended with the number of meters on each transformer and a per-row event total.
# Keyword spellings (e.g. 'inclue_zero_counts') are kept exactly as used in this notebook.
rcpo_final = AMIEndEvents.get_reason_counts_per_group(
    end_events_df=end_events_df.iloc[:1000],
    group_cols=['serialnumber', 'aep_premise_nb', 'trsf_pole_nb'],
    group_freq=None,
    gpby_dropna=False,
    serial_number_col='serialnumber',
    reason_col='reason',
    include_normalize_by_nSNs=False,
    inclue_zero_counts=True,
    possible_reasons=None,
    include_nSNs=False,
    include_SNs=False,
    prem_nb_col='aep_premise_nb',
    include_nprem_nbs=False,
    include_prem_nbs=False,
    return_form={
        'return_multiindex_outg_reason': False,
        'return_normalized_separately': False,
    },
)
# Index by serial number only; premise and transformer become ordinary columns
rcpo_final = rcpo_final.reset_index().set_index('serialnumber')
#-------------------------
# Number of distinct meter serial numbers per transformer, from meter premise
n_SNs_per_xfmr = mp_df.groupby('trsf_pole_nb')['mfr_devc_ser_nbr'].apply(lambda x: len(set(x)))
n_SNs_per_xfmr.name = 'n_SNs_on_xfmr'
#-------------------------
rcpo_final = pd.merge(
    rcpo_final,
    n_SNs_per_xfmr,
    left_on='trsf_pole_nb',
    right_index=True,
    how='left',
)
# The left merge leaves NaN for transformers without a match, which makes the column
# float64. Writing ints back through .loc does not change that dtype, so the nullable
# 'Int64' dtype is used to keep the counts integral while preserving missing values.
rcpo_final['n_SNs_on_xfmr'] = rcpo_final['n_SNs_on_xfmr'].astype('Int64')
front_cols = ['aep_premise_nb', 'trsf_pole_nb', 'n_SNs_on_xfmr']
rcpo_final = Utilities_df.move_cols_to_front(rcpo_final, front_cols)
# Everything after the leading descriptive columns is a per-reason count
rcpo_final['n_events_tot'] = rcpo_final.iloc[:, len(front_cols):].sum(axis=1)
rcpo_final = Utilities_df.move_cols_to_front(rcpo_final, front_cols + ['n_events_tot'])
#-------------------------

In [None]:
# Summary of rcpo_final: distinct serial numbers in the index, and how many rows have
# exactly one event versus more than one
print(f"nSNs:                {rcpo_final.index.nunique()}")
print(f"nSNs w. n_events==1: {(rcpo_final['n_events_tot']==1).sum()}" )
print(f"nSNs w. n_events>1:  {(rcpo_final['n_events_tot']>1).sum()}" )
rcpo_final

In [None]:
# rcpo_final = AMIEndEvents.get_reason_counts_per_group(
#     end_events_df = end_events_df.iloc[:1000], 
# #     group_cols=['serialnumber'], 
#     group_cols=['serialnumber', 'aep_premise_nb', 'trsf_pole_nb'], 
#     group_freq=None, 
#     gpby_dropna=False, 
#     serial_number_col='serialnumber', 
#     reason_col='reason', 
#     include_normalize_by_nSNs=False, 
#     inclue_zero_counts=True,
#     possible_reasons=None, 
#     include_nSNs=False, 
#     include_SNs=False, 
#     prem_nb_col='aep_premise_nb', 
#     include_nprem_nbs=False,
#     include_prem_nbs=False,   
#     return_form = dict(return_multiindex_outg_reason = False, 
#                        return_normalized_separately  = False)
# )
# rcpo_final=rcpo_final.reset_index().set_index('serialnumber')
# #-------------------------
# n_SNs_per_xfmr = mp_df.groupby('trsf_pole_nb')['mfr_devc_ser_nbr'].apply(lambda x: len(set(x)))
# n_SNs_per_xfmr.name = 'n_SNs_on_xfmr'
# #-------------------------
# rcpo_final = pd.merge(
#     rcpo_final, 
#     n_SNs_per_xfmr, 
#     left_on='trsf_pole_nb', 
#     right_index=True, 
#     how='left'
# )
# rcpo_final.loc[rcpo_final['n_SNs_on_xfmr'].notna(), 'n_SNs_on_xfmr'] = rcpo_final[rcpo_final['n_SNs_on_xfmr'].notna()]['n_SNs_on_xfmr'].astype(int)
# rcpo_final=Utilities_df.move_cols_to_front(rcpo_final, ['aep_premise_nb', 'trsf_pole_nb', 'n_SNs_on_xfmr'])
# rcpo_final['n_events_tot'] = rcpo_final.iloc[:, 3:].sum(axis=1)
# rcpo_final=Utilities_df.move_cols_to_front(rcpo_final, ['aep_premise_nb', 'trsf_pole_nb', 'n_SNs_on_xfmr', 'n_events_tot'])
# #-------------------------

In [None]:
# Reason counts per transformer for the first 1000 event rows, extended with the number
# of meters on each transformer and a per-transformer event total.
# Keyword spellings (e.g. 'inclue_zero_counts') are kept exactly as used in this notebook.
rcpo_final_xfmr = AMIEndEvents.get_reason_counts_per_group(
    end_events_df=end_events_df.iloc[:1000],
    group_cols=['trsf_pole_nb'],
    group_freq=None,
    gpby_dropna=False,
    serial_number_col='serialnumber',
    reason_col='reason',
    include_normalize_by_nSNs=False,
    inclue_zero_counts=True,
    possible_reasons=None,
    include_nSNs=False,
    include_SNs=False,
    prem_nb_col='aep_premise_nb',
    include_nprem_nbs=False,
    include_prem_nbs=False,
    return_form={
        'return_multiindex_outg_reason': False,
        'return_normalized_separately': False,
    },
)
#-------------------------
# Number of distinct meter serial numbers per transformer, from meter premise
n_SNs_per_xfmr = mp_df.groupby('trsf_pole_nb')['mfr_devc_ser_nbr'].apply(lambda x: len(set(x)))
n_SNs_per_xfmr.name = 'n_SNs_on_xfmr'
#-------------------------
rcpo_final_xfmr = pd.merge(
    rcpo_final_xfmr,
    n_SNs_per_xfmr,
    left_on='trsf_pole_nb',
    right_index=True,
    how='left',
)
#-------------------------
# The left merge leaves NaN for transformers without a match, which makes the column
# float64. Writing ints back through .loc does not change that dtype, so the nullable
# 'Int64' dtype is used to keep the counts integral while preserving missing values.
rcpo_final_xfmr['n_SNs_on_xfmr'] = rcpo_final_xfmr['n_SNs_on_xfmr'].astype('Int64')
front_cols = ['n_SNs_on_xfmr']
rcpo_final_xfmr = Utilities_df.move_cols_to_front(rcpo_final_xfmr, front_cols)
# Everything after the leading descriptive column is a per-reason count
rcpo_final_xfmr['n_events_tot'] = rcpo_final_xfmr.iloc[:, len(front_cols):].sum(axis=1)
rcpo_final_xfmr = Utilities_df.move_cols_to_front(rcpo_final_xfmr, front_cols + ['n_events_tot'])

In [None]:
#-------------------------
# Drop rows whose transformer label is one of the three placeholder values, or is missing
rcpo_final_xfmr = rcpo_final_xfmr.loc[
    ~rcpo_final_xfmr.index.isin(['NETWORK', 'PRIMARY', 'TRANSMISSION'])
    & rcpo_final_xfmr.index.notna()
]

In [None]:
# Display the filtered per-transformer table
rcpo_final_xfmr

In [None]:
# Rebuild the per-transformer reason counts (first 1000 event rows), this time keeping
# the '_nSNs' / '_nprem_nbs' columns and adding, from meter premise, the number of
# distinct serial numbers and premise numbers on each transformer.
# Keyword spellings (e.g. 'inclue_zero_counts') are kept exactly as used in this notebook.
rcpo_final_xfmr = AMIEndEvents.get_reason_counts_per_group(
    end_events_df=end_events_df.iloc[:1000],
    group_cols=['trsf_pole_nb'],
    group_freq=None,
    gpby_dropna=False,
    serial_number_col='serialnumber',
    reason_col='reason',
    include_normalize_by_nSNs=False,
    inclue_zero_counts=True,
    possible_reasons=None,
    include_nSNs=True,
    include_SNs=False,
    prem_nb_col='aep_premise_nb',
    include_nprem_nbs=True,
    include_prem_nbs=False,
    return_form={
        'return_multiindex_outg_reason': False,
        'return_normalized_separately': False,
    },
)
#-------------------------
# Drop rows whose transformer label is a placeholder value or is missing
rcpo_final_xfmr = rcpo_final_xfmr.loc[~rcpo_final_xfmr.index.isin(['NETWORK', 'PRIMARY', 'TRANSMISSION'])]
rcpo_final_xfmr = rcpo_final_xfmr.loc[rcpo_final_xfmr.index.notna()]
#-------------------------
# Distinct serial numbers per transformer, from meter premise
n_SNs_per_xfmr = mp_df.groupby('trsf_pole_nb')['mfr_devc_ser_nbr'].apply(lambda x: len(set(x)))
n_SNs_per_xfmr.name = 'n_SNs_on_xfmr'
#-------------------------
# Distinct premise numbers per transformer, from meter premise
n_PNs_per_xfmr = mp_df.groupby('trsf_pole_nb')['prem_nb'].apply(lambda x: len(set(x)))
n_PNs_per_xfmr.name = 'n_PNs_on_xfmr'
#-------------------------
rcpo_final_xfmr = pd.merge(
    rcpo_final_xfmr,
    n_SNs_per_xfmr,
    left_on='trsf_pole_nb',
    right_index=True,
    how='left',
)
#-------------------------
rcpo_final_xfmr = pd.merge(
    rcpo_final_xfmr,
    n_PNs_per_xfmr,
    left_on='trsf_pole_nb',
    right_index=True,
    how='left',
)
#-------------------------
# The left merges leave NaN for transformers without a match, which makes the columns
# float64. Writing ints back through .loc does not change that dtype, so the nullable
# 'Int64' dtype is used to keep the counts integral while preserving missing values.
# Arithmetic that uses these columns will therefore yield pandas nullable dtypes.
rcpo_final_xfmr['n_SNs_on_xfmr'] = rcpo_final_xfmr['n_SNs_on_xfmr'].astype('Int64')
rcpo_final_xfmr['n_PNs_on_xfmr'] = rcpo_final_xfmr['n_PNs_on_xfmr'].astype('Int64')
#-----
front_cols = ['_nSNs', '_nprem_nbs', 'n_SNs_on_xfmr', 'n_PNs_on_xfmr']
rcpo_final_xfmr = Utilities_df.move_cols_to_front(rcpo_final_xfmr, front_cols)
# Everything after the leading descriptive columns is a per-reason count
rcpo_final_xfmr['n_events_tot'] = rcpo_final_xfmr.iloc[:, len(front_cols):].sum(axis=1)
#-----
rcpo_final_xfmr = Utilities_df.move_cols_to_front(rcpo_final_xfmr, front_cols + ['n_events_tot'])
#-------------------------
# Starred names come from the event data; unstarred names come from meter premise
rcpo_final_xfmr = rcpo_final_xfmr.rename(columns={
    '_nSNs':         'n_SNs*',
    '_nprem_nbs':    'n_PNs*',
    'n_SNs_on_xfmr': 'n_SNs',
    'n_PNs_on_xfmr': 'n_PNs',
})

In [None]:
# Display the per-transformer table with the renamed leading columns
rcpo_final_xfmr

In [None]:
# Normalised copy of the per-transformer table: every column from position 4 onward
# is divided, row by row, by that transformer's 'n_SNs' value.
# NOTE(review): with the column order set up earlier, position 4 is 'n_events_tot', so
#   the event total is normalised along with the per-reason counts -- confirm intended.
rcpo_final_xfmr_nrm = rcpo_final_xfmr.copy()
norm_col = 'n_SNs'
cols_to_norm = rcpo_final_xfmr_nrm.columns[4:].tolist()
#-----
rcpo_final_xfmr_nrm[cols_to_norm] = rcpo_final_xfmr_nrm[cols_to_norm].div(
    rcpo_final_xfmr_nrm[norm_col],
    axis=0,
)
#-----
rcpo_final_xfmr_nrm