In [None]:
# Execute the companion METHODS notebook before anything else in this notebook runs.
# NOTE(review): presumably this supplies helper definitions used by later cells -- confirm
# which names are expected from it, and that the relative path resolves from the directory
# the kernel is started in.
%run ./model_end_events_for_outages_METHODS.ipynb

In [None]:
from importlib import reload
#reload(Utilities)
# NOTE: To reload a class imported as, e.g., 
# from module import class
# One must call:
#   1. import module
#   2. reload module
#   3. from module import class

import sys, os
import re
from pathlib import Path
import json
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version
import copy

import itertools

import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from MECPODf import MECPODf
from MECPOAn import MECPOAn
from MECPOCollection import MECPOCollection
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
#sys.path.insert(0, os.path.join(os.path.realpath('..'), 'Utilities'))
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import Plot_General
import Plot_Box_sns
import Plot_Hist
import Plot_Bar
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer

In [None]:
from sklearn.preprocessing import OrdinalEncoder

In [None]:
def adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args,
    new_max_total_counts_args=None, 
    no_outg_idxs=None,
    adjust_barplot_kwargs=None, 
    inplace=False
):
    r"""
    Set the cpo_df_subset_by_mjr_mnr_cause_args and max_total_counts_args of every element in
    mecpo_colls_w_get_args WITH THE EXCEPTION OF the elements identified by no_outg_idxs.
      This is because selecting an outage subset from the no-outage data doesn't make any sense!
    
    mecpo_colls_w_get_args:
      A list of dict objects.
      Each dict should have keys:
        mecpo_coll:
          The MECPOCollection object
        cpo_df_name:
          Name of cpo_df to grab from each MECPOAn in the collection for plotting
        cpo_df_subset_by_mjr_mnr_cause_args:
          Arguments to grab any subset of the data by outage major/minor cause
        max_total_counts_args=None:
          Arguments to further select a subset by the maximum number of total counts.
        mecpo_an_order:
          The order in which the analyses in MECPOAn should be plotted
          
    new_cpo_df_subset_by_mjr_mnr_cause_args:
      The new value stored under 'cpo_df_subset_by_mjr_mnr_cause_args' for every non-excluded element.
      NOTE: The same object is stored in every element (it is not copied per element).
      
    new_max_total_counts_args:
      The new value stored under 'max_total_counts_args' for every non-excluded element.
      
    no_outg_idxs:
      Index position(s) in mecpo_colls_w_get_args to leave untouched.
      May be a single int or a list/tuple/set of ints.
      Negative values count from the end of the list, as with normal Python indexing.
      Every index must refer to an existing element, otherwise an AssertionError is raised.
          
    adjust_barplot_kwargs:
      If one wants to, e.g., change hatches for all, etc.
      When not None, it is applied (via Plot_General.adjust_kwargs, in place) to the barplot_kwargs_shared
      of EVERY collection, including those listed in no_outg_idxs.
      
    inplace:
      If False (default), a deep copy of mecpo_colls_w_get_args is adjusted and returned.
      If True, the input is adjusted directly and None is returned.
    """
    #-------------------------
    n_colls = len(mecpo_colls_w_get_args)
    #-------------------------
    # Normalize no_outg_idxs into a set of non-negative positions.
    # Without the normalization, a negative index (e.g., -1 for "the last collection") would pass a simple
    # idx<len check but would never match the non-negative indices produced by enumerate below, meaning
    # the no-outage collection would silently receive the outage subset arguments.
    positions_to_skip = set()
    if no_outg_idxs is not None:
        if not isinstance(no_outg_idxs, (list, tuple, set)):
            assert(isinstance(no_outg_idxs, int))
            no_outg_idxs = [no_outg_idxs]
        # An out-of-range index would simply behave as if no no_outg member were identified.
        # Assertion here just to make sure user understands what's going on
        for idx in no_outg_idxs:
            assert(-n_colls <= idx < n_colls)
            positions_to_skip.add(idx % n_colls)
    #-------------------------
    if not inplace:
        mecpo_colls_w_get_args = copy.deepcopy(mecpo_colls_w_get_args)
    #-------------------------
    for idx, coll_w_get_args in enumerate(mecpo_colls_w_get_args):
        # Barplot kwargs are adjusted for all collections, no-outage ones included
        if adjust_barplot_kwargs is not None:
            Plot_General.adjust_kwargs(
                coll_w_get_args['mecpo_coll'].barplot_kwargs_shared, 
                adjust_barplot_kwargs, 
                inplace=True
            )
        if idx in positions_to_skip:
            continue
        coll_w_get_args['cpo_df_subset_by_mjr_mnr_cause_args'] = new_cpo_df_subset_by_mjr_mnr_cause_args
        coll_w_get_args['max_total_counts_args'] = new_max_total_counts_args
    #-------------------------
    if inplace:
        return
    return mecpo_colls_w_get_args

In [None]:
def adjust_cpo_df_names_in_mecpo_colls_w_get_args(
    mecpo_colls_w_get_args, 
    cpo_df_names
):
    r"""
    Overwrite the 'cpo_df_name' entry of elements in mecpo_colls_w_get_args.
    The input list is modified directly, and the same list is returned.
    
    mecpo_colls_w_get_args:
        A list of dict objects, each of which carries a 'cpo_df_name' entry to be replaced.
    
    cpo_df_names:
        The replacement name(s), supplied as a string, a list of lists, or a dict
        string:
            One cpo_df_name applied to every element of mecpo_colls_w_get_args
        list:
            Each element must itself be a list of length 2, of the form [index position, new cpo_df_name].
            The index position must be an int, and no index position may appear more than once.
        dict:
            Keys are index positions in mecpo_colls_w_get_args.
            Values are the new cpo_df_names to use at those positions.
            
    In every case, each index position must be less than len(mecpo_colls_w_get_args).
    """
    #-------------------------
    assert(Utilities.is_object_one_of_types(cpo_df_names, [str, list, dict]))
    #-------------------------
    # Reduce all accepted input forms to a single mapping: {index position: new cpo_df_name}
    if isinstance(cpo_df_names, dict):
        name_by_position = cpo_df_names
    elif isinstance(cpo_df_names, str):
        name_by_position = dict.fromkeys(range(len(mecpo_colls_w_get_args)), cpo_df_names)
    elif isinstance(cpo_df_names, list):
        elements_are_lists = Utilities.are_all_list_elements_of_type(cpo_df_names, list)
        assert(elements_are_lists and 
               Utilities.are_list_elements_lengths_homogeneous(cpo_df_names, 2))
        name_by_position = {}
        for position, new_name in cpo_df_names:
            assert(isinstance(position, int))
            # Each position may only be assigned once
            assert(position not in name_by_position)
            name_by_position[position] = new_name
    else:
        assert(0)
    #-------------------------
    for position, new_name in name_by_position.items():
        assert(position < len(mecpo_colls_w_get_args))
        mecpo_colls_w_get_args[position]['cpo_df_name'] = new_name
    #-------------------------
    return mecpo_colls_w_get_args

In [None]:
def draw_cpo_dfs_full_vs_direct_3x2(
    fig_num, 
    mecpo_an_list_full, 
    mecpo_an_list_drct, 
    cpo_full_dfs_name,
    cpo_drct_dfs_name=None,
    cpo_df_subset_by_mjr_mnr_cause_args=None, 
    max_total_counts_args=None, 
    subplot_titles=None, 
    reason_order=None, 
    n_reason_to_include=None, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle=None, 
    row_major=False, 
    save_args=dict(
        save_fig=False,
        save_dir='', 
        save_name=''
    ), 
    mecpo_an_list_no_outg=None,
    cpo_no_outg_dfs_name=None, 
    **kwargs
):
    r"""
    Compares full to direct cpo_dfs for six different cases, drawn as a 3x2 grid of bar plots.
    Intended to be used to compare 01_05, 06_10, 11_15, 16_20, 21_25, 26_30
    Returns (fig, axs).
    
    fig_num:
      Figure number handed to Plot_General.default_subplots
    
    mecpo_an_list_full, mecpo_an_list_drct:
      Should be lists (or possibly dicts with matching keys) EACH CONTAINING SIX MECPOAn objects
      If dicts are given, the analyses are plotted in the key order of mecpo_an_list_full.
      
    cpo_full_dfs_name:
      The name of the cpo_full pd.DataFrames, used to retrieve the correct item from mecpo_an_list_full
    cpo_drct_dfs_name:
      Similar to cpo_full_dfs_name, but for the direct collection.  If left equal to None, it will be set equal
      to cpo_full_dfs_name
      
    cpo_df_subset_by_mjr_mnr_cause_args:
      Allows one to select subsets of the pd.DataFrames by the outage type.
      This should be a dict with arguments appropriate for the MECPOAn.get_cpo_df_subset_by_mjr_mnr_cause
        function (except for the cpo_df_name argument, which will be set to cpo_full_dfs_name/cpo_drct_dfs_name)
        
    max_total_counts_args:
      Allows one to further select a subset by the maximum number of total counts.
      This should be a dict with arguments appropriate for the MECPODf.get_cpo_df_subset_below_max_total_counts
        function (except for the cpo_df argument, which will be set to return_df)
      
    subplot_titles:
      A list of titles for each subplot (e.g., ['01-05 Days', '06-10 Days', ... '26-30 Days'])
      If None is given and mecpo_an_list_full,mecpo_an_list_drct are dicts, the keys will be used
      
    reason_order:
      The order in which to plot the reasons along the x-axis.
      If set to None, the order will be taken from the first DF in mecpo_an_list_full, sorted by mean value
      
    n_reason_to_include:
      Number of reasons to include in the plot.  A reasonable number here is 10, as there is typically a steep drop off
      after this.
      If set equal to None, all will be included.
      
    is_rcpo:
      Set to True if the DataFrames are Reason Counts Per Outage. 
      Set to False if they are Id (enddeviceeventtypeid) Counts Per Outage.
      Used mainly to determine the size of margins to use, as the Reasons tend to be much longer than IDs
      
    replace_xtick_labels_with_ints:
      If True, the xtick labels will be replaced with integers, and a key will be printed to the right of the figures.
      
    include_xticklabels_for_all:
      By default (i.e., when include_xticklabels_for_all is False), the xtick labels are included only for the bottom
        two plots.
      If include_xticklabels_for_all==True, xtick labels are included for each subplot.
      
    suptitle:
      Overall title for collection of plots
      
    row_major:
      If True, the array of axes, axs, is flattened in row-major order.
      If False, the array of axes, axs, is flattened in column-major order.      
      Default: False, so that as the indices increase, the plots move down the first column before
               entering and traversing the second column.
      
    save_args:
      A dict containing information on whether or not to save figure, and, if so, where to save it.
      It can also simply be set to False
      
    mecpo_an_list_no_outg:
      Optional third collection of SIX MECPOAn objects, drawn alongside the full and direct collections.
      If mecpo_an_list_full/mecpo_an_list_drct are dicts and this is also a dict, it must have the same keys.
    cpo_no_outg_dfs_name:
      Similar to cpo_full_dfs_name, but for mecpo_an_list_no_outg.  If left equal to None, it will be set equal
      to cpo_full_dfs_name
      
    kwargs:
        barplot_kwargs_full
        barplot_kwargs_drct
        common_barplot_kwargs
        common_general_kwargs
          NOTE: A copy is made internally, so the dict supplied by the caller is not modified.
        sharex
        sharey
        barplot_kwargs_no_outg
    """
    #-------------------------
    # One analysis per subplot (3x2 = 6), for BOTH collections
    assert(len(mecpo_an_list_full)==len(mecpo_an_list_drct)==6)
    analysis_keys = None
    if isinstance(mecpo_an_list_full, dict) or isinstance(mecpo_an_list_drct, dict):
        assert(isinstance(mecpo_an_list_full, dict) and isinstance(mecpo_an_list_drct, dict))
        assert(mecpo_an_list_full.keys()==mecpo_an_list_drct.keys())
        analysis_keys = list(mecpo_an_list_full.keys())
        # Convert to lists aligned on the same key order, so positional access below is valid
        # for any type of key and the subplot titles line up with what is drawn
        mecpo_an_list_full = [mecpo_an_list_full[key] for key in analysis_keys]
        mecpo_an_list_drct = [mecpo_an_list_drct[key] for key in analysis_keys]
        if subplot_titles is None:
            subplot_titles = analysis_keys
    if subplot_titles is None:
        subplot_titles = ['' for x in range(6)]
    assert(len(subplot_titles)==6)
    #--------------------------------------------------
    # Unpack kwargs
    barplot_kwargs_full   = kwargs.get('barplot_kwargs_full', dict(facecolor='red', label="Outages (All Xfmrs)"))
    barplot_kwargs_drct   = kwargs.get('barplot_kwargs_drct', dict(facecolor='green', label="Outages (Xfmr tied to Outg. Location ID)"))
    common_barplot_kwargs = kwargs.get('common_barplot_kwargs', dict(alpha=0.25, fill=True, edgecolor='black', hatch='//'))
    
    barplot_kwargs_no_outg = kwargs.get('barplot_kwargs_no_outg', dict(facecolor='orange', label="No Outages"))
    #----------
    xtick_labelrotation=90
    if is_rcpo:
        xtick_labelsize=15
    else:
        xtick_labelsize=20
    ytick_labelsize=25
    if replace_xtick_labels_with_ints:
        xtick_labelrotation=0
        xtick_labelsize=25
    dflt_common_general_kwargs = dict( 
        n_bars_to_include=n_reason_to_include, 
        keep_edges_opaque=True, 
        include_hatches=False, 
        draw_side_by_side=True, 
        draw_single_idx_full_width=None,     
        run_set_general_plotting_args=True, 
        orient='v',
        draw_legend=True, 
        legend_args=dict(fontsize=15), 
        ylabel_args = dict(ylabel='', fontsize=0, x=0.0, y=0.8, ha='left', va='bottom'), 
        xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
        tick_args=[dict(axis='x', labelrotation=xtick_labelrotation, labelsize=xtick_labelsize), 
                   dict(axis='y', labelsize=ytick_labelsize)]
    )
    # Work on a private copy: 'order' (and possibly 'ax_args') are written into this dict below, and
    # those writes must not leak back into a dict owned by the caller and reused for later calls.
    common_general_kwargs = copy.deepcopy(kwargs.get('common_general_kwargs', dflt_common_general_kwargs))
    #----------
    sharex = kwargs.get('sharex', True)
    sharey = kwargs.get('sharey', True)
    #--------------------------------------------------
    if cpo_drct_dfs_name is None:
        cpo_drct_dfs_name = cpo_full_dfs_name
    #-------------------------    
    # Entries in common_barplot_kwargs take precedence over the collection-specific ones
    barplot_kwargs_full = {**barplot_kwargs_full, **common_barplot_kwargs}
    barplot_kwargs_drct = {**barplot_kwargs_drct, **common_barplot_kwargs}
    barplot_kwargs_no_outg = {**barplot_kwargs_no_outg, **common_barplot_kwargs}
    #-------------------------
    if save_args:
        assert('save_fig' in save_args)
        if save_args['save_fig']:
            assert('save_dir' in save_args and 
                   'save_name' in save_args)
    #----------------------------------------------------------------------
    # BUILD THE COLLECTION OF DATAFRAMES
    #----------------------------------------------------------------------
    cpo_dfs_full = [
        mecpo_an.get_cpo_df(
            cpo_df_name=cpo_full_dfs_name, 
            cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
            max_total_counts_args=max_total_counts_args
        )
        for mecpo_an in mecpo_an_list_full
    ]
    #-----
    cpo_dfs_drct = [
        mecpo_an.get_cpo_df(
            cpo_df_name=cpo_drct_dfs_name, 
            cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
            max_total_counts_args=max_total_counts_args
        )
        for mecpo_an in mecpo_an_list_drct
    ]
    #-----
    cpo_dfs_no_outg = None
    if mecpo_an_list_no_outg is not None:
        assert(len(mecpo_an_list_no_outg)==6)
        if isinstance(mecpo_an_list_no_outg, dict) and analysis_keys is not None:
            # Align the no-outage analyses on the same keys/order as the full and direct collections
            assert(set(mecpo_an_list_no_outg.keys())==set(analysis_keys))
            mecpo_an_list_no_outg = [mecpo_an_list_no_outg[key] for key in analysis_keys]
        if cpo_no_outg_dfs_name is None:
            cpo_no_outg_dfs_name = cpo_full_dfs_name
        # NOTE(review): the outage major/minor cause subset arguments are also passed for the no-outage
        #               data here -- confirm get_cpo_df handles/ignores them sensibly for that case.
        cpo_dfs_no_outg = []
        for i in range(len(mecpo_an_list_no_outg)):
            cpo_dfs_no_outg.append(
                mecpo_an_list_no_outg[i].get_cpo_df(
                    cpo_df_name=cpo_no_outg_dfs_name, 
                    cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
                    max_total_counts_args=max_total_counts_args
                )
            )
    #----------------------------------------------------------------------
    n_x=2
    n_y=3
    #-----
    fig, axs = Plot_General.default_subplots(n_x=n_x, n_y=n_y, fig_num=fig_num, sharex=sharex, sharey=sharey, 
                                             return_flattened_axes=True, row_major=row_major)
    #-------------------------
    # Margins: extra head room when a suptitle is drawn; extra bottom room for long (rotated) Reason labels;
    # extra room on the right when the integer-to-label key is printed beside the plots
    scale_margin_top = None
    if suptitle:
        scale_margin_top = 0.75
    if not replace_xtick_labels_with_ints:
        right = None # i.e., default
        if is_rcpo:
            bottom = 0.5
        else:
            bottom = 0.15
    else:
        bottom = None #i.e., default
        right = 0.75

    subplots_adjust_args = Plot_General.get_subplots_adjust_args_std_3x2(scale_hspace=0.5, scale_margin_left=0.75, 
                                                                         scale_margin_top=scale_margin_top, 
                                                                         bottom=bottom, right=right)
    fig = Plot_General.adjust_subplots_args(fig, subplots_adjust_args)
    #-------------------------
    # When using sharex=True and sharey=True, the default behavior is:
    #   When subplots have a shared x-axis along a column, only the x tick labels of the bottom subplot are created. 
    #   Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
    #   To turn on all tick labels for x(y), set include_all_x(y)_tick_labels to True
    include_all_x_tick_labels=False # If false, only the x tick labels of the bottom subplot are created
    include_all_y_tick_labels=True  # If false, only the y tick labels of the first column subplot are created
    #-------------------------
    if reason_order is None:
        reason_order = cpo_dfs_full[0].mean().sort_values(ascending=False).index.tolist()
    common_general_kwargs['order'] = reason_order
    #-------------------------
    xtick_rename_dict = None
    if replace_xtick_labels_with_ints:
        if n_reason_to_include is not None:
            xtick_elements = reason_order[:n_reason_to_include]
        else:
            xtick_elements = reason_order
        xtick_rename_dict = {xtick_el:i+1 for i,xtick_el in enumerate(xtick_elements)}
        #-------------------------
        # NOTE: xticks = np.arange(len(xtick_rename_dict)) below is to ensure all ticks are drawn,
        #       as sometimes mpl draws fewer ticks when there are many
        Plot_General.adjust_kwargs(
            general_kwargs=common_general_kwargs, 
            new_values_dict=dict(
                ax_args=dict(
                    xticks = np.arange(len(xtick_rename_dict)),
                    xticklabels=list(xtick_rename_dict.values())
                )
            ), 
            append_to_containers=False,
            inplace=True
        )
        
    #------------------------- ACTUAL PLOTTING!!! -------------------------
    #----------------------------------------------------------------------
    # The bottom two plots should always include the xtick labels
    # However, one only needs to worry about this when sharex=False
    # NOTE: If include_xticklabels_for_all==True, one cannot simply set idxs_to_exclude_xticklabels = []
    #       This would work if sharex==False, but if sharex==True, plt.subplots automatically turns off the
    #       tick labels for all but bottom.  Thus, instead of not turning them off (via, e.g., idxs_to_exclude_xticklabels 
    #       = []), one must actively turn them on as well!
    idxs_to_include_xticklabels = [2, 5]
    if row_major:
        idxs_to_include_xticklabels = [4, 5]
    idxs_to_exclude_xticklabels = [x for x in range(6) if x not in idxs_to_include_xticklabels]
    common_general_kwargs_wo_xticklabels = Plot_General.adjust_kwargs(common_general_kwargs, dict(ax_args=dict(xticklabels=[])))
    if include_xticklabels_for_all:
        idxs_to_exclude_xticklabels = []
    #-------------------------
    for i in range(len(cpo_dfs_full)):
        common_general_kwargs_i = common_general_kwargs
        if(not sharex and 
           i in idxs_to_exclude_xticklabels):
            common_general_kwargs_i = common_general_kwargs_wo_xticklabels
        #----------
        dfs_w_args=[
            (cpo_dfs_full[i], barplot_kwargs_full), 
            (cpo_dfs_drct[i], barplot_kwargs_drct)
        ]
        if cpo_dfs_no_outg is not None:
            dfs_w_args.append((cpo_dfs_no_outg[i], barplot_kwargs_no_outg))
        
        axs[i] = Plot_Bar.plot_multiple_barplots(
            ax=axs[i], 
            dfs_w_args=dfs_w_args, 
            **common_general_kwargs_i, 
            title_args=dict(label=subplot_titles[i], fontsize=20)
        )
        if include_xticklabels_for_all:
            axs[i].tick_params(axis='x', labelbottom=True)
    #---------------------------------------------------------------------------
    # Make all have same scale
    if not sharey:
        Plot_General.make_all_axes_have_same_ylims(axs)
    #---------------------------------------------------------------------------
    subplot_layout_params = Plot_General.get_subplot_layout_params(fig)
    #-----
    if is_rcpo:
        supxlabel = 'Reason'
    else:
        supxlabel = 'ID'
    fig.supxlabel(supxlabel, fontsize=30, 
                  x=subplot_layout_params['right'], ha='right', 
                  y=0.0, va='bottom')
    fig.supylabel('Normalized Counts', fontsize=30, 
                  x=0.0, ha='left',
                  y=subplot_layout_params['top'], va='top')
    if suptitle:
        fig.suptitle(suptitle, fontsize=50, 
                     x=0.5*(subplot_layout_params['left']+subplot_layout_params['right']), ha='center')

    if include_all_y_tick_labels and sharey:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='y', labelleft=True)

    if include_all_x_tick_labels and sharex:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='x', labelbottom=True)

    if replace_xtick_labels_with_ints:
        Plot_General.generate_xtick_labels_legend_textbox(
            fig=fig, 
            xtick_rename_dict=xtick_rename_dict, 
            text_x_pos=1.02*subplot_layout_params['right'], 
            text_y_pos=subplot_layout_params['top'])
    #---------------------------------------------------------------------------
    if save_args and save_args['save_fig']:
        Plot_General.save_fig(fig, save_args['save_dir'], save_args['save_name'])   
    #---------------------------------------------------------------------------
    return fig, axs

In [None]:
def draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num, 
    mecpo_colls_w_get_args, 
    reason_order=None, 
    n_reason_to_include=None, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle=None, 
    row_major=False, 
    save_args=dict(
        save_fig=False,
        save_dir='', 
        save_name=''
    ), 
    **kwargs
):
    r"""
    mecpo_colls_w_get_args:
      A list of dict objects.
      Each dict should have keys:
        mecpo_coll:
          The MECPOCollection object
        cpo_df_name:
          Name of cpo_df to grab from each MECPOAn in the collection for plotting
        cpo_df_subset_by_mjr_mnr_cause_args:
          Arguments to grab any subset of the data by outage major/minor cause.
          This should be a dict with arguments appropriate for the MECPOAn.get_cpo_df_subset_by_mjr_mnr_cause
            function (except for the cpo_df_name argument, which will be set to cpo_df_name)  
        max_total_counts_args=None:
          Arguments to further select a subset by the maximum number of total counts.
          This should be a dict with arguments appropriate for the MECPODf.get_cpo_df_subset_below_max_total_counts
            function (except for the cpo_df argument, which will be set to return_df
        mecpo_an_order:
          The order in which the analyses in MECPOAn should be plotted
          
      !!!!!!!!!!!!!!!!!!!!    
      FOR NOW (to be safe):
      !!!!!!!!!!!!!!!!!!!!
        Each mecpo_coll must have the same keys.
          This will ensure that the intended analyses are being plotted together
          These can also be used as the subplot titles
          
    reason_order:
      The order in which to plot the reasons along the x-axis.
      This should be a list of Reasons or IDs (enddeviceeventtypeid).
      If set to an index idx, the order will be taken from mecpo_colls_w_get_args[idx]['mecpo_coll'] via the
        get_rough_reason_ordering method.
      
    n_reason_to_include:
      Number of reasons to include in the plot.  A reasonable number here is 10, as there is typically a steep drop off
      after this.
      If set equal to None, all will be included.
      
    is_rcpo:
      Set to True if the DataFrames are Reason Counts Per Outage. 
      Set to False if they are Id (enddeviceeventtypeid) Counts Per Outage.
      Used mainly to determine the size of margins to use, as the Reasons tend to be much longer than IDs
      
    replace_xtick_labels_with_ints:
      If True, the xtick labels will be replaced with integers, and a key will be printed to the right of the figures.
      
    include_xticklabels_for_all:
      By default (i.e., when include_xticklabels_for_all is False), the xtick labels are included only for the bottom
        two plots.
      If include_xticklabels_for_all==True, xtick labels are included for each subplot.
      
    suptitle:
      Overall title for collection of plots
      
    row_major:
      If True, the array of axes, axs, is flattened in row-major order.
      If False, the array of axes, axs, is flattened in column-major order.      
      Default: False, so the as the indices increase, the plots move down the first column before
               entering and traversing the second column.
      
    save_args:
      A dict containing information on whether or not to save figure, and, if so, where to save it.
      It can also simply be set to False
      
    kwargs:
        common_barplot_kwargs
        common_general_kwargs
        sharex
        sharey
    """
    #-------------------------    
    # Make sure each element in mecpo_colls_w_get_args has the expected keys
    necessary_mecpo_colls_w_get_args_keys = ['mecpo_coll', 'cpo_df_name','mecpo_an_order']
    other_mecpo_colls_w_get_args_keys = ['cpo_df_subset_by_mjr_mnr_cause_args', 'max_total_counts_args']
    for coll_w_get_args in mecpo_colls_w_get_args:
        assert(len(set(necessary_mecpo_colls_w_get_args_keys).difference(set(coll_w_get_args.keys())))==0)
        for other_key in other_mecpo_colls_w_get_args_keys:
            coll_w_get_args[other_key] = coll_w_get_args.get(other_key, None)
    
    # Plotting 6 subplots (3x2), so each MECPOCollection should have 6 MECPOAn objects
    for mecpo_coll in [x['mecpo_coll'] for x in mecpo_colls_w_get_args]:
        assert(mecpo_coll.n_mecpo_ans<=6)

    # (Possibly temporary) Restriction: Each MECPOCollection must have the same keys
    mecpo_an_keys = mecpo_colls_w_get_args[0]['mecpo_coll'].mecpo_an_keys
    assert(len(mecpo_an_keys)<=6)
    for mecpo_coll in [x['mecpo_coll'] for x in mecpo_colls_w_get_args]:
        assert(len(set(mecpo_an_keys).symmetric_difference(set(mecpo_coll.mecpo_an_keys)))==0)
    #--------------------------------------------------
    # Unpack kwargs
    common_barplot_kwargs = kwargs.get('common_barplot_kwargs', dict(alpha=0.25, fill=True, edgecolor='black', hatch='//'))
    #----------
    xtick_labelrotation=90
    if is_rcpo:
        xtick_labelsize=15
    else:
        xtick_labelsize=20
    ytick_labelsize=25
    if replace_xtick_labels_with_ints:
        xtick_labelrotation=0
        xtick_labelsize=25
    dflt_common_general_kwargs = dict( 
        n_bars_to_include=n_reason_to_include, 
        keep_edges_opaque=True, 
        include_hatches=False, 
        draw_side_by_side=True, 
        draw_single_idx_full_width=None,     
        run_set_general_plotting_args=True, 
        orient='v',
        draw_legend=True, 
        legend_args=dict(fontsize=15), 
        ylabel_args = dict(ylabel=f'', fontsize=0, x=0.0, y=0.8, ha='left', va='bottom'), 
        xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
        tick_args=[dict(axis='x', labelrotation=xtick_labelrotation, labelsize=xtick_labelsize), 
                   dict(axis='y', labelsize=ytick_labelsize)]
    )
    common_general_kwargs = kwargs.get('common_general_kwargs', dflt_common_general_kwargs)
    #----------
    sharex = kwargs.get('sharex', True)
    sharey = kwargs.get('sharey', True)
    #--------------------------------------------------
    if save_args:
        assert('save_fig' in save_args)
        if save_args['save_fig']:
            assert('save_dir' in save_args and 
                   'save_name' in save_args)        
    #----------------------------------------------------------------------
    n_x=2
    n_y=3
    #-----
    fig, axs = Plot_General.default_subplots(n_x=n_x, n_y=n_y, fig_num=fig_num, sharex=sharex, sharey=sharey, 
                                             return_flattened_axes=True, row_major=row_major)
    #-------------------------
    scale_margin_top = None
    if suptitle:
        scale_margin_top = 0.75
    if not replace_xtick_labels_with_ints:
        right = None # i.e., default
        if is_rcpo:
            bottom = 0.5
        else:
            bottom = 0.15
    else:
        bottom = None #i.e., default
        right = 0.75

    subplots_adjust_args = Plot_General.get_subplots_adjust_args_std_3x2(scale_hspace=0.5, scale_margin_left=0.75, 
                                                                         scale_margin_top=scale_margin_top, 
                                                                         bottom=bottom, right=right)
    fig = Plot_General.adjust_subplots_args(fig, subplots_adjust_args)
    #-------------------------
    # When using sharex=True and sharey=True, the default behavior is:
    #   When subplots have a shared x-axis along a column, only the x tick labels of the bottom subplot are created. 
    #   Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
    #   To turn on all tick labels for x(y), set include_all_x(y)_tick_labels to True
    include_all_x_tick_labels=False # If false, only the x tick labels of the bottom subplot are created
    include_all_y_tick_labels=True  # If false, only the y tick labels of the first column subplot are created
    #-------------------------
    if reason_order is None:
        reason_order = 0
    assert(Utilities.is_object_one_of_types(reason_order, [int, list]))
    if isinstance(reason_order, int):
        reason_order = mecpo_colls_w_get_args[reason_order]['mecpo_coll'].get_rough_reason_ordering(
            cpo_df_name=mecpo_colls_w_get_args[reason_order]['cpo_df_name'], 
            cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[reason_order]['cpo_df_subset_by_mjr_mnr_cause_args'],
            max_total_counts_args=mecpo_colls_w_get_args[reason_order]['max_total_counts_args']
        )
    #-----
    common_general_kwargs['order'] = reason_order
    #-------------------------
    if replace_xtick_labels_with_ints:
        if n_reason_to_include is not None:
            xtick_elements = reason_order[:n_reason_to_include]
        else:
            xtick_elements = reason_order
        xtick_rename_dict = {xtick_el:i+1 for i,xtick_el in enumerate(xtick_elements)}
    #-------------------------
    # NOTE: xticks = np.arange(len(xtick_rename_dict)) below is to ensure all ticks are drawn,
    #       as sometimes mpl draws less ticks when there are many    
    if replace_xtick_labels_with_ints:
        Plot_General.adjust_kwargs(
            general_kwargs=common_general_kwargs, 
            new_values_dict=dict(
                ax_args=dict(
                    xticks = np.arange(len(xtick_rename_dict)),
                    xticklabels=list(xtick_rename_dict.values())
                )
            ), 
            append_to_containers=False,
            inplace=True
        )
        
    #------------------------- ACTUAL PLOTTING!!! -------------------------
    #----------------------------------------------------------------------
    # The bottom two plots should always include the xtick labels
    # However, one only needs to worry about this when sharex=False
    # NOTE: If include_xticklabels_for_all==True, one cannot simply set idxs_to_exclude_xticklabels = []
    #       This would work if sharex==False, but if sharex==True, plt.subplots automatically turns off the
    #       tick labels for all but buttom.  Thus, instead of not turning them off (via, e.g., idxs_to_exclude_xticklabels 
    #       = []), one must actively turn them on as well!
    idxs_to_include_xticklabels = [2, 5]
    if row_major:
        idxs_to_include_xticklabels = [4, 5]
    idxs_to_exclude_xticklabels = [x for x in range(6) if x not in idxs_to_include_xticklabels]
    common_general_kwargs_wo_xticklabels = Plot_General.adjust_kwargs(common_general_kwargs, dict(ax_args=dict(xticklabels=[])))
    if include_xticklabels_for_all:
        idxs_to_exclude_xticklabels = []
    #-------------------------
    #--------------------------------------------------    
    # Below, qualitatively 
    #   i represents iteration over the 6 subplots
    #     - Note: There are 6 MECPOAn objects in each coll_w_get_args['mecpo_coll'] in mecpo_colls_w_get_args, 
    #             one for each subplot
    #   j represents iteration over the collections (mecpo_colls_w_get_args) for a given subplot
    #-----
    # First, iterate over the plot number/MECPOAn number
    for i_plot in range(len(mecpo_an_keys)):
        # Use mecpo_an_keys[i_plot] as the subplot title
        subplot_title_i = mecpo_an_keys[i_plot]
        #-----
        # Next, build dfs_w_args_i to be plotted by iterating over the MECPOCollection
        # objects (or, more specifically, the members of mecpo_colls_w_get_args), and grabbing
        # the correct DF from each collection
        dfs_w_args_i = []
        for coll_w_get_args_j in mecpo_colls_w_get_args:
            # Get the key for the correct MECPOAn from which to grab the DF
            mecpo_an_key_ij = coll_w_get_args_j['mecpo_an_order'][i_plot] 
            
            # As well as the cpo_df_name, cpo_df_subset_by_mjr_mnr_cause_args, and max_total_counts_args
            # NOTE: Only single value for these, so no [i_plot] accessor
            cpo_df_name_j = coll_w_get_args_j['cpo_df_name']
            cpo_df_subset_by_mjr_mnr_cause_args_j = coll_w_get_args_j['cpo_df_subset_by_mjr_mnr_cause_args']
            max_total_counts_args_j = coll_w_get_args_j['max_total_counts_args']
            #-----
            # Now, get the correct DF from coll_w_get_args_j['mecpo_coll'] given the known mecpo_an_key, cpo_df_name, 
            # cpo_df_subset_by_mjr_mnr_cause_args, and max_total_counts_args
            df_ij = coll_w_get_args_j['mecpo_coll'].get_cpo_df(
                mecpo_an_key=mecpo_an_key_ij, 
                cpo_df_name=cpo_df_name_j, 
                cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args_j, 
                max_total_counts_args=max_total_counts_args_j
            )
            #-----
            # Finally, get the barplot_kwargs_shared from coll_w_get_args_j['mecpo_coll'] and join together
            # with common_barplot_kwargs to get barplot_kwargs_ij
            barplot_kwargs_ij = coll_w_get_args_j['mecpo_coll'].barplot_kwargs_shared
            # Note: Order actually is important here.  If keys are shared, the values for the second are kept
            barplot_kwargs_ij = {**common_barplot_kwargs, **barplot_kwargs_ij}
            #-------------------------
            dfs_w_args_i.append((df_ij, barplot_kwargs_ij))
        # END for coll_w_get_args_j in mecpo_colls_w_get_args
        #-----
        # Now, perform actual plotting for i_plot
        common_general_kwargs_i = common_general_kwargs
        if(not sharex and 
           i_plot in idxs_to_exclude_xticklabels):
            common_general_kwargs_i = common_general_kwargs_wo_xticklabels
        #----------
        axs[i_plot] = Plot_Bar.plot_multiple_barplots(
            ax=axs[i_plot], 
            dfs_w_args=dfs_w_args_i,
            **common_general_kwargs_i, 
            title_args=dict(label=subplot_title_i, fontsize=20)
        )
        if include_xticklabels_for_all:
            axs[i_plot].tick_params(axis='x', labelbottom=True)
    #---------------------------------------------------------------------------
    # Make all have same scale
    if not sharey:
        Plot_General.make_all_axes_have_same_ylims(axs)
    #---------------------------------------------------------------------------
    subplot_layout_params = Plot_General.get_subplot_layout_params(fig)
    #-----
    if is_rcpo:
        supxlabel = 'Reason'
    else:
        supxlabel = 'ID'
    fig.supxlabel(supxlabel, fontsize=30, 
                  x=subplot_layout_params['right'], ha='right', 
                  y=0.0, va='bottom')
    fig.supylabel('Normalized Counts', fontsize=30, 
                  x=0.0, ha='left',
                  y=subplot_layout_params['top'], va='top')
    if suptitle:
        fig.suptitle(suptitle, fontsize=50, 
                     x=0.5*(subplot_layout_params['left']+subplot_layout_params['right']), ha='center')

    if include_all_y_tick_labels and sharey:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='y', labelleft=True)

    if include_all_x_tick_labels and sharex:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='x', labelbottom=True)

    if replace_xtick_labels_with_ints:
        Plot_General.generate_xtick_labels_legend_textbox(
            fig=fig, 
            xtick_rename_dict=xtick_rename_dict, 
            text_x_pos=1.02*subplot_layout_params['right'], 
            text_y_pos=subplot_layout_params['top'])
    #---------------------------------------------------------------------------
    if save_args and save_args['save_fig']:
        Plot_General.save_fig(fig, save_args['save_dir'], save_args['save_name'])   
    #---------------------------------------------------------------------------
    return fig, axs

In [None]:
#----- Figure / saving options -----
fig_num = 0
# Figures are only written to disk by the plotting cells when save_fig is True
save_fig = False
save_ext = 'png'
# NOTE(review): machine-specific absolute path; consider making this configurable
save_dir = r'C:\Users\s346557\Downloads\tmp1'

#----- Data selection / build options -----
# NOTE(review): most of these flags are not referenced in the visible cells of this notebook;
#               presumably they are consumed by helpers loaded from the METHODS notebook
run_testing_data = False
assert_all_cols_equal = True
include_normalize_by_nSNs = True
# NOTE(review): the name is misspelled ("inclue"); kept as-is in case other cells reference it
inclue_zero_counts = True
return_multiindex_outg_reason = False
return_normalized_separately = False

# The nSNs normalization is only part of the main output when it is requested AND it is
# not returned separately
normalize_by_nSNs_included = include_normalize_by_nSNs and not return_normalized_separately

xfmr_equip_typ_nms_of_interest = ['TRANSFORMER, OH', 'TRANSFORMER, UG']

normalize_by_time_interval = True
include_power_down_minus_up = False

In [None]:
# days_min_max_outg_td_windows=[
#     [1,5], [6,10], [11,15], [16,20], [21,25], [26,30]
# ]
# old_to_new_keys_dict = {
#     'outg_td_window_1_to_5_days'  :'01-05 Days',
#     'outg_td_window_6_to_10_days' :'06-10 Days',
#     'outg_td_window_11_to_15_days':'11-15 Days',
#     'outg_td_window_16_to_20_days':'16-20 Days',
#     'outg_td_window_21_to_25_days':'21-25 Days',
#     'outg_td_window_26_to_30_days':'26-30 Days'
# }


# days_min_max_outg_td_windows=[
#     [0,1], [1,2], [2,3], [3,4], [4,5]
# ]
# old_to_new_keys_dict = {
#     'outg_td_window_0_to_1_days':'00-01 Days',
#     'outg_td_window_1_to_2_days':'01-02 Days',
#     'outg_td_window_2_to_3_days':'02-03 Days',
#     'outg_td_window_3_to_4_days':'03-04 Days',
#     'outg_td_window_4_to_5_days':'04-05 Days'
# }

# Active time windows, each given as a [min_days, max_days] pair
days_min_max_outg_td_windows = [
    [1, 6], [6, 11], [11, 16], [16, 21], [21, 26], [26, 31]
]

# Map each window's original key (outg_td_window_<min>_to_<max>_days) to a short,
# zero-padded display label (<min>-<max> Days).  Deriving the mapping from the window
# list keeps the two definitions in step.
old_to_new_keys_dict = {
    f'outg_td_window_{day_min}_to_{day_max}_days': f'{day_min:02d}-{day_max:02d} Days'
    for day_min, day_max in days_min_max_outg_td_windows
}

#-------------------------
# One label per window is expected
assert len(old_to_new_keys_dict) == len(days_min_max_outg_td_windows)

In [None]:
# NOTE(review): this flag is not referenced in the visible cells of this notebook; presumably it is
#               read by helpers loaded via %run from the METHODS notebook -- confirm before removing.
append_only=True

In [None]:
# Index (into the list of collections handed to the plotting cells) of the collection
# whose DF is used to order the x-axis entries
mecpo_idx_for_ordering = 0

# Name of the rcpo DF to use for each of the three collections
rcpo_dfs_name_outg          = 'rcpo_df_norm_by_xfmr_nSNs'
rcpo_dfs_name_no_outg       = 'rcpo_df_norm_by_xfmr_nSNs'
rcpo_dfs_name_no_outg_prstn = 'rcpo_df_norm_by_xfmr_nSNs'

#-------------------------
# The icpo DF names mirror the rcpo names, with the leading character replaced by 'i'
icpo_dfs_name_outg, icpo_dfs_name_no_outg, icpo_dfs_name_no_outg_prstn = (
    f'i{rcpo_name[1:]}'
    for rcpo_name in (rcpo_dfs_name_outg, rcpo_dfs_name_no_outg, rcpo_dfs_name_no_outg_prstn)
)

# Outages

In [None]:
# Outage dataset selection: acquisition run date, grouping columns, and event date range(s)
run_date_outg = '20230615'
grp_by_cols_outg = ['outg_rec_nb', 'trsf_pole_nb']
event_date_ranges_outg = [['2022-01-01', '2022-12-31']]
# Multi-year alternative, kept for reference:
# event_date_ranges_outg = [
#     ['2020-01-01', '2020-12-31'],
#     ['2021-01-01', '2021-12-31'], 
#     ['2022-01-01', '2022-09-30'], 
# ]

#-------------------------
# Build the collection for the 'outg_full' dataset over the configured time windows.
# barplot_kwargs_shared carries the bar styling attached to this collection (red faces).
# NOTE(review): data_dir_base is a machine-specific absolute path repeated in each build cell.
mecpo_coll_full = build_and_combine_mecpo_colls_for_dates(
    dataset='outg_full',
    coll_label='Outages (All Xfmrs)',
    barplot_kwargs_shared={'facecolor': 'red'},
    acq_run_date=run_date_outg,
    data_date_ranges=event_date_ranges_outg,
    grp_by_cols=grp_by_cols_outg,
    days_min_max_outg_td_windows=days_min_max_outg_td_windows,
    old_to_new_keys_dict=old_to_new_keys_dict,
    normalize_by_time_interval=normalize_by_time_interval,
    data_dir_base=r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data'
)

# No outages

In [None]:
# No-outage (baseline) dataset selection: acquisition run date, grouping columns, and event date range(s)
run_date_no_outg = '20230512'
grp_by_cols_no_outg = ['trsf_pole_nb', 'no_outg_rec_nb']
event_date_ranges_no_outg = [['2022-01-01', '2022-12-31']]

#-------------------------
# Build the collection for the 'no_outg' dataset over the same time windows as the outage collection.
# barplot_kwargs_shared carries the bar styling attached to this collection (orange faces).
# NOTE(review): data_dir_base is a machine-specific absolute path repeated in each build cell.
mecpo_coll_no_outg = build_and_combine_mecpo_colls_for_dates(
    dataset='no_outg',
    coll_label='No Outages',
    barplot_kwargs_shared={'facecolor': 'orange'},
    acq_run_date=run_date_no_outg,
    data_date_ranges=event_date_ranges_no_outg,
    grp_by_cols=grp_by_cols_no_outg,
    days_min_max_outg_td_windows=days_min_max_outg_td_windows,
    old_to_new_keys_dict=old_to_new_keys_dict,
    normalize_by_time_interval=normalize_by_time_interval,
    data_dir_base=r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data'
)

# No outages pristine

In [None]:
# Pristine no-outage dataset selection: acquisition run date, event date range(s), and grouping columns
run_date_no_outg_prstn = '20230301'
event_date_ranges_prstn = [
    ['2022-01-01', '2022-12-31'], 
]
grp_by_cols_no_outg_prstn = ['trsf_pole_nb', 'no_outg_rec_nb']

#-------------------------
# Build the collection for the 'no_outg_prstn' dataset over the configured time windows.
# The label and face color are deliberately different from those of the plain 'no_outg'
# collection ('No Outages' / orange): the two collections are drawn on the same axes, and
# identical labels and colors would make them indistinguishable.
# NOTE(review): data_dir_base is a machine-specific absolute path repeated in each build cell.
mecpo_coll_no_outg_prstn = build_and_combine_mecpo_colls_for_dates(
    dataset='no_outg_prstn', 
    acq_run_date=run_date_no_outg_prstn, 
    data_date_ranges=event_date_ranges_prstn, 
    grp_by_cols=grp_by_cols_no_outg_prstn, 
    days_min_max_outg_td_windows=days_min_max_outg_td_windows, 
    old_to_new_keys_dict=old_to_new_keys_dict, 
    coll_label='No Outages (Pristine)', 
    barplot_kwargs_shared=dict(facecolor='blue'), 
    normalize_by_time_interval=normalize_by_time_interval, 
    data_dir_base=r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data'
)

# Similarity operations

In [None]:
# Sanity check: report the shape of 'rcpo_df_norm' for every analysis key of the outage
# collection as built (before any column alignment)
for an_key in mecpo_coll_full.mecpo_an_keys:
    rcpo_df_norm_shape = mecpo_coll_full.get_cpo_df(an_key, 'rcpo_df_norm').shape
    print(f"{an_key}, 'rcpo_df_norm' shape: {rcpo_df_norm_shape}")

In [None]:
# The three collections whose columns must agree before they can be compared
colls_to_align = [mecpo_coll_full, mecpo_coll_no_outg, mecpo_coll_no_outg_prstn]

#-------------------------
# Step 1: within each collection, give all of its MECPOAn objects the same columns
for coll_to_align in colls_to_align:
    coll_to_align.make_cpo_columns_equal(drop_empty_cpo_dfs=True)

#-------------------------
# Step 2: make the columns agree across the collections
MECPOCollection.make_cpo_columns_equal_between_mecpo_colls(
    mecpo_colls=colls_to_align,
    drop_empty_cpo_dfs=True
)

#-------------------------
# Step 3: only needed when the collections do not all use the same cpo_df name, in which case
#   MECPOCollection.make_mixed_cpo_columns_equal_between_mecpo_colls must be called with each
#   collection paired with its own cpo_df name.  Handled first for the rcpo names, then the icpo names.
for cpo_df_names_per_coll in (
    [rcpo_dfs_name_outg, rcpo_dfs_name_no_outg, rcpo_dfs_name_no_outg_prstn],
    [icpo_dfs_name_outg, icpo_dfs_name_no_outg, icpo_dfs_name_no_outg_prstn],
):
    # More than one distinct name ==> the names are mixed
    if len(set(cpo_df_names_per_coll)) > 1:
        MECPOCollection.make_mixed_cpo_columns_equal_between_mecpo_colls(
            mecpo_colls_with_cpo_df_names=[
                [coll_to_align, cpo_df_name]
                for coll_to_align, cpo_df_name in zip(colls_to_align, cpo_df_names_per_coll)
            ],
            segregate_by_mecpo_an_keys=False
        )

In [None]:
# Sanity check: report the 'rcpo_df_norm' shapes again now that the columns have been aligned
for an_key in mecpo_coll_full.mecpo_an_keys:
    rcpo_df_norm_shape = mecpo_coll_full.get_cpo_df(an_key, 'rcpo_df_norm').shape
    print(f"{an_key}, 'rcpo_df_norm' shape: {rcpo_df_norm_shape}")

In [None]:
# Display the columns of 'rcpo_df_norm' for the first analysis key of the outage collection
# (useful for checking which reasons are present before they are removed/combined)
mecpo_coll_full.get_cpo_df(mecpo_coll_full.mecpo_an_keys[0], 'rcpo_df_norm').columns

In [None]:
start = time.time()
#-------------------------
# Drop every reason matching the 'cleared' or 'Test Mode' patterns from all three collections
for coll_to_clean in (mecpo_coll_full, mecpo_coll_no_outg, mecpo_coll_no_outg_prstn):
    coll_to_clean.remove_reasons_from_all_rcpo_dfs(['.*cleared.*', '.*Test Mode.*'])

#-------------------------
# Combine reasons with the standard combine (the default patterns_and_replace list is
#   dflt_patterns_and_replace in MECPODf.combine_cpo_df_reasons), keeping the returned
#   red_to_org_cols_dict for each collection
combine_reasons_kwargs = dict(
    initial_strip=True,
    initial_punctuation_removal=True,
    return_red_to_org_cols_dict=True
)
red_to_org_cols_dicts_full = mecpo_coll_full.combine_reasons_in_all_rcpo_dfs(**combine_reasons_kwargs)
red_to_org_cols_dicts_no_outg = mecpo_coll_no_outg.combine_reasons_in_all_rcpo_dfs(**combine_reasons_kwargs)
red_to_org_cols_dicts_no_outg_prstn = mecpo_coll_no_outg_prstn.combine_reasons_in_all_rcpo_dfs(**combine_reasons_kwargs)
print(time.time() - start)

#-------------------------
# Optionally add a 'Power Down Minus Up' reason built from the power down and power up reasons
if include_power_down_minus_up:
    for coll_to_clean in (mecpo_coll_full, mecpo_coll_no_outg, mecpo_coll_no_outg_prstn):
        coll_to_clean.delta_cpo_df_reasons_in_all_rcpo_dfs(
            reasons_1='Primary Power Down',
            reasons_2='Primary Power Up',
            delta_reason_name='Power Down Minus Up'
        )

In [None]:
#-------------------------
# The SNs/nSNs (and similar) columns should not appear in the plots, so strip them
# from every cpo DF in each collection
for coll_to_strip in (mecpo_coll_full, mecpo_coll_no_outg, mecpo_coll_no_outg_prstn):
    coll_to_strip.remove_SNs_cols_from_all_cpo_dfs()

In [None]:
# Sanity check: report the 'rcpo_df_norm' shapes after the reasons were removed/combined and
# the SNs columns were dropped
for an_key in mecpo_coll_full.mecpo_an_keys:
    rcpo_df_norm_shape = mecpo_coll_full.get_cpo_df(an_key, 'rcpo_df_norm').shape
    print(f"{an_key}, 'rcpo_df_norm' shape: {rcpo_df_norm_shape}")

In [None]:
# Keep only the rcpo/icpo DFs that are actually plotted for each collection and drop the rest.
# The normalized counts DFs are built and set first (first_build_and_set_norm_counts_df_for_all=True)
# so that the DFs being kept exist before the others are removed.
for coll_to_trim, cpo_df_names_to_keep in [
    (mecpo_coll_full,          [rcpo_dfs_name_outg,          icpo_dfs_name_outg]),
    (mecpo_coll_no_outg,       [rcpo_dfs_name_no_outg,       icpo_dfs_name_no_outg]),
    (mecpo_coll_no_outg_prstn, [rcpo_dfs_name_no_outg_prstn, icpo_dfs_name_no_outg_prstn]),
]:
    coll_to_trim.remove_all_cpo_dfs_except(
        to_keep=cpo_df_names_to_keep,
        first_build_and_set_norm_counts_df_for_all=True,
        build_and_set_norm_counts_df_for_all_kwargs=None,
        keep_rcpo_df_OG=False
    )

# =================================================

In [None]:
# The analysis keys of the outage collection are used as the reference; both no-outage
# collections must expose exactly the same keys for the side-by-side plots to line up
mecpo_an_keys = mecpo_coll_full.mecpo_an_keys
for coll_to_check in (mecpo_coll_no_outg, mecpo_coll_no_outg_prstn):
    assert mecpo_an_keys == coll_to_check.mecpo_an_keys

In [None]:
# For each collection, bundle the collection together with the arguments needed to fetch
# its DFs: the cpo_df name, the (initially empty) major/minor cause subset, and the order
# in which its analysis keys are mapped onto the subplots.
mecpo_coll_w_get_args_full = {
    'mecpo_coll': mecpo_coll_full,
    'cpo_df_name': rcpo_dfs_name_outg,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_keys,
}
#-----
mecpo_coll_w_get_args_no_outg = {
    'mecpo_coll': mecpo_coll_no_outg,
    'cpo_df_name': rcpo_dfs_name_no_outg,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_keys,
}
#-----
mecpo_coll_w_get_args_no_outg_prstn = {
    'mecpo_coll': mecpo_coll_no_outg_prstn,
    'cpo_df_name': rcpo_dfs_name_no_outg_prstn,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_keys,
}
#-------------------------
# Plotting order of the collections; no_outg_idxs marks the positions of the
# no-outage collections within this list
mecpo_colls_w_get_args = [
    mecpo_coll_w_get_args_full,
    mecpo_coll_w_get_args_no_outg,
    mecpo_coll_w_get_args_no_outg_prstn,
]
no_outg_idxs = [1, 2]

In [None]:
# Major/minor cause selections used by the plotting cells.
# None ==> no subsetting (all outages)
subset_args_all_outgs = None

# Major cause DL, minor cause OL
subset_args_dl_ol = {
    'mjr_cause': 'DL',
    'mnr_cause': 'OL',
    'addtnl_slicers': None,
}

# Major cause DL, minor cause EQF
subset_args_dl_eqf = {
    'mjr_cause': 'DL',
    'mnr_cause': 'EQF',
    'addtnl_slicers': None,
}

# Major cause DL, minor cause EQF, additionally restricted to transformer equipment types
subset_args_dl_eqf_xfmr = {
    'mjr_cause': 'DL',
    'mnr_cause': 'EQF',
    'addtnl_slicers': [
        {'column': 'EQUIP_TYP_NM', 'value': ['TRANSFORMER, OH', 'TRANSFORMER, UG']}
    ],
}

In [None]:
# Display the analysis keys of the outage collection (these are the keys accepted by get_cpo_df)
mecpo_coll_full.mecpo_an_keys

In [None]:
start = time.time()
# Styling shared by both series; per-series settings are merged on top of it below
common_barplot_kwargs = dict(alpha=0.25, fill=True, edgecolor='black', hatch='//')
fig, ax0 = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])

# DFs for the first analysis key of the outage and no-outage collections
rcpo_df_outg_first = mecpo_coll_full.get_cpo_df(mecpo_an_keys[0], rcpo_dfs_name_outg)
rcpo_df_no_outg_first = mecpo_coll_no_outg.get_cpo_df(mecpo_an_keys[0], rcpo_dfs_name_no_outg)

# Order the reasons by descending mean value in the outage DF
reason_order = rcpo_df_outg_first.mean().sort_values(ascending=False).index.tolist()
n_reason_to_include = 15

# NOTE: Order is important when merging the kwargs.  If keys are shared, the values of the
#       second dict are kept, so the per-series settings (e.g., hatch=None) must come last
#       in order to override the common ones.
# The second series is drawn from the no-outage collection and is labeled accordingly.
ax0 = Plot_Bar.plot_multiple_barplots(
    ax=ax0, 
    dfs_w_args=[
        (rcpo_df_outg_first,    {**common_barplot_kwargs, **dict(facecolor='red', hatch=None, label="Outages (All Xfmrs)")}), 
        (rcpo_df_no_outg_first, {**common_barplot_kwargs, **dict(facecolor='green', hatch=None, label="No Outages")})
    ], 
    order=reason_order, 
    n_bars_to_include=n_reason_to_include, 
    keep_edges_opaque=True, 
    include_hatches=False, 
    draw_side_by_side=True, 
    draw_single_idx_full_width=None,     
    run_set_general_plotting_args=True, 
    orient='v',
    draw_legend=False, 
    legend_args=dict(fontsize=15), 
    title_args=dict(label='Title', fontsize=20), 
    ylabel_args = dict(ylabel='y-label', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
    xlabel_args = dict(xlabel='x-label', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
    tick_args=[dict(axis='x', labelrotation=90, labelsize=7), 
              dict(axis='y', labelsize=15)]
)
print(time.time()-start)

In [None]:
start = time.time()
save_name = f'RCPO_dfs_3x2_full.{save_ext}'

# Point every collection at the "all outages" selection (no major/minor cause subset,
# no max-total-counts argument) and draw the bars without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=None,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': None},
    inplace=False
)

# Order the reasons by descending mean value in the outage collection's first analysis key,
# using the cpo_df name and cause subset of the collection chosen for ordering
ordering_get_args = mecpo_colls_w_get_args[mecpo_idx_for_ordering]
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_get_args['cpo_df_name'],
    cpo_df_subset_by_mjr_mnr_cause_args=ordering_get_args['cpo_df_subset_by_mjr_mnr_cause_args']
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; x tick labels are replaced by integers and shown on all subplots
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    include_xticklabels_for_all=True,
    suptitle='Full',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
start = time.time()
# NEW!!!!!!!!!!!!!!!!!!!!!!!!
# Same as the "Full" plot, but with max_total_counts set
save_name = f'RCPO_dfs_3x2_full_max_counts_150.{save_ext}'

# "All outages" selection with the max-total-counts argument set to 150; bars drawn without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=150,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': None},
    inplace=False
)

# Order the reasons by descending mean value in the outage collection's first analysis key.
# Note: the max-total-counts argument is not applied when fetching the DF used for ordering.
ordering_get_args = mecpo_colls_w_get_args[mecpo_idx_for_ordering]
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_get_args['cpo_df_name'],
    cpo_df_subset_by_mjr_mnr_cause_args=ordering_get_args['cpo_df_subset_by_mjr_mnr_cause_args']
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; x tick labels are replaced by integers and shown on all subplots
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    include_xticklabels_for_all=True,
    suptitle='Full',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
start = time.time()
save_name = f'RCPO_dfs_3x2_dl_ol.{save_ext}'

# Restrict to major cause DL / minor cause OL and hatch the bars with '//'
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': '//'},
    inplace=False
)

# Order the reasons by descending mean value in the (cause-subset) DF of the outage
# collection's first analysis key
ordering_get_args = mecpo_colls_w_get_args[mecpo_idx_for_ordering]
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_get_args['cpo_df_name'],
    cpo_df_subset_by_mjr_mnr_cause_args=ordering_get_args['cpo_df_subset_by_mjr_mnr_cause_args']
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key, with x tick labels replaced by integers
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    suptitle='DL_OL',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
start = time.time()
save_name = f'RCPO_dfs_3x2_dl_ol_2.{save_ext}'

# Restrict to major cause DL / minor cause OL and hatch the bars with '//'
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': '//'},
    inplace=False
)

# Variant of the DL_OL plot: the reasons are ordered using the UNSUBSET DF (cause subset
# explicitly None) of the outage collection's first analysis key, by descending mean value
ordering_cpo_df_name = mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_name']
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_cpo_df_name,
    cpo_df_subset_by_mjr_mnr_cause_args=None
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key, with x tick labels replaced by integers
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    suptitle='DL_OL',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
start = time.time()
save_name = f'RCPO_dfs_3x2_dl_eqf.{save_ext}'

# Restrict to major cause DL / minor cause EQF and hatch the bars with '-'
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf,
    new_max_total_counts_args=None,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': '-'},
    inplace=False
)

# Order the reasons by descending mean value in the (cause-subset) DF of the outage
# collection's first analysis key
ordering_get_args = mecpo_colls_w_get_args[mecpo_idx_for_ordering]
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_get_args['cpo_df_name'],
    cpo_df_subset_by_mjr_mnr_cause_args=ordering_get_args['cpo_df_subset_by_mjr_mnr_cause_args']
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key, with x tick labels replaced by integers
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    suptitle='DL_EQF',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
start = time.time()
save_name = f'RCPO_dfs_3x2_dl_eqf_xfmr.{save_ext}'

# Restrict to major cause DL / minor cause EQF on transformer equipment and hatch the bars
# with back-slashes
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf_xfmr,
    new_max_total_counts_args=None,
    no_outg_idxs=no_outg_idxs,
    adjust_barplot_kwargs={'hatch': '\\\\'},
    inplace=False
)

# Order the reasons by descending mean value in the (cause-subset) DF of the outage
# collection's first analysis key
ordering_get_args = mecpo_colls_w_get_args[mecpo_idx_for_ordering]
ordering_df = mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0],
    ordering_get_args['cpo_df_name'],
    cpo_df_subset_by_mjr_mnr_cause_args=ordering_get_args['cpo_df_subset_by_mjr_mnr_cause_args']
)
reason_order = ordering_df.mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key, with x tick labels replaced by integers
plot_save_args = {'save_fig': save_fig, 'save_dir': save_dir, 'save_name': save_name}
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    reason_order=reason_order,
    n_reason_to_include=10,
    is_rcpo=True,
    replace_xtick_labels_with_ints=True,
    suptitle='DL_EQF_XFMR',
    row_major=False,
    save_args=plot_save_args,
    sharex=True,
    sharey=True
)
print(time.time() - start)

In [None]:
# mecpo_an_order = [
#     '01-05 Days',
#     '06-10 Days',
#     '11-15 Days',
#     '16-20 Days',
#     '21-25 Days',
#     '26-30 Days'
# ]

In [None]:
fig, ax0 = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])

# Plot the first analysis window of the no-outage collection.
# The window is taken from the collection's own keys rather than a hard-coded label, so the
# cell keeps working whichever set of time windows is configured (a literal such as
# '00-01 Days' only exists for one of the alternative window configurations).
# The DF is fetched once and reused for both the data and the bar ordering.
no_outg_first_window_df = mecpo_coll_no_outg.get_cpo_df(
    mecpo_coll_no_outg.mecpo_an_keys[0], 
    rcpo_dfs_name_no_outg
)
ax0 = Plot_Bar.plot_barplot(
    ax=ax0, 
    df = no_outg_first_window_df, 
    # Bars ordered by descending mean value
    order=no_outg_first_window_df.mean().sort_values(ascending=False).index.tolist(), 
    n_bars_to_include=10, 
    barplot_kwargs=dict(label='No Outages'), 
    orient='v', 
    draw_legend=True, 
    legend_args=dict(fontsize=15), 
    title_args=dict(label='End Events', fontsize=20), 
    ylabel_args = dict(ylabel='Normalized Counts', fontsize=20, x=0.0, y=0.4, ha='left', va='bottom'), 
    xlabel_args = dict(xlabel='Reason', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
    tick_args=[dict(axis='x', labelrotation=90, labelsize=15), 
              dict(axis='y', labelsize=15)]
)
# Plot_General.save_fig(fig, save_dir, f'fig_7.{save_ext}')

# ICPO DFs

In [None]:
# Switch every collection over to its icpo DF for the plots that follow.
# The mapping is keyed by the collection's position in mecpo_colls_w_get_args.
icpo_df_name_by_coll_idx = dict(enumerate([
    icpo_dfs_name_outg,
    icpo_dfs_name_no_outg,
    icpo_dfs_name_no_outg_prstn,
]))
mecpo_colls_w_get_args = adjust_cpo_df_names_in_mecpo_colls_w_get_args(
    mecpo_colls_w_get_args,
    cpo_df_names=icpo_df_name_by_coll_idx
)

In [None]:
save_name = f'ICPO_dfs_3x2_full_best.{save_ext}'

# Point every collection at the "all outages" selection (no major/minor cause subset,
# no max-total-counts argument) and draw the bars without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=None, 
    no_outg_idxs=no_outg_idxs, 
    adjust_barplot_kwargs=dict(hatch=None), 
    inplace=False
)

# Order the x-axis entries by descending mean value in the first analysis key of the outage
# collection.  mecpo_coll_full and mecpo_an_keys[0] are used (as in the RCPO cells) so the
# lookup follows whichever collections and time windows are configured in this notebook,
# instead of depending on a collection/key label from an earlier configuration.
reason_order=mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0], 
    mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_name'], 
    cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_subset_by_mjr_mnr_cause_args']
).mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; the original x tick labels are kept (is_rcpo=False,
# so the shared x label is 'ID')
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    reason_order=reason_order, 
    n_reason_to_include=10, 
    is_rcpo=False, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle='Full', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

In [None]:
save_name = f'ICPO_dfs_3x2_dl_ol_best.{save_ext}'

# Restrict to major cause DL / minor cause OL; bars drawn without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None, 
    no_outg_idxs=no_outg_idxs, 
    adjust_barplot_kwargs=dict(hatch=None), 
    inplace=False
)

# Order the x-axis entries by descending mean value in the first analysis key of the outage
# collection.  mecpo_coll_full and mecpo_an_keys[0] are used (as in the RCPO cells) so the
# lookup follows whichever collections and time windows are configured in this notebook,
# instead of depending on a collection/key label from an earlier configuration.
reason_order=mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0], 
    mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_name'], 
    cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_subset_by_mjr_mnr_cause_args']
).mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; the original x tick labels are kept (is_rcpo=False,
# so the shared x label is 'ID')
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    reason_order=reason_order, 
    n_reason_to_include=10, 
    is_rcpo=False, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle='DL_OL', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

In [None]:
save_name = f'ICPO_dfs_3x2_dl_eqf_best.{save_ext}'

# Restrict to major cause DL / minor cause EQF; bars drawn without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf,
    new_max_total_counts_args=None, 
    no_outg_idxs=no_outg_idxs, 
    adjust_barplot_kwargs=dict(hatch=None), 
    inplace=False
)

# Order the x-axis entries by descending mean value in the first analysis key of the outage
# collection.  mecpo_coll_full and mecpo_an_keys[0] are used (as in the RCPO cells) so the
# lookup follows whichever collections and time windows are configured in this notebook,
# instead of depending on a collection/key label from an earlier configuration.
reason_order=mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0], 
    mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_name'], 
    cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_subset_by_mjr_mnr_cause_args']
).mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; the original x tick labels are kept (is_rcpo=False,
# so the shared x label is 'ID')
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    reason_order=reason_order, 
    n_reason_to_include=10, 
    is_rcpo=False, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle='DL_EQF', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

In [None]:
save_name = f'ICPO_dfs_3x2_dl_eqf_xfmr_best.{save_ext}'

# Restrict to major cause DL / minor cause EQF on transformer equipment; bars drawn without hatching
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf_xfmr,
    new_max_total_counts_args=None, 
    no_outg_idxs=no_outg_idxs, 
    adjust_barplot_kwargs=dict(hatch=None), 
    inplace=False
)

# Order the x-axis entries by descending mean value in the first analysis key of the outage
# collection.  mecpo_coll_full and mecpo_an_keys[0] are used (as in the RCPO cells) so the
# lookup follows whichever collections and time windows are configured in this notebook,
# instead of depending on a collection/key label from an earlier configuration.
reason_order=mecpo_coll_full.get_cpo_df(
    mecpo_an_keys[0], 
    mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_name'], 
    cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[mecpo_idx_for_ordering]['cpo_df_subset_by_mjr_mnr_cause_args']
).mean().sort_values(ascending=False).index.tolist()

#-----
# Draw one subplot per analysis key; the original x tick labels are kept (is_rcpo=False,
# so the shared x label is 'ID')
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args, 
    reason_order=reason_order, 
    n_reason_to_include=10, 
    is_rcpo=False, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle='DL_EQF_XFMR', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

In [None]:
# Build four lists of collection/get-args entries, one per cause subset, each with its
# own hatch (and, for the DL_* subsets, a legend label) so they can be drawn together.
#----------
# All outages: full, direct and no-outage collections, no hatch.
mecpo_colls_w_get_args_all_outgs = [
    mecpo_coll_w_get_args_full, 
    mecpo_coll_w_get_args_drct, 
    mecpo_coll_w_get_args_no_outg
]
mecpo_colls_w_get_args_all_outgs=adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_all_outgs, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=None, 
    adjust_barplot_kwargs=dict(hatch=None), 
    inplace=False
)
#----------
# DL_OL: full and direct collections only (the no-outage entry is not included here).
mecpo_colls_w_get_args_dl_ol = [
    mecpo_coll_w_get_args_full, 
    mecpo_coll_w_get_args_drct
]
mecpo_colls_w_get_args_dl_ol=adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_dl_ol, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None, 
    adjust_barplot_kwargs=dict(hatch='//', label='DL_OL'), 
    inplace=False
)
#----------
# DL_EQF: derived from the already-adjusted DL_OL list; subset args, hatch and label are
# overridden by this call.
mecpo_colls_w_get_args_dl_eqf = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_dl_ol, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf,
    new_max_total_counts_args=None, 
    adjust_barplot_kwargs=dict(hatch='-', label='DL_EQF'), 
    inplace=False
)
#----------
# DL_EQF_XFMR: likewise derived from the DL_OL list.
mecpo_colls_w_get_args_dl_eqf_xfmr = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_dl_ol, 
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf_xfmr,
    new_max_total_counts_args=None, 
    adjust_barplot_kwargs=dict(hatch='\\\\', label='DL_EQF_XFMR'), 
    inplace=False
)



In [None]:
mecpo_colls_w_get_args_FULL = [
    *mecpo_colls_w_get_args_all_outgs, 
    *mecpo_colls_w_get_args_dl_ol, 
    *mecpo_colls_w_get_args_dl_eqf, 
    *mecpo_colls_w_get_args_dl_eqf_xfmr
]

In [None]:
# mecpo_colls_w_get_args_FULL

In [None]:
save_name = f'RCPO_dfs_3x2_ALL.{save_ext}'
#-----
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_FULL, 
    reason_order=mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm_by_xfmr_nSNs').mean().sort_values(ascending=False).index.tolist(), 
    n_reason_to_include=10, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=True, 
    suptitle='Full', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

In [None]:
# Collect the distinct bar face colors and the distinct cause-subset args used by the
# entries of mecpo_colls_w_get_args_FULL.
# Both lists are de-duplicated in first-seen order, so the result is the same on every run
# (a set() round-trip gives an arbitrary order that then has to be fixed up by hand).
unq_colors = []
subset_cause_args = []  # every entry's subset args, duplicates included
for coll_w_get_args in mecpo_colls_w_get_args_FULL:
    color = coll_w_get_args['mecpo_coll'].barplot_kwargs_shared['facecolor']
    if color not in unq_colors:
        unq_colors.append(color)
    subset_cause_args.append(coll_w_get_args['cpo_df_subset_by_mjr_mnr_cause_args'])
#-----
# The subset args are compared with == (membership test) rather than hashed, so
# unhashable values such as dicts are handled.
unq_subset_cause_args = []
for subset in subset_cause_args:
    if subset not in unq_subset_cause_args:
        unq_subset_cause_args.append(subset)

In [None]:
unq_subset_cause_args

In [None]:
unq_colors

In [None]:
unq_colors=['red', 'green', 'orange']

In [None]:
# Re-order mecpo_colls_w_get_args_FULL so entries are grouped by face color first (in the
# order given by unq_colors) and by cause subset second (in the order of unq_subset_cause_args).
mecpo_colls_w_get_args_FULL_sorted = []
for color in unq_colors:
    for subset in unq_subset_cause_args:
        for coll_w_get_args in mecpo_colls_w_get_args_FULL:
            if(coll_w_get_args['mecpo_coll'].barplot_kwargs_shared['facecolor']==color and 
               coll_w_get_args['cpo_df_subset_by_mjr_mnr_cause_args']==subset):
                mecpo_colls_w_get_args_FULL_sorted.append(coll_w_get_args)
# Every entry must have been placed exactly once; this fails if a color present in the data
# is missing from unq_colors (or if a color/subset value is listed more than once).
assert(len(mecpo_colls_w_get_args_FULL_sorted)==len(mecpo_colls_w_get_args_FULL))

In [None]:
save_name = f'RCPO_dfs_3x2_ALL_SRTD.{save_ext}'
#-----
fig,axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000, 
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_FULL_sorted, 
    reason_order=mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm_by_xfmr_nSNs').mean().sort_values(ascending=False).index.tolist(), 
    n_reason_to_include=10, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=True, 
    suptitle='Full', 
    row_major=False, 
    save_args=dict(
        save_fig=save_fig,
        save_dir=save_dir, 
        save_name=save_name
    ), 
    sharex=True,
    sharey=True
)

# TOTAL COUNTS

In [None]:
# Total event counts for the full, direct and no-outage collections, all taken from the
# '01-05 Days' / 'rcpo_df_norm_by_xfmr_nSNs' cpo_df.
# None is passed as the cause-subset argument for all three calls.
cpo_df_subset_by_mjr_mnr_cause_args=None
#-------------------------
total_event_counts_full = mecpo_coll_full.get_total_event_counts(
    '01-05 Days', 
    'rcpo_df_norm_by_xfmr_nSNs', 
    cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args)

total_event_counts_drct = mecpo_coll_drct.get_total_event_counts(
    '01-05 Days', 
    'rcpo_df_norm_by_xfmr_nSNs', 
    cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args)

total_event_counts_no_outg = mecpo_coll_no_outg.get_total_event_counts(
    '01-05 Days', 
    'rcpo_df_norm_by_xfmr_nSNs', 
    cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args)

In [None]:
bin_size = 10
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_full['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
Plot_Hist.plot_hist(ax=ax0, df=total_event_counts_full, x_col='total_counts', min_max_and_bin_size=min_max_and_bin_size)
ax0.set_yscale('log')

In [None]:
bin_size = 10
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_drct['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
Plot_Hist.plot_hist(ax=ax0, df=total_event_counts_drct, x_col='total_counts', min_max_and_bin_size=min_max_and_bin_size)
ax0.set_yscale('log')

In [None]:
bin_size = 10
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_no_outg['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
Plot_Hist.plot_hist(ax=ax0, df=total_event_counts_no_outg, x_col='total_counts', min_max_and_bin_size=min_max_and_bin_size)
ax0.set_yscale('log')

In [None]:
# Side-by-side histograms of 'total_counts' for the full, direct and no-outage collections
# on one log-scaled axis.
bin_size = 10
# (min, max, bin size) handed to the histogram helper.
# NOTE(review): the upper edge comes from the no-outage frame only, although three frames
#   are drawn. Confirm how Plot_Hist treats full/direct values above this maximum.
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_no_outg['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
# Each frame is paired with its collection's shared barplot kwargs.
Plot_Hist.plot_multiple_hists(
    ax=ax0, 
    dfs_w_args=[
        (total_event_counts_full, mecpo_coll_full.barplot_kwargs_shared), 
        (total_event_counts_drct, mecpo_coll_drct.barplot_kwargs_shared), 
        (total_event_counts_no_outg, mecpo_coll_no_outg.barplot_kwargs_shared)
    ], 
    x_col='total_counts', 
    min_max_and_bin_size=min_max_and_bin_size, 
    draw_side_by_side=True, 
    draw_legend=True
)
ax0.set_yscale('log')

In [None]:
bin_size = 10
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_no_outg['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
Plot_Hist.plot_multiple_hists(
    ax=ax0, 
    dfs_w_args=[
        (total_event_counts_full, mecpo_coll_full.barplot_kwargs_shared), 
        (total_event_counts_drct, mecpo_coll_drct.barplot_kwargs_shared), 
        (total_event_counts_no_outg, mecpo_coll_no_outg.barplot_kwargs_shared)
    ], 
    x_col='total_counts', 
    min_max_and_bin_size=min_max_and_bin_size, 
    stat='density', 
    draw_side_by_side=True, 
    draw_legend=True
)
ax0.set_yscale('log')

In [None]:
bin_size = 10
min_max_and_bin_size = (
    0, 
    np.ceil(total_event_counts_no_outg['total_counts'].max()), 
    bin_size
)

fig, ax0 = Plot_General.default_subplots(1,1)
Plot_Hist.plot_multiple_hists(
    ax=ax0, 
    dfs_w_args=[
        (total_event_counts_full, mecpo_coll_full.barplot_kwargs_shared), 
        (total_event_counts_drct, mecpo_coll_drct.barplot_kwargs_shared), 
        (total_event_counts_no_outg, mecpo_coll_no_outg.barplot_kwargs_shared)
    ], 
    x_col='total_counts', 
    min_max_and_bin_size=min_max_and_bin_size, 
    draw_side_by_side=True, 
    draw_legend=True
)

In [None]:
def set_pct_col(df, col='SUM_CMI_NB', pct_col='pct_cmi_nb'): 
    r"""
    Add (or overwrite) a percent-of-total column on df and return df.

    df:
        DataFrame containing column col.  It is modified IN PLACE.
    col:
        Column whose values are expressed as a percentage of the column's sum.
    pct_col:
        Name of the column which receives 100*df[col]/df[col].sum().

    Returns the same df object that was passed in.
    """
    col_total = df[col].sum()
    scaled_vals = 100*df[col]
    df[pct_col] = scaled_vals/col_total
    return df

In [None]:
def draw_outages_summary_barplot(fig, ax, df, 
                                 x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                 sort_values=True, y_threshold=None, abbr_leg_str=None, 
                                 include_labels='include', 
                                 x_tick_new_labels_dict=None, return_x_values_included=False, 
                                 **kwargs):
    r"""
    Draw a seaborn barplot of y_col vs x_col from df onto ax.

    fig, ax:
        Figure and axes to draw on.  fig is only passed through to the return value.
    df:
        DataFrame holding x_col and y_col (and the threshold column, if y_threshold is given).
        It is not modified; a copy is plotted.
    sort_values:
        If True, rows are sorted by y_col (descending) before plotting.
    y_threshold:
        If not None, it should be a dict with keys = ['threshold_col', 'threshold_val'];
        only rows with df[threshold_col] > threshold_val are plotted.
          e.g. y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':1.0}
    abbr_leg_str:
        If not None, text drawn just outside the right edge of the axes.
    include_labels:
        Can equal 'include', 'exclude', 'passive' (CASE INSENSITIVE).
          'include': set axis labels and tick formatting
          'exclude': remove tick labels and axis labels
          'passive': leave labels/ticks untouched
        Any other value is treated as 'include'.
    x_tick_new_labels_dict:
        Optional mapping from current x tick label text to replacement label
        (only used when include_labels=='include').  Labels missing from the dict are kept as-is.
    return_x_values_included:
        If True, the unique x_col values that were plotted are returned as a third element.

    Current kwargs keys: 'palette_dict', 'xlabel' 'ylabel', 'x_tick_rotation', 'title', 'title_font_size'
    Any other kwargs are ignored.

    Returns (fig, ax) or (fig, ax, x_values_included).
    """
    #---------------
    include_labels = include_labels.lower()
    possible_include_labels = ['include', 'exclude', 'passive']
    if include_labels not in possible_include_labels:
        # Fall back to the default behavior.  (Previously the fallback was assigned to
        # possible_include_labels by mistake, so an unknown value tripped the assert below.)
        include_labels = 'include'
    #---------------
    if y_threshold is not None:
        assert('threshold_col' in y_threshold and 'threshold_val' in y_threshold)
        df_to_plot = df[df[y_threshold['threshold_col']] > y_threshold['threshold_val']].copy()
    else:
        df_to_plot = df.copy()
    #---------------
    if sort_values:
        df_to_plot = df_to_plot.sort_values(by=y_col, ascending=False)
    #---------------
    sns.barplot(ax=ax, x=x_col, y=y_col, data=df_to_plot, 
                palette=kwargs.get('palette_dict', None))
    #---------------
    if include_labels=='include':
        ax.set_ylabel(kwargs.get('ylabel', y_col), fontsize=24, x=0.0, y=0.8, ha='left', va='bottom')
        ax.set_xlabel(kwargs.get('xlabel', x_col), fontsize=24, x=0.9, y=0.0, ha='right', va='top')   
        ax.tick_params(axis='both', labelsize=12)

        x_tick_rotation = kwargs.get('x_tick_rotation', 90)
        ax.tick_params(axis='x', labelrotation=x_tick_rotation, labelsize=12.0, direction='in')
        if x_tick_rotation not in (0, 90):
            #-- Align rotated labels with ticks at end, not center
            for label in ax.xaxis.get_majorticklabels():
                label.set_horizontalalignment('right')

        if x_tick_new_labels_dict is not None:
            labels = [x.get_text() for x in ax.get_xticklabels()]
            # Keep the original text for any label without a replacement instead of raising KeyError
            new_labels = [x_tick_new_labels_dict.get(x, x) for x in labels]
            ax.set_xticklabels(new_labels)
    elif include_labels=='exclude':
        ax.set(xticklabels=[])
        ax.set(yticklabels=[])
        ax.set(xlabel=None)
        ax.set(ylabel=None)
    else:
        # 'passive': labels and ticks are intentionally left as they are
        assert(include_labels=='passive')
    #---------------
    if abbr_leg_str is not None:
        ax.text(1.025, 0.9, abbr_leg_str, transform=ax.transAxes, fontsize=20, ha='left', va='top')
    ax.set_title(kwargs.get('title', None), fontsize=kwargs.get('title_font_size', 40))
    if return_x_values_included:
        return fig, ax, df_to_plot[x_col].unique().tolist()
    else:
        return fig,ax


In [None]:
# cpo_df = mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm').copy()
cpo_df = mecpo_coll_full.get_cpo_df('01-05 Days', 'rcpo_df_norm').copy()
outg_rec_nb_idx_lvl=0

In [None]:
# Build a DOVSOutages object for the outages present in cpo_df by running a SQL query
# in the constructor (init_df_in_constructor=True).
dovs_outgs = DOVSOutages(                 
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_outage, 
    build_sql_function_kwargs=dict(
        # Outage record numbers are read from index level outg_rec_nb_idx_lvl of cpo_df
        outg_rec_nbs=cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist(), 
        from_table_alias='DOV', 
        datetime_col='DT_OFF_TS_FULL', 
        # NOTE(review): presumably the query is run in batches split over outg_rec_nbs -- confirm
        field_to_split='outg_rec_nbs', 
        include_DOVS_OUTAGE_CAUSE_TYPES_DIM=True
    )
)

In [None]:
outgs_df = dovs_outgs.df

In [None]:
# Build abbr_leg_str: one "<code padded to common width>: <cause name>" line per major cause.
mjr_cause_cd_to_nm_dict = DOVSOutages.get_mjr_cause_cd_to_nm_dict()
# Invert code -> name into name -> code (abbreviation)
mjr_cause_nm_abbr_dict = dict((nm, cd) for cd, nm in mjr_cause_cd_to_nm_dict.items())
# Width of the longest abbreviation, used to left-justify all of them
max_len_abbr = max(len(abbr_i) for abbr_i in mjr_cause_nm_abbr_dict.values())
abbr_leg_lines = []
for mjr_cause in mjr_cause_nm_abbr_dict:
    padded_abbr = mjr_cause_nm_abbr_dict[mjr_cause].ljust(max_len_abbr)
    abbr_leg_lines.append('{}: {}\n'.format(padded_abbr, mjr_cause))
abbr_leg_str = ''.join(abbr_leg_lines)

In [None]:
# df_by_opco = df_by_opco.groupby('OPRTG_UNT_NM', dropna=False).apply(lambda x: set_pct_col(x, col='SUM_CMI_NB', pct_col='pct_cmi_nb'))
# df_by_opco = df_by_opco.groupby('OPRTG_UNT_NM', dropna=False).apply(lambda x: set_pct_col(x, col='SUM_CI_NB', pct_col='pct_ci_nb'))
# df_by_opco = DOVSOutages.set_mjr_mnr_cause_nm_col(df_by_opco, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)

In [None]:
outgs_df.head()

In [None]:
outgs_df = DOVSOutages.set_mjr_mnr_cause_nm_col(outgs_df, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)

In [None]:
outgs_df

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df, x='MJR_MNR_CAUSE_NM', y='CI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
outgs_df['MJR_MNR_CAUSE_NM'].nunique()

In [None]:
outgs_df.sort_values(by='CMI_NB', ascending=False)

In [None]:
outgs_df.groupby(['MJR_CAUSE_CD', 'MNR_CAUSE_CD', 'MJR_CAUSE_NM', 'MNR_CAUSE_NM'])[['CI_NB', 'CMI_NB']].sum()

In [None]:
outgs_df

In [None]:
idk=outgs_df.groupby(['MJR_MNR_CAUSE_NM'])[['CI_NB', 'CMI_NB']].sum()

In [None]:
idk.sort_values(by='CI_NB', ascending=False)

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df, x='MJR_MNR_CAUSE_NM', y='CI_NB', 
           order=idk.sort_values(by='CI_NB', ascending=False).index)
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
# Using barplot after summing (below) is the better option for this plot
to_inc = idk.sort_values(by='CI_NB', ascending=False).index[:10]
fig,ax = Plot_General.default_subplots(1,1)
sns.histplot(ax=ax, data=outgs_df[outgs_df['MJR_MNR_CAUSE_NM'].isin(to_inc)].reset_index(drop=True), 
             x='MJR_MNR_CAUSE_NM', weights='CI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
dovs_outgs_2 = DOVSOutages(                 
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_outage, 
    build_sql_function_kwargs=dict(
        outg_rec_nbs=cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist(), 
        from_table_alias='DOV', 
        datetime_col='DT_OFF_TS_FULL', 
        cols_of_interest=[
            'CI_NB', 'CMI_NB', 'MJR_CAUSE_CD', 'MNR_CAUSE_CD'
        ], 
        field_to_split='outg_rec_nbs', 
        include_DOVS_OUTAGE_CAUSE_TYPES_DIM=True, 
        groupby_cols=['DOV.MJR_CAUSE_CD', 'DOV.MNR_CAUSE_CD', 'DOV5.MJR_CAUSE_NM', 'DOV5.MNR_CAUSE_NM'], 
        agg_cols_and_types={
            'DOV.CI_NB' : ['sum'], 
            'DOV.CMI_NB' : ['sum']
        }
    )
)

In [None]:
outgs_df_2 = dovs_outgs_2.df
outgs_df_2.shape

In [None]:
# If running aggregation through SQL, as is done here, one SHOULD NOT run by batches, unless one is prepared
#   to complete the aggregation after the DF is returned from SQL.
# When running by batches, the aggregation is done on each batch, not on the DF as a whole.
#   Therefore, each batch will have its own unique groups, many of which overlap with each other.
#   Thus, the aggregation of the batches still needs to take place!
outgs_df_2 = outgs_df_2.groupby(['MJR_CAUSE_CD', 'MNR_CAUSE_CD', 'MJR_CAUSE_NM', 'MNR_CAUSE_NM']).sum()
outgs_df_2 = outgs_df_2.reset_index()

In [None]:
outgs_df_2 = DOVSOutages.set_mjr_mnr_cause_nm_col(outgs_df_2, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)

In [None]:
outgs_df_2

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df_2.sort_values(by='SUM_CI_NB', ignore_index=True, ascending=False), x='MJR_MNR_CAUSE_NM', y='SUM_CI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df_2.sort_values(by='SUM_CMI_NB', ignore_index=True, ascending=False), x='MJR_MNR_CAUSE_NM', y='SUM_CMI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df_2.sort_values(by='SUM_CI_NB', ignore_index=True, ascending=False).iloc[:10], x='MJR_MNR_CAUSE_NM', y='SUM_CI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df_2.sort_values(by='SUM_CMI_NB', ignore_index=True, ascending=False).iloc[:10], x='MJR_MNR_CAUSE_NM', y='SUM_CMI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2.sort_values(by='SUM_CMI_NB', ignore_index=True, ascending=False).iloc[:10], 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='SUM_CMI_NB')
fig_num += 1 

In [None]:
outgs_df_2 = set_pct_col(outgs_df_2, col='SUM_CMI_NB', pct_col='pct_cmi_nb')
outgs_df_2 = set_pct_col(outgs_df_2, col='SUM_CI_NB', pct_col='pct_ci_nb')

In [None]:
# Barplot of % CMI by major-minor cause for outgs_df_2, keeping only causes above pct_cutoff.
# NOTE(review): pct_cutoff (pct_cutoff = 1.0) and palette_dict are only assigned in cells
#   further down this notebook, and fig_num is not assigned in this cell either, so this
#   cell needs those names to already exist in the kernel.
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, 
                                       palette_dict=palette_dict
                                      )
# Advance the figure number so the next plt.subplots(num=fig_num) opens a new figure
fig_num += 1 

In [None]:
top_causes_ede_cmi = outgs_df_2[outgs_df_2['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_ede_ci  = outgs_df_2[outgs_df_2['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]

In [None]:
top_causes_dovs_cmi = df_oh[df_oh['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_dovs_ci  = df_oh[df_oh['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]

In [None]:
# Give all four top-cause frames a common 'pct' column so they can be concatenated and
# compared on one column.
# NOTE: each frame is renamed from ITSELF.  The CI frame was previously built from the CMI
#   frame, which silently replaced the CI percentages with the CMI ones.
top_causes_ede_cmi = top_causes_ede_cmi.rename(columns={'pct_cmi_nb':'pct'})
top_causes_ede_ci  = top_causes_ede_ci.rename(columns={'pct_ci_nb':'pct'})

top_causes_dovs_cmi = top_causes_dovs_cmi.rename(columns={'pct_cmi_nb':'pct'})
top_causes_dovs_ci  = top_causes_dovs_ci.rename(columns={'pct_ci_nb':'pct'})

In [None]:
# Stack the four top-cause frames and keep, for each cause, the largest 'pct' seen in any of
# them; causes are then ordered from largest to smallest 'pct'.
top_causes = pd.concat([top_causes_ede_cmi, top_causes_ede_ci, top_causes_dovs_cmi, top_causes_dovs_ci])
# .max() is the native groupby reduction; passing the builtin max through .apply relies on
# pandas' deprecated aliasing of builtin callables.
top_causes = (
    top_causes
    .groupby('MJR_MNR_CAUSE_NM')['pct']
    .max()
    .to_frame()
    .reset_index()
    .sort_values(by='pct', ignore_index=True, ascending=False)
)
# One color per cause, keyed by cause name
palette_dict = Plot_General.get_standard_colors_dict(top_causes['MJR_MNR_CAUSE_NM'].tolist())

In [None]:
outgs_df_3 = DOVSOutages.get_df_subset_excluding_mjr_mnr_causes(
    df=outgs_df_2.copy(), 
    mjr_mnr_causes_to_exclude=None, 
    mjr_causes_to_exclude=None,
    mnr_causes_to_exclude=['SCO', 'SO']
)

In [None]:
fig,ax = Plot_General.default_subplots(1,1)
sns.barplot(ax=ax, data=outgs_df_3.sort_values(by='SUM_CI_NB', ignore_index=True, ascending=False), x='MJR_MNR_CAUSE_NM', y='SUM_CI_NB')
Plot_General.set_general_plotting_args(
    ax=ax, 
    tick_args =[dict(axis='x', labelrotation=90, labelsize=7.0, direction='in'), 
                dict(axis='y', labelrotation=0, labelsize=10.0, direction='out')]
)

In [None]:
df = mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm')

In [None]:
df

In [None]:
outg_rec_nb_idfr= 'index'
contstruct_df_args=None
build_sql_function=None
build_sql_function_kwargs=None

In [None]:
#--------------------------------------------------
# Get outg_rec_nbs (series) and outg_rec_nbs_unq (list) from df
outg_rec_nbs = DOVSOutages.get_outg_rec_nbs_from_df(df=df, idfr=outg_rec_nb_idfr)
assert(len(df)==len(outg_rec_nbs)) # Important in ensuring proper merge at end
outg_rec_nbs_unq = outg_rec_nbs.unique().tolist()

In [None]:
#--------------------------------------------------
# Build dovs_outgs_df
# Step-by-step version of the outage-info lookup: fill in defaults for the SQL builder and its
# kwargs, then query DOVS for the unique outage record numbers found in df.
df_construct_type=DFConstructType.kRunSqlQuery
init_df_in_constructor=True
#-------------------------
# Default SQL builder
if build_sql_function is None:
    build_sql_function = DOVSOutages_SQL.build_sql_std_outage
#-----
if build_sql_function_kwargs is None:
    build_sql_function_kwargs={}
#-----
# Premise-level joins are switched off in both branches; the standard builder additionally
# gets include_premise=False.
if build_sql_function == DOVSOutages_SQL.build_sql_std_outage:
    build_sql_function_kwargs['include_premise']=False
    build_sql_function_kwargs['include_DOVS_PREMISE_DIM']=False
else:
    build_sql_function_kwargs['include_DOVS_PREMISE_DIM']=False
#-------------------------
# Turn off verbose by default
build_sql_function_kwargs['verbose'] = build_sql_function_kwargs.get('verbose', False)
#-------------------------
# Any 'outg_rec_nbs'/'field_to_split' already present in the kwargs are overwritten here
build_sql_function_kwargs['outg_rec_nbs'] = outg_rec_nbs_unq
build_sql_function_kwargs['field_to_split'] = 'outg_rec_nbs'
#-------------------------   
dovs_outgs = DOVSOutages(
    df_construct_type=df_construct_type, 
    contstruct_df_args=contstruct_df_args, 
    init_df_in_constructor=init_df_in_constructor,
    build_sql_function=build_sql_function, 
    build_sql_function_kwargs=build_sql_function_kwargs, 
    build_consolidated=False
)
dovs_outgs_df = dovs_outgs.get_df()
# Column holding the outage record number in dovs_outgs_df ('OUTG_REC_NB' unless overridden in the kwargs)
outg_rec_nb_col = build_sql_function_kwargs.get('outg_rec_nb_col', 'OUTG_REC_NB')

In [None]:
dovs_outgs_df = dovs_outgs_df.set_index(outg_rec_nb_col).sort_index()

In [None]:
dovs_outgs_df.head()

In [None]:
#--------------------------------------------------
# Merge dovs_outgs_df with df
og_shape = df.shape
return_df = pd.merge(df, dovs_outgs_df, how='left', left_on=outg_rec_nbs, right_index=True)

In [None]:
return_df.head()

In [None]:
import DOVSOutages
reload(DOVSOutages)
from DOVSOutages import DOVSOutages

In [None]:
r2 = DOVSOutages.append_outg_info_to_df(
    df=df, 
    outg_rec_nb_idfr=outg_rec_nb_idfr, 
    contstruct_df_args=contstruct_df_args, 
    build_sql_function=build_sql_function, 
    build_sql_function_kwargs=build_sql_function_kwargs
)

In [None]:
r2.equals(return_df)

In [None]:
return_df

In [None]:
conn_outages = Utilities.get_utldb01p_oracle_connection()

In [None]:
#GROUP BY OP UNIT AS WELL

sql = (
"""
SELECT   DOV.MJR_CAUSE_CD, DOV.MNR_CAUSE_CD, SUM(DOV.CI_NB) AS SUM_CI_NB, SUM(DOV.CMI_NB) AS SUM_CMI_NB, DOV.OPERATING_UNIT_ID, DOV1.OPRTG_UNT_NM, DOV5.MJR_CAUSE_NM, DOV5.MNR_CAUSE_NM
        
FROM     DOVSADM.DOVS_OUTAGE_FACT DOV
         LEFT OUTER JOIN DOVSADM.DOVS_MASTER_GEO_DIM DOV1 ON DOV.OPERATING_UNIT_ID=DOV1.OPRTG_UNT_ID AND DOV.STATE_ABBR_TX = DOV1.STATE_ID   AND DOV.OPCO_NBR=DOV1.OPCO_ID AND DOV.DISTRICT_NB=DOV1.DISTRICT_ID AND DOV.SRVC_CNTR_NB=DOV1.AREA_ID AND DOV.GIS_CRCT_NB = DOV1.GIS_CIRCUIT_ID
         LEFT OUTER JOIN DOVSADM.DOVS_OUTAGE_ATTRIBUTES_DIM DOV2 ON DOV.OUTG_REC_NB=DOV2.OUTG_REC_NB 
         LEFT OUTER JOIN DOVSADM.DOVS_OUTAGE_CAUSE_TYPES_DIM DOV5 ON DOV.MJR_CAUSE_CD=DOV5.MJR_CAUSE_CD AND DOV.MNR_CAUSE_CD=DOV5.MNR_CAUSE_CD 

WHERE    DOV.DT_OFF_TS BETWEEN '2018-01-01' AND '2022-09-30'
AND      DOV2.INTRPTN_TYP_CD = 'S' 
AND      DOV2.CURR_REC_STAT_CD = 'A'
AND      DOV.MJR_CAUSE_CD <> 'NI'
AND      DOV.DEVICE_CD <> 85
AND      DOV.JMED_FL = 'N'

GROUP BY DOV.MJR_CAUSE_CD, DOV.MNR_CAUSE_CD, DOV5.MJR_CAUSE_NM, DOV5.MNR_CAUSE_NM, DOV1.OPRTG_UNT_NM, DOV.OPERATING_UNIT_ID
ORDER BY SUM(DOV.CMI_NB) DESC

"""
)

df_by_opco = pd.read_sql(sql, conn_outages)

# Percent of CMI/CI WITHIN each operating unit: 100 * value / (sum of value over the unit).
# groupby(...).transform('sum') returns the per-unit total aligned to the original rows, so
# the row index and all existing columns are left untouched and the new columns are simply
# appended (pct_cmi_nb first, then pct_ci_nb).
# This replaces groupby(...).apply(set_pct_col-lambda), which mutated the group objects and,
# with pandas>=2.0, prepends OPRTG_UNT_NM to the index so that the second
# groupby('OPRTG_UNT_NM') becomes ambiguous (index level AND column).
# dropna=False keeps rows whose OPRTG_UNT_NM is null together as their own group.
opco_grps = df_by_opco.groupby('OPRTG_UNT_NM', dropna=False)
df_by_opco['pct_cmi_nb'] = 100*df_by_opco['SUM_CMI_NB']/opco_grps['SUM_CMI_NB'].transform('sum')
df_by_opco['pct_ci_nb']  = 100*df_by_opco['SUM_CI_NB']/opco_grps['SUM_CI_NB'].transform('sum')
df_by_opco = DOVSOutages.set_mjr_mnr_cause_nm_col(df_by_opco, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)

In [None]:
df_by_opco

In [None]:
# Get df_by_cause from df_by_opco
# i.e., collapse the per-operating-unit rows into one row per cause by summing over units.

cols_to_drop = ['OPERATING_UNIT_ID', 'OPRTG_UNT_NM']
cols_to_agg = ['SUM_CI_NB', 'SUM_CMI_NB', 'pct_cmi_nb', 'pct_ci_nb']
# Everything that is neither dropped nor aggregated is used as a grouping key
cols_to_groupby = [x for x in df_by_opco.columns 
                   if x not in (cols_to_drop+cols_to_agg)]
#-----
df_by_cause_from_by_opco = df_by_opco.drop(columns=cols_to_drop).groupby(cols_to_groupby, dropna=False).agg({x:'sum' for x in cols_to_agg})
df_by_cause_from_by_opco = df_by_cause_from_by_opco.reset_index() #flatten out index, instead of multi-index of groups
# The summed pct columns from the agg above are overwritten here with percentages of the
# overall SUM_CMI_NB / SUM_CI_NB totals
df_by_cause_from_by_opco = set_pct_col(df_by_cause_from_by_opco, col='SUM_CMI_NB', pct_col='pct_cmi_nb')
df_by_cause_from_by_opco = set_pct_col(df_by_cause_from_by_opco, col='SUM_CI_NB', pct_col='pct_ci_nb')
# ignore_index=True: after sorting, the index is 0..n-1 in descending pct_cmi_nb order
df_by_cause_from_by_opco = df_by_cause_from_by_opco.sort_values(by='pct_cmi_nb', ignore_index=True, ascending=False)

In [None]:
# Build x_tick_new_labels_dict for all possible values of 'MJR_MNR_CAUSE_NM'
# where the new labels are simply the ordering of the causes when sorted by pct_cmi_nb
# to_dict() gives {index label: cause name}; it is then inverted to {cause name: index label + 1}
x_tick_new_labels_dict = df_by_cause_from_by_opco.sort_values(by='pct_cmi_nb', ascending=False)['MJR_MNR_CAUSE_NM'].to_dict()
x_tick_new_labels_dict = {v:k+1 for k,v in x_tick_new_labels_dict.items()}

# Build palette_dict for those values of 'MJR_MNR_CAUSE_NM' which will actually be plotted
# i.e., causes exceeding pct_cutoff (in percent) in either pct_cmi_nb or pct_ci_nb in any row of df_by_opco
pct_cutoff = 1.0
top_mjr_mnr_causes_cmi = df_by_opco[df_by_opco['pct_cmi_nb']>pct_cutoff]['MJR_MNR_CAUSE_NM'].unique().tolist()
top_mjr_mnr_causes_ci = df_by_opco[df_by_opco['pct_ci_nb']>pct_cutoff]['MJR_MNR_CAUSE_NM'].unique().tolist()
# Union of the two lists, CMI causes first, preserving order and without duplicates
top_mjr_mnr_causes = top_mjr_mnr_causes_cmi.copy()
top_mjr_mnr_causes.extend([x for x in top_mjr_mnr_causes_ci if x not in top_mjr_mnr_causes])
palette_dict = Plot_General.get_standard_colors_dict(top_mjr_mnr_causes)

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, df_by_cause_from_by_opco, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, palette_dict=palette_dict)
fig_num += 1 

In [None]:
df_by_opco['OPRTG_UNT_NM'].unique()

In [None]:
# NOTE: Some instances where e.g. OPERATING_UNIT_ID==3 but OPRTG_UNT_NM is empty
# df_oh = df_by_opco[df_by_opco['OPERATING_UNIT_ID']=='3'].copy()
df_oh = df_by_opco[df_by_opco['OPRTG_UNT_NM']=='AEP-Ohio'].copy()
print(df_oh.shape)
print(df_oh['MJR_MNR_CAUSE_NM'].nunique())

In [None]:
df_oh['MJR_MNR_CAUSE_NM'].value_counts()

In [None]:
df_oh[df_oh['MJR_MNR_CAUSE_NM']=='DL-EQUIPMENT FAILURE']

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, df_oh, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, palette_dict=palette_dict)
fig_num += 1 

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, 
                                       palette_dict=palette_dict
                                      )
fig_num += 1 

In [None]:
df_oh

In [None]:
test_a = outgs_df_2.copy()
test_b = df_oh.copy()

In [None]:
tmp_reasons = test_a.sort_values(by='pct_cmi_nb', ascending=False)['MJR_MNR_CAUSE_NM'].tolist()[:10]

In [None]:
test_a=test_a[test_a['MJR_MNR_CAUSE_NM'].isin(tmp_reasons)].copy()
test_b=test_b[test_b['MJR_MNR_CAUSE_NM'].isin(tmp_reasons)].copy()

In [None]:
test_a = test_a.sort_values(by='pct_cmi_nb', ascending=False)
# test_b = test_b.sort_values(by='pct_cmi_nb', ascending=False)

In [None]:
test_a['hue'] = 'A'
test_b['hue'] = 'B'

In [None]:
test=pd.concat([test_a, test_b])

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])
sns.barplot(ax=ax, x='MJR_MNR_CAUSE_NM', y='pct_cmi_nb', data=test, hue='hue')

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])
sns.barplot(ax=ax, x='MJR_MNR_CAUSE_NM', y='pct_cmi_nb', data=test, hue='hue')

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, 
                                       palette_dict=palette_dict
                                      )
ax = Plot_Bar.adjust_bar_and_line_positions_and_widths(
    ax=ax, 
    div_width_by=2, 
    position_idx=0, 
    i_patch_beg=0, 
    i_patch_end=len(ax.patches), 
#     orient=orient
)
fig_num += 1 

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, df_oh, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, palette_dict=palette_dict, order=tmp_reasons)
fig_num += 1 

In [None]:
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, 
                                       palette_dict=palette_dict, order=tmp_reasons
                                      )
fig_num += 1 

In [None]:
    if sort_values:
        df_to_plot = df_to_plot.sort_values(by=y_col, ascending=False)

In [None]:
outgs_df_2

In [None]:
# Draw df_oh and outgs_df_2 as two hatched bar series on the same axes, each shrunk to half
# width and placed at position 0 / 1 so the pairs sit side by side.
# NOTE(review): order= and hatch= are only explicit inputs of the draw_outages_summary_barplot
#   version defined in a later cell of this notebook (with that version, passing order makes it
#   ignore y_threshold and sort_values); the earlier definition just swallows them in **kwargs.
#   Make sure the intended definition has been run before this cell.
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
# Patch counts before/after each draw identify which bars were added by that draw
n_patches_beg = len(ax.patches)
fig, ax = draw_outages_summary_barplot(fig, ax, df_oh, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, palette_dict=palette_dict, order=tmp_reasons, hatch='/')
n_patches_end = len(ax.patches) # Needed in case properties of over/underflow bins need changed
ax = Plot_Bar.adjust_bar_and_line_positions_and_widths(
    ax=ax, 
    div_width_by=2, 
    position_idx=0, 
    i_patch_beg=n_patches_beg, 
    i_patch_end=n_patches_end, 
#     orient=orient
)


# Second series (outgs_df_2): same settings, different hatch, position_idx=1
n_patches_beg = len(ax.patches)
fig, ax = draw_outages_summary_barplot(fig, ax, outgs_df_2, 
                                       x_col='MJR_MNR_CAUSE_NM', y_col='pct_cmi_nb', 
                                       y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
                                       ylabel='% CMI', xlabel='Major-Minor Cause', title='Outage CMI by Cause', 
                                       x_tick_rotation=90, 
                                       palette_dict=palette_dict, order=tmp_reasons, hatch='\\\\'
                                      )
n_patches_end = len(ax.patches) # Needed in case properties of over/underflow bins need changed
ax = Plot_Bar.adjust_bar_and_line_positions_and_widths(
    ax=ax, 
    div_width_by=2, 
    position_idx=1, 
    i_patch_beg=n_patches_beg, 
    i_patch_end=n_patches_end, 
#     orient=orient
)


# Legend keyed on hatch pattern only (unfilled patches), since bar colors encode the cause
legend_elements = [
    mpatches.Patch(facecolor=None, fill=False, hatch='/', label='df_oh'), 
    mpatches.Patch(facecolor=None, fill=False, hatch='\\\\', label='outgs_df_2')
]
ax.legend(
    title=None, 
    handles=legend_elements, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)

fig_num += 1 

In [None]:
def draw_outages_summary_barplot(
    fig, 
    ax, 
    df, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_cmi_nb', 
    sort_values=True, 
    y_threshold=None, 
    order=None,
    abbr_leg_str=None, 
    include_labels='include', 
    x_tick_new_labels_dict=None, 
    return_x_values_included=False, 
    div_drawn_width_by=None, 
    relative_position_idx=None,
    **kwargs
):
    r"""
    Draw a seaborn bar plot of y_col versus x_col from df onto ax.

    Current kwargs keys: 
        palette_dict
        xlabel
        ylabel
        x_tick_rotation
        title
        title_font_size
        hatch

    fig:
        Not used for drawing; simply handed back to the caller.
    ax:
        Axes to draw on.
    df:
        Source pd.DataFrame.  It is not modified (a copy is plotted).
    x_col, y_col:
        Columns of df supplying the categories and the bar heights.
    sort_values:
        If True, rows are sorted by y_col (descending) before plotting.
    y_threshold:
        if y_threshold is not None, it should be a dict with keys = ['threshold_col', 'threshold_val']
          e.g. y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':1.0}
        Only rows with df[threshold_col] > threshold_val are plotted.
    order:
        Passed to sns.barplot as order.
        If order is given, both y_threshold and sort_values are ignored (a warning is printed).
    abbr_leg_str:
        If not None, text written to the right of the axes at (1.025, 0.9) in axes coordinates.
    include_labels: 
        can equal 'include', 'exclude', 'passive' (CASE INSENSITIVE)
          'include' ==> set axis labels and tick formatting
          'exclude' ==> blank out tick labels and axis labels
          'passive' ==> leave labels/ticks untouched
        Any other value falls back to 'include' (a warning is printed).
    x_tick_new_labels_dict:
        Optional mapping from current x tick label text to replacement text (only used when
        include_labels=='include').  Labels missing from the mapping are kept as they are.
    return_x_values_included:
        If True, also return the unique x_col values of the frame that was plotted.
    div_drawn_width_by, relative_position_idx:
        If div_drawn_width_by is not None, the bars added by this call are passed to
        Plot_Bar.adjust_bar_and_line_positions_and_widths with these values
        (relative_position_idx defaults to 0).

    Returns (fig, ax), or (fig, ax, x_values) when return_x_values_included is True.
    """
    #-------------------------
    # If order is given, both y_threshold and sort_values will be ignored
    # NOTE: Through order one can also set the number of reasons to be included
    if order is not None:
        if sort_values:
            print('Warning: order is not None, so sort_values will be ignored')
            sort_values = False
        if y_threshold is not None:
            print('Warning: order is not None, so y_threshold will be ignored')
            y_threshold = None
    #---------------------------
    n_patches_beg = len(ax.patches) # Needed in case adjust_bar_and_line_positions_and_widths used
    #-------------------------
    include_labels = include_labels.lower()
    possible_include_labels = ['include', 'exclude', 'passive']
    if include_labels not in possible_include_labels:
        # Fall back to the default.  (Previously the list of allowed values was overwritten
        # instead of include_labels, so an unknown value tripped the assert further down.)
        print(f"Warning: include_labels='{include_labels}' not in {possible_include_labels}, so 'include' will be used")
        include_labels = 'include'
    #---------------
    if y_threshold is not None:
        assert 'threshold_col' in y_threshold and 'threshold_val' in y_threshold, \
            "y_threshold must contain keys 'threshold_col' and 'threshold_val'"
        df_to_plot = df[df[y_threshold['threshold_col']] > y_threshold['threshold_val']].copy()
    else:
        df_to_plot = df.copy()
    #---------------
    if sort_values:
        df_to_plot = df_to_plot.sort_values(by=y_col, ascending=False)
    #---------------
    sns.barplot(
        ax=ax, 
        x=x_col, 
        y=y_col, 
        data=df_to_plot, 
        order=order, 
        palette=kwargs.get('palette_dict', None), 
        hatch=kwargs.get('hatch', None)
    )
    #---------------------------
    n_patches_end = len(ax.patches) # Needed in case properties of over/underflow bins need changed
    #---------------------------
    if div_drawn_width_by is not None:
        if relative_position_idx is None:
            relative_position_idx = 0
        # Only the patches added by this call, [n_patches_beg, n_patches_end), are adjusted
        ax = Plot_Bar.adjust_bar_and_line_positions_and_widths(
            ax=ax, 
            div_width_by=div_drawn_width_by, 
            position_idx=relative_position_idx, 
            i_patch_beg=n_patches_beg, 
            i_patch_end=n_patches_end, 
            orient='v'
        )
    #---------------
    if include_labels=='include':
        ax.set_ylabel(kwargs.get('ylabel', y_col), fontsize=24, x=0.0, y=0.8, ha='left', va='bottom')
        ax.set_xlabel(kwargs.get('xlabel', x_col), fontsize=24, x=0.9, y=0.0, ha='right', va='top')   
        ax.tick_params(axis='both', labelsize=12)

        x_tick_rotation = kwargs.get('x_tick_rotation', 90)
        ax.tick_params(axis='x', labelrotation=x_tick_rotation, labelsize=12.0, direction='in')
        if x_tick_rotation != 0 and x_tick_rotation != 90:
            #-- Align rotated labels with ticks at end, not center
            for label in ax.xaxis.get_majorticklabels():
                label.set_horizontalalignment('right')

        if x_tick_new_labels_dict is not None:
            labels = [x.get_text() for x in ax.get_xticklabels()]
            # Keep the existing text for any label without a replacement
            new_labels = [x_tick_new_labels_dict.get(x, x) for x in labels]
            ax.set_xticklabels(new_labels)
    elif include_labels=='exclude':
        ax.set(xticklabels=[])
        ax.set(yticklabels=[])
        ax.set(xlabel=None)
        ax.set(ylabel=None)
    # else: include_labels=='passive' ==> leave labels and ticks as they are
    #---------------
    if abbr_leg_str is not None:
        ax.text(1.025, 0.9, abbr_leg_str, transform=ax.transAxes, fontsize=20, ha='left', va='top')
    ax.set_title(kwargs.get('title', None), fontsize=kwargs.get('title_font_size', 40))
    if return_x_values_included:
        return fig, ax, df_to_plot[x_col].unique().tolist()
    else:
        return fig, ax


In [None]:
# % CMI by cause for df_oh and outgs_df_2, drawn on a single axes.
# The first call selects the causes (above pct_cutoff, sorted by % CMI); the second call is
# given that same list through `order` so it draws the same categories in the same order.
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
#-------------------------
# Arguments shared by both draw calls
_common_kwargs = dict(
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_cmi_nb', 
    div_drawn_width_by=2, 
    ylabel='% CMI', 
    xlabel='Major-Minor Cause', 
    title='Outage CMI by Cause', 
    x_tick_rotation=90, 
    palette_dict=palette_dict
)
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=df_oh, 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    relative_position_idx=0, 
    hatch='/', 
    **_common_kwargs
)
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_df_2, 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    relative_position_idx=1, 
    hatch='\\\\', 
    **_common_kwargs
)
#-------------------------
# Legend distinguishes the two data sets by hatch only
legend_elements = [
    mpatches.Patch(facecolor=None, fill=False, hatch='/', label='df_oh'), 
    mpatches.Patch(facecolor=None, fill=False, hatch='\\\\', label='outgs_df_2')
]
ax.legend(
    handles=legend_elements, 
    title=None, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)
#-------------------------
fig_num += 1

In [None]:
# % CI by cause for df_oh and outgs_df_2, drawn on a single axes.
# The first call selects the causes (above pct_cutoff, sorted by % CI); the second call is
# given that same list through `order` so it draws the same categories in the same order.
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
#-------------------------
# Arguments shared by both draw calls
_common_kwargs = dict(
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    div_drawn_width_by=2, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90, 
    palette_dict=palette_dict
)
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=df_oh, 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_ci_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    relative_position_idx=0, 
    hatch='/', 
    **_common_kwargs
)
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_df_2, 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    relative_position_idx=1, 
    hatch='\\\\', 
    **_common_kwargs
)
#-------------------------
# Legend distinguishes the two data sets by hatch only
legend_elements = [
    mpatches.Patch(facecolor=None, fill=False, hatch='/', label='df_oh'), 
    mpatches.Patch(facecolor=None, fill=False, hatch='\\\\', label='outgs_df_2')
]
ax.legend(
    handles=legend_elements, 
    title=None, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)
#-------------------------
fig_num += 1

In [None]:
# % CI by cause for df_oh (all bars red) and outgs_df_2 (all bars blue) on a single axes.
# The per-cause palette is replaced by a single colour per data set; the second call reuses
# the causes selected by the first one through `order`.
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
#-------------------------
# Arguments shared by both draw calls
_common_kwargs = dict(
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    div_drawn_width_by=2, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90
)
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=df_oh, 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_ci_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    relative_position_idx=0, 
    palette_dict=dict.fromkeys(palette_dict, 'red'), 
    hatch='/', 
    **_common_kwargs
)
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_df_2, 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    relative_position_idx=1, 
    palette_dict=dict.fromkeys(palette_dict, 'blue'), 
    hatch='\\\\', 
    **_common_kwargs
)
#-------------------------
legend_elements = [
    mpatches.Patch(facecolor='red', fill=True, hatch='/', label='df_oh'), 
    mpatches.Patch(facecolor='blue', fill=True, hatch='\\\\', label='outgs_df_2')
]
ax.legend(
    handles=legend_elements, 
    title=None, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)
#-------------------------
fig_num += 1

In [None]:
# Sanity check: one row per (non-null) cause name in df_oh
assert df_oh['MJR_MNR_CAUSE_NM'].nunique() == len(df_oh)

In [None]:
# NOTE(review): this name is not assigned in any cell visible above this point, so a
# top-to-bottom run would raise NameError here.  Presumably a leftover scratch cell (or a
# deliberate "stop here" marker) -- confirm and remove.
fuck

In [None]:
df_oh

In [None]:

# Reshape df_oh into a single-row frame: the one remaining row is 'pct_cmi_nb' and the
# columns are the MJR_MNR_CAUSE_NM values.
# NOTE(review): the variable names in this cell are unprofessional; consider renaming them
# once it is confirmed that no other cell relies on them.
fuck = df_oh[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']].T.copy()
fuck.columns = fuck.loc['MJR_MNR_CAUSE_NM']
fuck=fuck.drop(index='MJR_MNR_CAUSE_NM')
# Bare expression in the middle of a cell: has no effect under default notebook display settings
fuck

# Same reshaping for outgs_df_2
fuck_2 = outgs_df_2[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']].T.copy()
fuck_2.columns = fuck_2.loc['MJR_MNR_CAUSE_NM']
fuck_2=fuck_2.drop(index='MJR_MNR_CAUSE_NM')
# Bare expression in the middle of a cell: has no effect under default notebook display settings
fuck_2

# NOTE: because common_barplot_kwargs is unpacked last in the dict merges below, its
#       hatch='//' overrides the hatch=None given per data set
common_barplot_kwargs = dict(alpha=0.25, fill=True, edgecolor='black', hatch='//')
fig, ax0 = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])
# reason_order = mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm').mean().sort_values(ascending=False).index.tolist()
# Causes ordered by descending % CMI in outgs_df_2.
# reason_order and n_reason_to_include are also used by the next plotting cell.
reason_order = outgs_df_2.sort_values(by='pct_cmi_nb', ascending=False)['MJR_MNR_CAUSE_NM'].tolist()
n_reason_to_include=15
# NOTE(review): title/xlabel/ylabel below are still the placeholders 'Title', 'x-label', 'y-label'
# NOTE(review): fig_num is not incremented in this cell, so the next cell reuses the same num
ax0 = Plot_Bar.plot_multiple_barplots(
    ax=ax0, 
    dfs_w_args=[
        (fuck_2, {**dict(facecolor='red', hatch=None, label="Outages (All Xfmrs)"), **common_barplot_kwargs}), 
        (fuck, {**dict(facecolor='green', hatch=None, label="Outages (Xfmr tied to Outg. Location ID)"), **common_barplot_kwargs})
    ], 
    order=reason_order, 
    n_bars_to_include=n_reason_to_include, 
    keep_edges_opaque=True, 
    include_hatches=False, 
    draw_side_by_side=True, 
    draw_single_idx_full_width=None,     
    run_set_general_plotting_args=True, 
    orient='v',
    draw_legend=True, 
    legend_args=dict(fontsize=15), 
    title_args=dict(label=f'Title', fontsize=20), 
    ylabel_args = dict(ylabel=f'y-label', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
    xlabel_args = dict(xlabel='x-label', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
    tick_args=[dict(axis='x', labelrotation=90, labelsize=7), 
              dict(axis='y', labelsize=15)]
)
# Plot_General.save_fig(fig, save_dir, f'fig_0.{save_ext}')

In [None]:
# % CMI by cause for outgs_df_2 (all bars red) and df_oh (all bars green) on a single axes.
# Both calls draw the first n_reason_to_include entries of reason_order, in that order.
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
#-------------------------
# Arguments shared by both draw calls
_common_kwargs = dict(
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_cmi_nb', 
    sort_values=False, 
    y_threshold=None, 
    order=reason_order[:n_reason_to_include], 
    div_drawn_width_by=2, 
    ylabel='% CMI', 
    xlabel='Major-Minor Cause', 
    title='Outage CMI by Cause', 
    x_tick_rotation=90
)
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_df_2, 
    return_x_values_included=False, 
    relative_position_idx=0, 
    palette_dict=dict.fromkeys(palette_dict, 'red'), 
    hatch='\\\\', 
    **_common_kwargs
)
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=df_oh, 
    return_x_values_included=True, 
    relative_position_idx=1, 
    palette_dict=dict.fromkeys(palette_dict, 'green'), 
    hatch='/', 
    **_common_kwargs
)
#-------------------------
legend_elements = [
    mpatches.Patch(facecolor='red', fill=True, hatch='\\\\', label='outgs_df_2'), 
    mpatches.Patch(facecolor='green', fill=True, hatch='/', label='df_oh')
]
ax.legend(
    handles=legend_elements, 
    title=None, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)
#-------------------------
fig_num += 1

In [None]:
df_oh.columns

In [None]:
outgs_df_2.columns

In [None]:
# Rows of df_by_opco whose OPRTG_UNT_NM is 'AEP-Ohio' (independent copy).
# NOTE(review): df_oh is already used by plotting cells earlier in this notebook; if this is
# its only definition those cells fail on Restart & Run All -- confirm and move this up.
df_oh = df_by_opco[df_by_opco['OPRTG_UNT_NM']=='AEP-Ohio'].copy()

In [None]:
# Run the standard outage SQL query (with the equipment-types dimension, without premise info)
# for opco '3' over calendar year 2021; the resulting frame is available as we.df
_sql_kwargs = dict(
    include_premise=False, 
    include_DOVS_EQUIPMENT_TYPES_DIM=True, 
    opco='3', 
    opco_col='OPCO_NBR', 
    date_range=['2021-01-01', '2021-12-31']
)
we = DOVSOutages(
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_std_outage, 
    build_sql_function_kwargs=_sql_kwargs
)

In [None]:
we.df

In [None]:
print(we.get_sql_statement())

In [None]:
we.df.shape

In [None]:
# Show the rows of we.df whose equipment type is an overhead or underground transformer
xfmr_equip_typ_nms_of_interest = ['TRANSFORMER, OH', 'TRANSFORMER, UG']
_is_xfmr_equip = we.df['EQUIP_TYP_NM'].isin(xfmr_equip_typ_nms_of_interest)
we.df[_is_xfmr_equip]

In [None]:
# Keep only outage rows where LOCATION_ID equals trsf_pole_nb AND the equipment type is
# an overhead or underground transformer
xfmr_equip_typ_nms_of_interest = ['TRANSFORMER, OH', 'TRANSFORMER, UG']
_loc_is_xfmr_pole = df_outage['LOCATION_ID'] == df_outage['trsf_pole_nb']
_is_xfmr_equip = df_outage['EQUIP_TYP_NM'].isin(xfmr_equip_typ_nms_of_interest)
df_outage_prim_strict = df_outage[_loc_is_xfmr_pole & _is_xfmr_equip].copy()

In [None]:
DOVSOutages_SQL.build_sql_std_outage

In [None]:
# Aggregated outage query: CI_NB and CMI_NB summed per (major cause, minor cause, equipment type),
# restricted through the kwargs below to mjr_mnr_cause ['DL', 'EQF'], opco '3' and calendar year 2021
_sql_kwargs = dict(
    #outg_rec_nbs=cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist(), 
    #field_to_split='outg_rec_nbs', 
    from_table_alias='DOV', 
    datetime_col='DT_OFF_TS_FULL', 
    cols_of_interest=['CI_NB', 'CMI_NB', 'MJR_CAUSE_CD', 'MNR_CAUSE_CD'], 
    mjr_mnr_cause=['DL', 'EQF'], 
    include_DOVS_OUTAGE_CAUSE_TYPES_DIM=True, 
    include_DOVS_EQUIPMENT_TYPES_DIM=True, 
    groupby_cols=[
        'DOV.MJR_CAUSE_CD', 'DOV.MNR_CAUSE_CD', 
        'DOV5.MJR_CAUSE_NM', 'DOV5.MNR_CAUSE_NM', 
        'DOV4.EQUIP_TYP_NM', 'DOV4.SHORT_NM'
    ], 
    agg_cols_and_types={'DOV.CI_NB':['sum'], 'DOV.CMI_NB':['sum']}, 
    opco='3', 
    opco_col='OPCO_NBR', 
    date_range=['2021-01-01', '2021-12-31']
)
dovs_outgs_NEW = DOVSOutages(
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_outage, 
    build_sql_function_kwargs=_sql_kwargs
)

In [None]:
dovs_outgs_NEW.df

In [None]:
print(dovs_outgs_NEW.get_sql_statement())

In [None]:
# Number of distinct major causes, minor causes and equipment types, then the frame shape
for _col in ['MJR_CAUSE_NM', 'MNR_CAUSE_NM', 'EQUIP_TYP_NM']:
    print(dovs_outgs_NEW.df[_col].nunique())
print(dovs_outgs_NEW.df.shape)

In [None]:
# Aggregated outage query: CI_NB and CMI_NB summed per equipment type for mjr_mnr_cause ['DL', 'EQF'].
# NOTE: Unlike dovs_outgs_NEW, no opco/opco_col/date_range (and no cause-types dimension) is passed here
_sql_kwargs = dict(
    #outg_rec_nbs=cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist(), 
    #field_to_split='outg_rec_nbs', 
    from_table_alias='DOV', 
    datetime_col='DT_OFF_TS_FULL', 
    cols_of_interest=['CI_NB', 'CMI_NB'], 
    mjr_mnr_cause=['DL', 'EQF'], 
    #include_DOVS_OUTAGE_CAUSE_TYPES_DIM=True, 
    include_DOVS_EQUIPMENT_TYPES_DIM=True, 
    groupby_cols=['DOV4.EQUIP_TYP_NM', 'DOV4.SHORT_NM'], 
    agg_cols_and_types={'DOV.CI_NB':['sum'], 'DOV.CMI_NB':['sum']}, 
    #opco='3', 
    #opco_col='OPCO_NBR', 
    #date_range=['2021-01-01', '2021-12-31']
)
dovs_outgs_NEW_2 = DOVSOutages(
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_outage, 
    build_sql_function_kwargs=_sql_kwargs
)

In [None]:
dovs_outgs_NEW_2.df.shape

In [None]:
# print(dovs_outgs_NEW_2.df['MJR_CAUSE_NM'].nunique())
# print(dovs_outgs_NEW_2.df['MNR_CAUSE_NM'].nunique())
print(dovs_outgs_NEW_2.df['EQUIP_TYP_NM'].nunique())
print(dovs_outgs_NEW_2.df.shape)

In [None]:
dovs_outgs_NEW_2.df['EQUIP_TYP_NM'].value_counts()

In [None]:
dovs_outgs_NEW_2.df[dovs_outgs_NEW_2.df['EQUIP_TYP_NM']=='CUTOUT']

In [None]:
# NOTE(review): scratch cell -- a bare list literal followed by a trailing comma, i.e. a 1-tuple
# that is only displayed and never assigned.  It repeats the groupby_cols list passed in the
# dovs_outgs_NEW query above; presumably safe to delete -- confirm.
['DOV.MJR_CAUSE_CD', 'DOV.MNR_CAUSE_CD', 'DOV5.MJR_CAUSE_NM', 'DOV5.MNR_CAUSE_NM', 'DOV4.EQUIP_TYP_NM', 'DOV4.SHORT_NM'], 

In [None]:
print(dovs_outgs_NEW_2.get_sql_statement())

In [None]:
# All outages from the standard query for 2018-01-01 through 2022-09-30.
# NOTE: opco/opco_col are not passed to the query; AEP-Ohio is selected afterwards via OPRTG_UNT_NM
dovs_outgs_all = DOVSOutages(                 
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_std_outage, 
    build_sql_function_kwargs=dict(
        include_premise=False, 
#         opco='3', 
#         opco_col='OPCO_NBR', 
        date_range=['2018-01-01', '2022-09-30']
    )
)
# Boolean indexing already returns a new frame; .copy() keeps it independent of dovs_outgs_all.df
dovs_outgs_all_df = dovs_outgs_all.df[dovs_outgs_all.df['OPRTG_UNT_NM']=='AEP-Ohio'].copy()
#-------------------------
# Total CI/CMI per major/minor cause, plus each cause's percentage of the total.
# NOTE: Use the groupby .sum() reduction instead of .apply(sum): passing the Python builtin relies
#       on pandas silently substituting np.sum, which is deprecated.
outgs_all_mjr_mnr = (
    dovs_outgs_all_df
    .groupby(['MJR_CAUSE_CD', 'MNR_CAUSE_CD', 'MJR_CAUSE_NM', 'MNR_CAUSE_NM'])[['CI_NB', 'CMI_NB']]
    .sum()
    .reset_index()
)
outgs_all_mjr_mnr = set_pct_col(outgs_all_mjr_mnr, col='CMI_NB', pct_col='pct_cmi_nb')
outgs_all_mjr_mnr = set_pct_col(outgs_all_mjr_mnr, col='CI_NB', pct_col='pct_ci_nb')
outgs_all_mjr_mnr = DOVSOutages.set_mjr_mnr_cause_nm_col(outgs_all_mjr_mnr, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)
#-------------------------
# Same totals per equipment type, restricted to the DL-EQF cause
outgs_all_eqp_type = dovs_outgs_all_df[(dovs_outgs_all_df['MJR_CAUSE_CD']=='DL') & (dovs_outgs_all_df['MNR_CAUSE_CD']=='EQF')].copy()
outgs_all_eqp_type = (
    outgs_all_eqp_type
    .groupby(['EQUIP_TYP_NM', 'SHORT_NM_EQP_TYP'])[['CI_NB', 'CMI_NB']]
    .sum()
    .reset_index()
)
outgs_all_eqp_type = set_pct_col(outgs_all_eqp_type, col='CMI_NB', pct_col='pct_cmi_nb')
outgs_all_eqp_type = set_pct_col(outgs_all_eqp_type, col='CI_NB', pct_col='pct_ci_nb')
#-------------------------

In [None]:
# cpo_df = mecpo_coll_drct.get_cpo_df('01-05 Days', 'rcpo_df_norm').copy()
# outg_rec_nb_idx_lvl=0

# dovs_outgs_ede = DOVSOutages(                 
#     df_construct_type=DFConstructType.kRunSqlQuery, 
#     contstruct_df_args=None, 
#     init_df_in_constructor=True, 
#     build_sql_function=DOVSOutages_SQL.build_sql_std_outage, 
#     build_sql_function_kwargs=dict(
#         outg_rec_nbs=cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist(), 
#         field_to_split='outg_rec_nbs', 
#         include_premise=False
#     )
# )
# #-------------------------
# outgs_ede_mjr_mnr = dovs_outgs_ede.df.groupby(['MJR_CAUSE_CD', 'MNR_CAUSE_CD', 'MJR_CAUSE_NM', 'MNR_CAUSE_NM'])[['CI_NB', 'CMI_NB']].apply(sum).reset_index()
# outgs_ede_mjr_mnr = set_pct_col(outgs_ede_mjr_mnr, col='CMI_NB', pct_col='pct_cmi_nb')
# outgs_ede_mjr_mnr = set_pct_col(outgs_ede_mjr_mnr, col='CI_NB', pct_col='pct_ci_nb')
# outgs_ede_mjr_mnr = DOVSOutages.set_mjr_mnr_cause_nm_col(outgs_ede_mjr_mnr, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)
# #-------------------------
# outgs_ede_eqp_type = dovs_outgs_ede.df[(dovs_outgs_ede.df['MJR_CAUSE_CD']=='DL') & (dovs_outgs_ede.df['MNR_CAUSE_CD']=='EQF')].copy()
# outgs_ede_eqp_type = outgs_ede_eqp_type.groupby(['EQUIP_TYP_NM', 'SHORT_NM_EQP_TYP'])[['CI_NB', 'CMI_NB']].apply(sum).reset_index()
# outgs_ede_eqp_type = set_pct_col(outgs_ede_eqp_type, col='CMI_NB', pct_col='pct_cmi_nb')
# outgs_ede_eqp_type = set_pct_col(outgs_ede_eqp_type, col='CI_NB', pct_col='pct_ci_nb')
# #-------------------------

In [None]:
# Collect the distinct outage record numbers (index level outg_rec_nb_idx_lvl) found in the
# 'rcpo_df_norm' frame of every analysis in mecpo_an_keys
outg_rec_nbs = []
outg_rec_nb_idx_lvl=0
for mecpo_an_key in mecpo_an_keys:
    cpo_df = mecpo_coll_drct.get_cpo_df(mecpo_an_key, 'rcpo_df_norm')
    outg_rec_nbs.extend(cpo_df.index.get_level_values(outg_rec_nb_idx_lvl).tolist())
outg_rec_nbs = list(set(outg_rec_nbs))

# Standard outage query restricted to those record numbers
dovs_outgs_ede = DOVSOutages(                 
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True, 
    build_sql_function=DOVSOutages_SQL.build_sql_std_outage, 
    build_sql_function_kwargs=dict(
        outg_rec_nbs=outg_rec_nbs, 
        field_to_split='outg_rec_nbs', 
        include_premise=False
    )
)
#-------------------------
# Total CI/CMI per major/minor cause, plus each cause's percentage of the total.
# NOTE: Use the groupby .sum() reduction instead of .apply(sum): passing the Python builtin relies
#       on pandas silently substituting np.sum, which is deprecated.
outgs_ede_mjr_mnr = (
    dovs_outgs_ede.df
    .groupby(['MJR_CAUSE_CD', 'MNR_CAUSE_CD', 'MJR_CAUSE_NM', 'MNR_CAUSE_NM'])[['CI_NB', 'CMI_NB']]
    .sum()
    .reset_index()
)
outgs_ede_mjr_mnr = set_pct_col(outgs_ede_mjr_mnr, col='CMI_NB', pct_col='pct_cmi_nb')
outgs_ede_mjr_mnr = set_pct_col(outgs_ede_mjr_mnr, col='CI_NB', pct_col='pct_ci_nb')
outgs_ede_mjr_mnr = DOVSOutages.set_mjr_mnr_cause_nm_col(outgs_ede_mjr_mnr, set_null_to_NA=True, mjr_cause_nm_abbr_dict=mjr_cause_nm_abbr_dict)
#-------------------------
# Same totals per equipment type, restricted to the DL-EQF cause
outgs_ede_eqp_type = dovs_outgs_ede.df[(dovs_outgs_ede.df['MJR_CAUSE_CD']=='DL') & (dovs_outgs_ede.df['MNR_CAUSE_CD']=='EQF')].copy()
outgs_ede_eqp_type = (
    outgs_ede_eqp_type
    .groupby(['EQUIP_TYP_NM', 'SHORT_NM_EQP_TYP'])[['CI_NB', 'CMI_NB']]
    .sum()
    .reset_index()
)
outgs_ede_eqp_type = set_pct_col(outgs_ede_eqp_type, col='CMI_NB', pct_col='pct_cmi_nb')
outgs_ede_eqp_type = set_pct_col(outgs_ede_eqp_type, col='CI_NB', pct_col='pct_ci_nb')
#-------------------------

In [None]:
# % CMI by cause for the outages in outgs_ede_mjr_mnr, drawn in the order given by tmp_reasons
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_ede_mjr_mnr, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_cmi_nb', 
    y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
    order=tmp_reasons, 
    ylabel='% CMI', 
    xlabel='Major-Minor Cause', 
    title='Outage CMI by Cause', 
    x_tick_rotation=90, 
    palette_dict=palette_dict
)
fig_num += 1

In [None]:
# % CMI by equipment type (DL-EQF outages only) for the outages in outgs_ede_eqp_type.
# NOTE: The title previously read 'Outage CI ...' although y_col, threshold and ylabel are all CMI
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_ede_eqp_type, 
    x_col='EQUIP_TYP_NM', 
    y_col='pct_cmi_nb', 
    y_threshold={'threshold_col':'pct_cmi_nb', 'threshold_val':pct_cutoff}, 
    ylabel='% CMI', 
    xlabel='Equipment Type', 
    title='Outage CMI by Equip. Type (for DL-EQF)', 
    x_tick_rotation=90
)
fig_num += 1

In [None]:
# Causes above pct_cutoff, per data set (ede = model data, dovs = all outages) and per metric
top_causes_ede_cmi = outgs_ede_mjr_mnr[outgs_ede_mjr_mnr['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_ede_ci  = outgs_ede_mjr_mnr[outgs_ede_mjr_mnr['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]
#-----
top_causes_dovs_cmi = outgs_all_mjr_mnr[outgs_all_mjr_mnr['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_dovs_ci  = outgs_all_mjr_mnr[outgs_all_mjr_mnr['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]
#-------------------------
# Give all four frames a common 'pct' column so they can be stacked
top_causes_ede_cmi = top_causes_ede_cmi.rename(columns={'pct_cmi_nb':'pct'})
# NOTE: This must rename top_causes_ede_ci itself.  Renaming top_causes_ede_cmi here (as was done
#       before) silently replaced the CI selection with a second copy of the CMI selection.
top_causes_ede_ci  = top_causes_ede_ci.rename(columns={'pct_ci_nb':'pct'})
#-----
top_causes_dovs_cmi = top_causes_dovs_cmi.rename(columns={'pct_cmi_nb':'pct'})
top_causes_dovs_ci  = top_causes_dovs_ci.rename(columns={'pct_ci_nb':'pct'})
#-------------------------
# One row per cause carrying its largest percentage across the four selections.
# NOTE: .max() is used instead of .apply(max); passing the Python builtin to apply is deprecated in pandas
top_causes = pd.concat([top_causes_ede_cmi, top_causes_ede_ci, top_causes_dovs_cmi, top_causes_dovs_ci])
top_causes = (
    top_causes
    .groupby('MJR_MNR_CAUSE_NM')['pct']
    .max()
    .reset_index()
    .sort_values(by='pct', ignore_index=True, ascending=False)
)
palette_dict = Plot_General.get_standard_colors_dict(top_causes['MJR_MNR_CAUSE_NM'].tolist())


# % CI by cause: all outages (red) versus the model data (blue) on a single axes.
# The second call reuses the causes selected by the first one through `order`.
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
legend_elements = []
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_all_mjr_mnr, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_ci_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    div_drawn_width_by=2, 
    relative_position_idx=0, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90, 
    palette_dict={k:'red' for k in palette_dict}, 
    hatch='/'
)
legend_elements.append(mpatches.Patch(facecolor='red', fill=True, hatch='/', label='All Outages'))
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_ede_mjr_mnr, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    div_drawn_width_by=2, 
    relative_position_idx=1, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90, 
    palette_dict={k:'blue' for k in palette_dict}, 
    hatch='\\\\'
)
legend_elements.append(mpatches.Patch(facecolor='blue', fill=True, hatch='\\\\', label='Model Data'))
#-------------------------
ax.legend(
    title=None, 
    handles=legend_elements, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)


#-------------------------
fig_num += 1

In [None]:
# NOTE(review): this cell repeats the preceding cell's logic (same selections, same % CI plot);
# presumably one of the two was meant to show % CMI -- confirm, or delete the duplicate.
#-------------------------
# Causes above pct_cutoff, per data set (ede = model data, dovs = all outages) and per metric
top_causes_ede_cmi = outgs_ede_mjr_mnr[outgs_ede_mjr_mnr['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_ede_ci  = outgs_ede_mjr_mnr[outgs_ede_mjr_mnr['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]
#-----
top_causes_dovs_cmi = outgs_all_mjr_mnr[outgs_all_mjr_mnr['pct_cmi_nb']>pct_cutoff].sort_values(by='pct_cmi_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_cmi_nb']]
top_causes_dovs_ci  = outgs_all_mjr_mnr[outgs_all_mjr_mnr['pct_ci_nb']>pct_cutoff].sort_values(by='pct_ci_nb', ascending=False)[['MJR_MNR_CAUSE_NM', 'pct_ci_nb']]
#-------------------------
# Give all four frames a common 'pct' column so they can be stacked
top_causes_ede_cmi = top_causes_ede_cmi.rename(columns={'pct_cmi_nb':'pct'})
# NOTE: This must rename top_causes_ede_ci itself.  Renaming top_causes_ede_cmi here (as was done
#       before) silently replaced the CI selection with a second copy of the CMI selection.
top_causes_ede_ci  = top_causes_ede_ci.rename(columns={'pct_ci_nb':'pct'})
#-----
top_causes_dovs_cmi = top_causes_dovs_cmi.rename(columns={'pct_cmi_nb':'pct'})
top_causes_dovs_ci  = top_causes_dovs_ci.rename(columns={'pct_ci_nb':'pct'})
#-------------------------
# One row per cause carrying its largest percentage across the four selections.
# NOTE: .max() is used instead of .apply(max); passing the Python builtin to apply is deprecated in pandas
top_causes = pd.concat([top_causes_ede_cmi, top_causes_ede_ci, top_causes_dovs_cmi, top_causes_dovs_ci])
top_causes = (
    top_causes
    .groupby('MJR_MNR_CAUSE_NM')['pct']
    .max()
    .reset_index()
    .sort_values(by='pct', ignore_index=True, ascending=False)
)
palette_dict = Plot_General.get_standard_colors_dict(top_causes['MJR_MNR_CAUSE_NM'].tolist())


# % CI by cause: all outages (red) versus the model data (blue) on a single axes.
# The second call reuses the causes selected by the first one through `order`.
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
legend_elements = []
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_all_mjr_mnr, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_ci_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    div_drawn_width_by=2, 
    relative_position_idx=0, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90, 
    palette_dict={k:'red' for k in palette_dict}, 
    hatch='/'
)
legend_elements.append(mpatches.Patch(facecolor='red', fill=True, hatch='/', label='All Outages'))
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_ede_mjr_mnr, 
    x_col='MJR_MNR_CAUSE_NM', 
    y_col='pct_ci_nb', 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    div_drawn_width_by=2, 
    relative_position_idx=1, 
    ylabel='% CI', 
    xlabel='Major-Minor Cause', 
    title='Outage CI by Cause', 
    x_tick_rotation=90, 
    palette_dict={k:'blue' for k in palette_dict}, 
    hatch='\\\\'
)
legend_elements.append(mpatches.Patch(facecolor='blue', fill=True, hatch='\\\\', label='Model Data'))
#-------------------------
ax.legend(
    title=None, 
    handles=legend_elements, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)


#-------------------------
fig_num += 1

In [None]:
# % CI by equipment type (DL-EQF outages): all outages versus the model data on a single axes.
# The first call selects the equipment types (above pct_cutoff, sorted by % CI); the second call
# is given that same list through `order`.
# NOTE(review): no palette_dict is passed to either call (the red/blue palettes are disabled), so
# the red/blue legend face colours below are not tied to the bar colours -- confirm the intent.
fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
#-------------------------
# Arguments shared by both draw calls
_common_kwargs = dict(
    x_col='EQUIP_TYP_NM', 
    y_col='pct_ci_nb', 
    div_drawn_width_by=2, 
    ylabel='% CI', 
    xlabel='Equipment Type', 
    title='Outage CI by Equip. Type (for DL-EQF)', 
    x_tick_rotation=90
)
#-------------------------
fig, ax, x_values_included = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_all_eqp_type, 
    sort_values=True, 
    y_threshold={'threshold_col':'pct_ci_nb', 'threshold_val':pct_cutoff}, 
    order=None, 
    return_x_values_included=True, 
    relative_position_idx=0, 
    #palette_dict={k:'red' for k,v in palette_dict.items()}, 
    hatch='/', 
    **_common_kwargs
)
#-------------------------
fig, ax = draw_outages_summary_barplot(
    fig=fig, 
    ax=ax, 
    df=outgs_ede_eqp_type, 
    sort_values=False, 
    y_threshold=None, 
    order=x_values_included, 
    return_x_values_included=False, 
    relative_position_idx=1, 
    #palette_dict={k:'blue' for k,v in palette_dict.items()}, 
    hatch='\\\\', 
    **_common_kwargs
)
#-------------------------
legend_elements = [
    mpatches.Patch(facecolor='red', fill=True, hatch='/', label='All Outages'), 
    mpatches.Patch(facecolor='blue', fill=True, hatch='\\\\', label='Model Data')
]
ax.legend(
    handles=legend_elements, 
    title=None, 
    fontsize=20, 
    title_fontsize=30, 
    loc='upper right'
)
#-------------------------
fig_num += 1