In [None]:
from importlib import reload
#reload(Utilities)
#reload(clm)
# NOTE: To reload a class imported as, e.g., 
# from module import class
# One must call:
#   1. import module
#   2. reload module
#   3. from module import class

import sys, os
import re
from pathlib import Path
import json
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version
import copy

import itertools

import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from AMIEDE_DEV import AMIEDE_DEV
from MECPODf import MECPODf
from MECPOAn import MECPOAn
from MECPOCollection import MECPOCollection
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
#sys.path.insert(0, os.path.join(os.path.realpath('..'), 'Utilities'))
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import Plot_General
import Plot_Box_sns
import Plot_Hist
import Plot_Bar
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer

In [None]:
from sklearn.preprocessing import OrdinalEncoder

In [None]:
def adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args, 
    new_cpo_df_subset_by_mjr_mnr_cause_args,
    new_max_total_counts_args=None, 
    no_outg_idxs=None,
    adjust_barplot_kwargs=None, 
    inplace=False
):
    r"""
    Adjust all of the cpo_df_subset_by_mjr_mnr_cause_args (and max_total_counts_args) in mecpo_colls_w_get_args 
    WITH THE EXCEPTION OF the elements in no_outg_idxs.
      This is because selecting an outage subset from the no-outage data doesn't make any sense!
    
    mecpo_colls_w_get_args:
      A list of dict objects.
      Each dict should have keys:
        mecpo_coll:
          The MECPOCollection object
        cpo_df_name:
          Name of cpo_df to grab from each MECPOAn in the collection for plotting
        cpo_df_subset_by_mjr_mnr_cause_args:
          Arguments to grab any subset of the data by outage major/minor cause
        max_total_counts_args=None:
          Arguments to further select a subset by the maximum number of total counts.
        mecpo_an_order:
          The order in which the analyses in MECPOAn should be plotted
          
    new_cpo_df_subset_by_mjr_mnr_cause_args:
      The value assigned to 'cpo_df_subset_by_mjr_mnr_cause_args' for every element not in no_outg_idxs.
      NOTE: The SAME object is assigned to each element (it is not copied per element).
      
    new_max_total_counts_args:
      The value assigned to 'max_total_counts_args' for every element not in no_outg_idxs.
      
    no_outg_idxs:
      An int or a list of ints giving the positions in mecpo_colls_w_get_args to be left untouched.
      Negative values index from the end, as with normal list indexing (e.g., -1 is the last element).
      Any index outside of [-len(mecpo_colls_w_get_args), len(mecpo_colls_w_get_args)) trips an assertion.
          
    adjust_barplot_kwargs:
      If one wants to, e.g., change hatches for all, etc.
      When not None, these are applied (via Plot_General.adjust_kwargs) to barplot_kwargs_shared of EVERY
        collection, including those in no_outg_idxs.
        
    inplace:
      If False (default), a deep copy of mecpo_colls_w_get_args is adjusted and returned.
      If True, mecpo_colls_w_get_args itself is adjusted and None is returned.
    """
    #-------------------------
    if not inplace:
        mecpo_colls_w_get_args = copy.deepcopy(mecpo_colls_w_get_args)
    #-------------------------
    # Positions (always stored as non-negative) which are to be left untouched
    idxs_to_skip = set()
    if no_outg_idxs is not None:
        if not isinstance(no_outg_idxs, list):
            assert(isinstance(no_outg_idxs, int))
            no_outg_idxs = [no_outg_idxs]
        n_colls = len(mecpo_colls_w_get_args)
        # Not really any harm in having an element in no_outg_idxs outside of the valid range, as this would
        # give the same functionality as no no_outg member identified in the list.
        # Assertion here just to make sure user understands what's going on
        for idx in no_outg_idxs:
            assert(-n_colls <= idx < n_colls)
            # Negative indices must be converted to their non-negative equivalent, otherwise they
            # would never match the (non-negative) enumerate index used below and would be silently ignored
            idxs_to_skip.add(idx % n_colls)
    #-------------------------
    for idx, coll_w_get_args in enumerate(mecpo_colls_w_get_args):
        if adjust_barplot_kwargs is not None:
            Plot_General.adjust_kwargs(
                coll_w_get_args['mecpo_coll'].barplot_kwargs_shared, 
                adjust_barplot_kwargs, 
                inplace=True
            )
        if idx in idxs_to_skip:
            continue
        coll_w_get_args['cpo_df_subset_by_mjr_mnr_cause_args'] = new_cpo_df_subset_by_mjr_mnr_cause_args
        coll_w_get_args['max_total_counts_args'] = new_max_total_counts_args
    #-------------------------
    if inplace:
        return
    return mecpo_colls_w_get_args

In [None]:
def adjust_cpo_df_names_in_mecpo_colls_w_get_args(
    mecpo_colls_w_get_args, 
    cpo_df_names
):
    r"""
    Adjust the cpo_df_name to be used for each MECPOCollection in mecpo_colls_w_get_args.
    
    NOTE: mecpo_colls_w_get_args is adjusted IN PLACE, and the same (adjusted) object is also returned.
          All index positions are validated before any element is adjusted, so a failed assertion
          leaves mecpo_colls_w_get_args untouched.
    
    mecpo_colls_w_get_args:
        A list of dict objects; the 'cpo_df_name' entry of the addressed elements is (re)set.
    
    cpo_df_names:
        The new cpo_df_names, which can be a string, list of lists, or dict
        string:
            A single cpo_df_name which will be used for all MECPOCollections
        list:
            Each element should be a list of length 2.
            The 0th element should contain the index position of the MECPOCollection in mecpo_colls_w_get_args.
            The 1st element should be the new cpo_df_name to use.
            An index position may appear only once.
        dict:
            Keys are the index positions of the MECPOCollections in mecpo_colls_w_get_args.
            Values are the associated new cpo_df_names to use.
    """
    #-------------------------
    assert(Utilities.is_object_one_of_types(cpo_df_names, [str, list, dict]))
    if isinstance(cpo_df_names, dict):
        cpo_df_names_dict = cpo_df_names
    elif isinstance(cpo_df_names, str):
        cpo_df_names_dict = {i:cpo_df_names for i in range(len(mecpo_colls_w_get_args))}
    elif isinstance(cpo_df_names, list):
        assert(Utilities.are_all_list_elements_of_type(cpo_df_names, list) and 
               Utilities.are_list_elements_lengths_homogeneous(cpo_df_names, 2))
        cpo_df_names_dict={}
        for idx,cpo_df_name in cpo_df_names:
            assert(isinstance(idx, int))
            assert(idx not in cpo_df_names_dict.keys())
            cpo_df_names_dict[idx] = cpo_df_name
    else:
        assert(0)
    #-------------------------
    # Validate ALL index positions first, so a bad index cannot leave mecpo_colls_w_get_args half-adjusted
    n_colls = len(mecpo_colls_w_get_args)
    for idx in cpo_df_names_dict.keys():
        assert(-n_colls <= idx < n_colls)
    #-------------------------
    for idx,cpo_df_name in cpo_df_names_dict.items():
        mecpo_colls_w_get_args[idx]['cpo_df_name']=cpo_df_name
    #-------------------------
    return mecpo_colls_w_get_args

In [None]:
def draw_cpo_dfs_full_vs_direct_3x2(
    fig_num, 
    mecpo_an_list_full, 
    mecpo_an_list_drct, 
    cpo_full_dfs_name,
    cpo_drct_dfs_name=None,
    cpo_df_subset_by_mjr_mnr_cause_args=None, 
    max_total_counts_args=None, 
    subplot_titles=None, 
    reason_order=None, 
    n_reason_to_include=None, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle=None, 
    row_major=False, 
    save_args=dict(
        save_fig=False,
        save_dir='', 
        save_name=''
    ), 
    mecpo_an_list_no_outg=None,
    cpo_no_outg_dfs_name=None, 
    **kwargs
):
    r"""
    Compares full to direct cpo_dfs for six different cases, drawn as a 3x2 grid of bar plots.
    Intended to be used to compare 01_05, 06_10, 11_15, 16_20, 21_25, 26_30
    Returns (fig, axs), where axs is the flattened collection of the six axes.
    
    mecpo_an_list_full, mecpo_an_list_drct:
      Should be lists (or possibly dicts with matching keys) EACH CONTAINING SIX MECPOAn objects
      If dicts are given, the MECPOAn objects are plotted in the key order of mecpo_an_list_full.
      
    cpo_full_dfs_name:
      The name of the cpo_full pd.DataFrames, used to retrieve the correct item from mecpo_an_list_full
    cpo_drct_dfs_name:
      Similar to cpo_full_dfs_name, but for the direct collection.  If left equal to None, it will be set equal
      to cpo_full_dfs_name
      
    cpo_df_subset_by_mjr_mnr_cause_args:
      Allows one to select subsets of the pd.DataFrames by the outage type.
      This should be a dict with arguments appropriate for the MECPOAn.get_cpo_df_subset_by_mjr_mnr_cause
        function (except for the cpo_df_name argument, which will be set to cpo_full_dfs_name/cpo_drct_dfs_name)
        
    max_total_counts_args:
      Allows one to further select a subset by the maximum number of total counts.
      This should be a dict with arguments appropriate for the MECPODf.get_cpo_df_subset_below_max_total_counts
        function (except for the cpo_df argument, which will be set to return_df)
      
    subplot_titles:
      A list of titles for each subplot (e.g., ['01-05 Days', '06-10 Days', ... '26-30 Days'])
      If None is given and mecpo_an_list_full,mecpo_an_list_drct are dicts, the keys will be used
      If None is given and they are lists, empty titles are used.
      
    reason_order:
      The order in which to plot the reasons along the x-axis.
      If set to None, the order will be taken from the first DF in mecpo_an_list_full, sorted by mean value
      
    n_reason_to_include:
      Number of reasons to include in the plot.  A reasonable number here is 10, as there is typically a steep drop off
      after this.
      If set equal to None, all will be included.
      
    is_rcpo:
      Set to True if the DataFrames are Reason Counts Per Outage. 
      Set to False if they are Id (enddeviceeventtypeid) Counts Per Outage.
      Used mainly to determine the size of margins to use, as the Reasons tend to be much longer than IDs
      
    replace_xtick_labels_with_ints:
      If True, the xtick labels will be replaced with integers, and a key will be printed to the right of the figures.
      
    include_xticklabels_for_all:
      By default (i.e., when include_xticklabels_for_all is False), the xtick labels are included only for the bottom
        two plots.
      If include_xticklabels_for_all==True, xtick labels are included for each subplot.
      
    suptitle:
      Overall title for collection of plots
      
    row_major:
      If True, the array of axes, axs, is flattened in row-major order.
      If False, the array of axes, axs, is flattened in column-major order.      
      Default: False, so that as the indices increase, the plots move down the first column before
               entering and traversing the second column.
      
    save_args:
      A dict containing information on whether or not to save figure, and, if so, where to save it.
      It can also simply be set to False
      
    mecpo_an_list_no_outg:
      Optional.  If given, should contain SIX MECPOAn objects, which are drawn as a third set of bars in each subplot.
      If mecpo_an_list_full/mecpo_an_list_drct are dicts and this is also a dict, it must have the same keys.
      NOTE(review): cpo_df_subset_by_mjr_mnr_cause_args and max_total_counts_args are also passed to get_cpo_df 
                    for these no-outage objects -- confirm this is intended.
    cpo_no_outg_dfs_name:
      Similar to cpo_full_dfs_name, but for the no-outage collection.  If left equal to None, it will be set equal
      to cpo_full_dfs_name
      
    kwargs:
        barplot_kwargs_full
        barplot_kwargs_drct
        barplot_kwargs_no_outg
        common_barplot_kwargs
          Merged into (and taking precedence over) each of the three barplot_kwargs above.
        common_general_kwargs
          Passed on to Plot_Bar.plot_multiple_barplots.  A copy is used internally, so the caller's dict 
          is not modified.
        sharex
        sharey
    """
    #-------------------------
    assert(len(mecpo_an_list_full)==len(mecpo_an_list_drct)==6)
    mecpo_an_keys = None
    if isinstance(mecpo_an_list_full, dict) or isinstance(mecpo_an_list_drct, dict):
        assert(isinstance(mecpo_an_list_full, dict) and isinstance(mecpo_an_list_drct, dict))
        assert(mecpo_an_list_full.keys()==mecpo_an_list_drct.keys())
        # Convert the dicts to lists sharing a common order, as everything below indexes by position
        mecpo_an_keys = list(mecpo_an_list_full.keys())
        mecpo_an_list_drct = [mecpo_an_list_drct[key] for key in mecpo_an_keys]
        mecpo_an_list_full = [mecpo_an_list_full[key] for key in mecpo_an_keys]
        if subplot_titles is None:
            subplot_titles = mecpo_an_keys
    if subplot_titles is None:
        subplot_titles = ['' for _ in range(6)]
    assert(len(subplot_titles)==6)
    #--------------------------------------------------
    # Unpack kwargs
    barplot_kwargs_full   = kwargs.get('barplot_kwargs_full', dict(facecolor='red', label="Outages (All Xfmrs)"))
    barplot_kwargs_drct   = kwargs.get('barplot_kwargs_drct', dict(facecolor='green', label="Outages (Xfmr tied to Outg. Location ID)"))
    common_barplot_kwargs = kwargs.get('common_barplot_kwargs', dict(alpha=0.25, fill=True, edgecolor='black', hatch='//'))
    
    barplot_kwargs_no_outg = kwargs.get('barplot_kwargs_no_outg', dict(facecolor='orange', label="No Outages"))
    #----------
    xtick_labelrotation=90
    if is_rcpo:
        xtick_labelsize=15
    else:
        xtick_labelsize=20
    ytick_labelsize=25
    if replace_xtick_labels_with_ints:
        xtick_labelrotation=0
        xtick_labelsize=25
    dflt_common_general_kwargs = dict( 
        n_bars_to_include=n_reason_to_include, 
        keep_edges_opaque=True, 
        include_hatches=False, 
        draw_side_by_side=True, 
        draw_single_idx_full_width=None,     
        run_set_general_plotting_args=True, 
        orient='v',
        draw_legend=True, 
        legend_args=dict(fontsize=15), 
        ylabel_args = dict(ylabel='', fontsize=0, x=0.0, y=0.8, ha='left', va='bottom'), 
        xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
        tick_args=[dict(axis='x', labelrotation=xtick_labelrotation, labelsize=xtick_labelsize), 
                   dict(axis='y', labelsize=ytick_labelsize)]
    )
    # common_general_kwargs is modified below ('order' and, possibly, 'ax_args').
    # Work on a copy so a dict supplied by the caller is not altered (which would otherwise leak, e.g., 
    # the integer xticklabels into the caller's subsequent calls)
    common_general_kwargs = copy.deepcopy(kwargs.get('common_general_kwargs', dflt_common_general_kwargs))
    #----------
    sharex = kwargs.get('sharex', True)
    sharey = kwargs.get('sharey', True)
    #--------------------------------------------------
    if cpo_drct_dfs_name is None:
        cpo_drct_dfs_name = cpo_full_dfs_name
    #-------------------------    
    barplot_kwargs_full = {**barplot_kwargs_full, **common_barplot_kwargs}
    barplot_kwargs_drct = {**barplot_kwargs_drct, **common_barplot_kwargs}
    barplot_kwargs_no_outg = {**barplot_kwargs_no_outg, **common_barplot_kwargs}
    #-------------------------
    if save_args:
        assert('save_fig' in save_args)
        if save_args['save_fig']:
            assert('save_dir' in save_args and 
                   'save_name' in save_args)
    #----------------------------------------------------------------------
    # BUILD THE COLLECTION OF DATAFRAMES
    #----------------------------------------------------------------------
    cpo_dfs_full = [
        mecpo_an.get_cpo_df(
            cpo_df_name=cpo_full_dfs_name, 
            cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
            max_total_counts_args=max_total_counts_args
        )
        for mecpo_an in mecpo_an_list_full
    ]
    #-----
    cpo_dfs_drct = [
        mecpo_an.get_cpo_df(
            cpo_df_name=cpo_drct_dfs_name, 
            cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
            max_total_counts_args=max_total_counts_args
        )
        for mecpo_an in mecpo_an_list_drct
    ]
    #-----
    if mecpo_an_list_no_outg is not None:
        assert(len(mecpo_an_list_no_outg)==6)
        if mecpo_an_keys is not None and isinstance(mecpo_an_list_no_outg, dict):
            # Keep the no-outage objects aligned with the full/direct objects
            assert(set(mecpo_an_list_no_outg.keys())==set(mecpo_an_keys))
            mecpo_an_list_no_outg = [mecpo_an_list_no_outg[key] for key in mecpo_an_keys]
        if cpo_no_outg_dfs_name is None:
            cpo_no_outg_dfs_name = cpo_full_dfs_name
        cpo_dfs_no_outg = [
            mecpo_an_list_no_outg[i].get_cpo_df(
                cpo_df_name=cpo_no_outg_dfs_name, 
                cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args, 
                max_total_counts_args=max_total_counts_args
            )
            for i in range(len(mecpo_an_list_no_outg))
        ]
    #----------------------------------------------------------------------
    n_x=2
    n_y=3
    #-----
    fig, axs = Plot_General.default_subplots(n_x=n_x, n_y=n_y, fig_num=fig_num, sharex=sharex, sharey=sharey, 
                                             return_flattened_axes=True, row_major=row_major)
    #-------------------------
    scale_margin_top = None
    if suptitle:
        scale_margin_top = 0.75
    if not replace_xtick_labels_with_ints:
        right = None # i.e., default
        if is_rcpo:
            bottom = 0.5
        else:
            bottom = 0.15
    else:
        bottom = None #i.e., default
        right = 0.75

    subplots_adjust_args = Plot_General.get_subplots_adjust_args_std_3x2(scale_hspace=0.5, scale_margin_left=0.75, 
                                                                         scale_margin_top=scale_margin_top, 
                                                                         bottom=bottom, right=right)
    fig = Plot_General.adjust_subplots_args(fig, subplots_adjust_args)
    #-------------------------
    # When using sharex=True and sharey=True, the default behavior is:
    #   When subplots have a shared x-axis along a column, only the x tick labels of the bottom subplot are created. 
    #   Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
    #   To turn on all tick labels for x(y), set include_all_x(y)_tick_labels to True
    include_all_x_tick_labels=False # If false, only the x tick labels of the bottom subplot are created
    include_all_y_tick_labels=True  # If false, only the y tick labels of the first column subplot are created
    #-------------------------
    if reason_order is None:
        reason_order = cpo_dfs_full[0].mean().sort_values(ascending=False).index.tolist()
    common_general_kwargs['order'] = reason_order
    #-------------------------
    # NOTE: xticks = np.arange(len(xtick_rename_dict)) below is to ensure all ticks are drawn,
    #       as sometimes mpl draws less ticks when there are many
    if replace_xtick_labels_with_ints:
        if n_reason_to_include is not None:
            xtick_elements = reason_order[:n_reason_to_include]
        else:
            xtick_elements = reason_order
        xtick_rename_dict = {xtick_el:i+1 for i,xtick_el in enumerate(xtick_elements)}
        #-----
        Plot_General.adjust_kwargs(
            general_kwargs=common_general_kwargs, 
            new_values_dict=dict(
                ax_args=dict(
                    xticks = np.arange(len(xtick_rename_dict)),
                    xticklabels=list(xtick_rename_dict.values())
                )
            ), 
            append_to_containers=False,
            inplace=True
        )
        
    #------------------------- ACTUAL PLOTTING!!! -------------------------
    #----------------------------------------------------------------------
    # The bottom two plots should always include the xtick labels
    # However, one only needs to worry about this when sharex=False
    # NOTE: If include_xticklabels_for_all==True, one cannot simply set idxs_to_exclude_xticklabels = []
    #       This would work if sharex==False, but if sharex==True, plt.subplots automatically turns off the
    #       tick labels for all but bottom.  Thus, instead of not turning them off (via, e.g., idxs_to_exclude_xticklabels 
    #       = []), one must actively turn them on as well!
    idxs_to_include_xticklabels = [2, 5]
    if row_major:
        idxs_to_include_xticklabels = [4, 5]
    idxs_to_exclude_xticklabels = [x for x in range(6) if x not in idxs_to_include_xticklabels]
    common_general_kwargs_wo_xticklabels = Plot_General.adjust_kwargs(common_general_kwargs, dict(ax_args=dict(xticklabels=[])))
    if include_xticklabels_for_all:
        idxs_to_exclude_xticklabels = []
    #-------------------------
    for i in range(len(cpo_dfs_full)):
        common_general_kwargs_i = common_general_kwargs
        if(not sharex and 
           i in idxs_to_exclude_xticklabels):
            common_general_kwargs_i = common_general_kwargs_wo_xticklabels
        #----------
        dfs_w_args=[
            (cpo_dfs_full[i], barplot_kwargs_full), 
            (cpo_dfs_drct[i], barplot_kwargs_drct)
        ]
        if mecpo_an_list_no_outg is not None:
            dfs_w_args.append((cpo_dfs_no_outg[i], barplot_kwargs_no_outg))
        
        axs[i] = Plot_Bar.plot_multiple_barplots(
            ax=axs[i], 
            dfs_w_args=dfs_w_args, 
            **common_general_kwargs_i, 
            title_args=dict(label=subplot_titles[i], fontsize=20)
        )
        if include_xticklabels_for_all:
            axs[i].tick_params(axis='x', labelbottom=True)
    #---------------------------------------------------------------------------
    # Make all have same scale
    if not sharey:
        Plot_General.make_all_axes_have_same_ylims(axs)
    #---------------------------------------------------------------------------
    subplot_layout_params = Plot_General.get_subplot_layout_params(fig)
    #-----
    if is_rcpo:
        supxlabel = 'Reason'
    else:
        supxlabel = 'ID'
    fig.supxlabel(supxlabel, fontsize=30, 
                  x=subplot_layout_params['right'], ha='right', 
                  y=0.0, va='bottom')
    fig.supylabel('Normalized Counts', fontsize=30, 
                  x=0.0, ha='left',
                  y=subplot_layout_params['top'], va='top')
    if suptitle:
        fig.suptitle(suptitle, fontsize=50, 
                     x=0.5*(subplot_layout_params['left']+subplot_layout_params['right']), ha='center')

    if include_all_y_tick_labels and sharey:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='y', labelleft=True)

    if include_all_x_tick_labels and sharex:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='x', labelbottom=True)

    if replace_xtick_labels_with_ints:
        Plot_General.generate_xtick_labels_legend_textbox(
            fig=fig, 
            xtick_rename_dict=xtick_rename_dict, 
            text_x_pos=1.02*subplot_layout_params['right'], 
            text_y_pos=subplot_layout_params['top'])
    #---------------------------------------------------------------------------
    if save_args and save_args['save_fig']:
        Plot_General.save_fig(fig, save_args['save_dir'], save_args['save_name'])   
    #---------------------------------------------------------------------------
    return fig, axs

In [None]:
def draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num, 
    mecpo_colls_w_get_args, 
    reason_order=None, 
    n_reason_to_include=None, 
    is_rcpo=True, 
    replace_xtick_labels_with_ints=False, 
    include_xticklabels_for_all=False, 
    suptitle=None, 
    row_major=False, 
    save_args=dict(
        save_fig=False,
        save_dir='', 
        save_name=''
    ), 
    draw_legend_in_i0_only=True, 
    **kwargs
):
    r"""
    mecpo_colls_w_get_args:
      A list of dict objects.
      Each dict should have keys:
        mecpo_coll:
          The MECPOCollection object
        cpo_df_name:
          Name of cpo_df to grab from each MECPOAn in the collection for plotting
        cpo_df_subset_by_mjr_mnr_cause_args:
          Arguments to grab any subset of the data by outage major/minor cause.
          This should be a dict with arguments appropriate for the MECPOAn.get_cpo_df_subset_by_mjr_mnr_cause
            function (except for the cpo_df_name argument, which will be set to cpo_df_name)  
        max_total_counts_args=None:
          Arguments to further select a subset by the maximum number of total counts.
          This should be a dict with arguments appropriate for the MECPODf.get_cpo_df_subset_below_max_total_counts
            function (except for the cpo_df argument, which will be set to return_df
        mecpo_an_order:
          The order in which the analyses in MECPOAn should be plotted
          
      !!!!!!!!!!!!!!!!!!!!    
      FOR NOW (to be safe):
      !!!!!!!!!!!!!!!!!!!!
        Each mecpo_coll must have the same keys.
          This will ensure that the intended analyses are being plotted together
          These can also be used as the subplot titles
          
    reason_order:
      The order in which to plot the reasons along the x-axis.
      This should be a list of Reasons or IDs (enddeviceeventtypeid).
      If set to an index idx, the order will be taken from mecpo_colls_w_get_args[idx]['mecpo_coll'] via the
        get_rough_reason_ordering method.
      
    n_reason_to_include:
      Number of reasons to include in the plot.  A reasonable number here is 10, as there is typically a steep drop off
      after this.
      If set equal to None, all will be included.
      
    is_rcpo:
      Set to True if the DataFrames are Reason Counts Per Outage. 
      Set to False if they are Id (enddeviceeventtypeid) Counts Per Outage.
      Used mainly to determine the size of margins to use, as the Reasons tend to be much longer than IDs
      
    replace_xtick_labels_with_ints:
      If True, the xtick labels will be replaced with integers, and a key will be printed to the right of the figures.
      
    include_xticklabels_for_all:
      By default (i.e., when include_xticklabels_for_all is False), the xtick labels are included only for the bottom
        two plots.
      If include_xticklabels_for_all==True, xtick labels are included for each subplot.
      
    suptitle:
      Overall title for collection of plots
      
    row_major:
      If True, the array of axes, axs, is flattened in row-major order.
      If False, the array of axes, axs, is flattened in column-major order.      
      Default: False, so the as the indices increase, the plots move down the first column before
               entering and traversing the second column.
      
    save_args:
      A dict containing information on whether or not to save figure, and, if so, where to save it.
      It can also simply be set to False
      
    kwargs:
        common_barplot_kwargs
        common_general_kwargs
        sharex
        sharey
    """
    #-------------------------    
    # Make sure each element in mecpo_colls_w_get_args has the expected keys
    necessary_mecpo_colls_w_get_args_keys = ['mecpo_coll', 'cpo_df_name','mecpo_an_order']
    other_mecpo_colls_w_get_args_keys = ['cpo_df_subset_by_mjr_mnr_cause_args', 'max_total_counts_args']
    for coll_w_get_args in mecpo_colls_w_get_args:
        assert(len(set(necessary_mecpo_colls_w_get_args_keys).difference(set(coll_w_get_args.keys())))==0)
        for other_key in other_mecpo_colls_w_get_args_keys:
            coll_w_get_args[other_key] = coll_w_get_args.get(other_key, None)
    
    # Plotting 6 subplots (3x2), so each MECPOCollection should have 6 MECPOAn objects
    for mecpo_coll in [x['mecpo_coll'] for x in mecpo_colls_w_get_args]:
        assert(mecpo_coll.n_mecpo_ans==6)

    # (Possibly temporary) Restriction: Each MECPOCollection must have the same keys
    mecpo_an_keys = mecpo_colls_w_get_args[0]['mecpo_an_order']
    assert(len(mecpo_an_keys)==6)
    for mecpo_coll in [x['mecpo_coll'] for x in mecpo_colls_w_get_args]:
        assert(len(set(mecpo_an_keys).symmetric_difference(set(mecpo_coll.mecpo_an_keys)))==0)
    #--------------------------------------------------
    # Unpack kwargs
    common_barplot_kwargs = kwargs.get('common_barplot_kwargs', dict(alpha=0.25, fill=True, edgecolor='black', hatch='//'))
    #----------
    xtick_labelrotation=90
    if is_rcpo:
        xtick_labelsize=15
    else:
        xtick_labelsize=20
    ytick_labelsize=25
    if replace_xtick_labels_with_ints:
        xtick_labelrotation=0
        xtick_labelsize=25
    dflt_common_general_kwargs = dict( 
        n_bars_to_include=n_reason_to_include, 
        keep_edges_opaque=True, 
        include_hatches=False, 
        draw_side_by_side=True, 
        draw_single_idx_full_width=None,     
        run_set_general_plotting_args=True, 
        orient='v',
        draw_legend=True, 
        legend_args=dict(fontsize=15), 
        ylabel_args = dict(ylabel=f'', fontsize=0, x=0.0, y=0.8, ha='left', va='bottom'), 
        xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
        tick_args=[dict(axis='x', labelrotation=xtick_labelrotation, labelsize=xtick_labelsize), 
                   dict(axis='y', labelsize=ytick_labelsize)]
    )
    common_general_kwargs = kwargs.get('common_general_kwargs', dflt_common_general_kwargs)
    #----------
    sharex = kwargs.get('sharex', True)
    sharey = kwargs.get('sharey', True)
    make_ylim_eq = kwargs.get('make_ylim_eq', True)
    #--------------------------------------------------
    if save_args:
        assert('save_fig' in save_args)
        if save_args['save_fig']:
            assert('save_dir' in save_args and 
                   'save_name' in save_args)        
    #----------------------------------------------------------------------
    n_x=2
    n_y=3
    #-----
    fig, axs = Plot_General.default_subplots(n_x=n_x, n_y=n_y, fig_num=fig_num, sharex=sharex, sharey=sharey, 
                                             return_flattened_axes=True, row_major=row_major)
    #-------------------------
    scale_margin_top = None
    if suptitle:
        scale_margin_top = 0.75
    if not replace_xtick_labels_with_ints:
        right = None # i.e., default
        if is_rcpo:
            bottom = 0.5
        else:
            bottom = 0.15
    else:
        bottom = None #i.e., default
        right = 0.75

    subplots_adjust_args = Plot_General.get_subplots_adjust_args_std_3x2(scale_hspace=0.5, scale_margin_left=0.75, 
                                                                         scale_margin_top=scale_margin_top, 
                                                                         bottom=bottom, right=right)
    fig = Plot_General.adjust_subplots_args(fig, subplots_adjust_args)
    #-------------------------
    # When using sharex=True and sharey=True, the default behavior is:
    #   When subplots have a shared x-axis along a column, only the x tick labels of the bottom subplot are created. 
    #   Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
    #   To turn on all tick labels for x(y), set include_all_x(y)_tick_labels to True
    include_all_x_tick_labels=False # If false, only the x tick labels of the bottom subplot are created
    include_all_y_tick_labels=True  # If false, only the y tick labels of the first column subplot are created
    #-------------------------
    if reason_order is None:
        reason_order = 0
    assert(Utilities.is_object_one_of_types(reason_order, [int, list]))
    if isinstance(reason_order, int):
        reason_order = mecpo_colls_w_get_args[reason_order]['mecpo_coll'].get_rough_reason_ordering(
            cpo_df_name=mecpo_colls_w_get_args[reason_order]['cpo_df_name'], 
            cpo_df_subset_by_mjr_mnr_cause_args=mecpo_colls_w_get_args[reason_order]['cpo_df_subset_by_mjr_mnr_cause_args'],
            max_total_counts_args=mecpo_colls_w_get_args[reason_order]['max_total_counts_args']
        )
    #-----
    common_general_kwargs['order'] = reason_order
    #-------------------------
    if replace_xtick_labels_with_ints:
        if n_reason_to_include is not None:
            xtick_elements = reason_order[:n_reason_to_include]
        else:
            xtick_elements = reason_order
        xtick_rename_dict = {xtick_el:i+1 for i,xtick_el in enumerate(xtick_elements)}
    #-------------------------
    # NOTE: xticks = np.arange(len(xtick_rename_dict)) below is to ensure all ticks are drawn,
    #       as sometimes mpl draws less ticks when there are many    
    if replace_xtick_labels_with_ints:
        Plot_General.adjust_kwargs(
            general_kwargs=common_general_kwargs, 
            new_values_dict=dict(
                ax_args=dict(
                    xticks = np.arange(len(xtick_rename_dict)),
                    xticklabels=list(xtick_rename_dict.values())
                )
            ), 
            append_to_containers=False,
            inplace=True
        )
        
    #------------------------- ACTUAL PLOTTING!!! -------------------------
    #----------------------------------------------------------------------
    # The bottom two plots should always include the xtick labels
    # However, one only needs to worry about this when sharex=False
    # NOTE: If include_xticklabels_for_all==True, one cannot simply set idxs_to_exclude_xticklabels = []
    #       This would work if sharex==False, but if sharex==True, plt.subplots automatically turns off the
    #       tick labels for all but buttom.  Thus, instead of not turning them off (via, e.g., idxs_to_exclude_xticklabels 
    #       = []), one must actively turn them on as well!
    idxs_to_include_xticklabels = [2, 5]
    if row_major:
        idxs_to_include_xticklabels = [4, 5]
    idxs_to_exclude_xticklabels = [x for x in range(6) if x not in idxs_to_include_xticklabels]
    common_general_kwargs_wo_xticklabels = Plot_General.adjust_kwargs(common_general_kwargs, dict(ax_args=dict(xticklabels=[])))
    if include_xticklabels_for_all:
        idxs_to_exclude_xticklabels = []
    #-------------------------
    #--------------------------------------------------    
    # Below, qualitatively 
    #   i represents iteration over the 6 subplots
    #     - Note: There are 6 MECPOAn objects in each coll_w_get_args['mecpo_coll'] in mecpo_colls_w_get_args, 
    #             one for each subplot
    #   j represents iteration over the collections (mecpo_colls_w_get_args) for a given subplot
    #-----
    # First, iterate over the plot number/MECPOAn number
    for i_plot in range(len(mecpo_an_keys)):
        # Use mecpo_an_keys[i_plot] as the subplot title
        subplot_title_i = mecpo_an_keys[i_plot]
        #-----
        # Next, build dfs_w_args_i to be plotted by iterating over the MECPOCollection
        # objects (or, more specifically, the members of mecpo_colls_w_get_args), and grabbing
        # the correct DF from each collection
        dfs_w_args_i = []
        for coll_w_get_args_j in mecpo_colls_w_get_args:
            # Get the key for the correct MECPOAn from which to grab the DF
            mecpo_an_key_ij = coll_w_get_args_j['mecpo_an_order'][i_plot] 
            assert(subplot_title_i==mecpo_an_key_ij)
            
            # As well as the cpo_df_name, cpo_df_subset_by_mjr_mnr_cause_args, and max_total_counts_args
            # NOTE: Only single value for these, so no [i_plot] accessor
            cpo_df_name_j = coll_w_get_args_j['cpo_df_name']
            cpo_df_subset_by_mjr_mnr_cause_args_j = coll_w_get_args_j['cpo_df_subset_by_mjr_mnr_cause_args']
            max_total_counts_args_j = coll_w_get_args_j['max_total_counts_args']
            #-----
            # Now, get the correct DF from coll_w_get_args_j['mecpo_coll'] given the known mecpo_an_key, cpo_df_name, 
            # cpo_df_subset_by_mjr_mnr_cause_args, and max_total_counts_args
            df_ij = coll_w_get_args_j['mecpo_coll'].get_cpo_df(
                mecpo_an_key=mecpo_an_key_ij, 
                cpo_df_name=cpo_df_name_j, 
                cpo_df_subset_by_mjr_mnr_cause_args=cpo_df_subset_by_mjr_mnr_cause_args_j, 
                max_total_counts_args=max_total_counts_args_j
            )
            #-----
            # Finally, get the barplot_kwargs_shared from coll_w_get_args_j['mecpo_coll'] and join together
            # with common_barplot_kwargs to get barplot_kwargs_ij
            barplot_kwargs_ij = coll_w_get_args_j['mecpo_coll'].barplot_kwargs_shared
            # Note: Order actually is important here.  If keys are shared, the values for the second are kept
            barplot_kwargs_ij = {**common_barplot_kwargs, **barplot_kwargs_ij}
            #-------------------------
            dfs_w_args_i.append((df_ij, barplot_kwargs_ij))
        # END for coll_w_get_args_j in mecpo_colls_w_get_args
        #-----
        # Now, perform actual plotting for i_plot
        common_general_kwargs_i = common_general_kwargs
        if(not sharex and 
           i_plot in idxs_to_exclude_xticklabels):
            common_general_kwargs_i = common_general_kwargs_wo_xticklabels
        #----------
        if draw_legend_in_i0_only and i_plot>0:
            common_general_kwargs_i['draw_legend']=False
        #----------
        axs[i_plot] = Plot_Bar.plot_multiple_barplots(
            ax=axs[i_plot], 
            dfs_w_args=dfs_w_args_i,
            **common_general_kwargs_i, 
            title_args=dict(label=subplot_title_i, fontsize=20)
        )
        if include_xticklabels_for_all:
            axs[i_plot].tick_params(axis='x', labelbottom=True)
    #---------------------------------------------------------------------------
    # Make all have same scale
    if not sharey and make_ylim_eq:
        Plot_General.make_all_axes_have_same_ylims(axs)
    #---------------------------------------------------------------------------
    subplot_layout_params = Plot_General.get_subplot_layout_params(fig)
    #-----
    if is_rcpo:
        supxlabel = 'Reason'
    else:
        supxlabel = 'ID'
    fig.supxlabel(supxlabel, fontsize=30, 
                  x=subplot_layout_params['right'], ha='right', 
                  y=0.0, va='bottom')
    fig.supylabel('Normalized Counts', fontsize=30, 
                  x=0.0, ha='left',
                  y=subplot_layout_params['top'], va='top')
    if suptitle:
        fig.suptitle(suptitle, fontsize=50, 
                     x=0.5*(subplot_layout_params['left']+subplot_layout_params['right']), ha='center')

    if include_all_y_tick_labels and sharey:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='y', labelleft=True)

    if include_all_x_tick_labels and sharex:
        for ax_i in axs.flatten():
            ax_i.tick_params(axis='x', labelbottom=True)

    if replace_xtick_labels_with_ints:
        Plot_General.generate_xtick_labels_legend_textbox(
            fig=fig, 
            xtick_rename_dict=xtick_rename_dict, 
            text_x_pos=1.02*subplot_layout_params['right'], 
            text_y_pos=subplot_layout_params['top'])
    #---------------------------------------------------------------------------
    if save_args and save_args['save_fig']:
        Plot_General.save_fig(fig, save_args['save_dir'], save_args['save_name'])   
    #---------------------------------------------------------------------------
    return fig, axs

In [None]:
#-------------------------
# Run configuration
#-------------------------
run_testing_data = False
fig_num = 0

run_date = '20221216'  # Date of data acquisition

date_0 = '2021-01-01'  # Lower limit for end events
date_1 = '2021-12-31'  # Upper limit for end events

#-------------------------
# Figure output
#-------------------------
save_fig = True
# Alternative output locations used previously:
# save_dir = r'C:\Users\s346557\Documents\Presentations\OutagePredictions\MeterEvents\Figures_by_xfmr\PDFs'
# save_ext = 'pdf'
# save_dir = r'C:\Users\s346557\Documents\Presentations\OutagePredictions\MeterEvents\Figures_by_xfmr\PNGs'
save_dir = r'C:\Users\s346557\Documents\Presentations\ForShane_Dallas_April2023\Figures'
save_ext = 'png'

#-------------------------
# Input data locations
#-------------------------
# dovs_and_end_events_data_dir = r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data'
# if run_testing_data:
#     dovs_and_end_events_data_dir = os.path.join(dovs_and_end_events_data_dir, r'TESTING_DATASETS')

# Sub-directory name is the two date limits with dashes stripped, joined by '_'
date_pd_subdir = '_'.join(date_str.replace('-', '') for date_str in (date_0, date_1))
dovs_and_end_events_data_dir = os.path.join(
    Utilities.get_local_data_dir(),
    r'dovs_and_end_events_data',
    run_date,
    date_pd_subdir
)
# Fail early if the expected data directory is missing
assert os.path.isdir(dovs_and_end_events_data_dir)

files_dir_outg = os.path.join(dovs_and_end_events_data_dir, r'EndEvents')
files_dir_outg_prim_strict = os.path.join(dovs_and_end_events_data_dir, r'EndEvents_prim_strict')
files_dir_no_outg = os.path.join(dovs_and_end_events_data_dir, r'EndEvents_NoOutg')

file_path_glob = r'end_events_[0-9]*.csv'
file_path_regex = None

#-------------------------
# Processing options
#-------------------------
assert_all_cols_equal = True
include_normalize_by_nSNs = True
inclue_zero_counts = True
return_multiindex_outg_reason = False
return_normalized_separately = False

# True only when normalization by nSNs is requested AND not returned separately
normalize_by_nSNs_included = bool(include_normalize_by_nSNs and not return_normalized_separately)

xfmr_equip_typ_nms_of_interest = ['TRANSFORMER, OH', 'TRANSFORMER, UG']

normalize_by_time_interval = True

In [None]:
# Name of the reason-counts (rcpo) DF to pull from each collection
rcpo_full_dfs_name    = 'rcpo_df_norm_by_xfmr_nSNs'
rcpo_drct_dfs_name    = 'rcpo_df_norm_by_xfmr_nSNs'
rcpo_no_outg_dfs_name = 'rcpo_df_norm_by_xfmr_nSNs'
# TODO probably change this to index in mecpo_colls_w_get_args instead
rcpo_order_df_name = rcpo_drct_dfs_name

# Name of the ID-counts (icpo) DF to pull from each collection
icpo_full_dfs_name    = 'icpo_df_norm_by_xfmr_nSNs'
icpo_drct_dfs_name    = 'icpo_df_norm_by_xfmr_nSNs'
icpo_no_outg_dfs_name = 'icpo_df_norm_by_xfmr_nSNs'

# Position (in mecpo_colls_w_get_args) of the collection used for ordering
mecpo_idx_for_ordering = 1

In [None]:
# NOTE: This path hard-codes the 20221216 run directory
save_dir_model_base = r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data\20221216\Models'
merged_df_full = pd.read_pickle(os.path.join(save_dir_model_base, 'merged_df_full.pkl'))

#-------------------------
# NOTE: pickle.load can execute arbitrary code; only use on trusted, locally produced files.
# The optional trimming via remove_all_cpo_dfs_except(to_keep=[...]) is not applied
# to any of the collections in this cell.
with open(os.path.join(save_dir_model_base, 'mecpo_coll_full.pkl'), 'rb') as pkl_file:
    mecpo_coll_full = pickle.load(pkl_file)
#-----
with open(os.path.join(save_dir_model_base, 'mecpo_coll_drct.pkl'), 'rb') as pkl_file:
    mecpo_coll_drct = pickle.load(pkl_file)
#-----
with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg.pkl'), 'rb') as pkl_file:
    mecpo_coll_no_outg = pickle.load(pkl_file)
#-----
with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg_prstn.pkl'), 'rb') as pkl_file:
    mecpo_coll_no_outg_prstn = pickle.load(pkl_file)
#-----
# Merge the two no-outage collections into a single one
mecpo_coll_no_outg = MECPOCollection.combine_two_mecpo_colls(
    mecpo_coll_no_outg,
    mecpo_coll_no_outg_prstn
)
#-------------------------
# One bar color per collection
for mecpo_coll_i, facecolor_i in (
    (mecpo_coll_full, 'red'),
    (mecpo_coll_drct, 'green'),
    (mecpo_coll_no_outg, 'orange'),
):
    mecpo_coll_i.barplot_kwargs_shared['facecolor'] = facecolor_i

In [None]:
# NOTE: merged_df_full is both the input and the output here, so re-running this cell
#       without first re-loading merged_df_full applies the subset to the already-reduced DF.
merged_df_full = MECPOCollection.get_top_reasons_subset_from_merged_cpo_df(
    merged_cpo_df=merged_df_full,
    how='per_mecpo_an',
    n_reasons_to_include=10,
    combine_others=True,
    output_combine_others_col='Other Reasons',
    is_norm=False,
    SNs_tags=None,
    counts_series=None,
)
# Every column under '01-05 Days' except the combined 'Other Reasons' column
top_reasons = [
    reason
    for reason in merged_df_full['01-05 Days'].columns
    if reason != 'Other Reasons'
]

In [None]:
# For every MECPOAn key, replace the rcpo DF of each collection with the result of
# get_reasons_subset_from_cpo_df restricted to the shared top_reasons list.
# Collections are handled in the order full -> direct -> no-outage for each key.
colls_w_rcpo_df_names = (
    (mecpo_coll_full,    rcpo_full_dfs_name),
    (mecpo_coll_drct,    rcpo_drct_dfs_name),
    (mecpo_coll_no_outg, rcpo_no_outg_dfs_name),
)
for mecpo_an_name in mecpo_coll_full.mecpo_an_keys:
    for mecpo_coll_j, rcpo_df_name_j in colls_w_rcpo_df_names:
        reasons_subset_df_ij = mecpo_coll_j.get_mecpo_an(mecpo_an_name).get_reasons_subset_from_cpo_df(
            cpo_df_name=rcpo_df_name_j,
            reasons_to_include=top_reasons,
            combine_others=True
        )
        # Store the reduced DF back under the same name
        mecpo_coll_j.get_mecpo_an(mecpo_an_name).set_cpo_df(rcpo_df_name_j, reasons_subset_df_ij)

In [None]:
# Quick check: number of columns in the '01-05 Days' DF (rcpo_full_dfs_name) of mecpo_coll_full
len(mecpo_coll_full.get_cpo_df('01-05 Days', rcpo_full_dfs_name).columns)

In [None]:
combine_others = True
# NOTE: save_dir_model_base is re-pointed here to the presentation pkls directory
save_dir_model_base = r'C:\Users\s346557\Documents\Presentations\ForShane_Dallas_April2023\pkls'

#-------------------------
# NOTE: pickle.load can execute arbitrary code; only use on trusted, locally produced files.
# After loading, each collection is trimmed to its rcpo/icpo DFs of interest.
with open(os.path.join(save_dir_model_base, 'mecpo_coll_full.pkl'), 'rb') as pkl_file:
    mecpo_coll_full = pickle.load(pkl_file)
mecpo_coll_full.remove_all_cpo_dfs_except(
    to_keep=[rcpo_full_dfs_name, icpo_full_dfs_name]
)
#-----
with open(os.path.join(save_dir_model_base, 'mecpo_coll_drct.pkl'), 'rb') as pkl_file:
    mecpo_coll_drct = pickle.load(pkl_file)
mecpo_coll_drct.remove_all_cpo_dfs_except(
    to_keep=[rcpo_drct_dfs_name, icpo_drct_dfs_name]
)
#-----
with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg.pkl'), 'rb') as pkl_file:
    mecpo_coll_no_outg = pickle.load(pkl_file)
# NOTE(review): mecpo_coll_no_outg_prstn is loaded but neither trimmed nor combined in this cell
with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg_prstn.pkl'), 'rb') as pkl_file:
    mecpo_coll_no_outg_prstn = pickle.load(pkl_file)
mecpo_coll_no_outg.remove_all_cpo_dfs_except(
    to_keep=[rcpo_no_outg_dfs_name, icpo_no_outg_dfs_name]
)
#-----

# with open(os.path.join(save_dir_model_base, 'mecpo_coll_full.pkl'), 'wb') as handle:
#     pickle.dump(mecpo_coll_full, handle, protocol=pickle.HIGHEST_PROTOCOL)
# with open(os.path.join(save_dir_model_base, 'mecpo_coll_drct.pkl'), 'wb') as handle:
#     pickle.dump(mecpo_coll_drct, handle, protocol=pickle.HIGHEST_PROTOCOL)
# with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg.pkl'), 'wb') as handle:
#     pickle.dump(mecpo_coll_no_outg, handle, protocol=pickle.HIGHEST_PROTOCOL)
# with open(os.path.join(save_dir_model_base, 'mecpo_coll_no_outg_prstn.pkl'), 'wb') as handle:
#     pickle.dump(mecpo_coll_no_outg_prstn, handle, protocol=pickle.HIGHEST_PROTOCOL)

#-------------------------
# One bar color per collection
for mecpo_coll_i, facecolor_i in (
    (mecpo_coll_full, 'red'),
    (mecpo_coll_drct, 'green'),
    (mecpo_coll_no_outg, 'orange'),
):
    mecpo_coll_i.barplot_kwargs_shared['facecolor'] = facecolor_i
# The no-outage collection also gets an explicit legend label
mecpo_coll_no_outg.barplot_kwargs_shared['label'] = 'Baseline'

In [None]:
# Column count of the '01-05 Days' DF (rcpo_full_dfs_name) in mecpo_coll_full,
# repeated here to inspect the collection as it is currently loaded
len(mecpo_coll_full.get_cpo_df('01-05 Days', rcpo_full_dfs_name).columns)

In [None]:
# Order of the MECPOAn keys; with six entries this gives one key per subplot of the 3x2 figures.
# mecpo_an_keys is the very same list object (alternative: mecpo_coll_full.mecpo_an_keys).
mecpo_an_keys = mecpo_an_order = [
    '01-05 Days', '06-10 Days',
    '11-15 Days', '16-20 Days',
    '21-25 Days', '26-30 Days',
]

# Reasons are drawn in the column order of the '01-05 Days' full DF
first_pd_full_df = mecpo_coll_full.get_cpo_df('01-05 Days', rcpo_full_dfs_name)
reason_order = first_pd_full_df.columns.tolist()

In [None]:
# Each entry bundles a collection with the arguments needed to fetch its DF:
#   mecpo_coll                          - the collection object
#   cpo_df_name                         - name of the DF to fetch
#   cpo_df_subset_by_mjr_mnr_cause_args - cause-subset specification (None here)
#   mecpo_an_order                      - ordered MECPOAn keys
mecpo_coll_w_get_args_full = {
    'mecpo_coll': mecpo_coll_full,
    'cpo_df_name': rcpo_full_dfs_name,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_order,
}
#-----
mecpo_coll_w_get_args_drct = {
    'mecpo_coll': mecpo_coll_drct,
    'cpo_df_name': rcpo_drct_dfs_name,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_order,
}
#-----
mecpo_coll_w_get_args_no_outg = {
    'mecpo_coll': mecpo_coll_no_outg,
    'cpo_df_name': rcpo_no_outg_dfs_name,
    'cpo_df_subset_by_mjr_mnr_cause_args': None,
    'mecpo_an_order': mecpo_an_order,
}
#-------------------------
mecpo_colls_w_get_args = [
    mecpo_coll_w_get_args_full,
    mecpo_coll_w_get_args_drct,
    mecpo_coll_w_get_args_no_outg,
]
# Position(s) of the no-outage collection(s) within mecpo_colls_w_get_args
no_outg_idxs = [2]

In [None]:
# Cause-subset specifications used to slice the DFs by major/minor cause.
# None means no subsetting (all outages).
subset_args_all_outgs = None

# Major cause DL, minor cause OL
subset_args_dl_ol = {
    'mjr_cause': 'DL',
    'mnr_cause': 'OL',
    'addtnl_slicers': None,
}

# Major cause DL, minor cause EQF
subset_args_dl_eqf = {
    'mjr_cause': 'DL',
    'mnr_cause': 'EQF',
    'addtnl_slicers': None,
}

# Major cause DL, minor cause EQF, with an additional slicer on EQUIP_TYP_NM
subset_args_dl_eqf_xfmr = {
    'mjr_cause': 'DL',
    'mnr_cause': 'EQF',
    'addtnl_slicers': [
        {'column': 'EQUIP_TYP_NM', 'value': ['TRANSFORMER, OH', 'TRANSFORMER, UG']},
    ],
}

In [None]:
# Display the reason ordering that is passed as reason_order to the plotting calls below
reason_order

In [None]:
# Output directory for the figures saved below (same value as assigned in the configuration cell)
save_dir = r'C:\Users\s346557\Documents\Presentations\ForShane_Dallas_April2023\Figures'

In [None]:
# Toggle for writing figures to disk; forwarded as save_args['save_fig'] in the plotting cells
save_fig = True

In [None]:
save_name = f'RCPO_dfs_3x2_full.{save_ext}'

# Point every collection at the all-outages selection (no cause subsetting, no hatch).
# inplace=False: the adjusted list is taken from the return value.
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    no_outg_idxs=no_outg_idxs,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': None},
    inplace=False,
)

# NOTE: reason_order is taken as defined in an earlier cell.  A disabled alternative was to
#       order by the descending column means of the '01-05 Days' DF of mecpo_coll_drct,
#       fetched with the cpo_df_name and cause-subset args at index mecpo_idx_for_ordering.

#-----
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    # What to draw
    is_rcpo=True,
    reason_order=reason_order,
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='Full',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    include_xticklabels_for_all=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)

In [None]:
# Inspect the column labels of the '01-05 Days' DF (rcpo_full_dfs_name) in mecpo_coll_full
mecpo_coll_full.get_cpo_df('01-05 Days', rcpo_full_dfs_name).columns

In [None]:
save_name = f'RCPO_dfs_3x2_dl_ol.{save_ext}'

# Point every collection at the DL/OL cause subset, drawn with a '//' hatch.
# inplace=False: the adjusted list is taken from the return value.
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    no_outg_idxs=no_outg_idxs,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '//'},
    inplace=False,
)

# NOTE: reason_order is taken as defined in an earlier cell.  A disabled alternative was to
#       order by the descending column means of the '01-05 Days' DF of mecpo_coll_drct,
#       fetched with the cpo_df_name and cause-subset args at index mecpo_idx_for_ordering.

#-----
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    # What to draw
    is_rcpo=True,
    reason_order=reason_order,
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='DL_OL',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)

In [None]:
save_name = f'RCPO_dfs_3x2_dl_ol_2.{save_ext}'

# Same DL/OL selection and '//' hatch as the previous DL_OL cell, saved under a second file name.
# inplace=False: the adjusted list is taken from the return value.
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    no_outg_idxs=no_outg_idxs,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '//'},
    inplace=False,
)

# NOTE: reason_order is taken as defined in an earlier cell.  A disabled alternative was to
#       order by the descending column means of the '01-05 Days' DF of mecpo_coll_drct,
#       fetched with cpo_df_subset_by_mjr_mnr_cause_args=None (i.e. without cause subsetting).

#-----
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    # What to draw
    is_rcpo=True,
    reason_order=reason_order,
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='DL_OL',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)

In [None]:
save_name = f'RCPO_dfs_3x2_dl_eqf.{save_ext}'

# Point every collection at the DL/EQF cause subset, drawn with a '-' hatch.
# inplace=False: the adjusted list is taken from the return value.
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    no_outg_idxs=no_outg_idxs,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '-'},
    inplace=False,
)

# NOTE: reason_order is taken as defined in an earlier cell.  A disabled alternative was to
#       order by the descending column means of the '01-05 Days' DF of mecpo_coll_drct,
#       fetched with the cpo_df_name and cause-subset args at index mecpo_idx_for_ordering.

#-----
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    # What to draw
    is_rcpo=True,
    reason_order=reason_order,
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='DL_EQF',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)

In [None]:
save_name = f'RCPO_dfs_3x2_dl_eqf_xfmr.{save_ext}'

# Each numbered stage prints its label, then the elapsed wall-clock seconds.
#----- Stage 1: point every collection at the DL/EQF transformer subset
start = time.time()
print('1')
mecpo_colls_w_get_args = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    no_outg_idxs=no_outg_idxs,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf_xfmr,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '\\\\'},
    inplace=False,
)
print(time.time()-start)

#----- Stage 2: currently empty
# NOTE: The alternative reason_order computation that used to live here (descending column
#       means of the '01-05 Days' DF of mecpo_coll_drct) is disabled, so this stage times nothing;
#       reason_order is taken as defined in an earlier cell.
start = time.time()
print('2')
print(time.time()-start)

#----- Stage 3: draw and (optionally) save the figure
start = time.time()
print('3')
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args,
    # What to draw
    is_rcpo=True,
    reason_order=reason_order,
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='DL_EQF_XFMR',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)
print(time.time()-start)

In [None]:
# mecpo_an_order = [
#     '01-05 Days',
#     '06-10 Days',
#     '11-15 Days',
#     '16-20 Days',
#     '21-25 Days',
#     '26-30 Days'
# ]

In [None]:
# Single barplot of the no-outage collection for the '01-05 Days' period:
# the 10 leading reasons (n_bars_to_include=10) ordered by descending column mean.
fig, ax0 = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])

# Fetch the DF once and reuse it for both the data and the bar ordering
# (previously the identical get_cpo_df call was evaluated twice).
no_outg_rcpo_df = mecpo_coll_no_outg.get_cpo_df('01-05 Days', 'rcpo_df_norm_by_xfmr_nSNs')
no_outg_reason_order = no_outg_rcpo_df.mean().sort_values(ascending=False).index.tolist()

ax0 = Plot_Bar.plot_barplot(
    ax=ax0,
    df=no_outg_rcpo_df,
    order=no_outg_reason_order,
    n_bars_to_include=10,
    barplot_kwargs=dict(label='No Outages'),
    orient='v',
    draw_legend=True,
    legend_args=dict(fontsize=15),
    title_args=dict(label='End Events', fontsize=20),
    ylabel_args=dict(ylabel='Normalized Counts', fontsize=20, x=0.0, y=0.4, ha='left', va='bottom'),
    xlabel_args=dict(xlabel='Reason', fontsize=20, x=0.9, y=0.0, ha='right', va='top'),
    tick_args=[
        dict(axis='x', labelrotation=90, labelsize=15),
        dict(axis='y', labelsize=15),
    ]
)
# Plot_General.save_fig(fig, save_dir, f'fig_7.{save_ext}')

In [None]:
# Build one list of get-args per cause selection.  All calls use inplace=False, so each
# result is taken from the return value.
#----------
# All outages: full, direct and no-outage collections, no hatch
mecpo_colls_w_get_args_all_outgs = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=[
        mecpo_coll_w_get_args_full,
        mecpo_coll_w_get_args_drct,
        mecpo_coll_w_get_args_no_outg,
    ],
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_all_outgs,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': None},
    inplace=False,
)
#----------
# DL/OL: full and direct collections only
mecpo_colls_w_get_args_dl_ol = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=[
        mecpo_coll_w_get_args_full,
        mecpo_coll_w_get_args_drct,
    ],
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_ol,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '//', 'label': 'DL_OL'},
    inplace=False,
)
#----------
# DL/EQF: derived from the (already adjusted) DL/OL list
mecpo_colls_w_get_args_dl_eqf = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_dl_ol,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '-', 'label': 'DL_EQF'},
    inplace=False,
)
#----------
# DL/EQF transformers: also derived from the (already adjusted) DL/OL list
mecpo_colls_w_get_args_dl_eqf_xfmr = adjust_all_subset_by_mjr_mnr_cause_args(
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_dl_ol,
    new_cpo_df_subset_by_mjr_mnr_cause_args=subset_args_dl_eqf_xfmr,
    new_max_total_counts_args=None,
    adjust_barplot_kwargs={'hatch': '\\\\', 'label': 'DL_EQF_XFMR'},
    inplace=False,
)



In [None]:
# Flat list of every get-args entry, in the order:
# all outages -> DL_OL -> DL_EQF -> DL_EQF_XFMR
mecpo_colls_w_get_args_FULL = list(itertools.chain(
    mecpo_colls_w_get_args_all_outgs,
    mecpo_colls_w_get_args_dl_ol,
    mecpo_colls_w_get_args_dl_eqf,
    mecpo_colls_w_get_args_dl_eqf_xfmr,
))

In [None]:
# mecpo_colls_w_get_args_FULL

In [None]:
save_name = f'RCPO_dfs_3x2_ALL.{save_ext}'
#-----
# Draw every entry of mecpo_colls_w_get_args_FULL in one 3x2 figure.
# Reasons are ordered by the descending column means of the '01-05 Days' DF of mecpo_coll_drct.
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_FULL,
    # What to draw
    is_rcpo=True,
    reason_order=(
        mecpo_coll_drct
        .get_cpo_df('01-05 Days', 'rcpo_df_norm_by_xfmr_nSNs')
        .mean()
        .sort_values(ascending=False)
        .index
        .tolist()
    ),
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='Full',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)

In [None]:
# Collect, for every entry of mecpo_colls_w_get_args_FULL, the bar facecolor of its
# collection and its cause-subset specification.
unq_colors = []
subset_cause_args = []
for coll_w_get_args in mecpo_colls_w_get_args_FULL:
    unq_colors.append(coll_w_get_args['mecpo_coll'].barplot_kwargs_shared['facecolor'])
    subset_cause_args.append(coll_w_get_args['cpo_df_subset_by_mjr_mnr_cause_args'])
# De-duplicate while keeping first-appearance order.  list(set(...)) was used before, but
# set iteration order is unspecified (and varies between kernel sessions for strings),
# which made the downstream color-based sort order non-reproducible.
unq_colors = list(dict.fromkeys(unq_colors))
#-----
# The subset specifications may be dicts (unhashable), so de-duplicate by equality
# instead of hashing; first-appearance order is kept here as well.
unq_subset_cause_args = []
for subset in subset_cause_args:
    if subset not in unq_subset_cause_args:
        unq_subset_cause_args.append(subset)

In [None]:
# Display the distinct cause-subset specifications collected from mecpo_colls_w_get_args_FULL
unq_subset_cause_args

In [None]:
# Display the distinct facecolors collected from mecpo_colls_w_get_args_FULL
unq_colors

In [None]:
# Explicit facecolor order used by the sorting below; overrides the unq_colors
# list computed in the earlier cell
unq_colors=['red', 'green', 'orange']

In [None]:
# Re-order mecpo_colls_w_get_args_FULL so entries are grouped by facecolor first
# (in unq_colors order) and by cause-subset specification second (in
# unq_subset_cause_args order); ties keep their order from mecpo_colls_w_get_args_FULL.
mecpo_colls_w_get_args_FULL_sorted = [
    entry
    for color_i in unq_colors
    for subset_j in unq_subset_cause_args
    for entry in mecpo_colls_w_get_args_FULL
    if (
        entry['mecpo_coll'].barplot_kwargs_shared['facecolor'] == color_i
        and entry['cpo_df_subset_by_mjr_mnr_cause_args'] == subset_j
    )
]
# Every entry must have been picked up exactly once
assert len(mecpo_colls_w_get_args_FULL_sorted) == len(mecpo_colls_w_get_args_FULL)

In [None]:
save_name = f'RCPO_dfs_3x2_ALL_SRTD.{save_ext}'
#-----
# Same figure as the ALL plot, but drawn from the color/subset-sorted list of entries.
# Reasons are ordered by the descending column means of the '01-05 Days' DF of mecpo_coll_drct.
fig, axs = draw_cpo_dfs_full_vs_direct_3x2_v2(
    fig_num=1000,
    mecpo_colls_w_get_args=mecpo_colls_w_get_args_FULL_sorted,
    # What to draw
    is_rcpo=True,
    reason_order=(
        mecpo_coll_drct
        .get_cpo_df('01-05 Days', 'rcpo_df_norm_by_xfmr_nSNs')
        .mean()
        .sort_values(ascending=False)
        .index
        .tolist()
    ),
    n_reason_to_include=None,
    # Layout / labelling
    suptitle='Full',
    row_major=True,
    replace_xtick_labels_with_ints=True,
    sharex=True,
    sharey=False,
    make_ylim_eq=False,
    # Output
    save_args={
        'save_fig': save_fig,
        'save_dir': save_dir,
        'save_name': save_name,
    },
)