In [None]:
%run ./check_DOVS_METHODS.ipynb

In [None]:
from importlib import reload
#reload(Utilities)
#reload(clm)

import sys, os
import re

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns
from packaging import version

import copy

import itertools

import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
import Utilities_dt
from Utilities_df import DFConstructType
import Plot_General
import Plot_Box_sns
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer

In [None]:
# Date window, state and opco settings. In the visible cells these are only referenced from
#   commented-out arguments, so they do not currently influence the queries.
date_0, date_1 = '2023-01-01', '2023-01-31'
states = ['OH']
opcos = 'oh'

# OUTG_REC_NBs (kept as strings) handed to the DOVS outage query.
outg_rec_nbs = ['13330651', '13325577', '13329763', '13278397', '13183482', '13183481']

# Connection object that is later passed to pd.read_sql_query.
conn_outages = Utilities.get_utldb01p_oracle_connection()

In [None]:
# Build the standard DOVS outage query (premises included) for the hand-picked outg_rec_nbs.
# The cause/device/interruption/record-status arguments are all passed as None.
sql_outage_full = DOVSOutages_SQL.build_sql_std_outage(
    mjr_mnr_cause=None,
    include_premise=True,
    outg_rec_nbs=outg_rec_nbs,
#     date_range=[date_0, date_1],
#     states=states,
    MJR_CAUSE_CD=None,
    DEVICE_CD=None,
    INTRPTN_TYP_CD=None,
    CURR_REC_STAT_CD=None
).get_sql_statement()

# Echo the generated SQL so it can be inspected or re-run by hand.
print(f'sql_outage_full:\n{sql_outage_full}\n\n')

# Execute the query, forcing the dtypes of the count/ID columns.
df_outage_OG = pd.read_sql_query(
    sql=sql_outage_full,
    con=conn_outages,
    dtype={'CI_NB': np.int32, 'CMI_NB': np.float64, 'OUTG_REC_NB': np.int32},
)

# Quick size check of what came back.
print(f"df_outage_OG.shape = {df_outage_OG.shape}")
print(f"# OUTG_REC_NBs     = {df_outage_OG['OUTG_REC_NB'].nunique()}")

In [None]:
# Distinct OUTG_REC_NBs actually returned by the query (order of first appearance).
outg_rec_nbs_all = df_outage_OG['OUTG_REC_NB'].drop_duplicates().tolist()
# Work on a copy so the frame as returned by the query stays untouched.
df_outage = df_outage_OG.copy(deep=True)

In [None]:
# Display the distinct OUTG_REC_NB values found in the query result.
outg_rec_nbs_all

In [None]:
# Build the meter-premise (MP) frame for the outages in df_outage, then remove approximate
#   duplicate entries. Both steps are timed with the shared `start` timer.
start=time.time()
#-----
# NOTE(review): build_active_MP_for_xfmrs_in_outages_df is not defined or imported in the visible cells;
#   presumably it comes from the %run of check_DOVS_METHODS.ipynb - confirm.
df_mp_outg_OG = build_active_MP_for_xfmrs_in_outages_df(
    df_outage=df_outage, 
    prem_nb_col='PREMISE_NB', 
    is_slim=False, 
    addtnl_mp_df_curr_cols=['technology_tx'], 
    addtnl_mp_df_hist_cols=['technology_tx']
)
#-----
print(f'Time for build_active_MP_for_xfmrs_in_outages_df: {time.time()-start}')
# Restart the timer for the de-duplication step
start=time.time()
#-----
# Convert the install/removal timestamps to datetimes before de-duplicating
df_mp_outg_OG['inst_ts'] = pd.to_datetime(df_mp_outg_OG['inst_ts'])
df_mp_outg_OG['rmvl_ts'] = pd.to_datetime(df_mp_outg_OG['rmvl_ts'])
#-------------------------
# De-duplicate on a copy (df_mp_outg_OG is kept as built), using a one-hour fuzziness and
#   additionally grouping by outage and meter technology.
df_mp_outg = MeterPremise.drop_approx_mp_duplicates(
    mp_df = df_mp_outg_OG.copy(), 
    fuzziness=pd.Timedelta('1 hour'), 
    assert_single_overlap=True, 
    addtnl_groupby_cols=['OUTG_REC_NB', 'technology_tx'], 
    gpby_dropna=False
)
#-----
print(f'Time for drop_approx_mp_duplicates: {time.time()-start}')

In [None]:
# Some premises listed in DOVS are simply not found in AMI, so the two counts printed here may differ.
print(f"#PNs DOVS:            {df_outage['PREMISE_NB'].nunique()}")
print(f"#PNs AMI using xfmrs: {df_mp_outg['prem_nb'].nunique()}")

In [None]:
# Premise numbers present in the MP frame but absent from the DOVS outage frame.
set(df_mp_outg['prem_nb'].unique().tolist()) - set(df_outage['PREMISE_NB'].unique().tolist())

In [None]:
# Premise numbers present in the DOVS outage frame but absent from the MP frame.
set(df_outage['PREMISE_NB'].unique().tolist()) - set(df_mp_outg['prem_nb'].unique().tolist())

In [None]:
# Exactly one entry is wanted per meter within an outage, a meter being a
#   mfr_devc_ser_nbr/prem_nb combination.
# Although approximate duplicates were dropped, multiple entries could still exist if, e.g.,
#   a meter has two intervals which do not overlap even fuzzily.
assert (df_mp_outg[['mfr_devc_ser_nbr', 'prem_nb', 'OUTG_REC_NB']].value_counts() == 1).all()

# # Simple-minded (if assertion fails): Let's just keep the one with the most recent install date
# df_mp_outg = df_mp_outg.iloc[df_mp_outg.reset_index().groupby(['mfr_devc_ser_nbr', 'prem_nb', 'OUTG_REC_NB'])['inst_ts'].idxmax()]
# assert(all(df_mp_outg[['mfr_devc_ser_nbr', 'prem_nb', 'OUTG_REC_NB']].value_counts()==1))

In [None]:
# Percentage of meter rows, per outage, whose technology_tx is 'AMI'.
df_mp_outg.groupby(['OUTG_REC_NB']).apply(
    lambda x: 100*((x['technology_tx']=='AMI').sum()/x.shape[0])
)

In [None]:
# Display the de-duplicated meter-premise frame.
df_mp_outg

In [None]:
# Outage numbers and DOVS OUTG_REC_NBs, listed in matching order (the two lists are zipped
#   together position-by-position to build the OUTG_REC_NB -> OUTAGE_NB map).
# The last two OUTG_REC_NBs share the same outage number.
mico_outage_nbs = [1455461, 1431681, 1475011, 2322101, 2188221, 2188221]

mico_outg_rec_nbs = [13330651, 13325577, 13329763, 13278397, 13183482, 13183481]

In [None]:
# Lookup from OUTG_REC_NB to its outage number, pairing the two lists position-by-position.
mico_outage_outg_map = {
    rec_nb: outage_nb
    for rec_nb, outage_nb in zip(mico_outg_rec_nbs, mico_outage_nbs)
}

In [None]:
# Add the outage number to each meter row via the OUTG_REC_NB -> OUTAGE_NB map.
# Rows whose OUTG_REC_NB is not a key of the map receive NaN (pandas Series.map behavior).
df_mp_outg['OUTAGE_NB'] = df_mp_outg['OUTG_REC_NB'].map(mico_outage_outg_map)

In [None]:
# Percentage of meter rows per (OUTG_REC_NB, OUTAGE_NB) whose technology_tx is 'AMI'.
df1 = df_mp_outg.groupby(['OUTG_REC_NB', 'OUTAGE_NB']).apply(
    lambda x: 100*((x['technology_tx']=='AMI').sum()/x.shape[0])
).to_frame(name='%AMI')
df1

In [None]:
# Some premises listed in DOVS are simply not found in AMI, so the two counts printed here may differ.
print(f"#PNs DOVS:            {df_outage['PREMISE_NB'].nunique()}")
print(f"#PNs AMI using xfmrs: {df_mp_outg['prem_nb'].nunique()}")

In [None]:
# Debugging aid: print the OUTG_REC_NB (first element of each group key) for every
#   (OUTG_REC_NB, OUTAGE_NB) group. The lambda returns None for each group.
df_mp_outg.groupby(['OUTG_REC_NB', 'OUTAGE_NB']).apply(lambda x: print(x.name[0]))

In [None]:
# For each outage group: number of DOVS premise numbers with no matching prem_nb in the MP frame.
# x.name[0] is the OUTG_REC_NB component of the (OUTG_REC_NB, OUTAGE_NB) group key.
df2 = df_mp_outg.groupby(['OUTG_REC_NB', 'OUTAGE_NB']).apply(
    lambda x: len(
        set(df_outage.loc[df_outage['OUTG_REC_NB']==x.name[0], 'PREMISE_NB'].unique().tolist())
        - set(x['prem_nb'].unique().tolist())
    )
).to_frame(name='# PNs missing')
df2

In [None]:
# For each outage group: total number of distinct DOVS premise numbers listed for that OUTG_REC_NB.
df3 = df_mp_outg.groupby(['OUTG_REC_NB', 'OUTAGE_NB']).apply(
    lambda x: df_outage.loc[df_outage['OUTG_REC_NB']==x.name[0], 'PREMISE_NB'].nunique()
).to_frame(name='# PNs total')
df3

In [None]:
# Combine the %AMI and '# PNs missing' summaries on their shared (OUTG_REC_NB, OUTAGE_NB) index.
df12 = df1.merge(df2, how='inner', left_index=True, right_index=True)
df12

In [None]:
# Append the '# PNs total' summary, again joining on the shared index.
df123 = df12.merge(df3, how='inner', left_index=True, right_index=True)
df123

In [None]:
# The name `ami_outg_rec_nbs` is only assigned in a later cell, so referencing it here raises
#   NameError when the notebook is run top-to-bottom on a fresh kernel.
# Compute the same list inline instead: the OUTG_REC_NBs whose meters are all of type AMI.
df_mp_outg.groupby(['OUTG_REC_NB']).filter(
    lambda x: all(x['technology_tx']=='AMI')
)['OUTG_REC_NB'].unique().tolist()

In [None]:
# Number of DOVS premise numbers that have no counterpart in the MP frame (over all outages).
len(set(df_outage['PREMISE_NB'].unique().tolist()) - set(df_mp_outg['prem_nb'].unique().tolist()))

In [None]:
# Inspect the DOVS rows belonging to a single outage record.
df_outage.loc[df_outage['OUTG_REC_NB']==13183481]

In [None]:
# # Keep only trsf_pole_nbs with all meters of type AMI
# df_mp_outg_ami = df_mp_outg.groupby(['trsf_pole_nb']).filter(lambda x: all(x['technology_tx']=='AMI'))

# Keep only outages with all meters of type AMI
# (an OUTG_REC_NB group is dropped entirely if any of its rows has technology_tx != 'AMI')
df_mp_outg_ami = df_mp_outg.groupby(['OUTG_REC_NB']).filter(lambda x: all(x['technology_tx']=='AMI'))

In [None]:
# Compare the row counts before and after restricting to all-AMI outages.
print(df_mp_outg.shape)
print(df_mp_outg_ami.shape)

In [None]:
# `ami_outg_rec_nbs` is assigned in the following cell, so referencing the name here raises
#   NameError on a fresh top-to-bottom run. Display the same list directly from df_mp_outg_ami.
df_mp_outg_ami['OUTG_REC_NB'].unique().tolist()

In [None]:
# OUTG_REC_NBs that survived the all-AMI filter, and the matching subset of DOVS outage rows.
ami_outg_rec_nbs = df_mp_outg_ami['OUTG_REC_NB'].drop_duplicates().tolist()
df_outage_ami = df_outage.loc[df_outage['OUTG_REC_NB'].isin(ami_outg_rec_nbs)]

In [None]:
# Merge the all-AMI outage rows with their meter-premise rows, matching on outage record and premise
#   (PREMISE_NB on the DOVS side, prem_nb on the MP side).
# NOTE(review): inplace=True is passed while the return value is also assigned (and the input is a
#   throw-away copy). Confirm merge_df_outage_with_mp returns the merged frame when inplace=True;
#   otherwise this name would end up as None.
df_outage_ami_w_mp = DOVSOutages.merge_df_outage_with_mp(
    df_outage=df_outage_ami.copy(), 
    df_mp=df_mp_outg_ami, 
    merge_on_outg=['OUTG_REC_NB', 'PREMISE_NB'], 
    merge_on_mp=['OUTG_REC_NB', 'prem_nb'], 
    cols_to_include_mp=None, 
    drop_cols = None, 
    rename_cols=None, 
    inplace=True
)

In [None]:
# Same merge as for the all-AMI subset, but for every outage in df_outage (no AMI-only restriction).
# NOTE(review): inplace=True is passed while the return value is also assigned; confirm
#   merge_df_outage_with_mp returns the merged frame in that mode.
df_outage_w_mp = DOVSOutages.merge_df_outage_with_mp(
    df_outage=df_outage.copy(), 
    df_mp=df_mp_outg, 
    merge_on_outg=['OUTG_REC_NB', 'PREMISE_NB'], 
    merge_on_mp=['OUTG_REC_NB', 'prem_nb'], 
    cols_to_include_mp=None, 
    drop_cols = None, 
    rename_cols=None, 
    inplace=True
)

In [None]:
# NOTE(review): this rebinds df_outage_ami_w_mp to the merge result for ALL outages, discarding the
#   AMI-only merge result. Everything downstream that uses the *_ami_* names therefore operates on
#   all outages - confirm this is intentional.
df_outage_ami_w_mp=df_outage_w_mp

In [None]:
# Number of distinct outage records in the merged outage/MP frame.
df_outage_ami_w_mp['OUTG_REC_NB'].nunique()

In [None]:
# Display the merged outage/MP frame.
df_outage_ami_w_mp

In [None]:
# Consolidate the merged outage/MP frame into its "slim" form.
# NOTE(review): presumably one row per outage with premise lists - verify against
#   DOVSOutages.consolidate_df_outage.
df_outage_ami_w_mp_slim = DOVSOutages.consolidate_df_outage(df_outage_ami_w_mp)

In [None]:
# Display the consolidated outage frame.
df_outage_ami_w_mp_slim

In [None]:
# Set the search-time information in the consolidated outage frame using a 24-hour half window.
# NOTE: the result is assigned back to the same name, so re-running this cell feeds the
#   already-processed frame back into set_search_time_in_outage_df.
df_outage_ami_w_mp_slim = DOVSOutages.set_search_time_in_outage_df(
    df_outage=df_outage_ami_w_mp_slim, 
    search_time_half_window=pd.Timedelta('24 hours')
)

In [None]:
# Display the consolidated outage frame after the search-time step.
df_outage_ami_w_mp_slim

In [None]:
# Number of distinct outage records in the consolidated frame.
df_outage_ami_w_mp_slim['OUTG_REC_NB'].nunique()

## AMI NonVee

In [None]:
#-------------------------
# Settings for the AMI NonVee data pull. These are consumed by the cells that build
#   ami_sql_function_kwargs and construct the AMINonVee object.
usg_split_to_CTEs=True
# The frame is to be constructed by running a SQL query
df_construct_type=DFConstructType.kRunSqlQuery
contstruct_df_args_ami=None
addtnl_groupby_cols=['OUTG_REC_NB', 'trsf_pole_nb']

# Columns to select: the standard columns of interest defined for the AMINonVee table
cols_of_interest_ami = TableInfos.AMINonVee_TI.std_columns_of_interest
# Batch size, verbosity and update frequency forwarded in the SQL function kwargs
batch_size=10
verbose=True
n_update=1

In [None]:
# ami_sql_function_kwargs = dict(
#     cols_of_interest=cols_of_interest_ami, 
#     df_outage=df_outage_ami_w_mp_slim, 
#     split_to_CTEs=usg_split_to_CTEs, 
#     join_mp_args=False, 
#     df_args = dict(
#         addtnl_groupby_cols=addtnl_groupby_cols, 
#         mapping_to_ami={'PREMISE_NBS':'premise_nbs'}, 
#         is_df_consolidated=True
#     ), 
#     field_to_split='df_outage', 
#     field_to_split_location_in_kwargs=['df_outage'], 
# #     save_and_dump=True, 
#     save_and_dump=False, 
#     sort_coll_to_split=True,
#     batch_size=batch_size, verbose=verbose, n_update=n_update
# )
# # addtnl_ami_sql_function_kwargs = dict(
# #     build_sql_function_kwargs=dict(opco=opcos)
# # )
# # ami_sql_function_kwargs = {**ami_sql_function_kwargs, 
# #                            **addtnl_ami_sql_function_kwargs}

In [None]:
# start=time.time()
# ami_nonvee = AMINonVee(
#     df_construct_type=df_construct_type, 
#     contstruct_df_args = contstruct_df_args_ami, 
#     build_sql_function=AMINonVee_SQL.build_sql_usg_for_outages, 
#     build_sql_function_kwargs=ami_sql_function_kwargs, 
#     init_df_in_constructor=True, 
#     save_args=False
# )
# build_time = time.time()-start
# print(build_time)

In [None]:
# Keyword arguments handed to the AMINonVee constructor as build_sql_function_kwargs
#   (alongside build_sql_function=AMINonVee_SQL.build_sql_usg_for_outages).
ami_sql_function_kwargs = {
    'cols_of_interest': cols_of_interest_ami,
    'df_outage': df_outage_ami_w_mp_slim,
    'split_to_CTEs': usg_split_to_CTEs,
    'join_mp_args': False,
    'df_args': {
        'addtnl_groupby_cols': addtnl_groupby_cols,
        'mapping_to_ami': {'PREMISE_NBS': 'premise_nbs'},
        'is_df_consolidated': True,
    },
    'field_to_split': 'df_outage',
    'field_to_split_location_in_kwargs': ['df_outage'],
    'save_and_dump': True,
    'sort_coll_to_split': True,
    'batch_size': batch_size,
    'verbose': verbose,
    'n_update': n_update,
}
# addtnl_ami_sql_function_kwargs = dict(
#     build_sql_function_kwargs=dict(opco=opcos)
# )
# ami_sql_function_kwargs = {**ami_sql_function_kwargs, 
#                            **addtnl_ami_sql_function_kwargs}


# Where and how the collected AMI NonVee data are written to disk.
# NOTE: save_dir is an absolute, user-specific Windows path.
save_args = {
    'save_to_file': True,
    'save_dir': r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico\AMINonVee',
    'save_name': r'ami_nonvee.csv',
    'index': True,
}

In [None]:
# Construct the AMINonVee object, with the frame initialized in the constructor
#   (init_df_in_constructor=True) and saving configured via save_args. The build is timed.
start=time.time()
ami_nonvee = AMINonVee(
    df_construct_type=df_construct_type, 
    contstruct_df_args = contstruct_df_args_ami, 
    build_sql_function=AMINonVee_SQL.build_sql_usg_for_outages, 
    build_sql_function_kwargs=ami_sql_function_kwargs, 
    init_df_in_constructor=True, 
    save_args=save_args
)
# Elapsed wall-clock time (seconds) for the build
build_time = time.time()-start
print(build_time)

# AMI End Events

In [None]:
#-------------------------
# Settings for the AMI End Events data pull.
# NOTE: usg_split_to_CTEs, df_construct_type, addtnl_groupby_cols, batch_size, verbose and n_update
#   are re-assigned here with the same values used for the AMI NonVee pull.
usg_split_to_CTEs=True
df_construct_type=DFConstructType.kRunSqlQuery
contstruct_df_args_end_events=None
addtnl_groupby_cols=['OUTG_REC_NB', 'trsf_pole_nb']

# Columns to select: the standard columns of interest defined for the AMIEndEvents table
cols_of_interest_end_dev_event = TableInfos.AMIEndEvents_TI.std_columns_of_interest
batch_size=10
verbose=True
n_update=1

In [None]:
# Keyword arguments handed to the AMIEndEvents constructor as build_sql_function_kwargs
#   (alongside build_sql_function=AMIEndEvents_SQL.build_sql_end_events_for_outages).
end_events_sql_function_kwargs = {
    'cols_of_interest': cols_of_interest_end_dev_event,
    'df_outage': df_outage_ami_w_mp_slim,
    'split_to_CTEs': usg_split_to_CTEs,
    'join_mp_args': False,
    'df_args': {
        'addtnl_groupby_cols': addtnl_groupby_cols,
        'mapping_to_ami': {'PREMISE_NBS': 'premise_nbs'},
        'is_df_consolidated': True,
    },
    'field_to_split': 'df_outage',
    'field_to_split_location_in_kwargs': ['df_outage'],
    'save_and_dump': True,
    'sort_coll_to_split': True,
    'batch_size': batch_size,
    'verbose': verbose,
    'n_update': n_update,
}
# addtnl_end_events_sql_function_kwargs = dict(
#     build_sql_function_kwargs=dict(opco=opcos)
# )
# end_events_sql_function_kwargs = {**end_events_sql_function_kwargs, 
#                                   **addtnl_end_events_sql_function_kwargs}

# Where and how the collected end events are written to disk.
# NOTE: save_dir is an absolute, user-specific Windows path.
end_events_save_args = {
    'save_to_file': True,
    'save_dir': r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico\EndEvents',
    'save_name': r'end_events.csv',
    'index': True,
}

In [None]:
# Construct the AMIEndEvents object, with the frame initialized in the constructor
#   (init_df_in_constructor=True) and saving configured via end_events_save_args.
start=time.time()
end_events = AMIEndEvents(
    df_construct_type=df_construct_type, 
    contstruct_df_args = contstruct_df_args_end_events, 
    build_sql_function=AMIEndEvents_SQL.build_sql_end_events_for_outages, 
    build_sql_function_kwargs=end_events_sql_function_kwargs, 
    init_df_in_constructor=True, 
    save_args=end_events_save_args
)
# Elapsed wall-clock time (seconds) for the build; stored but not printed in this cell
end_events_build_time = time.time()-start

# Analyze collected data

## AMI NonVee

In [None]:
#----------------------------------------------------------------------------------------------------
# Build mico_df_for_plt to be used in plotting times from mico_df
#----------------------------------------------------------------------------------------------------
conn_outages = Utilities.get_utldb01p_oracle_connection()
# NOTE: absolute, user-specific path to the review workbook
outgs_file_from_mico = r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico2\2023-04-08 to 04-15 Reviews (1).xlsx'
# Padding applied on both sides of the outage dates when searching DOVS
expand_time = pd.Timedelta('1 day')
#----------------------------------------------------------------------------------------------------
# Keep the raw sheet untouched and work on a copy
mico_df_raw = pd.read_excel(outgs_file_from_mico, sheet_name='Scorecard')
mico_df = mico_df_raw.copy()
#-----
# For now, keep only the following columns:
mico_cols_to_keep = [
    'Outage #', 
    'Outage Start DT', 
    'Adj Outage Start DT', 
    'Outage End DT',
    'Adj Outage End DT', 
    'Circuit Name',
    'Step CMI'
]
mico_df = mico_df[mico_cols_to_keep]

#-------------------------
# Currently, outage numbers have -1, -2, etc. appended.
# I believe an outage number will have such multiple rows when the outage affects more than one circuit.
# In the DOVS database, these will be split into separate outg_rec_nbs
#-----
# I will instead merge the data via the outage number and circuit name, so remove the -1, -2, etc. from 
#   the 'Outage #', store the result in 'OUTAGE_NB' (to be consistent with DOVS), and drop 'Outage #'
# NOTE: the pattern is a raw string so that \d is a regex digit class rather than an (invalid)
#   string escape sequence. The values in 'Outage #' must be strings for re.sub to accept them.
mico_df['OUTAGE_NB'] = mico_df['Outage #'].apply(lambda x: re.sub(r'(\d*)-\d*', r'\1', x))
mico_df=mico_df.drop(columns=['Outage #'])

#-------------------------
# Each outage can also have multiple rows corresponding to the power recovery steps
# Aggregate the steps into a single row for each outage: earliest start, latest end, summed CMI
mico_df = mico_df.groupby(
    ['OUTAGE_NB', 'Circuit Name'], 
    dropna=False, 
    as_index=False, 
    group_keys=False
).agg({
    'Outage Start DT':     'min', 
    'Adj Outage Start DT': 'min', 
    'Outage End DT':       'max', 
    'Adj Outage End DT':   'max', 
    'Step CMI':            'sum'
})

# At this point, each outage (unique combination of 'OUTAGE_NB' and 'Circuit Name') should
#   correspond to a single row
assert(mico_df.shape[0] == mico_df.groupby(['OUTAGE_NB', 'Circuit Name']).ngroups)

#-------------------------
# Date window per outage: the earliest of the (adjusted/unadjusted) start dates minus expand_time,
#   and the latest of the (adjusted/unadjusted) end dates plus expand_time
mico_df['Min Start Date'] = mico_df[['Outage Start DT', 'Adj Outage Start DT']].min(axis=1).dt.date - expand_time
mico_df['Max End Date']   = mico_df[['Outage End DT',   'Adj Outage End DT'  ]].max(axis=1).dt.date + expand_time

#----------------------------------------------------------------------------------------------------
# Query DOVS by OUTAGE_NB over the overall date window, then narrow down to the rows that match each
#   outage's own circuit and date window taken from mico_df.
#----------------------------------------------------------------------------------------------------

start = time.time()

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE: A single OUTAGE_NB can correspond to more than one OUTG_REC_NBs!
#       It appears this is the case when the outage affects multiple GIS_CRCT_NBs, in which case,
#         each GIS_CRCT_NB gets its own OUTG_REC_NB
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# First, grab DF containing all OUTAGE_NBs
# Note, the OUTAGE_NB is not unique, so there will generally be multiple entries
#   Here, I'm talking about the same OUTAGE_NB being used for different outages throughout the years, 
#     not a single OUTAGE_NB corresponding to multiple OUTG_REC_NBs, as described above.
#   Determine which is correct entry using the times from mico_df
# The date_range passed here spans the earliest 'Min Start Date' to the latest 'Max End Date' over all
#   outages; the per-outage window is enforced after the merge further down.
sql_using_outage_nbs = DOVSOutages_SQL.build_sql_std_outage(
    mjr_mnr_cause=None, 
    include_premise=True, 
    outage_nbs=mico_df['OUTAGE_NB'].unique().tolist(), 
    date_range=[mico_df['Min Start Date'].min(), mico_df['Max End Date'].max()], 
    MJR_CAUSE_CD=None, 
    DEVICE_CD=None, 
    INTRPTN_TYP_CD=None, 
    CURR_REC_STAT_CD=None, 
    select_cols_DOVS_PREMISE_DIM=['CIRCT_NM']
).get_sql_statement()
#-----
df_using_outage_nbs = pd.read_sql_query(
    sql_using_outage_nbs, 
    conn_outages, 
    dtype={
        'CI_NB':np.int32, 
        'CMI_NB':np.float64, 
        'OUTG_REC_NB':np.int32
    }
)
#-------------------------
# Determine appropriate OUTG_REC_NBs by using 'Min Start Date', 'Max End Date'
# NOTE(review): mico_df['OUTAGE_NB'] holds strings (it is the output of re.sub); this merge assumes the
#   OUTAGE_NB column returned by DOVS has a compatible dtype - confirm.
df_using_outage_nbs = pd.merge(
    df_using_outage_nbs, 
    mico_df[['OUTAGE_NB', 'Circuit Name', 'Min Start Date', 'Max End Date']], 
    left_on=['OUTAGE_NB', 'CIRCT_NM'], 
    right_on=['OUTAGE_NB', 'Circuit Name'], 
    how='inner'
)
# Keep only rows whose DOVS off/on dates fall inside the outage's own (expanded) date window
df_using_outage_nbs= df_using_outage_nbs[
    (df_using_outage_nbs['DT_OFF_TS_FULL'].dt.date >= df_using_outage_nbs['Min Start Date']) & 
    (df_using_outage_nbs['DT_ON_TS'].dt.date       <= df_using_outage_nbs['Max End Date'])
]
# The helper columns brought in from mico_df are no longer needed
df_using_outage_nbs = df_using_outage_nbs.drop(columns=['Min Start Date', 'Max End Date', 'Circuit Name'])

print(time.time()-start)

# NOTE: Keep CIRCT_NM (although it comes from DOVS_PREMISE_DIM), as this will be used to merge as there exist 
#         duplicate OUTAGE_NB entries in Mico's df which have different OUTG_REC_NBs if the outage affects multiple circuits
# Drop the premise-level columns and collapse the resulting duplicate rows
df_outage_noPNs = df_using_outage_nbs.drop(columns=['OFF_TM', 'REST_TM', 'PREMISE_NB']).drop_duplicates()

#----------------------------------------------------------------------------------------------------
# Attach the DOVS OUTG_REC_NB to each (OUTAGE_NB, circuit) row of mico_df
#----------------------------------------------------------------------------------------------------
mico_df_for_plt = mico_df.merge(
    df_outage_noPNs[['OUTAGE_NB', 'CIRCT_NM', 'OUTG_REC_NB']],
    how='inner',
    left_on=['OUTAGE_NB', 'Circuit Name'],
    right_on=['OUTAGE_NB', 'CIRCT_NM'],
)
#-----
# Final times: use the adjusted value where present, otherwise fall back to the unadjusted one
mico_df_for_plt['Final Outage Start'] = (
    mico_df_for_plt['Adj Outage Start DT'].fillna(mico_df_for_plt['Outage Start DT'])
)
mico_df_for_plt['Final Outage End'] = (
    mico_df_for_plt['Adj Outage End DT'].fillna(mico_df_for_plt['Outage End DT'])
)
# Remove the source/helper columns now that the final times exist
mico_df_for_plt = mico_df_for_plt.drop(
    columns=[
        'Adj Outage Start DT', 'Outage Start DT',
        'Adj Outage End DT', 'Outage End DT',
        'Circuit Name',
        'Step CMI', 'Min Start Date', 'Max End Date',
    ]
)
# OUTG_REC_NB is stored as a string in this frame
mico_df_for_plt['OUTG_REC_NB'] = mico_df_for_plt['OUTG_REC_NB'].astype(str)

In [None]:
#-------------------------
# When True (and estimates exist), CI/CMI are recomputed from best_ests_df after combining
#   entries by premise number (see the cell calling combine_PNs_in_best_ests_df).
calculate_by_PN = True
#-------------------------

In [None]:
#-------------------------
# Locate the saved AMI NonVee batch files and put them in natural (numeric-aware) order
paths = natsorted(
    Utilities.find_all_paths(
#         base_dir=r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico\AMINonVee', 
        base_dir=r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico2\AMINonVee',
        glob_pattern=r'ami_nonvee_[0-9]*.csv',
        regex_pattern=None
    )
)
#-------------------------
# Record which OUTG_REC_NBs are contained in each file
outg_rec_nbs_in_files = {}
for path in paths:
    # A path must not be processed twice
    assert path not in outg_rec_nbs_in_files
    df = GenAn.read_df_from_csv(path)
    outg_rec_nbs_in_files[path] = df['OUTG_REC_NB_GPD_FOR_SQL'].unique().tolist()
# Invert the mapping so the files can be looked up by OUTG_REC_NB
outg_rec_nb_to_files_dict = invert_file_to_outg_rec_nbs_dict(outg_rec_nbs_in_files)
all_outg_rec_nbs = list(outg_rec_nb_to_files_dict.keys())

In [None]:
#-------------------------
# Locate the saved end-events batch files and put them in natural (numeric-aware) order
paths_ede = natsorted(
    Utilities.find_all_paths(
#         base_dir=r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico\EndEvents', 
        base_dir=r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico2\EndEvents',
        glob_pattern=r'end_events_[0-9]*.csv',
        regex_pattern=None
    )
)
#-------------------------
# Record which OUTG_REC_NBs are contained in each file
outg_rec_nbs_in_files_ede = {}
for path in paths_ede:
    # A path must not be processed twice
    assert path not in outg_rec_nbs_in_files_ede
    df = GenAn.read_df_from_csv(path)
    outg_rec_nbs_in_files_ede[path] = df['OUTG_REC_NB_GPD_FOR_SQL'].unique().tolist()
# Invert the mapping so the files can be looked up by OUTG_REC_NB
outg_rec_nb_to_files_ede_dict = invert_file_to_outg_rec_nbs_dict(outg_rec_nbs_in_files_ede)
all_outg_rec_nbs_ede = list(outg_rec_nb_to_files_ede_dict.keys())

In [None]:
#-------------------------
# Slicer selecting voltage rows whose service-quality identifier is one of INSTVA1/INSTVB1/INSTVC1
instvabc_slcr = DFSlicer(
    single_slicers=[
        {'column': 'aep_derived_uom', 'value': 'VOLT', 'comparison_operator': '=='},
        {
            'column': 'aep_srvc_qlty_idntfr',
            'value': ['INSTVA1', 'INSTVB1', 'INSTVC1'],
            'comparison_operator': 'isin',
        },
    ],
    name='VOLT, INSTV(ABC)1',
    join_single_slicers='and'
)
#-------------------------
# Slicer selecting voltage rows whose service-quality identifier is AVG
volt_avg_slcr = DFSlicer(
    single_slicers=[
        {'column': 'aep_derived_uom', 'value': 'VOLT', 'comparison_operator': '=='},
        {'column': 'aep_srvc_qlty_idntfr', 'value': 'AVG', 'comparison_operator': '=='},
    ],
    name='VOLT, AVG',
    join_single_slicers='and'
)
#-------------------------
# Both slicers, in the order in which they are handed to the slicing helpers
slicers = [instvabc_slcr, volt_avg_slcr]

In [None]:
# start = time.time()
# outg_rec_nbs_to_remove = identify_outg_rec_nbs_to_remove(
#     paths=paths,
#     slicers=slicers, 
#     verbose=True
# )
# print(time.time()-start)

In [None]:
# OUTG_REC_NB (as a string) of the single outage analyzed in the following cells.
# Previously inspected values are kept, commented out, for reference; only the last one is active
#   (the assignment of '13294548' was immediately overwritten, so it is now commented out as well).
# outg_rec_nb = '13325577'
# outg_rec_nb = '13183481'
# outg_rec_nb = '13329763'
# outg_rec_nb = '13183481'
# outg_rec_nb = '13330651'
# outg_rec_nb = '13278397'
# outg_rec_nb = '13294548'
outg_rec_nb = '13302188'

In [None]:
# Files containing AMI NonVee data for the selected outage (raises KeyError if there are none).
outg_rec_nb_to_files_dict[outg_rec_nb]

In [None]:
# Timer for the whole load/clean/slice sequence in this cell
start=time.time()

# Read every file containing the selected outage, then keep only that outage's rows
ami_df = GenAn.read_df_from_csv_batch(outg_rec_nb_to_files_dict[outg_rec_nb])
#-------------------------
ami_df_i = ami_df[ami_df['OUTG_REC_NB_GPD_FOR_SQL']==outg_rec_nb].copy()

# Although I cannot yet call choose_best_slicer_and_perform_slicing and reduce_INSTV_ABC_1_vals_in_df, 
#   as the standard cleaning and conversions must be done first, I am able to cut down the size of
#   ami_df_i by joining the slicers with 'or' statements.
# Thus, ami_df_i will be reduced to only the subset of data which will be considered in 
#   choose_best_slicer_and_perform_slicing
# As mentioned, this will cut down the size of ami_df_i and will also save time and resources by not having
#   to run entire DF through cleaning and conversions procedures.
ami_df_i = DFSlicer.combine_slicers_and_perform_slicing(
    df=ami_df_i, 
    slicers=slicers, 
    join_slicers='or'
)


# Stop here if nothing survives the slicing.
# An explicit exception is raised (rather than print + assert(0)) so the guard carries its own
#   message and cannot be stripped when Python runs with assertions disabled.
if ami_df_i.shape[0]==0:
    raise ValueError('No usable data in ami_df_i')

#-------------------------
# Run the standard initiation/cleaning, then add timezone-stripped datetime versions of the
#   period start/end columns in new '*_local' columns.
ami_df_i = AMINonVee.perform_std_initiation_and_cleaning(ami_df_i)
#-----
# Should the following be added to AMINonVee.perform_std_initiation_and_cleaning?
# NOTE(review): n_strip=6 presumably removes a trailing UTC offset such as '-05:00' - confirm against
#   Utilities_dt.strip_tz_info_and_convert_to_dt.
ami_df_i = Utilities_dt.strip_tz_info_and_convert_to_dt(
    df=ami_df_i, 
    time_col='starttimeperiod', 
    placement_col='starttimeperiod_local', 
    run_quick=True, 
    n_strip=6, 
    inplace=False
)
ami_df_i = Utilities_dt.strip_tz_info_and_convert_to_dt(
    df=ami_df_i, 
    time_col='endtimeperiod', 
    placement_col='endtimeperiod_local', 
    run_quick=True, 
    n_strip=6, 
    inplace=False
)

#-------------------------
# Pick the best slicer (grouped by serial number) and slice the cleaned data with it
ami_df_i = choose_best_slicer_and_perform_slicing(
    df=ami_df_i, 
    slicers=slicers, 
    groupby_SN=True, 
    t_search_min_max=None, 
    time_col='starttimeperiod_local', 
    value_col=None, 
    SN_col='serialnumber', 
    return_sorted=True
)

# Reduce the INSTV(A/B/C)1 values; the output identifier is 'INSTV(ABC)1'
ami_df_i = reduce_INSTV_ABC_1_vals_in_df(
    df=ami_df_i, 
    value_col='value', 
    aep_derived_uom_col='aep_derived_uom', 
    aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
    output_aep_srvc_qlty_idntfr = 'INSTV(ABC)1'
)


# Stop here if nothing survives the slicing/reduction.
# An explicit exception is raised (rather than print + assert(0)) so the guard carries its own
#   message and cannot be stripped when Python runs with assertions disabled.
if ami_df_i.shape[0]==0:
    raise ValueError('No usable data in ami_df_i')

#-------------------------
# Each serial number should have a single value per time stamp
assert(ami_df_i.groupby(['serialnumber', 'starttimeperiod_local']).ngroups == ami_df_i.shape[0])

# Elapsed time (seconds) since `start` was set at the top of this cell
print(time.time()-start)

In [None]:
# Number of distinct meter serial numbers with usable AMI data for this outage.
ami_df_i['serialnumber'].nunique()

In [None]:
# Number of distinct premise numbers with usable AMI data for this outage.
ami_df_i['aep_premise_nb'].nunique()

In [None]:
# Number of distinct serial numbers per premise (values above 1 mean several meters share a premise).
ami_df_i[['serialnumber', 'aep_premise_nb']].drop_duplicates()['aep_premise_nb'].value_counts()

In [None]:
# Number of distinct (serial number, start time) combinations; compare with the row count shown next.
ami_df_i.groupby(['serialnumber', 'starttimeperiod_local']).ngroups

In [None]:
# (rows, columns) of the prepared AMI frame for this outage.
ami_df_i.shape

In [None]:
# Load the end-device events for the selected outage when some file contains it;
#   otherwise ede_df_i is left as None.
if outg_rec_nb in outg_rec_nb_to_files_ede_dict:
    ede_df = GenAn.read_df_from_csv_batch(outg_rec_nb_to_files_ede_dict[outg_rec_nb])
    ede_df_i = ede_df.loc[ede_df['OUTG_REC_NB_GPD_FOR_SQL']==outg_rec_nb].copy()

    #-----
    # Add a timezone-stripped datetime version of the event time, then reduce the event reasons
    ede_df_i = Utilities_dt.strip_tz_info_and_convert_to_dt(
        df=ede_df_i,
        time_col='valuesinterval',
        placement_col='valuesinterval_local',
        run_quick=True,
        n_strip=6,
        inplace=False
    )
    ede_df_i = AMIEndEvents.reduce_end_event_reasons_in_df(df=ede_df_i)
    #-----
    # Only these columns are carried forward
    ede_cols_to_keep = [
        'valuesinterval_local',
        'reason',
        'serialnumber',
        'aep_premise_nb',
        'enddeviceeventtypeid',
        'event_type',
        'OUTG_REC_NB_GPD_FOR_SQL',
        'trsf_pole_nb_GPD_FOR_SQL',
    ]
    ede_df_i = ede_df_i[ede_cols_to_keep]
else:
    ede_df_i = None

In [None]:
# Build dovs_df
# Query DOVS for the single selected outage and request the consolidated form (build_consolidated=True).
# NOTE(review): later cells index dovs_df by outg_rec_nb via .loc and read a 'premise_nbs' column, so the
#   consolidated frame is presumably indexed by OUTG_REC_NB (as a string) - confirm.
dovs = DOVSOutages(
    df_construct_type=DFConstructType.kRunSqlQuery, 
    contstruct_df_args=None, 
    init_df_in_constructor=True,
    build_sql_function=DOVSOutages_SQL.build_sql_std_outage, 
    build_sql_function_kwargs=dict(
        outg_rec_nbs=[outg_rec_nb], 
        field_to_split='outg_rec_nbs', 
    ), 
    build_consolidated=True
)
# Work on a copy of the frame held by the DOVSOutages object
dovs_df = dovs.df.copy()  

In [None]:
# Get the outage time from DOVS
# The row for the selected outage is looked up by index label; the two-element list is
#   [DT_OFF_TS_FULL, DT_ON_TS], i.e. [beginning, end].
dovs_outg_t_beg_end = dovs_df.loc[outg_rec_nb][['DT_OFF_TS_FULL', 'DT_ON_TS']].tolist()
assert(len(dovs_outg_t_beg_end)==2)
dovs_outg_t_beg, dovs_outg_t_end = dovs_outg_t_beg_end
print(f'dovs_outg_t_beg = {dovs_outg_t_beg}')
print(f'dovs_outg_t_end = {dovs_outg_t_end}')
#-------------------------
# Get the CI and CMI from DOVS
# Same row lookup as above; the two-element list is [CI_NB, CMI_NB].
ci_cmi_dovs = dovs_df.loc[outg_rec_nb][['CI_NB', 'CMI_NB']].tolist()
assert(len(ci_cmi_dovs)==2)
ci_dovs, cmi_dovs = ci_cmi_dovs
print(f'ci_dovs  = {ci_dovs}')
print(f'cmi_dovs = {cmi_dovs}')

In [None]:
# ami_w_dovs_i = ami_df_i.merge(
#     dovs_df.drop(columns=['premise_nbs']), 
#     left_on='OUTG_REC_NB_GPD_FOR_SQL', 
#     right_index=True, 
#     how='left'
# )

In [None]:
# Premise counts from the three sources for the selected outage.
# ede_df_i is None when no end-events file contains this outage, in which case 0 is reported
#   instead of failing on None['aep_premise_nb'].
print(f"PNs with AMI data  : {ami_df_i['aep_premise_nb'].nunique()}")
print(f"PNs with end events: {ede_df_i['aep_premise_nb'].nunique() if ede_df_i is not None else 0}")
print(f"PNs from DOVS: {dovs_df.loc[[outg_rec_nb]].explode('premise_nbs')['premise_nbs'].nunique()}")

In [None]:
# Figure number passed to Plot_General.default_subplots by the multi-panel plotting cell.
fig_num = 0

In [None]:
# Counts for the selected outage: distinct serial numbers and premises seen in the AMI data, and
#   the length of the 'premise_nbs' entry in the DOVS row.
n_SNs      = ami_df_i['serialnumber'].nunique()
n_PNs      = ami_df_i['aep_premise_nb'].nunique()
n_PNs_dovs = len(dovs_df.loc[outg_rec_nb]['premise_nbs'])

In [None]:
# Report the serial-number and premise counts computed in the previous cell.
print(f"#SNs        = {n_SNs}")
print(f"#PNs (AMI)  = {n_PNs}")
print(f"#PNs (DOVS) = {n_PNs_dovs}")

In [None]:
# Overview plot: the 'value' column vs. local start time for every serial number, drawn around
#   the DOVS outage interval.
fig, ax = Plot_General.default_subplots()

# One color per serial number
palette = Plot_General.get_standard_colors_dict(
    keys=ami_df_i['serialnumber'].unique().tolist(), 
    palette='colorblind'
)

#-------------------------
# NOTE: i_subplot is assigned here but not used in this cell
i_subplot=0
fig, ax = AMINonVee.plot_usage_around_outage(
    fig=fig, 
    ax=ax, 
    data=ami_df_i, 
    x='starttimeperiod_local', 
    y='value', 
    hue='serialnumber', 
    out_t_beg=dovs_outg_t_beg, 
    out_t_end=dovs_outg_t_end, 
    expand_time=pd.Timedelta('12 hours'), 
    plot_time_beg_end=[dovs_outg_t_beg, dovs_outg_t_end], 
    data_label='', 
    title_args=dict(label='All', fontdict=dict(fontsize=24)), 
    ax_args=None, 
    xlabel_args=None, 
    ylabel_args=None, 
    df_mean=None, 
    df_mean_col=None, 
    mean_args=None, 
    draw_outage_limits=True, 
    include_outage_limits_text=True, 
    draw_without_hue_also=False, 
    seg_line_freq=None, 
    palette=palette, 
    lineplot_kwargs=dict(alpha=0.25)
)
# Hide the legend (there is one hue level per serial number)
ax.legend().set_visible(False)

In [None]:
# Estimate CI/CMI for the selected outage from the AMI data and end-device events.
# ede_df_i may be None here (no end-events file for this outage).
# NOTE(review): calculate_ci_cmi_w_ami_w_ede_help is not defined or imported in the visible cells;
#   presumably it comes from the %run of check_DOVS_METHODS.ipynb - confirm.
res_dict = calculate_ci_cmi_w_ami_w_ede_help(
    df=ami_df_i, 
    ede_df=ede_df_i, 
    dovs_outg_t_beg_end=dovs_outg_t_beg_end, 
    expand_outg_search_time=pd.Timedelta('1 hour'), 
    conservative_estimate=True, 
    est_ede_kwargs=None, 
    audit_selection_method='ede only', 
    return_CI_SNs=False, 
    use_est_outg_times=True, 
    pct_SNs_required_for_outage_est=0, 
#     expand_outg_est_search_time=pd.Timedelta('1 hour'), 
    expand_outg_est_search_time=pd.Timedelta('12 hours'), 
    use_only_overall_endpoints_of_est_outg_times=False, 
    time_col='starttimeperiod_local', 
    value_col='value', 
    SN_col='serialnumber', 
    return_all_best_ests=True, 
    return_all_best_ests_type='pd.DataFrame'
)
#-----
# Unpack the results: CI, CMI and the frame of all best estimates
ci_NEWEST    = res_dict['CI']
cmi_NEWEST   = res_dict['CMI']
best_ests_df = res_dict['all_best_ests']

In [None]:
# When calculating by premise (and estimates exist), combine the entries of best_ests_df by premise
#   number and recompute CI/CMI from the combined frame:
#     CI  = number of distinct premise numbers ('PN')
#     CMI = sum over rows of (winner_max - winner_min), expressed in minutes
# NOTE: best_ests_df is overwritten by the combined frame, so re-running this cell feeds the
#   already-combined frame back into combine_PNs_in_best_ests_df.
if calculate_by_PN and best_ests_df.shape[0]>0:
    best_ests_df = combine_PNs_in_best_ests_df(
        best_ests_df, 
        likeness_thresh = pd.Timedelta('1 minutes'), 
        SN_col = 'SN', 
        PN_col = 'PN', 
        i_outg_col = 'i_outg'     
    )
    ci_NEWEST  = best_ests_df['PN'].nunique()
    cmi_NEWEST = (best_ests_df['winner_max']-best_ests_df['winner_min']).sum().total_seconds()/60

In [None]:
# Display the best-estimates frame (combined by premise if calculate_by_PN was applied).
best_ests_df

In [None]:
# Cluster the winner_min/winner_max estimates with DBSCAN and obtain the mean times, together with
#   the best-estimates frame labelled by cluster. With no estimates, both results are None.
if best_ests_df.shape[0]==0:
    means_df, best_ests_df_w_db_lbl = None, None
else:
    means_df, best_ests_df_w_db_lbl = get_mean_times_w_dbscan(
        best_ests_df,
        eps_min=5,
        min_samples=2,
        ests_to_include_in_clustering=['winner_min', 'winner_max'],
        ests_to_include_in_output=[
            'winner_min', 'winner_max',
            'conservative_min', 'conservative_max',
            'zero_times_min', 'zero_times_max',
        ],
        return_labelled_best_ests_df=True
    )

In [None]:
# Overall time span to plot: the smallest and largest values anywhere in means_df when it exists,
#   otherwise the DOVS outage beginning/end.
if means_df is not None:
    cnsrvtv_out_t_beg = means_df.min().min()
    cnsrvtv_out_t_end = means_df.max().max()
else:
    cnsrvtv_out_t_beg = dovs_outg_t_beg
    cnsrvtv_out_t_end = dovs_outg_t_end

# Instead of using get_full_part_not_outage_subset_dfs, simply grab the PNs which suffered
#   outages from best_ests_df
# NOTE(review): despite its name, outg_SNs holds premise numbers (the 'PN' column), and it is matched
#   against 'aep_premise_nb' below.
if best_ests_df.shape[0]>0:
    outg_SNs = best_ests_df['PN'].unique().tolist()
else:
    outg_SNs = []
#-----
# Split the AMI data into premises with / without an estimated outage
ami_df_i_out      = ami_df_i[ami_df_i['aep_premise_nb'].isin(outg_SNs)]
ami_df_i_not_out  = ami_df_i[~ami_df_i['aep_premise_nb'].isin(outg_SNs)]
#-------------------------
# The printed ratio is the fraction of serial numbers belonging to premises with an estimated outage
print(f'nSNs Total               = {ami_df_i["serialnumber"].nunique()}')
print(f'ratio: {ami_df_i_out["serialnumber"].nunique()/ami_df_i["serialnumber"].nunique()}')

In [None]:
# Create the 1 x 3 grid of axes (returned flattened, so axs is indexed by a single
#   integer) used for the All / Out / Not Out panels drawn below.
fig, axs = Plot_General.default_subplots(
    fig_num=fig_num,
    n_x=1,
    n_y=3,
    unit_figsize_width=14,
    unit_figsize_height=6,
    sharex=False,
    sharey=False,
    row_major=True,
    return_flattened_axes=True,
)
Plot_General.adjust_subplots_args(fig, {'hspace': 0.30})

# One colour per serial number, shared by all three panels
palette = Plot_General.get_standard_colors_dict(
    palette='colorblind',
    keys=ami_df_i['serialnumber'].unique().tolist(),
)

#-------------------------
# Panel 0: usage of every meter in ami_df_i around the outage.
i_subplot = 0
fig, axs[i_subplot] = AMINonVee.plot_usage_around_outage(
    fig=fig,
    ax=axs[i_subplot],
    data=ami_df_i,
    x='starttimeperiod_local',
    y='value',
    hue='serialnumber',
    out_t_beg=dovs_outg_t_beg,
    out_t_end=dovs_outg_t_end,
    expand_time=pd.Timedelta(hours=1),
    plot_time_beg_end=[cnsrvtv_out_t_beg, cnsrvtv_out_t_end],
    data_label='',
    title_args={
        'label': f"All (#SNs = {ami_df_i['serialnumber'].nunique()})",
        'fontdict': {'fontsize': 24},
    },
    ax_args=None,
    xlabel_args=None,
    ylabel_args=None,
    df_mean=None,
    df_mean_col=None,
    mean_args=None,
    draw_outage_limits=True,
    draw_outage_limits_kwargs={'alpha': 1.0, 'linewidth': 5.0, 'ymax': 0.1},
    include_outage_limits_text={
        'out_t_beg_text': 'DOVS Beg.',
        'out_t_beg_ypos': (0.12, 'ax_coord'),
        'out_t_beg_va': 'bottom',
        'out_t_beg_ha': 'center',
        'out_t_beg_color': 'red',
        #-----
        'out_t_end_text': 'DOVS End',
        'out_t_end_ypos': (0.12, 'ax_coord'),
        'out_t_end_va': 'bottom',
        'out_t_end_ha': 'center',
        'out_t_end_color': 'green',
    },
    draw_without_hue_also=False,
    seg_line_freq=None,
    palette=palette,
)
# Hide the per-serial-number legend on this axis
axs[i_subplot].legend().set_visible(False)
# NOTE: the best-estimate overlay (add_all_best_ests_to_axis) is not drawn on this panel.
Plot_General.set_general_plotting_args(
    ax=axs[i_subplot],
    tick_args=[
        {'axis': 'x', 'labelrotation': 0, 'labelsize': 14.0, 'direction': 'out'},
        {'axis': 'y', 'labelrotation': 0, 'labelsize': 14.0, 'direction': 'out'},
    ],
    xlabel_args={'xlabel': axs[i_subplot].get_xlabel(), 'fontsize': 16},
    ylabel_args={'ylabel': axs[i_subplot].get_ylabel(), 'fontsize': 16},
)


#-------------------------
# Panel 1: usage of the meters whose premise numbers are in outg_SNs (ami_df_i_out),
#   with the estimates from means_df and, when available, the Mico begin/end overlaid.
i_subplot=1
# Always define the flag read by the legend and figure-text code further down. Previously it
#   was only assigned inside the branch below, so an empty ami_df_i_out led to a NameError
#   (or silently reused a stale value from an earlier run of the cell).
include_mico_in_leg = False
if ami_df_i_out.shape[0]>0:
    fig, axs[i_subplot] = AMINonVee.plot_usage_around_outage(
        fig=fig, 
        ax=axs[i_subplot], 
        data=ami_df_i_out, 
        x='starttimeperiod_local', 
        y='value', 
        hue='serialnumber', 
        out_t_beg=dovs_outg_t_beg, 
        out_t_end=dovs_outg_t_end, 
        expand_time=pd.Timedelta('1 hour'), 
        plot_time_beg_end=[cnsrvtv_out_t_beg, cnsrvtv_out_t_end], 
        data_label='', 
        title_args=dict(label=f"Out (#SNs = {ami_df_i_out['serialnumber'].nunique()})", fontdict=dict(fontsize=24)), 
        ax_args=None, 
        xlabel_args=None, 
        ylabel_args=None, 
        df_mean=None, 
        df_mean_col=None, 
        mean_args=None, 
        draw_outage_limits=True, 
        draw_outage_limits_kwargs=dict(alpha=1.0, linewidth=5.0, ymax=0.1), 
        include_outage_limits_text=dict(
            out_t_beg_text='DOVS Beg.', 
            out_t_beg_ypos=(0.12, 'ax_coord'), 
            out_t_beg_va='bottom', 
            out_t_beg_ha='center', 
            out_t_beg_color='red', 
            #-----
            out_t_end_text='DOVS End', 
            out_t_end_ypos=(0.12, 'ax_coord'), 
            out_t_end_va='bottom', 
            out_t_end_ha='center', 
            out_t_end_color='green', 
        ), 
        draw_without_hue_also=False, 
        seg_line_freq=None, 
        palette=palette
    )
    # Hide the per-serial-number legend on this axis
    axs[i_subplot].legend().set_visible(False)
    add_all_best_ests_to_axis(
        axs[i_subplot], 
        means_df, 
        line_kwargs_by_est_key=dict(
            conservative=dict(alpha=0.25, linewidth=5.0, ymax=0.6), 
            zero_times=dict(alpha=0.25, linewidth=5.0, ymax=0.4) 
        ), 
        keys_to_include=['winner', 'conservative', 'zero_times']
    )
    #-------------------------
    # Look up the row of mico_df_for_plt matching this outage (OUTAGE_NB and OUTG_REC_NB)
    mico_df_for_plt_i = mico_df_for_plt[
        (mico_df_for_plt['OUTAGE_NB']==dovs_df.loc[outg_rec_nb]['OUTAGE_NB']) & 
        (mico_df_for_plt['OUTG_REC_NB']==outg_rec_nb)
    ]
    if mico_df_for_plt_i.shape[0]>0:
        # At most one matching row is expected
        assert(mico_df_for_plt_i.shape[0]==1)
        include_mico_in_leg = True
        mico_beg, mico_end = mico_df_for_plt_i.iloc[0][['Final Outage Start', 'Final Outage End']]
        add_best_est_to_axis(
            ax=axs[i_subplot], 
            est_val_beg=mico_beg,
            est_val_end=mico_end,
            line_kwargs=dict(color_beg='maroon', color_end='darkgreen', linestyle='dotted'), 
            expand_ax_to_accommodate=False
        )
    #-------------------------
    
    Plot_General.set_general_plotting_args(
        ax=axs[i_subplot], 
        tick_args =[
            dict(axis='x', labelrotation=0, labelsize=14.0, direction='out'), 
            dict(axis='y', labelrotation=0, labelsize=14.0, direction='out')
        ], 
        xlabel_args=dict(xlabel=axs[i_subplot].get_xlabel(), fontsize=16), 
        ylabel_args=dict(ylabel=axs[i_subplot].get_ylabel(), fontsize=16)
    )
else:
    # Nothing to plot: leave the axis empty but still title it
    axs[i_subplot].set_title(
        label='Out', 
        fontdict=dict(fontsize=24)
    )

#-------------------------
# Panel 2: usage of the meters whose premise numbers are NOT in outg_SNs (ami_df_i_not_out).
i_subplot = 2
if ami_df_i_not_out.shape[0] == 0:
    # Nothing to plot: leave the axis empty but still title it
    axs[i_subplot].set_title(label='Not Out', fontdict={'fontsize': 24})
else:
    fig, axs[i_subplot] = AMINonVee.plot_usage_around_outage(
        fig=fig,
        ax=axs[i_subplot],
        data=ami_df_i_not_out,
        x='starttimeperiod_local',
        y='value',
        hue='serialnumber',
        out_t_beg=dovs_outg_t_beg,
        out_t_end=dovs_outg_t_end,
        expand_time=pd.Timedelta(hours=1),
        plot_time_beg_end=[cnsrvtv_out_t_beg, cnsrvtv_out_t_end],
        data_label='',
        title_args={
            'label': f"Not Out (#SNs = {ami_df_i_not_out['serialnumber'].nunique()})",
            'fontdict': {'fontsize': 24},
        },
        ax_args=None,
        xlabel_args=None,
        ylabel_args=None,
        df_mean=None,
        df_mean_col=None,
        mean_args=None,
        draw_outage_limits=True,
        draw_outage_limits_kwargs={'alpha': 1.0, 'linewidth': 5.0, 'ymax': 0.1},
        include_outage_limits_text={
            'out_t_beg_text': 'DOVS Beg.',
            'out_t_beg_ypos': (0.12, 'ax_coord'),
            'out_t_beg_va': 'bottom',
            'out_t_beg_ha': 'center',
            'out_t_beg_color': 'red',
            #-----
            'out_t_end_text': 'DOVS End',
            'out_t_end_ypos': (0.12, 'ax_coord'),
            'out_t_end_va': 'bottom',
            'out_t_end_ha': 'center',
            'out_t_end_color': 'green',
        },
        draw_without_hue_also=False,
        seg_line_freq=None,
        palette=palette,
    )
    # Hide the per-serial-number legend on this axis
    axs[i_subplot].legend().set_visible(False)
    # NOTE: the best-estimate overlay (add_all_best_ests_to_axis) is not drawn on this panel.
    Plot_General.set_general_plotting_args(
        ax=axs[i_subplot],
        tick_args=[
            {'axis': 'x', 'labelrotation': 0, 'labelsize': 14.0, 'direction': 'out'},
            {'axis': 'y', 'labelrotation': 0, 'labelsize': 14.0, 'direction': 'out'},
        ],
        xlabel_args={'xlabel': axs[i_subplot].get_xlabel(), 'fontsize': 16},
        ylabel_args={'ylabel': axs[i_subplot].get_ylabel(), 'fontsize': 16},
    )
        

#--------------------------------------------------
# Add legend to first plot
# The handles are stand-alone proxy lines ([0],[0] data) that only carry the colour,
#   width, style and label to show in the legend.
patch_dovs_beg = Line2D(
    [0], [0], color='red', 
    alpha=1.0, linewidth=5.0, linestyle='-', 
    label='DOVS Beg.'
)
patch_dovs_end = Line2D(
    [0], [0], color='green', 
    alpha=1.0, linewidth=5.0, linestyle='-', 
    label='DOVS End'
)
#-----
patch_ui_beg =  Line2D(
    [0], [0], color='red', 
    alpha=1.0, linewidth=5.0, linestyle=':', 
    label='Beg. Uncertainty Interval'
)
patch_ui_end =  Line2D(
    [0], [0], color='green', 
    alpha=1.0, linewidth=5.0, linestyle=':', 
    label='End Uncertainty Interval'
)
#-----
patch_best_beg =  Line2D(
    [0], [0], color='red', 
    alpha=1.0, linewidth=1.0, linestyle='--', 
    label='Best Est. Beg.'
)
# Label fixed: this handle is the counterpart of 'Best Est. Beg.' (it was mislabelled
#   'Mico Xlsx'); the Mico entries are the separate dotted handles added below.
patch_best_end =  Line2D(
    [0], [0], color='green', 
    alpha=1.0, linewidth=1.0, linestyle='--', 
    label='Best Est. End'
)
handles=[patch_dovs_beg, patch_dovs_end, patch_ui_beg, patch_ui_end, patch_best_beg, patch_best_end]
# Only advertise the Mico lines when they were actually drawn
if include_mico_in_leg:
    patch_mico_beg =  Line2D(
        [0], [0], color='maroon', 
        alpha=1.0, linewidth=1.0, linestyle='dotted', 
        label='Mico Xlsx Beg.'
    )
    patch_mico_end =  Line2D(
        [0], [0], color='darkgreen', 
        alpha=1.0, linewidth=1.0, linestyle='dotted', 
        label='Mico Xlsx End'
    )
    handles.extend([patch_mico_beg, patch_mico_end])
#-------------------------
# Anchor the legend's upper-left corner just outside the right edge of the first axis
leg_1 = axs[0].legend(
    title=None, 
    handles=handles, 
    bbox_to_anchor=(1, 1.025), 
    loc='upper left', 
    fontsize=15
)
        
#--------------------------------------------------
# Summary text written next to the panels with fig.text.
ci_info_fontsize = 20
left_text_x=0.95
right_text_x = 1.05
# Shift every text line down when the legend carries the two extra Mico entries
if include_mico_in_leg:
    shift_text_down = 0.05
else:
    shift_text_down = 0

fig.text(left_text_x, 0.745-shift_text_down, f'OUTG_REC_NB: {outg_rec_nb}', fontsize=ci_info_fontsize+4)
fig.text(left_text_x, 0.715-shift_text_down, f"OUTAGE_NB:     {dovs_df.loc[outg_rec_nb]['OUTAGE_NB']}", fontsize=ci_info_fontsize+4)

fig.text(left_text_x, 0.675-shift_text_down, f"#PNs from DOVS = {n_PNs_dovs}", fontsize=ci_info_fontsize)

fig.text(left_text_x, 0.640-shift_text_down, "----- Found in AMI -----", fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.615-shift_text_down, f"#PNs = {n_PNs}", fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.590-shift_text_down, f"#SNs = {n_SNs}", fontsize=ci_info_fontsize)

fig.text(left_text_x, 0.485-shift_text_down, '-----'*5+'\nDOVS\n'+'-----'*5, fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.460-shift_text_down, f'CI    = {ci_dovs}', fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.435-shift_text_down, f'CMI = {np.round(cmi_dovs, decimals=2)}', fontsize=ci_info_fontsize)

fig.text(left_text_x, 0.360-shift_text_down, '-----'*5+'\nUsing AMI\n'+'-----'*5, fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.335-shift_text_down, f'CI    = {ci_NEWEST}', fontsize=ci_info_fontsize)
fig.text(left_text_x, 0.310-shift_text_down, f'CMI = {np.round(cmi_NEWEST, decimals=2)}', fontsize=ci_info_fontsize)
#-----
# Differences DOVS - AMI, absolute and relative to DOVS.
# Raw f-strings keep the backslash of the mathtext '\Delta' literal; in a plain f-string
#   '\D' is an invalid escape sequence and triggers a Deprecation/SyntaxWarning.
fig.text(
    left_text_x, 0.285-shift_text_down, 
    rf'$\Delta$CI    = {ci_dovs-ci_NEWEST} ({np.round(100*(ci_dovs-ci_NEWEST)/ci_dovs, decimals=2)}%)', 
    fontsize=ci_info_fontsize
)
fig.text(
    left_text_x, 0.260-shift_text_down, 
    rf'$\Delta$CMI = {np.round(cmi_dovs-cmi_NEWEST, decimals=2)} ({np.round(100*(cmi_dovs-cmi_NEWEST)/cmi_dovs, decimals=2)}%)', 
    fontsize=ci_info_fontsize
)



# Advance the figure counter for the next figure
fig_num += 1

In [None]:
# Display the OUTAGE_NB stored in dovs_df for the current outg_rec_nb.
dovs_df.loc[outg_rec_nb]['OUTAGE_NB']

In [None]:
# Display the best-estimates frame again for reference next to the figure above.
best_ests_df

In [None]:
# means_df is None when best_ests_df was empty, so only show the sorted view when it exists
#   (calling .sort_values on None would raise AttributeError).
means_df.sort_values(by=['conservative_min']) if means_df is not None else None

In [None]:
# One row of axes per row of means_df, two columns per row:
#   column 0 spans conservative_min -> zero_times_min, column 1 spans zero_times_max -> conservative_max.
if means_df is not None:
    means_df = means_df.sort_values(by=['winner_min', 'winner_max'])
    fig, axs = Plot_General.default_subplots(
        n_x=2, 
        n_y=means_df.shape[0], 
        fig_num=fig_num
    )
    if means_df.shape[0]==1:
        # Wrap so the axs[i_row][i_col] indexing below also works for a single row
        axs = [axs]
    Plot_General.adjust_subplots_args(fig, dict(hspace=0.30))

    palette = Plot_General.get_standard_colors_dict(
        keys=ami_df_i['serialnumber'].unique().tolist(), 
        palette='colorblind'
    )

    # Per column: (means_df column for window start, means_df column for window end, 
    #              x-position in axes coordinates of the '#SNs' annotation)
    zoom_windows = [
        ('conservative_min', 'zero_times_min',   0.85), 
        ('zero_times_max',   'conservative_max', 0.15)
    ]

    #-------------------------
    for i_row in range(means_df.shape[0]):
        # The index label of the means_df row selects the matching db_label rows
        db_label = means_df.iloc[i_row].name
        ami_df_i_subset = ami_df_i[ami_df_i['aep_premise_nb'].isin(
            best_ests_df_w_db_lbl[best_ests_df_w_db_lbl['db_label']==db_label]['PN'].tolist()
        )]
        # Loop-specific name: the notebook-level n_SNs (shown in the summary figure) must not
        #   be overwritten with a per-row count.
        n_SNs_in_cluster = ami_df_i_subset['serialnumber'].nunique()
        #****************************************
        for i_col, (t_beg_key, t_end_key, n_SNs_text_x) in enumerate(zoom_windows):
            fig, axs[i_row][i_col] = AMINonVee.plot_usage_around_outage(
                fig=fig, 
                ax=axs[i_row][i_col], 
                data=ami_df_i_subset, 
                x='starttimeperiod_local', 
                y='value', 
                hue='serialnumber', 
                out_t_beg=dovs_outg_t_beg, 
                out_t_end=dovs_outg_t_end, 
                expand_time=pd.Timedelta('15 minutes'), 
                plot_time_beg_end=[means_df.iloc[i_row][t_beg_key], means_df.iloc[i_row][t_end_key]], 
                data_label='', 
                title_args=None, 
                ax_args=None, 
                xlabel_args=None, 
                ylabel_args=None, 
                df_mean=None, 
                df_mean_col=None, 
                mean_args=None, 
                draw_outage_limits=True, 
                draw_outage_limits_kwargs=dict(alpha=1.0, linewidth=5.0, ymax=0.1), 
                include_outage_limits_text=dict(
                    out_t_beg_text='DOVS Beg.', 
                    out_t_beg_ypos=(0.12, 'ax_coord'), 
                    out_t_beg_va='bottom', 
                    out_t_beg_ha='center', 
                    out_t_beg_color='red', 
                    #-----
                    out_t_end_text='DOVS End', 
                    out_t_end_ypos=(0.12, 'ax_coord'), 
                    out_t_end_va='bottom', 
                    out_t_end_ha='center', 
                    out_t_end_color='green', 
                ),
                draw_without_hue_also=False, 
                seg_line_freq=None, 
                palette=palette
            )
            # Hide the per-serial-number legend on this axis
            axs[i_row][i_col].legend().set_visible(False)
            # Overlay only this row's estimates, without changing the axis limits
            add_all_best_ests_to_axis(
                axs[i_row][i_col], 
                means_df.iloc[[i_row]], 
                line_kwargs_by_est_key=dict(
                    conservative=dict(alpha=0.25, linewidth=5.0, ymax=0.6), 
                    zero_times=dict(alpha=0.25, linewidth=5.0, ymax=0.4) 
                ), 
                keys_to_include=['winner', 'conservative', 'zero_times'], 
                expand_ax_to_accommodate=False
            )
            axs[i_row][i_col].text(
                n_SNs_text_x, 0.9, f'#SNs = {n_SNs_in_cluster}', 
                ha='center', va='center', transform=axs[i_row][i_col].transAxes, fontsize='xx-large'
            )
            Plot_General.set_general_plotting_args(
                ax=axs[i_row][i_col], 
                tick_args =[
                    dict(axis='x', labelrotation=0, labelsize='large', direction='out'), 
                    dict(axis='y', labelrotation=0, labelsize='large', direction='out')
                ], 
                xlabel_args=dict(xlabel=axs[i_row][i_col].get_xlabel(), fontsize='xx-large'), 
                ylabel_args=dict(ylabel=axs[i_row][i_col].get_ylabel(), fontsize='xx-large')
            )
        
    #--------------------------------------------------
    # Add legend to first row
    # The handles are stand-alone proxy lines that only carry colour, width, style and label
    patch_dovs_beg = Line2D([0], [0], color='red',   alpha=1.0, linewidth=5.0, linestyle='-',  label='DOVS Beg.')
    patch_dovs_end = Line2D([0], [0], color='green', alpha=1.0, linewidth=5.0, linestyle='-',  label='DOVS End')
    #-----
    patch_ui_beg   = Line2D([0], [0], color='red',   alpha=1.0, linewidth=5.0, linestyle=':',  label='Beg. Uncertainty Interval')
    patch_ui_end   = Line2D([0], [0], color='green', alpha=1.0, linewidth=5.0, linestyle=':',  label='End Uncertainty Interval')
    #-----
    patch_best_beg = Line2D([0], [0], color='red',   alpha=1.0, linewidth=1.0, linestyle='--', label='Best Est. Beg.')
    patch_best_end = Line2D([0], [0], color='green', alpha=1.0, linewidth=1.0, linestyle='--', label='Best Est. End')
    #-------------------------
    # Anchor the legend just outside the right edge of the top-right axis
    leg_1 = axs[0][1].legend(
        title=None, 
        handles=[patch_dovs_beg, patch_dovs_end, patch_ui_beg, patch_ui_end, patch_best_beg, patch_best_end], 
        bbox_to_anchor=(1, 1.025), 
        loc='upper left', 
        fontsize=15
    )    
        
    #-------------------------
    fig.suptitle(f"OUTG_REC_NB: {outg_rec_nb}", y=0.95, fontsize='xx-large')
    #-------------------------
    # pdf.savefig(fig, bbox_inches='tight')
    
    fig_num+=1

In [None]:
# One axis per row of means_df, each spanning that row's conservative_min -> conservative_max.
if means_df is not None:
    means_df = means_df.sort_values(by=['winner_min', 'winner_max'])
    fig, axs = Plot_General.default_subplots(
        n_x=1, 
        n_y=means_df.shape[0], 
        fig_num=fig_num
    )
    if means_df.shape[0]==1:
        # Wrap so the axs[i_row] indexing below also works for a single row
        axs = [axs]
    Plot_General.adjust_subplots_args(fig, dict(hspace=0.30))

    palette = Plot_General.get_standard_colors_dict(
        keys=ami_df_i['serialnumber'].unique().tolist(), 
        palette='colorblind'
    )

    #-------------------------
    for i_row in range(means_df.shape[0]):
        # The index label of the means_df row selects the matching db_label rows
        db_label = means_df.iloc[i_row].name
        ami_df_i_subset = ami_df_i[ami_df_i['aep_premise_nb'].isin(
            best_ests_df_w_db_lbl[best_ests_df_w_db_lbl['db_label']==db_label]['PN'].tolist()
        )]
        # Loop-specific name: the notebook-level n_SNs (shown in the summary figure) must not
        #   be overwritten with a per-row count.
        n_SNs_in_cluster = ami_df_i_subset['serialnumber'].nunique()
        #****************************************
        fig, axs[i_row] = AMINonVee.plot_usage_around_outage(
            fig=fig, 
            ax=axs[i_row], 
            data=ami_df_i_subset, 
            x='starttimeperiod_local', 
            y='value', 
            hue='serialnumber', 
            out_t_beg=dovs_outg_t_beg, 
            out_t_end=dovs_outg_t_end, 
            expand_time=pd.Timedelta('15 minutes'), 
            plot_time_beg_end=[means_df.iloc[i_row]['conservative_min'], means_df.iloc[i_row]['conservative_max']], 
            data_label='', 
            title_args=None, 
            ax_args=None, 
            xlabel_args=None, 
            ylabel_args=None, 
            df_mean=None, 
            df_mean_col=None, 
            mean_args=None, 
            draw_outage_limits=True, 
            draw_outage_limits_kwargs=dict(alpha=1.0, linewidth=5.0, ymax=0.1), 
            include_outage_limits_text=dict(
                out_t_beg_text='DOVS Beg.', 
                out_t_beg_ypos=(0.12, 'ax_coord'), 
                out_t_beg_va='bottom', 
                out_t_beg_ha='center', 
                out_t_beg_color='red', 
                #-----
                out_t_end_text='DOVS End', 
                out_t_end_ypos=(0.12, 'ax_coord'), 
                out_t_end_va='bottom', 
                out_t_end_ha='center', 
                out_t_end_color='green', 
            ),
            draw_without_hue_also=False, 
            seg_line_freq=None, 
            palette=palette
        )
        # Hide the per-serial-number legend on this axis
        axs[i_row].legend().set_visible(False)
        # Overlay only this row's estimates, without changing the axis limits
        add_all_best_ests_to_axis(
            axs[i_row], 
            means_df.iloc[[i_row]], 
            line_kwargs_by_est_key=dict(
                conservative=dict(alpha=0.25, linewidth=5.0, ymax=0.6), 
                zero_times=dict(alpha=0.25, linewidth=5.0, ymax=0.4) 
            ), 
            keys_to_include=['winner', 'conservative', 'zero_times'], 
            expand_ax_to_accommodate=False
        )
        axs[i_row].text(
            0.85, 0.9, f'#SNs = {n_SNs_in_cluster}', 
            ha='center', va='center', transform=axs[i_row].transAxes, fontsize='xx-large'
        )
        Plot_General.set_general_plotting_args(
            ax=axs[i_row], 
            tick_args =[
                dict(axis='x', labelrotation=0, labelsize='large', direction='out'), 
                dict(axis='y', labelrotation=0, labelsize='large', direction='out')
            ], 
            xlabel_args=dict(xlabel=axs[i_row].get_xlabel(), fontsize='xx-large'), 
            ylabel_args=dict(ylabel=axs[i_row].get_ylabel(), fontsize='xx-large')
        )
    #--------------------------------------------------
    # Add legend to first row
    # The handles are stand-alone proxy lines that only carry colour, width, style and label
    patch_dovs_beg = Line2D([0], [0], color='red',   alpha=1.0, linewidth=5.0, linestyle='-',  label='DOVS Beg.')
    patch_dovs_end = Line2D([0], [0], color='green', alpha=1.0, linewidth=5.0, linestyle='-',  label='DOVS End')
    #-----
    patch_ui_beg   = Line2D([0], [0], color='red',   alpha=1.0, linewidth=5.0, linestyle=':',  label='Beg. Uncertainty Interval')
    patch_ui_end   = Line2D([0], [0], color='green', alpha=1.0, linewidth=5.0, linestyle=':',  label='End Uncertainty Interval')
    #-----
    patch_best_beg = Line2D([0], [0], color='red',   alpha=1.0, linewidth=1.0, linestyle='--', label='Best Est. Beg.')
    patch_best_end = Line2D([0], [0], color='green', alpha=1.0, linewidth=1.0, linestyle='--', label='Best Est. End')
    #-------------------------
    # Anchor the legend just outside the right edge of the first axis
    leg_1 = axs[0].legend(
        title=None, 
        handles=[patch_dovs_beg, patch_dovs_end, patch_ui_beg, patch_ui_end, patch_best_beg, patch_best_end], 
        bbox_to_anchor=(1, 1.025), 
        loc='upper left', 
        fontsize=15
    )    
    
    
    fig_num+=1

In [None]:
# pdf = PdfPages(r'C:\Users\s346557\Documents\LocalData\dovs_check\forMico\Results\BySN_{}.pdf'.format(outg_rec_nb))

In [None]:
# #-------------------------
# for i_plot, SN_i in enumerate(ami_df_i['serialnumber'].unique().tolist()):
#     fig, ax = Plot_General.default_subplots(fig_num=fig_num)
    
#     ami_df_i_subset = ami_df_i[ami_df_i['aep_premise_nb']==SN_i]
#     best_ests_df_i = best_ests_df[best_ests_df['SN']==PN_i]
#     if best_ests_df_i.shape[0]>0:
#         plot_time_beg_end = [best_ests_df_i['conservative_min'].min(), best_ests_df_i['conservative_max'].max()]
#     else:
#         plot_time_beg_end = [dovs_outg_t_beg, dovs_outg_t_end]
#     #****************************************
#     fig, ax = AMINonVee.plot_usage_around_outage(
#         fig=fig, 
#         ax=ax, 
#         data=ami_df_i_subset, 
#         x='starttimeperiod_local', 
#         y='value', 
#         hue='serialnumber', 
#         out_t_beg=dovs_outg_t_beg, 
#         out_t_end=dovs_outg_t_end, 
#         expand_time=pd.Timedelta('15 minutes'), 
#         plot_time_beg_end=plot_time_beg_end, 
#         data_label='', 
#         title_args=None, 
#         ax_args=None, 
#         xlabel_args=None, 
#         ylabel_args=None, 
#         df_mean=None, 
#         df_mean_col=None, 
#         mean_args=None, 
#         draw_outage_limits=True, 
#         draw_outage_limits_kwargs=dict(alpha=1.0, linewidth=5.0, ymax=0.1), 
#         include_outage_limits_text=dict(
#             out_t_beg_text='DOVS Beg.', 
#             out_t_beg_ypos=(0.12, 'ax_coord'), 
#             out_t_beg_va='bottom', 
#             out_t_beg_ha='center', 
#             out_t_beg_color='red', 
#             #-----
#             out_t_end_text='DOVS End', 
#             out_t_end_ypos=(0.12, 'ax_coord'), 
#             out_t_end_va='bottom', 
#             out_t_end_ha='center', 
#             out_t_end_color='green', 
#         ),
#         draw_without_hue_also=False, 
#         seg_line_freq=None
#     )
#     ax.legend().set_visible(False)
#     if best_ests_df_i.shape[0]>0:
#         add_all_best_ests_to_axis(
#             ax, 
#             best_ests_df_i, 
#             line_kwargs_by_est_key=dict(
#                 conservative=dict(alpha=0.25, linewidth=5.0, ymax=0.6), 
#                 zero_times=dict(alpha=0.25, linewidth=5.0, ymax=0.4) 
#             ), 
#             keys_to_include=['winner', 'conservative', 'zero_times'], 
#             expand_ax_to_accommodate=False
#         )
#     ax.text(0.85, 0.9, f'SNs = {SN_i}', ha='center', va='center', transform=ax.transAxes, fontsize='xx-large')
#     Plot_General.set_general_plotting_args(
#         ax=ax, 
#         tick_args =[
#             dict(axis='x', labelrotation=0, labelsize='large', direction='out'), 
#             dict(axis='y', labelrotation=0, labelsize='large', direction='out')
#         ], 
#         xlabel_args=dict(xlabel=ax.get_xlabel(), fontsize='xx-large'), 
#         ylabel_args=dict(ylabel=ax.get_ylabel(), fontsize='xx-large')
#     )
#     pdf.savefig(fig, bbox_inches='tight')
#     plt.close(fig)
#     fig_num+=1

In [None]:
# pdf.close()