In [None]:
%run ./check_DOVS_METHODS.ipynb

In [None]:
from importlib import reload
import sys, os
import re
from pathlib import Path
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version

import copy
import itertools
import adjustText

import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
from DOVSAudit import DOVSAudit
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import PDFMerger
import Plot_General
import Plot_Box_sns
import Plot_Hist
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSingleSlicer as DFSingleSlicer
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet

# Analyze collected data

## AMI NonVee

In [None]:
import warnings
warnings.filterwarnings("ignore", "use_inf_as_na")

In [None]:
# Run configuration: locate the results of a previously executed DOVS audit run.
# date_0 and date_1 are only used here to build the name of the run's sub-directory.
date_0 = '2024-11-17'
date_1 = '2024-11-23'
#-------------------------
save_dir_base = r'C:\Users\s346557\Documents\LocalData\dovs_check'
save_subdir = f"{date_0.replace('-','')}_{date_1.replace('-','')}"
#-----
base_dir     = os.path.join(save_dir_base, save_subdir, 'AllOPCOs')
save_dir     = os.path.join(base_dir, r'Results')
#-----
# Fail early, and say which directory is missing, instead of a bare AssertionError
assert os.path.isdir(save_dir), f"Results directory not found: {save_dir}"
#-----
# Sub-directory of save_dir holding the per-outage audit pickle files read below
dovs_audits_subdir = 'dovs_audits'
assert os.path.isdir(os.path.join(save_dir, dovs_audits_subdir)), (
    f"DOVS audits directory not found: {os.path.join(save_dir, dovs_audits_subdir)}"
)

In [None]:
# Display the resolved results directory as a sanity check
save_dir

In [None]:
#--------------------------------------------------
include_suboutg_endpt_plots=False
#-------------------------
n_PNs_w_power_threshold = 95

In [None]:
#-------------------------
# Collect every .pkl file found under the dovs_audits sub-directory, in natural sort order.
# Each path is later handed to DOVSAudit in the main loop.
paths = natsorted(
    Utilities.find_all_paths(
        base_dir      = os.path.join(save_dir, dovs_audits_subdir), 
        glob_pattern  = r'*.pkl', 
        regex_pattern = None
    )
)

In [None]:
# Number of audit pickle files found
len(paths)

In [None]:
# Keeping all of the PdfPages objects open for the whole run can become taxing in terms of memory.
# Therefore, every figure is saved as its own PDF inside a temporary sub-directory (and closed at the
#   end of each iteration); the PDFs of each temporary sub-directory are merged into a single file at the end.
#--------------------------------------------------
# Temporary sub-directories (relative to save_dir) receiving the individual PDFs
res_tmp_subdir          = 'TMP_Results'
res_dovs_beg_tmp_subdir = 'TMP_Results_dovs_beg'
res_w_endpts_tmp_subdir = 'TMP_Results_w_suboutg_endpt'
tmp_subdirs = [res_tmp_subdir, res_dovs_beg_tmp_subdir, res_w_endpts_tmp_subdir]
#--------------------------------------------------
# Paths of the final, single files.
# The two variants are built from res_pdf_path by Utilities.append_to_path with the given suffix.
res_pdf_path = os.path.join(save_dir, r'Results.pdf')
#-----
append_to_path_kwargs = dict(
    ext_to_find                   = '.pdf', 
    append_to_end_if_ext_no_found = False
)
res_dovs_beg_pdf_path = Utilities.append_to_path(res_pdf_path, '_dovs_beg', **append_to_path_kwargs)
res_w_endpts_pdf_path = Utilities.append_to_path(res_pdf_path, '_w_suboutg_endpt_plots', **append_to_path_kwargs)
#-----
# tmp_subdir_paths = Utilities.make_tmp_save_dir(
#     base_dir_path = save_dir,
#     tmp_dir_name  = tmp_subdirs, 
#     return_path   = True
# )

In [None]:
# Find any pre-existing results
# Search the same temporary sub-directory the main loop saves its per-outage PDFs to (res_tmp_subdir)
#   instead of repeating its name as a string literal, so the two locations cannot drift apart.
prex_pdf_paths = Utilities.find_all_paths(
    base_dir      = os.path.join(save_dir, res_tmp_subdir), 
    glob_pattern  = r'*.pdf', 
    regex_pattern = None
)
prex_pdf_paths = natsorted(prex_pdf_paths)
# File stems of the existing PDFs; the main loop skips every audit pickle whose stem is listed here
prex_pdfs      = [Path(x).stem for x in prex_pdf_paths]

In [None]:
#--------------------------------------------------
# Book-keeping for the run
#   outgs_pass: outg_rec_nb of every audit for which the plots were produced
#   outgs_fail: (outg_rec_nb, reason) tuples for audits which could not be analyzed or which raised
# Audits skipped because a PDF already exists are placed in neither list.
outgs_pass = []
outgs_fail = []
#-------------------------
fig_num                           = 0
all_detailed_summary_dfs          = []
all_detailed_summary_dfs_dovs_beg = []
ci_cmi_summary_dfs                = []
warnings_text                     = ''

#--------------------------------------------------
# Set for constant-time membership tests inside the loop
prex_pdf_stems = set(prex_pdfs)

#----------------------------------------------------------------------------------------------------
# Now, iterate through all paths
for i_path, path_i in enumerate(paths):
    # The per-outage PDFs are saved as <outg_rec_nb>.pdf and compared against the pickle file's stem
    path_stem_i = Path(path_i).stem
    if path_stem_i in prex_pdf_stems:
        continue
    print(f'\n\ti_path: {i_path}/{len(paths)}')
    #--------------------------------------------------
    # Reset before the try block: if the DOVSAudit constructor itself raises, the handler below must
    #   not report the outg_rec_nb of the audit left over from the previous iteration (or hit an
    #   unbound name on the very first iteration).
    audit_i = None
    try:
        audit_i = DOVSAudit(outg_rec_nb = path_i)
        print(f'\toutg_rec_nb = {audit_i.outg_rec_nb}')
        #--------------------------------------------------
        if not audit_i.can_analyze:
            outgs_fail.append((audit_i.outg_rec_nb, "not can_analyze (likely overlapping DOVS)"))
            continue
        # #--------------------------------------------------
        # detailed_summary_df_i = audit_i.get_detailed_summary_df(
        #     dovs_beg        = False, 
        #     delta_t_off_cut = pd.Timedelta('5min'), 
        #     delta_t_on_cut  = pd.Timedelta('5min'), 
        #     delta_ci_cut    = 3, 
        #     delta_cmi_cut   = None, 
        #     n_PNs_w_power_threshold = n_PNs_w_power_threshold, 
        # )
        # if detailed_summary_df_i.shape[0]>0:
        #     all_detailed_summary_dfs.append(detailed_summary_df_i)
        # #-----
        # detailed_summary_df_dovs_beg_i = audit_i.get_detailed_summary_df(
        #     dovs_beg        = True, 
        #     delta_t_off_cut = pd.Timedelta('5min'), 
        #     delta_t_on_cut  = pd.Timedelta('5min'), 
        #     delta_ci_cut    = 3, 
        #     delta_cmi_cut   = None, 
        #     n_PNs_w_power_threshold = n_PNs_w_power_threshold, 
        # )
        # if detailed_summary_df_dovs_beg_i.shape[0]>0:
        #     all_detailed_summary_dfs_dovs_beg.append(detailed_summary_df_dovs_beg_i)
        # #-----
        # warnings_text += audit_i.generate_warnings_text()
        # #-------------------------
        # ci_cmi_summary_dfs.append(
        #     pd.DataFrame(
        #         dict(
        #             outg_rec_nb      = audit_i.outg_rec_nb, 
        #             ci_dovs          = audit_i.ci_dovs,   
        #             ci_ami           = audit_i.ci, 
        #             ci_ami_dovs_beg  = audit_i.ci_dovs_beg, 
        #             cmi_dovs         = audit_i.cmi_dovs, 
        #             cmi_ami          = audit_i.cmi, 
        #             cmi_ami_dovs_beg = audit_i.cmi_dovs_beg
        #         ), 
        #         index=[len(ci_cmi_summary_dfs)]
        #     )
        # )
        #----------------------------------------------------------------------------------------------------
        # ######################### PLOTTING #########################
        #----------------------------------------------------------------------------------------------------
        # Main results figure, saved as its own PDF in the temporary results sub-directory
        fig, axs = audit_i.plot_results(
            include_dovs_beg_text      = True, 
            name                       = 'AMI', 
            expand_time                = pd.Timedelta('1 hour'), 
            n_PNs_w_power_threshold    = n_PNs_w_power_threshold, 
            fig_num                    = fig_num
        )    
        Plot_General.save_fig(
            fig         = fig, 
            save_dir    = os.path.join(save_dir, res_tmp_subdir), 
            save_name   = f"{audit_i.outg_rec_nb}.pdf", 
            bbox_inches = 'tight'
        )
        if include_suboutg_endpt_plots:
            # The same figure is also saved as page _0 of the w_suboutg_endpt document
            Plot_General.save_fig(
                fig         = fig, 
                save_dir    = os.path.join(save_dir, res_w_endpts_tmp_subdir), 
                save_name   = f"{audit_i.outg_rec_nb}_0.pdf", 
                bbox_inches = 'tight'
            )
        fig.clear()
        plt.close(fig)
        fig_num += 1
        
        #----------------------------------------------------------------------------------------------------
        # Results using the DOVS beginning time, only when that estimate is available
        if audit_i.best_ests_means_df_dovs_beg is not None:
            fig, axs = audit_i.plot_results_dovs_beg(
                include_full_alg_text      = True, 
                name                       = 'AMI w/ DOVS t_beg', 
                expand_time                = pd.Timedelta('1 hour'), 
                n_PNs_w_power_threshold    = n_PNs_w_power_threshold, 
                fig_num                    = fig_num
            )    
            Plot_General.save_fig(
                fig         = fig, 
                save_dir    = os.path.join(save_dir, res_dovs_beg_tmp_subdir), 
                save_name   = f"{audit_i.outg_rec_nb}.pdf", 
                bbox_inches = 'tight'
            )
            fig.clear()
            plt.close(fig)
            fig_num += 1
        
        #----------------------------------------------------------------------------------------------------
        if include_suboutg_endpt_plots:
            fig_axs = audit_i.plot_zoomed_endpts(
                expand_time = pd.Timedelta('15 minutes'), 
                fig_num     = fig_num
            )
            # plot_zoomed_endpts may return None, in which case there is nothing to save
            if fig_axs is not None:
                fig = fig_axs[0]
                axs = fig_axs[1]
                #-------------------------
                Plot_General.save_fig(
                    fig         = fig, 
                    save_dir    = os.path.join(save_dir, res_w_endpts_tmp_subdir), 
                    save_name   = f"{audit_i.outg_rec_nb}_1.pdf", 
                    bbox_inches = 'tight'
                ) 
                fig.clear()
                plt.close(fig)
                fig_num += 1
    
        #----------------------------------------------------------------------------------------------------
        outgs_pass.append(audit_i.outg_rec_nb)

    except Exception as exc:
        # Best-effort processing: one failing outage must not stop the whole run.
        # Exception (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        # When the audit could not even be constructed, fall back to the pickle file's stem.
        outg_rec_nb_i = getattr(audit_i, 'outg_rec_nb', path_stem_i)
        fail_reason_i = f"{type(exc).__name__}: {exc}"
        print(f'\tFAILED: {fail_reason_i}')
        outgs_fail.append((outg_rec_nb_i, fail_reason_i))
        # A failure between creating and closing a figure would leave it open (and registered under
        #   the current fig_num); close whatever is still open so it cannot leak into later iterations.
        # NOTE(review): this closes every open pyplot figure -- confirm nothing else is expected to stay open.
        plt.close('all')
        
#----------------------------------------------------------------------------------------------------
# detailed_summary_df          = Utilities_df.concat_dfs(
#     dfs                  = all_detailed_summary_dfs, 
#     axis                 = 0, 
#     make_col_types_equal = False
# )
# detailed_summary_df_dovs_beg = Utilities_df.concat_dfs(
#     dfs                  = all_detailed_summary_dfs_dovs_beg, 
#     axis                 = 0, 
#     make_col_types_equal = False
# )
# #-------------------------
# detailed_summary_df = DOVSAudit.sort_detailed_summary_df(
#     detailed_summary_df = detailed_summary_df, 
#     how                 = 'abs_delta_ci_cmi', 
# )
# #-----
# detailed_summary_df_dovs_beg = DOVSAudit.sort_detailed_summary_df(
#     detailed_summary_df = detailed_summary_df_dovs_beg, 
#     how                 = 'abs_delta_ci_cmi', 
# )
# #-------------------------
# ci_cmi_summary_df            = Utilities_df.concat_dfs(
#     dfs                  = ci_cmi_summary_dfs, 
#     axis                 = 0, 
#     make_col_types_equal = False
# )

In [None]:
# Merge the individual PDFs of each temporary sub-directory into its single, final document
PDFMerger.merge_all_pdfs_in_dir(
    dir_to_merge = os.path.join(save_dir, res_tmp_subdir), 
    output_path  = res_pdf_path, 
)
#-----
PDFMerger.merge_all_pdfs_in_dir(
    dir_to_merge = os.path.join(save_dir, res_dovs_beg_tmp_subdir), 
    output_path  = res_dovs_beg_pdf_path, 
)
#-----
# NOTE(review): the main loop only writes to this sub-directory when include_suboutg_endpt_plots is True
#   -- confirm merge_all_pdfs_in_dir copes with an empty or missing directory.
PDFMerger.merge_all_pdfs_in_dir(
    dir_to_merge = os.path.join(save_dir, res_w_endpts_tmp_subdir), 
    output_path  = res_w_endpts_pdf_path, 
)
#-------------------------
# Utilities.del_tmp_save_dir(
#     base_dir_path = save_dir,
#     tmp_dir_name  = tmp_subdirs
# )

In [None]:
# ci_cmi_summary_df['ci_dovs']         = ci_cmi_summary_df['ci_dovs'].astype(float)
# ci_cmi_summary_df['ci_ami']          = ci_cmi_summary_df['ci_ami'].astype(float)
# ci_cmi_summary_df['ci_ami_dovs_beg'] = ci_cmi_summary_df['ci_ami_dovs_beg'].astype(float)
# #-----
# ci_cmi_summary_df['delta_ci_dovs_ami']  = ci_cmi_summary_df['ci_dovs']-ci_cmi_summary_df['ci_ami']
# ci_cmi_summary_df['delta_cmi_dovs_ami'] = ci_cmi_summary_df['cmi_dovs']-ci_cmi_summary_df['cmi_ami']
# #-----
# ci_cmi_summary_df['delta_ci_dovs_ami_dovs_beg']  = ci_cmi_summary_df['ci_dovs']-ci_cmi_summary_df['ci_ami_dovs_beg']
# ci_cmi_summary_df['delta_cmi_dovs_ami_dovs_beg'] = ci_cmi_summary_df['cmi_dovs']-ci_cmi_summary_df['cmi_ami_dovs_beg']
# #-----
# # For plotting purposes, make a outg_rec_in column which is simply 0 to delta_df.shape[0]-1
# ci_cmi_summary_df['outg_rec_int'] = range(ci_cmi_summary_df.shape[0])
# #-----

In [None]:
# detailed_summary_df.to_pickle(os.path.join(save_dir, r'detailed_summary.pkl'))
# detailed_summary_df_dovs_beg.to_pickle(os.path.join(save_dir, r'detailed_summary_dovs_beg.pkl'))
# ci_cmi_summary_df.to_pickle(os.path.join(save_dir, r'ci_cmi_summary.pkl'))
# #-----
# detailed_summary_df.to_csv(os.path.join(save_dir, r'detailed_summary.csv'))
# detailed_summary_df_dovs_beg.to_csv(os.path.join(save_dir, r'detailed_summary_dovs_beg.csv'))
# ci_cmi_summary_df.to_csv(os.path.join(save_dir, r'ci_cmi_summary.csv'))
# #-----
# # For Mico and Amanda
# detailed_summary_df_dovs_beg.to_csv(os.path.join(save_dir, f'detailed_summary_dovs_beg_{save_subdir}.csv'))
# #-----
# with open(os.path.join(save_dir, r'warnings.txt'), 'w') as f:
#     f.write(warnings_text)

In [None]:
# Final tally of the run.
# Audits skipped because a PDF already existed are counted in neither pass nor fail.
print(
    f"#OUTG_REC_NBs = {len(paths)}", 
    f"\tpass: {len(outgs_pass)}", 
    f"\tfail: {len(outgs_fail)}", 
    sep='\n'
)

In [None]:
# Display the (outg_rec_nb, reason) tuples collected for the failed audits
outgs_fail

In [None]:
# Always raises AssertionError.
# NOTE(review): presumably a deliberate stop so that "Run All" halts here, before the ad-hoc
#   comparison cells below -- confirm.
assert(0)

# Scratch work: compare the detailed_summary results of two separate runs

In [None]:
# df_1 = pd.read_pickle(r'C:\Users\s346557\Documents\LocalData\dovs_check\20240811_20240817\AllOPCOs\ResultsDEV5\detailed_summary.pkl')
# df_2 = pd.read_pickle(r'C:\Users\s346557\Documents\LocalData\dovs_check\20240811_20240817\AllOPCOs\Results5\detailed_summary.pkl')

# Load the two detailed_summary pickles to be compared in the cells below.
# NOTE: pickle files can execute arbitrary code when loaded; only read files from a trusted source.
df_1_pkl_path = r'C:\Users\s346557\Documents\LocalData\dovs_check\20240811_20240817\AllOPCOs\Results_ToTest\detailed_summary.pkl'
df_2_pkl_path = r'C:\Users\s346557\Documents\LocalData\dovs_check\20240811_20240817\AllOPCOs\from_EngLaptop\ResultsDEV_ToTest\detailed_summary.pkl'
#-----
df_1 = pd.read_pickle(df_1_pkl_path)
df_2 = pd.read_pickle(df_2_pkl_path)

In [None]:
# Shapes of the two DataFrames being compared (df_1 first, then df_2)
for cmp_df_i in (df_1, df_2):
    print(cmp_df_i.shape)

In [None]:
# True only if the two DataFrames have the same shape and elements
df_1.equals(df_2)

In [None]:
# Differences between the two DataFrames, computed after moving their indices into columns.
# The cells below read a two-level index and the columns 'df1_values' / 'df2_values' from the result.
dffs = Utilities_df.get_dfs_diff(df_1.reset_index(), df_2.reset_index())
dffs

In [None]:
# Number of distinct values in the second index level of dffs ...
print(dffs.index.get_level_values(1).nunique())
# ... and in each of its two value columns
for values_col_i in ('df1_values', 'df2_values'):
    print(dffs[values_col_i].nunique())

In [None]:
# Number of index labels found in exactly one of the two DataFrames
len(set(df_1.index) ^ set(df_2.index))

In [None]:
# Second element of every index label found in exactly one of the two DataFrames
[idx_i[1] for idx_i in set(df_1.index) ^ set(df_2.index)]

In [None]:
# Number of index labels found in df_1 but not in df_2
len(set(df_1.index) - set(df_2.index))

In [None]:
# Number of index labels found in df_2 but not in df_1
len(set(df_2.index) - set(df_1.index))

In [None]:
# Index labels found in df_2 but not in df_1
set(df_2.index) - set(df_1.index)

In [None]:
# Second element of every index label found in df_2 but not in df_1
[idx_i[1] for idx_i in set(df_2.index) - set(df_1.index)]

In [None]:
# Index labels found in df_1 but not in df_2
set(df_1.index) - set(df_2.index)

In [None]:
# Second element of every index label found in df_1 but not in df_2
[idx_i[1] for idx_i in set(df_1.index) - set(df_2.index)]

In [None]:
# Entries of df_1 for the hard-coded label '2376841'
# NOTE(review): presumably an outg_rec_nb of interest -- confirm
df_1.loc['2376841']

In [None]:
# Display df_2 for visual inspection
df_2

In [None]:
# Rows of df_2 whose index label is found in only one of the two DataFrames.
# The symmetric difference also contains labels present only in df_1; handing those to
#   df_2.loc[<list of labels>] raises a KeyError, so the rows are selected with a boolean mask instead.
# Rows are returned in df_2's own order.
idx_sym_diff = set(df_1.index) ^ set(df_2.index)
df_2.loc[df_2.index.isin(list(idx_sym_diff))]

In [None]:
# Index labels present in both DataFrames
list(set(df_1.index) & set(df_2.index))

In [None]:
# Restrict both DataFrames to their shared index labels (same label order for both) and compare
common_idx = list(set(df_1.index) & set(df_2.index))
df_1.loc[common_idx].equals(df_2.loc[common_idx])

In [None]:
# Copies of both DataFrames restricted to their shared index labels, with the index moved into columns
common_idx = list(set(df_1.index) & set(df_2.index))
df_1b = df_1.loc[common_idx].reset_index().copy()
df_2b = df_2.loc[common_idx].reset_index().copy()

In [None]:
# Differences between the two DataFrames once restricted to their shared index labels
# (overwrites the dffs computed earlier from the full DataFrames)
dffs = Utilities_df.get_dfs_diff(df_1b, df_2b)
dffs

In [None]:
# 'df1_values' entry of the first difference listed under first-level index label 0
dffs.loc[0].iloc[0]['df1_values']

In [None]:
# 'df2_values' entry of the first difference listed under first-level index label 0
dffs.loc[0].iloc[0]['df2_values']

In [None]:
# Element-wise comparison of the two value columns for first-level index label 0
dffs.loc[0]['df1_values']==dffs.loc[0]['df2_values']

In [None]:
# Distinct values of the second index level of dffs
dffs.index.get_level_values(1).unique()

In [None]:
# Display df_1b (df_1 restricted to the shared index labels) for visual inspection
df_1b