In [1]:
from importlib import reload
import sys, os
import re
import copy
from pathlib import Path
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns
from packaging import version
import itertools
import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
from AMI_SQL import AMI_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
from GenAn import GenAn
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
#sys.path.insert(0, os.path.join(os.path.realpath('..'), 'Utilities'))
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSingleSlicer as DFSingleSlicer

In [2]:
def assess_deltas_for_df_i(
    df_i, 
    outg_rec_nb_col = 'OUTG_REC_NB', 
    crt_dt_col      = 'CRT_DT', 
    crt_dt_ts_col   = 'CRT_DT_TS', 
    field_nm_col    = 'FIELD_NM', 
    old_val_tx_col  = 'OLD_VAL_TX', 
    new_val_tx_col  = 'NEW_VAL_TX', 
    verbose         = True
):
    r"""
    Designed to work inside of groupby.apply call!
    """
    #--------------------------------------------------
    assert(df_i[outg_rec_nb_col].nunique()==1)
    assert(df_i[crt_dt_col].nunique()==1)
    #-----
    outg_rec_nb     = df_i[outg_rec_nb_col].unique().tolist()[0]
    warning_tripped = False
    #--------------------------------------------------
    # At most, the field_nm_col column should contain the following values: ['Comments', 'Tot Cust Min Interr', 'Tot Cust Interr']
    #   'Comments' must be contained
    assert(set(df_i[field_nm_col].unique()).difference(set(['Comments', 'Tot Cust Min Interr', 'Tot Cust Interr']))==set())
    
    #--------------------------------------------------
    assert('Comments' in df_i[field_nm_col].unique().tolist())
    #-----
    comment_old_new   = df_i.loc[df_i[field_nm_col]=='Comments'][[old_val_tx_col, new_val_tx_col]]
    assert(comment_old_new.shape[0]==1)
    comment_old   = comment_old_new.iloc[0][old_val_tx_col]
    comment_new   = comment_old_new.iloc[0][new_val_tx_col]
    #-----
    assert('XJXX' in comment_new.upper())
    
    #--------------------------------------------------
    if 'Tot Cust Min Interr' in df_i[field_nm_col].unique().tolist():
        cmi_old_new   = df_i.loc[df_i[field_nm_col]=='Tot Cust Min Interr'][[old_val_tx_col, new_val_tx_col, crt_dt_ts_col]]
        # I suppose the content of the else statement below would handle both cases, but this makes it more explicitly clear what is going on
        if cmi_old_new.shape[0]==1:
            cmi_old = float(cmi_old_new.iloc[0][old_val_tx_col])
            cmi_new = float(cmi_old_new.iloc[0][new_val_tx_col])
        else:
            cmi_old = float(cmi_old_new.sort_values(by=[crt_dt_ts_col], ascending=True).iloc[0][old_val_tx_col])
            cmi_new = float(cmi_old_new.sort_values(by=[crt_dt_ts_col], ascending=True).iloc[-1][new_val_tx_col])

        #-----
        delta_cmi = cmi_old-cmi_new
    else:
        if verbose:
            print(f'Warning: Unable to calculate delta_cmi for OUTG_REC_NB = {outg_rec_nb}')
        warning_tripped = True
        cmi_old   = np.nan
        cmi_new   = np.nan
        delta_cmi = 0
        
    #--------------------------------------------------
    if 'Tot Cust Interr' in df_i[field_nm_col].unique().tolist():
        ci_old_new   = df_i.loc[df_i[field_nm_col]=='Tot Cust Interr'][[old_val_tx_col, new_val_tx_col, crt_dt_ts_col]]
        # I suppose the content of the else statement below would handle both cases, but this makes it more explicitly clear what is going on
        if ci_old_new.shape[0]==1:
            ci_old = float(ci_old_new.iloc[0][old_val_tx_col])
            ci_new = float(ci_old_new.iloc[0][new_val_tx_col])
        else:
            ci_old = float(ci_old_new.sort_values(by=[crt_dt_ts_col], ascending=True).iloc[0][old_val_tx_col])
            ci_new = float(ci_old_new.sort_values(by=[crt_dt_ts_col], ascending=True).iloc[-1][new_val_tx_col])            
        #-----
        delta_ci = ci_old-ci_new
    else:
        if verbose:
            print(f'Warning: Unable to calculate delta_ci for OUTG_REC_NB = {outg_rec_nb}')
        warning_tripped = True
        ci_old   = np.nan
        ci_new   = np.nan
        delta_ci = 0

    #--------------------------------------------------
    
    return_srs = pd.Series({
        'cmi_old'       : cmi_old, 
        'cmi_new'       : cmi_new, 
        'delta_cmi'     : delta_cmi, 
        #-----
        'ci_old'        : ci_old, 
        'ci_new'        : ci_new, 
        'delta_ci'      : delta_ci, 
    })
    #--------------------------------------------------
    if warning_tripped and verbose:
        print('\n')
    #--------------------------------------------------
    return return_srs

In [3]:
def get_dates_01_from_dir_name(
    name
):
    r"""
    Super simple function.
    name must be, e.g., '20230806_20230812'
    """
    #-------------------------
    dates = name.split('_')
    assert(len(dates)==2)
    date_0 = dates[0]
    date_1 = dates[1]
    #-------------------------
    return pd.to_datetime(date_0), pd.to_datetime(date_1)


def amass_ci_cmi_summaries(
    summary_paths, 
    return_included  = False,
    save_dir         = None, 
    date_in_fname    = True
    
):
    r"""
    summary_paths:
        Can be a list of paths or a dict with (name,path) (key,value) pairs
    """
    #--------------------------------------------------
    assert(Utilities.is_object_one_of_types(summary_paths, [list, dict]))
    if isinstance(summary_paths, list):
        summary_paths = {x:x for x in summary_paths}
    #--------------------------------------------------
    included_dict    = dict()
    ci_cmi_summaries = []
    for name_i, summary_path_i in summary_paths.items():
        assert(name_i not in included_dict.keys())
        if os.path.exists(summary_path_i):
            ci_cmi_summary_i = pd.read_pickle(summary_path_i)
            ci_cmi_summaries.append(ci_cmi_summary_i)
            included_dict[name_i] = True
        else:
            included_dict[name_i] = False
    #--------------------------------------------------
    cols = ci_cmi_summaries[0].columns.tolist()
    for ci_cmi_summary_i in ci_cmi_summaries:
        assert(ci_cmi_summary_i.columns.tolist()==cols)
    ci_cmi_summary = pd.concat(ci_cmi_summaries, axis=0)
    #--------------------------------------------------
    if save_dir is not None:
        save_path = os.path.join(save_dir, 'ci_cmi_summary.pkl')
        if date_in_fname:
            date_str  = datetime.datetime.today().strftime('%Y%m%d')
            appndx    = f'_Updated{date_str}'
            #-----
            save_path = Utilities.append_to_path(
                save_path                     = save_path, 
                appendix                      = appndx, 
                ext_to_find                   = '.pkl', 
                append_to_end_if_ext_no_found = False
            )
        #-------------------------
        ci_cmi_summary.to_pickle(save_path)
    #--------------------------------------------------
    if return_included:
        return ci_cmi_summary, included_dict
    else:
        return ci_cmi_summary

def amass_outg_rec_nb_to_files_dicts(
    results_dirs, 
    opcos            = None, 
    dict_fname       = 'outg_rec_nb_to_files_dict.pkl', 
    return_included  = False,
    save_dict_to_pkl = False, 
    date_in_fname    = True
    
):
    r"""
    results_dirs:
        All paths in results_dirs are expected to share the same parent directory!
    
    opcos:
        For now, if not None, must be simple string.
        So, e.g., 'oh' or 'swp'
    """
    #--------------------------------------------------
    # All paths in results_dirs are expected to share the same parent directory!
    parent_dir = Path(results_dirs[0]).parent
    for dir_i in results_dirs:
        assert(Path(dir_i).parent == parent_dir)
    #--------------------------------------------------
    assert(opcos is None or isinstance(opcos, str))
    if opcos is None:
        opcos = 'AllOPCOs'
    #-------------------------
    included_dict             = dict()
    outg_rec_nb_to_files_dict = dict()
    for dir_i in results_dirs:
        assert(dir_i not in included_dict.keys())
        base_dir_dict_i = os.path.join(dir_i, opcos)
        if os.path.exists(os.path.join(base_dir_dict_i, dict_fname)):
            with open(os.path.join(base_dir_dict_i, dict_fname), 'rb') as handle:
                outg_rec_nb_to_files_dict_i = pickle.load(handle)
                #-----
                assert(set(outg_rec_nb_to_files_dict_i.keys()).intersection(set(outg_rec_nb_to_files_dict.keys()))==set())
                outg_rec_nb_to_files_dict = outg_rec_nb_to_files_dict | outg_rec_nb_to_files_dict_i
            included_dict[dir_i] = True
        else:
            included_dict[dir_i] = False
    #--------------------------------------------------
    if save_dict_to_pkl:
        save_path = os.path.join(parent_dir, dict_fname)
        appndx     = f'_{opcos}'
        if date_in_fname:
            date_str  = datetime.datetime.today().strftime('%Y%m%d')
            appndx += f'_Updated{date_str}'
        save_path = Utilities.append_to_path(
            save_path                     = save_path, 
            appendix                      = appndx, 
            ext_to_find                   = '.pkl', 
            append_to_end_if_ext_no_found = False
        )
        #-------------------------
        with open(save_path, 'wb') as handle:
            pickle.dump(outg_rec_nb_to_files_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    #--------------------------------------------------
    if return_included:
        return outg_rec_nb_to_files_dict, included_dict
    else:
        return outg_rec_nb_to_files_dict


def get_results_dirs(
    base_dir
):
    r"""
    """
    #-------------------------
    # Basic glob pattern: r'[0-9]*_[0-9]*'
    glob_digit       = r'[0-9]'
    n_digits_in_date = 8
    #-----
    glob_date    = glob_digit*n_digits_in_date
    glob_pattern = glob_date + '_' + glob_date
    #-------------------------
    results_dirs = Utilities.find_all_paths(
        base_dir          = base_dir, 
        glob_pattern      = glob_pattern, 
        regex_pattern     = None, 
        regex_ignore_case = False, 
        recursive         = False
    )
    #-------------------------
    return results_dirs


def get_summary_paths(
    results_dirs, 
    subdirs         = r'AllOPCOs\Results', 
    fname           = r'ci_cmi_summary.pkl', 
    verbose         = True,
):
    r"""
    """
    #-------------------------
    if subdirs is None:
        subdirs = ''
    #-------------------------
    summary_paths = {}
    included_dict = {}
    for result_dir_i in results_dirs:
        assert(result_dir_i not in included_dict.keys())
        assert(os.path.isdir(result_dir_i))
        #-----
        dir_i_name = Path(result_dir_i).name
        #-----
        assert(dir_i_name not in summary_paths.keys())
        #-------------------------
        path_i = os.path.join(result_dir_i, subdirs, fname)
        if os.path.exists(path_i):
            summary_paths[dir_i_name] = os.path.join(result_dir_i, subdirs, fname)
            included_dict[result_dir_i] = True
        else:
            included_dict[result_dir_i] = False
    #-------------------------
    if verbose:
        not_included = {k:v for k,v in included_dict.items() if v==False}
        if len(not_included)>0:
            print(f"NOT INCLUDED:")
            for result_dir_i in not_included.keys():
                print(f"\t{result_dir_i}")
    #-------------------------
    return summary_paths


def impose_minmax_dates_on_summary_paths(
    summary_paths, 
    min_date      = None, 
    max_date      = None    
):
    r"""
    summary_paths:
        MUST be a dict with 
            keys   = to the date range of the data 
            values = ci_cmi_summary.pkl path
        e.g., = {
                '20230806_20230812': 'U:\\CloudData\\dovs_check\\Weekly\\20230806_20230812\\AllOPCOs\\Results\\ci_cmi_summary.pkl',
                '20230813_20230819': 'U:\\CloudData\\dovs_check\\Weekly\\20230813_20230819\\AllOPCOs\\Results\\ci_cmi_summary.pkl',
                ...
            }
    """
    #-------------------------
    if min_date is None and max_date is None:
        return summary_paths
    #-------------------------
    if min_date is None:
        min_date = pd.Timestamp.min
    if max_date is None:
        max_date = pd.Timestamp.max
    #-------------------------
    results_to_keep = {
        k:v for k,v in summary_paths.items() 
        if get_dates_01_from_dir_name(k)[0] >= min_date and get_dates_01_from_dir_name(k)[1] <= max_date
    }
    #-------------------------
    return results_to_keep

# As Documented in DOVS

In [5]:
conn_outages = Utilities.get_utldb01p_oracle_connection()
schema_name = 'DOVSADM'
table_name = 'DOVS_AUDIT_LOG_DIM'

In [6]:
cols_to_select = [
    dict(
        field_desc = "to_char(AUDT_LOG_ID)", 
        alias      = "AUDT_LOG_ID")
    , 
    "USER_ID", 
    "OUTG_REC_NB", 
    "CRT_DT_TS", 
    dict(
        field_desc = "TRUNC(CRT_DT_TS)", 
        alias      = "CRT_DT"
    ), 
    "TABLE_NM", 
    "FIELD_NM", 
    "OLD_VAL_TX", 
    "NEW_VAL_TX", 
    "MOD_TYP_TX", 
    "SOURCE_NM", 
    dict(
        field_desc = "TO_CHAR(EDW_LAST_UPDT_DT,'yyyy-mm-dd hh24:mi:ss.ff')", 
        alias      = "EDW_LAST_UPDT_DT"
    ), 
    dict(
        field_desc = "to_char(MODIFICATION_NB)", 
        alias      = "MODIFICATION_NB"
    ), 
    "OUTAGE_NB"
]
#--------------------------------------------------
sql_select = SQLSelect(field_descs=cols_to_select)
#--------------------------------------------------

In [7]:
sql_1 = """
{} 
FROM DOVSADM.DOVS_AUDIT_LOG_DIM
WHERE NEW_VAL_TX LIKE '%XJXX%'
AND   OLD_VAL_TX NOT LIKE '%XJXX%'
""".format(sql_select.get_statement_string())
df_1 = pd.read_sql_query(sql_1, conn_outages, dtype={'OUTG_REC_NB':np.int32}) 
df_1.shape

  df_1 = pd.read_sql_query(sql_1, conn_outages, dtype={'OUTG_REC_NB':np.int32})


(1002, 14)

In [8]:
# Not sure why TO_DATE doesn't work below, but TRUNC does
sql_2 = """
{} 
FROM DOVSADM.DOVS_AUDIT_LOG_DIM
WHERE OUTG_REC_NB IN ({})
AND   TRUNC(CRT_DT_TS) IN ({})
AND   FIELD_NM IN ('Comments', 'Tot Cust Min Interr', 'Tot Cust Interr')
""".format(
    sql_select.get_statement_string(), 
    Utilities_sql.join_list(df_1['OUTG_REC_NB'].unique().tolist(), quotes_needed=True), 
    Utilities_sql.join_list(df_1['CRT_DT_TS'].dt.date.unique().tolist(), quotes_needed=True)
)
df_2 = pd.read_sql_query(sql_2, conn_outages, dtype={'OUTG_REC_NB':np.int32, 'AUDT_LOG_ID':np.int32, 'MODIFICATION_NB':np.int32}) 
df_2.shape

  df_2 = pd.read_sql_query(sql_2, conn_outages, dtype={'OUTG_REC_NB':np.int32, 'AUDT_LOG_ID':np.int32, 'MODIFICATION_NB':np.int32})


DatabaseError: Execution failed on sql '
SELECT
	to_char(AUDT_LOG_ID) AS AUDT_LOG_ID,
	USER_ID,
	OUTG_REC_NB,
	CRT_DT_TS,
	TRUNC(CRT_DT_TS) AS CRT_DT,
	TABLE_NM,
	FIELD_NM,
	OLD_VAL_TX,
	NEW_VAL_TX,
	MOD_TYP_TX,
	SOURCE_NM,
	TO_CHAR(EDW_LAST_UPDT_DT,'yyyy-mm-dd hh24:mi:ss.ff') AS EDW_LAST_UPDT_DT,
	to_char(MODIFICATION_NB) AS MODIFICATION_NB,
	OUTAGE_NB 
FROM DOVSADM.DOVS_AUDIT_LOG_DIM
WHERE OUTG_REC_NB IN ('13736048','13734808','13737892','13787464','13787770','13784814','13782382','13787496','13787888','13784623','13787013','13785489','13788614','13788151','13787981','13785170','13786448','14094479','14095327','14095749','14095916','14094106','14093030','14093053','14093831','14094673','14094956','14095148','14092885','14092759','14095477','14094913','14094478','14094390','14093389','14093524','14093560','14093787','14093694','14093723','14093880','14094717','14095244','14094063','14105246','14095945','14096327','14096103','14096137','14104414','14105975','14106194','14107225','14096881','14107704','14111726','14101935','14096426','14097616','14102333','14115359','14104264','14108895','14114982','14110764','14111088','14100972','14101225','14101784','14103186','14103112','14104305','14098252','14100540','14100961','14105355','14105475','14106259','14105817','14100144','14100811','14098247','14097509','14098147','14099152','14098590','14099106','14099193','14096768','14096812','14098013','14099092','14102574','14102619','14102673','14102928','14108261','14108277','14109682','14108170','14096571','14100962','14098202','14098251','14097350','14099000','14110538','14111132','14110585','14115360','14109210','14107958','14110900','14097921','14098942','14098907','14099489','14099752','14099718','14108548','14098165','14098925','14098814','14098884','14098708','14100948','14101073','14098824','14096128','14111693','14115274','14099095','14100167','14100207','14099954','14100489','14096494','14098741','14099178','14103360','14103444','14103551','14103879','14100377','14099016','14102898','14108201','14112665','14112706','14112904','14113333','14113382','14115134','14107917','14107834','14107996','14100133','14100076','14099538','14105120','14107334','14110760','14111812','14115042','14104104','14108065','14109139','14098108','14115112','14115266','14115484','14116034','14117845','14120327','14124540','14118503','14123481','14118253','14121876','14125091','14124585','14116024','14125167','14116415','14118494','14122826','14123095','14117075','14116682','14116648','14118002','14117767','14124949','14118569','14117924','14122251','14118155','14117069','14118109','14118420','14118850','14118989','14124583','14124507','14118287','14121198','14120946','14121010','14120912','14120992','14120888','14119324','14119539','14121200','14120733','14121372','14119495','14121340','14121397','14122076','14122559','14119810','14122842','14120326','14120642','14121006','14124923','14120788','14121453','14123986','14123501','14116324','14116407','14119540','14120256','14119909','14116117','14118785','14118934','14123554','14117506','14118477','14116899','14145933','14109103','14108864','14105895','14106759','14106157','14106405','14096029','13913308','13913570','13909714','13913102','13912960','13915154','13910428','13914291','13913886','13904648','13904950','13905463','13914440','13904191','13904934','13907556','13913998','13904484','13908491','13908090','13913325','13826183','13806327','13824649','13806149','13825412','13808546','13829849','13806329','13812747','13812964','13806274','13824714','13807726','13812237','13813068','13814049','13815414','13814692','13815207','13828988','13827589','13813256','13831500','13831197','13832422','13811821','13808454','13815657','13812886','13828357','13829824','13809575','13811368','13825363','13824018','13827347','13816052','13816042','13818424','13898368','13893089','13900267','13893562','13893695','13897752','13893723','13819367','13895284','13900129','13819754','13822708','13817428','13820080','13823314','13892678','13892923','13899058','13893228','13819105','13820665','13817751','13893488','14117318','14117465','14117895','14119153','14119046','14120452','14119211','14128171','14130061','14129939','14127428','14127890','14127912','14128363','14130193','14000229','14000399','13951035','13965900','13923280','14127182','13983466','13924200','14125668','14129680','14127730','14129206','14134517','14134596','14131534','14131900','14128300','14130960','13895684','14161363','14161156','14162086','14147616','14147642','14151024','14153628','14155568','14147919','14148398','14153314','14150958','14148095','14148233','14149165','14151140','14151123','14151242','14152270','14152540','14152288','14147443','14160004','14149398','14149612','14149449','14149478','14131082','14130839','14130020','14133068','14129225','14125934','14126782','14133158','14133823','14126803','14132088','14132109','14132539','14133195','14127599','14132483','14130868','14129809','14000523','14001006','14127736','14130909','13923268','14126928','14127527','14133273','14131256','14129323','14127911','14129422','14151903','14151412','14152036','14149166','14148809','14149390','14150590','14151737','14151961','14159923','14151547','14149103','14149164','14163896','14149235','14148763','14153961','14155091','14154097','14150751','14151124','14150671','14152245','14165302','14177130','14151993','14152192','14152239','14148338','14158323','14160644','14162323','14152363','14155288','14155345','14134437','14132126','14131652','14130700','14130212','14132554','14134393','14131577','14132302','14133287','14133596','14134058','14126148','14132504','14133103','14129437','14129721','14129986','14130070','14129784','14130941','14131481','14131960','14133028','14127586','14126183','14001407','14133452','14133583','14128560','14134321','14126721','14133470','13999482','13950411','14156654','14148713','14149700','14150089','14148849','14148111','14148534','14148180','14152582','14153223','14148264','14152701','14153588','14155801','14157920','14157268','14158734','14149929','14150541','14149826','14150227','14151302','14150362','14153349','14154133','14164867','14165143','14174727','14150280','14150410','14163376','14148507','14152273','14152461','14152755','14153812','14150684','14151244','14151460','14152129','14164045','14150589','14150972','14161778','14163207','14149653','14147527','14148034','14148302','14150685','14154174','14155719','14140390','14141456','14142063','14140144','14129600','14130787','14130303','14143700','14143561','14140027','14143753','14131650','14156496','14132188','14133270','14175377','14157030','14161929','14180182','14163265','14141170','14141310','14141464','14141449','14132193','14132705','14142673','14142629','14143174','14143445','14144439','14133896','14181211','14156892','14167698','14143128','14140269','14134170','14129529','14129527','14180237','14157569','14142593','14142651','14131506','14132868','14135227','14131694','14134373','14174052','14141317','14132933','14133502','14133606','14129399','14129466','14133937','14130888','14168668','14156825','14175091','14141363','14134006','14130147','14132958','14134276','14133617','14161155','14181658','14158603','14157736','14130585','14130719','14130947','14134402','14134440','14132249','14132005','14132030','14131080','14129235','14139581','14143092','14143299','14143421','14130587','14130799','14170729','14158450','14161780','14167679','14140227','14139994','14143581','14143539','14144067','14130674','14139901','14140625','14142220','14140660','14140907','14142236','14142229','14142582','14142531','14140173','14140619','14142945','14131522','14156053','14141583','14142243','14142555','14140633','14140022','14144066','14144140','14140384','14132934','14131860','14129264','14141290','14143048','14142804','14142887','14140970','14132018','14132559','14174243','14162044','14175242','14146288','14169133','14160009','14159531','14145948','14181242','14164716','14167000','14147298','14146190','14146596','14146777','14175317','14174593','14179142','14145367','14172077','14162832','14145302','14145268','14145635','14178051','14173153','14169052','14146506','14147416','14176396','14171679','14180148','14176889','14146352','14146613','14146852','14147146','14168690','14158708','14175308','14166723','14175245','14147687','14161716','14166258','14164651','14147098','14159674','14172205','14169671','14180297','14145794','14145938','14175935','14180150','14168445','14146655','14171691','14158202','14164926','14160670','14168189','14178048','14179901','14181372','14167024','14179221','14173788','14173686','14170578','14146754','14165067','14177154','14171689','14145489','14161718','14146311','14174497','14162739','14145319','14165319','14157812','14171055','14173542','14160781','14160850','14161500','14161488','14171524','14171909','14181584','14156857','14172747','14171844','14159426','14165334','14158371','14145226','14163776','14169030','14166418','14169173','14169932','14171359','14177840','14145129','14145128','14168004','14159318','14171277','14178623','23782692','14145932','14162388','14161475','14165239','14158034','14163525','14157658','14183639','14178912','14179039','14180480','14173114','14173182','14174181','14175515','14166936','14171246','14172778','14177968','14176318','14179373','14173401','14174684','14178527','14178996','14169675','14171784','14173968','14173853','14172846','14172718','14176013','14176378','14178314','14170227','14180146','14179345','14168964','14169230','14169442','14169226','14174772','14175215','14179398','14181077','14181079','14179621','14180685','14176717','14178336','14172953','14180795','14179647','14176485','14176426','14166417','14169480','14171466','14169328','14179570','14176748','14177748','14177870','14169888','14180980','14170808','14169155','14177087','14000086','14000155','14002292','14130346','14131979','14133049','14130322','14128538','14131906','14132945','14133255','14130063','14130616','14129931','14128949','14128198','14128892','14127743','14129160','14128786','14127874','14128780','14128481','14126994','14128772','14126895','14127039','14128841','14127393','14127813','14127756','14128357','14128180','14127268','14128507','14128689','14128679','14128291','14129101','14127387','14127797','14125932','14125994','14125843','14126649','14126248','14125986','14137012','14138026','14125805','14109137','14109718','14136731','14136820','14136563','14135968','14136888','14126830','14125526','14103239','14103417','14104742','14104714','14106790','14103225','14138325','14135175','14107079','14107102','14136106','14125925','14109879','14109967','14110030','14137183','14137308','14137326','14137600','14138691','14127742','14107861','14127170','14126663','14126484','14105712','14106751','14136615','14136661','14137028','14137029','14126057','14125758','14113464','14113723','14113899','14111216','14106148','14105980','14106365','14106058','14106244','14137410','14113474','14136349','14137240','14138730','14138492','14135147','14135483','14135718','14109703','14109878','14110610','14110487','14110017','14104846','14104513','14138577','14138982','14126231','14127747','14125692','14125760','14107866','14111744','14128847','14114122','14114409','14115307','14139066','14138692','14138767','14178878','14182044','14173113','14181406','14179403','14181293','14176486','14138148','14137917','14104107','14138564','14138161','14138420','14138726')
AND   TRUNC(CRT_DT_TS) IN ('2024-02-07','2024-03-08','2024-10-08','2024-10-17','2024-10-14','2024-10-18','2024-06-13','2024-04-23','2024-04-05','2024-10-21','2024-10-25','2024-12-03','2024-10-28','2024-12-02','2024-12-04','2024-10-22','2024-10-30')
AND   FIELD_NM IN ('Comments', 'Tot Cust Min Interr', 'Tot Cust Interr')
': ('HY000', '[HY000] [Oracle][ODBC][Ora]ORA-01795: maximum number of expressions in a list is 1000\n\x00\x00⡐괓Ǫ\x00⯠괓Ǫ\x00⫰괓Ǫ\x00⠠괓Ǫ\x00ⶐ괓Ǫ\x00ⷀ괓Ǫ\x00ⷰ괓Ǫ\x00⸠괓Ǫ\x00⹐괓Ǫ\x00⺀괓Ǫ\x00⺰괓Ǫ\x00⻠괓Ǫ\x00⽰괓Ǫ\x00⾠괓Ǫ\x00⿐괓Ǫ\x00\u3000괓Ǫ\x00〰괓Ǫ\x00だ괓Ǫ\x00ゐ괓Ǫ\x00 (1795) (SQLExecDirectW)')

In [28]:
df_1['OUTG_REC_NB'].nunique()

1002

In [None]:
#--------------------------------------------------
# The following is for safety, but in most cases it likely has no effect
#-----
# NOTE:  df_2.groupby(['OUTG_REC_NB', 'CRT_DT']).filter(lambda x: x.name in outg_rec_nb_crt_dt_pairs)
#        df_2.groupby(['OUTG_REC_NB', 'CRT_DT'], as_index=False, group_keys=False).apply(lambda x: x if x.name in outg_rec_nb_crt_dt_pairs else None)
#--------------------------------------------------
outg_rec_nb_crt_dt_pairs = df_1[['OUTG_REC_NB', 'CRT_DT']].values.tolist()
# Get unique pairs
outg_rec_nb_crt_dt_pairs = list(set([tuple(x) for x in outg_rec_nb_crt_dt_pairs]))
#-------------------------
df_2 = df_2.groupby(['OUTG_REC_NB', 'CRT_DT']).filter(lambda x: x.name in outg_rec_nb_crt_dt_pairs)
#--------------------------------------------------

In [None]:
#--------------------------------------------------
df_3 = df_2.groupby(['OUTG_REC_NB', 'CRT_DT']).apply(lambda x: assess_deltas_for_df_i(df_i=x))
#-------------------------
# I expect each OUTG_REC_NB to have only been updated by DOVSAudit results on a single day
# This boils down to each 'OUTG_REC_NB' having a single unique CRT_DT
assert(df_3.index.nunique() == df_3.shape[0])
df_3 = df_3.reset_index(drop=False)

In [None]:
df_3['delta_cmi'].sum()

In [None]:
df_3['delta_ci'].sum()

In [None]:
df_3b = DOVSOutages.get_outg_info_for_df(
    df               = df_3, 
    outg_rec_nb_idfr = 'OUTG_REC_NB', 
    build_sql_function = DOVSOutages_SQL.build_sql_outage, 
    build_sql_function_kwargs = dict(cols_of_interest = ['DT_OFF_TS', 'OUTG_REC_NB'])
)

In [None]:
# df_3b = DOVSOutages.append_outg_info_to_df(
#     df               = df_3b.set_index('OUTG_REC_NB'), 
#     outg_rec_nb_idfr = 'index', 
#     build_sql_function = DOVSOutages_SQL.build_sql_outage, 
#     build_sql_function_kwargs = dict(cols_of_interest = ['DT_OFF_TS', 'OUTG_REC_NB'])
# )

# My Results

In [None]:
results_dirs = get_results_dirs(base_dir = r'U:\CloudData\dovs_check\Weekly')
# min_date = None
# max_date = None

min_date = df_3b['DT_OFF_TS'].min()
max_date = df_3b['DT_OFF_TS'].max()

In [None]:
summary_paths = get_summary_paths(
    results_dirs, 
    subdirs         = r'AllOPCOs\Results', 
    fname           = r'ci_cmi_summary.pkl', 
    verbose         = True,
)
#-------------------------
results_to_keep = impose_minmax_dates_on_summary_paths(
    summary_paths, 
    min_date      = min_date, 
    max_date      = max_date    
)
#--------------------------------------------------
ci_cmi_summary, included_dict_summaries = amass_ci_cmi_summaries(
    summary_paths    = results_to_keep, 
    return_included  = True,
    save_dir         = None, 
    date_in_fname    = True
)
#-------------------------
delta_ci_net  = ci_cmi_summary['delta_ci_dovs_ami'].sum()
delta_cmi_net = ci_cmi_summary['delta_cmi_dovs_ami'].sum()
#-----
delta_ci_net_dovs_beg  = ci_cmi_summary['delta_ci_dovs_ami_dovs_beg'].sum()
delta_cmi_net_dovs_beg = ci_cmi_summary['delta_cmi_dovs_ami_dovs_beg'].sum()


#--------------------------------------------------
ci_cmi_summary_all, included_dict_summaries_all = amass_ci_cmi_summaries(
    summary_paths    = summary_paths, 
    return_included  = True,
    save_dir         = None, 
    date_in_fname    = True
)
#-------------------------
delta_ci_net_all  = ci_cmi_summary_all['delta_ci_dovs_ami'].sum()
delta_cmi_net_all = ci_cmi_summary_all['delta_cmi_dovs_ami'].sum()
#-----
delta_ci_net_dovs_beg_all  = ci_cmi_summary_all['delta_ci_dovs_ami_dovs_beg'].sum()
delta_cmi_net_dovs_beg_all = ci_cmi_summary_all['delta_cmi_dovs_ami_dovs_beg'].sum()

In [None]:
# outg_rec_nb_to_files_dict, included_dict_ntf_dct = amass_outg_rec_nb_to_files_dicts(
#     results_dirs     = results_dirs, 
#     opcos            = None, 
#     dict_fname       = 'outg_rec_nb_to_files_dict.pkl', 
#     return_included  = True,
#     save_dict_to_pkl = False, 
#     date_in_fname    = True
# )

In [None]:
df_3['OUTG_REC_NB'].astype(str)

In [None]:
set(df_3['OUTG_REC_NB'].astype(str).unique()).difference(set(list(outg_rec_nb_to_files_dict.keys())))

In [None]:
df_3[df_3['OUTG_REC_NB']==13806327]

In [None]:
ci_cmi_summary_overlap = ci_cmi_summary[ci_cmi_summary['outg_rec_nb'].isin(df_3['OUTG_REC_NB'].astype(str).unique())].copy()
ci_cmi_summary_overlap

In [None]:
ci_cmi_summary_overlap['delta_ci_dovs_ami_dovs_beg'].sum()

In [None]:
ci_cmi_summary_overlap['delta_cmi_dovs_ami_dovs_beg'].sum()

In [None]:
print(f"Results supplied (All)")
print(f"delta_ci:  {np.round(delta_ci_net_dovs_beg_all, 2)}")
print(f"delta_cmi: {np.round(delta_cmi_net_dovs_beg_all, 2)}")
print()
print(f"Results supplied ({min_date.date()} - {max_date.date()})")
print(f"delta_ci:  {np.round(delta_ci_net_dovs_beg, 2)}")
print(f"delta_cmi: {np.round(delta_cmi_net_dovs_beg, 2)}")
print()
print(f"Results changed in DOVS and marked XJXX ({min_date.date()} - {max_date.date()})")
print(f"delta_ci:  {np.round(df_3['delta_ci'].sum(), 2)}")
print(f"delta_cmi: {np.round(df_3['delta_cmi'].sum(), 2)}")
print()
print(f"Results supplied (Including only those OUTG_REC_NBs changed in DOVS)")
print(f"delta_ci:  {np.round(ci_cmi_summary_overlap['delta_ci_dovs_ami_dovs_beg'].sum(), 2)}")
print(f"delta_cmi: {np.round(ci_cmi_summary_overlap['delta_cmi_dovs_ami_dovs_beg'].sum(), 2)}")
print()