In [None]:
# Import pandas on its own so the installed version can be inspected.
import pandas as pd
# Bare last expression: the notebook shows the pandas version string as the cell output.
pd.__version__

# Directions for running:

For the purposes of this demonstration, let's assume your AEP User ID is s123456, and your local Documents directory
is located at C:\Users\s123456\Documents

1. If not already done, clone the Analysis GitHub repo (https://github.aepsc.com/s346557/Analysis).
<br>- I will assume the repo was cloned into the Documents directory, i.e. I assume your local copy of the repo is located at C:\Users\s123456\Documents\Analysis (and therefore, this Jupyter notebook should be located at C:\Users\s123456\Documents\Analysis\JupyterNbs\IT_Demo.ipynb)
<br><br>

2. Create a simple text file containing your AEP passwords.
- I suggest you use the file pwd_file_template.txt in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file_template.txt) to create your own password file.
    - DO NOT ALTER the pwd_file_template.txt file; create a new pwd_file.txt file instead!
- I further suggest you name your password file pwd_file.txt and place it in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file.txt).
    - The Git repo is set up to ignore pwd_file.txt in the Analysis directory, so your information will not be pushed up to the repo if saved in this manner.
- NOTE: At one point, my Athena and Oracle passwords were different, which is why there is a 'Main' and 'Oracle' entry in the password file.  Likely you will put the same password for both entries.
<br><br>

3. IF NOT ALREADY DONE, run the method Utilities_config.generate_initial_config_file to create your initial config.yaml file
- I suggest you input arguments for all three parameters (aep_user_id, pwd_file_path, and local_data_dir)
    - If no aep_user_id is given, the code will attempt to determine your AEP User ID from the contents of your C:\Users directory
    - If no pwd_file_path is given, it is assumed to exist, be named pwd_file.txt, and be located in the Analysis directory (C:\Users\s123456\Documents\Analysis\pwd_file.txt)
    - If local_data_dir is not None, it should point to a directory where you plan to store any results (my personal local_data_dir is located at C:\Users\s346557\Documents\LocalData\).
        - If you are not planning to save or load any files locally, I believe this can be kept as None

In [None]:
from importlib import reload

import sys, os
import re
from pathlib import Path
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version

import copy
import itertools
import adjustText

import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config

# ----------------------------------------------------------------------------------------------------
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ONLY NEED TO RUN ONCE!
So, if you have already run Utilities_config.generate_initial_config_file (and your configuration has not changed since), there is no need to run again

In [None]:
# One-time setup cell: when run_config is True, the values below are handed to
# Utilities_config.generate_initial_config_file.  Leave run_config False if the
# configuration file has already been generated and nothing has changed since.
run_config = False

# REPLACE VALUES BELOW WITH YOUR OWN!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE: the user ID embedded in both paths must agree with aep_user_id.
aep_user_id = 's123456'
pwd_file_path = r'C:\Users\s123456\Documents\Analysis\pwd_file.txt'
local_data_dir = r'C:\Users\s123456\Documents\LocalData'
#-----
# Names below should equal those you chose for the databases in AQT.
# If you don't have permissions for one, simply leave the default value.
# At the time of writing (Feb. 2024), to use the software, one typically needs connections to Athena Prod, 
#   UTLDB01P, and possibly EEMSP
athena_prod_dsn = 'Athena Prod'
athena_dev_dsn  = 'Athena Dev'
athena_qa_dsn   = 'Athena QA'
utldb01p_dsn    = 'UTLDB01P'
eemsp_dsn       = 'EEMSP'

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
if run_config:
    # create_local_data_dir_if_dne=True is passed alongside local_data_dir;
    # presumably it creates that directory when it does not exist -- confirm in Utilities_config.
    Utilities_config.generate_initial_config_file(
        aep_user_id                  = aep_user_id, 
        pwd_file_path                = pwd_file_path, 
        local_data_dir               = local_data_dir, 
        create_local_data_dir_if_dne = True, 
        athena_prod_dsn              = athena_prod_dsn, 
        athena_dev_dsn               = athena_dev_dsn, 
        athena_qa_dsn                = athena_qa_dsn, 
        utldb01p_dsn                 = utldb01p_dsn, 
        eemsp_dsn                    = eemsp_dsn, 
    )

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# ----------------------------------------------------------------------------------------------------

In [None]:
#---------------------------------------------------------------------
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
from EEMSP import EEMSP
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from MECPODf import MECPODf
from MECPOAn import MECPOAn
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
from DOVSAudit import DOVSAudit
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import Plot_General
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSingleSlicer as DFSingleSlicer
#---------------------------------------------------------------------

# ----------------------------------------------------------------------------------------------------
# GENERAL STUFF
# ----------------------------------------------------------------------------------------------------

### MeterPremise class

In [None]:
# Build the MeterPremise object without initializing its DataFrame
# (init_df_in_constructor=False); the following cell prints its SQL statement.
# The state code is written in upper case so this request uses exactly the same
# arguments as the later cell that builds mp with init_df_in_constructor=True.
mp = MeterPremise(
    init_df_in_constructor    = False, 
    build_sql_function        = MeterPremise.build_sql_meter_premise, 
    build_sql_function_kwargs = dict(
        curr_cust_nm = 'HENNESSEY KEVIN', 
        state        = 'OH'
    ), 
)

In [None]:
# Print the SQL statement returned by mp.get_sql_statement().
print(mp.get_sql_statement())

In [None]:
# Build the MeterPremise object with init_df_in_constructor=True;
# its DataFrame (mp.df) is displayed in the next cell.
mp = MeterPremise(
    init_df_in_constructor=True,
    build_sql_function=MeterPremise.build_sql_meter_premise,
    build_sql_function_kwargs={
        'curr_cust_nm': 'HENNESSEY KEVIN',
        'state': 'OH',
    },
)

In [None]:
# Display the DataFrame held by mp.
mp.df

### AMINonVee

In [None]:
# Build an AMINonVee object via AMINonVee_SQL.build_sql_usg, passing the unique
# premise numbers found in mp.df and a 2024-01-01 to 2024-01-07 date range.
ami = AMINonVee(
    init_df_in_constructor=True,
    build_sql_function=AMINonVee_SQL.build_sql_usg,
    build_sql_function_kwargs={
        'premise_nbs': mp.df['prem_nb'].unique().tolist(),
        'date_range': ['2024-01-01', '2024-01-07'],
    },
)

In [None]:
# Display the DataFrame held by ami.
ami.df

In [None]:
# Print the SQL statement returned by ami.get_sql_statement().
print(ami.get_sql_statement())

### AMIEndEvents

In [None]:
# Build an AMIEndEvents object via AMIEndEvents_SQL.build_sql_end_events, passing the
# unique premise numbers found in mp.df and a date range covering calendar year 2023.
ami_ede = AMIEndEvents(
    init_df_in_constructor=True,
    build_sql_function=AMIEndEvents_SQL.build_sql_end_events,
    build_sql_function_kwargs={
        'premise_nbs': mp.df['prem_nb'].unique().tolist(),
        'date_range': ['2023-01-01', '2023-12-31'],
    },
)

In [None]:
# Display the DataFrame held by ami_ede.
ami_ede.df

### DOVS

In [None]:
# Build a DOVSOutages object via DOVSOutages_SQL.build_sql_std_outage, passing the
# unique premise numbers found in mp.df and a date range covering calendar year 2023.
# build_consolidated=True is forwarded to the DOVSOutages constructor.
dovs = DOVSOutages(
    init_df_in_constructor=True,
    build_sql_function=DOVSOutages_SQL.build_sql_std_outage,
    build_sql_function_kwargs={
        'premise_nbs': mp.df['prem_nb'].unique().tolist(),
        'date_range': ['2023-01-01', '2023-12-31'],
    },
    build_consolidated=True,
)

In [None]:
# Display the DataFrame held by dovs.
dovs.df

# ----------------------------------------------------------------------------------------------------
# RUNNING THE ALGORITHM
# ----------------------------------------------------------------------------------------------------

## No local data, everything on-the-fly

In [None]:
# Outage record number to audit: the first index value of dovs.df.
# A fixed record number is kept (commented out) as an alternative:
# outg_rec_nb = '13464549'
outg_rec_nb = dovs.df.index[0]
# Forwarded below to audit_i.run_ami_and_ede_daq as search_time_half_window.
search_time_half_window=pd.Timedelta('24 hours')

In [None]:
# Set up a DOVSAudit for the selected outage record number.
audit_i = DOVSAudit(outg_rec_nb=outg_rec_nb)

# No pre-built DOVS DataFrame or info dict is supplied.
audit_i.load_dovs(dovs_df=None, dovs_df_info_dict=None)

# Build the meter-premise DataFrame, using a 1-hour fuzziness for dropping duplicates.
audit_i.build_mp_df(drop_mp_dups_fuzziness=pd.Timedelta('1 hour'))

# Run the AMI / end-device-event acquisition with the chosen half window.
audit_i.run_ami_and_ede_daq(search_time_half_window=search_time_half_window)

In [None]:
# Build the best-estimates DataFrame on the audit object, then display it.
audit_i.build_best_ests_df()
audit_i.best_ests_df

In [None]:
# Identify overlaps, handing DOVSOutages_SQL.build_sql_outage in as overlaps_dovs_sql_fcn.
audit_i.identify_overlaps(overlaps_dovs_sql_fcn=DOVSOutages_SQL.build_sql_outage)

In [None]:
# Display audit_i.best_ests_df_w_keep_info.
audit_i.best_ests_df_w_keep_info

In [None]:
# Display audit_i.best_ests_df.
audit_i.best_ests_df

In [None]:
# Resolve the overlapping audits on the audit object.
audit_i.resolve_overlapping_audits()

In [None]:
# Plot the AMI data around the outage with no slicer applied and no legend,
# drawing 'value' against 'starttimeperiod_local' with one hue per 'aep_premise_nb'.
# The Timedelta is spelled '1 hour' rather than '1H': the single-letter 'H' unit is
# deprecated for pd.Timedelta in pandas >= 2.2, and '1 hour' is the spelling already
# used elsewhere in this notebook.
fig, ax = audit_i.plot_ami_around_outage(
    expand_time=pd.Timedelta('1 hour'), 
    slicer=None, 
    draw_legend=False, 
    fig_num=0, 
    x='starttimeperiod_local', 
    y='value', 
    hue='aep_premise_nb', 
)

In [None]:
# Plot the AMI data around the outage again, this time with a legend and restricted
# (via an 'isin' slicer) to the first four unique premise numbers in audit_i.ami_df_i.
# The Timedelta is spelled '1 hour' rather than '1H': the single-letter 'H' unit is
# deprecated for pd.Timedelta in pandas >= 2.2, and '1 hour' is the spelling already
# used elsewhere in this notebook.
fig, ax = audit_i.plot_ami_around_outage(
    expand_time=pd.Timedelta('1 hour'), 
    slicer=DFSingleSlicer(
        column = 'aep_premise_nb', 
        value  = audit_i.ami_df_i['aep_premise_nb'].unique().tolist()[:4], 
        comparison_operator = 'isin'
    ), 
    draw_legend=True, 
    fig_num=0, 
    x='starttimeperiod_local', 
    y='value', 
    hue='aep_premise_nb', 
)

### More interesting results...

In [None]:
# Switch to a fixed outage record number for the second walk-through.
outg_rec_nb = '13582178'
# Forwarded below to audit_i.run_ami_and_ede_daq as search_time_half_window.
search_time_half_window=pd.Timedelta('24 hours')

In [None]:
# Full audit pipeline for the selected outage record number, run in a single cell.
audit_i = DOVSAudit(outg_rec_nb=outg_rec_nb)

# No pre-built DOVS DataFrame or info dict is supplied.
audit_i.load_dovs(dovs_df=None, dovs_df_info_dict=None)

# Build the meter-premise DataFrame, using a 1-hour fuzziness for dropping duplicates.
audit_i.build_mp_df(drop_mp_dups_fuzziness=pd.Timedelta('1 hour'))

# Run the AMI / end-device-event acquisition with the chosen half window.
audit_i.run_ami_and_ede_daq(search_time_half_window=search_time_half_window)

# Build the best estimates, identify overlaps, then resolve the overlapping audits.
audit_i.build_best_ests_df()
audit_i.identify_overlaps(overlaps_dovs_sql_fcn=DOVSOutages_SQL.build_sql_outage)
audit_i.resolve_overlapping_audits()
#-------------------------

In [None]:
# Mean times from DOVSAudit.get_mean_times_w_dbscan: clustering uses the winner
# min/max estimates, while the output also carries the conservative and zero_times
# estimates.  Two objects are unpacked (return_labelled_best_ests_df=True is passed).
means_df, best_ests_df_w_db_lbl = DOVSAudit.get_mean_times_w_dbscan(
    best_ests_df=audit_i.best_ests_df,
    eps_min=5,
    min_samples=2,
    ests_to_include_in_clustering=['winner_min', 'winner_max'],
    ests_to_include_in_output=[
        'winner_min',
        'winner_max',
        'conservative_min',
        'conservative_max',
        'zero_times_min',
        'zero_times_max',
    ],
    return_labelled_best_ests_df=True,
)

# Series built by DOVSAudit.build_n_PNs_w_power_srs from the best estimates and the
# AMI data; return_pct=True is requested.
n_PNs_w_power_srs = DOVSAudit.build_n_PNs_w_power_srs(
    best_ests_df=audit_i.best_ests_df,
    ami_df_i=audit_i.ami_df_i,
    return_pct=True,
    PN_col='PN',
    t_min_col='winner_min',
    t_max_col='winner_max',
    i_outg_col='i_outg',
    PN_col_ami_df='aep_premise_nb',
)

In [None]:
#-------------------------
ami_df_i = audit_i.ami_df_i.copy()
#--------------------------------------------------
# Instead of using get_full_part_not_outage_subset_dfs, simply grab the PNs which suffered
#   outages from best_ests_df
if audit_i.best_ests_df.shape[0]>0:
    outg_SNs = audit_i.best_ests_df['PN'].unique().tolist()
    removed_due_to_overlap_col = 'removed_due_to_overlap'
else:
    outg_SNs = []
    removed_due_to_overlap_col = None
#-----
ami_df_i_out      = ami_df_i[ami_df_i['aep_premise_nb'].isin(outg_SNs)]
ami_df_i_not_out  = ami_df_i[~ami_df_i['aep_premise_nb'].isin(outg_SNs)]  

In [None]:
#--------------------------------------------------
if audit_i.best_ests_df_w_keep_info is not None and audit_i.best_ests_df_w_keep_info.shape[0]>0:
    ptntl_ovrlp_outg_rec_nbs = list(set(audit_i.best_ests_df_w_keep_info['overlap_DOVS'].sum()))
    if len(ptntl_ovrlp_outg_rec_nbs)>0:
        ovrlp_dovs = DOVSOutages(
            df_construct_type=DFConstructType.kRunSqlQuery, 
            contstruct_df_args=None, 
            init_df_in_constructor=True,
            build_sql_function=DOVSOutages_SQL.build_sql_outage, 
            build_sql_function_kwargs=dict(
                outg_rec_nbs=ptntl_ovrlp_outg_rec_nbs, 
                include_premise=True
            ), 
            build_consolidated=True
        )
        other_dovs_events_df = ovrlp_dovs.df.reset_index().copy()
    else:
        other_dovs_events_df = None
else:
    other_dovs_events_df = None

In [None]:
# Run the companion notebook check_DOVS_METHODS.ipynb.
# Presumably this is what defines plot_all_out_not_NEW, which the next cell calls
# without importing it -- confirm.
%run ./check_DOVS_METHODS.ipynb

In [None]:
# NOTE(review): this resets removed_due_to_overlap_col to None, overriding the value
# assigned in the earlier cell that builds ami_df_i_out / ami_df_i_not_out -- confirm
# the override is intentional.
removed_due_to_overlap_col =None
# Threshold forwarded to DOVSAudit.static_plot_n_PNs_w_power_srs below.
n_PNs_w_power_threshold = 0.95
#----------------------------------------------------------------------------------------------------
# plot_all_out_not_NEW is neither defined nor imported in this notebook's own cells;
# presumably it comes from the %run of check_DOVS_METHODS.ipynb -- confirm.
# Both the DOVS and the conservative begin/end times are taken from
# audit_i.dovs_outg_t_beg_end, and a 2x2 grid of subplots sharing the x axis is requested.
fig, axs = plot_all_out_not_NEW(
    fig_num                    = 0, 
    ami_df_i                   = ami_df_i, 
    ami_df_i_out               = ami_df_i_out, 
    ami_df_i_not_out           = ami_df_i_not_out, 
    dovs_outg_t_beg            = audit_i.dovs_outg_t_beg_end[0], 
    dovs_outg_t_end            = audit_i.dovs_outg_t_beg_end[1], 
    cnsrvtv_out_t_beg          = audit_i.dovs_outg_t_beg_end[0], 
    cnsrvtv_out_t_end          = audit_i.dovs_outg_t_beg_end[1], 
    means_df                   = means_df, 
    outg_rec_nb                = audit_i.outg_rec_nb, 
    outage_nb                  = audit_i.outage_nb, 
    n_PNs_dovs                 = audit_i.n_PNs_dovs, 
    ci_dovs                    = audit_i.ci_cmi_dovs[0], 
    cmi_dovs                   = audit_i.ci_cmi_dovs[1], 
    ci_ami                     = audit_i.ci, 
    cmi_ami                    = audit_i.cmi, 
    name                       = 'AMI', 
    results_2_dict             = None, 
    expand_time                = pd.Timedelta('1 hour'), 
    removed_due_to_overlap_col = removed_due_to_overlap_col, 
    mean_keys_to_include       = ['winner', 'conservative', 'zero_times'], 
    default_subplots_args      = dict(n_x=2, n_y=2, row_major=True, sharex=True), 
    other_dovs_events_df       = other_dovs_events_df, 
    leg_i_plot                 = 1, 
    leg_kwargs                 = dict(ncols=1, fontsize=15, bbox_to_anchor=(1, 1.2)), 
    ci_info_fontsize           = 16, 
    left_text_x                = 0.915  
)

# Draw the n_PNs_w_power series onto the axis at index 3 of the same figure,
# with the threshold marked in magenta.
# NOTE(review): '1T' uses the single-letter 'T' alias; if simp_freq is forwarded to
# pandas as a frequency string, newer pandas releases prefer '1min' -- confirm.
if n_PNs_w_power_srs is not None:
    fig, axs[3] = DOVSAudit.static_plot_n_PNs_w_power_srs(
        n_PNs_w_power_srs = n_PNs_w_power_srs, 
        simp_freq         = '1T', 
        threshold         = n_PNs_w_power_threshold, 
        fig_num           = 0, 
        fig_ax            = (fig, axs[3]), 
        threshold_color   = 'magenta'
    )

# Turn the bottom x tick labels on for every axis in axs.
for ax_i in axs:
    ax_i.xaxis.set_tick_params(labelbottom=True)