In [None]:
from importlib import reload
#reload(Utilities)

import sys, os
import re

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns
from packaging import version
import itertools
from dateutil.parser import parse
from operator import itemgetter

from pmdarima import auto_arima
import statsmodels.api as sm
from statsmodels.tsa.stattools import acovf, acf, pacf, pacf_yw, pacf_ols
from pandas.plotting import lag_plot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.graphics.tsaplots import month_plot, quarter_plot, seasonal_plot
from statsmodels.tsa.arima_model import ARMA, ARIMA, ARMAResults, ARIMAResults
from statsmodels.tsa.statespace.sarimax import SARIMAX

from arch import arch_model

from scipy.stats.mstats import trim
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
# import constants for the days of the week
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
#---------------------------------------------------------------------
import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
from AMINonVee import AMINonVee
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
import Utilities_dt
import Plot_Box_sns
import GrubbsTest
import DickeyFullerTest as dft

In [None]:
import warnings
# NOTE(review): with no category/module filter this suppresses EVERY warning for the
# rest of the session (including pandas chained-assignment and deprecation warnings),
# so problems flagged by libraries further down will not be shown.
warnings.filterwarnings('ignore')

# -----------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------

## Trying to combine total, received, and delivered for circuit data

In [None]:
def build_circuit_non_vals(df, non_val_cols, 
                           aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                           aep_srvc_qlty_idntfr_vals={'rec':'RECEIVED', 'del':'DELIVERED', 'tot':'TOTAL'}):
    """
    Build a single frame of the non-value columns covering every index entry in df.

    The received, delivered and total subsets of df typically do not each contain
    the entire set of dates.  To simplify later merges, the non-value columns of the
    three subsets are stacked with combine_first, which keeps the first non-null
    element found, in the order delivered -> received -> total.  Subsequent merges
    can then be done using only the value columns and the time index, and the frame
    returned here is joined back onto the values at the end.

    Parameters
    ----------
    df : DataFrame holding the rows for all service-quality identifiers.
    non_val_cols : list of column labels to carry over.
    aep_srvc_qlty_idntfr_col : column holding the service-quality identifier.
    aep_srvc_qlty_idntfr_vals : dict with keys 'rec', 'del', 'tot' giving the labels
        used in aep_srvc_qlty_idntfr_col.

    Returns
    -------
    DataFrame with columns non_val_cols, indexed by the union of the three subsets' indices.

    Raises
    ------
    AssertionError if any cell of the combined frame is still null.
    """
    idntfr = df[aep_srvc_qlty_idntfr_col]
    rec_part = df[idntfr==aep_srvc_qlty_idntfr_vals['rec']][non_val_cols]
    del_part = df[idntfr==aep_srvc_qlty_idntfr_vals['del']][non_val_cols]
    tot_part = df[idntfr==aep_srvc_qlty_idntfr_vals['tot']][non_val_cols]
    #------------------------
    # Delivered takes priority; received and then total only fill the gaps
    combined = del_part
    for fallback in (rec_part, tot_part):
        combined = combined.combine_first(fallback)
    #------------------------
    n_missing = combined.isna().sum().sum()
    assert n_missing==0 #TODO should this assert remain?
    return combined

In [None]:
def get_circuit_total_vals_for_time_idx(gp1):
    """
    Combine the RECEIVED, DELIVERED and TOTAL rows of a single time index into net values.

    Parameters
    ----------
    gp1 : DataFrame for one group of a groupby on the time index.  Must contain the
        columns 'aep_srvc_qlty_idntfr', 'value_sum_cir' and 'counts_cir', share a
        single index value, and hold at most one row per identifier.

    Returns
    -------
    dict with keys
        'value_sum_cir'  : total + delivered - received
        'counts_cir'     : max(received counts, total counts + delivered counts)
        'value_mean_cir' : value_sum_cir / counts_cir, or NaN when counts_cir is 0

    Raises
    ------
    AssertionError if gp1 spans more than one index value, has more than three rows,
    or has more than one row for an identifier.
    """
    # gp1 should be a DataFrame from a grouped call on time index
    assert(gp1.index.nunique()==1)

    # Should only be 'RECEIVED', 'DELIVERED', and 'TOTAL' as possible
    # values for aep_srvc_qlty_idntfr below
    aep_srvc_qlty_idntfrs = gp1['aep_srvc_qlty_idntfr'].tolist()
    assert(len(aep_srvc_qlty_idntfrs)<=3)

    def _sum_and_counts(idntfr):
        # Sum/counts for one identifier; an absent identifier contributes zeros
        rows = gp1[gp1['aep_srvc_qlty_idntfr']==idntfr]
        if rows.shape[0]==0:
            return {'value_sum_cir':0, 'counts_cir':0}
        assert(rows.shape[0]==1)
        return rows.iloc[0][['value_sum_cir', 'counts_cir']].to_dict()

    received = _sum_and_counts('RECEIVED')
    delivered = _sum_and_counts('DELIVERED')
    total = _sum_and_counts('TOTAL')
    #----------------------------------------------
    net_sum = total['value_sum_cir']+delivered['value_sum_cir']-received['value_sum_cir']
    net_counts = max(received['counts_cir'], total['counts_cir']+delivered['counts_cir'])
    # A mean over zero counts is undefined; return NaN rather than raising
    # ZeroDivisionError in the middle of a groupby.apply
    if net_counts==0:
        net_mean = float('nan')
    else:
        net_mean = net_sum/net_counts

    return {'value_sum_cir':net_sum, 
            'counts_cir':net_counts, 
            'value_mean_cir':net_mean}

In [None]:
def get_ciruit_total_vals_method1(df, non_val_cols, 
                                  aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                                  aep_srvc_qlty_idntfr_vals={'rec':'RECEIVED', 'del':'DELIVERED', 'tot':'TOTAL'}):
    """
    Net circuit values per time index, computed group-by-group.

    df is grouped on its index (level 0) and each group is reduced with
    get_circuit_total_vals_for_time_idx; the resulting dicts are expanded into
    columns and joined (inner, on index) with the non-value columns built by
    build_circuit_non_vals.  The identifier column of the result is set to 'TOTAL'.

    Note: the per-group reduction is called without aep_srvc_qlty_idntfr_col or
    aep_srvc_qlty_idntfr_vals; those two arguments are only forwarded to
    build_circuit_non_vals.

    Returns
    -------
    DataFrame indexed like the groups, holding the net value columns, non_val_cols
    and aep_srvc_qlty_idntfr_col.
    """
    per_time_idx = df.groupby(level=0).apply(get_circuit_total_vals_for_time_idx).to_dict()
    net_df = pd.DataFrame.from_dict(per_time_idx, orient='index')

    descriptive_df = build_circuit_non_vals(df, non_val_cols, 
                                            aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
                                            aep_srvc_qlty_idntfr_vals=aep_srvc_qlty_idntfr_vals)
    # Both frames should describe exactly the same set of time indices
    assert descriptive_df.shape[0]==net_df.shape[0]
    net_df = net_df.merge(descriptive_df, how='inner', left_index=True, right_index=True)
    net_df[aep_srvc_qlty_idntfr_col] = 'TOTAL'
    return net_df

In [None]:
def get_ciruit_total_vals_method2(df, non_val_cols, 
                                  value_cols_dict = {'value_sum_cir_col':'value_sum_cir', 
                                                     'counts_cir_col':'counts_cir', 
                                                     'value_mean_cir_col':'value_mean_cir'}, 
                                  remove_intermediate_cols=True, 
                                  aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                                  aep_srvc_qlty_idntfr_vals={'rec':'RECEIVED', 'del':'DELIVERED', 'tot':'TOTAL'}, 
                                  maintain_original_col_order=True):
    """
    Net circuit values per time index, computed with vectorized merges.

    The received, delivered and total rows of df are outer-merged on the index and
    combined as
        counts = max(counts_rec, counts_del + counts_tot)
        sum    = sum_tot + sum_del - sum_rec
        mean   = sum / counts
    An identifier missing for a given index contributes 0 to each term.

    Parameters
    ----------
    df : DataFrame with at most one row per (index, identifier).
    non_val_cols : columns carried over via build_circuit_non_vals.
    value_cols_dict : dict with keys 'value_sum_cir_col', 'counts_cir_col' and
        'value_mean_cir_col' naming the corresponding columns of df.
    remove_intermediate_cols : drop the *_del/*_rec/*_tot helper columns when True.
    aep_srvc_qlty_idntfr_col : column holding the service-quality identifier.
    aep_srvc_qlty_idntfr_vals : dict with keys 'rec', 'del', 'tot' giving the labels
        used in aep_srvc_qlty_idntfr_col; the 'tot' label is written to the output.
    maintain_original_col_order : move the columns shared with df to the front, in
        df's order, via Utilities_df.move_cols_to_front.

    Returns
    -------
    DataFrame with one row per index value holding the net value columns,
    non_val_cols and aep_srvc_qlty_idntfr_col.

    Raises
    ------
    AssertionError if a required key is missing from value_cols_dict, if any
    identifier has duplicate index entries, or if the non-value frame and the
    value frame differ in length.
    """
    # TODO How to enforce assert(len(aep_srvc_qlty_idntfrs)<=3)
    #---------------------------------------------------
    expected_keys = ['counts_cir_col', 'value_mean_cir_col', 'value_sum_cir_col']
    assert(len(set(value_cols_dict.keys()).intersection(set(expected_keys)))==3)    
    value_cols = list(value_cols_dict.values())
    #---------------------------------------------------
    df_rec = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['rec']]
    df_del = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['del']]
    df_tot = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['tot']]
    #---------------------------------------------------    
    # This essentially enforces assert(len(aep_srvc_qlty_idntfrs)<=3)
    # -- each line ensures an identifier appears at most once per index value
    assert(df_rec.shape[0]==df_rec.index.nunique())
    assert(df_del.shape[0]==df_del.index.nunique())
    assert(df_tot.shape[0]==df_tot.index.nunique())
    #---------------------------------------------------
    return_df = df_del[value_cols].merge(df_rec[value_cols], 
                                         left_index=True, right_index=True, 
                                         how='outer', suffixes=('_del', '_rec')) 
    # Note: A suffixes argument would do nothing in the next merge, as df_tot contains value_cols,
    #       whereas return_df currently contains [f'{x}_del' for x in value_cols] and
    #       [f'{x}_rec' for x in value_cols].  There are no overlapping column names, so the
    #       total columns must be tagged explicitly using .rename
    return_df = return_df.merge(df_tot[value_cols], left_index=True, right_index=True, how='outer')
    return_df = return_df.rename(columns = {x:f'{x}_tot' for x in value_cols})
    # Outer merges are used so there is always an entry for delivered, received, and total.
    # If one of these did not exist in the original, the outer merge leaves a NaN in the cell,
    # so fillna(0) makes the missing identifier contribute nothing.
    return_df = return_df.fillna(0)
    #---------------------------------------------------
    value_sum_cir_col = value_cols_dict['value_sum_cir_col']
    counts_cir_col = value_cols_dict['counts_cir_col']
    value_mean_cir_col = value_cols_dict['value_mean_cir_col']
    #---------------------------------------------------
    # List of the newly created columns, needed if remove_intermediate_cols==True
    new_col_tags = ['_del', '_rec', '_tot']
    new_cols = [value_col+tag for value_col in value_cols for tag in new_col_tags]
    new_cols.append(f"{counts_cir_col}_del_plus_tot")
    #---------------------------------------------------
    return_df[f"{counts_cir_col}_del_plus_tot"] = (return_df[f"{counts_cir_col}_del"] +
                                                   return_df[f"{counts_cir_col}_tot"])
    return_df[counts_cir_col] = return_df[[f"{counts_cir_col}_rec", 
                                           f"{counts_cir_col}_del_plus_tot"]].max(axis=1)
    return_df[value_sum_cir_col] = (return_df[f"{value_sum_cir_col}_tot"] +
                                    return_df[f"{value_sum_cir_col}_del"] -
                                    return_df[f"{value_sum_cir_col}_rec"])
    return_df[value_mean_cir_col] = return_df[value_sum_cir_col]/return_df[counts_cir_col]
    return_df[counts_cir_col]=return_df[counts_cir_col].astype('int64')
    #---------------------------------------------------
    if remove_intermediate_cols:
        return_df = return_df.drop(columns=new_cols)
    #---------------------------------------------------
    non_vals_df = build_circuit_non_vals(df, non_val_cols, 
                                         aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
                                         aep_srvc_qlty_idntfr_vals=aep_srvc_qlty_idntfr_vals)
    assert(non_vals_df.shape[0]==return_df.shape[0])
    return_df = return_df.merge(non_vals_df, how='inner', left_index=True, right_index=True)
    # Label the combined rows with the caller's 'tot' label ('TOTAL' by default)
    return_df[aep_srvc_qlty_idntfr_col] = aep_srvc_qlty_idntfr_vals['tot']
    if maintain_original_col_order:
        cols_from_og = [x for x in df.columns if x in return_df.columns]
        return_df = Utilities_df.move_cols_to_front(return_df, cols_from_og)
    return return_df

# -----------------------------------------------------------------------------------------
# -----------------------------------------------------------------------------------------

In [None]:
# Figure counter, initialised to 0
# (presumably incremented as figures are created further down -- TODO confirm)
fig_num = 0

In [None]:
# Columns requested when assembling the outage (per-meter) kwh/vlt frames
# from the saved CSVs
cols_of_interest = [
    'serialnumber',
    'prem_nb', 
    'srvc_pole_nb', 
    'trsf_pole_nb',
    'annual_kwh',
    'station_nb',
    'xfmr_nb', 
    'starttimeperiod', 
    'endtimeperiod', 
    'aep_endtime_utc', 
    'timezoneoffset', 
    'aep_derived_uom',
    'aep_srvc_qlty_idntfr', 
    'value', 
    'aep_usage_dt'
]

# Columns requested when assembling the circuit-level kwh/vlt frames; the value
# columns here are the pre-aggregated 'value_sum', 'counts' and 'value_mean'
cols_of_interest_cir = [
    'starttimeperiod', 
    'endtimeperiod', 
    'aep_endtime_utc', 
    'timezoneoffset',
    'aep_derived_uom', 
    'aep_srvc_qlty_idntfr', 
    'aep_usage_dt', 
    'value_sum',
    'counts', 
    'value_mean'
]

In [None]:
# Directories holding the saved CSVs for outage record 11751094.
# Each path component is passed to os.path.join separately (instead of embedding
# backslashes in a single literal) so the paths also resolve on non-Windows systems.
df_outage_dir = os.path.join(Utilities.get_local_data_dir(), 
                             'sample_outages', 'outg_rec_nb_11751094')
df_circuit_dir = os.path.join(Utilities.get_local_data_dir(), 
                              'sample_circuits', 'NewMethod', 'outg_rec_nb_11751094')
df_xfmr_circuit_dir = os.path.join(Utilities.get_local_data_dir(), 
                                   'sample_circuits', 'NewMethod', 'GroupByXfmr_v2', 'outg_rec_nb_11751094')

# Outage begin/end and the centre of a no-outage comparison period
# (the *_local suffix suggests local time -- TODO confirm)
out_t_beg_local = pd.to_datetime('2020-06-21 10:52:00')
out_t_end_local = pd.to_datetime('2020-06-21 12:50:00')
no_outage_center_local = pd.to_datetime('2020-08-15')

In [None]:
# Settings handed to AMINonVee.build_time_resampled_dfs for the outage data
freqs = ['H', '4H', 'D', 'MS']
time_col_for_agg = 'endtimeperiod_utc'
other_grouper_cols = ['serialnumber']
build_agg_dfs = True
other_cols_to_keep = [
    'prem_nb', 'srvc_pole_nb', 'trsf_pole_nb', 'station_nb', 
    'xfmr_nb', 'aep_derived_uom', 'aep_srvc_qlty_idntfr'
]
other_cols_to_keep_agg = ['aep_derived_uom', 'aep_srvc_qlty_idntfr']
agg_cols = ['value', 'annual_kwh']
agg_types = ['mean']
mix_agg_functions = False

# A column cannot be both carried over unchanged and aggregated, so the
# kept-column lists must not overlap with agg_cols
assert set(other_cols_to_keep).isdisjoint(agg_cols)
assert set(other_cols_to_keep_agg).isdisjoint(agg_cols)

In [None]:
# Keys passed as df_key/df_agg_key to AMINonVee.build_time_resampled_dfs and used to
# index its per-frequency results (e.g. dfs_kwh_dict['15T'][df_key])
df_key = 'df'
df_agg_key = 'df_agg'

# Load Outage data

In [None]:
# Assemble the 15-minute kwh and vlt frames from the saved outage CSVs
kwh_vlt_dfs_dict = AMINonVee.assemble_kwh_vlt_dfs_from_saved_csvs(
    file_dir=df_outage_dir, 
    glob_pattern=r'outg_rec_nb_*.csv', 
    cols_of_interest=cols_of_interest, 
    verbose=True
)
df_kwh_15T = kwh_vlt_dfs_dict['kwh']
df_vlt_15T = kwh_vlt_dfs_dict['vlt']
#-------------------------------------------------------------------------------------------------
# kwh and vlt are resampled with exactly the same settings, so the keyword
# arguments are collected once and reused for both calls
_outage_resample_kwargs = dict(
    base_freq='15T', freqs=freqs, 
    other_grouper_cols=other_grouper_cols, other_cols_to_keep=other_cols_to_keep, 
    build_agg_dfs=build_agg_dfs, time_col_for_agg=time_col_for_agg, 
    agg_cols=agg_cols, agg_types=agg_types, 
    other_cols_to_keep_agg=other_cols_to_keep_agg, mix_agg_functions=mix_agg_functions, 
    df_key=df_key, df_agg_key=df_agg_key
)
#-------------------------------------------------------------------------------------------------
dfs_kwh_dict = AMINonVee.build_time_resampled_dfs(df_kwh_15T, **_outage_resample_kwargs)
# The base-frequency entry should be the input frame itself
assert df_kwh_15T.equals(dfs_kwh_dict['15T'][df_key])
#-------------------------------------------------------------------------------------------------
dfs_vlt_dict = AMINonVee.build_time_resampled_dfs(df_vlt_15T, **_outage_resample_kwargs)
assert df_vlt_15T.equals(dfs_vlt_dict['15T'][df_key])
#-------------------------------------------------------------------------------------------------

In [None]:
# dfs_kwh_dict['H']['df_agg']

# Load Circuit data

In [None]:
#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
# There seems to be an inconsistency in the database
# For some, each has a 'RECEIVED' and 'DELIVERED'
# For others, each has a 'RECEIVED' and 'TOTAL'
# I have not found a case yet with all three ('RECEIVED', 'DELIVERED', and 'TOTAL')
# It seems that the 'RECEIVED' counts is typically correct
# However, may want to simply take the max of n_TOTAL+n_DELIVERED and n_RECEIVED

def _drop_null_sums_and_rebuild_counts(df, sum_col='value_sum', mean_col='value_mean', counts_col='counts'):
    """
    Drop rows whose sum is null and recompute counts as round(sum/mean) where sum != 0.

    Rows with a zero sum keep their existing counts.  The filtered frame is copied
    explicitly so the .loc assignment acts on an independent frame rather than on
    the result of a boolean selection (which triggers SettingWithCopyWarning).

    Returns a new DataFrame; the input is not modified.
    """
    df = df[df[sum_col].notna()].copy()
    nonzero_sum = df[sum_col]!=0
    df.loc[nonzero_sum, counts_col] = (df.loc[nonzero_sum, sum_col]/df.loc[nonzero_sum, mean_col]).round()
    df[counts_col] = df[counts_col].astype(int)
    return df

kwh_vlt_dfs_cir_dict = AMINonVee.assemble_kwh_vlt_dfs_from_saved_csvs(file_dir=df_circuit_dir, glob_pattern=r'outg_rec_nb_*q[0-9].csv', 
                                                            value_cols=['value_sum', 'counts', 'value_mean'], 
                                                            cols_of_interest=cols_of_interest_cir, 
                                                            combine_kwh_delivered_and_received=False, 
                                                            merge_and_groupby_cols=['aep_endtime_utc'], 
                                                            verbose=True)
df_cir_kwh_15T = kwh_vlt_dfs_cir_dict['kwh']
df_cir_vlt_15T = kwh_vlt_dfs_cir_dict['vlt']

##TODO Currently, counts includes Null values as well.  The code below changes it back
df_cir_kwh_15T = _drop_null_sums_and_rebuild_counts(df_cir_kwh_15T)
df_cir_vlt_15T = _drop_null_sums_and_rebuild_counts(df_cir_vlt_15T)

#-------------------------------------------------------------------------------------------------
rename_agg_cols_cir={'value_sum':'value_sum_cir', 'counts':'counts_cir', 'value_mean':'value_mean_cir'}
df_cir_kwh_15T = df_cir_kwh_15T.rename(columns=rename_agg_cols_cir)
df_cir_vlt_15T = df_cir_vlt_15T.rename(columns=rename_agg_cols_cir)
agg_cols_cir=list(rename_agg_cols_cir.values())

#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
# Need to handle how delivered, received, total will be combined before re-indexing!
# For now, let's simply use 'TOTAL' from kwh
df_cir_kwh_15T_FULL = df_cir_kwh_15T.copy()
#df_cir_kwh_15T = df_cir_kwh_15T[df_cir_kwh_15T['aep_srvc_qlty_idntfr']=='TOTAL'].copy()

# TODO!!!!!!!!!!!!!!!!!!!!!
# How to handle different aep_srvc_qlty_idntfr for vlt?
# ['INSTVA1', 'INSTVC1', 'INSTVB1', 'AVG']
# For now, 'AVG' from vlt
df_cir_vlt_15T_FULL = df_cir_vlt_15T.copy()
df_cir_vlt_15T = df_cir_vlt_15T[df_cir_vlt_15T['aep_srvc_qlty_idntfr']=='AVG'].copy()

### Use combine method on full dataset and then resample/aggregate

In [None]:
# Descriptive (non-value) columns to carry through when the received/delivered/total
# rows are combined by get_ciruit_total_vals_method2
non_val_cols = ['starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
                'aep_derived_uom', 'aep_usage_dt', 'aep_endtime_utc_from_timestamp', 
                'starttimeperiod_utc', 'endtimeperiod_utc']
# Names of the sum / counts / mean columns after the *_cir rename of the circuit frames
value_cols_dict = {'value_sum_cir_col':'value_sum_cir', 
                   'counts_cir_col':'counts_cir', 
                   'value_mean_cir_col':'value_mean_cir'}

In [None]:
# Collapse the received/delivered/total kwh rows into a single net row per time index.
# NOTE(review): this overwrites df_cir_kwh_15T with its own combined result; the
# uncombined rows remain available in df_cir_kwh_15T_FULL.
df_cir_kwh_15T = get_ciruit_total_vals_method2(df_cir_kwh_15T, non_val_cols, 
                                               value_cols_dict=value_cols_dict, remove_intermediate_cols=True)
#-------------------------------------------------------------------------------------------------
# TODO!!!!!!!!!!!!!!!!!
# Also, would expect e.g. dfs_cir_kwh_dict['H'][df_key]==dfs_cir_kwh_dict['H'][df_agg_key]
# However, this is close but not exactly true
# It appears there are some times with duplicate entries
# Compare the size of df_cir_kwh_15T[df_cir_kwh_15T['aep_srvc_qlty_idntfr']=='TOTAL']
# to the number of unique indices
#
#

# kwh: aggregated with both 'mean' and 'sum'
dfs_cir_kwh_dict = AMINonVee.build_time_resampled_dfs(df_cir_kwh_15T, freqs=freqs, other_grouper_cols=[], 
                                     build_agg_dfs=build_agg_dfs, time_col_for_agg=time_col_for_agg, 
                                     agg_cols=agg_cols_cir, agg_types=['mean', 'sum'], 
                                     df_key=df_key, df_agg_key=df_agg_key)
#-----
assert(df_cir_kwh_15T.equals(dfs_cir_kwh_dict['15T'][df_key]))
#-------------------------------------------------------------------------------------------------
# vlt: agg_types is not passed here, so build_time_resampled_dfs uses its own default
dfs_cir_vlt_dict = AMINonVee.build_time_resampled_dfs(df_cir_vlt_15T, freqs=freqs, other_grouper_cols=[], 
                                     build_agg_dfs=build_agg_dfs, time_col_for_agg=time_col_for_agg, agg_cols=agg_cols_cir, 
                                     df_key=df_key, df_agg_key=df_agg_key)
#-----
assert(df_cir_vlt_15T.equals(dfs_cir_vlt_dict['15T'][df_key]))
#-------------------------------------------------------------------------------------------------

In [None]:
# Sanity check: the two independently built UTC end-time columns should agree
# row-for-row in every 15-minute frame
print('Checking endtimeperiod_utc against aep_endtime_utc_from_timestamp')
_endtime_checks = [
    ('df_kwh_15T:     ', df_kwh_15T), 
    ('df_vlt_15T:     ', df_vlt_15T), 
    ('df_cir_kwh_15T: ', df_cir_kwh_15T), 
    ('df_cir_vlt_15T: ', df_cir_vlt_15T), 
]
for _label, _frame in _endtime_checks:
    print(_label, all(_frame['endtimeperiod_utc']==_frame['aep_endtime_utc_from_timestamp']))

In [None]:
# df_kwh_15T.head()

# Load Xfmr Circuit data

In [None]:
def build_mean_from_sum_and_counts(df, sum_x_col, n_counts_col, placement_col):
    """
    Store sum/counts in df[placement_col].

    The column is written onto df itself (df is modified in place) and the same
    object is returned.
    """
    totals = df[sum_x_col]
    counts = df[n_counts_col]
    df[placement_col] = totals/counts
    return df

def build_std_from_mossom(df, sum_x_col, sum_x2_col, n_counts_col, placement_col, sample_std=True):
    """
    Store the standard deviation computed from sum(x), sum(x**2) and n in df[placement_col].

    mossom = Mean Of Squares minus Square Of Means
        variance = std**2 = bar{x**2} - (bar{x})**2
    Here the sums are used instead of the means:
        std_p**2 = (1/n)*sum(x**2) - (1/n**2)*(sum(x))**2
        std_s**2 = (n/(n-1))*std_p**2
    where std_p is the population std (n in the denominator) and std_s is the
    sample std (n-1 in the denominator).

    Parameters
    ----------
    df : DataFrame, modified in place and returned.
    sum_x_col, sum_x2_col, n_counts_col : columns holding sum(x), sum(x**2) and n.
    placement_col : column in which to store the result.
    sample_std : if True (default) return the sample std, otherwise the population std.

    Notes
    -----
    The difference of two nearly equal numbers can come out slightly negative due to
    floating-point round-off (e.g. when all x are equal), which would make the square
    root NaN.  The variance is therefore clipped at 0 before taking the root.
    With sample_std=True and n==1 the result is NaN (0*inf), as the sample std is
    undefined for a single observation.
    """
    n = df[n_counts_col]
    variance_p = df[sum_x2_col]/n - df[sum_x_col]*df[sum_x_col]/(n*n)
    # Guard against tiny negative values from catastrophic cancellation
    variance_p = variance_p.clip(lower=0)
    df[placement_col] = np.sqrt(variance_p)
    if sample_std:
        df[placement_col] = df[placement_col]*np.sqrt((df[n_counts_col]/(df[n_counts_col]-1)))
    return df

def build_mean_and_std_from_sum_x_x2_and_counts(df, sum_x_col, sum_x2_col, n_counts_col, 
                                                placement_col_mean, placement_col_std, 
                                                sample_std=True):
    """
    Add both the mean and the standard deviation columns to df.

    Thin wrapper calling build_mean_from_sum_and_counts followed by
    build_std_from_mossom.

    Parameters
    ----------
    df : DataFrame holding the sum(x), sum(x**2) and counts columns.
    sum_x_col, sum_x2_col, n_counts_col : names of those columns.
    placement_col_mean, placement_col_std : names of the columns to create.
    sample_std : forwarded to build_std_from_mossom (sample std when True,
        population std when False).

    Returns
    -------
    The DataFrame returned by build_std_from_mossom.
    """
    df = build_mean_from_sum_and_counts(df, sum_x_col, n_counts_col, placement_col_mean)
    # sample_std must be forwarded explicitly; otherwise the callee's default is always used
    df = build_std_from_mossom(df, sum_x_col, sum_x2_col, n_counts_col, placement_col_std, 
                               sample_std=sample_std)
    return df

In [None]:
def build_net_counts_for_del_rec_tot(merged_df, col_name_base, 
           col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
           remove_intermediate_cols=True):
    """
    Build the net counts column: max(received, delivered + total), cast to int64.

    merged_df must contain col_name_base+tag for each of the 'rec', 'del' and 'tot'
    tags.  The helper column f"{col_name_base}_del_plus_tot" and the result column
    col_name_base are written onto merged_df itself.  When remove_intermediate_cols
    is True a new frame without the tagged and helper columns is returned;
    otherwise merged_df is returned.
    """
    rec_col, del_col, tot_col = (col_name_base+col_tags[k] for k in ('rec', 'del', 'tot'))
    del_plus_tot_col = f"{col_name_base}_del_plus_tot"
    #---------------------------------------------------
    merged_df[del_plus_tot_col] = merged_df[del_col] + merged_df[tot_col]
    net_counts = merged_df[[rec_col, del_plus_tot_col]].max(axis=1)
    merged_df[col_name_base] = net_counts
    merged_df[col_name_base] = merged_df[col_name_base].astype('int64')
    #---------------------------------------------------
    if not remove_intermediate_cols:
        return merged_df
    return merged_df.drop(columns=[rec_col, del_col, tot_col, del_plus_tot_col])

def build_net_value_for_del_rec_tot(merged_df, col_name_base, 
                                    col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
                                    remove_intermediate_cols=True):
    """
    Build the net value column: total + delivered - received.

    merged_df must contain col_name_base+tag for each of the 'rec', 'del' and 'tot'
    tags.  The result column col_name_base is written onto merged_df itself.  When
    remove_intermediate_cols is True a new frame without the three tagged columns is
    returned; otherwise merged_df is returned.
    """
    rec_col, del_col, tot_col = (col_name_base+col_tags[k] for k in ('rec', 'del', 'tot'))
    #--------------------------------------------------- 
    tot_vals = merged_df[tot_col]
    del_vals = merged_df[del_col]
    rec_vals = merged_df[rec_col]
    merged_df[col_name_base] = tot_vals + del_vals - rec_vals
    #--------------------------------------------------- 
    if not remove_intermediate_cols:
        return merged_df
    return merged_df.drop(columns=[rec_col, del_col, tot_col])










def get_ciruit_total_vals(df, non_val_cols, 
                          value_cols_dict_list = [
                              {'value_col':'sum_value_sum', 'counts_col':'sum_counts', 'mean_col':'mean_value_sum'}, 
                              {'value_col':'sum_value_sq_sum', 'counts_col':'sum_counts', 'mean_col':'mean_value_sq_sum'}, 
                              {'value_col':'sum_value_mean', 'counts_col':'sum_counts', 'mean_col':'mean_value_mean'},
                              {'value_col':'sum_value_std', 'counts_col':'sum_counts', 'mean_col':'mean_value_std'}
                          ], 
                          remove_intermediate_cols=True, 
                          aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
                          aep_srvc_qlty_idntfr_vals={'rec':'RECEIVED', 'del':'DELIVERED', 'tot':'TOTAL'}, 
                          maintain_original_col_order=True):
    """
    Combine the received, delivered and total rows of df into one net row per index value.

    For every counts column:  net = max(rec, del + tot)   (cast to int64)
    For every value column:   net = tot + del - rec
    For every entry of value_cols_dict_list that names both a 'counts_col' and a
    'mean_col':  mean_col = value_col / counts_col.
    An identifier missing for a given index contributes 0 to each term.

    Parameters
    ----------
    df : DataFrame with at most one row per (index, identifier).
    non_val_cols : columns carried over via build_circuit_non_vals.
    value_cols_dict_list : list of dicts with key 'value_col' and, optionally,
        'counts_col' and 'mean_col'.
    remove_intermediate_cols : drop the *_rec/*_del/*_tot helper columns when True.
    aep_srvc_qlty_idntfr_col : column holding the service-quality identifier.
    aep_srvc_qlty_idntfr_vals : dict with keys 'rec', 'del', 'tot' giving the labels
        used in aep_srvc_qlty_idntfr_col; the 'tot' label is written to the output.
    maintain_original_col_order : move the columns shared with df to the front, in
        df's order, via Utilities_df.move_cols_to_front.

    Returns
    -------
    DataFrame with one row per index value.

    Raises
    ------
    AssertionError if any identifier has duplicate index entries or if the non-value
    frame and the value frame differ in length.
    """
    #--------------------------------------------------- 
    value_cols = [x['value_col'] for x in value_cols_dict_list]
    # De-duplicate while preserving first-seen order (a set would make the column
    # order vary between runs); entries without a counts_col are simply skipped
    counts_cols = list(dict.fromkeys(
        x['counts_col'] for x in value_cols_dict_list if x.get('counts_col', None) is not None
    ))
    merge_cols = value_cols + counts_cols
    #---------------------------------------------------
    df_rec = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['rec']]
    df_del = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['del']]
    df_tot = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['tot']]
    #---------------------------------------------------    
    # This essentially enforces assert(len(aep_srvc_qlty_idntfrs)<=3)
    # -- each line ensures an identifier appears at most once per index value
    assert(df_rec.shape[0]==df_rec.index.nunique())
    assert(df_del.shape[0]==df_del.index.nunique())
    assert(df_tot.shape[0]==df_tot.index.nunique())
    #---------------------------------------------------
    return_df = df_del[merge_cols].merge(df_rec[merge_cols], 
                                         left_index=True, right_index=True, 
                                         how='outer', suffixes=('_del', '_rec')) 
    # Note: A suffixes argument would do nothing in the next merge, as df_tot contains merge_cols,
    #       whereas return_df currently contains [f'{x}_del' for x in merge_cols] and
    #       [f'{x}_rec' for x in merge_cols].  There are no overlapping column names, so the
    #       total columns must be tagged explicitly using .rename
    return_df = return_df.merge(df_tot[merge_cols], left_index=True, right_index=True, how='outer')
    return_df = return_df.rename(columns = {x:f'{x}_tot' for x in merge_cols})
    # Outer merges are used so there is always an entry for delivered, received, and total.
    # If one of these did not exist in the original, the outer merge leaves a NaN in the cell,
    # so fillna(0) makes the missing identifier contribute nothing.
    return_df = return_df.fillna(0)
    #---------------------------------------------------
    for counts_col in counts_cols:
        return_df = build_net_counts_for_del_rec_tot(return_df, col_name_base=counts_col, 
                                                     col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
                                                     remove_intermediate_cols=remove_intermediate_cols)
    for value_col in value_cols:
        return_df = build_net_value_for_del_rec_tot(return_df, col_name_base=value_col, 
                                                    col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
                                                    remove_intermediate_cols=remove_intermediate_cols)
    for cols_dict in value_cols_dict_list:
        # A mean can only be rebuilt when both the target and the counts column are named
        if (cols_dict.get('mean_col', None) is None or 
            cols_dict.get('counts_col', None) is None):
            continue
        return_df = build_mean_from_sum_and_counts(return_df, cols_dict['value_col'], cols_dict['counts_col'], cols_dict['mean_col'])
    #---------------------------------------------------
    non_vals_df = build_circuit_non_vals(df, non_val_cols, 
                                         aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
                                         aep_srvc_qlty_idntfr_vals=aep_srvc_qlty_idntfr_vals)
    assert(non_vals_df.shape[0]==return_df.shape[0])
    return_df = return_df.merge(non_vals_df, how='inner', left_index=True, right_index=True)
    # Label the combined rows with the caller's 'tot' label ('TOTAL' by default)
    return_df[aep_srvc_qlty_idntfr_col] = aep_srvc_qlty_idntfr_vals['tot']
    if maintain_original_col_order:
        cols_from_og = [x for x in df.columns if x in return_df.columns]
        return_df = Utilities_df.move_cols_to_front(return_df, cols_from_og)
    return return_df


# def get_ciruit_total_vals(df, non_val_cols, value_cols, counts_cols, 
#                           mean_cols_dict_list=None, 
#                           remove_intermediate_cols=True, 
#                           aep_srvc_qlty_idntfr_col='aep_srvc_qlty_idntfr', 
#                           aep_srvc_qlty_idntfr_vals={'rec':'RECEIVED', 'del':'DELIVERED', 'tot':'TOTAL'}, 
#                           maintain_original_col_order=True):
#     # Note: If new mean columns need to be calculated, use value_cols_dict_list
#     #       Each entry should be a dict with keys: 'value_col', 'counts_col', 'mean_col' 
#     #--------------------------------------------------- 
#     merge_cols = value_cols + counts_cols
#     #---------------------------------------------------
#     df_rec = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['rec']]
#     df_del = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['del']]
#     df_tot = df[df[aep_srvc_qlty_idntfr_col]==aep_srvc_qlty_idntfr_vals['tot']]
#     #---------------------------------------------------    
#     # This essentially enforces assert(len(aep_srvc_qlty_idntfrs)<=3)
#     # -- each line ensures that every date in df has exactly one entry
#     assert(df_rec.shape[0]==df_rec.index.nunique())
#     assert(df_del.shape[0]==df_del.index.nunique())
#     assert(df_tot.shape[0]==df_tot.index.nunique())
#     #---------------------------------------------------
#     return_df = df_del[merge_cols].merge(df_rec[merge_cols], 
#                                          left_index=True, right_index=True, 
#                                          how='outer', suffixes=('_del', '_rec')) 
#     # Note: Suffix argument will do nothing here, as df_tot contains merge_cols, whereas
#     #       return_df currently contains [f'x_{del}' for x in merge_cols] and [f'x_{rec}' for x in merge_cols]
#     #       Therefore, there are no overlapping column names.  As such, we must rename using .rename
#     return_df = return_df.merge(df_tot[merge_cols], left_index=True, right_index=True, how='outer')
#     return_df = return_df.rename(columns = {x:f'{x}_tot' for x in merge_cols})
#     # I used an outer merge so there will always been an entry for delivered, received, and total in return_df
#     # However, if one of these did not exist in the original, the outer merge will leave a NaN value in the cell
#     # Therefore, I call return_df.fillna(0) to replace any of these NaNs with 0
#     return_df = return_df.fillna(0)
#     #---------------------------------------------------
#     for counts_col in counts_cols:
#         return_df = build_net_counts_for_del_rec_tot(return_df, col_name_base=counts_col, 
#                                                      col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
#                                                      remove_intermediate_cols=remove_intermediate_cols)
#     for value_col in value_cols:
#         return_df = build_net_value_for_del_rec_tot(return_df, col_name_base=value_col, 
#                                                     col_tags = {'rec':'_rec', 'del':'_del', 'tot':'_tot'}, 
#                                                     remove_intermediate_cols=remove_intermediate_cols)
#     if mean_cols_dict_list:
#         for mean_cols_dict in mean_cols_dict_list:
#             return_df = build_mean_from_sum_and_counts(return_df, mean_cols_dict['value_col'], mean_cols_dict['counts_col'], mean_cols_dict['mean_col'])
#     #---------------------------------------------------
#     non_vals_df = build_circuit_non_vals(df, non_val_cols, 
#                                          aep_srvc_qlty_idntfr_col=aep_srvc_qlty_idntfr_col, 
#                                          aep_srvc_qlty_idntfr_vals=aep_srvc_qlty_idntfr_vals)
#     assert(non_vals_df.shape[0]==return_df.shape[0])
#     return_df = return_df.merge(non_vals_df, how='inner', left_index=True, right_index=True)
#     return_df[aep_srvc_qlty_idntfr_col] = 'TOTAL'
#     if maintain_original_col_order:
#         cols_from_og = [x for x in df.columns if x in return_df.columns]
#         return_df = Utilities_df.move_cols_to_front(return_df, cols_from_og)
#     return return_df

In [None]:
# Settings for loading the transformer-grouped circuit CSVs
glob_pattern = r'outg_rec_nb_*q[0-9].csv'
# Every base quantity comes as a 'sum_' / 'mean_' pair, in that order
value_cols_xfmr_cir = [
    f'{agg}_{base}' 
    for base in ('value_sum', 'value_sq_sum', 'value_mean', 'value_std', 'counts', 'counts_including_null') 
    for agg in ('sum', 'mean')
]
agg_cols_xfmr_cir = value_cols_xfmr_cir
agg_types_xfmr_cir = ['mean', 'sum', 'count']
# Descriptive columns followed by all of the value columns
cols_of_interest_xfmr_cir = [
    'starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
    'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'aep_usage_dt'
] + value_cols_xfmr_cir
combine_kwh_delivered_and_received = False
merge_and_groupby_cols = ['aep_endtime_utc']
verbose = True

In [None]:
# glob_pattern=r'outg_rec_nb_*q[0-9].csv'
# value_cols_xfmr_cir = ['sum_value_sum', 'sum_value_sq_sum', 'mean_value_mean', 'mean_value_std', 
#                        'sum_counts', 'sum_counts_including_null']
# agg_cols_xfmr_cir=value_cols_xfmr_cir
# agg_types_xfmr_cir = ['mean', 'sum', 'count']
# cols_of_interest_xfmr_cir = ['starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
#                              'aep_derived_uom', 'aep_srvc_qlty_idntfr', 'aep_usage_dt',
#                              'sum_value_sum', 'sum_value_sq_sum', 'mean_value_mean', 'mean_value_std', 
#                              'sum_counts', 'sum_counts_including_null']
# combine_kwh_delivered_and_received=False
# merge_and_groupby_cols=['aep_endtime_utc']
# verbose=True

In [None]:
# Assemble the 15-minute kwh and vlt frames from the transformer-grouped circuit CSVs
kwh_vlt_dfs_xfmr_cir_dict = AMINonVee.assemble_kwh_vlt_dfs_from_saved_csvs(file_dir=df_xfmr_circuit_dir, glob_pattern=glob_pattern, 
                                                                 value_cols=value_cols_xfmr_cir, 
                                                                 cols_of_interest=cols_of_interest_xfmr_cir, 
                                                                 combine_kwh_delivered_and_received=combine_kwh_delivered_and_received, 
                                                                 merge_and_groupby_cols=merge_and_groupby_cols, 
                                                                 verbose=verbose)
df_xfmr_cir_kwh_15T = kwh_vlt_dfs_xfmr_cir_dict['kwh']
df_xfmr_cir_vlt_15T = kwh_vlt_dfs_xfmr_cir_dict['vlt']

#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
#TODO!!!!!!!!!!!!!!!!!!
# Need to handle how delivered, received, total will be combined before re-indexing!
# For now, let's simply use 'TOTAL' from kwh
# Keep an untouched copy of the full kwh frame (all identifiers)
df_xfmr_cir_kwh_15T_FULL = df_xfmr_cir_kwh_15T.copy()
# NOTE(review): the disabled line below refers to df_cir_kwh_15T, not df_xfmr_cir_kwh_15T
#df_cir_kwh_15T = df_cir_kwh_15T[df_cir_kwh_15T['aep_srvc_qlty_idntfr']=='TOTAL'].copy()

# TODO!!!!!!!!!!!!!!!!!!!!!
# How to handle different aep_srvc_qlty_idntfr for vlt?
# ['INSTVA1', 'INSTVC1', 'INSTVB1', 'AVG']
# For now, 'AVG' from vlt
# Keep the full vlt frame, then restrict the working frame to the 'AVG' rows
df_xfmr_cir_vlt_15T_FULL = df_xfmr_cir_vlt_15T.copy()
df_xfmr_cir_vlt_15T = df_xfmr_cir_vlt_15T[df_xfmr_cir_vlt_15T['aep_srvc_qlty_idntfr']=='AVG'].copy()

In [None]:
# Preview the first rows of the transformer-circuit voltage frame.
df_xfmr_cir_vlt_15T.head()

In [None]:
# Base directory searched for CSVs when the kWh frame is rebuilt further down.
# The sub-path is passed as separate components so that os.path.join inserts the
# separator of the running OS (a single literal with '\' only works on Windows).
df_xfmr_circuit_dir_NEW = os.path.join(
    Utilities.get_local_data_dir(),
    'sample_circuits', 'NewMethod', 'GroupByXfmr_v3_NET', 'outg_rec_nb_11751094'
)

In [None]:
# Rebuild df_xfmr_cir_kwh_15T directly from the CSVs found under df_xfmr_circuit_dir_NEW.
# NOTE: this overwrites the frame of the same name that was assembled earlier in the notebook.
csvs = Utilities.find_all_paths(base_dir=df_xfmr_circuit_dir_NEW, glob_pattern=glob_pattern)

start_time_col = 'starttimeperiod'
end_time_col = 'endtimeperiod'
aep_endtime_utc_col = 'aep_endtime_utc'
timezoneoffset_col = 'timezoneoffset'

# Read every CSV, normalise its column names and keep only the non-empty frames.
dfs_full = []
for csv in csvs:
    if verbose:
        print('Reading file: ', csv)
    df = pd.read_csv(csv)
    df = Utilities_df.remove_prepend_from_columns_in_df(df)
    #df = df[cols_of_interest]
    if df.shape[0]==0:
        continue
    dfs_full.append(df)

# pd.concat([]) fails with an uninformative "No objects to concatenate";
# say which pattern/directory produced nothing instead.
if not dfs_full:
    raise ValueError(
        f'No non-empty CSVs matching {glob_pattern!r} were found in {df_xfmr_circuit_dir_NEW!r}'
    )
df_xfmr_cir_kwh_15T = pd.concat(dfs_full)

# Time handling: the helpers below are applied in this order on purpose, the index
# column '<aep_endtime_utc_col>_from_timestamp' is expected to exist after them.
df_xfmr_cir_kwh_15T = Utilities_dt.convert_timestamp_to_utc_in_df(df_xfmr_cir_kwh_15T, timestamp_col=aep_endtime_utc_col)
df_xfmr_cir_kwh_15T = Utilities_dt.build_utc_time_column(df_xfmr_cir_kwh_15T, time_col=[start_time_col, end_time_col])
df_xfmr_cir_kwh_15T = Utilities_dt.convert_timezoneoffset_col_to_timedelta(df_xfmr_cir_kwh_15T, timezoneoffset_col=timezoneoffset_col)

# Index by the UTC end time (column is kept as well) and sort chronologically.
df_xfmr_cir_kwh_15T = df_xfmr_cir_kwh_15T.set_index(f'{aep_endtime_utc_col}_from_timestamp', drop=False).sort_index()
df_xfmr_cir_kwh_15T.index.name='time_idx'

In [None]:
# Preview the first rows of the transformer-circuit kWh frame.
df_xfmr_cir_kwh_15T.head()

In [None]:
# Column names handed (positionally) to build_mean_and_std_from_sum_x_x2_and_counts below.
# These names stay defined as notebook globals and are re-used by a later cell.
sum_x2_col = 'sum_value_sq_sum'
sum_x_col = 'sum_value_sum'
n_counts_col = 'sum_counts'
placement_col_mean = 'value_mean_pool'
placement_col_std = 'value_std_pool'

# NOTE(review): judging by its name and arguments, the helper derives a mean and std from
# sum(x), sum(x^2) and the counts and stores them in placement_col_mean / placement_col_std
# -- confirm against its definition.
df_xfmr_cir_kwh_15T = build_mean_and_std_from_sum_x_x2_and_counts(df_xfmr_cir_kwh_15T, sum_x_col, sum_x2_col, n_counts_col, 
                                                                  placement_col_mean, placement_col_std)


### Use combine method on full dataset and then resample/aggregate

In [None]:
# non_val_cols = ['starttimeperiod', 'endtimeperiod', 'aep_endtime_utc', 'timezoneoffset',
#                 'aep_derived_uom', 'aep_usage_dt', 'aep_endtime_utc_from_timestamp', 
#                 'starttimeperiod_utc', 'endtimeperiod_utc']
# value_cols_dict_list = [
#     {'value_col':'sum_value_sum', 'counts_col':'sum_counts', 'mean_col':'mean_value_sum'}, 
#     {'value_col':'sum_value_sq_sum', 'counts_col':'sum_counts', 'mean_col':'mean_value_sq_sum'}, 
#     {'value_col':'sum_value_mean', 'counts_col':'sum_counts', 'mean_col':'mean_value_mean'}
# ]

# remaining_vals = []
# for value_cols_dict in value_cols_dict_list:
#     remaining_vals.extend(value_cols_dict.values())
# remaining_vals = list(set(remaining_vals))

# df_xfmr_cir_kwh_15T = get_ciruit_total_vals(df_xfmr_cir_kwh_15T, non_val_cols, 
#                                             value_cols_dict_list=value_cols_dict_list, 
#                                             remove_intermediate_cols=True)
# # df_xfmr_cir_kwh_15T = df_xfmr_cir_kwh_15T[df_xfmr_cir_kwh_15T['aep_srvc_qlty_idntfr']=='TOTAL']

In [None]:

# (Re)build the pooled mean/std columns on the 15T transformer-circuit kWh frame.
df_xfmr_cir_kwh_15T = build_mean_and_std_from_sum_x_x2_and_counts(
    df_xfmr_cir_kwh_15T,
    sum_x_col,
    sum_x2_col,
    n_counts_col,
    placement_col_mean,
    placement_col_std,
)
#-------------------------------------------------------------------------------------------------
# TODO: One would also expect e.g.
#           dfs_cir_kwh_dict['H'][df_key] == dfs_cir_kwh_dict['H'][df_agg_key]
#       This is close, but not exactly true; it appears some times have duplicate entries.
#       Compare the size of df_cir_kwh_15T[df_cir_kwh_15T['aep_srvc_qlty_idntfr']=='TOTAL']
#       to the number of unique indices.
#-------------------------------------------------------------------------------------------------
# kWh: time-resampled frames for every frequency in freqs
dfs_xfmr_cir_kwh_dict = AMINonVee.build_time_resampled_dfs(
    df_xfmr_cir_kwh_15T,
    freqs=freqs,
    other_grouper_cols=[],
    build_agg_dfs=build_agg_dfs,
    time_col_for_agg=time_col_for_agg,
    agg_cols=value_cols_xfmr_cir,
    agg_types=agg_types_xfmr_cir,
    df_key=df_key,
    df_agg_key=df_agg_key,
)
# Sanity check: the '15T' entry must be the input frame, unchanged.
assert df_xfmr_cir_kwh_15T.equals(dfs_xfmr_cir_kwh_dict['15T'][df_key])
#-------------------------------------------------------------------------------------------------
# Voltage: same resampling, same arguments
dfs_xfmr_cir_vlt_dict = AMINonVee.build_time_resampled_dfs(
    df_xfmr_cir_vlt_15T,
    freqs=freqs,
    other_grouper_cols=[],
    build_agg_dfs=build_agg_dfs,
    time_col_for_agg=time_col_for_agg,
    agg_cols=value_cols_xfmr_cir,
    agg_types=agg_types_xfmr_cir,
    df_key=df_key,
    df_agg_key=df_agg_key,
)
# Sanity check: the '15T' entry must be the input frame, unchanged.
assert df_xfmr_cir_vlt_15T.equals(dfs_xfmr_cir_vlt_dict['15T'][df_key])
#-------------------------------------------------------------------------------------------------

In [None]:
# Consistency check: for each frame, print True only if 'endtimeperiod_utc' equals
# 'aep_endtime_utc_from_timestamp' in every row.
print('Checking endtimeperiod_utc against aep_endtime_utc_from_timestamp')
# Outage-level frames
print('df_kwh_15T:     ', all(df_kwh_15T['endtimeperiod_utc']==df_kwh_15T['aep_endtime_utc_from_timestamp']))
print('df_vlt_15T:     ', all(df_vlt_15T['endtimeperiod_utc']==df_vlt_15T['aep_endtime_utc_from_timestamp']))
#-----
# Circuit-level frames
print('df_cir_kwh_15T: ', all(df_cir_kwh_15T['endtimeperiod_utc']==df_cir_kwh_15T['aep_endtime_utc_from_timestamp']))
print('df_cir_vlt_15T: ', all(df_cir_vlt_15T['endtimeperiod_utc']==df_cir_vlt_15T['aep_endtime_utc_from_timestamp']))

# Convert out_t_beg_local, out_t_end_local and no_outage_center_local to UTC

In [None]:
# Convert the three local times (outage begin, outage end, no-outage center) to UTC.
# The helper also receives the distinct 'timezoneoffset' values of df_kwh_15T as timedeltas.
# NOTE(review): presumably these offsets are what it uses to determine the timezone -- confirm in Utilities_dt.
[out_t_beg, out_t_end, no_outage_center] = Utilities_dt.determine_timezone_and_convert_local_to_utc_time([out_t_beg_local, out_t_end_local, no_outage_center_local], 
                                                                                            pd.to_timedelta(df_kwh_15T['timezoneoffset'].unique()).tolist())

In [None]:
# Quick-look plot of 'value_mean_cir' from 3 days before the outage start to 3 days after its end.
expand_time = datetime.timedelta(days=3)
# The first .plot() call creates the figure/axes; the following calls pass no ax= and
# therefore rely on pyplot's current axes to draw into the same plot.
ax=df_cir_kwh_15T_FULL[df_cir_kwh_15T_FULL['aep_srvc_qlty_idntfr']=='TOTAL'][out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(figsize=(20,10), label='TOTAL', color='red')
ax=df_cir_kwh_15T_FULL[df_cir_kwh_15T_FULL['aep_srvc_qlty_idntfr']=='DELIVERED'][out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(label='DELIVERED')
ax=df_cir_kwh_15T[out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(label='AVG/NET', color='purple')
# Vertical markers: outage begin (red) and outage end (lawngreen).
ax.axvline(out_t_beg, color='red')
ax.axvline(out_t_end, color='lawngreen')
ax.legend()

In [None]:
# Presentation version of the TOTAL / DELIVERED / NET comparison of 'value_mean_cir'
# from 3 days before the outage start to 3 days after its end.
save_figs=False
expand_time = datetime.timedelta(days=3)
fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[14, 6])
subplots_adjust_args = {'left':0.075, 'right':0.975, 
                        'bottom':0.10, 'top':0.95, 
                        'wspace':0.2, 'hspace':0.35}
plt.subplots_adjust(**subplots_adjust_args)
# NOTE(review): figsize=(20,10) is passed together with ax=ax here, which looks like it
# overrides the [14, 6] requested in plt.subplots above -- confirm which size is intended.
df_cir_kwh_15T_FULL[df_cir_kwh_15T_FULL['aep_srvc_qlty_idntfr']=='TOTAL'][out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(ax=ax, figsize=(20,10), label='TOTAL', color='red')
df_cir_kwh_15T_FULL[df_cir_kwh_15T_FULL['aep_srvc_qlty_idntfr']=='DELIVERED'][out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(ax=ax, label='DELIVERED')
df_cir_kwh_15T[out_t_beg-expand_time:out_t_end+expand_time]['value_mean_cir'].plot(ax=ax, label='AVG/NET', color='purple')
# Vertical markers: outage begin (red) and outage end (lawngreen).
ax.axvline(out_t_beg, color='red')
ax.axvline(out_t_end, color='lawngreen')
ax.legend(fontsize=15)
ax.set_title(label='15T Circuit Average Consumption', fontsize=25)
ax.set_xlabel(xlabel='Datetime', fontsize=20, x=0.9, y=0.0, ha='right', va='top')
ax.tick_params(axis='both', labelsize=15)
#----------------------------------------
if save_figs:
    # NOTE: machine-specific absolute path; only used when save_figs is True.
    save_dir = r'C:\Users\s346557\Documents\Presentations\GroupMeetings\20220120\Figures\SampleUsage'
    save_name = 'totalreceived_vs_deliveredreceived.png'
    fig.savefig(os.path.join(save_dir, save_name))
#----------------------------------------
# Advance the figure counter (as the other figure cells do) so that the next
# plt.subplots(num=fig_num) opens a new figure instead of re-using this one.
fig_num += 1

# Correlations

In [None]:
def get_correlation_coeff(df_1, col_1, df_2, col_2):
    """
    Correlation between df_1[col_1] and df_2[col_2] over the index labels the two
    DataFrames have in common.

    Parameters
    ----------
    df_1, df_2 : pd.DataFrame
        Frames to compare; rows are matched by index label.
    col_1, col_2 : column label
        Column taken from df_1 and df_2, respectively.

    Returns
    -------
    float
        Result of pd.Series.corr (its default method) on the shared rows;
        NaN when the frames share no index labels.

    Notes
    -----
    The shared labels are found with Index.intersection rather than a Python set:
    indexing .loc with a set is deprecated in pandas 1.5 and raises TypeError in pandas 2.x.
    Duplicate index labels are not de-duplicated here.
    """
    common_idxs = df_1.index.intersection(df_2.index)
    df_1_to_corr = df_1.loc[common_idxs]
    df_2_to_corr = df_2.loc[common_idxs]
    #-----
    corr = df_1_to_corr[col_1].corr(df_2_to_corr[col_2])
    return corr

In [None]:
# List the columns available in the transformer-circuit voltage frame.
df_xfmr_cir_vlt_15T.columns

In [None]:
# Element-wise comparison of 'value_mean_pool' (df_xfmr_cir_kwh_15T) with
# 'value_mean_cir' (df_cir_kwh_15T); displays a single True/False.
all(df_xfmr_cir_kwh_15T['value_mean_pool']==df_cir_kwh_15T['value_mean_cir'])

In [None]:
# 15T comparison figure: kWh on the top axes (ax0), voltage on the bottom axes (ax1).
# Each axes shows three series -- (1) outage mean, (2) circuit mean, (3) circuit xfmr mean --
# and is annotated with the correlations 1-2 and 1-3.
save_figs=False
expand_time = datetime.timedelta(days=3)
fig, (ax0,ax1) = plt.subplots(2, 1, num=fig_num, figsize=[14, 12])
# right=0.80 leaves room to the right of the axes for the correlation annotations.
subplots_adjust_args = {'left':0.075, 'right':0.80, 
                        'bottom':0.10, 'top':0.95, 
                        'wspace':0.2, 'hspace':0.35}
plt.subplots_adjust(**subplots_adjust_args)

#----- kWh (ax0) -----
fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_kwh_dict['15T'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='15T Average Consumption (kWh)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='kWh', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='D')

fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=df_cir_kwh_15T, 
                                  x='endtimeperiod_utc', y='value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

# fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=df_xfmr_cir_kwh_15T, 
#                                   x='endtimeperiod_utc', y='value_mean_pool', hue=None, 
#                                   out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean(2)')

fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=df_xfmr_cir_kwh_15T, 
                                  x='endtimeperiod_utc', y='mean_value_mean', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Xfmr Mean')

# Annotate through the axes object itself: plt.text would attach the text to whatever
# axes happens to be "current", not necessarily the one being annotated.
kwh_corr = get_correlation_coeff(dfs_kwh_dict['15T'][df_agg_key], 'mean_mtrs value', 
                                 df_cir_kwh_15T, 'value_mean_cir')
ax0.text(0.825, 1.075, f'Corr. 1-2 = {kwh_corr}', transform=ax0.transAxes, fontsize=15)
kwh_corr = get_correlation_coeff(dfs_kwh_dict['15T'][df_agg_key], 'mean_mtrs value', 
                                 df_xfmr_cir_kwh_15T, 'mean_value_mean')
ax0.text(0.825, 1.025, f'Corr. 1-3 = {kwh_corr}', transform=ax0.transAxes, fontsize=15)


#----- Voltage (ax1) -----
fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=dfs_vlt_dict['15T'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='15T Average Voltage (V)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='Voltage (V)', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='Usage Start Time', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='D')
# Fixed voltage range for the y-axis.
ax1.set_ylim([225,250])

fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=df_cir_vlt_15T, 
                                  x='endtimeperiod_utc', y='value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=df_xfmr_cir_vlt_15T, 
                                  x='endtimeperiod_utc', y='mean_value_mean', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Xfmr Mean')

vlt_corr = get_correlation_coeff(dfs_vlt_dict['15T'][df_agg_key], 'mean_mtrs value', 
                                 df_cir_vlt_15T, 'value_mean_cir')
ax1.text(0.825, 1.075, f'Corr. 1-2 = {vlt_corr}', transform=ax1.transAxes, fontsize=15)
vlt_corr = get_correlation_coeff(dfs_vlt_dict['15T'][df_agg_key], 'mean_mtrs value', 
                                 df_xfmr_cir_vlt_15T, 'mean_value_mean')
ax1.text(0.825, 1.025, f'Corr. 1-3 = {vlt_corr}', transform=ax1.transAxes, fontsize=15)
#----------------------------------------
if save_figs:
    # NOTE: machine-specific absolute path; only used when save_figs is True.
    save_dir = r'C:\Users\s346557\Documents\Presentations\GroupMeetings\20220120\Figures\SampleUsage'
    save_name = '15T_outage_consumption_vs_circuit.png'
    fig.savefig(os.path.join(save_dir, save_name))
#----------------------------------------
fig_num +=1

In [None]:
# dfs_xfmr_cir_kwh_dict['H'][df_key].columns
# 'mean_TRS mean_value_mean'

In [None]:
# Hourly comparison figure: kWh on the top axes (ax0), voltage on the bottom axes (ax1),
# each annotated with the correlation between the outage mean and the circuit mean.
expand_time = datetime.timedelta(days=3)
fig, (ax0,ax1) = plt.subplots(2, 1, num=fig_num, figsize=[14, 12])
plt.subplots_adjust(hspace=0.3)

#----- kWh (ax0) -----
fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_kwh_dict['H'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_TRS mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='Hourly Average Consumption (kWh)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='kWh', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='D')

fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_cir_kwh_dict['H'][df_key], 
                                  x='date', y='mean_TRS value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

# The frame is passed as data=, like in every other call, rather than positionally.
fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_xfmr_cir_kwh_dict['H'][df_key], 
                                  x='date', y='mean_TRS mean_value_mean', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean (2)')

# Annotate through the axes object itself: plt.text would attach the text to whatever
# axes happens to be "current", not necessarily the one being annotated.
kwh_corr = get_correlation_coeff(dfs_kwh_dict['H'][df_agg_key], 'mean_TRS mean_mtrs value', 
                                 dfs_cir_kwh_dict['H'][df_key], 'mean_TRS value_mean_cir')
ax0.text(1.01, 1.01, f'Corr = {kwh_corr}', transform=ax0.transAxes, fontsize=15)

#----- Voltage (ax1) -----
fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=dfs_vlt_dict['H'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_TRS mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='Hourly Average Voltage (V)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='Voltage (V)', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='Usage Start Time', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='D')
# Fixed voltage range for the y-axis.
ax1.set_ylim([225,250])

fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=dfs_cir_vlt_dict['H'][df_key], 
                                  x='date', y='mean_TRS value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

vlt_corr = get_correlation_coeff(dfs_vlt_dict['H'][df_agg_key], 'mean_TRS mean_mtrs value', 
                                 dfs_cir_vlt_dict['H'][df_key], 'mean_TRS value_mean_cir')
ax1.text(1.01, 1.01, f'Corr = {vlt_corr}', transform=ax1.transAxes, fontsize=15)

fig_num +=1

In [None]:
# Daily comparison figure over a wider window (31 days either side of the outage):
# kWh on the top axes (ax0), voltage on the bottom axes (ax1), each annotated with the
# correlation between the outage mean and the circuit mean.
expand_time = datetime.timedelta(days=31)
fig, (ax0,ax1) = plt.subplots(2, 1, num=fig_num, figsize=[14, 12])
plt.subplots_adjust(hspace=0.3)

#----- kWh (ax0) -----
fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_kwh_dict['D'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_TRS mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='Daily Average Consumption (kWh)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='kWh', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='', fontsize=0, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='7D')

fig,ax0 = AMINonVee.plot_usage_around_outage(fig, ax0, data=dfs_cir_kwh_dict['D'][df_key], 
                                  x='date', y='mean_TRS value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

# Annotate through the axes object itself: plt.text would attach the text to whatever
# axes happens to be "current", not necessarily the one being annotated.
kwh_corr = get_correlation_coeff(dfs_kwh_dict['D'][df_agg_key], 'mean_TRS mean_mtrs value', 
                                 dfs_cir_kwh_dict['D'][df_key], 'mean_TRS value_mean_cir')
ax0.text(1.01, 1.01, f'Corr = {kwh_corr}', transform=ax0.transAxes, fontsize=15)

#----- Voltage (ax1) -----
fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=dfs_vlt_dict['D'][df_agg_key], 
                                  x='endtimeperiod_utc', y='mean_TRS mean_mtrs value', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Outage Mean', 
                                  title_args=dict(label='Daily Average Voltage (V)', fontsize=25), 
                                  draw_outage_limits=True, 
                                  ylabel_args = dict(ylabel='Voltage (V)', fontsize=20, x=0.0, y=0.8, ha='left', va='bottom'), 
                                  xlabel_args = dict(xlabel='Usage Start Time', fontsize=20, x=0.9, y=0.0, ha='right', va='top'), 
                                   seg_line_freq='7D')
# Fixed voltage range for the y-axis.
ax1.set_ylim([225,250])

fig,ax1 = AMINonVee.plot_usage_around_outage(fig, ax1, data=dfs_cir_vlt_dict['D'][df_key], 
                                  x='date', y='mean_TRS value_mean_cir', hue=None, 
                                  out_t_beg=out_t_beg, out_t_end=out_t_end, expand_time=expand_time, data_label='Circuit Mean')

vlt_corr = get_correlation_coeff(dfs_vlt_dict['D'][df_agg_key], 'mean_TRS mean_mtrs value', 
                                 dfs_cir_vlt_dict['D'][df_key], 'mean_TRS value_mean_cir')
ax1.text(1.01, 1.01, f'Corr = {vlt_corr}', transform=ax1.transAxes, fontsize=15)

fig_num +=1