# usage_instantaneous was taking a while to run something, and I wanted to keep developing, which is why this notebook exists

In [1]:
%run AMI_SQL.ipynb
%run AMIUsgInst_SQL.ipynb
%run AMIUsgInst.ipynb
%run MeterPremise.ipynb
%run DOVSOutages_SQL.ipynb
%run DOVSOutages.ipynb

In [2]:
import sys, os
import re
import time
import datetime

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from scipy import stats

import pyodbc
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#---------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt

In [3]:
conn_outages = Utilities.get_utldb01p_oracle_connection()
conn_aws = Utilities.get_athena_prod_aws_connection()

In [4]:
# Load the saved meter/premise record, clean its column names, and flatten it to a dict for key lookups.
meter_premise_csv = r'C:\Users\s346557\Documents\my_meter_premise_info.csv'
my_meter_premise_info = clm.remove_prepend_from_columns_in_df(pd.read_csv(meter_premise_csv))
# NOTE(review): squeeze() presumably collapses a single-row frame into a Series here — confirm the CSV holds one record
my_meter_premise_info_dict = my_meter_premise_info.squeeze().to_dict()

# ---------------------------------------------------------------
# OUTAGES
# ---------------------------------------------------------------

In [5]:
# Arguments forwarded to the DOVS outage SQL builder.
outage_query_kwargs = dict(
    specific_dates=['2020-10-12', '2017-09-16', '2020-07-01'],
    outg_rec_nbs=['11947217', '11946640', '10143524', '11770899'],
)
# Build the outage frame by running the query at construction time.
dovs_outgs = DOVSOutages(
    df_construct_type=DFConstructType.kRunSqlQuery,
    build_sql_function_kwargs=outage_query_kwargs,
    init_df_in_constructor=True,
)
# Work on a copy: search-window columns are added to df_outage in the next cell.
df_outage = dovs_outgs.get_df().copy()

In [6]:
#TODO
# Half-width of the search window placed around each outage (currently 14 days, expressed in minutes).
#search_time_half_window = 60 #minutes
search_time_half_window = datetime.timedelta(minutes=60*24*14)

# NOTE: search_time_half_window can also be a vector of values...
df_outage['search_time_half_window'] = search_time_half_window
# Widen each outage's [DT_OFF_TS_FULL, DT_ON_TS] interval by the half window on both sides.
df_outage['t_search_min'] = df_outage['DT_OFF_TS_FULL'].sub(df_outage['search_time_half_window'])
df_outage['t_search_max'] = df_outage['DT_ON_TS'].add(df_outage['search_time_half_window'])

# Usage Instantaneous

In [7]:
cols_of_interest_usg_inst = TableInfos.AMIUsgInst_TI.std_columns_of_interest
cols_of_interest_met_prem = TableInfos.MeterPremise_TI.std_columns_of_interest

In [8]:
sql_usg_inst_for_outages = AMIUsgInst_SQL.build_sql_usg_inst_for_outages(
    cols_of_interest_usg_inst, 
    df_outage, 
    build_sql_function_kwargs=dict(opco='oh')
)
print(sql_usg_inst_for_outages)

WITH EDE_gnrl AS (
	SELECT
		un_rin.read_type,
		un_rin.serialnumber,
		un_rin.aep_premise_nb,
		un_rin.timezoneoffset,
		un_rin.aep_readtime,
		un_rin.aep_readtime_utc,
		un_rin.measurement_type,
		un_rin.measurement_value,
		un_rin.longitude,
		un_rin.latitude,
		un_rin.aep_opco,
		un_rin.aep_read_dt
	FROM usage_instantaneous.inst_msr_consume un_rin
	WHERE un_rin.aep_opco = 'oh'
	AND   (
		aep_read_dt BETWEEN '2017-09-02' AND '2017-09-30' OR 
		aep_read_dt BETWEEN '2020-06-17' AND '2020-07-15' OR 
		aep_read_dt BETWEEN '2020-09-28' AND '2020-10-26'
	)
), 
EDE_0 AS (
	SELECT
		EDE_gnrl.*,
		'10143524' AS OUTG_REC_NB
	FROM EDE_gnrl
	WHERE aep_read_dt BETWEEN '2017-09-02' AND '2017-09-30'
	AND   aep_premise_nb IN ('100420620','100547600','100683783','101020620','101330620','102920620','103520620','103647600','103783783','104120620','104430620','105030620','105293783','105710620','106620620','106747600','106883783','107220620','107483783','108130620','108393783','108810620','109720620','

In [9]:
df_1 = pd.read_sql(sql_usg_inst_for_outages, conn_aws)
df_1 = clm.remove_table_aliases(df_1)


In [10]:
df_1

Unnamed: 0,read_type,serialnumber,aep_premise_nb,timezoneoffset,aep_readtime,aep_readtime_utc,measurement_type,measurement_value,longitude,latitude,aep_opco,aep_read_dt,outg_rec_nb
0,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_harm1,31.8000,-83.2817183,40.8232183,oh,2020-07-04,11770899
1,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_harm3,36.9000,-83.2817183,40.8232183,oh,2020-07-04,11770899
2,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_only3,36.9000,-83.2817183,40.8232183,oh,2020-07-04,11770899
3,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_harmonic_distortion1,9.0000,-83.2817183,40.8232183,oh,2020-07-04,11770899
4,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_harmonic_distortion2,0.0000,-83.2817183,40.8232183,oh,2020-07-04,11770899
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7239,st03_fatal_errors,881984523,101330620,-04:00,2020-09-30 18:01:12,1601503272,under_voltage_status,,-82.9532280,40.06823800,oh,2020-09-30,11947217
7240,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,dsp_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217
7241,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,nvram_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217
7242,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,ram_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217


In [11]:
# reload(Utilities_df)
# %run GenAn.ipynb
# %run AMIUsgInst.ipynb

In [12]:
# Arguments for the SQL builder: same builder function and inputs as the direct pd.read_sql query above.
usg_inst_sql_kwargs = dict(
    cols_of_interest=cols_of_interest_usg_inst,
    df_outage=df_outage,
    build_sql_function_kwargs=dict(opco='oh'),
)
# Let the AMIUsgInst wrapper build and run the query at construction time.
# NOTE(review): the keyword is spelled "contstruct_df_args"; it must match the AMIUsgInst constructor's parameter name — confirm.
ami_usg_inst = AMIUsgInst(
    df_construct_type=DFConstructType.kRunSqlQuery,
    contstruct_df_args=None,
    init_df_in_constructor=True,
    build_sql_function=AMIUsgInst_SQL.build_sql_usg_inst_for_outages,
    build_sql_function_kwargs=usg_inst_sql_kwargs,
)

In [13]:
ami_usg_inst.df

Unnamed: 0,read_type,serialnumber,aep_premise_nb,timezoneoffset,aep_readtime,aep_readtime_utc,measurement_type,measurement_value,longitude,latitude,aep_opco,aep_read_dt,outg_rec_nb
0,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_harm1,31.8,-83.2817183,40.8232183,oh,2020-07-04,11770899
1,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_harm3,36.9,-83.2817183,40.8232183,oh,2020-07-04,11770899
2,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_fndmt_only3,36.9,-83.2817183,40.8232183,oh,2020-07-04,11770899
3,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_harmonic_distortion1,9.0,-83.2817183,40.8232183,oh,2020-07-04,11770899
4,mt110_voltage_event,682540032,077308983,-04:00,2020-07-04 14:45:15,1593888315,curr_harmonic_distortion2,0.0,-83.2817183,40.8232183,oh,2020-07-04,11770899
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7239,st03_fatal_errors,881984523,101330620,-04:00,2020-09-30 18:01:12,1601503272,under_voltage_status,,-82.9532280,40.06823800,oh,2020-09-30,11947217
7240,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,dsp_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217
7241,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,nvram_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217
7242,st03_fatal_errors,882468091,108130620,-04:00,2020-09-30 18:19:37,1601504377,ram_error_status,,-82.9535730,40.06827500,oh,2020-09-30,11947217


In [None]:
sql_usg_inst_join_mp_for_outages = AMIUsgInst_SQL.build_sql_usg_inst_for_outages(
    cols_of_interest=cols_of_interest_usg_inst, 
    df_outage=df_outage, 
    build_sql_function_kwargs=dict(opco='oh', 
                                   aep_derived_uoms_and_idntfrs=['KWH']), 
    join_mp_args=dict(
        join_with_CTE=True, 
        build_mp_kwargs=dict(cols_of_interest=cols_of_interest_met_prem)
    )
)
#print(sql_usg_inst_join_mp_for_outages)

In [None]:
test_df = pd.read_sql(sql_usg_inst_join_mp_for_outages, conn_aws)

In [None]:
test_df = clm.remove_table_aliases(test_df)

In [None]:
test_df

In [None]:
# Time one round trip of the outage usage query (relies on `import time` in the imports cell).
# perf_counter() is the clock intended for measuring elapsed intervals.
start = time.perf_counter()
new_df = pd.read_sql(sql_usg_inst_for_outages, conn_aws)
# Elapsed wall time of the query, in seconds.
print(time.perf_counter()-start)
new_df = clm.remove_table_aliases(new_df)

In [None]:
# date_range is assigned here so this cell runs on a fresh kernel; the notebook's other
# assignments of date_range sit further down, after this cell.
# NOTE(review): this is the range used by the later setup cells — confirm it is the one intended for this meter.
date_range = ['2020-10-12', '2020-11-12']

# Build the usage_instantaneous query for the single meter/premise loaded from my_meter_premise_info.
sql_usg_inst = AMIUsgInst_SQL.build_sql_usg_inst(
    cols_of_interest_usg_inst,
    serial_numbers=[my_meter_premise_info_dict['mfr_devc_ser_nbr']],
    aep_premise_nbs=[my_meter_premise_info_dict['prem_nb']],
    aep_opco='oh',
    date_range=date_range
)

In [None]:
sql_usg_inst.print()

In [None]:
df_usg_inst_OG = pd.read_sql(sql_usg_inst, conn_aws)
df_usg_inst_OG = clm.remove_table_aliases(df_usg_inst_OG)

In [None]:
df_usg_inst_OG.head()

# -----

In [None]:
df_usg_inst = df_usg_inst_OG.copy()

In [None]:
print(f'df_usg_inst.shape = {df_usg_inst.shape}')
print("\ndf_usg_inst['measurement_type'].unique()\n", '-'*25+'\n', df_usg_inst['measurement_type'].unique())

In [None]:
# Parse the read timestamps into datetimes (the next cell sorts on this column).
df_usg_inst['aep_readtime'] = pd.to_datetime(df_usg_inst['aep_readtime'])
# Cast measurement_value to float.
# NOTE(review): to_numeric_errors='coerce' presumably turns unparseable entries into NaN — confirm in Utilities_df.
# NOTE(review): inplace=True is combined with re-assignment; this only works if convert_col_types
# returns the frame even when operating in place — confirm.
df_usg_inst = Utilities_df.convert_col_types(
    df=df_usg_inst, 
    cols_and_types_dict={'measurement_value':float}, 
    to_numeric_errors='coerce', 
    inplace=True
)

In [None]:
sort_by = ['aep_readtime', 'measurement_type']
df_usg_inst = df_usg_inst.sort_values(by=sort_by, ignore_index=True)

In [None]:
for idx,gp_df in df_usg_inst.groupby('aep_read_dt'):
    print(gp_df['aep_readtime'].nunique())

### Why are there entries which are EXACTLY the same except for read time (read_time, aep_readtime, aep_readtime_utc)?

In [None]:
df_usg_inst.iloc[1]==df_usg_inst.iloc[6]

In [None]:
df_usg_inst[(df_usg_inst['aep_read_dt']=='2021-10-12') & (df_usg_inst['measurement_type']=='instantaneous_kw')]

In [None]:
my_df_usg_inst = df_usg_inst.copy()

In [None]:
# Columns compared when hunting for duplicates: everything except the two read-time columns.
read_time_cols = ['aep_readtime', 'aep_readtime_utc']
non_read_time_cols = [col for col in my_df_usg_inst.columns if col not in read_time_cols]

# keep=False flags every member of a duplicate set, not just the repeats.
is_dupl = my_df_usg_inst.duplicated(subset=non_read_time_cols, keep=False)
my_df_usg_inst_dupl = my_df_usg_inst[is_dupl]
# One group per distinct combination of the non-read-time values.
my_df_usg_inst_dupl_gpd = my_df_usg_inst_dupl.groupby(non_read_time_cols)

In [None]:
get_group_idx=0
my_df_usg_inst_dupl_gpd.get_group(list(my_df_usg_inst_dupl_gpd.groups.keys())[get_group_idx])

In [None]:
my_df_usg_inst.shape

In [None]:
my_df_usg_inst.drop_duplicates(subset=[x for x in my_df_usg_inst.columns 
                                       if x not in ['aep_readtime', 'aep_readtime_utc']]).shape

# Setup df_usage_inst

In [None]:
conn = Utilities.get_athena_prod_aws_connection()

In [None]:
# Bounds passed to the BETWEEN clause on aep_read_dt.
#date_range = ['2020-10-12', '2020-10-13']
date_range = ['2020-10-12', '2020-11-12']

# Randomly selected
premise_nbs = [
    '072163781', '100166573', '101258511', '105347161', '109612790',
    '075671313', '106737082', '102970840', '103596600', '107782860',
]

state_abbr_txs = ['OH']

cols_of_interest_usg_inst = [
    'read_type', 'serialnumber', 'aep_premise_nb', 'timezoneoffset',
    'aep_readtime', 'aep_readtime_utc', 'measurement_type', 'measurement_value',
    'longitude', 'latitude', 'aep_opco', 'aep_read_dt',
]

# Render each Python list as the comma-separated text that goes into the SQL.
select_cols_sql = ','.join(cols_of_interest_usg_inst)
states_sql = ','.join(f"'{state}'" for state in state_abbr_txs)
premises_sql = ','.join(f"'{prem_nb}'" for prem_nb in premise_nbs)

# Assemble the query line by line; the empty first/last entries give the leading and trailing newline.
sql_usage_inst = '\n'.join([
    "",
    f"SELECT {select_cols_sql}",
    "FROM usage_instantaneous.inst_msr_consume",
    "WHERE aep_opco = 'oh' ",
    f"AND aep_state IN ({states_sql})",
    f"AND aep_premise_nb IN ({premises_sql})",
    f"AND aep_read_dt BETWEEN '{date_range[0]}' AND '{date_range[1]}'",
    "",
])

# sql_usage_inst = (
# """
# SELECT {}
# FROM usage_instantaneous.inst_msr_consume
# WHERE aep_opco = 'oh' 
# AND aep_state IN ({})
# AND aep_read_dt BETWEEN '{}' AND '{}'
# LIMIT 100000
# """
# ).format(','.join(cols_of_interest_usg_inst), 
#          ','.join(["'{}'".format(x) for x in state_abbr_txs]), 
#          date_range[0], 
#          date_range[1])

In [None]:
print(sql_usage_inst)

In [None]:
df_usage_inst_OG = pd.read_sql(sql_usage_inst, conn)
df_usage_inst_OG = clm.remove_table_aliases(df_usage_inst_OG)

In [None]:
df_usage_inst = df_usage_inst_OG.copy()

In [None]:
print(f'df_usage_inst.shape = {df_usage_inst.shape}')

In [None]:
df_usage_inst = Utilities_df.convert_col_types(
    df=df_usage_inst, 
    cols_and_types_dict={'measurement_value':float}, 
    to_numeric_errors='coerce', 
    inplace=True
)

In [None]:
df_usage_inst

In [None]:
df_usage_inst.dtypes

In [None]:
df_usage_inst.drop_duplicates(subset=[x for x in df_usage_inst.columns 
                                      if x not in ['aep_readtime', 'aep_readtime_utc']]).shape

In [None]:
print(df_usage_inst.columns)
df_usage_inst.head(10)

In [None]:
df_usage_inst['read_type'].unique()

In [None]:
len(df_usage_inst['serialnumber'].unique())

In [None]:
df_usage_inst.groupby(['serialnumber', 'aep_read_dt']).ngroups

In [None]:
for idx, gp_df in df_usage_inst.groupby(['serialnumber', 'aep_read_dt']):
    print(gp_df['aep_readtime'].nunique())

In [None]:
df_usage_inst['read_type'].unique()

In [None]:
df_usage_inst['measurement_type'].unique()

In [None]:
df_usage_inst.groupby('measurement_type')['measurement_value'].mean()

In [None]:
fig_num = 0

In [None]:
# tmp_df = df_usage_inst[df_usage_inst['measurement_type']=='power_factor_phase_a']
# fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
# #fig.suptitle('Transformer Voltages By Date', fontsize=25, fontweight='bold')
# sns.stripplot(ax=ax, x='serialnumber', y='measurement_value', data=tmp_df, jitter=False)
# ax.tick_params(axis='x', labelrotation=90, labelsize=7.0, direction='in');
# fig_num +=1


In [None]:
# measurement_types = df_usage_inst['measurement_type'].unique().tolist()
# for measurement_type in measurement_types:
#     tmp_df = df_usage_inst[df_usage_inst['measurement_type']==measurement_type]
#     fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
#     fig.suptitle(f'Measurement Type = {measurement_type}', fontsize=25, fontweight='bold')
#     sns.stripplot(ax=ax, x='serialnumber', y='measurement_value', data=tmp_df, jitter=False)
#     ax.tick_params(axis='x', labelrotation=90, labelsize=7.0, direction='in');
#     fig_num +=1

In [None]:
# measurement_types = df_usage_inst['measurement_type'].unique().tolist()
# for measurement_type in measurement_types:
#     tmp_df = df_usage_inst[df_usage_inst['measurement_type']==measurement_type]
#     fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
#     fig.suptitle(f'Measurement Type = {measurement_type}', fontsize=25, fontweight='bold')
#     sns.boxplot(ax=ax, x='read_type', y='measurement_value', data=tmp_df)
#     ax.tick_params(axis='x', labelrotation=90, labelsize=7.0, direction='in');
#     fig_num +=1

In [None]:
state_abbr_txs = ['OH']

# limit = 10000
# date_range = ['2020-10-12', '2020-10-13']

# Row cap applied to each per-read_type query, and the read-date window.
limit = 1000
date_range = ['2020-10-12', '2020-11-12']

cols_of_interest_usg_inst = [
    'read_type', 'serialnumber', 'aep_premise_nb', 'timezoneoffset',
    'aep_readtime', 'aep_readtime_utc', 'measurement_type', 'measurement_value',
    'longitude', 'latitude', 'aep_opco', 'aep_read_dt',
]

# Query template: the date window and limit are filled in now, while the two bare {} slots
# (select list, then the quoted read_type) stay open for a later .format() call.
sql_gen = '\n'.join([
    "",
    "SELECT {}",
    "FROM usage_instantaneous.inst_msr_consume",
    "WHERE aep_opco = 'oh' ",
    f"AND aep_read_dt BETWEEN '{date_range[0]}' AND '{date_range[1]}'",
    "AND read_type = {}",
    f"LIMIT {limit}",
    "",
])

select_cols_sql = ','.join(cols_of_interest_usg_inst)

# One query per read_type; the list order fixes which read_type lands in sql_1 ... sql_7.
read_types_to_query = [
    'mt110_voltage_event',
    'mt113_st28_am_temp_powerquality',
    'mt115_load_control_status',
    'mt117_capacitor_voltage',
    'mt72_kv2c',
    'mt113_st28_pm_temp_powerquality',
    'st03_fatal_errors',
]
sql_1, sql_2, sql_3, sql_4, sql_5, sql_6, sql_7 = [
    sql_gen.format(select_cols_sql, f"'{read_type_i}'") for read_type_i in read_types_to_query
]

In [None]:
# Run the seven per-read_type queries (sql_1 ... sql_7) on the Athena connection, one frame per query.
# NOTE: df_1 rebinds the name that held the outage query result near the top of the notebook.
df_1 = pd.read_sql(sql_1, conn)
df_2 = pd.read_sql(sql_2, conn)
df_3 = pd.read_sql(sql_3, conn)
df_4 = pd.read_sql(sql_4, conn)
df_5 = pd.read_sql(sql_5, conn)
df_6 = pd.read_sql(sql_6, conn)
df_7 = pd.read_sql(sql_7, conn)

In [None]:
# Stack the seven per-read_type samples into one frame.
# ignore_index=True renumbers the rows 0..n-1; without it every piece keeps its own index
# labels, so the combined frame carries repeated labels.
df_usage_inst_2_OG = pd.concat([df_1, df_2, df_3, df_4, df_5, df_6, df_7], ignore_index=True)

In [None]:
df_usage_inst_2 = df_usage_inst_2_OG.copy()

In [None]:
df_usage_inst_2 = clm.remove_table_aliases(df_usage_inst_2)

# Drop rows whose measurement_value cannot be cast to float in the next cell:
# nulls, the blank placeholder ' ', and the 'FAULT' marker.
msr_value = df_usage_inst_2['measurement_value']
has_usable_value = msr_value.notna() & ~msr_value.isin([' ', 'FAULT'])
# .copy() yields an independent frame, so the column assignment in the next cell
# does not write into a slice of the unfiltered frame (SettingWithCopyWarning).
df_usage_inst_2 = df_usage_inst_2[has_usable_value].copy()

In [None]:
df_usage_inst_2['measurement_value'] = df_usage_inst_2['measurement_value'].astype(float)

In [None]:
# measurement_types = df_usage_inst_2['measurement_type'].unique().tolist()
# for measurement_type in measurement_types:
#     tmp_df = df_usage_inst_2[df_usage_inst_2['measurement_type']==measurement_type]
#     fig, ax = plt.subplots(1, 1, num=fig_num, figsize=[11, 8.5])
#     fig.suptitle(f'Measurement Type = {measurement_type}', fontsize=25, fontweight='bold')
#     sns.boxplot(ax=ax, x='read_type', y='measurement_value', data=tmp_df)
#     ax.tick_params(axis='x', labelrotation=90, labelsize=7.0, direction='in');
#     fig_num +=1

In [None]:
df_usage_inst_2

In [None]:
measurement_types = df_usage_inst_2['measurement_type'].unique().tolist()
read_types = df_usage_inst_2['read_type'].unique().tolist()
# One 11 x 8.5 figure per read_type, showing a box of measurement_value for each measurement_type.
for read_type in read_types:
    is_this_read_type = df_usage_inst_2['read_type'] == read_type
    tmp_df = df_usage_inst_2.loc[is_this_read_type]
    fig, ax = plt.subplots(nrows=1, ncols=1, num=fig_num, figsize=[11, 8.5])
    fig.suptitle(f'Read Type = {read_type}', fontsize=25, fontweight='bold')
    sns.boxplot(data=tmp_df, x='measurement_type', y='measurement_value', ax=ax)
    # Rotate and shrink the x tick labels so the measurement_type names fit.
    ax.tick_params(axis='x', labelrotation=90, labelsize=7.0, direction='in')
    # Each figure gets its own number so later plots do not draw over it.
    fig_num += 1

In [None]:
# Map each read_type to the measurement types observed under it, printing a small report per read_type.
measurement_types_in_each_read_type = {}
banner_rule = '-'*25
for read_type_i, read_type_df in df_usage_inst_2.groupby('read_type'):
    print(f'{banner_rule}\n{read_type_i}\n{banner_rule}')
    unique_measurement_types = read_type_df['measurement_type'].unique().tolist()
    print('Unique measurement types:')
    print(*unique_measurement_types, sep='\n')
    print()
    # Each groupby key should show up exactly once; a repeat would silently overwrite an entry.
    assert read_type_i not in measurement_types_in_each_read_type
    measurement_types_in_each_read_type[read_type_i] = unique_measurement_types

In [None]:
measurement_types_in_each_read_type

In [None]:
df_usage_inst_2.head()

In [None]:
# Collect the distinct combinations of measurement types and of read types seen per serialnumber.
unq_msr_types_by_sn = []
unq_read_types_by_sn = []
for _, sn_df in df_usage_inst_2.groupby('serialnumber'):
    # Pair each per-serialnumber list with the accumulator it belongs in.
    found_and_seen = (
        (sn_df['measurement_type'].unique().tolist(), unq_msr_types_by_sn),
        (sn_df['read_type'].unique().tolist(), unq_read_types_by_sn),
    )
    for types_i, seen_combos in found_and_seen:
        # List comparison is order-sensitive: the same types in a different order count as a new combination.
        if types_i not in seen_combos:
            seen_combos.append(types_i)

In [None]:
len(unq_msr_types_by_sn)

In [None]:
len(unq_read_types_by_sn)

In [None]:
unq_read_types_by_sn

In [None]:
unq_msr_types_by_sn

In [None]:
df_usage_inst_2.shape

In [None]:
df_usage_inst_2.drop_duplicates(subset=[x for x in df_usage_inst_2.columns 
                                      if x not in ['aep_readtime', 'aep_readtime_utc']]).shape

In [None]:
# Distinct read_type combinations that occur together on a single serialnumber.
possible_read_type_groupings = []
for idx, gp_df in df_usage_inst_2.groupby('serialnumber'):
    read_types_for_sn = gp_df['read_type'].unique().tolist()
    # Skip combinations already recorded (order-sensitive list comparison).
    if read_types_for_sn in possible_read_type_groupings:
        continue
    possible_read_type_groupings.append(read_types_for_sn)
#     if gp_df['read_type'].nunique()>1:
#         print(f'serialnunber = {idx}')
#         print(gp_df['read_type'].unique())
#         print()

In [None]:
possible_read_type_groupings

In [None]:
sorted(df_usage_inst_2['read_type'].unique().tolist())

In [None]:
# mt110_voltage_event by self or with mt72_kv2c
# mt72_kv2c by self or with mt110_voltage_event

# mt113_st28_am_temp_powerquality by self or with mt113_st28_pm_temp_powerquality or mt115_load_control_status
# mt113_st28_pm_temp_powerquality by self or with mt113_st28_am_temp_powerquality or mt115_load_control_status
# mt115_load_control_status by self or with mt113_st28_am_temp_powerquality or mt113_st28_pm_temp_powerquality

In [None]:
# my_df_usg_inst['read_type'].unique()