In [1]:
from importlib import reload
#reload(Utilities)
#reload(clm)
# NOTE: To reload a class imported as, e.g., 
# from module import class
# One must call:
#   1. import module
#   2. reload module
#   3. from module import class

import sys, os
import re
from pathlib import Path
import json
import pickle

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype
from scipy import stats
import datetime
import time
from natsort import natsorted, ns, natsort_keygen
from packaging import version
import copy

import itertools

import pyodbc
#---------------------------------------------------------------------
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
from matplotlib import dates
import matplotlib.colors as mcolors
import matplotlib.cm as cm #e.g. for cmap=cm.jet
#---------------------------------------------------------------------
sys.path.insert(0, os.path.realpath('..'))
import Utilities_config
#-----
import CommonLearningMethods as clm
#-----
from MeterPremise import MeterPremise
#-----
from AMI_SQL import AMI_SQL
from AMINonVee_SQL import AMINonVee_SQL
from AMIEndEvents_SQL import AMIEndEvents_SQL
from AMIUsgInst_SQL import AMIUsgInst_SQL
from DOVSOutages_SQL import DOVSOutages_SQL
#-----
from GenAn import GenAn
from AMINonVee import AMINonVee
from AMIEndEvents import AMIEndEvents
from MECPODf import MECPODf
from MECPOAn import MECPOAn
from AMIUsgInst import AMIUsgInst
from DOVSOutages import DOVSOutages
#---------------------------------------------------------------------
sys.path.insert(0, Utilities_config.get_sql_aids_dir())
import Utilities_sql
import TableInfos
from TableInfos import TableInfo
from SQLElement import SQLElement
from SQLElementsCollection import SQLElementsCollection
from SQLSelect import SQLSelectElement, SQLSelect
from SQLFrom import SQLFrom
from SQLWhere import SQLWhereElement, SQLWhere
from SQLJoin import SQLJoin, SQLJoinCollection
from SQLGroupBy import SQLGroupByElement, SQLGroupBy
from SQLHaving import SQLHaving
from SQLOrderBy import SQLOrderByElement, SQLOrderBy
from SQLQuery import SQLQuery
from SQLQueryGeneric import SQLQueryGeneric
#---------------------------------------------------------------------
#sys.path.insert(0, os.path.join(os.path.realpath('..'), 'Utilities'))
sys.path.insert(0, Utilities_config.get_utilities_dir())
import Utilities
import Utilities_df
from Utilities_df import DFConstructType
import Utilities_dt
import Plot_General
import Plot_Box_sns
import Plot_Hist
import Plot_Bar
import GrubbsTest
import DataFrameSubsetSlicer
from DataFrameSubsetSlicer import DataFrameSubsetSlicer as DFSlicer

In [2]:
def build_sample_dfs(
    distinct_ids,
    save_dir, 
    years = [2020, 2021, 2022, 2023], 
    limit_per_month=10000, 
    conn_db=Utilities.get_athena_prod_aws_connection()
):
    r"""
    """
    #-------------------------
    general_sql = r"""
    SELECT
        EDE.issuertracking_id,
        EDE.serialnumber,
        EDE.enddeviceeventtypeid,
        EDE.valuesinterval,
        EDE.aep_premise_nb,
        EDE.reason,
        EDE.event_type,
        EDE.aep_opco,
        EDE.aep_event_dt
    FROM meter_events.end_device_event EDE
    WHERE EDE.aep_event_dt BETWEEN '{dt_0}' AND '{dt_1}'
    AND   EDE.enddeviceeventtypeid = '{ede_typeid}'
    LIMIT {limit}
    """
    #-------------------------
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    #-------------------------
    for i,distinct_id in enumerate(distinct_ids):
        print(f"distinct_id = {distinct_id} ({i+1}/{len(distinct_ids)})")
        dfs_for_id = []
        save_name_for_id = 'df_'+distinct_id.replace('.', '_')+'.pkl'
        #-----
        for year in years:
            # No harm in assuming all months have 31 days
            for month in range(1,13):
                dt_0 = "{year}-{month:02d}-01".format(year=year, month=month)
                dt_1 = "{year}-{month:02d}-31".format(year=year, month=month)
                sql_i = general_sql.format(
                    dt_0=dt_0, 
                    dt_1=dt_1, 
                    ede_typeid=distinct_id, 
                    limit=limit_per_month
                )
                df_i = pd.read_sql_query(sql_i, conn_db)
                dfs_for_id.append(df_i)
        assert(len(dfs_for_id) == 12*len(years))
        df_for_id = pd.concat(dfs_for_id)
        df_for_id.to_pickle((os.path.join(save_dir, save_name_for_id)))

In [3]:
save_dir_base = r'C:\Users\s346557\Documents\LocalData\dovs_and_end_events_data\regex'
build_samples = False

In [4]:
# distinct_ids = AMIEndEvents.get_end_event_distinct_fields(
#     '2020-01-01', 
#     '2023-03-21', 
#     fields=['enddeviceeventtypeid']
# )
# distinct_ids.to_pickle(os.path.join(save_dir_base, 'distinct_ids.pkl'))
#-----
distinct_ids = pd.read_pickle(os.path.join(save_dir_base, 'distinct_ids.pkl'))
#-----
distinct_ids = natsorted(distinct_ids['enddeviceeventtypeid'].tolist())

In [5]:
distinct_ids

[None,
 '3.2.0.28',
 '3.2.0.85',
 '3.2.22.28',
 '3.2.22.150',
 '3.7.19.242',
 '3.7.19.243',
 '3.7.22.4',
 '3.7.22.19',
 '3.8.0.215',
 '3.9.83.159',
 '3.11.63.161',
 '3.12.0.257',
 '3.12.17.257',
 '3.12.48.28',
 '3.12.48.219',
 '3.12.93.28',
 '3.12.93.219',
 '3.12.136.38',
 '3.12.136.85',
 '3.18.1.199',
 '3.18.1.220',
 '3.18.72.28',
 '3.18.72.79',
 '3.18.72.85',
 '3.18.85.28',
 '3.18.85.79',
 '3.18.85.85',
 '3.18.92.28',
 '3.18.92.79',
 '3.18.92.85',
 '3.21.1.79',
 '3.21.1.173',
 '3.21.3.28',
 '3.21.3.79',
 '3.21.17.28',
 '3.21.18.79',
 '3.21.38.223',
 '3.21.38.248',
 '3.21.43.223',
 '3.21.43.248',
 '3.21.67.28',
 '3.21.67.79',
 '3.21.82.28',
 '3.21.82.79',
 '3.22.12.243',
 '3.22.19.242',
 '3.23.17.139',
 '3.23.136.47',
 '3.23.136.85',
 '3.25.17.3',
 '3.26.0.47',
 '3.26.0.216',
 '3.26.17.185',
 '3.26.17.216',
 '3.26.38.37',
 '3.26.38.47',
 '3.26.38.73',
 '3.26.38.93',
 '3.26.38.150',
 '3.26.136.47',
 '3.26.136.66',
 '3.26.136.216',
 '3.31.1.143',
 '3.33.1.219',
 '3.33.1.257',
 '3.35.0.2

In [6]:
if build_samples:
    build_sample_dfs(
        distinct_ids=distinct_ids,
        save_dir=os.path.join(save_dir_base, 'sample_dfs'), 
        years = [2020, 2021, 2022, 2023], 
        limit_per_month=10000, 
        conn_db=Utilities.get_athena_prod_aws_connection()
    )

In [7]:
sample_df_paths = Utilities.find_all_paths(
    base_dir=os.path.join(save_dir_base, 'sample_dfs'), 
    glob_pattern=r'df_*.pkl'
)
#-----
ids_with_paths_dict = {}
for path_i in sample_df_paths:
    name_i = Path(path_i).name
    name_i = re.sub(r'df_(.*).pkl', r'\1', name_i)
    name_i = name_i.replace('_', '.')
    assert(name_i not in ids_with_paths_dict.keys())
    ids_with_paths_dict[name_i] = path_i

In [8]:
# natsorted(ids_with_paths_dict.keys())

# ---------------------------------------------------------------------------------------------------

In [9]:
dict_documented = {}

# ---------------------------------------------------------------------------------------------------

# '3.2.0.28'

In [None]:
df_3_2_0_28 = pd.read_pickle(ids_with_paths_dict['3.2.0.28'])
#-----
print(f"shape[0] = {df_3_2_0_28.shape[0]}")
print(f"# Unique reasons = {df_3_2_0_28['reason'].nunique()}")
print(df_3_2_0_28['reason'].unique())

In [None]:
dict_3_2_0_28 = {
        # Examples
        # 'Power Loss cleared for meter 00:13:50:05:ff:0d:dd:17'
        '3.2.0.28':    [
            #-----
            (r'(Power Loss cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1')
#             #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_2_0_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_2_0_28['3.2.0.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_2_0_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_2_0_28}

# '3.2.0.85'

In [None]:
df_3_2_0_85 = pd.read_pickle(ids_with_paths_dict['3.2.0.85'])
#-----
print(f"shape[0] = {df_3_2_0_85.shape[0]}")
print(f"# Unique reasons = {df_3_2_0_85['reason'].nunique()}")
print(df_3_2_0_85['reason'].unique())

In [None]:
dict_3_2_0_85 = {
        # Examples
        # 'Power loss detected on meter 00:13:50:05:ff:0d:dd:17'
        '3.2.0.85':    [
            #-----
            (r'(Power loss detected)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_2_0_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_2_0_85['3.2.0.85']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_2_0_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_2_0_85}

# '3.2.22.28'

In [None]:
df_3_2_22_28 = pd.read_pickle(ids_with_paths_dict['3.2.22.28'])
#-----
print(f"shape[0] = {df_3_2_22_28.shape[0]}")
print(f"# Unique reasons = {df_3_2_22_28['reason'].nunique()}")
print(df_3_2_22_28['reason'].unique())

In [None]:
dict_3_2_22_28 = {
        # Examples
        # 'Low Battery cleared for meter 00:13:50:02:00:0a:9e:1e.'
        '3.2.22.28':    [
            #-----
            (r'(Low Battery cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_2_22_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_2_22_28['3.2.22.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_2_22_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_2_22_28}

# '3.2.22.150'

In [47]:
df_3_2_22_150 = pd.read_pickle(ids_with_paths_dict['3.2.22.150'])
#-----
print(f"shape[0] = {df_3_2_22_150.shape[0]}")
print(f"# Unique reasons = {df_3_2_22_150['reason'].nunique()}")
print(df_3_2_22_150['reason'].unique())

shape[0] = 92
# Unique reasons = 48
['Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:9e:1e.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:05:ff:02:72:01.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:a2:c6.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:05:ff:06:67:48.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:6d:70.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:a8:e3.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:a8:a6.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:01:01:5a:59:20.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:01:01:60:ed:1e.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:f9:bf.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:05:ff:1e:61:86.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:bd:72.'
 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:

In [51]:
dict_3_2_22_150 = {
        # Examples
        # 'Low Battery (C1219 Table 3) occurred for meter 00:13:50:02:00:0a:9e:1e.'
        '3.2.22.150':    [
            #-----
#             (r'(Low Battery\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [52]:
curated_reasons = []
for reason_i in df_3_2_22_150['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_2_22_150['3.2.22.150']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Low Battery (C1219 Table 3)


In [53]:
natsorted(curated_reasons)

['Low Battery (C1219 Table 3)']

In [None]:
assert(len(set(dict_3_2_22_150.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_2_22_150}

# '3.7.19.242'

In [None]:
df_3_7_19_242 = pd.read_pickle(ids_with_paths_dict['3.7.19.242'])
#-----
print(f"shape[0] = {df_3_7_19_242.shape[0]}")
print(f"# Unique reasons = {df_3_7_19_242['reason'].nunique()}")
print(df_3_7_19_242['reason'].unique())

In [None]:
dict_3_7_19_242 = {
        # Examples
        # 'Test Mode Started occurred for meter 00:13:50:05:ff:1d:2d:4a.'
        # 'Meter event Test Mode Started  Time event occurred on meter = 01/20/2020 12:56:50  Sequence number = 56  User id = 1  Event argument = 00-00'
        # SEEMS RARE, BUT HAVE SEEN:
        # 'Meter is currently in test mode.'
        '3.7.19.242':    [
            #-----
            (r'(Test Mode Started)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
            (r'(Meter event Test Mode Started)\s*Time event occurred on meter = .* Sequence number = .* User id = .*  Event argument = .*', r'\1'), 
            #-----
            (r'(Meter event Test Mode Started).*', 'Test Mode Started'), 
            #-----
            (r'(Meter is currently in test mode)[\s\.]*', r'\1')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_7_19_242['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_7_19_242['3.7.19.242']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_7_19_242.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_7_19_242}

# '3.7.19.243'

In [None]:
df_3_7_19_243 = pd.read_pickle(ids_with_paths_dict['3.7.19.243'])
#-----
print(f"shape[0] = {df_3_7_19_243.shape[0]}")
print(f"# Unique reasons = {df_3_7_19_243['reason'].nunique()}")
print(df_3_7_19_243['reason'].unique())

In [None]:
dict_3_7_19_243 = {
        # Examples
        # 'Test Mode Stopped occurred for meter 00:13:50:03:ff:06:20:96.'
        # 'Meter event Test Mode Stopped  Time event occurred on meter = 01/02/2020 08:25:04  Sequence number = 573  User id = 1  Event argument = 00-00'
        '3.7.19.243':    [
            #-----
            (r'(Test Mode Stopped)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
            (r'(Meter event Test Mode Stopped)\s*Time event occurred on meter = .* Sequence number = .* User id = .*  Event argument = .*', r'\1'), 
            #-----
            (r'(Meter event Test Mode Stopped).*', 'Test Mode Stopped')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_7_19_243['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_7_19_243['3.7.19.243']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_7_19_243.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_7_19_243}

# '3.7.22.4'

In [None]:
df_3_7_22_4 = pd.read_pickle(ids_with_paths_dict['3.7.22.4'])
#-----
print(f"shape[0] = {df_3_7_22_4.shape[0]}")
print(f"# Unique reasons = {df_3_7_22_4['reason'].nunique()}")
print(df_3_7_22_4['reason'].unique())

In [None]:
dict_3_7_22_4 = {
        # Examples
        # 'NIC operating on backup battery: 00:13:50:ff:fe:70:96:ac, Reboot Count: 6, NIC timestamp: 2021-01-14T10:17:54.000-05:00, Received timestamp: 2021-01-14T10:17:55.728-05:00'
        '3.7.22.4':    [
            #-----
            (r'(NIC operating on backup battery)\:(\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?), Reboot Count: .*, NIC timestamp: .*, Received timestamp: .*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_7_22_4['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_7_22_4['3.7.22.4']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_7_22_4.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_7_22_4}

# '3.7.22.19'

In [None]:
df_3_7_22_19 = pd.read_pickle(ids_with_paths_dict['3.7.22.19'])
#-----
print(f"shape[0] = {df_3_7_22_19.shape[0]}")
print(f"# Unique reasons = {df_3_7_22_19['reason'].nunique()}")
print(df_3_7_22_19['reason'].unique())

In [None]:
dict_3_7_22_19 = {
        # Examples:
        # All seem to be: 'NIC backup battery inactive'
        # So, don't need to run any regex, but should still so no flags raised
        '3.7.22.19':    [
            #-----
            (r'(NIC backup battery inactive).*', r'\1')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_7_22_19['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_7_22_19['3.7.22.19']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_7_22_19.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_7_22_19}

# '3.8.0.215'

In [None]:
df_3_8_0_215 = pd.read_pickle(ids_with_paths_dict['3.8.0.215'])
#-----
print(f"shape[0] = {df_3_8_0_215.shape[0]}")
print(f"# Unique reasons = {df_3_8_0_215['reason'].nunique()}")
print(df_3_8_0_215['reason'].unique())

In [None]:
dict_3_8_0_215 = {
        # Examples
        # 'Demand Reset occurred for meter 00:13:50:05:ff:18:b3:c1.'
        '3.8.0.215':    [
            #-----
            (r'(Demand Reset)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1') 
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_8_0_215['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_8_0_215['3.8.0.215']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_8_0_215.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_8_0_215}

# '3.9.83.159'

In [None]:
df_3_9_83_159 = pd.read_pickle(ids_with_paths_dict['3.9.83.159'])
#-----
print(f"shape[0] = {df_3_9_83_159.shape[0]}")
print(f"# Unique reasons = {df_3_9_83_159['reason'].nunique()}")
print(df_3_9_83_159['reason'].unique())

In [None]:
dict_3_9_83_159 = {
        # Examples
        # 'Meter Program Seal mismatch for Device [Device ID, MAC Id] = [1ND785727916NMD06, 00:13:50:05:ff:1d:16:96] - program seal = 0bda0569c8e6ad375375eaa177034d3f61a80478 vs. UIQ program seal = e2dbe1ccbeef1f2df38adffeeafdb86fbd8fb006 at 2021-01-08T06:16:49.116-05:00.'
        '3.9.83.159':    [
            #-----
            (r'(Meter Program Seal mismatch for Device)\s*\[Device ID, MAC Id\]\s*=\s*.*', r'\1')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_9_83_159['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_9_83_159['3.9.83.159']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_9_83_159.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_9_83_159}

# '3.11.63.161'

In [10]:
df_3_11_63_161 = pd.read_pickle(ids_with_paths_dict['3.11.63.161'])
#-----
print(f"shape[0] = {df_3_11_63_161.shape[0]}")
print(f"# Unique reasons = {df_3_11_63_161['reason'].nunique()}")
print(df_3_11_63_161['reason'].unique())

shape[0] = 72980
# Unique reasons = 62937
['Requested operation JOB_OP_TYPE_ARB_METER_COMMAND could not be applied to the given device type and firmware version. Device: 00:13:50:05:ff:2d:e0:5f, DeviceType: DID_SUBTYPE_I210CRD_HAN, Firmware Version: 3.12.5c'
 'Requested operation JOB_OP_TYPE_ARB_METER_COMMAND could not be applied to the given device type and firmware version. Device: 00:13:50:05:ff:2d:e3:b7, DeviceType: DID_SUBTYPE_I210CRD_HAN, Firmware Version: 3.12.5c'
 'Requested operation JOB_OP_TYPE_ARB_METER_COMMAND could not be applied to the given device type and firmware version. Device: 00:13:50:05:ff:2e:28:ab, DeviceType: DID_SUBTYPE_I210CRD_HAN, Firmware Version: 3.12.5c'
 ...
 'Requested operation JOB_OP_NEW_DATA_READ could not be applied to the given device type and firmware version. Device: 00:13:50:05:ff:3e:15:38, DeviceType: DID_SUBTYPE_I210CRD_HAN, Firmware Version: 3.12.5c'
 'Requested operation JOB_OP_PROVISION_GET_STATUS could not be applied to the given device typ

In [14]:
dict_3_11_63_161 = {
        # Examples:
        # 'Requested operation JOB_OP_TYPE_ARB_METER_COMMAND could not be applied to the given device type and firmware version. Device: 00:13:50:05:ff:2d:ff:34, DeviceType: DID_SUBTYPE_I210CRD_HAN, Firmware Version: 3.12.5c'
        '3.11.63.161':  [
            #-----
            # Below, Device sometimes blank, sometime MAC-esque
            (r'(Requested operation)\s*(.*?)\s*(could not be applied) to the given device type and firmware version.\s*Device\:?(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)?,\s*DeviceType\:?.*,\s*Firmware Version\:?.*', r'\1 \3: \2')
            #-----
        ]
}


In [15]:
curated_reasons = []
for reason_i in df_3_11_63_161['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_11_63_161['3.11.63.161']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Requested operation could not be applied: JOB_OP_PROVISION_DISCONNECT
Requested operation could not be applied: JOB_OP_NIC_NEW_DATA_READ
Requested operation could not be applied: JOB_OP_PROVISION_GET_STATUS
Requested operation could not be applied: JOB_OP_TYPE_DEV_PROG_READ
Requested operation could not be applied: JOB_OP_NEW_DATA_READ
Requested operation could not be applied: JOB_OP_REGISTER_CURR_READ
Requested operation could not be applied: JOB_OP_REGISTER_SELF_READ
Requested operation could not be applied: JOB_OP_PROVISION_CONNECT
Requested operation could not be applied: JOB_OP_LP_READ
Requested operation could not be applied: JOB_OP_TYPE_ARB_METER_COMMAND
Requested operation could not be applied: JOB_OP_TYPE_DEMAND_RESET


In [None]:
assert(len(set(dict_3_11_63_161.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_11_63_161}

# '3.12.0.257'

In [None]:
df_3_12_0_257 = pd.read_pickle(ids_with_paths_dict['3.12.0.257'])
#-----
print(f"shape[0] = {df_3_12_0_257.shape[0]}")
print(f"# Unique reasons = {df_3_12_0_257['reason'].nunique()}")
print(df_3_12_0_257['reason'].unique())

In [None]:
dict_3_12_0_257 = {
        # Examples
        # 'Meter event Tamper Attempt Suspected  Time event occurred on meter = 01/10/2020 10:03:37  Sequence number = 1189  User id = 0  Event argument = 00-00'
        # 'Meter detected a Tamper Attempt.'
        # 'Tamper attempt detected.'
        '3.12.0.257':   [
            #-----
            (r'(Meter event Tamper Attempt Suspected)\s*Time event occurred on meter = .* Sequence number = .* User id = .*  Event argument = .*', r'\1'), 
            #-----
            (r'(Meter event Tamper Attempt Suspected).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
            (r'(Meter detected a Tamper Attempt|Tamper attempt detected).*', 'Meter detected a Tamper Attempt'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_12_0_257['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_0_257['3.12.0.257']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_0_257.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_0_257}

# '3.12.17.257'

In [None]:
df_3_12_17_257 = pd.read_pickle(ids_with_paths_dict['3.12.17.257'])
#-----
print(f"shape[0] = {df_3_12_17_257.shape[0]}")
print(f"# Unique reasons = {df_3_12_17_257['reason'].nunique()}")
print(df_3_12_17_257['reason'].unique())

In [None]:
dict_3_12_17_257 = {
        # Examples:
        # 'Tamper (Meter Inversion) detected on meter 00:13:50:05:ff:27:3f:a0.'
        '3.12.17.257':  [
            #-----
            (r'(Tamper\s*(?:\(.*\))?\s*detected)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\.?', '') 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_12_17_257['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_17_257['3.12.17.257']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_17_257.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_17_257}

# '3.12.48.28'

In [None]:
df_3_12_48_28 = pd.read_pickle(ids_with_paths_dict['3.12.48.28'])
#-----
print(f"shape[0] = {df_3_12_48_28.shape[0]}")
print(f"# Unique reasons = {df_3_12_48_28['reason'].nunique()}")
print(df_3_12_48_28['reason'].unique())

In [None]:
dict_3_12_48_28 = {
        # Examples:
        # 'Diag1 Condition cleared for meter 00:13:50:05:ff:04:55:a1.'
        # SEEMS RARE, BUT HAVE SEEN:
        # Reverse energy cleared for meter 00:13:50:05:ff:0d:dd:17.
        '3.12.48.28':    [
            #-----
            (r'((?:Diag1 Condition|Reverse energy) cleared)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_12_48_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_48_28['3.12.48.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_48_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_48_28}

# '3.12.48.219'

In [17]:
df_3_12_48_219 = pd.read_pickle(ids_with_paths_dict['3.12.48.219'])
#-----
print(f"shape[0] = {df_3_12_48_219.shape[0]}")
print(f"# Unique reasons = {df_3_12_48_219['reason'].nunique()}")
print(df_3_12_48_219['reason'].unique())

shape[0] = 2459882
# Unique reasons = 27622
['KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase C Voltage'
 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase A Current'
 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase B Voltage, Phase C Voltage'
 ...
 'Diag1: Polarity, Cross Phase, Reverse Energy Flow occurred for meter 00:13:50:05:ff:2c:f6:ef. Angle out of tolerance [Voltage - Phase  C].'
 'Diag1: Polarity, Cross Phase, Reverse Energy Flow occurred for meter 00:13:50:05:ff:24:a6:88. Angle out of tolerance [Voltage - Phase  C].'
 'Diag1: Polarity, Cross Phase, Reverse Energy Flow occurred for meter 00:13:50:05:ff:34:8d:e9. Angle out of tolerance [Voltage - Phase  B][Current - Phase  B].']


In [30]:
dict_3_12_48_219 = {
        #-------------------------
        # Examples:
        # 'Diag1: Polarity, Cross Phase, Reverse Energy Flow occurred for meter 00:13:50:03:00:34:b7:e9. Angle out of tolerance [Voltage - Phase  B].'
        # 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase A Current'
        # 'Service disconnect operation occurred with status: Close operation started'
        # 'Reverse energy (C1219 Table 3) occurred for meter 00:13:50:05:ff:0d:dd:17.'
        '3.12.48.219':  [
            #-----
            (r'(Diag1: Polarity, Cross Phase, Reverse Energy Flow)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)\s*(Angle out of tolerance) \[.*\]', r'\1: \2'), 
            #-----
            (r'(KV2c meter event Polarity, Cross Phase, Reverse Energy Flow) Diagnostic flags:.*', r'\1'), 
            #-----
            (r'(Service disconnect operation occurred with status:) (Open|Close) operation (started|succeeded)\s*', r'\1 \2 \3'), 
            #-----
            (r'((?:Diag1 Condition|Reverse energy)\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
        ]
}

In [31]:
curated_reasons = []
for reason_i in df_3_12_48_219['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_48_219['3.12.48.219']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Diag1: Polarity, Cross Phase, Reverse Energy Flow: Angle out of tolerance.
Service disconnect operation occurred with status: Close started
Service disconnect operation occurred with status: Open started
Service disconnect operation occurred with status: Close succeeded
Service disconnect operation occurred with status: Open succeeded
Reverse energy (C1219 Table 3)
KV2c meter event Polarity, Cross Phase, Reverse Energy Flow


In [29]:
natsorted(curated_reasons)

['Diag1: Polarity, Cross Phase, Reverse Energy Flow: Angle out of tolerance.',
 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow',
 'Reverse energy (C1219 Table 3) ',
 'Service disconnect operation occurred with status: Close started',
 'Service disconnect operation occurred with status: Close started ',
 'Service disconnect operation occurred with status: Close succeeded ',
 'Service disconnect operation occurred with status: Open started ',
 'Service disconnect operation occurred with status: Open succeeded']

In [None]:
assert(len(set(dict_3_12_48_219.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_48_219}

# '3.12.93.28'

In [None]:
df_3_12_93_28 = pd.read_pickle(ids_with_paths_dict['3.12.93.28'])
#-----
print(f"shape[0] = {df_3_12_93_28.shape[0]}")
print(f"# Unique reasons = {df_3_12_93_28['reason'].nunique()}")
print(df_3_12_93_28['reason'].unique())

In [None]:
dict_3_12_93_28 = {
        # Examples
        # 'Cleared: Meter00:13:50:05:ff:07:88:4b, cleared tamper detection (C1219 Table 3)'
        '3.12.93.28': [
            #-----
            (r'(Cleared: Meter)(?:\s*(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?), (cleared tamper detection.*)', r'\1 \2'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_12_93_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_93_28['3.12.93.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_93_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_93_28}

# '3.12.93.219'

In [54]:
df_3_12_93_219 = pd.read_pickle(ids_with_paths_dict['3.12.93.219'])
#-----
print(f"shape[0] = {df_3_12_93_219.shape[0]}")
print(f"# Unique reasons = {df_3_12_93_219['reason'].nunique()}")
print(df_3_12_93_219['reason'].unique())

shape[0] = 3900000
# Unique reasons = 426463
['Meter event Reverse Rotation Detected  Time event occurred on meter = 01/31/2020 15:58:24  Sequence number = 16908  User id = 0  Event argument = 00-00'
 'Meter detected a Reverse Rotation.'
 'Meter event Reverse Rotation Detected  Time event occurred on meter = 01/31/2020 12:55:31  Sequence number = 36052  User id = 0  Event argument = 00-00'
 ...
 'Meter event Reverse Rotation Detected  Time event occurred on meter = 03/09/2023 09:23:03  Sequence number = 1965  User id = 0  Event argument = 00-00 '
 'Meter event Reverse Rotation Detected  Time event occurred on meter = 03/09/2023 09:24:39  Sequence number = 3636  User id = 0  Event argument = 00-00 '
 'Meter event Reverse Rotation Detected  Time event occurred on meter = 03/09/2023 10:06:33  Sequence number = 596  User id = 0  Event argument = 00-00 ']


In [58]:
dict_3_12_93_219 = {
        # Examples:
        # 'Meter event Reverse Rotation Detected  Time event occurred on meter = 01/28/2020 15:31:34  Sequence number = 854  User id = 0  Event argument = 00-00'
        # 'Meter detected a Reverse Rotation.'
        # 'Meter 00:13:50:05:ff:0b:ae:84, detected tampering (C1219 Table 3)'
        # SEEMS RARE, BUT HAVE SEEN:
        # Meter event Demand Reset Occurred  Time event occurred on meter = 10/25/2020 10:07:33  Sequence number = 1256  User id = 1  Event argument = 00-00
        '3.12.93.219':    [
            #-----
            (r'(Meter)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\.,]?)\s*(detected tampering\s*(?:\(.*\))?)\s*', r'\1 \2'), 
            #-----
            (r'(Meter event (?:Reverse Rotation Detected|Demand Reset Occurred))\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'),
            #-----
            (r'(Meter event Reverse Rotation Detected).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
            (r'(Meter detected a Reverse Rotation).*', 'Meter event Reverse Rotation Detected'), 
            #-----
        ]
}

In [59]:
curated_reasons = []
for reason_i in df_3_12_93_219['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_93_219['3.12.93.219']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Meter detected tampering (C1219 Table 3)
Meter event Reverse Rotation Detected
Meter event Demand Reset Occurred


In [57]:
natsorted(curated_reasons)

['Meter detected tampering (C1219 Table 3)',
 'Meter event Demand Reset Occurred',
 'Meter event Reverse Rotation Detected']

In [60]:
natsorted(curated_reasons)

['Meter detected tampering (C1219 Table 3)',
 'Meter event Demand Reset Occurred',
 'Meter event Reverse Rotation Detected']

In [None]:
assert(len(set(dict_3_12_93_219.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_93_219}

# '3.12.136.38'

In [None]:
df_3_12_136_38 = pd.read_pickle(ids_with_paths_dict['3.12.136.38'])
#-----
print(f"shape[0] = {df_3_12_136_38.shape[0]}")
print(f"# Unique reasons = {df_3_12_136_38['reason'].nunique()}")
print(df_3_12_136_38['reason'].unique())

In [None]:
dict_3_12_136_38 = {
        # Examples:
        # 'A NET_MGMT command was sent from fdc9:ccbe:52c0:cbc0:250:56ff:feb5:91c7 with a key that has insufficient privileges to execute it. ID: 53 READ SUBID: 65535 ASSOC_ID: 8448'
        # 'A NET_MGMT command was sent from fdc9:ccbe:52c0:cbc0:a6ba:99ff:fe12:1b0e with a key that has insufficient privileges to execute it. ID: 207 WRITE SUBID: 65535 ASSOC_ID: 768'
        '3.12.136.38':  [
            #-----
            # BELOW, ID seemed meter-specific (or, at least, not general), as READ/WRITE SUBID and ASSOC_ID seem to only take on a few different values
            # Therefore, group 3, for ID, is not included in output
            (r'A?\s+?(.*command was sent) from(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(with a key that has insufficient privileges) to execute it.*(ID:\s*[0-9]+)\s*((?:READ|WRITE)\s*SUBID:\s*[0-9]+)\s*(ASSOC_ID:\s*[0-9]+).*', r'\1 \2: \4 \5'), 
            #-----
        ]
}


In [None]:
curated_reasons = []
for reason_i in df_3_12_136_38['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_136_38['3.12.136.38']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_136_38.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_136_38}

# '3.12.136.85'

In [None]:
df_3_12_136_85 = pd.read_pickle(ids_with_paths_dict['3.12.136.85'])
#-----
print(f"shape[0] = {df_3_12_136_85.shape[0]}")
print(f"# Unique reasons = {df_3_12_136_85['reason'].nunique()}")
print(df_3_12_136_85['reason'].unique())

In [None]:
dict_3_12_136_85 = {
        # Examples:
        # 'NET_MGMT command failed consecutively for 1 times for fd37:ec90:20c2:5c58:250:56ff:feb5:1010. WRITE'
        # 'Secure association operation failed consecutively for 1 times for fdc9:ccbe:52c0:d3a0:250:56ff:feb5:ec3c. 16270'
        # 'failed consecutively for 1 times for 0:0:0:0:0:0:0:0.'
        # 'N/A failed consecutively for 1 times for 0:0:0:0:0:0:0:0. N/A'
        '3.12.136.85':  [
            #-----
            (r'(.*) failed consecutively for [0-9]+ times?(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1 failed consecutively for 1 or more times'), 
            #-----
            (r'^failed consecutively for [0-9]+ times? for 0:0:0:0:0:0:0:0.$', 'N/A failed consecutively for 1 or more times'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_12_136_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_12_136_85['3.12.136.85']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_12_136_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_12_136_85}

# '3.18.1.199'

In [None]:
df_3_18_1_199 = pd.read_pickle(ids_with_paths_dict['3.18.1.199'])
#-----
print(f"shape[0] = {df_3_18_1_199.shape[0]}")
print(f"# Unique reasons = {df_3_18_1_199['reason'].nunique()}")
print(df_3_18_1_199['reason'].unique())

In [None]:
dict_3_18_1_199 = {
        # Examples:
        # 'Meter detected a RAM failure.'
        # So, don't need to run any regex, but should still so no flags raised
        '3.18.1.199':    [
            #-----
            (r'(Meter detected a RAM failure).*', r'\1')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_1_199['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_1_199['3.18.1.199']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_1_199.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_1_199}

# '3.18.1.220'

In [None]:
df_3_18_1_220 = pd.read_pickle(ids_with_paths_dict['3.18.1.220'])
#-----
print(f"shape[0] = {df_3_18_1_220.shape[0]}")
print(f"# Unique reasons = {df_3_18_1_220['reason'].nunique()}")
print(df_3_18_1_220['reason'].unique())

In [None]:
dict_3_18_1_220 = {
        # Examples:
        # All seem to be: 'Meter detected a ROM failure.'
        # So, don't need to run any regex, but should still so no flags raised
        '3.18.1.220':    [
            #-----
            (r'(Meter detected a ROM failure).*', r'\1')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_1_220['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_1_220['3.18.1.220']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_1_220.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_1_220}

# '3.18.72.28'

In [None]:
df_3_18_72_28 = pd.read_pickle(ids_with_paths_dict['3.18.72.28'])
#-----
print(f"shape[0] = {df_3_18_72_28.shape[0]}")
print(f"# Unique reasons = {df_3_18_72_28['reason'].nunique()}")
print(df_3_18_72_28['reason'].unique())

In [None]:
dict_3_18_72_28 = {
        # Examples:
        # 'NVRAM Error cleared for meter 00:13:50:05:ff:0d:7e:64.'
        '3.18.72.28':   [
            #-----
            (r'(NVRAM Error cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_72_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_72_28['3.18.72.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_72_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_72_28}

# '3.18.72.79'

In [61]:
df_3_18_72_79 = pd.read_pickle(ids_with_paths_dict['3.18.72.79'])
#-----
print(f"shape[0] = {df_3_18_72_79.shape[0]}")
print(f"# Unique reasons = {df_3_18_72_79['reason'].nunique()}")
print(df_3_18_72_79['reason'].unique())

shape[0] = 6143
# Unique reasons = 3524
['NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:11:10:cc.'
 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:10:fd:3b.'
 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1d:77:d9.'
 ...
 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:04:8d:47.'
 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:04:8d:64.'
 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:00:46:a7.']


In [66]:
dict_3_18_72_79 = {
        # Examples:
        # 'NVRAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:20:91:6c.'
        '3.18.72.79':   [
            #-----
            (r'(NVRAM Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
        ]
}

In [67]:
curated_reasons = []
for reason_i in df_3_18_72_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_72_79['3.18.72.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

NVRAM Error (C1219 Table 3)
NVRAM Error (C1219 Table 3: Er000200)


In [64]:
natsorted(curated_reasons)

['NVRAM Error (C1219 Table 3) ', 'NVRAM Error (C1219 Table 3: Er000200) ']

In [68]:
natsorted(curated_reasons)

['NVRAM Error (C1219 Table 3)', 'NVRAM Error (C1219 Table 3: Er000200)']

In [None]:
assert(len(set(dict_3_18_72_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_72_79}

# '3.18.72.85'

In [24]:
df_3_18_72_85 = pd.read_pickle(ids_with_paths_dict['3.18.72.85'])
#-----
print(f"shape[0] = {df_3_18_72_85.shape[0]}")
print(f"# Unique reasons = {df_3_18_72_85['reason'].nunique()}")
print(df_3_18_72_85['reason'].unique())

shape[0] = 1815480
# Unique reasons = 1638510
['Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 01/16/2020 15:00:11  Sequence number = 1339  User id = 0  Event argument = 00-00'
 'Meter detected a nonvolatile memory failure.'
 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 01/16/2020 15:52:15  Sequence number = 33574  User id = 0  Event argument = 00-00'
 ...
 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 07/03/2023 18:37:09  Sequence number = 31521  User id = 0  Event argument = 00-00'
 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 9/19/2023 4:21:16 PM  Sequence number = 16663  User id = 0  Event argument = 00-00'
 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 12/11/2023 01:17:41  Sequence number = 17951  User id = 44624  Event argument = FF-00']


In [28]:
dict_3_18_72_85 = {
        # Examples:
        # 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 01/21/2020 09:45:01  Sequence number = 3335  User id = 0  Event argument = 00-00'
        # 'null'????????????
        # SEEMS RARE, BUT HAVE SEEN:
        # 'Meter event Demand Reset Occurred  Time event occurred on meter = 01/29/2023  Sequence number = 711  User id = 1  Event argument = 00-00 '
        # 'Meter event Reset List Pointers  Time event occurred on meter = 09/07/2021  Sequence number = 227  User id = 0  Event argument = 03-00'
        # 'KV2c meter event Received kWh Caution Diagnostic flags:Phase A Voltage'
        # 'Universal event Unsupported Event(41) with priority Alarm. '
        '3.18.72.85':   [
            #-----
#             (r'(Meter event (?:Nonvolatile Memory Failure Detected|Demand Reset Occurred|Reset List Pointers))\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'),
            (r'(Meter event.*?)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            # FAILSAFE in case Sequence number etc. not found above
            # NOTE: Cannot do (Meter event.*).*, as that will reduce everything altered above donw to simply 'Meter event'
            #       Also, if any additional info (e.g, Sequence number) is to be extracted using above, this must be removed
            (r'(Meter event (?:Nonvolatile Memory Failure Detected|Demand Reset Occurred|Reset List Pointers)).*', r'\1'),
            #-----
            (r'(Meter detected a nonvolatile memory failure).*', 'Meter event Nonvolatile Memory Failure Detected'), 
            #-----
            (r'(KV2c meter event .*?)\s*(Diagnostic flags)\s*\:\s*(.*?)\s*$', r'\1 (\2 = \3)'),
            #-----
            (r'(Universal event Unsupported Event\s*(?:\(.*\))?\s*with priority Alarm)[\s*\.]*', r'\1'), 
            #-----
            ('null', 'Reason is null, ID=3.18.72.85')
            #-----
        ]
}

In [29]:
curated_reasons = []
for reason_i in df_3_18_72_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_72_85['3.18.72.85']
    )
#     if curated_reason_i=='Meter event ':
#         print(reason_i)
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*sorted(curated_reasons), sep='\n')

KV2c meter event Received kWh Caution (Diagnostic flags = Phase A Voltage)
Meter event Demand Reset Occurred
Meter event Nonvolatile Memory Failure Detected
Meter event Reset List Pointers
Reason is null, ID=3.18.72.85
Universal event Unsupported Event(41) with priority Alarm


In [27]:
natsorted(curated_reasons)

['KV2c meter event Received kWh Caution  (Diagnostic flags = Phase A Voltage)',
 'Meter event Demand Reset Occurred',
 'Meter event Nonvolatile Memory Failure Detected',
 'Meter event Reset List Pointers',
 'Reason is null, ID=3.18.72.85',
 'Universal event Unsupported Event(41) with priority Alarm']

In [30]:
natsorted(curated_reasons)

['KV2c meter event Received kWh Caution (Diagnostic flags = Phase A Voltage)',
 'Meter event Demand Reset Occurred',
 'Meter event Nonvolatile Memory Failure Detected',
 'Meter event Reset List Pointers',
 'Reason is null, ID=3.18.72.85',
 'Universal event Unsupported Event(41) with priority Alarm']

In [None]:
assert(len(set(dict_3_18_72_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_72_85}

In [None]:
test_str = 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 07/03/2023 18:37:09  Sequence number = 31521  User id = 0  Event argument = 00-00'
pattern = r'(Meter event .*?)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*'
repl = r'\1'

re.sub(pattern, repl, test_str)

# '3.18.85.28'

In [None]:
df_3_18_85_28 = pd.read_pickle(ids_with_paths_dict['3.18.85.28'])
#-----
print(f"shape[0] = {df_3_18_85_28.shape[0]}")
print(f"# Unique reasons = {df_3_18_85_28['reason'].nunique()}")
print(df_3_18_85_28['reason'].unique())

In [None]:
dict_3_18_85_28 = {
        # Examples:
        # 'RAM Error cleared for meter 00:13:50:05:ff:0d:dd:17.'
        '3.18.85.28':    [
            #-----
            (r'(RAM Error cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_85_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_85_28['3.18.85.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_85_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_85_28}

# '3.18.85.79'

In [69]:
df_3_18_85_79 = pd.read_pickle(ids_with_paths_dict['3.18.85.79'])
#-----
print(f"shape[0] = {df_3_18_85_79.shape[0]}")
print(f"# Unique reasons = {df_3_18_85_79['reason'].nunique()}")
print(df_3_18_85_79['reason'].unique())

shape[0] = 1
# Unique reasons = 1
['RAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:0d:dd:17.']


In [76]:
dict_3_18_85_79 = {
        # Examples
        # 'RAM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:0d:dd:17.'
        '3.18.85.79':   [
            #-----
#             (r'(RAM Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [77]:
curated_reasons = []
for reason_i in df_3_18_85_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_85_79['3.18.85.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

RAM Error (C1219 Table 3)


In [72]:
natsorted(curated_reasons)

['RAM Error (C1219 Table 3) ']

In [78]:
natsorted(curated_reasons)

['RAM Error (C1219 Table 3)']

In [None]:
assert(len(set(dict_3_18_85_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_85_79}

# '3.18.85.85'

In [None]:
df_3_18_85_85 = pd.read_pickle(ids_with_paths_dict['3.18.85.85'])
#-----
print(f"shape[0] = {df_3_18_85_85.shape[0]}")
print(f"# Unique reasons = {df_3_18_85_85['reason'].nunique()}")
print(df_3_18_85_85['reason'].unique())

In [None]:
dict_3_18_85_85 = {
        # Examples
        # 'Meter detected a RAM failure.'
        # 'Meter event Ram Failure Detected  Time event occurred on meter = 7/27/2021 12:45:53 PM  Sequence number = 89  User id = 34891  Event argument = 7D-75'
        '3.18.85.85':   [
            #-----
            (r'(Meter event Ram Failure Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Ram Failure Detected).*', r'\1'),
            #-----
            (r'(Meter detected a RAM failure).*', 'Meter event Ram Failure Detected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_85_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_85_85['3.18.85.85']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_85_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_85_85}

# '3.18.92.28'

In [None]:
df_3_18_92_28 = pd.read_pickle(ids_with_paths_dict['3.18.92.28'])
#-----
print(f"shape[0] = {df_3_18_92_28.shape[0]}")
print(f"# Unique reasons = {df_3_18_92_28['reason'].nunique()}")
print(df_3_18_92_28['reason'].unique())

In [None]:
dict_3_18_92_28 = {
        # Examples
        # 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:ba:d0.'
        '3.18.92.28':   [
            #-----
            (r'(ROM Error cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}


In [None]:
curated_reasons = []
for reason_i in df_3_18_92_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_92_28['3.18.92.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_92_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_92_28}

# '3.18.92.79'

In [79]:
df_3_18_92_79 = pd.read_pickle(ids_with_paths_dict['3.18.92.79'])
#-----
print(f"shape[0] = {df_3_18_92_79.shape[0]}")
print(f"# Unique reasons = {df_3_18_92_79['reason'].nunique()}")
print(df_3_18_92_79['reason'].unique())

shape[0] = 29
# Unique reasons = 26
['ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:ba:d0.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:35:ed.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:0f:6e:8e.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:3e:4e.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:6e:54.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:04:1a:66.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:27:df.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:0f:6e:d0.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:13:c1:55.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:0f:0e:34.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:3c:9a.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:c7:11.'
 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:c2:93.'
 'ROM Error (C1

In [86]:
dict_3_18_92_79 = {
        # Examples
        # 'ROM Error (C1219 Table 3) occurred for meter 00:13:50:05:ff:1f:ba:d0.'
        '3.18.92.79':   [
            #-----
#             (r'(ROM Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [87]:
curated_reasons = []
for reason_i in df_3_18_92_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_92_79['3.18.92.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

ROM Error (C1219 Table 3)


In [82]:
natsorted(curated_reasons)

['ROM Error (C1219 Table 3) ']

In [88]:
natsorted(curated_reasons)

['ROM Error (C1219 Table 3)']

In [None]:
assert(len(set(dict_3_18_92_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_92_79}

# '3.18.92.85'

In [None]:
df_3_18_92_85 = pd.read_pickle(ids_with_paths_dict['3.18.92.85'])
#-----
print(f"shape[0] = {df_3_18_92_85.shape[0]}")
print(f"# Unique reasons = {df_3_18_92_85['reason'].nunique()}")
print(df_3_18_92_85['reason'].unique())

In [None]:
dict_3_18_92_85 = {
        #-------------------------
        # Examples
        # 'Meter event Rom Failure Detected  Time event occurred on meter = 03/16/2020 07:00:51  Sequence number = 1983  User id = 1024  Event argument = 00-00'
        # 'Meter detected a ROM failure.'
        # 'ROM failure detected.'
        '3.18.92.85':    [
            #-----
            (r'(Meter event Rom Failure Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Rom Failure Detected).*', r'\1'),
            #-----
            (r'^(?:Meter detected a ROM failure|ROM failure detected).*$', 'Meter event Rom Failure Detected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_18_92_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_18_92_85['3.18.92.85']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_18_92_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_18_92_85}

# '3.21.1.79'

In [None]:
df_3_21_1_79 = pd.read_pickle(ids_with_paths_dict['3.21.1.79'])
#-----
print(f"shape[0] = {df_3_21_1_79.shape[0]}")
print(f"# Unique reasons = {df_3_21_1_79['reason'].nunique()}")
print(df_3_21_1_79['reason'].unique())

In [None]:
dict_3_21_1_79 = {
        # Examples
        # 'Meter event Measurement Error Detected  Time event occurred on meter = 08/26/2021 08:27:25  Sequence number = 307  User id = 0  Event argument = 00-00 '
        '3.21.1.79':    [
            #-----
            (r'(Meter event Measurement Error Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Measurement Error Detected).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
            (r'(Meter detected a measurement error).*', 'Meter event Measurement Error Detected')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_1_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_1_79['3.21.1.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_1_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_1_79}

# '3.21.1.173'

In [None]:
df_3_21_1_173 = pd.read_pickle(ids_with_paths_dict['3.21.1.173'])
#-----
print(f"shape[0] = {df_3_21_1_173.shape[0]}")
print(f"# Unique reasons = {df_3_21_1_173['reason'].nunique()}")
print(df_3_21_1_173['reason'].unique())

In [None]:
dict_3_21_1_173 = {
        # Examples
        # 'Meter event Nonvolatile Memory Failure Detected  Time event occurred on meter = 08/30/2021 04:00:11  Sequence number = 3256  User id = 0  Event argument = 00-00 '
        '3.21.1.173':    [
            #-----
            (r'(Meter event Nonvolatile Memory Failure Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Nonvolatile Memory Failure Detected).*', r'\1'),
            #-----
            (r'(Meter detected a nonvolatile memory failure).*', 'Meter event Nonvolatile Memory Failure Detected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_1_173['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_1_173['3.21.1.173']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_1_173.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_1_173}

# '3.21.3.28'

In [None]:
df_3_21_3_28 = pd.read_pickle(ids_with_paths_dict['3.21.3.28'])
#-----
print(f"shape[0] = {df_3_21_3_28.shape[0]}")
print(f"# Unique reasons = {df_3_21_3_28['reason'].nunique()}")
print(df_3_21_3_28['reason'].unique())

In [None]:
dict_3_21_3_28 = {
        # Examples
        # 'System Error cleared for meter 00:13:50:05:ff:11:2d:23.'
        '3.21.3.28':    [
            #-----
            (r'(System Error cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_3_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_3_28['3.21.3.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_3_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_3_28}

# '3.21.3.79'

In [89]:
df_3_21_3_79 = pd.read_pickle(ids_with_paths_dict['3.21.3.79'])
#-----
print(f"shape[0] = {df_3_21_3_79.shape[0]}")
print(f"# Unique reasons = {df_3_21_3_79['reason'].nunique()}")
print(df_3_21_3_79['reason'].unique())

shape[0] = 209
# Unique reasons = 204
['System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:0f:17:ba.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:07:6a:3f.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:14:cb:05.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:11:7f:dd.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:11:8e:70.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:14:cb:09.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:25:b0:3d.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:0f:0d:e1.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:14:d2:a6.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:0f:0d:e3.'
 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:0f:03:18.'
 'System Error (

In [96]:
dict_3_21_3_79 = {
        # Examples
        # 'System Error (C1219 Table 3: Er000020) occurred for meter 00:13:50:05:ff:0f:17:ba.'
        '3.21.3.79':    [
            #-----
#             (r'(System Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [97]:
curated_reasons = []
for reason_i in df_3_21_3_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_3_79['3.21.3.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

System Error (C1219 Table 3: Er000020)


In [92]:
natsorted(curated_reasons)

['System Error (C1219 Table 3: Er000020) ']

In [98]:
natsorted(curated_reasons)

['System Error (C1219 Table 3: Er000020)']

In [None]:
assert(len(set(dict_3_21_3_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_3_79}

# '3.21.17.28'

In [None]:
df_3_21_17_28 = pd.read_pickle(ids_with_paths_dict['3.21.17.28'])
#-----
print(f"shape[0] = {df_3_21_17_28.shape[0]}")
print(f"# Unique reasons = {df_3_21_17_28['reason'].nunique()}")
print(df_3_21_17_28['reason'].unique())

In [None]:
dict_3_21_17_28 = {
        # Examples
        # 'Meter detected a self check error.'
        '3.21.17.28':    [
            #-----
            (r'(Meter detected a self check error).', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_17_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_17_28['3.21.17.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_17_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_17_28}

# '3.21.18.79'

In [None]:
df_3_21_18_79 = pd.read_pickle(ids_with_paths_dict['3.21.18.79'])
#-----
print(f"shape[0] = {df_3_21_18_79.shape[0]}")
print(f"# Unique reasons = {df_3_21_18_79['reason'].nunique()}")
print(df_3_21_18_79['reason'].unique())

In [None]:
dict_3_21_18_79 = {
        # Examples
        # 'Meter detected a self check error.'
        '3.21.18.79':    [
            #-----
            (r'(Meter detected a self check error).*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_18_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_18_79['3.21.18.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_18_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_18_79}

# '3.21.38.223'

In [None]:
df_3_21_38_223 = pd.read_pickle(ids_with_paths_dict['3.21.38.223'])
#-----
print(f"shape[0] = {df_3_21_38_223.shape[0]}")
print(f"# Unique reasons = {df_3_21_38_223['reason'].nunique()}")
print(df_3_21_38_223['reason'].unique())

In [None]:
dict_3_21_38_223 = {
        # Examples
        # 'Detected end of voltage sag on meter 00:13:50:05:ff:32:66:22 on one or several phases. Duration: 21 cycles (less than a second), Min RMS Voltage: Phase A 118.1 V, Phase B 95.4 V, Phase C 120.3 V, RMS Current (at min voltage): Phase A 1.0 A, Phase B 0.2 A, Phase C 1.2 A'
        '3.21.38.223':  [
            #-----
            (r'(Detected end of voltage sag)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*Duration:.*, Min RMS Voltage: Phase A .*, Phase B .*, Phase C .*, RMS Current \(at min voltage\): Phase A .*, Phase B .*, Phase C .*', r'\1'), 
            #-----
            # Failsafe
            (r'(Detected end of voltage sag)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_38_223['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_38_223['3.21.38.223']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_38_223.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_38_223}

# '3.21.38.248'

In [None]:
df_3_21_38_248 = pd.read_pickle(ids_with_paths_dict['3.21.38.248'])
#-----
print(f"shape[0] = {df_3_21_38_248.shape[0]}")
print(f"# Unique reasons = {df_3_21_38_248['reason'].nunique()}")
print(df_3_21_38_248['reason'].unique())

In [None]:
dict_3_21_38_248 = {
        # Examples
        # 'Detected end of voltage swell on meter 00:13:50:05:ff:1b:49:b0 on one or several phases. Duration: 5 cycles (less than a second), Max RMS Voltage: Phase A 111.2 V, Phase B 133.2 V, Phase C 216.1 V, RMS Current (at max voltage): Phase A 2.8 A, Phase B 1.3 A, Phase C 0.8 A'
        '3.21.38.248':  [
            #-----
            (r'(Detected end of voltage swell)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*Duration: .*, Min RMS Voltage: .*, RMS Current \(at min voltage\): .*', r'\1'), 
            #-----
            # Failsafe
            (r'(Detected end of voltage swell)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1')  
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_38_248['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_38_248['3.21.38.248']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_38_248.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_38_248}

# '3.21.43.223'

In [None]:
df_3_21_43_223 = pd.read_pickle(ids_with_paths_dict['3.21.43.223'])
#-----
print(f"shape[0] = {df_3_21_43_223.shape[0]}")
print(f"# Unique reasons = {df_3_21_43_223['reason'].nunique()}")
print(df_3_21_43_223['reason'].unique())

In [None]:
dict_3_21_43_223 = {
        # Examples
        # 'Detected end of voltage sag on meter 00:13:50:03:ff:01:a6:36. Duration: 2 seconds, Min RMS Voltage: Phase A 227.9 V, RMS Current (at min voltage): Phase A 0.0 A'
        '3.21.43.223':  [
            #-----
            (r'(Detected end of voltage sag)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*Duration: .*, Min RMS Voltage: .*, RMS Current \(at min voltage\): .*', r'\1'), 
            #-----
            (r'(Detected end of voltage sag)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1'), # failsafe  
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_43_223['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_43_223['3.21.43.223']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_43_223.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_43_223}

# '3.21.43.248'

In [None]:
df_3_21_43_248 = pd.read_pickle(ids_with_paths_dict['3.21.43.248'])
#-----
print(f"shape[0] = {df_3_21_43_248.shape[0]}")
print(f"# Unique reasons = {df_3_21_43_248['reason'].nunique()}")
print(df_3_21_43_248['reason'].unique())

In [None]:
dict_3_21_43_248 = {
        # Examples
        # 'Detected end of voltage swell on meter 00:13:50:05:ff:2d:2c:25. Duration: 195 seconds, Max RMS Voltage: Phase A 265.7 V, RMS Current (at max voltage): Phase A 0.0 A'
        '3.21.43.248':  [
            #-----
            (r'(Detected end of voltage swell)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*Duration: .*, Min RMS Voltage: .*, RMS Current \(at min voltage\): .*', r'\1'), 
            #-----
            (r'(Detected end of voltage swell)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', r'\1'), # failsafe  
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_43_248['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_43_248['3.21.43.248']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_43_248.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_43_248}

# '3.21.67.28'

In [None]:
df_3_21_67_28 = pd.read_pickle(ids_with_paths_dict['3.21.67.28'])
#-----
print(f"shape[0] = {df_3_21_67_28.shape[0]}")
print(f"# Unique reasons = {df_3_21_67_28['reason'].nunique()}")
print(df_3_21_67_28['reason'].unique())

In [None]:
dict_3_21_67_28 = {
        # Examples
        # 'Measurement Error cleared for meter 00:13:50:05:ff:07:c0:55.'
        '3.21.67.28':    [
            #-----
            (r'(Measurement Error.*cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
            # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_67_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_67_28['3.21.67.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_67_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_67_28}

# '3.21.67.79'

In [99]:
df_3_21_67_79 = pd.read_pickle(ids_with_paths_dict['3.21.67.79'])
#-----
print(f"shape[0] = {df_3_21_67_79.shape[0]}")
print(f"# Unique reasons = {df_3_21_67_79['reason'].nunique()}")
print(df_3_21_67_79['reason'].unique())

shape[0] = 45365
# Unique reasons = 24035
['Meter event Measurement Error Detected  Time event occurred on meter = 01/01/2020 05:02:43  Sequence number = 241  User id = 0  Event argument = 00-00'
 'Meter event Measurement Error Detected  Time event occurred on meter = 01/01/2020 20:51:18  Sequence number = 1878  User id = 0  Event argument = 00-00'
 'Meter event Measurement Error Detected  Time event occurred on meter = 01/01/2020 07:56:50  Sequence number = 242  User id = 0  Event argument = 00-00'
 ...
 'Meter event Measurement Error Detected  Time event occurred on meter = 03/14/2023 02:00:56  Sequence number = 3953  User id = 0  Event argument = 00-00 '
 'Meter event Measurement Error Detected  Time event occurred on meter = 03/14/2023 03:27:02  Sequence number = 3954  User id = 0  Event argument = 00-00 '
 'Meter event Measurement Error Detected  Time event occurred on meter = 9/12/2023 6:06:57 AM  Sequence number = 89  User id = 34891  Event argument = 7D-75']


In [103]:
dict_3_21_67_79 = {
        # Examples
        # 'Meter event Measurement Error Detected  Time event occurred on meter = 01/26/2020 19:47:00  Sequence number = 284  User id = 0  Event argument = 00-00'
        # 'Measurement Error (C1219 Table 3) occurred for meter 00:13:50:03:ff:06:d6:d5.'
        '3.21.67.79':    [
            #-----
            (r'(Measurement Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
            (r'(Meter event Measurement Error Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Measurement Error Detected).*', r'\1'),
            #-----
            (r'(Meter detected a measurement error).*', 'Meter event Measurement Error Detected')
            #-----
        ]
}

In [104]:
curated_reasons = []
for reason_i in df_3_21_67_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_67_79['3.21.67.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Measurement Error (C1219 Table 3: Er100000)
Measurement Error (C1219 Table 3)
Meter event Measurement Error Detected


In [102]:
natsorted(curated_reasons)

['Measurement Error (C1219 Table 3) ',
 'Measurement Error (C1219 Table 3: Er100000) ',
 'Meter event Measurement Error Detected']

In [105]:
natsorted(curated_reasons)

['Measurement Error (C1219 Table 3)',
 'Measurement Error (C1219 Table 3: Er100000)',
 'Meter event Measurement Error Detected']

In [None]:
assert(len(set(dict_3_21_67_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_67_79}

# '3.21.82.28'

In [None]:
df_3_21_82_28 = pd.read_pickle(ids_with_paths_dict['3.21.82.28'])
#-----
print(f"shape[0] = {df_3_21_82_28.shape[0]}")
print(f"# Unique reasons = {df_3_21_82_28['reason'].nunique()}")
print(df_3_21_82_28['reason'].unique())

In [None]:
dict_3_21_82_28 = {
        # Examples
        # 'DSP Error cleared for meter 00:13:50:05:ff:0b:87:fc.'
        '3.21.82.28':    [
            #-----
            (r'(DSP Error cleared)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1')
            #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_21_82_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_82_28['3.21.82.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_21_82_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_82_28}

# '3.21.82.79'

In [106]:
df_3_21_82_79 = pd.read_pickle(ids_with_paths_dict['3.21.82.79'])
#-----
print(f"shape[0] = {df_3_21_82_79.shape[0]}")
print(f"# Unique reasons = {df_3_21_82_79['reason'].nunique()}")
print(df_3_21_82_79['reason'].unique())

shape[0] = 83
# Unique reasons = 32
['DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:15:af:91.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:0b:87:fc.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:1b:35:ed.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:02:00:03:69:e3.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:04:1a:66.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:13:c0:a6.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:07:ba:b9.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:13:bb:db.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:0f:0c:b6.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:03:ff:02:1f:cb.'
 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:03:ff:06:78:70.'
 'DSP Error (C1219 Table 3: Er200000) occurred for 

In [113]:
dict_3_21_82_79 = {
        # Examples
        # 'DSP Error (C1219 Table 3: Er200000) occurred for meter 00:13:50:05:ff:2c:f3:e2.'
        '3.21.82.79':    [
            #-----
#             (r'(DSP Error\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1') 
            #-----
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [114]:
curated_reasons = []
for reason_i in df_3_21_82_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_21_82_79['3.21.82.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

DSP Error (C1219 Table 3: Er200000)


In [109]:
natsorted(curated_reasons)

['DSP Error (C1219 Table 3: Er200000) ']

In [115]:
natsorted(curated_reasons)

['DSP Error (C1219 Table 3: Er200000)']

In [None]:
assert(len(set(dict_3_21_82_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_21_82_79}

# '3.22.12.243'

In [None]:
df_3_22_12_243 = pd.read_pickle(ids_with_paths_dict['3.22.12.243'])
#-----
print(f"shape[0] = {df_3_22_12_243.shape[0]}")
print(f"# Unique reasons = {df_3_22_12_243['reason'].nunique()}")
print(df_3_22_12_243['reason'].unique())

In [None]:
dict_3_22_12_243 = {
        # Examples
        # 'Meter event Test Mode Stopped  Time event occurred on meter = 08/27/2021 10:04:04  Sequence number = 23834  User id = 1  Event argument = 00-00 '
        '3.22.12.243':   [
            #-----
            (r'(Meter event Test Mode Stopped)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Test Mode Stopped).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_22_12_243['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_22_12_243['3.22.12.243']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_22_12_243.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_22_12_243}

# '3.22.19.242'

In [None]:
df_3_22_19_242 = pd.read_pickle(ids_with_paths_dict['3.22.19.242'])
#-----
print(f"shape[0] = {df_3_22_19_242.shape[0]}")
print(f"# Unique reasons = {df_3_22_19_242['reason'].nunique()}")
print(df_3_22_19_242['reason'].unique())

In [None]:
dict_3_22_19_242 = {
        # Examples
        # 'Meter event Test Mode Started  Time event occurred on meter = 08/27/2021 07:48:19  Sequence number = 765  User id = 1  Event argument = 00-00 '
        '3.22.19.242':   [
            #-----
            (r'(Meter event Test Mode Started)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Test Mode Started).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_22_19_242['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_22_19_242['3.22.19.242']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_22_19_242.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_22_19_242}

# '3.23.17.139'

In [None]:
df_3_23_17_139 = pd.read_pickle(ids_with_paths_dict['3.23.17.139'])
#-----
print(f"shape[0] = {df_3_23_17_139.shape[0]}")
print(f"# Unique reasons = {df_3_23_17_139['reason'].nunique()}")
print(df_3_23_17_139['reason'].unique())

In [None]:
dict_3_23_17_139 = {
        # Examples
        # 'Device 00:13:50:05:ff:2e:17:7e has been determined to have exceeded the max allowable trap threshold, 20, within a certain time limit, 3600 seconds.'
        '3.23.17.139':   [
            #-----
            (r'(Device)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+).*(exceeded the max allowable trap threshold).*', r'\1 \2'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_23_17_139['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_23_17_139['3.23.17.139']
    )
#     if curated_reason_i != 'Primary Power Up occurred for meter':
#         print(curated_reason_i)
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_23_17_139.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_23_17_139}

# '3.23.136.47'

In [None]:
df_3_23_136_47 = pd.read_pickle(ids_with_paths_dict['3.23.136.47'])
#-----
print(f"shape[0] = {df_3_23_136_47.shape[0]}")
print(f"# Unique reasons = {df_3_23_136_47['reason'].nunique()}")
print(df_3_23_136_47['reason'].unique())

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

In [None]:
dict_3_23_136_47 = {
        # Examples
        # 'Access Point 00:13:50:08:ff:00:06:8e has lost connectivity with FHSS 900 MHz band.'
        '3.23.136.47':  [
            #-----
            (r'(Access Point)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(has lost connectivity with FHSS 900 MHz band.)', r'\1 \2') 
            #-----
        ]
}

# # COULD ALSO DO (and in this case, should probably do)
# # NOTE: Here, the MAC-esque code occurs in middle of string, not the end as is common elsewhere.
# #       This is why the pattern here excludes [\s*\.]* at the end
# dict_3_23_136_47 = {
#         '3.23.136.47':  [
#             #-----
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', '')
#             #-----
#         ]
# }

In [None]:
curated_reasons = []
for reason_i in df_3_23_136_47['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_23_136_47['3.23.136.47']
    )
#     if curated_reason_i != 'Primary Power Up occurred for meter':
#         print(curated_reason_i)
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_23_136_47.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_23_136_47}

# '3.23.136.85'

In [None]:
df_3_23_136_85 = pd.read_pickle(ids_with_paths_dict['3.23.136.85'])
#-----
print(f"shape[0] = {df_3_23_136_85.shape[0]}")
print(f"# Unique reasons = {df_3_23_136_85['reason'].nunique()}")
print(df_3_23_136_85['reason'].unique())

In [None]:
dict_3_23_136_85 = {
        # Examples
        # Device: 00:13:50:05:ff:20:f5:ef Time: 2021-01-20T06:03:47.000-05:00 Failed Device: 00:13:50:05:ff:20:f5:ef Reason: Security public key mismatch Reboot Counter: 44 Refresh Counter: 0 Seconds since last reboot 6872803
        # 'NIC Link Layer Handshake Failed: Device: 00:13:50:05:ff:19:4e:6b, Rejected neighbor Mac ID: 00:13:50:05:ff:18:a3:52, Rejection Cause: invalid eblob signature'
        '3.23.136.85':  [
            #-----
            # Below, Device and Failed Device sometimes blank, sometime MAC-esque
            (r'Device\:?(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)? Time: .* Failed Device\:?(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)? (Reason: .*) Reboot Counter: .* Refresh Counter: .*', r'Device Failed: \1'), 
            #-----
            (r'(NIC Link Layer Handshake Failed): Device:(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+), Rejected neighbor Mac ID:(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+), (Rejection Cause: .*)', r'\1: \2')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_23_136_85['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_23_136_85['3.23.136.85']
    )
#     if curated_reason_i != 'Primary Power Up occurred for meter':
#         print(curated_reason_i)
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_23_136_85.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_23_136_85}

# '3.25.17.3'

In [None]:
df_3_25_17_3 = pd.read_pickle(ids_with_paths_dict['3.25.17.3'])
#-----
print(f"shape[0] = {df_3_25_17_3.shape[0]}")
print(f"# Unique reasons = {df_3_25_17_3['reason'].nunique()}")
print(df_3_25_17_3['reason'].unique())

In [None]:
dict_3_25_17_3 = {
        # Examples
        # 'Meter generated an energy polarity gyrbox call in event.'
        '3.25.17.3':   [
            #-----
            (r'(Meter generated an energy polarity gyrbox call in event).*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_25_17_3['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_25_17_3['3.25.17.3']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_25_17_3.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_25_17_3}

# '3.26.0.47'

In [None]:
df_3_26_0_47 = pd.read_pickle(ids_with_paths_dict['3.26.0.47'])
#-----
print(f"shape[0] = {df_3_26_0_47.shape[0]}")
print(f"# Unique reasons = {df_3_26_0_47['reason'].nunique()}")
print(df_3_26_0_47['reason'].unique())

In [None]:
dict_3_26_0_47 = {
        # Examples
        # 'Primary Power Down occurred for meter 00:13:50:05:ff:16:5f:61.'
        # 'Meter had a power outage. Unsafe power fail count = 32714'
        # 'Meter event Primary Power Down  Time event occurred on meter = 08/05/2021 16:59:27  Sequence number = 691  User id = 0  Event argument = 00-00'
        '3.26.0.47': [
            #-----
            (r'(Primary Power Down)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
            (r'(Meter had a power outage)\.?\s*(Unsafe power fail)\s*count\s*=\s*[0-9]+', r'\1 (\2)'), 
            #-----
            (r'^(Meter had a power outage).*$', r'\1'), #Fail proof, in case Unsafe power fail, count, etc., not found 
            #-----
            (r'(Meter event Primary Power Down)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Primary Power Down).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_0_47['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_0_47['3.26.0.47']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_0_47.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_0_47}

# '3.26.0.216'

In [None]:
df_3_26_0_216 = pd.read_pickle(ids_with_paths_dict['3.26.0.216'])
#-----
print(f"shape[0] = {df_3_26_0_216.shape[0]}")
print(f"# Unique reasons = {df_3_26_0_216['reason'].nunique()}")
print(df_3_26_0_216['reason'].unique())

In [None]:
dict_3_26_0_216 = {
        # Examples
        # 'Primary Power Up occurred for meter 00:13:50:05:ff:22:2a:0c.'
        # 'Meter event Primary Power Up Time event occurred on meter = 08/14/2020 Sequence number = 343 User id = 0 Event argument = 00-00'
        '3.26.0.216':   [
            #-----
            (r'(Primary Power Up)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1'), 
            #-----
            (r'Meter event (Primary Power Up)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'Meter event (Primary Power Up).*', r'\1'),  #Fail proof, in case time, sequence, etc., not found 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_0_216['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_0_216['3.26.0.216']
    )
#     if curated_reason_i != 'Primary Power Up occurred for meter':
#         print(curated_reason_i)
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_0_216.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_0_216}

# '3.26.17.185'

In [None]:
df_3_26_17_185 = pd.read_pickle(ids_with_paths_dict['3.26.17.185'])
#-----
print(f"shape[0] = {df_3_26_17_185.shape[0]}")
print(f"# Unique reasons = {df_3_26_17_185['reason'].nunique()}")
print(df_3_26_17_185['reason'].unique())

In [None]:
dict_3_26_17_185 = {
        # Examples
        # 'Meter had a power outage. Unsafe power fail count = 13050'
        # Meter event Primary Power Down  Time event occurred on meter = 08/31/2021 20:07:18  Sequence number = 103  User id = 0  Event argument = 00-00 
        '3.26.17.185':  [
            #-----
            (r'(Meter had a power outage).\s*(Unsafe power fail) count = [0-9]+', r'\1 (\2)'), 
            #-----
            (r'(Meter event Primary Power Down)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Primary Power Down).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_17_185['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_17_185['3.26.17.185']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_17_185.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_17_185}

# '3.26.17.216'

In [None]:
df_3_26_17_216 = pd.read_pickle(ids_with_paths_dict['3.26.17.216'])
#-----
print(f"shape[0] = {df_3_26_17_216.shape[0]}")
print(f"# Unique reasons = {df_3_26_17_216['reason'].nunique()}")
print(df_3_26_17_216['reason'].unique())

In [None]:
dict_3_26_17_216 = {
        # Examples
        # 'Meter event Primary Power Up  Time event occurred on meter = 08/31/2021 20:07:44  Sequence number = 104  User id = 0  Event argument = 00-00 '
        '3.26.17.216':  [
            #-----
            (r'Meter event (Primary Power Up)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'Meter event (Primary Power Up).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_17_216['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_17_216['3.26.17.216']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_17_216.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_17_216}

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# '3.26.38.37'

In [None]:
df_3_26_38_37 = pd.read_pickle(ids_with_paths_dict['3.26.38.37'])
#-----
print(f"shape[0] = {df_3_26_38_37.shape[0]}")
print(f"# Unique reasons = {df_3_26_38_37['reason'].nunique()}")
print(df_3_26_38_37['reason'].unique())

In [None]:
dict_3_26_38_37_OG = {
        # Examples
        # 'Under Voltage cleared (CA000400) for meter 00:13:50:05:ff:0b:88:ec.'
        # 'Under Voltage (CA000400) cleared for meter 00:13:50:05:ff:15:e8:3b.'
        # 'Under Voltage (Diagnostic 6) cleared for meter 00:13:50:05:ff:3f:87:07N/A.'
        '3.26.38.37':   [
            #-----
            # NOTE: Due to annoying N/A at end of some MAC-esque IDs, I have to specify length of numerical entries
            #       as [0-9a-zA-Z]{1,2}, instead of the more general [0-9a-zA-Z]+ found elsewhere!
            (r'\s+(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+(?:N/A)?\.?', ''), 
            #-----
            (
                (
                    r'(Under Voltage)\s*'      
                    r'(\([0-9a-zA-Z\s]*\))\s*'\
                    r'([0-9a-zA-Z]*)?\s?'\
                    r'(for meter\:?\s*)'\
                    r'(?:(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+)?[\s:,.]*'\
                    r'(?:Phase\s{1,2}[ABC](?:(?:\s*and\s*[ABC])|(?:,\s*[ABC])*))?\s*'\
                    r'(Voltage out of tolerance)?'
                ), 
                r'\1 \2 \3 \4 \5'
            ), 
            #-----
            (
                (
                    r'(Under Voltage)\s*'
                    r'([0-9a-zA-Z]*)?\s*'\
                    r'(\([0-9a-zA-Z\s]*\))\s*'\
                    r'(for meter\:?\s*)'\
                    r'(?:(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+)?[\s:,.]*'\
                    r'(?:Phase\s{1,2}[ABC](?:(?:\s*and\s*[ABC])|(?:,\s*[ABC])*))?\s*'\
                    r'(Voltage out of tolerance)?'
                ), 
                r'\1 \3 \2 \4 \5'
            ), 
            #-----
            ('meterN/A', 'meter'), 
            #-----
            (r'N/A', ''), 
            #-----
            (r'[!"#$%&\'(*+,-./:;<=>?@[\\^_`{|~\s]*$', ''), 
            #-----
            (r'\s{2,}', ' '), 
            #-----
            (r'\s*(.*)', r'\1')
            #-----
        ]
}

In [None]:
dict_3_26_38_37 = {
        # Examples
        # 'Under Voltage cleared (CA000400) for meter 00:13:50:05:ff:0b:88:ec.'
        # 'Under Voltage (CA000400) cleared for meter 00:13:50:05:ff:15:e8:3b.'
        # 'Under Voltage (Diagnostic 6) cleared for meter 00:13:50:05:ff:3f:87:07N/A.'
        '3.26.38.37':   [
            #-----
            # NOTE: Due to annoying N/A at end of some MAC-esque IDs, I have to specify length of numerical entries
            #       as [0-9a-zA-Z]{1,2}, instead of the more general [0-9a-zA-Z]+ found elsewhere!
            (r'(Under Voltage|Low Potential|Diag6 Condition)\s*(\scleared)?\s*(\s\(.*\))?\s*(\scleared)?\s*(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+(?:N/A)?\.?)', r'\1\3\2\4')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_38_37['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_38_37['3.26.38.37']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_38_37.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_38_37}

In [None]:
test_str = 'Under Voltage cleared (CA000400) for meter 00:13:50:05:ff:0b:88:ec.'
pattern = r'(Under Voltage|Low Potential|Diag6 Condition)\s*(\scleared)?\s*(\s\(.*\))?\s*(\scleared)?\s*(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)'
# repl = r'1:\1 2:\2 3:\3 4:\4'
# repl = r'1:\1 3:\3 2:\2 4:\4'
repl = r'\1\3\2\4'

re.sub(pattern, repl, test_str)

In [None]:
test_str = 'Under Voltage (CA000400) cleared for meter 00:13:50:05:ff:0c:79:88.'
pattern = r'(Under Voltage|Low Potential|Diag6 Condition)\s*(\scleared)?\s*(\s\(.*\))?\s*(\scleared)?\s*(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)'
# repl = r'1:\1 2:\2 3:\3 4:\4'
# repl = r'1:\1 3:\3 2:\2 4:\4'

repl = r'\1\3\2\4'
re.sub(pattern, repl, test_str)

In [None]:
test_str = 'Low Potential cleared for meter 00:13:50:05:ff:1b:3a:81.'
pattern = r'(Under Voltage|Low Potential|Diag6 Condition)\s*(\scleared)?\s*(\s\(.*\))?\s*(\scleared)?\s*(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)'
# repl = r'1:\1 2:\2 3:\3 4:\4'
# repl = r'1:\1~3:\3~2:\2~4:\4'
repl = r'\1\3\2\4'

re.sub(pattern, repl, test_str)

In [None]:
test_str = 'Low Potential cleared for meter 00:13:50:05:ff:1b:3a:81.'
pattern = r'(Under Voltage|Low Potential|Diag6 Condition)\s*(\scleared)?\s*(\s\(.*\))?\s*(\scleared)?\s*(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)'
# repl = r'1:\1 2:\2 3:\3 4:\4'
# repl = r'1:\1~3:\3~2:\2~4:\4'
# repl = r'\1 \3\2\4'
repl = r'\1\3\2\4'

re.sub(pattern, repl, test_str)

# '3.26.38.47'

In [116]:
df_3_26_38_47 = pd.read_pickle(ids_with_paths_dict['3.26.38.47'])
#-----
print(f"shape[0] = {df_3_26_38_47.shape[0]}")
print(f"# Unique reasons = {df_3_26_38_47['reason'].nunique()}")
print(df_3_26_38_47['reason'].unique())

shape[0] = 1055446
# Unique reasons = 106912
['Low Potential (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:67:3b.'
 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:05:ff:15:e8:3b.'
 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:05:ff:26:2d:3f.'
 ...
 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:03:ff:02:32:93.'
 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:03:ff:03:0b:91.'
 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:05:ff:2c:e5:72.']


In [123]:
dict_3_26_38_47 = {
        # Examples
        # 'Low Potential (C1219 Table 3) occurred for meter 00:13:50:05:ff:1b:67:3b.'
        '3.26.38.47': [
            #-----
#             (r'(Low Potential\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', r'\1') 
            #-----
#             # COULD ALSO DO
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
            #-----
        ]
}

In [124]:
curated_reasons = []
for reason_i in df_3_26_38_47['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_38_47['3.26.38.47']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

Low Potential (C1219 Table 3)


In [119]:
natsorted(curated_reasons)

['Low Potential (C1219 Table 3) ']

In [125]:
natsorted(curated_reasons)

['Low Potential (C1219 Table 3)']

In [None]:
assert(len(set(dict_3_26_38_47.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_38_47}

# '3.26.38.73'

In [None]:
df_3_26_38_73 = pd.read_pickle(ids_with_paths_dict['3.26.38.73'])
#-----
print(f"shape[0] = {df_3_26_38_73.shape[0]}")
print(f"# Unique reasons = {df_3_26_38_73['reason'].nunique()}")
print(df_3_26_38_73['reason'].unique())

In [None]:
dict_3_26_38_73 = {
        # Examples
        # 'Diag7 Condition cleared for meter 00:13:50:03:00:4d:41:fa.'
        # 'Over Voltage (Diagnostic 7) cleared for meter 00:13:50:05:ff:37:87:3dN/A.'
        '3.26.38.73':   [
            #-----
            # NOTE: Due to annoying N/A at end of some MAC-esque IDs, I have to specify length of numerical entries
            #       as [0-9a-zA-Z]{1,2}, instead of the more general [0-9a-zA-Z]+ found elsewhere!
            # NOTE: Also, the use of non-greedy (.*?) at beginning.
            #       If (.*) were used instead, results would be, e.g.
            #          'Over Voltage (Diagnostic 7) cleared for meter' instead of 'Over Voltage (Diagnostic 7) cleared'
            (r'(.*?)(?:(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+(?:N/A)?\.?)', r'\1')
            #-----
            #-----
#             # COULD ALSO DO
#             # NOTE: Due to annoying N/A at end of some MAC-esque IDs, I have to specify length of numerical entries
#             #       as [0-9a-zA-Z]{1,2}, instead of the more general [0-9a-zA-Z]+ found elsewhere!
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+(?:N/A)?\.?)', '')
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_38_73['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_38_73['3.26.38.73']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_38_73.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_38_73}

# '3.26.38.93'

In [40]:
df_3_26_38_93 = pd.read_pickle(ids_with_paths_dict['3.26.38.93'])
#-----
print(f"shape[0] = {df_3_26_38_93.shape[0]}")
print(f"# Unique reasons = {df_3_26_38_93['reason'].nunique()}")
print(df_3_26_38_93['reason'].unique())

shape[0] = 3861627
# Unique reasons = 189382
['KV2c meter event Over Voltage Diagnostic flags:Phase A Voltage'
 'Diag7: Over Voltage, Element A occurred for meter 00:13:50:ff:fe:03:4e:12.'
 'Diag7: Over Voltage, Element A occurred for meter 00:13:50:05:ff:04:35:4a.'
 ...
 'Over Voltage (Diagnostic 7) occurred for meter 00:13:50:05:ff:1e:e7:b5: Phase A.'
 'Over Voltage (Diagnostic 7) occurred for meter 00:13:50:05:ff:1b:c3:31: Phase A.'
 'Over Voltage (Diagnostic 7) occurred for meter 00:13:50:05:ff:23:94:7d: Phase A.']


# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

In [44]:
dict_3_26_38_93 = {
        # Examples
        # 'Diag7: Over Voltage, Element A occurred for meter 03.34.0.400:13:50:02:00:0a:d7:98.'
        # 'Over Voltage (Diagnostic 7) occurred for meter 00:13:50:05:ff:41:8a:8f: Phase A.'
        # SEEMS RARE, BUT HAVE SEEN:
        # 'KV2c meter event Over Voltage Diagnostic flags:Phase A Voltage'
        # 'KV2c meter event Under Voltage Caution Diagnostic flags:Phase B Voltage'
        # 'KV2c meter event Received kWh Caution Diagnostic flags:Phase A Voltage '
        # 'KV2c meter event Received kWh Caution Cleared '
        '3.26.38.93':   [
            #-----
            (r'(KV2c meter event (?:(?:Over|Under) Voltage)|(?:Voltage Out of Tolerance)|(?:(?:Received|Delivered) kWh))(?:\s*Caution)?\s*(Diagnostic flags)\s*\:\s*(.*?)\s*$', r'\1 (\2 = \3)'), 
            #-----
            (r'(KV2c meter event (?:(?:Over|Under) Voltage)|(?:Voltage Out of Tolerance)|(?:(?:Received|Delivered) kWh))(?:\s*Caution)?\s*Cleared\s*$', r'\1 Cleared'), 
            #-----
            (r'(.*Over Voltage.*?)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)(.*?)\s*$', r'\1\2') 
            #-----
        ]
}

# # COULD ALSO PROBABLY DO SOMETHING SIMILAR TO
# dict_3_26_38_93 = {
#         '3.26.38.93':   [
#             #-----
#             # NOTE: .* at end, not found in most of others (although, maybe it should be?)
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?).*', '')
#             #-----
#         ]
# }

In [45]:
curated_reasons = []
for reason_i in df_3_26_38_93['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_38_93['3.26.38.93']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

KV2c meter event Over Voltage (Diagnostic flags = Phase A Voltage)
KV2c meter event Received kWh Cleared
KV2c meter event Received kWh (Diagnostic flags = Phase A Voltage)
KV2c meter event Under Voltage (Diagnostic flags = Phase B Voltage)
KV2c meter event Voltage Out of Tolerance (Diagnostic flags = Phase A Voltage)
Diag7: Over Voltage, Element A
Over Voltage (Diagnostic 7): Phase A.


In [43]:
natsorted(curated_reasons)

['Diag7: Over Voltage, Element A ',
 'KV2c meter event Over Voltage (Diagnostic flags = Phase A Voltage)',
 'KV2c meter event Received kWh (Diagnostic flags = Phase A Voltage)',
 'KV2c meter event Received kWh Cleared',
 'KV2c meter event Under Voltage (Diagnostic flags = Phase B Voltage)',
 'KV2c meter event Voltage Out of Tolerance (Diagnostic flags = Phase A Voltage)',
 'Over Voltage (Diagnostic 7) : Phase A.']

In [46]:
natsorted(curated_reasons)

['Diag7: Over Voltage, Element A',
 'KV2c meter event Over Voltage (Diagnostic flags = Phase A Voltage)',
 'KV2c meter event Received kWh (Diagnostic flags = Phase A Voltage)',
 'KV2c meter event Received kWh Cleared',
 'KV2c meter event Under Voltage (Diagnostic flags = Phase B Voltage)',
 'KV2c meter event Voltage Out of Tolerance (Diagnostic flags = Phase A Voltage)',
 'Over Voltage (Diagnostic 7): Phase A.']

In [None]:
assert(len(set(dict_3_26_38_93.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_38_93}

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# '3.26.38.150'

In [10]:
df_3_26_38_150 = pd.read_pickle(ids_with_paths_dict['3.26.38.150'])
#-----
print(f"shape[0] = {df_3_26_38_150.shape[0]}")
print(f"# Unique reasons = {df_3_26_38_150['reason'].nunique()}")
print(df_3_26_38_150['reason'].unique())

shape[0] = 390000
# Unique reasons = 20949
['Under Voltage (CA000400) for meter 00:13:50:05:ff:0b:88:ec. Phase  C Voltage out of tolerance.'
 'Under Voltage (CA000400) for meter 00:13:50:05:ff:0b:88:ec. Phase  A Voltage out of tolerance.'
 'Under Voltage (CA000400) occurred for meter 00:13:50:05:ff:0d:10:85: Phase A.'
 ...
 'Under Voltage (CA000400) occurred for meter 00:13:50:05:ff:1e:e6:d6: Phase A.'
 'Under Voltage (CA000400) occurred for meter 00:13:50:05:ff:1f:3b:3d: Phase C.'
 'Under Voltage (CA000400) for meter 00:13:50:05:ff:49:11:14. Phase  A Voltage out of tolerance.']


In [11]:
dict_3_26_38_150_OG = {
        # Examples
        # 'Under Voltage (CA000400) for meter 00:13:50:05:ff:0b:88:ec. Phase  C Voltage out of tolerance.'
        # 'Under Voltage (CA000400) occurred for meter 00:13:50:05:ff:15:e8:3b: Phase A.'
        '3.26.38.150':  [
            #-----
            (r'\s+(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+(?:N/A)?\.?', ''), 
            #-----
            (
                (
                    r'(Under Voltage)\s*'      
                    r'(\([0-9a-zA-Z\s]*\))\s*'\
                    r'([0-9a-zA-Z]*)?\s?'\
                    r'(for meter\:?\s*)'\
                    r'(?:(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+)?[\s:,.]*'\
                    r'(?:Phase\s{1,2}[ABC](?:(?:\s*and\s*[ABC])|(?:,\s*[ABC])*))?\s*'\
                    r'(Voltage out of tolerance)?'
                ), 
                r'\1 \2 \3 \4 \5'
            ), 
            #-----
            (
                (
                    r'(Under Voltage)\s*'
                    r'([0-9a-zA-Z]*)?\s*'\
                    r'(\([0-9a-zA-Z\s]*\))\s*'\
                    r'(for meter\:?\s*)'\
                    r'(?:(?:[0-9a-zA-Z]{1,2})(?:\:[0-9a-zA-Z]{1,2})+)?[\s:,.]*'\
                    r'(?:Phase\s{1,2}[ABC](?:(?:\s*and\s*[ABC])|(?:,\s*[ABC])*))?\s*'\
                    r'(Voltage out of tolerance)?'
                ), 
                r'\1 \3 \2 \4 \5'
            ), 
            #-----
            (r'[!"#$%&\'(*+,-./:;<=>?@[\\^_`{|~\s]*$', ''), 
            #-----
            (r'\s{2,}', ' '), 
            #-----
            (r'\s*(.*)', r'\1')
            #-----
        ]
}

In [21]:
dict_3_26_38_150 = {
        # Examples
        # 'Under Voltage (CA000400) for meter 00:13:50:05:ff:0b:88:ec. Phase  C Voltage out of tolerance.'
        # 'Under Voltage (CA000400) occurred for meter 00:13:50:05:ff:15:e8:3b: Phase A.'
        # 'KV2c meter event Under Voltage Diagnostic flags:Phase A Voltage'
        # 'Diag6: Under Voltage, Element A occurred for meter 00:13:50:05:ff:0b:88:ec.'
        '3.26.38.150':   [
            #-----
            # NOTE: Due to annoying N/A at end of some MAC-esque IDs, I have to specify length of numerical entries
            #       as [0-9a-zA-Z]{1,2}, instead of the more general [0-9a-zA-Z]+ found elsewhere!
            # NOTE: Also, the use of non-greedy (.*?) at beginning.
            #       If (.*) were used instead, results would be, e.g.
            #          'Diag6: Under Voltage, Element A occurred for meter'
            #       instead of 
            #          'Diag6: Under Voltage, Element A'
            (r'(Under Voltage\s*(?:\(.*\))?)\s*(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\.\:]?)(\s?.*?)', r'\1\2'), 
            #-----
            (r'(Diag6.*?)(?:(?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\.\:]?)(\s?.*?)', r'\1\2'), 
            #-----
            (r'(KV2c meter event .*?)\s*(Diagnostic flags)\s*\:\s*(.*?)\s*$', r'\1 (\2 = \3)')
            #-----
        ]
}

In [22]:
curated_reasons = []
for reason_i in df_3_26_38_150['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_38_150['3.26.38.150']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*natsorted(curated_reasons), sep='\n')

Diag6: Under Voltage, Element A
KV2c meter event Under Voltage (Diagnostic flags = Phase A Voltage)
Under Voltage (CA000400) Phase  A Voltage out of tolerance.
Under Voltage (CA000400) Phase  A, B Voltage out of tolerance.
Under Voltage (CA000400) Phase  A, B, C Voltage out of tolerance.
Under Voltage (CA000400) Phase  A, C Voltage out of tolerance.
Under Voltage (CA000400) Phase  B Voltage out of tolerance.
Under Voltage (CA000400) Phase  B, C Voltage out of tolerance.
Under Voltage (CA000400) Phase  C Voltage out of tolerance.
Under Voltage (CA000400) Phase A and C.
Under Voltage (CA000400) Phase A.
Under Voltage (CA000400) Phase C.
Under Voltage (Diagnostic 6) Phase A.


In [14]:
natsorted(curated_reasons)

['Diag6: Under Voltage, Element A',
 'KV2c meter event Under Voltage  (Diagnostic flags = Phase A Voltage)',
 'Under Voltage (CA000400) Phase  A Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, B Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, B, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  B Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  B, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase A and C.',
 'Under Voltage (CA000400) Phase A.',
 'Under Voltage (CA000400) Phase C.',
 'Under Voltage (Diagnostic 6) Phase A.']

In [23]:
natsorted(curated_reasons)

['Diag6: Under Voltage, Element A',
 'KV2c meter event Under Voltage (Diagnostic flags = Phase A Voltage)',
 'Under Voltage (CA000400) Phase  A Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, B Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, B, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  A, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  B Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  B, C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase  C Voltage out of tolerance.',
 'Under Voltage (CA000400) Phase A and C.',
 'Under Voltage (CA000400) Phase A.',
 'Under Voltage (CA000400) Phase C.',
 'Under Voltage (Diagnostic 6) Phase A.']

In [None]:
assert(len(set(dict_3_26_38_150.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_38_150}

# '3.26.136.47'

In [None]:
df_3_26_136_47 = pd.read_pickle(ids_with_paths_dict['3.26.136.47'])
#-----
print(f"shape[0] = {df_3_26_136_47.shape[0]}")
print(f"# Unique reasons = {df_3_26_136_47['reason'].nunique()}")
print(df_3_26_136_47['reason'].unique())

In [None]:
dict_3_26_136_47 = {
        # Examples
        # 'Last Gasp - NIC power lost for device: 00:13:50:05:ff:0b:83:e4, Reboot Count: 12264, NIC timestamp: 2020-01-19T22:21:02.000-05:00, Received timestamp: 2020-01-19T22:21:04.353-05:00, Fail Reason: [0x49] LG_ZERO_X_DETECTOR ,LG_DIRECT_NOTIFICATION'
        '3.26.136.47':  [
            #-----
            (r'(Last Gasp - NIC power lost for device):(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+).*(Fail Reason: .*)$', r'\1, \2')
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_136_47['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_136_47['3.26.136.47']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_136_47.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_136_47}

# '3.26.136.66'

In [None]:
df_3_26_136_66 = pd.read_pickle(ids_with_paths_dict['3.26.136.66'])
#-----
print(f"shape[0] = {df_3_26_136_66.shape[0]}")
print(f"# Unique reasons = {df_3_26_136_66['reason'].nunique()}")
print(df_3_26_136_66['reason'].unique())

In [None]:
dict_3_26_136_66 = {
        # Examples
        # 'Device 00:13:50:05:ff:0b:8a:18, Last Gasp State: EL_EVENT_POWER_FAIL_DETECT_LG_DISABLED, Detector State: EL_EVENT_POWER_FAIL_DETECT_NIC_ZX_DISABLED, Reboot Count: 108'
        '3.26.136.66':  [
            #-----
            (r'Device(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+), (Last Gasp State: .*), (Detector State: .*), Reboot Count: \d*', r'\1, \2'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_136_66['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_136_66['3.26.136.66']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_136_66.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_136_66}

# '3.26.136.216'

In [None]:
df_3_26_136_216 = pd.read_pickle(ids_with_paths_dict['3.26.136.216'])
#-----
print(f"shape[0] = {df_3_26_136_216.shape[0]}")
print(f"# Unique reasons = {df_3_26_136_216['reason'].nunique()}")
print(df_3_26_136_216['reason'].unique())

In [None]:
dict_3_26_136_216 = {
        # Examples
        # 'NIC Power Restore Trap Received from device: 00:13:50:05:ff:0c:7e:93, Reboot Count: 61, NIC timestamp: 2020-01-27T09:19:56.000-05:00, Received Timestamp: 2020-01-27T09:19:57.204-05:00, Power Restoration Timestamp: 2020-01-27T09:19:42.000-05:00, State: 4,p'
        '3.26.136.216': [
            #-----
            (r'(NIC Power Restore Trap Received from device):(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+).*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_26_136_216['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_26_136_216['3.26.136.216']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_26_136_216.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_26_136_216}

# '3.31.1.143'

In [None]:
df_3_31_1_143 = pd.read_pickle(ids_with_paths_dict['3.31.1.143'])
#-----
print(f"shape[0] = {df_3_31_1_143.shape[0]}")
print(f"# Unique reasons = {df_3_31_1_143['reason'].nunique()}")
print(df_3_31_1_143['reason'].unique())

In [None]:
dict_3_31_1_143 = {
        # Examples
        # 'Meter assumed to be disconnected has reported Load side voltage indicating a potential case of tamper.'
        '3.31.1.143':   [
            #-----
            (r'(Meter assumed to be disconnected has reported Load side voltage indicating a potential case of tamper).*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_31_1_143['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_31_1_143['3.31.1.143']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_31_1_143.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_31_1_143}

# '3.33.1.219'

In [None]:
df_3_33_1_219 = pd.read_pickle(ids_with_paths_dict['3.33.1.219'])
#-----
print(f"shape[0] = {df_3_33_1_219.shape[0]}")
print(f"# Unique reasons = {df_3_33_1_219['reason'].nunique()}")
print(df_3_33_1_219['reason'].unique())

In [None]:
dict_3_33_1_219 = {
        # Examples
        # 'Meter event Reverse Rotation Detected  Time event occurred on meter = 08/30/2021 04:25:38  Sequence number = 13124  User id = 0  Event argument = 00-00 '
        '3.33.1.219':   [
            #-----
            (r'(Meter event Reverse Rotation Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Reverse Rotation Detected).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
            (r'(Meter detected a Reverse Rotation).*', 'Meter event Reverse Rotation Detected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_33_1_219['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_33_1_219['3.33.1.219']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_33_1_219.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_33_1_219}

# '3.33.1.257'

In [None]:
df_3_33_1_257 = pd.read_pickle(ids_with_paths_dict['3.33.1.257'])
#-----
print(f"shape[0] = {df_3_33_1_257.shape[0]}")
print(f"# Unique reasons = {df_3_33_1_257['reason'].nunique()}")
print(df_3_33_1_257['reason'].unique())

In [None]:
dict_3_33_1_257 = {
        # Examples
        # 'Meter detected a Tamper Attempt.'
        # 'Meter event Tamper Attempt Suspected  Time event occurred on meter = 08/28/2021 08:45:05  Sequence number = 72  User id = 0  Event argument = 00-00 '
        '3.33.1.257':   [
            #-----
            (r'(Meter event Tamper Attempt Suspected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Tamper Attempt Suspected).*', r'\1'), #Fail proof, in case time, sequence, etc., not found 
            #-----
            (r'(Meter detected a Tamper Attempt).*', 'Meter event Tamper Attempt Suspected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_33_1_257['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_33_1_257['3.33.1.257']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_33_1_257.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_33_1_257}

# '3.35.0.28'

In [None]:
df_3_35_0_28 = pd.read_pickle(ids_with_paths_dict['3.35.0.28'])
#-----
print(f"shape[0] = {df_3_35_0_28.shape[0]}")
print(f"# Unique reasons = {df_3_35_0_28['reason'].nunique()}")
print(df_3_35_0_28['reason'].unique())

In [None]:
dict_3_35_0_28 = {
        # Examples
        # 'Cleared: Meter 00:13:50:05:ff:11:69:bb detected a high temperature condition. (C1219 Table 3)'
        '3.35.0.28': [
            #-----
            (r'(Cleared: Meter)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(detected a high temperature condition)\.\s*(.*)', r'\1 \2 \3') 
            #-----
            #-----
            # COULD ALSO DO
            # NOTE: Here, the MAC-esque code occurs in middle of string, not the end as is common elsewhere.
            #       This is why the pattern here excludes [\s*\.]* at the end
            (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+\.?)', '')
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_35_0_28['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_35_0_28['3.35.0.28']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_35_0_28.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_35_0_28}

# '3.35.0.40'

In [None]:
df_3_35_0_40 = pd.read_pickle(ids_with_paths_dict['3.35.0.40'])
#-----
print(f"shape[0] = {df_3_35_0_40.shape[0]}")
print(f"# Unique reasons = {df_3_35_0_40['reason'].nunique()}")
print(df_3_35_0_40['reason'].unique())

In [None]:
dict_3_35_0_40 = {
        #-------------------------
        # Examples
        # 'Meter 00:13:50:05:ff:11:69:bb detected a high temperature condition. (C1219 Table 3)'
        # 'Meter's temperature threshold exceeded.'
        # 'Meter event AX Temp Threshold Exceeded  Time event occurred on meter = 02/19/2021 13:33:50  Sequence number = 5115  User id = 0  Event argument = 00-00 '
        # 'Meter event S4TemperatureThreshold  Time event occurred on meter = 05/25/2023 17:12:50  Sequence number = 45  User id = 0  Event argument = 00-00-00-00-00-00 '
        '3.35.0.40': [
            #-----
            (r'(Meter)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(detected a high temperature condition)\.\s*(.*)', r'\1 \2 \3'), 
            #-----
            (r"(Meter's temperature threshold exceeded).", r'\1'), 
            #-----
            (r'(Meter event .*?)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event (?:AX Temp Threshold Exceeded|S4TemperatureThreshold)).*', r'\1'),
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_35_0_40['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_35_0_40['3.35.0.40']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_35_0_40.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_35_0_40}

# '3.36.0.79'

In [None]:
df_3_36_0_79 = pd.read_pickle(ids_with_paths_dict['3.36.0.79'])
#-----
print(f"shape[0] = {df_3_36_0_79.shape[0]}")
print(f"# Unique reasons = {df_3_36_0_79['reason'].nunique()}")
print(df_3_36_0_79['reason'].unique())

In [None]:
dict_3_36_0_79 = {
        # Examples
        # 'Meter detected a clock error.'
        # 'Meter event Clock Error Detected  Time event occurred on meter = 04/18/2020 10:00:10  Sequence number = 332  User id = 0  Event argument = 00-00'
        '3.36.0.79': [
            #-----
            (r'(Meter event Clock Error Detected)\s*Time event occurred on meter\s*=.*\s*Sequence number\s*=.*\s*User id\s*=.*\s*Event argument\s*=.*', r'\1'), 
            #-----
            (r'(Meter event Clock Error Detected).*', r'\1'),
            #-----
            (r'(Meter detected a clock error).*', 'Meter event Clock Error Detected'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_0_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_0_79['3.36.0.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_36_0_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_0_79}

# '3.36.1.29'

In [None]:
df_3_36_1_29 = pd.read_pickle(ids_with_paths_dict['3.36.1.29'])
#-----
print(f"shape[0] = {df_3_36_1_29.shape[0]}")
print(f"# Unique reasons = {df_3_36_1_29['reason'].nunique()}")
print(df_3_36_1_29['reason'].unique())

In [None]:
dict_3_36_1_29 = {
        # Examples
        # 'Meter detected a clock error.'
        '3.36.1.29': [
            #-----
            ('(Meter detected a clock error).', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_1_29['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_1_29['3.36.1.29']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_36_1_29.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_1_29}

# '3.36.114.73'

In [None]:
df_3_36_114_73 = pd.read_pickle(ids_with_paths_dict['3.36.114.73'])
#-----
print(f"shape[0] = {df_3_36_114_73.shape[0]}")
print(f"# Unique reasons = {df_3_36_114_73['reason'].nunique()}")
print(df_3_36_114_73['reason'].unique())

In [None]:
dict_3_36_114_73 = {
        # Examples
        # 'Meter 00:13:50:05:ff:2e:13:de, detected loss of time (C1219 Table 3)'
        '3.36.114.73':  [
            #-----
            (r'(Meter)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+),\s*(detected loss of time .*)', r'\1 \2'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_114_73['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_114_73['3.36.114.73']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_36_114_73.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_114_73}

# '3.36.114.159'

In [None]:
df_3_36_114_159 = pd.read_pickle(ids_with_paths_dict['3.36.114.159'])
#-----
print(f"shape[0] = {df_3_36_114_159.shape[0]}")
print(f"# Unique reasons = {df_3_36_114_159['reason'].nunique()}")
print(df_3_36_114_159['reason'].unique())

In [None]:
dict_3_36_114_159 = {
        # Examples
        # 'Ignoring Interval Read data for device 00:13:50:05:ff:1e:fa:a4 as it has time in the future 2069-05-02 18:51:00.0'
        '3.36.114.159': [
            #-----
            (r'(Ignoring (?:Interval|Register) Read data for device)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(as it has time in the future).*', r'\1 \2'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_114_159['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_114_159['3.36.114.159']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_36_114_159.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_114_159}

# '3.36.136.73'

In [None]:
df_3_36_136_73 = pd.read_pickle(ids_with_paths_dict['3.36.136.73'])
#-----
print(f"shape[0] = {df_3_36_136_73.shape[0]}")
print(f"# Unique reasons = {df_3_36_136_73['reason'].nunique()}")
print(df_3_36_136_73['reason'].unique())

In [None]:
dict_3_36_136_73 = {
        # Examples
        # 'Meter 00:13:50:05:ff:14:c9:61 needs explicit time sync. Drift: 14391 s, Encountered Problems:  TS_ERR_LP_BX, TS_ERR_BIG_DRIFT [0x44], Meter_Time: 06:33:41'
        '3.36.136.73':  [
            #-----
            (r'(Meter)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)\s*(needs explicit time sync.) Drift: -?\d* s, (Encountered Problems:\s*.*), Meter_Time.*', r'\1 \2 \3'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_136_73['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_136_73['3.36.136.73']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*sorted(curated_reasons), sep='\n')

In [None]:
assert(len(set(dict_3_36_136_73.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_136_73}

# '3.36.136.79'

In [None]:
df_3_36_136_79 = pd.read_pickle(ids_with_paths_dict['3.36.136.79'])
#-----
print(f"shape[0] = {df_3_36_136_79.shape[0]}")
print(f"# Unique reasons = {df_3_36_136_79['reason'].nunique()}")
print(df_3_36_136_79['reason'].unique())

In [None]:
dict_3_36_136_79 = {
        # Examples
        # 'Error occurred when attempting to synch meter time with NIC time for device 00:13:50:05:ff:29:e1:20'
        # So, standard beg/end patterns should suffice
        '3.36.136.79':    [
            #-----
            (r'(Error occurred when attempting to synch meter time with NIC time for device)(?:\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+)', r'\1')
            #-----
#             #-----
#             # COULD ALSO DO
#             (r'((?:\s*occurred\s*)?(?:\s*(?:for|on)?\s*(?:meter)?)?\s+(?:[0-9a-zA-Z]+)(?:\:[0-9a-zA-Z]+)+[\s*\.]*)', '')
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_36_136_79['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_36_136_79['3.36.136.79']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_36_136_79.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_36_136_79}

# '3.38.1.139'

In [None]:
df_3_38_1_139 = pd.read_pickle(ids_with_paths_dict['3.38.1.139'])
#-----
print(f"shape[0] = {df_3_38_1_139.shape[0]}")
print(f"# Unique reasons = {df_3_38_1_139['reason'].nunique()}")
print(df_3_38_1_139['reason'].unique())

In [None]:
dict_3_38_1_139 = {
        # Examples
        # 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase B Voltage '
        # 'KV2c meter event Polarity, Cross Phase, Reverse Energy Flow Diagnostic flags:Phase B Voltage, Phase C Voltage '
        '3.38.1.139':    [
            #-----
            (r'(KV2c meter event Polarity, Cross Phase, Reverse Energy Flow) Diagnostic flags:.*', r'\1'), 
            #-----
        ]
}

In [None]:
curated_reasons = []
for reason_i in df_3_38_1_139['reason'].unique().tolist():
    curated_reason_i = AMIEndEvents.reduce_end_event_reason(
        reason=reason_i, 
        patterns=dict_3_38_1_139['3.38.1.139']
    )
    curated_reasons.append(curated_reason_i)
curated_reasons = list(set(curated_reasons))
print(*curated_reasons, sep='\n')

In [None]:
assert(len(set(dict_3_38_1_139.keys()).intersection(set(dict_documented.keys())))==0)
dict_documented = {**dict_documented, **dict_3_38_1_139}

In [None]:
natsorted(ids_with_paths_dict.keys())

In [None]:
regexs_to_document = [x for x in ids_with_paths_dict.keys() if x not in dict_documented.keys()]
natsorted(regexs_to_document)