## Generate labels for Alarm Type Prediction

In [None]:
import datetime
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
import pickle

In [None]:
# Inverter table; loaded here but not referenced by any other cell in this notebook.
inv_df = pd.read_csv('all_inverters.csv')

# Error codes that the multi-class labels are built for.
target_codes = [7006, 3511, 7502, 7501, 3504, 6448, 1500, 7704]

# Keep only alarms that carry one of the target codes and whose `hod` value lies
# in the inclusive range 6..18 (an earlier version of the filter used 6..17).
# NOTE(review): `hod` is presumably the hour of day of the alarm — confirm.
alarm_df = pd.read_csv('all_alarms.csv')
has_target_code = alarm_df["Error Code"].isin(target_codes)
in_hod_window = alarm_df["hod"].between(6, 18)
alarm_df = alarm_df[has_target_code & in_hod_window]
print(alarm_df.shape)

# Sorted names of the controllers that still have alarms after filtering.
inverters = sorted(alarm_df["Controller Name"].unique().tolist())

In [None]:
# Preview the first rows of the filtered alarm table.
alarm_df.head()

In [None]:
# Load the previously generated per-inverter label frames. Later cells treat
# `inv_labels` as a mapping from inverter name to a DataFrame (they call .keys()
# on it and index it by inverter name).
# NOTE(review): pickle.load can execute arbitrary code during unpickling, so this
# file must only ever come from a trusted source.
with open('inverter-labels-v3.pkl', 'rb') as handle:
    inv_labels = pickle.load(handle)

In [None]:
# Class balance of the binary label per inverter: number of rows labelled 1
# ("positive") and number labelled 0 ("negative").
label_col = 'label_24h'
label_df = {'inverters': [], 'positive': [], 'negative': []}
for inv in inv_labels.keys():
    counts = dict(inv_labels[inv][label_col].value_counts())
    label_df['inverters'].append(inv)
    # value_counts() leaves out classes that never occur, so default BOTH sides
    # to 0. Previously only the positive side was guarded and an inverter with
    # no negative rows raised KeyError on counts[0].
    label_df['positive'].append(counts.get(1, 0))
    label_df['negative'].append(counts.get(0, 0))
label_df = pd.DataFrame(label_df)
label_df

In [None]:
# Sanity check on INV-01: print how many rows have label_24h == 1, then show the
# shape of the subset of those rows that is stamped exactly 06:00.
inv01 = inv_labels['INV-01']
inv01_positive = inv01.loc[inv01['label_24h'] == 1]
print(len(inv01_positive))
stamp = inv01_positive['date'].dt
df_i = inv01_positive.loc[(stamp.hour == 6) & (stamp.minute == 0)].copy()
df_i.shape

In [None]:
# Same check on INV-01 using the `label` column instead of `label_24h`:
# print the number of rows with label == 1 and show the subset's shape.
inv01 = inv_labels['INV-01']
df_i = inv01.loc[inv01['label'] == 1]
print(len(df_i))
df_i.shape

In [None]:
# Write the current INV-01 selection (`df_i`) to disk for inspection outside the notebook.
# NOTE(review): two later cells also write to 'inv_01.csv', so after a full run
# only the content of the last of those writes remains in the file.
df_i.to_csv('inv_01.csv')

Label Generation for Multi-Class Classification

    - for each data point whose previously generated label (`label_24h`) is 1 (i.e., there is at least one alarm in the next 24 hours), collect the error codes of all those alarms from alarm_df
    - since there can be multiple alarms in that window, and each alarm type can occur more than once, the labels are actually a count of each alarm type

In [None]:
def concat_alarms(dt, df_):
    """Return the error codes of every alarm raised in the 24 hours after `dt`.

    The window is open at the start and closed at the end: an alarm raised
    exactly at `dt` is excluded, one raised exactly one day later is included.

    Parameters
    ----------
    dt : timestamp marking the start of the window.
    df_ : DataFrame with a 'Raised Time' column comparable to `dt` and an
        'Error Code' column.

    Returns
    -------
    list
        The 'Error Code' values of the matching rows, in the row order of `df_`
        (one entry per alarm, so codes may repeat).
    """
    window_end = dt + pd.Timedelta(1, 'D')
    raised = df_['Raised Time']
    in_window = (raised > dt) & (raised <= window_end)
    return df_.loc[in_window, 'Error Code'].to_list()

# Build the multi-class targets per inverter. For every row that is already
# labelled positive (label_24h == 1), collect the error codes of that inverter's
# alarms in the following 24 hours and count how often each target code occurs.
# Result: feature_dfs[inverter] -> DataFrame with an 'alarm_24h_concat' list
# column and one 'count_<code>' column per entry of target_codes.
feature_dfs = dict()
for inv in inv_labels.keys():
    # Private copy of the positive rows so the frames in inv_labels stay untouched.
    df_inv = inv_labels[inv]
    df_inv = df_inv[df_inv.label_24h == 1].copy(deep=True)

    # This inverter's alarms with parsed timestamps, in chronological order.
    alarm_df_i = alarm_df[alarm_df["Controller Name"] == inv].copy()
    alarm_df_i['Raised Time'] = pd.to_datetime(alarm_df_i['Raised Time'])
    alarm_df_i = alarm_df_i.sort_values('Raised Time').reset_index(drop=True)

    # generate the labels for multi-class classification
    df_inv['alarm_24h_concat'] = df_inv['date'].apply(lambda x: concat_alarms(x, alarm_df_i))
    for t in target_codes:
        df_inv['count_'+str(t)] = df_inv['alarm_24h_concat'].apply(lambda x: x.count(t))

    # df_inv is already an independent copy, so it is stored directly; the unused
    # min/max of 'Raised Time' and the second deep copy were dropped.
    feature_dfs[inv] = df_inv
    print(f"{inv} #alarms: {alarm_df_i.shape[0]}, #data-points: {df_inv.shape[0]}")


In [None]:
# Inspect the generated multi-class label frame for INV-01.
feature_dfs['INV-01']

In [None]:
# Stack the per-inverter frames into one table, tagging each row with the
# inverter it belongs to.
tagged_frames = []
for inv in inv_labels.keys():
    frame = feature_dfs[inv]
    # Written in place, so the frame stored in feature_dfs gains the column too.
    frame['inverter'] = inv
    tagged_frames.append(frame)
all_dfs = pd.concat(tagged_frames)
all_dfs.shape

In [None]:
# Reduce the table to one example per (inverter, calendar day): the first row of
# each day in the current row order.
# (If we take only the 6 AM rows instead, many examples are dropped.)
#
# GroupBy.first() is deliberately not used: it returns the first NON-NULL value
# of every column independently, so a day's output row could be stitched together
# from several different timestamps whenever a column has gaps.
# drop_duplicates(keep='first') keeps each selected row intact.
day_keys = ['inverter', 'only_date']
all_dfs = all_dfs.assign(only_date=all_dfs['date'].dt.date)
all_dfs = (
    all_dfs
    .dropna(subset=day_keys)                          # groupby() also discarded rows with a null key
    .drop_duplicates(subset=day_keys, keep='first')   # first whole row per inverter/day
    .sort_values(day_keys)                            # same key ordering groupby() produced
    .reset_index(drop=True)
)
# Key columns first, matching the layout groupby(...).first().reset_index() gave.
all_dfs = all_dfs[day_keys + [col for col in all_dfs.columns if col not in day_keys]]
all_dfs

In [None]:
# Turn every per-code alarm count into a binary label (1 if the count is above
# zero, else 0) and print the absolute and relative class frequencies per code.
for t in target_codes:
    label_name = f'label_{t}'
    all_dfs[label_name] = np.where(all_dfs[f'count_{t}'] > 0, 1, 0)
    print(
        t,
        all_dfs[label_name].value_counts(),
        all_dfs[label_name].value_counts(normalize=True),
    )

In [None]:
# List the columns of the combined table (including the count_* and label_* columns added above).
all_dfs.columns

In [None]:
# Persist the combined per-day label table; the row index is not written.
all_dfs.to_csv('multiclass_labels.csv', index=False)

In [None]:
# Export INV-01's date, 24h label and collected alarm codes for manual inspection.
# NOTE(review): this overwrites the 'inv_01.csv' written by an earlier cell, and a
# later cell overwrites it again.
all_dfs[all_dfs.inverter=='INV-01'][['date', 'label_24h', 'alarm_24h_concat']].to_csv('inv_01.csv')

In [None]:
# INV-01's alarms with parsed timestamps, ordered chronologically
# (the original row index is kept).
alarm_df_i = (
    alarm_df.loc[alarm_df["Controller Name"] == 'INV-01']
    .assign(**{'Raised Time': lambda frame: pd.to_datetime(frame['Raised Time'])})
    .sort_values('Raised Time')
)
alarm_df_i

In [None]:
# Export the current `alarm_df_i` (the INV-01 alarms when cells are run in order).
# NOTE(review): this is the third write to 'inv_01.csv' in the notebook; it replaces
# the content written by the earlier cells.
alarm_df_i.to_csv('inv_01.csv')

In [None]:
# Summary statistics of the per-code alarm counts across all inverters and days.
all_dfs[['count_'+str(t) for t in target_codes]].describe()

In [None]:
# Show the rows where code 7006 was counted exactly 334 times.
# NOTE(review): 334 is presumably an extreme value read off the describe() output — confirm.
all_dfs[all_dfs['count_7006']==334]

In [None]:
# INV-20's alarms in chronological order, then only those raised on 2020-02-08.
inv20_alarms = alarm_df.loc[alarm_df["Controller Name"] == 'INV-20'].copy()
inv20_alarms['Raised Time'] = pd.to_datetime(inv20_alarms['Raised Time'])
alarm_df_i = inv20_alarms.sort_values('Raised Time').reset_index(drop=True)
# Compare against a datetime.date object; comparing .dt.date to a string would never match.
alarm_df_i[alarm_df_i['Raised Time'].dt.date == datetime.date(2020, 2, 8)]