# Module Load

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
import math
from MIL1D import *
from MIL2D import *

from tqdm.notebook import tqdm

In [None]:
# Select the compute device: use the first CUDA GPU when one is available and
# fall back to the CPU otherwise, so the notebook also runs on GPU-less hosts.
# To force CPU execution, replace the expression with torch.device("cpu").
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data load

In [None]:
# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load 'dataset.p' from a trusted source.
df = pd.read_pickle('dataset.p')
# astype returns a new Series; assign it back so the cast actually takes effect
# (the bare expression previously discarded the result).
df['clabel5'] = df['clabel5'].astype(int)
df.head()

In [None]:
# Chronological 70/30 split: the leading 70% of rows are used for training,
# the remaining rows for testing (no shuffling).
split_row = int(len(df)*0.7)
training_df, test_df = df.iloc[:split_row], df.iloc[split_row:]

# Hyperparameter Settings

In [None]:
# Number of consecutive rows in one sliding window.
seq_len = 12
# Number of windows per DataLoader batch.
batch_size = 1020
# Optimizer step size; keep in sync with the value handed to the optimizer.
learning_rate = 0.01
# Upper bound passed to clip_grad_norm_ during training.
max_grad_norm = 1
# Base epoch count (the training loop scales this value).
Epochs = 20

# Dataset & Dataloader Generation

In [None]:
class AEADDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over five sensor columns with one label per window.

    Reads the columns ``hot``, ``power``, ``gas``, ``hotwater``, ``water`` and
    ``clabel5`` of ``DatasetDf``. Window ``i`` covers rows ``i .. i+seq_len-1``.
    A window is labelled 0 when its ``clabel5`` values sum to zero, else 1.

    Args:
        DatasetDf: DataFrame holding the columns listed above.
        seq_len: Number of consecutive rows per window.
    """

    @staticmethod
    def _windows(values, seq_len, count):
        """Return the first ``count`` length-``seq_len`` sliding windows of ``values``."""
        return [values[start:start + seq_len] for start in range(count)]

    def __init__(self, DatasetDf, seq_len):
        # Window starts run while start + seq_len < len(DatasetDf).
        # NOTE(review): this excludes the window that ends on the final row;
        # kept as-is so the dataset length does not change.
        count = max(len(DatasetDf) - seq_len, 0)

        # Convert each column to a list once and slice it, instead of
        # re-materialising the whole column for every element of every window.
        self.hots = self._windows(DatasetDf['hot'].to_list(), seq_len, count)
        self.powers = self._windows(DatasetDf['power'].to_list(), seq_len, count)
        self.gases = self._windows(DatasetDf['gas'].to_list(), seq_len, count)
        self.hotwaters = self._windows(DatasetDf['hotwater'].to_list(), seq_len, count)
        self.waters = self._windows(DatasetDf['water'].to_list(), seq_len, count)

        label_windows = self._windows(DatasetDf['clabel5'].to_list(), seq_len, count)
        self.labels = [0 if np.sum(window) == 0 else 1 for window in label_windows]

    def __getitem__(self, i):
        """Return (hot, power, gas, hotwater, water, label) tensors for window ``i``."""
        return (
            torch.tensor(self.hots[i]),
            torch.tensor(self.powers[i]),
            torch.tensor(self.gases[i]),
            torch.tensor(self.hotwaters[i]),
            torch.tensor(self.waters[i]),
            torch.tensor(self.labels[i]),
        )

    def __len__(self):
        """Return the number of windows."""
        return len(self.labels)

In [None]:
class AEADDataset2(torch.utils.data.Dataset):
    """Sliding-window dataset that also exposes per-row and per-channel labels.

    Reads the sensor columns ``hot``, ``power``, ``gas``, ``hotwater``,
    ``water``, the row label ``clabel5`` and the per-channel label columns
    ``hot_label``, ``power_label``, ``gas_label``, ``hotwater_label``,
    ``water_label``. Window ``i`` covers rows ``i .. i+seq_len-1``. A window is
    labelled 0 when its ``clabel5`` values sum to zero, else 1.

    Args:
        DatasetDf: DataFrame holding the columns listed above.
        seq_len: Number of consecutive rows per window.
    """

    @staticmethod
    def _windows(values, seq_len, count):
        """Return the first ``count`` length-``seq_len`` sliding windows of ``values``."""
        return [values[start:start + seq_len] for start in range(count)]

    def __init__(self, DatasetDf, seq_len):
        # Window starts run while start + seq_len < len(DatasetDf).
        # NOTE(review): this excludes the window that ends on the final row;
        # kept as-is so the dataset length does not change.
        count = max(len(DatasetDf) - seq_len, 0)

        def column_windows(column):
            # One to_list() per column, then cheap list slicing per window.
            return self._windows(DatasetDf[column].to_list(), seq_len, count)

        self.hots = column_windows('hot')
        self.powers = column_windows('power')
        self.gases = column_windows('gas')
        self.hotwaters = column_windows('hotwater')
        self.waters = column_windows('water')

        self.real_labels = column_windows('clabel5')
        self.labels = [0 if np.sum(window) == 0 else 1 for window in self.real_labels]

        self.hot_labels = column_windows('hot_label')
        self.power_labels = column_windows('power_label')
        self.gas_labels = column_windows('gas_label')
        self.hotwater_labels = column_windows('hotwater_label')
        self.water_labels = column_windows('water_label')

    def __getitem__(self, i):
        """Return twelve tensors for window ``i``.

        Order: the five sensor windows, the window label, the per-row
        ``clabel5`` window, then the five per-channel label windows.
        """
        return (
            torch.tensor(self.hots[i]),
            torch.tensor(self.powers[i]),
            torch.tensor(self.gases[i]),
            torch.tensor(self.hotwaters[i]),
            torch.tensor(self.waters[i]),
            torch.tensor(self.labels[i]),
            torch.tensor(self.real_labels[i]),
            torch.tensor(self.hot_labels[i]),
            torch.tensor(self.power_labels[i]),
            torch.tensor(self.gas_labels[i]),
            torch.tensor(self.hotwater_labels[i]),
            torch.tensor(self.water_labels[i]),
        )

    def __len__(self):
        """Return the number of windows."""
        return len(self.labels)

In [None]:
class AEADDataset3(torch.utils.data.Dataset):
    """Sliding-window dataset whose items are the per-channel label windows.

    Builds the same windows as the sensor/label datasets (columns ``hot``,
    ``power``, ``gas``, ``hotwater``, ``water``, ``clabel5`` and the five
    ``*_label`` columns) and keeps them as attributes, but ``__getitem__``
    returns only the five per-channel label windows as NumPy arrays.

    Args:
        DatasetDf: DataFrame holding the columns listed above.
        seq_len: Number of consecutive rows per window.
    """

    @staticmethod
    def _windows(values, seq_len, count):
        """Return the first ``count`` length-``seq_len`` sliding windows of ``values``."""
        return [values[start:start + seq_len] for start in range(count)]

    def __init__(self, DatasetDf, seq_len):
        # Window starts run while start + seq_len < len(DatasetDf).
        # NOTE(review): this excludes the window that ends on the final row;
        # kept as-is so the dataset length does not change.
        count = max(len(DatasetDf) - seq_len, 0)

        def column_windows(column):
            # One to_list() per column, then cheap list slicing per window.
            return self._windows(DatasetDf[column].to_list(), seq_len, count)

        self.hots = column_windows('hot')
        self.powers = column_windows('power')
        self.gases = column_windows('gas')
        self.hotwaters = column_windows('hotwater')
        self.waters = column_windows('water')

        self.real_labels = column_windows('clabel5')
        self.labels = [0 if np.sum(window) == 0 else 1 for window in self.real_labels]

        self.hot_labels = column_windows('hot_label')
        self.power_labels = column_windows('power_label')
        self.gas_labels = column_windows('gas_label')
        self.hotwater_labels = column_windows('hotwater_label')
        self.water_labels = column_windows('water_label')

    def __getitem__(self, i):
        """Return the (hot, power, gas, hotwater, water) label windows of window ``i``."""
        return (
            np.array(self.hot_labels[i]),
            np.array(self.power_labels[i]),
            np.array(self.gas_labels[i]),
            np.array(self.hotwater_labels[i]),
            np.array(self.water_labels[i]),
        )

    def __len__(self):
        """Return the number of windows."""
        return len(self.labels)

In [None]:
# Training windows (sensor channels + window label) from the training split.
trainset = AEADDataset(DatasetDf=training_df, seq_len=seq_len)

In [None]:
# Test windows with the additional per-row and per-channel labels.
testset = AEADDataset2(DatasetDf=test_df, seq_len=seq_len)

In [None]:
# Same test windows, exposed as per-channel label arrays only.
seg_testset = AEADDataset3(DatasetDf=test_df, seq_len=seq_len)

In [None]:
# Class balance of the training windows: (count of label 0, count of label 1).
train_label_array = np.array(trainset.labels)
np.count_nonzero(train_label_array == 0), np.count_nonzero(train_label_array == 1)

In [None]:
# Training batches are reshuffled on every pass over the loader.
train_dataloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
# Test batches keep dataset order (no shuffle argument given).
test_dataloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    num_workers=0,
)

# Model Selection

In [None]:
# Build the model from the declared hyperparameters instead of repeating
# literals: window follows seq_len, and dim=5 matches the five channels
# concatenated in the training loop.
dmilnet = MILLET(window=seq_len, dim=5, sub_window=1).to(device)

# The step size comes from learning_rate so that changing the hyperparameter
# cell actually affects training.
optimizer = torch.optim.AdamW(dmilnet.parameters(), lr=learning_rate)
# NLLLoss expects log-probabilities as input.
# NOTE(review): presumably MILLET's first output is log-softmax — confirm.
loss_fn = torch.nn.NLLLoss()

In [None]:
# Checkpoint file for the best model seen so far.
best_loss_path = 'modelName.p'

# Best validation score so far (F1 + ROC-AUC, despite the variable name).
best_f1 = 0

# Per-epoch metric history, plotted after training.
train_f1, train_acc, train_auc = [], [], []
valid_f1, valid_acc, valid_auc = [], [], []


def _stack_channels(hots, powers, gases, hotwaters, waters):
    """Move the five channel batches to `device` and concatenate them along dim 1."""
    n_rows = hots.shape[0]
    return torch.concat(
        [t.to(device).reshape(n_rows, 1, -1) for t in (hots, powers, gases, hotwaters, waters)],
        1,
    )


def _epoch_scores(labels, outputs):
    """Return (F1, ROC-AUC, accuracy) of the argmax predictions against `labels`."""
    y_true = np.array(labels)
    y_pred = np.argmax(outputs, axis=1)
    return (
        f1_score(y_true, y_pred),
        roc_auc_score(y_true, y_pred),
        accuracy_score(y_true, y_pred),
    )


# Batch index at which an epoch switches from training to validation:
# batches 0..split_batch are trained on, batches split_batch.. are validated.
# NOTE(review): the batch at split_batch is both trained on and validated.
# NOTE(review): train_dataloader is built with shuffle=True, so the validation
# batches differ per epoch and were trained on in other epochs; the validation
# scores are therefore not from held-out data.
split_batch = int(len(train_dataloader)*0.8)

for e in tqdm(range(int(Epochs*5))):
    train_output, train_label = [], []
    valid_output, valid_label = [], []

    train_check = True
    valid_check = False

    for batch_id, (hots, powers, gases, hotwaters, waters, labels) in enumerate(train_dataloader):
        if train_check:
            dmilnet.train()
            optimizer.zero_grad()
            label = labels.long().to(device)

            out, _, _ = dmilnet(_stack_channels(hots, powers, gases, hotwaters, waters))

            loss = loss_fn(out, label)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(dmilnet.parameters(), max_grad_norm)
            optimizer.step()

            train_output += list(out.detach().cpu().numpy())
            train_label += list(label.detach().cpu().numpy())

        if batch_id == split_batch:
            # Training part of the epoch is finished: score it once and reuse.
            epoch_f1, epoch_auc, epoch_acc = _epoch_scores(train_label, train_output)
            if e % 50 == 0:
                print(f'-----------------------------------------------{e} Train END--------------------------------------------')
                print(f'F1 score: {epoch_f1}')
                print(f'ROC-AUC score: {epoch_auc}')
                print(f'ACC score: {epoch_acc}')

            train_f1.append(epoch_f1)
            train_acc.append(epoch_acc)
            train_auc.append(epoch_auc)

            train_check = False
            valid_check = True

        if valid_check:
            dmilnet.eval()
            label = labels.long().to(device)
            # No gradients are needed for validation; skip building the graph.
            with torch.no_grad():
                out, _, _ = dmilnet(_stack_channels(hots, powers, gases, hotwaters, waters))

            valid_output += list(out.detach().cpu().numpy())
            valid_label += list(label.detach().cpu().numpy())

    epoch_f1, epoch_auc, epoch_acc = _epoch_scores(valid_label, valid_output)
    if e % 50 == 0:
        print(f'-----------------------------------------------{e} Validation--------------------------------------------')
        print(f'F1 score: {epoch_f1}')
        print(f'ROC-AUC score: {epoch_auc}')
        print(f'ACC score: {epoch_acc}')

    valid_f1.append(epoch_f1)
    valid_acc.append(epoch_acc)
    valid_auc.append(epoch_auc)

    # Keep the checkpoint with the highest F1 + ROC-AUC (ties keep the latest).
    epoch_score = epoch_f1 + epoch_auc
    if best_f1 <= epoch_score:
        best_f1 = epoch_score
        torch.save(dmilnet.state_dict(), best_loss_path)

# Restore the best checkpoint before evaluation.
dmilnet.load_state_dict(torch.load(best_loss_path))

In [None]:
# Reload the best checkpoint. map_location remaps the stored tensors onto the
# currently selected device, so a checkpoint saved on a GPU also loads on CPU.
dmilnet.load_state_dict(torch.load(best_loss_path, map_location=device))

In [None]:
# Plot the per-epoch training and validation metric histories on one axis.
# Legend labels previously read 'auccuracy'; corrected to 'accuracy'.
for history, legend_label in (
    (train_f1, 'train_f1'),
    (train_auc, 'train_auc'),
    (train_acc, 'train_accuracy'),
    (valid_f1, 'valid_f1'),
    (valid_auc, 'valid_auc'),
    (valid_acc, 'valid_accuracy'),
):
    plt.plot(history, label=legend_label)
plt.legend(loc='best')
plt.show()

In [None]:
# Window-level model outputs and labels, accumulated over all test batches.
test_output = []
test_label = []

# Per-row and per-channel ground-truth label windows.
test_real_labels = []
test_hot_labels = []
test_power_labels = []
test_gas_labels = []
test_hotwater_labels = []
test_water_labels = []

# Second and third model outputs, kept for the finer-grained analysis below.
test_timestep_output = []
test_segment_output = []

dmilnet.eval()
# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for batch_id, (hots, powers, gases, hotwaters, waters, labels, real_labels, hot_labels, power_labels, gas_labels, hotwater_labels, water_labels) in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        n_rows = hots.shape[0]
        # Concatenate the five channels along dim 1 after moving them to the device.
        x = torch.concat(
            [t.to(device).reshape(n_rows, 1, -1) for t in (hots, powers, gases, hotwaters, waters)],
            1,
        )
        out, timestep_out, segment_out = dmilnet(x)

        test_output += list(out.detach().cpu().numpy())
        # Labels never need to visit the device; convert them directly.
        test_label += list(labels.long().detach().cpu().numpy())

        test_real_labels += list(real_labels.detach().cpu().numpy())
        test_hot_labels += list(hot_labels.detach().cpu().numpy())
        test_power_labels += list(power_labels.detach().cpu().numpy())
        test_gas_labels += list(gas_labels.detach().cpu().numpy())
        test_hotwater_labels += list(hotwater_labels.detach().cpu().numpy())
        test_water_labels += list(water_labels.detach().cpu().numpy())

        test_timestep_output += list(timestep_out.detach().cpu().numpy())
        test_segment_output += list(segment_out.detach().cpu().numpy())

# Window-level metrics on hard (argmax) predictions, computed once and reused.
test_true = np.array(test_label)
test_pred = np.argmax(test_output, axis=1)

print('-----------------------------------------------Test--------------------------------------------')
print(f'F1 score: {f1_score(test_true, test_pred)}')
print(f'Macro F1 score: {f1_score(test_true, test_pred, average="macro")}')
print(f'Weight F1 score: {f1_score(test_true, test_pred, average="weighted")}')
print(f'ROC-AUC score: {roc_auc_score(test_true, test_pred)}')
print(f'ACC score: {accuracy_score(test_true, test_pred)}')

In [None]:
# Flatten the per-window results into one long per-time-step sequence.
segment_label, segment_pred, segment_prob = [], [], []
for window_labels, window_out in zip(test_real_labels, test_timestep_output):
    segment_label.extend(window_labels)
    # Hard prediction: index of the larger of the two per-step outputs.
    segment_pred.extend(window_out.argmax(1))
    # Margin exp(out[:, 1]) - exp(out[:, 0]) for every time step.
    margin = np.exp(window_out[:, 1]) - np.exp(window_out[:, 0])
    segment_prob.extend(margin)

In [None]:
# Time-step-level metrics on the flattened hard predictions.
segment_true = np.array(segment_label)

print('time point Performance')
print(f'F1 score: {f1_score(segment_true, segment_pred)}')
print(f'Macro F1 score: {f1_score(segment_true, segment_pred, average="macro")}')
print(f'Weight F1 score: {f1_score(segment_true, segment_pred, average="weighted")}')
print(f'Recall score: {recall_score(segment_true, segment_pred)}')
print(f'Precision score: {precision_score(segment_true, segment_pred)}')
print(f'ROC-AUC score: {roc_auc_score(segment_true, segment_pred)}')
print(f'ACC score: {accuracy_score(segment_true, segment_pred)}')

In [None]:
# Per-channel evaluation tables. For every test window, collect the binarised
# ground-truth channel labels, the hard channel predictions and the channel
# scores, then flatten each into one DataFrame column per channel.
#
# NOTE(review): `threshold` is not defined anywhere in the visible notebook;
# it must be set before this cell runs.
channel_names = ('hot', 'power', 'gas', 'hotwater', 'water')

channel_true = {name: [] for name in channel_names}
channel_pred = {name: [] for name in channel_names}
channel_proba = {name: [] for name in channel_names}

for i in tqdm(range(len(test_real_labels))):
    # Time steps whose margin exp(out[:, 1]) - exp(out[:, 0]) is below the
    # threshold have all their channel predictions and scores zeroed.
    temp_id = (np.exp(test_timestep_output[i][: , 1]) - np.exp(test_timestep_output[i][: , 0]) < threshold)
    temp_timestep = test_segment_output[i].argmax(2)
    temp_timestep[temp_id, :] = 0
    temp_proba = np.exp(test_segment_output[i][:, :, 1])
    temp_proba[temp_id, :] = 0

    # seg_testset[i] yields the label windows in channel_names order.
    for column, (name, raw_labels) in enumerate(zip(channel_names, seg_testset[i])):
        # Binarise: any positive label value counts as 1. Deriving the mask
        # from the label window itself removes the hard-coded length of 12.
        channel_true[name].append((np.asarray(raw_labels) > 0).astype(int))
        channel_pred[name].append(temp_timestep[:, column])
        channel_proba[name].append(temp_proba[:, column])

# Keep the original per-channel array names for any later cell that uses them.
hot__ = np.array(channel_true['hot'])
power__ = np.array(channel_true['power'])
gas__ = np.array(channel_true['gas'])
hotwater__ = np.array(channel_true['hotwater'])
water__ = np.array(channel_true['water'])

hot__pred = np.array(channel_pred['hot'])
power__pred = np.array(channel_pred['power'])
gas__pred = np.array(channel_pred['gas'])
hotwater__pred = np.array(channel_pred['hotwater'])
water__pred = np.array(channel_pred['water'])

hot_proba = np.array(channel_proba['hot'])
power_proba = np.array(channel_proba['power'])
gas_proba = np.array(channel_proba['gas'])
hotwater_proba = np.array(channel_proba['hotwater'])
water_proba = np.array(channel_proba['water'])

predDf = pd.DataFrame({
    'hot' : hot__pred.reshape(-1),
    'power' : power__pred.reshape(-1),
    'gas' : gas__pred.reshape(-1),
    'hotwater' : hotwater__pred.reshape(-1),
    'water' : water__pred.reshape(-1),
})

probaDf = pd.DataFrame({
    'hot' : hot_proba.reshape(-1),
    'power' : power_proba.reshape(-1),
    'gas' : gas_proba.reshape(-1),
    'hotwater' : hotwater_proba.reshape(-1),
    'water' : water_proba.reshape(-1),
})

labelDf = pd.DataFrame({
    'hot' : hot__.reshape(-1),
    'power' : power__.reshape(-1),
    'gas' : gas__.reshape(-1),
    'hotwater' : hotwater__.reshape(-1),
    'water' : water__.reshape(-1),
})

In [None]:
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.metrics import auc, precision_recall_curve, average_precision_score, roc_auc_score

In [None]:
# Pick, per channel, the score threshold that maximises precision + recall.
#
# The *_pr / *_re / *_ths arrays were referenced but never defined in the
# notebook; they are computed here from the per-channel labels and scores.
# NOTE(review): reconstructed with precision_recall_curve — confirm this
# matches how the curves were originally produced.
hot_pr, hot_re, hot_ths = precision_recall_curve(labelDf["hot"], probaDf["hot"])
power_pr, power_re, power_ths = precision_recall_curve(labelDf["power"], probaDf["power"])
gas_pr, gas_re, gas_ths = precision_recall_curve(labelDf["gas"], probaDf["gas"])
hotwater_pr, hotwater_re, hotwater_ths = precision_recall_curve(labelDf["hotwater"], probaDf["hotwater"])
water_pr, water_re, water_ths = precision_recall_curve(labelDf["water"], probaDf["water"])

# precision/recall carry one more entry than thresholds (the final point has
# no threshold), so the argmax is restricted to [:-1] to keep the index valid.
sum_hot_pr_re = hot_pr + hot_re
hot_threshold = hot_ths[np.argmax(sum_hot_pr_re[:-1])]

sum_power_pr_re = power_pr + power_re
power_threshold = power_ths[np.argmax(sum_power_pr_re[:-1])]

sum_gas_pr_re = gas_pr + gas_re
gas_threshold = gas_ths[np.argmax(sum_gas_pr_re[:-1])]

sum_hotwater_pr_re = hotwater_pr + hotwater_re
hotwater_threshold = hotwater_ths[np.argmax(sum_hotwater_pr_re[:-1])]

sum_water_pr_re = water_pr + water_re
water_threshold = water_ths[np.argmax(sum_water_pr_re[:-1])]

print(hot_threshold, power_threshold, gas_threshold, hotwater_threshold, water_threshold)

In [None]:
# 'hot' channel: binarise the scores at the selected threshold and report.
hot_true = labelDf["hot"].to_list()
pred = np.array(probaDf["hot"].to_list())>=hot_threshold

print(f'F1 score: {f1_score(hot_true, pred)}')
print(f'Macro F1 score: {f1_score(hot_true, pred, average="macro")}')
print(f'Weight F1 score: {f1_score(hot_true, pred, average="weighted")}')
print(f'Recall score: {recall_score(hot_true, pred)}')
print(f'Precision score: {precision_score(hot_true, pred)}')
print(f'ROC-AUC score: {roc_auc_score(hot_true, pred)}')
print(f'ACC score: {accuracy_score(hot_true, pred)}')

In [None]:
# 'power' channel: binarise the scores at the selected threshold and report.
power_true = labelDf["power"].to_list()
pred = np.array(probaDf["power"].to_list())>=power_threshold

print(f'F1 score: {f1_score(power_true, pred)}')
print(f'Macro F1 score: {f1_score(power_true, pred, average="macro")}')
print(f'Weight F1 score: {f1_score(power_true, pred, average="weighted")}')
print(f'Recall score: {recall_score(power_true, pred)}')
print(f'Precision score: {precision_score(power_true, pred)}')
print(f'ROC-AUC score: {roc_auc_score(power_true, pred)}')
print(f'ACC score: {accuracy_score(power_true, pred)}')

In [None]:
# 'gas' channel: binarise the scores at the selected threshold and report.
gas_true = labelDf["gas"].to_list()
pred = np.array(probaDf["gas"].to_list())>=gas_threshold

print(f'F1 score: {f1_score(gas_true, pred)}')
print(f'Macro F1 score: {f1_score(gas_true, pred, average="macro")}')
print(f'Weight F1 score: {f1_score(gas_true, pred, average="weighted")}')
print(f'Recall score: {recall_score(gas_true, pred)}')
print(f'Precision score: {precision_score(gas_true, pred)}')
print(f'ROC-AUC score: {roc_auc_score(gas_true, pred)}')
print(f'ACC score: {accuracy_score(gas_true, pred)}')

In [None]:
# 'hotwater' channel: binarise the scores at the selected threshold and report.
hotwater_true = labelDf["hotwater"].to_list()
pred = np.array(probaDf["hotwater"].to_list())>=hotwater_threshold

print(f'F1 score: {f1_score(hotwater_true, pred)}')
print(f'Macro F1 score: {f1_score(hotwater_true, pred, average="macro")}')
print(f'Weight F1 score: {f1_score(hotwater_true, pred, average="weighted")}')
print(f'Recall score: {recall_score(hotwater_true, pred)}')
print(f'Precision score: {precision_score(hotwater_true, pred)}')
print(f'ROC-AUC score: {roc_auc_score(hotwater_true, pred)}')
print(f'ACC score: {accuracy_score(hotwater_true, pred)}')

In [None]:
# 'water' channel: binarise the scores at the selected threshold and report.
water_true = labelDf["water"].to_list()
pred = np.array(probaDf["water"].to_list())>=water_threshold

print(f'F1 score: {f1_score(water_true, pred)}')
print(f'Macro F1 score: {f1_score(water_true, pred, average="macro")}')
print(f'Weight F1 score: {f1_score(water_true, pred, average="weighted")}')
print(f'Recall score: {recall_score(water_true, pred)}')
print(f'Precision score: {precision_score(water_true, pred)}')
print(f'ROC-AUC score: {roc_auc_score(water_true, pred)}')
print(f'ACC score: {accuracy_score(water_true, pred)}')