In [1]:
import sys
sys.path.append('../')

In [2]:
import random

In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, auc, roc_auc_score
from sklearn.metrics import precision_recall_curve, average_precision_score

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset 

In [5]:
from mlpack.utils import to_device, to_fp16
from mlpack.trainer import TrainArgs, BaseTrainer

# Data

In [6]:
# Read only the first column of each pre-split file (usecols=[0]); its values
# are used below as index labels to select rows of the full dataset.
train_id = pd.read_csv('../datasets/ENEL/log_norm_clean_data_train.csv', usecols=[0], sep=',')
valid_id = pd.read_csv('../datasets/ENEL/log_norm_clean_data_valid.csv', usecols=[0], sep=',')

In [7]:
# Full dataset: tab-separated, first column used as the index. In the preview
# below the index is CONS_NO, the columns are daily timestamps, and the last
# column is `flag`.
df = pd.read_csv('../datasets/ENEL/dataset.csv', sep='\t', index_col=0)
df.head()

Unnamed: 0_level_0,2014-01-01 00:00:00,2014-01-02 00:00:00,2014-01-03 00:00:00,2014-01-04 00:00:00,2014-01-05 00:00:00,2014-01-06 00:00:00,2014-01-07 00:00:00,2014-01-08 00:00:00,2014-01-09 00:00:00,2014-01-10 00:00:00,...,2016-10-23 00:00:00,2016-10-24 00:00:00,2016-10-25 00:00:00,2016-10-26 00:00:00,2016-10-27 00:00:00,2016-10-28 00:00:00,2016-10-29 00:00:00,2016-10-30 00:00:00,2016-10-31 00:00:00,flag
CONS_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0387DD8A07E07FDA6271170F86AD9151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.305338,0.306095,0.360579,0.207343,0.331067,0.351877,0.285285,0.34658,0.255016,1
01D6177B5D4FFE0CABA9EF17DAFC2B84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4B75AC4F2D8434CFF62DB64D0BB43103,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.963074,1
B32AC8CC6D5D805AC053557AB05F5343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.497605,0.568034,0.490694,0.644715,0.684207,0.590742,0.633854,0.475885,0.38571,1
EDFC78B07BA2908B3395C4EB2304665E,0.106331,0.248451,0.318474,0.128116,0.143158,0.233409,0.201251,0.146789,0.139008,0.133303,...,0.48601,0.39524,0.27283,0.272311,0.304988,0.345965,0.520762,0.467337,0.408207,1


In [8]:
# Select the training rows by index label, using the ids read from the train split file.
df_train = df.loc[train_id.values[:,0]]

In [9]:
# Select the validation rows by index label, using the ids read from the valid split file.
df_valid = df.loc[valid_id.values[:,0]]

In [10]:
# Split each frame into features (every column except the last) and the label
# (the last column, `flag` in the preview above).
x_train = df_train.values[:,:-1]
y_train = df_train.values[:,-1]

x_valid = df_valid.values[:,:-1]
y_valid = df_valid.values[:,-1]

In [11]:
# Left-pad every row with two zero columns (1034 -> 1036 values) so the row
# length is divisible by 7 for the reshape performed in a later cell.
# NOTE: this cell is not idempotent; re-running it pads the arrays again.
x_train = np.pad(x_train, ((0, 0), (2, 0)), mode='constant', constant_values=0)
x_valid = np.pad(x_valid, ((0, 0), (2, 0)), mode='constant', constant_values=0)

In [12]:
# Sanity check: both arrays should now have 1036 columns.
x_train.shape, x_valid.shape

((21489, 1036), (5373, 1036))

In [13]:
# Fold each row into a single-channel 2-D grid whose rows hold 7 consecutive
# daily values: (n, 1036) -> (n, 1, 148, 7).
x_train = x_train.reshape(x_train.shape[0], 1, -1, 7)
x_valid = x_valid.reshape(x_valid.shape[0], 1, -1, 7)

In [14]:
# Sanity check: expected layout is (samples, 1, 148, 7).
x_train.shape, x_valid.shape

((21489, 1, 148, 7), (5373, 1, 148, 7))

In [15]:
# Keep a separate copy of the training samples labelled 1; they are appended
# to the training set again in the following cells (oversampling).
x_train_ones = x_train[y_train == 1]
y_train_ones = y_train[y_train == 1]

In [16]:
# Oversample: append three extra copies of the label-1 samples along axis 0.
# NOTE: not idempotent; re-running this cell appends the copies again.
x_train = np.concatenate([x_train] + [x_train_ones] * 3, axis=0)

In [17]:
# Mirror the feature oversampling on the labels (three extra copies of the
# label-1 entries) so x_train and y_train stay aligned.
y_train = np.concatenate([y_train] + [y_train_ones] * 3)

In [18]:
# Number of label-1 training samples (first dimension) before oversampling.
x_train_ones.shape

(1616, 1, 148, 7)

In [19]:
# Sanity check: labels and features must have the same number of samples after oversampling.
y_train.shape, x_train.shape

((26337,), (26337, 1, 148, 7))

# Dataset

In [20]:
class ENELDataset(Dataset):
    """Map-style dataset over a pair of aligned feature/label arrays.

    ``x`` and ``y`` are stored as given (no copy) and indexed in lockstep.
    Each item is returned as ``(features, label)`` with the features cast to
    float32 and the label cast to int64.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        # The sample count is taken from the feature container.
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(features, label)`` pair as float32 / int64."""
        features = self.x[idx].astype(np.float32)
        label = self.y[idx].astype(np.int64)
        return features, label

In [21]:
# Wrap the (oversampled) training arrays and the validation arrays as datasets.
ds_train = ENELDataset(x_train, y_train)
ds_valid = ENELDataset(x_valid, y_valid)

In [22]:
# Batches of 32 for both loaders; only the training loader is shuffled.
dl_train = DataLoader(ds_train, batch_size=32, shuffle=True, pin_memory=True, num_workers=4)
dl_valid = DataLoader(ds_valid, batch_size=32, shuffle=False, pin_memory=True, num_workers=4)

# Model: self-attention + convolution

In [23]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
# (To force CPU execution while debugging, set device = 'cpu' instead.)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [24]:
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention over the last two axes.

    The input is projected to queries, keys and values with three independent
    linear layers acting on the last axis. Attention is computed between the
    positions of the second-to-last axis; all leading axes are treated as
    batch dimensions.

    Args:
        in_size: size of the last axis of the input.
        out_size: size of the last axis of the output (projection width).

    Shape:
        input ``(..., m, in_size)`` -> output ``(..., m, out_size)``.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        self.query = nn.Linear(in_size, out_size)
        self.key = nn.Linear(in_size, out_size)
        self.value = nn.Linear(in_size, out_size)

        # Scaling factor for the attention logits: sqrt(out_size).
        self.d = out_size**(0.5)

    def forward(self, x):
        """Return the attention-weighted values for ``x``."""
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)
        # Swap only the last two axes so the layer is not tied to 4-D inputs;
        # for 4-D tensors this is identical to permute(0, 1, 3, 2).
        scores = (q @ k.transpose(-2, -1)) / self.d
        # Note: `scores` has shape (..., m, m), so memory grows quadratically in m.
        return torch.softmax(scores, dim=-1) @ v

In [25]:
class ConvModel(nn.Module):
    """Three attention+convolution stages, a 3x3 max-pool and a linear 2-class head.

    The classifier width is hard-coded for inputs of shape ``(n, 1, 148, 7)``:
    the stages produce ``(n, 90, 148, 64)`` and ``MaxPool2d(3)`` reduces that
    to ``(n, 90, 49, 21)``.
    """

    def __init__(self):
        super().__init__()

        dropout = 0.1

        # (in_features, out_features, in_channels, out_channels) for each stage.
        stage_specs = [
            (7, 64, 1, 90),
            (64, 64, 90, 90),
            (64, 64, 90, 90),
        ]
        stages = [self.block(*spec, dropout) for spec in stage_specs]

        self.net = nn.Sequential(*stages, nn.MaxPool2d(3))
        self.classifier = nn.Linear(90 * 49 * 21, 2)

    def block(self, in_features, out_features, in_channels, out_channels, dropout):
        """Build one stage: self-attention, 3x3 convolution, ReLU, dropout."""
        layers = [
            # n x in_channels x m x in_features -> n x in_channels x m x out_features
            SelfAttention(in_features, out_features),
            # -> n x out_channels x m x out_features (padding=1 keeps the spatial size)
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits, one row per input sample."""
        features = self.net(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(x.shape[0], -1)
        return self.classifier(flat)

In [26]:
# Instantiate the attention+convolution classifier with its built-in hyper-parameters.
model = ConvModel()

In [27]:
# Move the model to the selected device; the returned module's repr is shown as the cell output.
model.to(device)

ConvModel(
  (net): Sequential(
    (0): Sequential(
      (0): SelfAttention(
        (query): Linear(in_features=7, out_features=64, bias=True)
        (key): Linear(in_features=7, out_features=64, bias=True)
        (value): Linear(in_features=7, out_features=64, bias=True)
      )
      (1): Conv2d(1, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (2): ReLU()
      (3): Dropout(p=0.1, inplace=False)
    )
    (1): Sequential(
      (0): SelfAttention(
        (query): Linear(in_features=64, out_features=64, bias=True)
        (key): Linear(in_features=64, out_features=64, bias=True)
        (value): Linear(in_features=64, out_features=64, bias=True)
      )
      (1): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (2): ReLU()
      (3): Dropout(p=0.1, inplace=False)
    )
    (2): Sequential(
      (0): SelfAttention(
        (query): Linear(in_features=64, out_features=64, bias=True)
        (key): Linear(in_features=64, out_features=64, bias

In [28]:
# Pull a single batch from the training loader for a quick forward-pass check.
x, y = next(iter(dl_train))

In [29]:
# Smoke test: run one batch through the model without tracking gradients and
# check the output shape (one row of 2 logits per sample).
with torch.no_grad():
    o = model(x.to(device))
o.shape

torch.Size([32, 2])

# Eval

In [30]:
import numpy as np

def precision_at_k(y_true,class_probs,k,threshold=0.5,class_of_interest=1,isSorted=False):
    """Score the ``k`` highest-ranked samples against their true labels.

    Samples are ranked by their score for ``class_of_interest`` (descending).
    Each of the top ``k`` is assigned ``class_of_interest`` when its score is
    strictly above ``threshold`` and ``0`` otherwise; the result is the number
    of assigned labels equal to the true labels, divided by ``k``.

    Note that a sample predicted ``0`` whose true label is ``0`` also counts
    as a hit, and that the denominator is always ``k`` even when fewer than
    ``k`` samples are supplied.

    Args:
        y_true: 1-D array-like of true labels.
        class_probs: when ``isSorted`` is False, a 2-D array-like of per-class
            scores (one column per class). When ``isSorted`` is True, a 1-D
            array-like holding the scores of the class of interest.
        k: number of top-ranked samples to evaluate (must be >= 1).
        threshold: score above which a sample is assigned ``class_of_interest``.
        class_of_interest: column index of the class being ranked; also the
            label value assigned to samples above the threshold.
        isSorted: set to True when ``y_true`` and ``class_probs`` are already
            ordered by descending score, to skip the sort.

    Returns:
        The fraction described above, as a float.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError('k must be a positive integer')

    # Accept plain lists as well as arrays.
    y_true = np.asarray(y_true)
    class_probs = np.asarray(class_probs)

    if isSorted:
        # The caller already ranked the data; avoid sorting twice.
        sorted_coi_probs = class_probs
        sorted_y = y_true
    else:
        # Scores of the class of interest, ranked from highest to lowest.
        # A single argsort orders both the scores and the labels.
        coi_probs = class_probs[:, class_of_interest]
        order = np.argsort(coi_probs)[::-1]
        sorted_coi_probs = coi_probs[order]
        sorted_y = y_true[order]

    # Keep the top k scores and their labels.
    sorted_coi_probs = sorted_coi_probs[:k]
    sorted_y = sorted_y[:k]

    # Assign a class to each kept sample based on the threshold.
    sorted_predicted_classes = np.where(sorted_coi_probs > threshold,
                                        float(class_of_interest),
                                        0.0)

    return np.sum(sorted_predicted_classes == sorted_y) / k

def map_at_N(y_true,class_probs,N,thrs=0.5,class_of_interest=1):
    """Average the top-k hit rate over the positive samples within the top ``N``.

    Samples are ranked by their score for ``class_of_interest`` (descending)
    and cut to the first ``N``. For every rank ``k`` (1-based) that holds a
    positive sample (true label > 0), the hit rate of the first ``k`` samples
    is computed with the same rule as ``precision_at_k``: a sample is assigned
    ``class_of_interest`` when its score is strictly above ``thrs`` and ``0``
    otherwise, and a hit is an assigned label equal to the true label. The
    function returns the mean of those hit rates.

    Args:
        y_true: 1-D array-like of true labels.
        class_probs: 2-D array-like of per-class scores (one column per class).
        N: number of top-ranked samples to consider.
        thrs: score threshold used to assign ``class_of_interest``.
        class_of_interest: column index of the class being ranked; also the
            label value assigned to samples above the threshold.

    Returns:
        The mean hit rate as a float, or ``0.0`` when the top ``N`` contains
        no positive sample.
    """
    # Accept plain lists as well as arrays; the inputs are never modified.
    y_true = np.asarray(y_true)
    class_probs = np.asarray(class_probs)

    # Rank by the score of the class of interest and keep the top N.
    coi_probs = class_probs[:, class_of_interest]
    order = np.argsort(coi_probs)[::-1][:N]
    sorted_coi_probs = coi_probs[order]
    sorted_y = y_true[order]

    # 0-based ranks of the positive samples inside the top N.
    positive_positions = np.flatnonzero(sorted_y > 0)
    if positive_positions.size == 0:
        # Nothing to average over; avoid the NaN of an empty mean.
        return 0.0

    predicted = np.where(sorted_coi_probs > thrs, float(class_of_interest), 0.0)
    # hits[i] = number of correct assignments among the first i + 1 samples,
    # so hits[i] / (i + 1) is the hit rate at rank i + 1.
    hits = np.cumsum(predicted == sorted_y)
    return (hits[positive_positions] / (positive_positions + 1)).mean()

In [31]:
class MyTrainer(BaseTrainer):
    """Trainer hooks for the binary ENEL classifier.

    Adapts ``(x, y)`` batches to the dict format used by the hooks below and
    reports classification metrics during evaluation.
    """

    @staticmethod
    def dataloader_generator(dataloader):
        """Yield each ``(x, y)`` batch, moved to ``device``, as an inputs/targets dict."""
        for x, y in dataloader:
            x, y = to_device(x, y, device=device)
            yield {
                'inputs': {
                    'x': x
                },
                'targets': {
                    'y': y
                }
            }

    @staticmethod
    def loss_from_model(model_output, targets, loss_fn=None):
        """Apply ``loss_fn`` to the model output and the ``'y'`` target."""
        y = targets['y']
        return loss_fn(model_output, y)

    def evaluate_fn(self, model, dataloader, loss_fn):
        """Evaluate ``model`` on ``dataloader`` and print validation metrics.

        Prints F1, accuracy, ROC AUC and the confusion matrix.

        Returns:
            A tuple ``(mean batch loss, F1 score)``.
        """
        model.eval()
        losses = []
        preds = []
        trues = []
        scores = []
        dl_gen = self.dataloader_generator(dataloader)
        for batch in self.tqdm(dl_gen, leave=False, desc='Eval...', total=len(dataloader)):
            inputs = batch['inputs']
            targets = batch['targets']

            with torch.no_grad():
                o = model(**inputs)

            loss = self.loss_from_model(o, targets, loss_fn)

            y = targets['y']

            # The model emits raw logits; softmax turns them into class
            # probabilities and column 1 is the score of the positive class.
            scores += torch.softmax(o.float(), dim=1)[:, 1].detach().cpu().numpy().tolist()
            preds += o.argmax(1).detach().cpu().numpy().tolist()
            trues += y.detach().cpu().numpy().tolist()
            losses.append(loss.item())

        acc = accuracy_score(trues, preds)
        f1 = f1_score(trues, preds)
        conf = confusion_matrix(trues, preds)

        # ROC AUC needs continuous scores and both classes present in the
        # labels. sklearn's `auc(x, y)` only integrates a given curve, so it
        # must not be fed hard predictions and labels.
        if len(set(trues)) > 1:
            roc_auc = roc_auc_score(trues, scores)
        else:
            roc_auc = float('nan')

        print('--- Validation ---')
        print(f'F1 = {f1}\t Acc = {acc}')
        print(f'AUC = {roc_auc}')
        print(conf)
        return np.array(losses).mean(), f1

# Optimization and training

In [32]:
# Class-weighted cross-entropy: errors on class 0 weigh half as much as errors on class 1.
loss_fn = nn.CrossEntropyLoss(weight=torch.tensor([0.5, 1.], device=device))
optimizer = torch.optim.AdamW(params=model.parameters(), lr=5e-3)

In [33]:
# NOTE(review): the positional arguments are presumably (number of epochs,
# checkpoint file name); the training progress bar below shows max=40.
# Confirm against mlpack.trainer.TrainArgs.
args = TrainArgs(40, 'enel_selfatt_conv.ckp')

In [34]:
# NOTE(review): grad_steps=4 is presumably the number of gradient-accumulation
# steps per optimizer update; confirm against mlpack.trainer.BaseTrainer.
trainer = MyTrainer(grad_steps=4)

In [35]:
# trainer.evaluate_fn(model, dl_valid, loss_fn)

In [36]:
# NOTE(review): the recorded run of this cell aborted with "CUDA out of memory"
# on a 3.95 GiB GPU (see the output below). The failed 242 MiB allocation is
# roughly the size of one float32 attention-score tensor for a 90-channel stage
# at batch size 32 (32 x 90 x 148 x 148), so a smaller batch size is presumably
# needed on this hardware. TODO confirm.
trainer.train(args, model, dl_train, dl_valid, optimizer, loss_fn)

HBox(children=(IntProgress(value=0, description='Training...', max=40, style=ProgressStyle(description_width='…

HBox(children=(IntProgress(value=0, max=824), HTML(value='')))

RuntimeError: CUDA out of memory. Tried to allocate 242.00 MiB (GPU 0; 3.95 GiB total capacity; 2.59 GiB already allocated; 123.19 MiB free; 274.95 MiB cached)