In [3]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.utils as torch_utils
import pandas as pd
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report


In [4]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

os.environ["CUDA_VISIBLE_DEVICES"]="0"
!nvidia-smi

Wed Aug  5 17:53:23 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.51                 Driver Version: 396.51                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:02:00.0 Off |                  N/A |
| 31%   54C    P2    59W / 250W |   5778MiB / 11177MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:03:00.0 Off |                  N/A |
| 23%   36C    P8    16W / 250W |     12MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 108...  Off  | 00000000:82:00.0 Off |                  N/A |
| 23%   

## Construct custom dataloader

In [51]:
class Customdataset(Dataset):
    def __init__(self,x_data,y_data,transform=None):
        '''
        Call stored dataset
        
        Params
        second: Ellapsed second from the beginning of events 
        encoding_type: Encoding method for outcomeprediction ex) Static, last_state, aggregation, etc.
        '''
        
        
        
        # Transforms
        self.transform = transform
        self.y_data=y_data.to_numpy()
        self.x_data=x_data.to_numpy()
    def __len__(self):
        return len(self.x_data)
    
    def __getitem__(self,idx):
        
        # Convert x and y data to torch flaot tensor
        x = torch.FloatTensor(self.x_data[idx])
        y = self.y_data[idx]
        return x,y

In [52]:
encoding_type = 'last_state'
prefix = 5
input_data = pd.read_csv('../data/'+encoding_type+'_'+str(prefix)+'.csv')
y_data = input_data.loc[:,['Label']]
input_data = input_data.drop(['Label'],axis=1)
x_data = input_data
x_data = x_data.drop('(case) SUMleges',axis=1)

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.33, random_state=69)


In [53]:
trainset = Customdataset(x_train,y_train)
testset = Customdataset(x_test,y_test)

In [54]:
batch_size =10
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(testset,batch_size=1)

## Data description
X value  
BPIC 2015_2 train loader dataset size is 1 \* 816  
Each rows is a case and 5 cases for one train iteration due to batch_size is 1.  
817 elements are included in a single case which are timestamp related information, case and event attributes.  

Y value  
Size of y value is 1 and content is 0 or 1 binary class.

In [55]:
class MLP_prediction(nn.Module):
    def __init__(self):
        super(MLP_prediction,self).__init__()
        
        # MLP part
        self.batchnorm1 = nn.BatchNorm1d(1632)
        self.batchnorm2 = nn.BatchNorm1d(1632)
        self.layer_1 = nn.Linear(816,1632)
        self.layer_2 = nn.Linear(1632,1632)
        self.layer_3 = nn.Linear(816,1)
        self.dropout = nn.Dropout(p=0.1)
        self.relu = nn.Sigmoid()
        
    
    def forward(self, inputs):

        """
        implement code here
        """
        
        hidden = self.relu(self.layer_1(inputs))

#         hidden = self.batchnorm1(hidden)
        hidden = self.relu(self.layer_2(hidden))
#         hidden = self.batchnorm2(hidden)
        hidden = self.dropout(hidden)
        hidden = self.layer_3(hidden)
        outputs = hidden.squeeze(1)
        return outputs

model = MLP_prediction().cuda()

# Loss function & Optimizers
"""
you can change the loss and optimizer
"""
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)#, weight_decay=1e-4)



# Hyperparameters
"""
you can change the value
"""
num_epochs = 100
batch_size = 10   

In [56]:
def binary_acc(train_predict, train_y):
    train_predict_tag = torch.round(torch.sigmoid(train_predict))
    correct_results_sum = (train_predict_tag == train_y).sum().float()
    acc = correct_results_sum/train_y.shape[0]
    acc = torch.round(acc *100)
    
    return acc    

In [57]:
# Train CNN_prediction first

accuracy_graph = {'train':[], 'test':[], 'epoch': []}
loss_graph = {'train':[], 'test':[], 'epoch': []}
model = MLP_prediction().cuda()

for epoch in range(num_epochs):
    epoch_loss = 0
    epoch_acc = 0
    # Training
    for train_x, train_y in train_loader: 
        train_y = train_y.squeeze(1)
        train_x, train_y = train_x.cuda(), train_y.cuda()
        
        
        train_predict = model(train_x)
        train_predict = train_predict.float()
        train_y = train_y.float()
        
        loss = criterion(torch.sigmoid(train_predict), train_y)
        acc = binary_acc(train_predict, train_y)

        # Backpropagation        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
                

    # Evaluation
    if epoch % 10 ==0:
        
        test_acc = 0
        test_loss =0
        
        for test_x, test_y in test_loader:

            with torch.autograd.no_grad():
                test_y = test_y.squeeze(1)
                test_x, test_y = test_x.cuda(), test_y.cuda()
                test_predict = model(test_x)
            test_predict = test_predict.float()
            test_y = test_y.float()

            loss = criterion(torch.sigmoid(test_predict), test_y)
            acc = binary_acc(test_predict, test_y)
            
            test_loss += loss.item()
            test_acc += acc.item()
                               
        print("\ntrain accuracy: {:.2f} %, test accuracy: {:.2f} %".format(epoch_acc/len(train_loader), test_acc/len(test_loader)))
        print("epoch:{}, train_loss: {:.4f}, test_loss: {:.4f}".format(epoch+1, epoch_loss/len(train_loader), test_loss/len(test_loader))) 
        accuracy_graph['epoch'] = epoch+1
        accuracy_graph['train'] = epoch_acc/len(train_loader)

        loss_graph['epoch'] = epoch+1
        loss_graph['train'] = epoch_loss/len(train_loader)
        loss_graph['test'] = test_loss/len(test_loader)


#         torch.save(model.state_dict(), "./cnn_predic.pt")


train accuracy: 69.29 %, test accuracy: 71.90 %
epoch:1, train_loss: 0.6619, test_loss: 0.6616

train accuracy: 71.61 %, test accuracy: 74.09 %
epoch:11, train_loss: 0.6633, test_loss: 0.6616

train accuracy: 73.04 %, test accuracy: 73.36 %
epoch:21, train_loss: 0.6614, test_loss: 0.6602

train accuracy: 75.18 %, test accuracy: 72.63 %
epoch:31, train_loss: 0.6586, test_loss: 0.6615

train accuracy: 71.96 %, test accuracy: 71.90 %
epoch:41, train_loss: 0.6630, test_loss: 0.6616

train accuracy: 70.36 %, test accuracy: 74.09 %
epoch:51, train_loss: 0.6660, test_loss: 0.6597

train accuracy: 72.50 %, test accuracy: 72.26 %
epoch:61, train_loss: 0.6621, test_loss: 0.6626

train accuracy: 71.07 %, test accuracy: 73.72 %
epoch:71, train_loss: 0.6644, test_loss: 0.6615

train accuracy: 71.43 %, test accuracy: 71.53 %
epoch:81, train_loss: 0.6650, test_loss: 0.6645

train accuracy: 71.07 %, test accuracy: 74.45 %
epoch:91, train_loss: 0.6631, test_loss: 0.6603


In [58]:
test_acc = 0
test_loss =0
y_predict_list=[]
model.eval()

testset = Customdataset(x_test,y_test)
test_loader = DataLoader(testset,batch_size=1)

with torch.autograd.no_grad():
    for test_x, test_y in test_loader:
        test_y = test_y.squeeze(1)
        test_x, test_y = test_x.cuda(), test_y.cuda()
        test_predict = model(test_x)

        test_predict = test_predict.float()
        test_y = test_y.float()
        
        test_predict_tag = torch.round(torch.sigmoid(test_predict))
        y_predict_list.append(test_predict_tag.cpu().numpy())
        
print(classification_report(y_test,y_predict_list))

              precision    recall  f1-score   support

         0.0       0.82      1.00      0.90       224
         1.0       0.00      0.00      0.00        50

    accuracy                           0.82       274
   macro avg       0.41      0.50      0.45       274
weighted avg       0.67      0.82      0.74       274



  _warn_prf(average, modifier, msg_start, len(result))
