# Global settings

In [None]:
# References:
# This source code file refers to:
# https://github.com/ICL-ml4csec/VulBERTa
# https://towardsdatascience.com/text-classification-with-bert-in-pytorch-887965e5820f
# https://huggingface.co/docs/transformers/model_doc/roberta
# https://colab.research.google.com/github/dpressel/dlss-tutorial/blob/master/1_pretrained_vectors.ipynb


In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import random
import torch
import numpy as np
import shutil

def write_to_file(text, path, mode='a'): # 'a': append; 'w': overwrite
    with open(path, mode) as f:
        f.write(text)

def mkdir_if_not_exist(directory):
    if not directory: return
    if not os.path.exists(directory):
        os.mkdir(directory)

def remove_file_if_exist(path):
    if not path: return
    if os.path.exists(path):
        try:
            os.remove(path)
        except:
            shutil.rmtree(path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('using', device)

# The following randomization refers to: https://github.com/ICL-ml4csec/VulBERTa/blob/main/Finetuning_VulBERTa-MLP.ipynb
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
os.environ['WANDB_DISABLED'] = 'true'
os.environ['WANDB_MODE'] = 'dryrun'

# -------------------------------------- start

DATASET_NAME = 'qemu'
DATASET_MASKING = 'masked_'
# DATASET_MASKING = ''

codeTF_check_point = 'checkpoint-6681'
msgTF_check_point = 'roberta_large_cnn_0.794_ep15(masked_qemu_msgTF).pt'

# -------------------------------------- end

root_directory = '/root/autodl-tmp'
dataset_directory = f'{root_directory}/output_dataset_1/{DATASET_MASKING}{DATASET_NAME}'
init_train_path = f'{dataset_directory}/train.json'
init_val_path = f'{dataset_directory}/val.json'
init_test_path = f'{dataset_directory}/test.json'
intermediate_directory = f'{root_directory}/intermediate/{DATASET_MASKING}{DATASET_NAME}'
mkdir_if_not_exist(f'{root_directory}/intermediate')
mkdir_if_not_exist(intermediate_directory)

finetuned_ct_model_path = f'{root_directory}/codeTF_check_point/{DATASET_MASKING}{DATASET_NAME}/{codeTF_check_point}'
intermediate_ct_train_path = f'{intermediate_directory}/ct_train.txt'
intermediate_ct_val_path = f'{intermediate_directory}/ct_val.txt'
intermediate_ct_test_path = f'{intermediate_directory}/ct_test.txt'

finetuned_mt_model_path = f'{root_directory}/msgTF_check_point/{DATASET_MASKING}{DATASET_NAME}/{msgTF_check_point}'
intermediate_mt_train_path = f'{intermediate_directory}/mt_train.txt'
intermediate_mt_val_path = f'{intermediate_directory}/mt_val.txt'
intermediate_mt_test_path = f'{intermediate_directory}/mt_test.txt'


using cuda


# CodeTransformer

In [2]:
from tqdm import tqdm
import sys
import pandas as pd
import numpy as np
import csv
import pickle
import re
import torch
import sklearn
import random
import clang
from clang import *
from clang import cindex
from pathlib import Path
from tokenizers import ByteLevelBPETokenizer
from tokenizers.implementations import ByteLevelBPETokenizer
from tokenizers.processors import BertProcessing
from torch.utils.data import Dataset, DataLoader, IterableDataset
from transformers import RobertaConfig
from transformers import RobertaForMaskedLM, RobertaForSequenceClassification
from transformers import RobertaTokenizerFast
from transformers import DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
from transformers import LineByLineTextDataset
from transformers.modeling_outputs import SequenceClassifierOutput
from tokenizers.pre_tokenizers import PreTokenizer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers import NormalizedString,PreTokenizedString
from typing import List
from tokenizers import Tokenizer
from tokenizers import normalizers,decoders
from tokenizers.normalizers import StripAccents, unicode_normalizer_from_str, Replace
from tokenizers.processors import TemplateProcessing
from tokenizers import processors,pre_tokenizers
from tokenizers.models import BPE

# definitions
class MyTokenizer:
    cidx = cindex.Index.create()

    def clang_split(self, i: int, normalized_string: NormalizedString) -> List[NormalizedString]:
        ## Tokkenize using clang
        tok = []
        tu = self.cidx.parse('tmp.c',
                       args=[''],  
                       unsaved_files=[('tmp.c', str(normalized_string.original))],  
                       options=0)
        for t in tu.get_tokens(extent=tu.cursor.extent):
            spelling = t.spelling.strip()
            if spelling == '': continue
            ## Keyword no need
            ## Punctuations no need
            ## Literal all to BPE
            #spelling = spelling.replace(' ', '')
            tok.append(NormalizedString(spelling))
        return(tok)

    def pre_tokenize(self, pretok: PreTokenizedString):
        pretok.split(self.clang_split)

def process_encodings(encodings):
    input_ids=[]
    attention_mask=[]
    for enc in encodings:
        input_ids.append(enc.ids)
        attention_mask.append(enc.attention_mask)
    return {'input_ids':input_ids, 'attention_mask':attention_mask}

class MyCustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
        assert len(self.encodings['input_ids']) == len(self.encodings['attention_mask']) ==  len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# ------------------------------------------------------------------------------
# tokenize and load dataset
vocab, merges = BPE.read_file(vocab="./tokenizer/drapgh-vocab.json", merges="./tokenizer/drapgh-merges.txt")
my_tokenizer = Tokenizer(BPE(vocab, merges, unk_token="<unk>"))

my_tokenizer.normalizer = normalizers.Sequence([StripAccents(), Replace(" ", "Ä")])
my_tokenizer.pre_tokenizer = PreTokenizer.custom(MyTokenizer())
my_tokenizer.post_processor = processors.ByteLevel(trim_offsets=False)
my_tokenizer.post_processor = TemplateProcessing(
    single="<s> $A </s>",
    special_tokens=[
    ("<s>",0),
    ("<pad>",1),
    ("</s>",2),
    ("<unk>",3),
    ("<mask>",4)
    ]
)

my_tokenizer.enable_truncation(max_length=1024)
my_tokenizer.enable_padding(direction='right', pad_id=1, pad_type_id=0, pad_token='<pad>', length=None, pad_to_multiple_of=None)

m1 = pd.read_json(init_train_path)
m2 = pd.read_json(init_val_path)

train_encodings = my_tokenizer.encode_batch(m1.commit_patch)
train_encodings = process_encodings(train_encodings)

val_encodings = my_tokenizer.encode_batch(m2.commit_patch)
val_encodings = process_encodings(val_encodings)

train_dataset = MyCustomDataset(train_encodings, m1.label.tolist())
val_dataset = MyCustomDataset(val_encodings, m2.label.tolist())

train_loader = DataLoader(train_dataset, batch_size=128)
val_loader = DataLoader(val_dataset, batch_size=128)

# ------------------------------------------------------------------------------
# generate intermediate data by CodeTransformer
model = RobertaForSequenceClassification.from_pretrained(finetuned_ct_model_path)
model.to(device)

def generate_ct_intermediate_dataset(data_loader, intermediate_data_path):
    model.eval()
    with torch.no_grad():
        for batch in tqdm(data_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            probs = torch.nn.functional.softmax(outputs[0], dim=1).tolist()
            assert(len(probs) == len(labels))
            for i in range(len(probs)):
                prob = probs[i]
                label = int(labels[i])
                content = '\t'.join([str(i) for i in prob + [label]]) + '\n'
                write_to_file(content, intermediate_data_path)

remove_file_if_exist(intermediate_ct_train_path)
remove_file_if_exist(intermediate_ct_val_path)

print('Generating codeTF intermediate dataset:')
generate_ct_intermediate_dataset(train_loader, intermediate_ct_train_path)
del train_loader
generate_ct_intermediate_dataset(val_loader, intermediate_ct_val_path)
del val_loader

# ------------------------------------------------------------------------------
# evaluation
print('\nEvaluation:')
model = RobertaForSequenceClassification.from_pretrained(finetuned_ct_model_path)

test_loader = DataLoader(val_dataset, batch_size=128)

def softmax_accuracy(probs,all_labels):
    def getClass(x):
        return(x.index(max(x)))

    all_labels = all_labels.tolist()
    probs = pd.Series(probs.tolist())
    all_predicted = probs.apply(getClass)
    all_predicted.reset_index(drop=True, inplace=True)
    vc = pd.value_counts(all_predicted == all_labels)
    try:
        acc = vc[1]/len(all_labels)
    except:
        if(vc.index[0]==False):
            acc = 0
        else:
            acc = 1
    return(acc,all_predicted)

model.to(device)

all_pred=[]
all_labels=[]
all_probs=[]
model.eval()
with torch.no_grad():
    for batch in tqdm(test_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        acc_val,pred = softmax_accuracy(torch.nn.functional.softmax(outputs[1],dim=1),labels)
        all_pred += pred.tolist()
        all_labels += labels.tolist()
        all_probs += outputs[1].tolist()

confusion = sklearn.metrics.confusion_matrix(y_true=all_labels, y_pred=all_pred)
print('Confusion matrix: \n',confusion)

tn, fp, fn, tp = confusion.ravel()
print('\nTP:',tp)
print('FP:',fp)
print('TN:',tn)
print('FN:',fn)

probs2=[]
for x in all_probs:
    probs2.append(x[1])

## Performance measure
print('\nAccuracy: '+ str(sklearn.metrics.accuracy_score(y_true=all_labels, y_pred=all_pred)))
print('Precision: '+ str(sklearn.metrics.precision_score(y_true=all_labels, y_pred=all_pred)))
print('Recall: '+ str(sklearn.metrics.recall_score(y_true=all_labels, y_pred=all_pred)))
print('F-measure: '+ str(sklearn.metrics.f1_score(y_true=all_labels, y_pred=all_pred)))
print('Precision-Recall AUC: '+ str(sklearn.metrics.average_precision_score(y_true=all_labels, y_score=probs2)))
print('AUC: '+ str(sklearn.metrics.roc_auc_score(y_true=all_labels, y_score=probs2)))
print('MCC: '+ str(sklearn.metrics.matthews_corrcoef(y_true=all_labels, y_pred=all_pred)))



Generating codeTF intermediate dataset:


100%|██████████| 70/70 [02:05<00:00,  1.80s/it]
100%|██████████| 24/24 [00:42<00:00,  1.76s/it]



Evaluation:


100%|██████████| 24/24 [00:42<00:00,  1.76s/it]

Confusion matrix: 
 [[1267  471]
 [ 564  667]]

TP: 667
FP: 471
TN: 1267
FN: 564

Accuracy: 0.6513977770293028
Precision: 0.5861159929701231
Recall: 0.5418359057676686
F-measure: 0.5631067961165049
Precision-Recall AUC: 0.6056356912068408
AUC: 0.6834732584303275
MCC: 0.2744372153612606





# MsgTransformer

In [3]:
import pandas as pd
import numpy as np
import torch
from transformers import BertTokenizer
from torch import nn
from transformers import BertModel
from transformers import RobertaModel, RobertaTokenizerFast
from torch.optim import Adam
from tqdm import tqdm
from sklearn import metrics
from torch.nn.parallel import DistributedDataParallel
import os
import random

# definitions
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

BERT_CONFIG = 'roberta-large'
labels = {0:0, 1:1}
BATCH_SIZE = 128
tokenizer = RobertaTokenizerFast.from_pretrained(BERT_CONFIG)

class Dataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.labels = [labels[label] for label in df['label']]
        self.texts = [tokenizer(text, padding='max_length', max_length=512, truncation=True,
                                return_tensors="pt") for text in df['commit_message']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):
        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_texts, batch_y

import torch.nn.functional as F

class ParallelConv(nn.Module):

    def __init__(self, input_dims, filters, dropout=0.5):
        super().__init__()
        convs = []        
        self.output_dims = sum([t[1] for t in filters])
        for (filter_length, output_dims) in filters:
            pad = filter_length//2
            conv = nn.Sequential(
                nn.Conv1d(input_dims, output_dims, filter_length, padding=pad),
                nn.ReLU()
            )
            convs.append(conv)
        # Add the module so its managed correctly
        self.convs = nn.ModuleList(convs)
        self.conv_drop = nn.Dropout(dropout)

    def forward(self, input_bct):
        mots = []
        for conv in self.convs:
            # In Conv1d, data BxCxT, max over time
            conv_out = conv(input_bct)
            mot, _ = conv_out.max(2)
            mots.append(mot)
        mots = torch.cat(mots, 1)
        return self.conv_drop(mots)

class ConvClassifier(nn.Module):

    def __init__(self, embed_dims,
                 filters=[(2, 100), (3, 100), (4, 100)],
                 dropout=0.5, hidden_units=[]):
        super().__init__()
        self.bert = RobertaModel.from_pretrained(BERT_CONFIG)
        self.dropout = nn.Dropout(dropout)
        self.convs = ParallelConv(embed_dims, filters, dropout)
        
        input_units = self.convs.output_dims
        output_units = self.convs.output_dims
        sequence = []
        for h in hidden_units:
            sequence.append(self.dropout(nn.Linear(input_units, h)))
            input_units = h
            output_units = h
            
        sequence.append(nn.Linear(output_units, 2))
        self.outputs = nn.Sequential(*sequence)

    def forward(self, input_id, mask):
        x, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        embed = self.dropout(x)
        embed = embed.transpose(1, 2).contiguous()
        hidden = self.convs(embed)
        linear = self.outputs(hidden)
        return F.log_softmax(linear, dim=-1)


# ------------------------------------------------------------------------------
# generate intermediate data by MsgTransformer
embed_dim = 1024
model = ConvClassifier(embed_dim)
model.load_state_dict(torch.load(finetuned_mt_model_path))
model.to(device)

def generate_mt_intermediate_dataset(input_data, intermediate_data_path):
    data_loader = torch.utils.data.DataLoader(Dataset(input_data), batch_size=BATCH_SIZE)
    model.eval()
    with torch.no_grad():
        for texts, labels in tqdm(data_loader):
            labels = labels.to(device)
            masks = texts['attention_mask'].to(device)
            input_ids = texts['input_ids'].squeeze(1).to(device)
            outputs = model(input_ids, masks)

            probs = torch.nn.functional.softmax(outputs, dim=1).tolist()
            assert(len(probs) == len(labels))
            for i in range(len(probs)):
                prob = probs[i]
                label = int(labels[i])
                content = '\t'.join([str(i) for i in prob + [label]]) + '\n'
                write_to_file(content, intermediate_data_path)

df_train = pd.read_json(init_train_path)
df_val = pd.read_json(init_val_path)

remove_file_if_exist(intermediate_mt_train_path)
remove_file_if_exist(intermediate_mt_val_path)

generate_mt_intermediate_dataset(df_train, intermediate_mt_train_path)
generate_mt_intermediate_dataset(df_val, intermediate_mt_val_path)

# ------------------------------------------------------------------------------
# evaluation
print('\nEvaluation:')

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

def evaluate(model, test_data):
    test = Dataset(test_data)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total_acc_test = 0
    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)
    model.eval()
    with torch.no_grad():
        for test_input, test_label in test_dataloader:
            test_label = test_label.to(device)
            mask = test_input['attention_mask'].to(device)
            input_id = test_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)

            acc = (output.argmax(dim=1) == test_label).sum().item()
            total_acc_test += acc

            test_label = test_label.data.cpu().numpy()
            predic = output.argmax(dim=1).data.cpu().numpy()
            labels_all = np.append(labels_all, test_label)
            predict_all = np.append(predict_all, predic)

    report = metrics.classification_report(labels_all, predict_all, target_names=['benign', 'vulnerable'], digits=4)
    confusion = metrics.confusion_matrix(labels_all, predict_all)
    print(f'Test Accuracy: {total_acc_test / len(test_data): .4f}')
    print(report)
    print(confusion)

embed_dim = 1024
model = ConvClassifier(embed_dim)
model.to(device)
model.load_state_dict(torch.load(finetuned_mt_model_path))
evaluate(model, df_val)



Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 70/70 [02:30<00:00,  2.15s/it]
100%|██████████| 24/24 [00:50<00:00,  2.09s/it]



Evaluation:


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Test Accuracy:  0.7417
              precision    recall  f1-score   support

      benign     0.9593    0.5834    0.7256      1738
  vulnerable     0.6213    0.9651    0.7560      1231

    accuracy                         0.7417      2969
   macro avg     0.7903    0.7742    0.7408      2969
weighted avg     0.8192    0.7417    0.7382      2969

[[1014  724]
 [  43 1188]]


# Combine everything into intermediate dataset

In [4]:
intermediate_train_path = f'{intermediate_directory}/train.txt'
intermediate_val_path = f'{intermediate_directory}/val.txt'

def generate_intermediate_dataset(intermediate_mt_data_path, intermediate_ct_data_path, intermediate_data_path):
    with open(intermediate_mt_data_path) as f:
        mt_data_list = f.read().split('\n')
    
    with open(intermediate_ct_data_path) as f:
        ct_data_list = f.read().split('\n')
    
    mt_data_list = mt_data_list[:-1] if not mt_data_list[-1] else mt_data_list
    ct_data_list = ct_data_list[:-1] if not ct_data_list[-1] else ct_data_list

    assert(len(mt_data_list) == len(ct_data_list))
    
    for i in range(len(mt_data_list)):
        mt_data = mt_data_list[i].split('\t')
        ct_data = ct_data_list[i].split('\t')
        assert(mt_data[2] == ct_data[2])
        label = mt_data[2]
        content = '\t'.join(mt_data[:2] + ct_data[:2] + [label])
        content = content + '\n' if i < len(mt_data_list) - 1 else content
        write_to_file(content, intermediate_data_path)

remove_file_if_exist(intermediate_train_path)
remove_file_if_exist(intermediate_val_path)

generate_intermediate_dataset(intermediate_mt_train_path, intermediate_ct_train_path, intermediate_train_path)
generate_intermediate_dataset(intermediate_mt_val_path, intermediate_ct_val_path, intermediate_val_path)


# Ensemble learning

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn import metrics

EPOCHS = 150
LR = 1e-6
BATCH_SIZE = 4
intermediate_train_path = f'{intermediate_directory}/train.txt'
intermediate_val_path = f'{intermediate_directory}/val.txt'
MODEL_SAVE_PATH = f'{root_directory}/ensemble_model/{DATASET_MASKING}{DATASET_NAME}'
remove_file_if_exist(MODEL_SAVE_PATH)
mkdir_if_not_exist(MODEL_SAVE_PATH)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

class MyDataset(Dataset):
    def __init__(self, path):
        with open(path) as f:
            data_list = f.read().split('\n')
        self.labels = [ int(data.split('\t')[-1]) for data in data_list ]
        self.inputs = [ [float(v) for v in data.split('\t')[:-1]] for data in data_list ]
        assert(len(self.labels) == len(self.inputs))

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.inputs[idx]
        y = self.labels[idx]
        return x[0], x[1], x[2], x[3], y

class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(input_dim, 20)
        self.out = nn.Linear(20, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.out(x)
        return x

def train(model, train_dataset, val_dataset):
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=LR)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    model.train()
    for epoch_num in range(EPOCHS):
        total_acc_train = 0
        total_loss_train = 0
        for x1, x2, x3, x4, y in tqdm(train_dataloader):
            x = torch.transpose(torch.stack([x1, x2, x3, x4]), 0, 1).float().to(device)
            y = y.to(device)
            y_pred = model(x)

            loss = criterion(y_pred, y)
            total_loss_train += loss.item()

            acc = (y_pred.argmax(dim=1) == y).sum().item()
            total_acc_train += acc

            model.zero_grad()
            loss.backward()
            optimizer.step()

        total_acc_val = 0
        total_loss_val = 0
        model.eval()
        with torch.no_grad():
            for x1, x2, x3, x4, y in val_dataloader:
                x = torch.transpose(torch.stack([x1, x2, x3, x4]), 0, 1).float().to(device)
                y = y.to(device)
                y_pred = model(x)

                loss = criterion(y_pred, y)
                total_loss_val += loss.item()

                acc = (y_pred.argmax(dim=1) == y).sum().item()
                total_acc_val += acc

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .4f} \
            | Train Accuracy: {total_acc_train / len(train_dataset): .4f} \
            | Val Loss: {total_loss_val / len(val_dataset): .4f} \
            | Val Accuracy: {total_acc_val / len(val_dataset): .4f}')

        val_acc = f'{total_acc_val / len(val_dataset):.4f}'
        torch.save(model.state_dict(), f'{MODEL_SAVE_PATH}/ensemble2_cnn_{val_acc}_epoch{epoch_num + 1}.pt')

train_dataset = MyDataset(intermediate_train_path)
val_dataset = MyDataset(intermediate_val_path)
test_dataset = val_dataset

model = MLP(4, 2)
train(model, train_dataset, val_dataset)



100%|██████████| 2227/2227 [00:04<00:00, 515.93it/s]


Epochs: 1 | Train Loss:  0.1511             | Train Accuracy:  0.7834             | Val Loss:  0.1548             | Val Accuracy:  0.7181


100%|██████████| 2227/2227 [00:03<00:00, 667.89it/s]


Epochs: 2 | Train Loss:  0.1501             | Train Accuracy:  0.7679             | Val Loss:  0.1543             | Val Accuracy:  0.7194


100%|██████████| 2227/2227 [00:03<00:00, 677.82it/s]


Epochs: 3 | Train Loss:  0.1494             | Train Accuracy:  0.7694             | Val Loss:  0.1537             | Val Accuracy:  0.7211


100%|██████████| 2227/2227 [00:03<00:00, 613.18it/s]


Epochs: 4 | Train Loss:  0.1486             | Train Accuracy:  0.7705             | Val Loss:  0.1532             | Val Accuracy:  0.7231


100%|██████████| 2227/2227 [00:04<00:00, 538.00it/s]


Epochs: 5 | Train Loss:  0.1479             | Train Accuracy:  0.7717             | Val Loss:  0.1526             | Val Accuracy:  0.7252


100%|██████████| 2227/2227 [00:03<00:00, 584.53it/s]


Epochs: 6 | Train Loss:  0.1472             | Train Accuracy:  0.7726             | Val Loss:  0.1521             | Val Accuracy:  0.7258


100%|██████████| 2227/2227 [00:03<00:00, 684.76it/s]


Epochs: 7 | Train Loss:  0.1465             | Train Accuracy:  0.7737             | Val Loss:  0.1515             | Val Accuracy:  0.7268


100%|██████████| 2227/2227 [00:03<00:00, 683.46it/s]


Epochs: 8 | Train Loss:  0.1458             | Train Accuracy:  0.7750             | Val Loss:  0.1509             | Val Accuracy:  0.7275


100%|██████████| 2227/2227 [00:03<00:00, 682.30it/s]


Epochs: 9 | Train Loss:  0.1451             | Train Accuracy:  0.7769             | Val Loss:  0.1504             | Val Accuracy:  0.7289


100%|██████████| 2227/2227 [00:03<00:00, 687.98it/s]


Epochs: 10 | Train Loss:  0.1444             | Train Accuracy:  0.7781             | Val Loss:  0.1498             | Val Accuracy:  0.7292


100%|██████████| 2227/2227 [00:03<00:00, 682.61it/s]


Epochs: 11 | Train Loss:  0.1437             | Train Accuracy:  0.7787             | Val Loss:  0.1493             | Val Accuracy:  0.7302


100%|██████████| 2227/2227 [00:03<00:00, 589.09it/s]


Epochs: 12 | Train Loss:  0.1430             | Train Accuracy:  0.7801             | Val Loss:  0.1487             | Val Accuracy:  0.7326


100%|██████████| 2227/2227 [00:03<00:00, 686.09it/s]


Epochs: 13 | Train Loss:  0.1422             | Train Accuracy:  0.7808             | Val Loss:  0.1482             | Val Accuracy:  0.7332


100%|██████████| 2227/2227 [00:03<00:00, 586.64it/s]


Epochs: 14 | Train Loss:  0.1415             | Train Accuracy:  0.7823             | Val Loss:  0.1476             | Val Accuracy:  0.7349


100%|██████████| 2227/2227 [00:03<00:00, 572.08it/s]


Epochs: 15 | Train Loss:  0.1408             | Train Accuracy:  0.7836             | Val Loss:  0.1470             | Val Accuracy:  0.7363


100%|██████████| 2227/2227 [00:03<00:00, 663.87it/s]


Epochs: 16 | Train Loss:  0.1401             | Train Accuracy:  0.7852             | Val Loss:  0.1465             | Val Accuracy:  0.7369


100%|██████████| 2227/2227 [00:03<00:00, 622.27it/s]


Epochs: 17 | Train Loss:  0.1393             | Train Accuracy:  0.7867             | Val Loss:  0.1459             | Val Accuracy:  0.7393


100%|██████████| 2227/2227 [00:04<00:00, 518.13it/s]


Epochs: 18 | Train Loss:  0.1386             | Train Accuracy:  0.7883             | Val Loss:  0.1453             | Val Accuracy:  0.7417


100%|██████████| 2227/2227 [00:04<00:00, 523.38it/s]


Epochs: 19 | Train Loss:  0.1379             | Train Accuracy:  0.7895             | Val Loss:  0.1448             | Val Accuracy:  0.7440


100%|██████████| 2227/2227 [00:04<00:00, 541.71it/s]


Epochs: 20 | Train Loss:  0.1371             | Train Accuracy:  0.7903             | Val Loss:  0.1442             | Val Accuracy:  0.7454


100%|██████████| 2227/2227 [00:03<00:00, 655.82it/s]


Epochs: 21 | Train Loss:  0.1364             | Train Accuracy:  0.7923             | Val Loss:  0.1436             | Val Accuracy:  0.7481


100%|██████████| 2227/2227 [00:03<00:00, 688.35it/s]


Epochs: 22 | Train Loss:  0.1356             | Train Accuracy:  0.7977             | Val Loss:  0.1431             | Val Accuracy:  0.7531


100%|██████████| 2227/2227 [00:03<00:00, 675.73it/s]


Epochs: 23 | Train Loss:  0.1349             | Train Accuracy:  0.8038             | Val Loss:  0.1425             | Val Accuracy:  0.7572


100%|██████████| 2227/2227 [00:03<00:00, 689.51it/s]


Epochs: 24 | Train Loss:  0.1342             | Train Accuracy:  0.8101             | Val Loss:  0.1419             | Val Accuracy:  0.7592


100%|██████████| 2227/2227 [00:03<00:00, 633.59it/s]


Epochs: 25 | Train Loss:  0.1334             | Train Accuracy:  0.8152             | Val Loss:  0.1413             | Val Accuracy:  0.7639


100%|██████████| 2227/2227 [00:03<00:00, 623.92it/s]


Epochs: 26 | Train Loss:  0.1327             | Train Accuracy:  0.8193             | Val Loss:  0.1408             | Val Accuracy:  0.7676


100%|██████████| 2227/2227 [00:03<00:00, 683.07it/s]


Epochs: 27 | Train Loss:  0.1319             | Train Accuracy:  0.8229             | Val Loss:  0.1402             | Val Accuracy:  0.7703


100%|██████████| 2227/2227 [00:03<00:00, 690.25it/s]


Epochs: 28 | Train Loss:  0.1312             | Train Accuracy:  0.8281             | Val Loss:  0.1396             | Val Accuracy:  0.7713


100%|██████████| 2227/2227 [00:03<00:00, 691.34it/s]


Epochs: 29 | Train Loss:  0.1304             | Train Accuracy:  0.8324             | Val Loss:  0.1390             | Val Accuracy:  0.7727


100%|██████████| 2227/2227 [00:03<00:00, 573.00it/s]


Epochs: 30 | Train Loss:  0.1297             | Train Accuracy:  0.8354             | Val Loss:  0.1385             | Val Accuracy:  0.7760


100%|██████████| 2227/2227 [00:03<00:00, 654.49it/s]


Epochs: 31 | Train Loss:  0.1289             | Train Accuracy:  0.8398             | Val Loss:  0.1379             | Val Accuracy:  0.7801


100%|██████████| 2227/2227 [00:03<00:00, 676.20it/s]


Epochs: 32 | Train Loss:  0.1282             | Train Accuracy:  0.8438             | Val Loss:  0.1373             | Val Accuracy:  0.7817


100%|██████████| 2227/2227 [00:03<00:00, 604.62it/s]


Epochs: 33 | Train Loss:  0.1274             | Train Accuracy:  0.8461             | Val Loss:  0.1367             | Val Accuracy:  0.7834


100%|██████████| 2227/2227 [00:04<00:00, 544.33it/s]


Epochs: 34 | Train Loss:  0.1266             | Train Accuracy:  0.8501             | Val Loss:  0.1362             | Val Accuracy:  0.7888


100%|██████████| 2227/2227 [00:03<00:00, 603.38it/s]


Epochs: 35 | Train Loss:  0.1259             | Train Accuracy:  0.8529             | Val Loss:  0.1356             | Val Accuracy:  0.7922


100%|██████████| 2227/2227 [00:04<00:00, 539.90it/s]


Epochs: 36 | Train Loss:  0.1251             | Train Accuracy:  0.8565             | Val Loss:  0.1350             | Val Accuracy:  0.7945


100%|██████████| 2227/2227 [00:03<00:00, 667.64it/s]


Epochs: 37 | Train Loss:  0.1244             | Train Accuracy:  0.8591             | Val Loss:  0.1344             | Val Accuracy:  0.7969


100%|██████████| 2227/2227 [00:03<00:00, 677.85it/s]


Epochs: 38 | Train Loss:  0.1236             | Train Accuracy:  0.8613             | Val Loss:  0.1339             | Val Accuracy:  0.7993


100%|██████████| 2227/2227 [00:03<00:00, 681.27it/s]


Epochs: 39 | Train Loss:  0.1228             | Train Accuracy:  0.8644             | Val Loss:  0.1333             | Val Accuracy:  0.8016


100%|██████████| 2227/2227 [00:03<00:00, 694.27it/s]


Epochs: 40 | Train Loss:  0.1221             | Train Accuracy:  0.8673             | Val Loss:  0.1327             | Val Accuracy:  0.8033


100%|██████████| 2227/2227 [00:03<00:00, 679.12it/s]


Epochs: 41 | Train Loss:  0.1213             | Train Accuracy:  0.8701             | Val Loss:  0.1321             | Val Accuracy:  0.8060


100%|██████████| 2227/2227 [00:03<00:00, 667.02it/s]


Epochs: 42 | Train Loss:  0.1206             | Train Accuracy:  0.8731             | Val Loss:  0.1316             | Val Accuracy:  0.8094


100%|██████████| 2227/2227 [00:03<00:00, 646.64it/s]


Epochs: 43 | Train Loss:  0.1198             | Train Accuracy:  0.8760             | Val Loss:  0.1310             | Val Accuracy:  0.8110


100%|██████████| 2227/2227 [00:03<00:00, 580.51it/s]


Epochs: 44 | Train Loss:  0.1191             | Train Accuracy:  0.8784             | Val Loss:  0.1304             | Val Accuracy:  0.8131


100%|██████████| 2227/2227 [00:03<00:00, 625.80it/s]


Epochs: 45 | Train Loss:  0.1183             | Train Accuracy:  0.8801             | Val Loss:  0.1299             | Val Accuracy:  0.8164


100%|██████████| 2227/2227 [00:03<00:00, 671.38it/s]


Epochs: 46 | Train Loss:  0.1176             | Train Accuracy:  0.8824             | Val Loss:  0.1293             | Val Accuracy:  0.8188


100%|██████████| 2227/2227 [00:03<00:00, 672.43it/s]


Epochs: 47 | Train Loss:  0.1168             | Train Accuracy:  0.8843             | Val Loss:  0.1288             | Val Accuracy:  0.8208


100%|██████████| 2227/2227 [00:03<00:00, 687.91it/s]


Epochs: 48 | Train Loss:  0.1161             | Train Accuracy:  0.8865             | Val Loss:  0.1282             | Val Accuracy:  0.8232


100%|██████████| 2227/2227 [00:03<00:00, 611.84it/s]


Epochs: 49 | Train Loss:  0.1153             | Train Accuracy:  0.8887             | Val Loss:  0.1276             | Val Accuracy:  0.8245


100%|██████████| 2227/2227 [00:03<00:00, 690.05it/s]


Epochs: 50 | Train Loss:  0.1146             | Train Accuracy:  0.8903             | Val Loss:  0.1271             | Val Accuracy:  0.8289


100%|██████████| 2227/2227 [00:03<00:00, 569.49it/s]


Epochs: 51 | Train Loss:  0.1139             | Train Accuracy:  0.8920             | Val Loss:  0.1265             | Val Accuracy:  0.8313


100%|██████████| 2227/2227 [00:03<00:00, 643.83it/s]


Epochs: 52 | Train Loss:  0.1131             | Train Accuracy:  0.8934             | Val Loss:  0.1260             | Val Accuracy:  0.8323


100%|██████████| 2227/2227 [00:03<00:00, 690.27it/s]


Epochs: 53 | Train Loss:  0.1124             | Train Accuracy:  0.8952             | Val Loss:  0.1254             | Val Accuracy:  0.8343


100%|██████████| 2227/2227 [00:03<00:00, 687.71it/s]


Epochs: 54 | Train Loss:  0.1117             | Train Accuracy:  0.8958             | Val Loss:  0.1249             | Val Accuracy:  0.8356


100%|██████████| 2227/2227 [00:03<00:00, 636.57it/s]


Epochs: 55 | Train Loss:  0.1109             | Train Accuracy:  0.8966             | Val Loss:  0.1244             | Val Accuracy:  0.8366


100%|██████████| 2227/2227 [00:03<00:00, 633.34it/s]


Epochs: 56 | Train Loss:  0.1102             | Train Accuracy:  0.8978             | Val Loss:  0.1238             | Val Accuracy:  0.8383


100%|██████████| 2227/2227 [00:03<00:00, 609.58it/s]


Epochs: 57 | Train Loss:  0.1095             | Train Accuracy:  0.8985             | Val Loss:  0.1233             | Val Accuracy:  0.8377


100%|██████████| 2227/2227 [00:03<00:00, 581.05it/s]


Epochs: 58 | Train Loss:  0.1087             | Train Accuracy:  0.8988             | Val Loss:  0.1227             | Val Accuracy:  0.8380


100%|██████████| 2227/2227 [00:04<00:00, 553.61it/s]


Epochs: 59 | Train Loss:  0.1080             | Train Accuracy:  0.9001             | Val Loss:  0.1222             | Val Accuracy:  0.8387


100%|██████████| 2227/2227 [00:03<00:00, 573.26it/s]


Epochs: 60 | Train Loss:  0.1073             | Train Accuracy:  0.9010             | Val Loss:  0.1217             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:04<00:00, 496.54it/s]


Epochs: 61 | Train Loss:  0.1066             | Train Accuracy:  0.9020             | Val Loss:  0.1212             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:04<00:00, 487.89it/s]


Epochs: 62 | Train Loss:  0.1059             | Train Accuracy:  0.9030             | Val Loss:  0.1206             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:04<00:00, 513.73it/s]


Epochs: 63 | Train Loss:  0.1052             | Train Accuracy:  0.9041             | Val Loss:  0.1201             | Val Accuracy:  0.8424


100%|██████████| 2227/2227 [00:03<00:00, 569.26it/s]


Epochs: 64 | Train Loss:  0.1044             | Train Accuracy:  0.9046             | Val Loss:  0.1196             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 588.60it/s]


Epochs: 65 | Train Loss:  0.1037             | Train Accuracy:  0.9056             | Val Loss:  0.1191             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 615.40it/s]


Epochs: 66 | Train Loss:  0.1030             | Train Accuracy:  0.9064             | Val Loss:  0.1186             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:03<00:00, 622.83it/s]


Epochs: 67 | Train Loss:  0.1024             | Train Accuracy:  0.9067             | Val Loss:  0.1181             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 591.55it/s]


Epochs: 68 | Train Loss:  0.1017             | Train Accuracy:  0.9077             | Val Loss:  0.1176             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:03<00:00, 655.29it/s]


Epochs: 69 | Train Loss:  0.1010             | Train Accuracy:  0.9083             | Val Loss:  0.1171             | Val Accuracy:  0.8414


100%|██████████| 2227/2227 [00:03<00:00, 653.22it/s]


Epochs: 70 | Train Loss:  0.1003             | Train Accuracy:  0.9083             | Val Loss:  0.1166             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:03<00:00, 611.20it/s]


Epochs: 71 | Train Loss:  0.0996             | Train Accuracy:  0.9092             | Val Loss:  0.1161             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 676.98it/s]


Epochs: 72 | Train Loss:  0.0989             | Train Accuracy:  0.9096             | Val Loss:  0.1156             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 624.91it/s]


Epochs: 73 | Train Loss:  0.0983             | Train Accuracy:  0.9095             | Val Loss:  0.1151             | Val Accuracy:  0.8404


100%|██████████| 2227/2227 [00:03<00:00, 628.71it/s]


Epochs: 74 | Train Loss:  0.0976             | Train Accuracy:  0.9102             | Val Loss:  0.1147             | Val Accuracy:  0.8414


100%|██████████| 2227/2227 [00:03<00:00, 617.10it/s]


Epochs: 75 | Train Loss:  0.0970             | Train Accuracy:  0.9103             | Val Loss:  0.1142             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 650.16it/s]


Epochs: 76 | Train Loss:  0.0963             | Train Accuracy:  0.9112             | Val Loss:  0.1137             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:04<00:00, 499.17it/s]


Epochs: 77 | Train Loss:  0.0957             | Train Accuracy:  0.9115             | Val Loss:  0.1133             | Val Accuracy:  0.8420


100%|██████████| 2227/2227 [00:04<00:00, 507.13it/s]


Epochs: 78 | Train Loss:  0.0950             | Train Accuracy:  0.9117             | Val Loss:  0.1128             | Val Accuracy:  0.8434


100%|██████████| 2227/2227 [00:03<00:00, 662.56it/s]


Epochs: 79 | Train Loss:  0.0944             | Train Accuracy:  0.9115             | Val Loss:  0.1124             | Val Accuracy:  0.8430


100%|██████████| 2227/2227 [00:03<00:00, 675.83it/s]


Epochs: 80 | Train Loss:  0.0937             | Train Accuracy:  0.9115             | Val Loss:  0.1119             | Val Accuracy:  0.8434


100%|██████████| 2227/2227 [00:03<00:00, 675.23it/s]


Epochs: 81 | Train Loss:  0.0931             | Train Accuracy:  0.9120             | Val Loss:  0.1115             | Val Accuracy:  0.8437


100%|██████████| 2227/2227 [00:03<00:00, 665.55it/s]


Epochs: 82 | Train Loss:  0.0925             | Train Accuracy:  0.9123             | Val Loss:  0.1111             | Val Accuracy:  0.8434


100%|██████████| 2227/2227 [00:03<00:00, 667.90it/s]


Epochs: 83 | Train Loss:  0.0919             | Train Accuracy:  0.9119             | Val Loss:  0.1106             | Val Accuracy:  0.8430


100%|██████████| 2227/2227 [00:04<00:00, 524.50it/s]


Epochs: 84 | Train Loss:  0.0913             | Train Accuracy:  0.9117             | Val Loss:  0.1102             | Val Accuracy:  0.8437


100%|██████████| 2227/2227 [00:04<00:00, 544.51it/s]


Epochs: 85 | Train Loss:  0.0907             | Train Accuracy:  0.9119             | Val Loss:  0.1098             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 648.71it/s]


Epochs: 86 | Train Loss:  0.0901             | Train Accuracy:  0.9115             | Val Loss:  0.1094             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 562.10it/s]


Epochs: 87 | Train Loss:  0.0895             | Train Accuracy:  0.9116             | Val Loss:  0.1090             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 556.90it/s]


Epochs: 88 | Train Loss:  0.0889             | Train Accuracy:  0.9112             | Val Loss:  0.1086             | Val Accuracy:  0.8434


100%|██████████| 2227/2227 [00:04<00:00, 527.35it/s]


Epochs: 89 | Train Loss:  0.0883             | Train Accuracy:  0.9114             | Val Loss:  0.1082             | Val Accuracy:  0.8437


100%|██████████| 2227/2227 [00:03<00:00, 673.97it/s]


Epochs: 90 | Train Loss:  0.0878             | Train Accuracy:  0.9115             | Val Loss:  0.1078             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 675.86it/s]


Epochs: 91 | Train Loss:  0.0872             | Train Accuracy:  0.9120             | Val Loss:  0.1074             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 595.32it/s]


Epochs: 92 | Train Loss:  0.0867             | Train Accuracy:  0.9124             | Val Loss:  0.1071             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 592.67it/s]


Epochs: 93 | Train Loss:  0.0861             | Train Accuracy:  0.9124             | Val Loss:  0.1067             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 672.73it/s]


Epochs: 94 | Train Loss:  0.0856             | Train Accuracy:  0.9124             | Val Loss:  0.1063             | Val Accuracy:  0.8437


100%|██████████| 2227/2227 [00:03<00:00, 683.45it/s]


Epochs: 95 | Train Loss:  0.0851             | Train Accuracy:  0.9124             | Val Loss:  0.1060             | Val Accuracy:  0.8441


100%|██████████| 2227/2227 [00:03<00:00, 675.48it/s]


Epochs: 96 | Train Loss:  0.0845             | Train Accuracy:  0.9124             | Val Loss:  0.1056             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 680.30it/s]


Epochs: 97 | Train Loss:  0.0840             | Train Accuracy:  0.9128             | Val Loss:  0.1053             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 688.21it/s]


Epochs: 98 | Train Loss:  0.0835             | Train Accuracy:  0.9126             | Val Loss:  0.1049             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 691.01it/s]


Epochs: 99 | Train Loss:  0.0830             | Train Accuracy:  0.9131             | Val Loss:  0.1046             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 672.35it/s]


Epochs: 100 | Train Loss:  0.0825             | Train Accuracy:  0.9134             | Val Loss:  0.1043             | Val Accuracy:  0.8454


100%|██████████| 2227/2227 [00:03<00:00, 665.16it/s]


Epochs: 101 | Train Loss:  0.0820             | Train Accuracy:  0.9133             | Val Loss:  0.1040             | Val Accuracy:  0.8454


100%|██████████| 2227/2227 [00:03<00:00, 647.32it/s]


Epochs: 102 | Train Loss:  0.0815             | Train Accuracy:  0.9133             | Val Loss:  0.1036             | Val Accuracy:  0.8454


100%|██████████| 2227/2227 [00:03<00:00, 676.24it/s]


Epochs: 103 | Train Loss:  0.0811             | Train Accuracy:  0.9133             | Val Loss:  0.1033             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 677.92it/s]


Epochs: 104 | Train Loss:  0.0806             | Train Accuracy:  0.9138             | Val Loss:  0.1030             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 680.94it/s]


Epochs: 105 | Train Loss:  0.0801             | Train Accuracy:  0.9139             | Val Loss:  0.1027             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 682.44it/s]


Epochs: 106 | Train Loss:  0.0797             | Train Accuracy:  0.9139             | Val Loss:  0.1024             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 574.63it/s]


Epochs: 107 | Train Loss:  0.0792             | Train Accuracy:  0.9137             | Val Loss:  0.1022             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 675.27it/s]


Epochs: 108 | Train Loss:  0.0788             | Train Accuracy:  0.9139             | Val Loss:  0.1019             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:04<00:00, 539.24it/s]


Epochs: 109 | Train Loss:  0.0784             | Train Accuracy:  0.9135             | Val Loss:  0.1016             | Val Accuracy:  0.8454


100%|██████████| 2227/2227 [00:04<00:00, 527.34it/s]


Epochs: 110 | Train Loss:  0.0779             | Train Accuracy:  0.9135             | Val Loss:  0.1013             | Val Accuracy:  0.8454


100%|██████████| 2227/2227 [00:04<00:00, 503.52it/s]


Epochs: 111 | Train Loss:  0.0775             | Train Accuracy:  0.9135             | Val Loss:  0.1010             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:04<00:00, 533.95it/s]


Epochs: 112 | Train Loss:  0.0771             | Train Accuracy:  0.9138             | Val Loss:  0.1008             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 675.91it/s]


Epochs: 113 | Train Loss:  0.0767             | Train Accuracy:  0.9139             | Val Loss:  0.1005             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 620.62it/s]


Epochs: 114 | Train Loss:  0.0763             | Train Accuracy:  0.9140             | Val Loss:  0.1003             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 677.36it/s]


Epochs: 115 | Train Loss:  0.0759             | Train Accuracy:  0.9140             | Val Loss:  0.1000             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 673.91it/s]


Epochs: 116 | Train Loss:  0.0755             | Train Accuracy:  0.9138             | Val Loss:  0.0998             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 673.30it/s]


Epochs: 117 | Train Loss:  0.0751             | Train Accuracy:  0.9138             | Val Loss:  0.0995             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 610.11it/s]


Epochs: 118 | Train Loss:  0.0747             | Train Accuracy:  0.9142             | Val Loss:  0.0993             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 658.76it/s]


Epochs: 119 | Train Loss:  0.0743             | Train Accuracy:  0.9142             | Val Loss:  0.0991             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 671.98it/s]


Epochs: 120 | Train Loss:  0.0740             | Train Accuracy:  0.9141             | Val Loss:  0.0989             | Val Accuracy:  0.8444


100%|██████████| 2227/2227 [00:03<00:00, 675.76it/s]


Epochs: 121 | Train Loss:  0.0736             | Train Accuracy:  0.9146             | Val Loss:  0.0986             | Val Accuracy:  0.8451


100%|██████████| 2227/2227 [00:03<00:00, 578.95it/s]


Epochs: 122 | Train Loss:  0.0732             | Train Accuracy:  0.9148             | Val Loss:  0.0984             | Val Accuracy:  0.8441


100%|██████████| 2227/2227 [00:04<00:00, 522.58it/s]


Epochs: 123 | Train Loss:  0.0729             | Train Accuracy:  0.9148             | Val Loss:  0.0982             | Val Accuracy:  0.8447


100%|██████████| 2227/2227 [00:03<00:00, 621.97it/s]


Epochs: 124 | Train Loss:  0.0725             | Train Accuracy:  0.9149             | Val Loss:  0.0980             | Val Accuracy:  0.8437


100%|██████████| 2227/2227 [00:03<00:00, 631.14it/s]


Epochs: 125 | Train Loss:  0.0722             | Train Accuracy:  0.9151             | Val Loss:  0.0978             | Val Accuracy:  0.8434


100%|██████████| 2227/2227 [00:03<00:00, 597.19it/s]


Epochs: 126 | Train Loss:  0.0719             | Train Accuracy:  0.9152             | Val Loss:  0.0976             | Val Accuracy:  0.8430


100%|██████████| 2227/2227 [00:03<00:00, 671.05it/s]


Epochs: 127 | Train Loss:  0.0715             | Train Accuracy:  0.9150             | Val Loss:  0.0974             | Val Accuracy:  0.8430


100%|██████████| 2227/2227 [00:03<00:00, 605.13it/s]


Epochs: 128 | Train Loss:  0.0712             | Train Accuracy:  0.9155             | Val Loss:  0.0972             | Val Accuracy:  0.8424


100%|██████████| 2227/2227 [00:04<00:00, 553.19it/s]


Epochs: 129 | Train Loss:  0.0709             | Train Accuracy:  0.9155             | Val Loss:  0.0971             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 675.07it/s]


Epochs: 130 | Train Loss:  0.0706             | Train Accuracy:  0.9153             | Val Loss:  0.0969             | Val Accuracy:  0.8417


100%|██████████| 2227/2227 [00:03<00:00, 674.27it/s]


Epochs: 131 | Train Loss:  0.0703             | Train Accuracy:  0.9152             | Val Loss:  0.0967             | Val Accuracy:  0.8414


100%|██████████| 2227/2227 [00:03<00:00, 599.02it/s]


Epochs: 132 | Train Loss:  0.0700             | Train Accuracy:  0.9152             | Val Loss:  0.0965             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 572.98it/s]


Epochs: 133 | Train Loss:  0.0697             | Train Accuracy:  0.9152             | Val Loss:  0.0964             | Val Accuracy:  0.8404


100%|██████████| 2227/2227 [00:03<00:00, 595.41it/s]


Epochs: 134 | Train Loss:  0.0694             | Train Accuracy:  0.9151             | Val Loss:  0.0962             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 696.82it/s]


Epochs: 135 | Train Loss:  0.0691             | Train Accuracy:  0.9151             | Val Loss:  0.0961             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 690.16it/s]


Epochs: 136 | Train Loss:  0.0688             | Train Accuracy:  0.9151             | Val Loss:  0.0959             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:03<00:00, 606.42it/s]


Epochs: 137 | Train Loss:  0.0685             | Train Accuracy:  0.9152             | Val Loss:  0.0957             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:04<00:00, 553.31it/s]


Epochs: 138 | Train Loss:  0.0683             | Train Accuracy:  0.9151             | Val Loss:  0.0956             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:03<00:00, 680.70it/s]


Epochs: 139 | Train Loss:  0.0680             | Train Accuracy:  0.9152             | Val Loss:  0.0955             | Val Accuracy:  0.8414


100%|██████████| 2227/2227 [00:03<00:00, 688.30it/s]


Epochs: 140 | Train Loss:  0.0677             | Train Accuracy:  0.9151             | Val Loss:  0.0953             | Val Accuracy:  0.8414


100%|██████████| 2227/2227 [00:03<00:00, 687.69it/s]


Epochs: 141 | Train Loss:  0.0675             | Train Accuracy:  0.9149             | Val Loss:  0.0952             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:03<00:00, 691.04it/s]


Epochs: 142 | Train Loss:  0.0672             | Train Accuracy:  0.9151             | Val Loss:  0.0950             | Val Accuracy:  0.8410


100%|██████████| 2227/2227 [00:03<00:00, 697.33it/s]


Epochs: 143 | Train Loss:  0.0670             | Train Accuracy:  0.9152             | Val Loss:  0.0949             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 694.43it/s]


Epochs: 144 | Train Loss:  0.0667             | Train Accuracy:  0.9149             | Val Loss:  0.0948             | Val Accuracy:  0.8407


100%|██████████| 2227/2227 [00:03<00:00, 611.11it/s]


Epochs: 145 | Train Loss:  0.0665             | Train Accuracy:  0.9147             | Val Loss:  0.0946             | Val Accuracy:  0.8404


100%|██████████| 2227/2227 [00:03<00:00, 660.82it/s]


Epochs: 146 | Train Loss:  0.0662             | Train Accuracy:  0.9148             | Val Loss:  0.0945             | Val Accuracy:  0.8404


100%|██████████| 2227/2227 [00:03<00:00, 615.72it/s]


Epochs: 147 | Train Loss:  0.0660             | Train Accuracy:  0.9148             | Val Loss:  0.0944             | Val Accuracy:  0.8393


100%|██████████| 2227/2227 [00:03<00:00, 678.51it/s]


Epochs: 148 | Train Loss:  0.0658             | Train Accuracy:  0.9149             | Val Loss:  0.0943             | Val Accuracy:  0.8387


100%|██████████| 2227/2227 [00:03<00:00, 692.25it/s]


Epochs: 149 | Train Loss:  0.0655             | Train Accuracy:  0.9149             | Val Loss:  0.0942             | Val Accuracy:  0.8383


100%|██████████| 2227/2227 [00:03<00:00, 656.23it/s]


Epochs: 150 | Train Loss:  0.0653             | Train Accuracy:  0.9149             | Val Loss:  0.0941             | Val Accuracy:  0.8377


In [6]:
def evaluate(model, test_dataset):
    test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    model = model.to(device)
    
    total_acc_test = 0
    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)
    model.eval()
    with torch.no_grad():
        for x1, x2, x3, x4, y in test_dataloader:
            x = torch.transpose(torch.stack([x1, x2, x3, x4]), 0, 1).float().to(device)
            y = y.to(device)
            y_pred = model(x)
            
            acc = (y_pred.argmax(dim=1) == y).sum().item()
            total_acc_test += acc
            
            y = y.data.cpu().numpy()
            predic = y_pred.argmax(dim=1).data.cpu().numpy()
            labels_all = np.append(labels_all, y)
            predict_all = np.append(predict_all, predic)

    report = metrics.classification_report(labels_all, predict_all, target_names=['benign', 'vulnerable'], digits=4)
    confusion = metrics.confusion_matrix(labels_all, predict_all)
    print(f'Test Accuracy: {total_acc_test / len(test_dataset): .4f}')
    print(report)
    print(confusion)

MODEL_SAVE_PATH = f'{root_directory}/ensemble_model/{DATASET_MASKING}{DATASET_NAME}'

mkdir_if_not_exist(MODEL_SAVE_PATH)
model = MLP(4, 2)
saved_model_name = 'ensemble2_cnn_0.8454_epoch109.pt'
model.load_state_dict(torch.load(f'{MODEL_SAVE_PATH}/{saved_model_name}'))
evaluate(model, val_dataset)


Test Accuracy:  0.8454
              precision    recall  f1-score   support

      benign     0.8414    0.9068    0.8729      1738
  vulnerable     0.8522    0.7587    0.8028      1231

    accuracy                         0.8454      2969
   macro avg     0.8468    0.8328    0.8378      2969
weighted avg     0.8459    0.8454    0.8438      2969

[[1576  162]
 [ 297  934]]
