# Next Instruction Prediction Training


In [1]:
import torch

torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

# DATASET GENERATION

In [2]:
#!/usr/bin/env python3

import sys,os
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment
from capstone import *
from capstone.x86 import *





data_dir_path = "./data/binaries/"
dir_file_list = os.listdir(data_dir_path)

with open('./data/instruction_clusters.txt', 'w') as data_file:
    for filename in dir_file_list:
        filePath = os.path.join(data_dir_path,filename)

        fh = open(filePath, 'rb')
        bin_bytearray = bytearray(fh.read())
        
        with open(filePath, 'rb') as f:
            elf = ELFFile(f)
            dwarfinfo = elf.get_dwarf_info()
            aranges = dwarfinfo.get_aranges()
            print(filename, len(aranges.entries))
            for arange in aranges.entries:

                entry = arange.begin_addr
                exit  = arange.begin_addr + arange.length
                ops = bin_bytearray[entry: exit]

                md = Cs(CS_ARCH_X86, CS_MODE_64)
                md.detail = True
                for inst in md.disasm(ops, entry):

                    data_file.write(inst.mnemonic+" "+inst.op_str+";")
                data_file.write('\n')






gitwipe 4
gitps 147
gitview 140
gitfm 341
gitwhich 6
gitkeys 4


# Creating the pipeline

In [3]:
from transformers import BertTokenizer, BertForNextSentencePrediction
import torch

# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

tokenizer = BertTokenizer.from_pretrained("./binary-tokenizer")
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForNextSentencePrediction: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForNextSentencePrediction from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForNextSentencePrediction from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
delim = ';'
with open('./data/instruction_clusters.txt', 'r') as fp:
    text = fp.read().split('\n')

In [5]:
# text = text[:10]

We need to split sentences into consecutive, and non-consecutive sequences.

We have to deal with edge-cases too - for example where there is only a single sentence within a paragraph as with the three examples above (in comparison to below where we can easily split into multiple sentences).

In [6]:
# text[51].split(delim)

We'll assign a 50% probability of using the genuine next sentence, and 50% probability of using another random sentence.

To make this simpler, we'll create a *'bag'* of individual sentences to pull from when selecting a random sentence B.

In [7]:
bag = [instruction for instruction_cluster in text for instruction in instruction_cluster.split(delim)  if instruction!= '']
bag_size = len(bag)
print(bag_size)

49783


In [8]:
bag

['endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'mov rdx, qword ptr [rip + 0x2d98]',
 'mov rax, qword ptr [rip + 0x2d81]',
 'lea rcx, [rip + 0xd62]',
 'mov rsi, rcx',
 'mov rdi, rax',
 'mov eax, 0',
 'call 0x1120',
 'mov edi, 1',
 'call 0x1170',
 'endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'sub rsp, 0x20',
 'mov dword ptr [rbp - 0x14], edi',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 1',
 'mov esi, 0',
 'mov edi, eax',
 'call 0x1180',
 'mov qword ptr [rbp - 0x10], rax',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 2',
 'mov esi, 0',
 'mov edi, eax',
 'call 0x1180',
 'mov qword ptr [rbp - 8], rax',
 'mov rcx, qword ptr [rbp - 0x10]',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 0',
 'mov rsi, rcx',
 'mov edi, eax',
 'call 0x1180',
 'mov rax, qword ptr [rbp - 8]',
 'leave ',
 'ret ',
 'endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'sub rsp, 0x40',
 'mov qword ptr [rbp - 0x38], rdi',
 'mov rax, qword ptr [rbp - 0x38]',
 'mov esi, 2',
 'mov rdi, rax',
 'mov eax, 0',
 'call 0x1160',
 'mov dw

And now we create our 50/50 NIP training data.

In [9]:
import random

history = []
next_instruction = []
label = []

page_len = 5
instruction_pages = []
for instruction_cluster in text:
    instructions = [
        instruction for instruction in instruction_cluster.split(delim) if instruction != ''
    ]
    if len(instructions)>page_len:
        
        for i in range(0,len(instructions),page_len):
            instruction_pages.append(instructions[i:i+page_len])
        
print(len(instruction_pages))
print(instruction_pages[0])

for instruction_page in instruction_pages:
    
#     instructions = [
#         instruction for instruction in instruction_page.split(';') if instruction != ''
#     ]
    
    
#     num_instructions = len(instruction_page)
    
    

#     start = random.randint(0, num_instructions-2)
    # 50/50 whether is IsNextSentence or NotNextSentence
    if random.random() >= 0.5:
        # this is IsNextSentence
        history.append(delim.join(instruction_page[:-1]))
        next_instruction.append(instruction_page[-1])
        label.append(0)
    else:
        index = random.randint(0, bag_size-1)
        # this is NotNextSentence
        history.append(delim.join(instruction_page[:-1]))
        next_instruction.append(bag[index])
        label.append(1)

10221
['endbr64 ', 'push rbp', 'mov rbp, rsp', 'mov rdx, qword ptr [rip + 0x2d98]', 'mov rax, qword ptr [rip + 0x2d81]']


In [10]:
print(len(label))
for i in range(3):
    print(label[i])
    print('->',history[i] , '\n')
    print('# ',next_instruction[i] , '\n')

10221
0
-> endbr64 ;push rbp;mov rbp, rsp;mov rdx, qword ptr [rip + 0x2d98] 

#  mov rax, qword ptr [rip + 0x2d81] 

0
-> lea rcx, [rip + 0xd62];mov rsi, rcx;mov rdi, rax;mov eax, 0 

#  call 0x1120 

0
-> mov edi, 1 

#  call 0x1170 



Our data is now ready for tokenization, this time we truncate/pad each token to the same length of *512* tokens.

In [11]:
inputs = tokenizer(history, next_instruction, return_tensors='pt', max_length=128, truncation=True, padding='max_length')

In [12]:
inputs.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

We can see that the *token_type_ids* tensors have been built correctly (eg **1** indicating sentence B tokens) by checking the first instance of *token_type_ids*:

In [13]:
inputs.token_type_ids[0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])

The **0** tokens following our sentence B tokens correspond to *PAD* tokens.

Alongside this, we need to create a *labels* tensor too - which corresponds to the values contained within our `label` variable. Our *labels* tensor must be a *LongTensor*, and we will need to transpose the tensor so that it matches our other tensors' dimensionality.

In [14]:
inputs['labels'] = torch.LongTensor([label]).T

In [15]:
inputs.labels[:10]

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [0],
        [1],
        [0]])

The `inputs` tensors are now ready, and we can begin building the model input pipeline for training. We first create a PyTorch dataset from our data.

In [16]:
class MeditationsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings.input_ids)

Initialize our data using the `MeditationDataset` class.

In [17]:
dataset = MeditationsDataset(inputs)

In [18]:
train_size = int(0.8 * len(dataset))
validation_size = len(dataset) - train_size

train_dataset, validation_dataset = torch.utils.data.random_split(dataset, [train_size, validation_size])

And initialize the dataloader, which we'll be using to load our data into the model during training.

In [19]:
BATCH_SIZE = 16
train_loader      = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [20]:
# 

Now we can move onto setting up the training loop. First we setup GPU/CPU usage.

In [21]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# and move our model over to the selected device
model.to(device)

BertForNextSentencePrediction(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

Activate the training mode of our model, and initialize our optimizer (Adam with weighted decay - reduces chance of overfitting).

In [22]:
from sklearn.metrics import precision_recall_fscore_support , accuracy_score
import numpy as np


Now we can move onto the training loop, we'll train for a couple of epochs (change `epochs` to modify this).

In [None]:
from transformers import AdamW
from tqdm import tqdm  # for our progress bar


# initialize optimizer
optim = AdamW(model.parameters(), lr=5e-6)



epochs = 10000

for epoch in range(epochs):
    # setup loop with TQDM and dataloader
    train_loop = tqdm(train_loader, leave=True)
    
    
    predictions_all, ground_truths_all = None, None
    
    # activate training mode
    model.train()
    for N,batch in enumerate(train_loop):

        optim.zero_grad()
        # pull all tensor batches required for training
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        labels = batch['labels'].to(device)
        # process
        outputs = model(input_ids, attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        labels=labels)
        prediction = torch.argmax(outputs.logits, axis=-1)
        prediction = prediction.detach().cpu().numpy().flatten()
        ground_truth = labels.detach().cpu().numpy().flatten()
        
        if N==0:
            predictions_all = prediction
            ground_truths_all = ground_truth
        else:
            predictions_all   = np.concatenate((predictions_all, prediction))
            ground_truths_all = np.concatenate((ground_truths_all, ground_truth))
            

        # extract loss
        loss = outputs.loss
        # calculate loss for every parameter that needs grad update
        loss.backward()
        # update parameters
        optim.step()
        # print relevant info to progress bar
        train_loop.set_description(f'Epoch {epoch}')
        train_loop.set_postfix(loss=loss.item())
    accuracy = (accuracy_score(ground_truths_all,predictions_all))
    precision, recall, f1, _ = precision_recall_fscore_support(ground_truths_all,predictions_all, average='binary')
    print("Training: ", accuracy, precision, recall, f1, _)
    
    
    ### EVAL Validation
    
    with torch.no_grad():
        model.eval()
        v_predictions_all, v_ground_truths_all = None, None
        validation_loop = tqdm(validation_loader, leave=True)
        for N,v_batch in enumerate(validation_loop):
            v_input_ids = v_batch['input_ids'].to(device)
            v_attention_mask = v_batch['attention_mask'].to(device)
            v_token_type_ids = v_batch['token_type_ids'].to(device)
            v_labels = v_batch['labels'].to(device)
            # process
            v_outputs = model(v_input_ids, attention_mask=v_attention_mask,
                            token_type_ids=v_token_type_ids,
                            labels=v_labels)
            v_prediction = torch.argmax(v_outputs.logits, axis=-1)
            v_prediction = v_prediction.detach().cpu().numpy().flatten()
            v_ground_truth = v_labels.detach().cpu().numpy().flatten()

            if N==0:
                v_predictions_all = v_prediction
                v_ground_truths_all = v_ground_truth
            else:
                v_predictions_all   = np.concatenate((v_predictions_all, v_prediction))
                v_ground_truths_all = np.concatenate((v_ground_truths_all, v_ground_truth))

        v_accuracy = (accuracy_score(v_ground_truths_all, v_predictions_all))
        v_precision, v_recall, v_f1, _ = precision_recall_fscore_support(v_ground_truths_all, 
                                                                         v_predictions_all, average='binary')
        print("VALIDATION: ",v_accuracy, v_precision, v_recall, v_f1, _)

  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 0: 100%|████████████████████| 511/511 [03:27<00:00,  2.46it/s, loss=0.729]


Training:  0.5039138943248532 0.49852239145260285 0.5424189957952016 0.51954513148543 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.08it/s]


VALIDATION:  0.511002444987775 0.5014749262536873 0.16983016983016982 0.25373134328358204 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 1: 100%|████████████████████| 511/511 [03:28<00:00,  2.45it/s, loss=0.665]


Training:  0.49853228962818 0.49266031419005923 0.47316349245609696 0.48271511481201107 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.07it/s]


VALIDATION:  0.511002444987775 1.0 0.000999000999000999 0.001996007984031936 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 2: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.863]


Training:  0.5117416829745597 0.5062792415661167 0.5085332673757111 0.5074037512339585 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]
  _warn_prf(average, modifier, msg_start, len(result))


VALIDATION:  0.5105134474327628 0.0 0.0 0.0 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 3: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.697]


Training:  0.5221379647749511 0.5163934426229508 0.5298046005441504 0.5230130631180564 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.04it/s]


VALIDATION:  0.5183374083129584 0.5078895463510849 0.5144855144855145 0.511166253101737 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 4: 100%|█████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.71]


Training:  0.5267857142857143 0.5218812877263581 0.5132327479594361 0.5175208878912583 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5188264058679707 0.5043701799485861 0.98001998001998 0.6659877800407331 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 5: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.689]


Training:  0.5406066536203522 0.5353187300024612 0.5379668562948305 0.536639526276832 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5242053789731052 0.5110062893081762 0.6493506493506493 0.5719313682358117 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 6: 100%|██████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.7]


Training:  0.549779843444227 0.5467217346411978 0.5238684145436557 0.535051155740811 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.517359413202934 0.5035714285714286 0.986013986013986 0.6666666666666666 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 7: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.653]


Training:  0.5519814090019569 0.5512129380053908 0.5058125154588177 0.5275377273313555 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.01it/s]


VALIDATION:  0.5501222493887531 0.525328330206379 0.8391608391608392 0.6461538461538461 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 8: 100%|████████████████████| 511/511 [03:34<00:00,  2.38it/s, loss=0.471]


Training:  0.5694716242661448 0.5694186355189806 0.5305466237942122 0.5492957746478874 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.83it/s]


VALIDATION:  0.5535452322738387 0.5492170022371364 0.4905094905094905 0.5182058047493403 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 9: 100%|████████████████████| 511/511 [03:32<00:00,  2.40it/s, loss=0.601]


Training:  0.5798679060665362 0.5772750381291306 0.5617116002968093 0.5693869875893194 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.89it/s]


VALIDATION:  0.5745721271393643 0.6139130434782609 0.35264735264735264 0.44796954314720816 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 10: 100%|███████████████████| 511/511 [03:32<00:00,  2.40it/s, loss=0.513]


Training:  0.5905088062622309 0.5854438160806491 0.5889191194657433 0.5871763255240445 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.98it/s]


VALIDATION:  0.5677261613691932 0.5665529010238908 0.4975024975024975 0.5297872340425531 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 11: 100%|███████████████████| 511/511 [03:33<00:00,  2.39it/s, loss=0.706]


Training:  0.6135029354207436 0.6213912565301072 0.5589908483799159 0.5885416666666666 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.83it/s]


VALIDATION:  0.5770171149144254 0.5665362035225049 0.5784215784215784 0.5724172021749876 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 12: 100%|█████████████████████| 511/511 [03:35<00:00,  2.37it/s, loss=0.7]


Training:  0.636986301369863 0.6385666408868265 0.612663863467722 0.6253471345619793 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.99it/s]


VALIDATION:  0.5638141809290954 0.5374056280027454 0.7822177822177823 0.6371033360455655 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 13: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.71]


Training:  0.6571673189823874 0.6509985387238189 0.6611427158050952 0.6560314148975336 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:20<00:00,  6.10it/s]


VALIDATION:  0.5691931540342299 0.5423728813559322 0.7672327672327672 0.6354985519238725 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 14: 100%|███████████████████| 511/511 [03:27<00:00,  2.46it/s, loss=0.363]


Training:  0.68456457925636 0.6812778603268945 0.680435320306703 0.6808563296621706 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:20<00:00,  6.10it/s]


VALIDATION:  0.5965770171149144 0.5781527531083481 0.6503496503496503 0.61212976022567 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 15: 100%|███████████████████| 511/511 [03:27<00:00,  2.46it/s, loss=0.578]


Training:  0.7021771037181996 0.7035443037974684 0.6873608706406134 0.6953584386338045 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:20<00:00,  6.11it/s]


VALIDATION:  0.5887530562347189 0.5613496932515337 0.7312687312687313 0.6351409978308026 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 16: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.488]


Training:  0.7215019569471625 0.7216365461847389 0.7111056146425921 0.7163323782234956 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5990220048899756 0.5949632738719832 0.5664335664335665 0.5803480040941658 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 17: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.556]


Training:  0.7386252446183953 0.734382685686178 0.7385604748948801 0.7364656554445679 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5770171149144254 0.5512820512820513 0.7302697302697303 0.6282767511817792 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 18: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.544]


Training:  0.7605185909980431 0.7579806978470676 0.7576057383131338 0.7577931716971795 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.04it/s]


VALIDATION:  0.5843520782396088 0.5655951346655083 0.6503496503496503 0.6050185873605948 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 19: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.255]


Training:  0.7820450097847358 0.7889087656529516 0.7635419243136284 0.7760180995475112 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5941320293398533 0.5827686350435625 0.6013986013986014 0.591937069813176 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 20: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.322]


Training:  0.7977005870841487 0.8021755628636479 0.7843185753153599 0.7931465732866433 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.589242053789731 0.588170865279299 0.5364635364635365 0.5611285266457681 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 21: 100%|█████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.5]


Training:  0.81176614481409 0.8243523316062176 0.7870393272322532 0.8052638238643552 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5921760391198044 0.5708227311280747 0.6723276723276723 0.6174311926605505 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 22: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.347]


Training:  0.8169031311154599 0.8272493573264782 0.7959436062329953 0.8112945922097567 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.60880195599022 0.6192170818505338 0.5214785214785215 0.5661605206073753 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 23: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.39]


Training:  0.8236301369863014 0.8337182448036952 0.8036111798169676 0.8183879093198992 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5951100244498777 0.6082603254067585 0.4855144855144855 0.54 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 24: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.263]


Training:  0.8319471624266145 0.8483424693291569 0.8038585209003215 0.825501651003302 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.5990220048899756 0.5991237677984665 0.5464535464535465 0.5715778474399164 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 25: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.27]


Training:  0.8418542074363993 0.8578865174388339 0.8152362107346031 0.8360177552314522 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.6034229828850856 0.6067415730337079 0.5394605394605395 0.5711263881544157 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 26: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.265]


Training:  0.8516389432485323 0.8652555498193082 0.829087311402424 0.8467853985095365 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.6156479217603912 0.6254375729288215 0.5354645354645354 0.5769644779332616 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 27: 100%|██████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.0419]


Training:  0.8546966731898239 0.869340232858991 0.8310660400692556 0.8497723823975721 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.04it/s]


VALIDATION:  0.6097799511002445 0.6045314109165808 0.5864135864135864 0.5953346855983772 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 28: 100%|████████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.23]


Training:  0.8565313111545988 0.8655119714722364 0.8404650012367054 0.8528046178943405 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  6.05it/s]


VALIDATION:  0.6078239608801956 0.610678531701891 0.5484515484515484 0.5778947368421052 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 29: 100%|███████████████████| 511/511 [03:29<00:00,  2.44it/s, loss=0.289]


Training:  0.8698630136986302 0.8875878220140515 0.8436804353203067 0.8650773522698452 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:21<00:00,  5.93it/s]


VALIDATION:  0.5995110024449878 0.6022471910112359 0.5354645354645354 0.5668958223162347 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 30: 100%|███████████████████| 511/511 [03:28<00:00,  2.45it/s, loss=0.351]


Training:  0.8716976516634051 0.8801422041645506 0.8572841949047737 0.868562836737251 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|█████████████████████████████████████████| 128/128 [00:22<00:00,  5.64it/s]


VALIDATION:  0.5995110024449878 0.5821299638989169 0.6443556443556444 0.6116642958748223 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 31: 100%|███████████████████| 511/511 [03:38<00:00,  2.34it/s, loss=0.131]


Training:  0.87279843444227 0.8850987432675045 0.8535740786544646 0.8690506169730547 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  2%|█                                          | 3/128 [00:00<00:20,  6.01it/s]

In [None]:
# # Save the trained model weights
# training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

In [None]:
print(ground_truths)