# Next Instruction Prediction Training


In [1]:
import torch

torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

# DATASET GENERATION

In [2]:
#!/usr/bin/env python3

import sys,os
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment
from capstone import *
from capstone.x86 import *





data_dir_path = "./data/binaries/"
dir_file_list = os.listdir(data_dir_path)

with open('./data/instruction_clusters.txt', 'w') as data_file:
    for filename in dir_file_list:
        filePath = os.path.join(data_dir_path,filename)

        fh = open(filePath, 'rb')
        bin_bytearray = bytearray(fh.read())
        
        with open(filePath, 'rb') as f:
            elf = ELFFile(f)
            dwarfinfo = elf.get_dwarf_info()
            aranges = dwarfinfo.get_aranges()
            print(filename, len(aranges.entries))
            for arange in aranges.entries:

                entry = arange.begin_addr
                exit  = arange.begin_addr + arange.length
                ops = bin_bytearray[entry: exit]

                md = Cs(CS_ARCH_X86, CS_MODE_64)
                md.detail = True
                for inst in md.disasm(ops, entry):

                    data_file.write(inst.mnemonic+" "+inst.op_str+";")
                data_file.write('\n')






gitwipe 4
gitps 147
gitview 140
gitfm 341
gitwhich 6
gitkeys 4


# Creating the pipeline

In [3]:
from transformers import BertTokenizer, BertForNextSentencePrediction,BertForPreTraining
import torch

# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

tokenizer = BertTokenizer.from_pretrained("./binary-tokenizer")
# model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
model = BertForPreTraining.from_pretrained('bert-base-uncased')


In [4]:
delim = ';'
with open('./data/instruction_clusters.txt', 'r') as fp:
    text = fp.read().split('\n')

In [5]:
# text = text[:12]

We need to split sentences into consecutive, and non-consecutive sequences.

We have to deal with edge-cases too - for example where there is only a single sentence within a paragraph as with the three examples above (in comparison to below where we can easily split into multiple sentences).

In [6]:
# text[51].split(delim)

We'll assign a 50% probability of using the genuine next sentence, and 50% probability of using another random sentence.

To make this simpler, we'll create a *'bag'* of individual sentences to pull from when selecting a random sentence B.

In [7]:
bag = [instruction for instruction_cluster in text for instruction in instruction_cluster.split(delim)  if instruction!= '']
bag_size = len(bag)
print(bag_size)

49783


In [8]:
bag

['endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'mov rdx, qword ptr [rip + 0x2d98]',
 'mov rax, qword ptr [rip + 0x2d81]',
 'lea rcx, [rip + 0xd62]',
 'mov rsi, rcx',
 'mov rdi, rax',
 'mov eax, 0',
 'call 0x1120',
 'mov edi, 1',
 'call 0x1170',
 'endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'sub rsp, 0x20',
 'mov dword ptr [rbp - 0x14], edi',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 1',
 'mov esi, 0',
 'mov edi, eax',
 'call 0x1180',
 'mov qword ptr [rbp - 0x10], rax',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 2',
 'mov esi, 0',
 'mov edi, eax',
 'call 0x1180',
 'mov qword ptr [rbp - 8], rax',
 'mov rcx, qword ptr [rbp - 0x10]',
 'mov eax, dword ptr [rbp - 0x14]',
 'mov edx, 0',
 'mov rsi, rcx',
 'mov edi, eax',
 'call 0x1180',
 'mov rax, qword ptr [rbp - 8]',
 'leave ',
 'ret ',
 'endbr64 ',
 'push rbp',
 'mov rbp, rsp',
 'sub rsp, 0x40',
 'mov qword ptr [rbp - 0x38], rdi',
 'mov rax, qword ptr [rbp - 0x38]',
 'mov esi, 2',
 'mov rdi, rax',
 'mov eax, 0',
 'call 0x1160',
 'mov dw

And now we create our 50/50 NIP training data.

In [9]:
import random

history = []
next_instruction = []
label = []

page_len = 5
instruction_pages = []
for instruction_cluster in text:
    instructions = [
        instruction for instruction in instruction_cluster.split(delim) if instruction != ''
    ]
    if len(instructions)>page_len:
        
        for i in range(0,len(instructions),page_len):
            instruction_pages.append(instructions[i:i+page_len])
        
print(len(instruction_pages))
print(instruction_pages[0])

for instruction_page in instruction_pages:
    
#     instructions = [
#         instruction for instruction in instruction_page.split(';') if instruction != ''
#     ]
    
    
#     num_instructions = len(instruction_page)
    
    

#     start = random.randint(0, num_instructions-2)
    # 50/50 whether is IsNextSentence or NotNextSentence
    if random.random() >= 0.5:
        # this is IsNextSentence
        history.append(delim.join(instruction_page[:-1]))
        next_instruction.append(instruction_page[-1])
        label.append(0)
    else:
        index = random.randint(0, bag_size-1)
        # this is NotNextSentence
        history.append(delim.join(instruction_page[:-1]))
        next_instruction.append(bag[index])
        label.append(1)

10221
['endbr64 ', 'push rbp', 'mov rbp, rsp', 'mov rdx, qword ptr [rip + 0x2d98]', 'mov rax, qword ptr [rip + 0x2d81]']


In [10]:
print(len(label))
for i in range(3):
    print(label[i])
    print('->',history[i] , '\n')
    print('# ',next_instruction[i] , '\n')

10221
1
-> endbr64 ;push rbp;mov rbp, rsp;mov rdx, qword ptr [rip + 0x2d98] 

#  call 0xe319 

0
-> lea rcx, [rip + 0xd62];mov rsi, rcx;mov rdi, rax;mov eax, 0 

#  call 0x1120 

0
-> mov edi, 1 

#  call 0x1170 



Our data is now ready for tokenization, this time we truncate/pad each token to the same length of *512* tokens.

In [11]:
inputs = tokenizer(history, next_instruction, return_tensors='pt', 
                   max_length=64, truncation=True, padding='max_length')

In [12]:
inputs.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

We can see that the *token_type_ids* tensors have been built correctly (eg **1** indicating sentence B tokens) by checking the first instance of *token_type_ids*:

In [13]:
inputs.token_type_ids[0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

The **0** tokens following our sentence B tokens correspond to *PAD* tokens.

Alongside this, we need to create a *labels* tensor too - which corresponds to the values contained within our `label` variable. Our *labels* tensor must be a *LongTensor*, and we will need to transpose the tensor so that it matches our other tensors' dimensionality.

In [14]:
inputs['next_sentence_label'] = torch.LongTensor([label]).T


And the labels tensor is simply a clone of the input_ids tensor before masking.

In [15]:
inputs['labels'] = inputs.input_ids.detach().clone()

In [16]:
inputs.labels[0]

tensor([  2, 180,   1, 162,  94,   1,  86,  94,   9, 122,   1,  86, 101,   9,
         95,  89,  22, 107,   8, 469, 255,  23,   3, 104, 755,   3,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0])

Now we mask tokens in the input_ids tensor using the 15% probability for MLM - ensuring we don't mask CLS, SEP, or PAD tokens.

In [17]:
# create random array of floats with equal dimensions to input_ids tensor
rand = torch.rand(inputs.input_ids.shape)
# create mask array
mask_arr = (rand < 0.15) * (inputs.input_ids != 101) * \
           (inputs.input_ids != 102) * (inputs.input_ids != 0)

In [18]:
mask_arr[0]
# inputs.input_ids.shape[0]

tensor([False,  True, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False,  True, False,  True,  True,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False])

And now take the indices of each True value within each vector.

In [19]:
selection = []

for i in range(inputs.input_ids.shape[0]):
    selection.append(
        torch.flatten(mask_arr[i].nonzero()).tolist()
    )

In [20]:
selection[0]

[1, 6, 16, 18, 19]

Then apply these indices to each row in input_ids, assigning each value at these indices a value of 103.

In [21]:
for i in range(inputs.input_ids.shape[0]):
    inputs.input_ids[i, selection[i]] = 103

In [22]:
inputs.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'next_sentence_label', 'labels'])

The `inputs` tensors are now ready, and we can begin building the model input pipeline for training. We first create a PyTorch dataset from our data.

In [23]:
class MeditationsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings.input_ids)

Initialize our data using the `MeditationDataset` class.

In [24]:
dataset = MeditationsDataset(inputs)

In [25]:
train_size = int(0.8 * len(dataset))
validation_size = len(dataset) - train_size

train_dataset, validation_dataset = torch.utils.data.random_split(dataset, [train_size, validation_size])

And initialize the dataloader, which we'll be using to load our data into the model during training.

In [26]:
BATCH_SIZE = 8
train_loader      = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [27]:
# 

Now we can move onto setting up the training loop. First we setup GPU/CPU usage.

In [28]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# and move our model over to the selected device
model.to(device)

BertForPreTraining(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

Activate the training mode of our model, and initialize our optimizer (Adam with weighted decay - reduces chance of overfitting).

In [29]:
from sklearn.metrics import precision_recall_fscore_support , accuracy_score
import numpy as np


Now we can move onto the training loop, we'll train for a couple of epochs (change `epochs` to modify this).

In [30]:
# odict_keys(['loss', 'prediction_logits', 'seq_relationship_logits'])

In [None]:
from transformers import AdamW
from tqdm import tqdm  # for our progress bar


# initialize optimizer
optim = AdamW(model.parameters(), lr=5e-6)



epochs = 1000
counter = 0
for epoch in range(epochs):
    # setup loop with TQDM and dataloader
    train_loop = tqdm(train_loader, leave=True)
    
    
    instruction_predictions_all, instruction_ground_truths_all = None, None
    token_predictions_all, token_ground_truths_all = None, None
    
    # activate training mode
    model.train()
    for N,batch in enumerate(train_loop):

        optim.zero_grad()
        # pull all tensor batches required for training
        input_ids = batch['input_ids'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        next_sentence_label = batch['next_sentence_label'].to(device)
        labels = batch['labels'].to(device)
        # process
        outputs = model(input_ids, attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        next_sentence_label=next_sentence_label,
                        labels=labels)


        token_prediction = torch.argmax(outputs.prediction_logits, axis=-1)

        
        token_prediction = token_prediction.detach().cpu().numpy().flatten()
        token_ground_truth = labels.detach().cpu().numpy().flatten()
        
        counter+=1
        if counter%2000==0 and counter!=0:
            
            print(outputs.prediction_logits.shape, outputs.prediction_logits)
            print(torch.argmax(outputs.prediction_logits, axis=-1))

            print(tokenizer.batch_decode(torch.argmax(outputs.prediction_logits, axis=-1)))

            print(labels.shape , labels)
            
        
        
        instruction_prediction = torch.argmax(outputs.seq_relationship_logits, axis=-1)
        instruction_prediction   = instruction_prediction.detach().cpu().numpy().flatten()
        instruction_ground_truth = next_sentence_label.detach().cpu().numpy().flatten()
        
        if N==0:
            instruction_predictions_all   = instruction_prediction
            instruction_ground_truths_all = instruction_ground_truth
            
            token_predictions_all         = token_prediction
            token_ground_truths_all       = token_ground_truth  
        else:
            instruction_predictions_all   = np.concatenate((instruction_predictions_all, instruction_prediction))
            instruction_ground_truths_all = np.concatenate((instruction_ground_truths_all, instruction_ground_truth))
            token_predictions_all   = np.concatenate((token_predictions_all, token_prediction))
            token_ground_truths_all = np.concatenate((token_ground_truths_all, token_ground_truth))
            

        # extract loss
        loss = outputs.loss
        # calculate loss for every parameter that needs grad update
        loss.backward()
        # update parameters
        optim.step()
        # print relevant info to progress bar
        train_loop.set_description(f'Epoch {epoch}')
        train_loop.set_postfix(loss=loss.item())
    
    
    instruction_accuracy = (accuracy_score(instruction_ground_truths_all,instruction_predictions_all))
    instruction_precision, instruction_recall, instruction_f1, _ = precision_recall_fscore_support(instruction_ground_truths_all,instruction_predictions_all, average='binary')
    print("Training: ", instruction_accuracy, instruction_precision, instruction_recall, instruction_f1, _)
    
    
    ### EVAL Validation

#     with torch.no_grad():
#         model.eval()
#         v_predictions_all, v_ground_truths_all = None, None
#         validation_loop = tqdm(validation_loader, leave=True)
#         for N,v_batch in enumerate(validation_loop):
#             v_input_ids = v_batch['input_ids'].to(device)
#             v_attention_mask = v_batch['attention_mask'].to(device)
#             v_token_type_ids = v_batch['token_type_ids'].to(device)
#             v_labels = v_batch['labels'].to(device)
#             # process
#             v_outputs = model(v_input_ids, attention_mask=v_attention_mask,
#                             token_type_ids=v_token_type_ids,
#                             labels=v_labels)
#             v_prediction = torch.argmax(v_outputs.logits, axis=-1)
#             v_prediction = v_prediction.detach().cpu().numpy().flatten()
#             v_ground_truth = v_labels.detach().cpu().numpy().flatten()

#             if N==0:
#                 v_predictions_all = v_prediction
#                 v_ground_truths_all = v_ground_truth
#             else:
#                 v_predictions_all   = np.concatenate((v_predictions_all, v_prediction))
#                 v_ground_truths_all = np.concatenate((v_ground_truths_all, v_ground_truth))

#         v_accuracy = (accuracy_score(v_ground_truths_all, v_predictions_all))
#         v_precision, v_recall, v_f1, _ = precision_recall_fscore_support(v_ground_truths_all, 
#                                                                          v_predictions_all, average='binary')
#         print("VALIDATION: ",v_accuracy, v_precision, v_recall, v_f1, _)

  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 0: 100%|███████████████████| 1022/1022 [02:44<00:00,  6.21it/s, loss=3.53]


Training:  0.5100293542074364 0.5079513564078578 0.5331369661266568 0.5202395209580838 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 1:  96%|███████████████████▏| 978/1022 [02:35<00:07,  6.02it/s, loss=2.52]

torch.Size([8, 64, 30522]) tensor([[[ 8.2182e+00,  8.3281e+00,  8.6531e+00,  ..., -1.8650e+00,
          -8.1123e-01, -3.0827e+00],
         [ 7.1044e+00,  8.3823e+00,  6.9689e+00,  ..., -1.7266e+00,
           1.2043e-01, -3.9926e+00],
         [ 2.9552e+00,  4.2867e+00,  3.3036e+00,  ..., -5.6875e+00,
          -4.5578e+00, -7.1035e+00],
         ...,
         [ 1.5857e+01,  8.5598e+00,  8.4537e+00,  ...,  1.7169e+00,
           2.3162e+00, -1.2559e+00],
         [ 1.6430e+01,  9.4550e+00,  9.5399e+00,  ...,  1.3539e+00,
           2.2787e+00,  4.2687e-01],
         [ 1.4751e+01,  7.7021e+00,  7.6795e+00,  ..., -1.7905e-01,
           1.0883e+00, -1.9829e+00]],

        [[ 1.0502e+01,  1.0775e+01,  1.0563e+01,  ...,  3.5160e-01,
           3.5340e-01, -1.1113e+00],
         [ 1.1889e+01,  1.3569e+01,  1.1991e+01,  ...,  2.7444e+00,
           1.7586e+00,  6.1909e-01],
         [ 1.4836e+01,  1.6090e+01,  1.4027e+01,  ...,  3.9560e+00,
           3.3064e+00,  2.2776e+00],
         ...

Epoch 1: 100%|███████████████████| 1022/1022 [02:42<00:00,  6.27it/s, loss=2.44]


Training:  0.5204256360078278 0.5182534001431639 0.5331369661266568 0.525589836660617 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 2: 100%|███████████████████| 1022/1022 [02:45<00:00,  6.18it/s, loss=2.03]


Training:  0.5299657534246576 0.5270808909730363 0.551791850760923 0.539153375704521 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 3:  91%|███████████████████▏ | 934/1022 [02:27<00:14,  5.98it/s, loss=1.6]

torch.Size([8, 64, 30522]) tensor([[[ 4.8569,  3.0642,  8.4831,  ..., -4.2121, -4.6648, -5.0275],
         [ 9.7800,  9.8827,  9.9903,  ...,  1.0099, -0.2144, -4.0216],
         [ 5.3497,  8.5034,  7.5117,  ..., -2.1596, -1.1514, -6.4077],
         ...,
         [16.8085,  7.3693,  8.0538,  ...,  0.1898, -0.7422,  1.1627],
         [16.6647,  6.4952,  8.0178,  ...,  0.8615, -0.0357, -0.6246],
         [18.3609,  8.9514,  9.6482,  ...,  1.0330,  0.5950, -0.6513]],

        [[ 6.1671,  6.3186, 10.5674,  ..., -3.3194, -3.6099, -3.7328],
         [ 8.6013,  8.6139,  8.2121,  ..., -0.5204, -1.6258, -2.3219],
         [ 3.4770,  3.3306,  4.3195,  ..., -1.3236, -3.0779, -5.6692],
         ...,
         [16.2036,  7.2324,  7.9815,  ..., -0.2070, -0.8587, -2.5573],
         [16.1823,  6.9145,  7.9099,  ..., -0.2690, -1.0871, -2.1080],
         [17.1722,  8.5498,  8.7344,  ...,  0.3369, -0.3548, -2.0142]],

        [[ 4.4646,  4.2612,  9.1370,  ..., -3.9766, -3.8175, -4.8614],
         [ 8.3902,

Epoch 3: 100%|███████████████████| 1022/1022 [02:40<00:00,  6.35it/s, loss=1.46]


Training:  0.5434197651663405 0.5409757269887047 0.5525282277859598 0.5466909532483304 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 4: 100%|███████████████████| 1022/1022 [02:43<00:00,  6.24it/s, loss=1.22]


Training:  0.5593199608610567 0.5576499388004896 0.5591556210112911 0.5584017649221719 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 5:  87%|█████████████████▍  | 890/1022 [02:22<00:23,  5.71it/s, loss=1.11]

torch.Size([8, 64, 30522]) tensor([[[ 7.2367e+00,  9.6140e+00,  1.4405e+01,  ..., -3.7860e-01,
          -2.1508e+00,  6.1996e-01],
         [ 5.4465e+00,  7.6073e+00,  6.8403e+00,  ..., -5.0868e-01,
          -1.6260e+00, -7.1499e-01],
         [ 8.0273e+00,  8.6460e+00,  7.5630e+00,  ..., -1.0702e+00,
          -1.4359e+00, -5.6456e+00],
         ...,
         [ 1.9372e+01,  9.3230e+00,  9.6650e+00,  ...,  1.5249e+00,
           3.6203e-01, -1.1098e+00],
         [ 1.9628e+01,  1.0529e+01,  1.0221e+01,  ...,  1.2275e+00,
           4.2393e-02, -4.4877e-01],
         [ 1.9168e+01,  9.7940e+00,  9.5270e+00,  ...,  5.7744e-01,
           2.2575e-01,  1.5923e-01]],

        [[ 6.1535e+00,  6.9113e+00,  1.2495e+01,  ..., -2.9739e+00,
          -3.8914e+00, -2.3382e+00],
         [ 9.5945e+00,  1.0114e+01,  1.0367e+01,  ...,  3.0028e+00,
           1.4632e+00,  3.2904e+00],
         [ 2.2412e+00,  2.9328e+00,  2.3903e+00,  ..., -3.5931e+00,
          -4.3061e+00, -6.4464e+00],
         ...

Epoch 5: 100%|██████████████████| 1022/1022 [02:43<00:00,  6.26it/s, loss=0.797]


Training:  0.5798679060665362 0.57715527650326 0.586647029945999 0.5818624467437614 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 6: 100%|███████████████████| 1022/1022 [02:43<00:00,  6.26it/s, loss=1.12]


Training:  0.6121575342465754 0.6119514009422267 0.6057928325969563 0.6088565437276428 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 7:  83%|███████████████▋   | 846/1022 [02:18<00:29,  5.97it/s, loss=0.921]

torch.Size([8, 64, 30522]) tensor([[[  4.1424,   4.5533,  11.7281,  ...,  -4.3919,  -5.0579,  -5.4246],
         [  5.5042,   6.7514,   6.0260,  ...,  -1.8720,  -3.3585,  -4.2302],
         [ -2.4287,  -2.0654,   5.9629,  ...,  -9.1203,  -8.2882, -10.2356],
         ...,
         [ 19.4217,   7.7373,   8.7010,  ...,   0.3701,  -1.0872,  -1.7759],
         [ 20.0670,   7.5697,   8.8537,  ...,   1.1417,   0.2217,  -1.8466],
         [ 19.4400,   7.4982,   7.7865,  ...,   0.2047,  -0.4002,  -0.3591]],

        [[  9.1899,  10.2686,  14.0625,  ...,  -3.9938,  -1.9636,   0.0339],
         [ 10.7595,   9.7533,   9.8594,  ...,  -0.7128,   0.1037,  -0.4242],
         [ 11.3194,  17.5215,   8.9193,  ...,   0.1410,   0.2254,   1.6356],
         ...,
         [ 20.0671,   9.3642,   9.8230,  ...,  -0.9108,  -0.8684,  -0.4978],
         [ 20.1018,   9.9186,  10.0199,  ...,  -0.3380,  -0.2224,  -0.2030],
         [ 20.2856,   9.4871,   9.4389,  ...,  -0.0267,   0.1887,  -1.2321]],

        [[  7.901

Epoch 7: 100%|███████████████████| 1022/1022 [02:46<00:00,  6.14it/s, loss=1.01]


Training:  0.6556996086105675 0.6700891650905161 0.6087383406971035 0.6379421221864953 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 8: 100%|██████████████████| 1022/1022 [02:43<00:00,  6.26it/s, loss=0.698]


Training:  0.6954500978473581 0.7125603864734299 0.6516936671575847 0.6807692307692308 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 9:  78%|██████████████▉    | 802/1022 [02:11<00:36,  6.09it/s, loss=0.828]

torch.Size([8, 64, 30522]) tensor([[[ 5.6304e+00,  6.4579e+00,  1.3290e+01,  ..., -4.9492e+00,
          -5.0249e+00, -3.8997e+00],
         [ 6.1365e+00,  6.2229e+00,  6.6393e+00,  ..., -1.6513e+00,
          -2.5229e+00, -2.2981e+00],
         [ 2.0287e+00,  3.1292e+00,  4.6756e+00,  ..., -1.1575e+00,
          -2.5772e+00, -1.4355e-01],
         ...,
         [ 2.1484e+01,  8.8481e+00,  9.8984e+00,  ...,  1.2335e+00,
           2.3470e-01, -1.2952e+00],
         [ 2.1613e+01,  8.6066e+00,  9.3327e+00,  ...,  1.0923e+00,
           9.6563e-02, -1.2928e+00],
         [ 2.0714e+01,  7.2457e+00,  8.0363e+00,  ..., -1.6116e-02,
          -1.2907e+00, -1.4470e+00]],

        [[ 5.5991e+00,  6.9773e+00,  1.3939e+01,  ..., -5.1234e+00,
          -4.3282e+00, -4.1649e+00],
         [ 7.9577e+00,  1.0550e+01,  8.4853e+00,  ...,  6.3294e-01,
           4.6863e-01, -3.0525e+00],
         [ 3.5847e+00,  7.2650e+00,  7.0779e+00,  ..., -1.8056e-02,
          -1.2209e-01, -1.7025e+00],
         ...

Epoch 9: 100%|██████████████████| 1022/1022 [02:46<00:00,  6.15it/s, loss=0.682]


Training:  0.7388698630136986 0.7539952842546502 0.7064310260186549 0.7294386009377771 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 10: 100%|█████████████████| 1022/1022 [02:42<00:00,  6.29it/s, loss=0.797]


Training:  0.764554794520548 0.7906410603191777 0.7174766813942072 0.7522841333161755 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 11:  74%|█████████████▎    | 758/1022 [02:00<00:45,  5.85it/s, loss=0.518]

torch.Size([8, 64, 30522]) tensor([[[ 4.8120,  4.2149, 11.0203,  ..., -6.6567, -4.9816, -5.7425],
         [ 5.0032,  6.2727,  7.1272,  ...,  0.1404, -0.0703, -2.0883],
         [ 6.7085,  4.8988,  5.8099,  ..., -0.6984, -1.0504, -3.1079],
         ...,
         [21.0035,  7.4241,  8.5138,  ..., -0.9447, -0.9265, -0.8267],
         [21.9839,  7.6607,  9.4231,  ...,  0.7031, -0.1976, -1.4615],
         [21.2325,  7.0218,  8.9304,  ..., -0.3440, -0.6520, -2.6210]],

        [[ 5.9837,  4.5846, 13.4077,  ..., -4.8707, -4.3472, -5.7644],
         [ 6.4231,  7.2978,  5.8957,  ..., -0.1557, -1.3648, -3.2416],
         [ 7.8917, 10.7717,  7.8666,  ..., -1.0947,  0.4267,  0.1293],
         ...,
         [22.3188,  7.8779,  9.3309,  ...,  0.2682, -0.8682, -1.3427],
         [21.9743,  7.7118,  9.6560,  ...,  0.3294, -1.1131, -0.5805],
         [21.9701,  8.1777,  9.8869,  ...,  0.7213, -0.2708, -2.2278]],

        [[ 7.3035,  6.6807, 14.6755,  ..., -3.7945, -3.4048, -1.3631],
         [ 7.0885,

Epoch 11: 100%|█████████████████| 1022/1022 [02:42<00:00,  6.28it/s, loss=0.793]


Training:  0.7969667318982387 0.8159685863874345 0.7650957290132547 0.7897137066126172 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 12: 100%|█████████████████| 1022/1022 [02:46<00:00,  6.13it/s, loss=0.514]


Training:  0.8165362035225049 0.8358559498956158 0.7862052037309769 0.810270680495826 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 13:  70%|████████████▌     | 714/1022 [01:53<00:49,  6.20it/s, loss=0.916]

torch.Size([8, 64, 30522]) tensor([[[ 5.7142e+00,  5.5387e+00,  1.4037e+01,  ..., -4.8681e+00,
          -5.1514e+00, -3.2522e+00],
         [ 7.3290e+00,  1.0198e+01,  8.3858e+00,  ..., -1.1615e+00,
          -1.2777e+00, -1.9209e+00],
         [ 7.8848e+00,  5.7219e+00,  5.5768e+00,  ..., -8.7830e-01,
          -1.3330e+00, -4.9574e+00],
         ...,
         [ 2.1396e+01,  6.3404e+00,  8.4142e+00,  ..., -5.1657e-01,
          -1.8638e+00, -1.9525e+00],
         [ 2.2452e+01,  7.3527e+00,  9.1214e+00,  ...,  5.9530e-01,
          -1.0197e+00, -8.3408e-01],
         [ 2.2257e+01,  6.7903e+00,  8.9188e+00,  ...,  1.1242e-01,
          -1.7431e+00, -1.0996e+00]],

        [[ 4.2575e+00,  7.0059e+00,  1.5569e+01,  ..., -3.0375e+00,
          -2.2087e+00, -2.0360e+00],
         [ 7.7110e+00,  8.9956e+00,  9.8084e+00,  ..., -1.9622e+00,
          -1.3425e+00, -1.4067e+00],
         [ 1.9427e+00,  5.0660e+00,  5.7174e+00,  ..., -1.6608e+00,
          -1.8409e+00, -4.9501e+00],
         ...

Epoch 13: 100%|███████████████████| 1022/1022 [02:42<00:00,  6.29it/s, loss=1.1]


Training:  0.8408757338551859 0.8572532852357639 0.8166421207658321 0.8364550597108735 None


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 14:   0%|                    | 5/1022 [00:00<02:38,  6.40it/s, loss=0.468]

In [None]:
# # Save the trained model weights
# training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")