<a href="https://colab.research.google.com/github/VincentZuo/fin-models/blob/main/%5BFinal%5D_v8_torch_transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set up and read data

In [None]:
!pip install transformers==4.17.0
!pip install -U sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.17.0
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 8.2 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)
[K     |████████████████████████████████| 86 kB 3.3 MB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 64.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 76.1 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 60.1 MB/s 
Building wheels for collected packages:

In [None]:
import os
# Debugging switch read by the CUDA runtime; set here before torch is imported.
# NOTE(review): presumably enabled to get precise error locations while debugging —
# it is expected to slow GPU execution, so confirm it should stay on for real runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
import torch
import transformers
# Record the library versions used for this run.
print(transformers.__version__)
print(torch.__version__)

4.17.0
1.11.0+cu113


In [None]:
import sys
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
# Mount Google Drive (Colab only) so the dataset path under /content/gdrive resolves.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Load the pickled dataset; the preview shows columns title / label / range / emb.
# NOTE(review): unpickling can execute arbitrary code — only load files you trust.
read_df = pd.read_pickle("/content/gdrive/MyDrive/224U_2022/final_project_data.pkl")

In [None]:
# Preview the first rows of the loaded frame.
read_df.head()

Unnamed: 0,title,label,range,emb
0,Joseph Greenberg,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","(0, 106)","[[-0.07557425, -0.011484241, -0.07807306, 0.00..."
1,Pauline Donalda,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","(106, 127)","[[-0.057122264, 0.005889078, -0.030671664, -0...."
2,List of German football transfers summer 2017,"[0, 0, 1, 0, 0]","(127, 132)","[[-0.0032938644, -0.02310217, -0.05397396, -0...."
3,Lester Hudson,"[0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...","(132, 176)","[[-0.0091192825, 0.090078354, -0.029187214, -0..."
4,Monique Ganderton,"[1, 0, 0, 0, 0, 1, 0]","(176, 183)","[[-0.090651, -0.09176681, -0.029592248, -0.062..."


In [None]:
# Shuffle all rows once. The seed is fixed so that the head/tail train/validation
# split taken from this frame further down is identical after Restart & Run All.
read_df = read_df.sample(frac=1, random_state=42)
read_df.head()

Unnamed: 0,title,label,range,emb
793,Henry Charbonneau,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0]","(16145, 16158)","[[0.05722959, -0.008566863, -0.094458535, 0.00..."
504,Wild West City,"[1, 0]","(10505, 10507)","[[0.10378061, 0.071708806, -0.028881393, 0.004..."
1007,Mikkel Aagaard,"[1, 1, 0, 0, 1, 0, 0, 1, 0]","(20758, 20767)","[[-0.090821765, 0.018140187, 0.030428424, -0.0..."
1820,Frederik Hendrikplantsoen,"[0, 0]","(38642, 38644)","[[0.13479649, 0.087804824, -0.01524276, -0.064..."
500,Jane Somerville,"[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, ...","(9566, 9631)","[[-0.02891623, -0.043679826, 0.0065269014, -0...."


In [None]:
# Fixed sequence length: collate_fn pads/truncates each sample's embeddings and labels to it.
MAX_SEQ_LENGTH = 128

In [None]:
from sentence_transformers import SentenceTransformer
# Sentence encoder; in the visible cells it is only used to embed the empty string for padding.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
# Encoding of the empty string; its first row is what dummy_emb() returns as the padding vector.
empty_sentence = sentence_model.encode([''])
# NOTE(review): repeats the MAX_SEQ_LENGTH definition from an earlier cell.
MAX_SEQ_LENGTH=128
# Label value written into padded positions by pad_label.
# NOTE(review): coincides with the real label value 0 seen in the data preview.
FAKE_INDEX = 0

In [None]:
def dummy_emb():
  """Return the padding vector: the first row of the module-level `empty_sentence`."""
  padding_vector = empty_sentence[0]
  return padding_vector

def pad_emb(current_emb, seq_len):
  """Pad or truncate a 2-D sequence of embedding rows to exactly `seq_len` rows.

  Args:
    current_emb: array-like of shape (n_rows, emb_dim).
    seq_len: number of rows the result must have.

  Returns:
    The first `seq_len` rows when `current_emb` already has at least that many,
    otherwise `current_emb` followed by copies of `dummy_emb()`.
  """
  # `>=` (not `>`): a sequence of exactly seq_len rows needs no padding. Building
  # an empty padding array for it yields a 1-D array that cannot be concatenated
  # with the 2-D embeddings, which previously made collate_fn drop such samples.
  if len(current_emb) >= seq_len:
    return current_emb[0:seq_len]
  padding = np.array([dummy_emb()] * (seq_len - len(current_emb)), dtype='double')
  return np.concatenate([current_emb, padding], axis=0)

def pad_label(current_label, seq_len):
  """Truncate `current_label` to `seq_len` items, or right-pad it with FAKE_INDEX.

  Over-long inputs come back as a plain slice of the input; everything else is
  returned as a numpy array of length `seq_len`.
  """
  missing = seq_len - len(current_label)
  if missing < 0:
    return current_label[:seq_len]
  filler = [FAKE_INDEX for _ in range(missing)]
  return np.concatenate((current_label, filler), axis=0)

In [None]:
from torch.utils.data import Dataset

class WikiPandasDfDataSet(Dataset):
  """Dataset over a DataFrame that has `emb` and `label` columns.

  Each item is the pair (emb, label) for one row, both converted to float64
  numpy arrays. No padding happens here; that is left to the collate function.
  """

  def __init__(self, input_df):
    self.pandas_df = input_df

  def __len__(self):
    return len(self.pandas_df)

  def __getitem__(self, index):
    # Positional lookup: `index` is a row position, not a DataFrame index label.
    row = self.pandas_df.iloc[index]
    return (np.array(row["emb"], dtype='double'),
            np.array(row["label"], dtype='double'))

def collate_fn(batch):
    """Pad every (emb, label) sample to MAX_SEQ_LENGTH and stack them into tensors.

    Args:
        batch: iterable of (seqs, labels) pairs as produced by the dataset.

    Returns:
        (sequences, labels): float32 tensors of shape
        (n_kept, MAX_SEQ_LENGTH, emb_dim) and (n_kept, MAX_SEQ_LENGTH, 1).
        Samples whose padding fails are reported and skipped, so n_kept can be
        smaller than len(batch).

    Raises:
        ValueError: if no sample in the batch could be padded.
    """
    batched_seq = []
    batched_label = []
    for seqs, labels in batch:
      try:
        local_label = pad_label(labels, MAX_SEQ_LENGTH)
        local_emb = pad_emb(seqs, MAX_SEQ_LENGTH)
      except Exception as e:
        # Best effort: report the bad sample and keep the rest of the batch.
        print("collate_fn err:", e)
        continue
      batched_seq.append(local_emb)
      batched_label.append(local_label)

    if not batched_seq:
      # Without this the unsqueeze below fails with an opaque dimension error.
      raise ValueError("collate_fn: no sample in the batch could be padded")

    # Stack in numpy first: building a tensor from a list of ndarrays is slow.
    seq_tensor = torch.as_tensor(np.stack(batched_seq), dtype=torch.float32)
    label_tensor = torch.as_tensor(np.stack(batched_label), dtype=torch.float32)
    return seq_tensor, label_tensor.unsqueeze(2)

In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE_TRAIN = 32
N_TRAIN_ROWS = 6400
N_VAL_ROWS = 3200

# Training rows come from the front of the shuffled frame. Validation rows are
# the last N_VAL_ROWS of whatever remains after the training slice: identical to
# read_df.tail(3200) when the frame has at least 9600 rows, but never overlapping
# the training rows when it has fewer.
train_df = read_df.iloc[:N_TRAIN_ROWS]
val_df = read_df.iloc[N_TRAIN_ROWS:].tail(N_VAL_ROWS)
assert len(val_df) >= BATCH_SIZE_TRAIN, "not enough rows left for a validation batch"

dataset_train = WikiPandasDfDataSet(train_df)
train_dataloader = DataLoader(dataset_train, batch_size=BATCH_SIZE_TRAIN, collate_fn = collate_fn, shuffle=True, drop_last=True)
dataset_val = WikiPandasDfDataSet(val_df)
# The same batch size is used for validation; drop_last discards a trailing partial batch.
val_dataloader = DataLoader(dataset_val, batch_size=BATCH_SIZE_TRAIN, collate_fn = collate_fn, shuffle=False, drop_last=True)

# NN Model code
Reference: PyTorch translation Transformer tutorial — https://pytorch.org/tutorials/beginner/translation_transformer.html

In [None]:
import torch.nn as nn
import datetime
import math
from torch.nn import TransformerEncoderLayer

In [None]:
# Helper module that adds sinusoidal positional encoding to the input embeddings
# so the encoder sees the order of the sequence elements.
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings, then apply dropout.

    Args:
        emb_size: embedding dimension of the inputs.
        dropout: dropout probability applied after the addition.
        maxlen: longest sequence length supported.
        batch_first: if False (default, original behaviour) inputs are
            (seq, batch, emb); if True inputs are (batch, seq, emb).
    """

    def __init__(self, emb_size, dropout=0.1, maxlen=MAX_SEQ_LENGTH, batch_first=False):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(- torch.arange(0, emb_size, 2)* math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        # Stored as (maxlen, 1, emb_size) regardless of batch_first so the buffer
        # shape is unchanged from the original implementation.
        pos_embedding = pos_embedding.unsqueeze(-2)

        self.batch_first = batch_first
        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding):
        if self.batch_first:
            # (seq, 1, emb) -> (1, seq, emb): broadcast over the batch dimension.
            seq_len = token_embedding.size(1)
            encoding = self.pos_embedding[:seq_len].transpose(0, 1)
        else:
            encoding = self.pos_embedding[:token_embedding.size(0), :]
        return self.dropout(token_embedding + encoding)


class TransformerClassifierModel(nn.Module):
  """One transformer encoder layer followed by a per-position sigmoid classifier.

  Inputs are batch-first, (batch, seq, emb_dim), which is the layout collate_fn
  produces. The output has shape (batch, seq, 1) with values in (0, 1).
  """

  def __init__(self, emb_dim=384, nhead = 16):
    super(TransformerClassifierModel, self).__init__()
    # batch_first=True is required: with the default (seq-first) layout the layer
    # treats dim 0 of a (batch, seq, emb) input as the sequence, so attention
    # would mix different samples of the batch instead of positions in a sample.
    self.transformer_encoder = TransformerEncoderLayer(d_model=emb_dim,
                                                        nhead=nhead,
                                                        batch_first=True)
    # Same reasoning: positions must be indexed along dim 1, not the batch dim.
    self.positional_encoding = PositionalEncoding(emb_dim, batch_first=True)
    self.classifier_layer = nn.Linear(emb_dim, 1)
    self.activation = torch.nn.Sigmoid()

  def forward(self, seq_input):
    enc_emb = self.positional_encoding(seq_input)
    trans_enc = self.transformer_encoder(enc_emb)
    linear_output = self.classifier_layer(trans_enc)
    return self.activation(linear_output)

# Training loop code

In [None]:
from tqdm import tqdm
from transformers import AdamW
from sklearn.metrics import f1_score,accuracy_score

In [None]:
def train(train_dataloader, is_early=False, max_batches=500):
    """Run one training pass over `train_dataloader` and return the mean batch loss.

    Uses the module-level `model`, `optimizer`, `CELoss` and `device`.

    Args:
        train_dataloader: iterable of (sequences, labels) tensor pairs.
        is_early: when True, stop once `max_batches` batches have been processed.
        max_batches: batch budget used when `is_early` is True.

    Returns:
        Mean loss over the batches actually processed, or NaN if there were none.
    """
    model.train()

    total_loss = 0.0
    batches_done = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        sequences, labels = batch[0].to(device), batch[1].to(device)

        model.zero_grad()
        preds = model(sequences)
        loss = CELoss(preds, labels)

        loss.backward()
        # Clip the gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        total_loss += loss.item()
        batches_done += 1

        # Progress update every 50 batches.
        if step % 50 == 0 and step != 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))
            print("loss",loss.item())
            torch.cuda.empty_cache()

        # Checked on every batch (not only on progress steps) so the budget is
        # honoured for any max_batches value.
        if is_early and batches_done >= max_batches:
            print("early stopping...")
            break

    # Divide by the number of batches whose loss was accumulated; an empty
    # loader yields NaN instead of a ZeroDivisionError.
    if batches_done == 0:
        return float("nan")
    return total_loss / batches_done

def evaluate(dev_dataloader, is_early=False, max_batches=500):
    """Evaluate the module-level `model` on `dev_dataloader` without gradient tracking.

    Uses the module-level `model`, `CELoss` and `device`.

    Args:
        dev_dataloader: iterable of (sequences, labels) tensor pairs.
        is_early: when True, stop once `max_batches` batches have been processed.
        max_batches: batch budget used when `is_early` is True.

    Returns:
        (avg_loss, eval_preds, eval_labels): mean batch loss (NaN if no batch was
        processed) and the flattened predictions and labels as Python lists.
    """
    print("\nEvaluating...")

    # Switch the model to evaluation mode (disables dropout).
    model.eval()
    total_loss = 0.0
    eval_preds = []
    eval_labels = []
    batches_done = 0

    for batch in tqdm(dev_dataloader):
        # Checked on every batch so any max_batches value is honoured.
        if is_early and batches_done >= max_batches:
            print("early stopping...")
            break

        sequences, labels = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            preds = model(sequences)
            loss = CELoss(preds, labels)

        total_loss += loss.item()
        eval_preds += preds.detach().cpu().numpy().flatten().tolist()
        eval_labels += labels.detach().cpu().numpy().flatten().tolist()
        batches_done += 1

    # An empty loader yields NaN instead of a ZeroDivisionError.
    avg_loss = total_loss / batches_done if batches_done else float("nan")

    return avg_loss, eval_preds, eval_labels

In [None]:
LEARING_RATE = 0.001
NUM_EPOCS = 5
# Decision thresholds at which accuracy / macro-F1 are reported after each epoch.
EVAL_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]

# Seed torch so weight initialisation, dropout and DataLoader shuffling repeat
# across Restart & Run All.
torch.manual_seed(42)

model = TransformerClassifierModel()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# The model already ends in a sigmoid, so plain binary cross-entropy is used.
CELoss = nn.BCELoss()
optimizer = AdamW(model.parameters(),lr = LEARING_RATE)

train_losses = []
valid_losses = []
# NOTE(review): accuracy_list and f1_list are created but never filled below.
accuracy_list = []
f1_list = []

for epoch in range(NUM_EPOCS):

    print('\n Epoch {:} / {:}'.format(epoch + 1, NUM_EPOCS))

    # One pass over the training data, then one over the validation data.
    train_loss = train(train_dataloader, is_early=True)
    valid_loss, e_pred, e_label = evaluate(val_dataloader, is_early=True)

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print('Train loss:', train_loss, 'Val loss:', valid_loss)

    # The boolean labels do not depend on the threshold, so build them once.
    # Padded positions are part of e_label / e_pred and therefore of the metrics.
    labels_bool = [ elem > 0 for elem in e_label]
    for threshold in EVAL_THRESHOLDS:
      preds_bool = [elem > threshold for elem in e_pred]
      accuracy = accuracy_score(labels_bool, preds_bool)
      valid_f1 = f1_score(labels_bool, preds_bool,average="macro")
      print('Threshold:', threshold,'Accuracy:', accuracy, 'F1:', valid_f1)





 Epoch 1 / 5


 26%|██▌       | 52/200 [00:06<00:19,  7.54it/s]

  Batch    50  of    200.
loss 0.21984297037124634


 42%|████▎     | 85/200 [00:10<00:13,  8.31it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 50%|█████     | 101/200 [00:12<00:12,  7.97it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
  Batch   100  of    200.
loss 0.12895075976848602


 76%|███████▌  | 152/200 [00:19<00:05,  8.10it/s]

  Batch   150  of    200.
loss 0.09535811096429825


 77%|███████▋  | 154/200 [00:19<00:05,  8.10it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 91%|█████████ | 182/200 [00:22<00:02,  8.13it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 98%|█████████▊| 197/200 [00:24<00:00,  8.25it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 200/200 [00:24<00:00,  8.02it/s]



Evaluating...


 93%|█████████▎| 93/100 [00:10<00:00,  9.16it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 100/100 [00:11<00:00,  9.04it/s]


Train loss: 0.13021964525803922 Val loss: 0.08993701655417681
Threshold: 0.1 Accuracy: 0.9047138754298218 F1: 0.718971151217429
Threshold: 0.2 Accuracy: 0.9047163175992498 F1: 0.7189750627554179
Threshold: 0.3 Accuracy: 0.9047236441075336 F1: 0.7189867978598046
Threshold: 0.4 Accuracy: 0.9517280790872148 F1: 0.5432156455821004
Threshold: 0.5 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.6 Accuracy: 0.9541287316348859 F1: 0.4882629870738514

 Epoch 2 / 5


  2%|▏         | 3/200 [00:00<00:23,  8.44it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 26%|██▌       | 52/200 [00:06<00:18,  8.08it/s]

  Batch    50  of    200.
loss 0.10896479338407516


 51%|█████     | 102/200 [00:12<00:12,  8.05it/s]

  Batch   100  of    200.
loss 0.09142374992370605


 53%|█████▎    | 106/200 [00:13<00:11,  8.23it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 76%|███████▌  | 152/200 [00:18<00:06,  7.96it/s]

  Batch   150  of    200.
loss 0.057646263390779495


 82%|████████▎ | 165/200 [00:20<00:04,  8.25it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 95%|█████████▌| 190/200 [00:23<00:01,  8.06it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 200/200 [00:24<00:00,  8.09it/s]



Evaluating...


 93%|█████████▎| 93/100 [00:10<00:00,  9.11it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 100/100 [00:11<00:00,  9.00it/s]


Train loss: 0.09302549241110683 Val loss: 0.08942818362265825
Threshold: 0.1 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.2 Accuracy: 0.9047138754298218 F1: 0.718971151217429
Threshold: 0.3 Accuracy: 0.9528441505157862 F1: 0.5313993950424183
Threshold: 0.4 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.5 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.6 Accuracy: 0.9541287316348859 F1: 0.4882629870738514

 Epoch 3 / 5


 11%|█         | 22/200 [00:02<00:21,  8.20it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 12%|█▏        | 24/200 [00:02<00:21,  8.20it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 26%|██▌       | 52/200 [00:06<00:18,  8.05it/s]

  Batch    50  of    200.
loss 0.09670725464820862


 51%|█████     | 102/200 [00:12<00:12,  8.08it/s]

  Batch   100  of    200.
loss 0.14699867367744446


 60%|█████▉    | 119/200 [00:14<00:09,  8.21it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 76%|███████▌  | 152/200 [00:18<00:06,  7.91it/s]

  Batch   150  of    200.
loss 0.10054260492324829


 80%|███████▉  | 159/200 [00:19<00:05,  8.18it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 86%|████████▋ | 173/200 [00:21<00:03,  8.22it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 200/200 [00:24<00:00,  8.14it/s]



Evaluating...


 93%|█████████▎| 93/100 [00:10<00:00,  9.10it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 100/100 [00:11<00:00,  9.07it/s]


Train loss: 0.09285081770271063 Val loss: 0.09055008932948112
Threshold: 0.1 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.2 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.3 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.4 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.5 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.6 Accuracy: 0.9541287316348859 F1: 0.4882629870738514

 Epoch 4 / 5


 26%|██▌       | 52/200 [00:06<00:18,  7.87it/s]

  Batch    50  of    200.
loss 0.09447755664587021


 27%|██▋       | 54/200 [00:06<00:17,  8.13it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 32%|███▎      | 65/200 [00:08<00:16,  8.22it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 45%|████▌     | 90/200 [00:11<00:13,  8.24it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 51%|█████     | 102/200 [00:12<00:12,  8.02it/s]

  Batch   100  of    200.
loss 0.10238233953714371


 76%|███████▌  | 152/200 [00:18<00:06,  8.00it/s]

  Batch   150  of    200.
loss 0.08499161899089813


 79%|███████▉  | 158/200 [00:19<00:05,  8.11it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 88%|████████▊ | 176/200 [00:21<00:02,  8.02it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 200/200 [00:24<00:00,  8.13it/s]



Evaluating...


 93%|█████████▎| 93/100 [00:10<00:00,  9.20it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 100/100 [00:11<00:00,  9.00it/s]


Train loss: 0.09453334206715226 Val loss: 0.0892039430513978
Threshold: 0.1 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.2 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.3 Accuracy: 0.9047163175992498 F1: 0.7189816450489994
Threshold: 0.4 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.5 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.6 Accuracy: 0.9541287316348859 F1: 0.4882629870738514

 Epoch 5 / 5


  4%|▍         | 8/200 [00:00<00:22,  8.36it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 26%|██▌       | 52/200 [00:06<00:18,  8.01it/s]

  Batch    50  of    200.
loss 0.0710487961769104


 35%|███▌      | 70/200 [00:08<00:16,  8.11it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 50%|█████     | 100/200 [00:12<00:12,  8.15it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 51%|█████     | 102/200 [00:12<00:12,  7.85it/s]

  Batch   100  of    200.
loss 0.07450773566961288


 61%|██████    | 122/200 [00:14<00:09,  8.28it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


 76%|███████▌  | 152/200 [00:18<00:06,  7.98it/s]

  Batch   150  of    200.
loss 0.10210190713405609


 95%|█████████▌| 190/200 [00:23<00:01,  8.30it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 200/200 [00:24<00:00,  8.16it/s]



Evaluating...


 93%|█████████▎| 93/100 [00:10<00:00,  9.02it/s]

collate_fn err: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)


100%|██████████| 100/100 [00:11<00:00,  8.97it/s]


Train loss: 0.09256078088656068 Val loss: 0.08907548669725657
Threshold: 0.1 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.2 Accuracy: 0.9047187597686777 F1: 0.7189921386248961
Threshold: 0.3 Accuracy: 0.9066041145670523 F1: 0.7183743339157794
Threshold: 0.4 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.5 Accuracy: 0.9541287316348859 F1: 0.4882629870738514
Threshold: 0.6 Accuracy: 0.9541287316348859 F1: 0.4882629870738514


In [None]:
# Scratch shape check: run a 6-layer encoder stack on a random (10, 32, 512) input.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(10, 32, 512)
out = transformer_encoder(src)

In [None]:
# The recorded output shape equals the input shape.
print(out.shape)

torch.Size([10, 32, 512])


In [None]:
# Scratch shape check: a single encoder layer on the same random input shape.
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
src = torch.rand(10, 32, 512)
out = encoder_layer(src)

In [None]:
# The recorded output shape equals the input shape.
print(out.shape)

torch.Size([10, 32, 512])


In [None]:
# Scratch shape check: full encoder-decoder nn.Transformer with a (10, 32, 512)
# source and a (20, 32, 512) target.
transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
src = torch.rand((10, 32, 512))
tgt = torch.rand((20, 32, 512))
out = transformer_model(src, tgt)

In [None]:
# The recorded output shape equals the target's shape.
print(out.shape)

torch.Size([20, 32, 512])
