In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import csv

file_path = '/kaggle/input/challenge/Listing_Titles.tsv'

# Read the data from the TSV file
listing_data = pd.read_csv(file_path, sep="\t", dtype=str, keep_default_na=False, na_values=[""], quoting=csv.QUOTE_NONE)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/challenge/Listing_Titles.tsv
/kaggle/input/challenge/output.tsv
/kaggle/input/challenge/Train_Tagged_Titles.tsv


In [2]:
!pip install transformers seqeval[gpu] tqdm

Collecting seqeval[gpu]
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l- \ done
[?25h  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16165 sha256=f933fc3e0b25c80d928facedb0f71506a9ce6308e2e194997dfb31d674b38af7
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [3]:
!huggingface-cli login --token #removed

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
import re

def remove_special_characters_from_sentence(sentence):
    # Define a regular expression pattern to match all special characters

    # Split the sentence into words
    words = sentence.split()

    # Initialize a list to store cleaned words
    cleaned_words = []

    # Iterate through the words and remove special characters
    for word in words:
        # Check if the word has a length greater than one
        if len(word) > 1:
            # Use re.sub to replace matched special characters with an empty string
            
            filtered_word = ''
            for char in word:
                if char.isalpha():  # Check if the character is alphabetical
                    filtered_word += char
                elif char.isdigit():  # Check if the character is a numerical digit
                    filtered_word += char
            
            
            if(len(filtered_word)==0):
                cleaned_words.append(word[0])
            else:
                cleaned_words.append(filtered_word)
        else:
            # Add the original word as it is if its length is one or less
            cleaned_words.append(word)

    # Combine the cleaned words to form the modified sentence
    modified_sentence = " ".join(cleaned_words)
    
    return modified_sentence


print(remove_special_characters_from_sentence("Nike Air Force 1 '07 White Black DH7561-102	Nike"))

Nike Air Force 1 07 White Black DH7561102 Nike


In [5]:
file_path = '/kaggle/input/challenge/Train_Tagged_Titles.tsv'

# Read the data from the TSV file
trained_data = pd.read_csv(file_path, sep="\t", dtype=str, keep_default_na=False, na_values=[""], quoting=csv.QUOTE_NONE)
trained_data = trained_data.replace('No Tag', '0')

trained_data = trained_data.fillna(method='ffill')
trained_data['word_labels'] = trained_data[['Record Number','Tag']].groupby(['Record Number'])['Tag'].transform(lambda x: ','.join(x))
data = trained_data[["Title", "word_labels"]].drop_duplicates().reset_index(drop=True)
data.rename(columns={'Title': 'sentence'}, inplace=True)
data['sentence'] = data['sentence'].apply(remove_special_characters_from_sentence)

data.head()

Unnamed: 0,sentence,word_labels
0,Supreme Nike SB Dunk High By any Means Red US1...,"Modell,Marke,Produktlinie,Produktlinie,Schuhsc..."
1,New Balance 530 Männer und Frauen Laufschuhe m...,"Marke,Marke,Modell,Abteilung,0,Abteilung,Produ..."
2,♥ MICHAEL KORS Sneaker Gr 39 ♥,"0,Marke,Marke,Stil,0,EU-Schuhgröße,0"
3,New Balance ML 574 EGO Turnschuhe grün,"Marke,Marke,Modell,Modell,Modell,Produktart,Farbe"
4,"Nike air jordan 9 og , space jam , hare , bugs...","Marke,Produktlinie,Produktlinie,Modell,Modell,..."


In [6]:
label2id = {k: v + 1 for v, k in enumerate(trained_data.Tag.unique())}

id2label = {v + 1: k for v, k in enumerate(trained_data.Tag.unique())}

label2id['0']=0


In [7]:
id2label[0]='0'



In [8]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)

cuda


In [9]:
len(data)
data.iloc[23].word_labels


'Abteilung,Abteilung,Produktart,Besonderheiten,Gewebeart,Aktivität,Produktart,Stil,EU-Schuhgröße'

In [10]:
data.iloc[23].sentence


'Herren Damenschuhe Laufschuhe Atmungsaktiv Mesh Running Shoes Sneaker Gr3545'

In [11]:
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertConfig, BertForTokenClassification, BertForMaskedLM
from sklearn.model_selection import KFold #implementing kfold

MAX_LEN = 128
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 2
EPOCHS = 15
LEARNING_RATE = 1e-05
MAX_GRAD_NORM = 10
tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-base-german-uncased')




Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/247k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/433 [00:00<?, ?B/s]

In [12]:
model = BertForMaskedLM.from_pretrained('dbmdz/bert-base-german-uncased')

Downloading model.safetensors:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-base-german-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
mlm_train = listing_data[5000:30000]
mlm_train['Title'] = mlm_train['Title'].apply(remove_special_characters_from_sentence)
mlm_train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mlm_train['Title'] = mlm_train['Title'].apply(remove_special_characters_from_sentence)


Unnamed: 0,Record Number,Title
5000,5001,NIKE FREE RUN 3 SHIELD 50 SNEAKERS LAUFSCHUHE ...
5001,5002,DAMEN SCHUHE 153351 SNEAKER WEISS 38 NEU
5002,5003,Converse Sneakers Damen Gr . DE 36 Leder grau ...
5003,5004,Adidas Freizeitschuh Gr UK 9
5004,5005,K Swiss Schuhe schwarz Leder größe 41 low snea...


In [14]:
mlm_train.head()

Unnamed: 0,Record Number,Title
5000,5001,NIKE FREE RUN 3 SHIELD 50 SNEAKERS LAUFSCHUHE ...
5001,5002,DAMEN SCHUHE 153351 SNEAKER WEISS 38 NEU
5002,5003,Converse Sneakers Damen Gr . DE 36 Leder grau ...
5003,5004,Adidas Freizeitschuh Gr UK 9
5004,5005,K Swiss Schuhe schwarz Leder größe 41 low snea...


In [15]:
text = mlm_train['Title'].tolist()


In [16]:
inputs = tokenizer(text, return_tensors='pt', max_length=512, truncation=True, padding='max_length')


In [17]:
inputs['labels'] = inputs.input_ids.detach().clone()


In [18]:
rand = torch.rand(inputs.input_ids.shape)
# create mask array
mask_arr = (rand < 0.15) * (inputs.input_ids != 101) * \
           (inputs.input_ids != 102) * (inputs.input_ids != 0)

In [19]:
selection = []

for i in range(inputs.input_ids.shape[0]):
    selection.append(
        torch.flatten(mask_arr[i].nonzero()).tolist()
    )

In [20]:
for i in range(inputs.input_ids.shape[0]):
    inputs.input_ids[i, selection[i]] = 103

In [21]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings.input_ids)

dataset = Dataset(inputs)


In [22]:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir='out',
    per_device_train_batch_size=16,
    num_train_epochs=4,
     report_to='none'  
)

In [23]:

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset
)

In [24]:
trainer.train()


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Step,Training Loss
500,0.1012
1000,0.0165
1500,0.0152
2000,0.0128
2500,0.0123
3000,0.012
3500,0.0101
4000,0.0096
4500,0.0095
5000,0.008


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


TrainOutput(global_step=6252, training_loss=0.018037672959606538, metrics={'train_runtime': 6429.2492, 'train_samples_per_second': 15.554, 'train_steps_per_second': 0.972, 'total_flos': 2.6320659456e+16, 'train_loss': 0.018037672959606538, 'epoch': 4.0})

In [25]:
model.save_pretrained("downstream-german-bert")
tokenizer.save_pretrained("downstream-german-bert")


('downstream-german-bert/tokenizer_config.json',
 'downstream-german-bert/special_tokens_map.json',
 'downstream-german-bert/vocab.txt',
 'downstream-german-bert/added_tokens.json')

In [26]:
tokenizer = BertTokenizer.from_pretrained("downstream-german-bert")

#use the model that was just saved on next word prediction

In [27]:
def tokenize_and_preserve_labels(sentence, text_labels, tokenizer):


    tokenized_sentence = []
    labels = []

    sentence = sentence.strip()

    for word, label in zip(sentence.split(' '), text_labels.split(",")):

        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)

        tokenized_sentence.extend(tokenized_word)

        labels.extend([label] * n_subwords)
    return tokenized_sentence, labels
#this function is used to tokenize the sentence and preserve the labels for each word in the sentence

In [28]:
class dataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __getitem__(self, index):
        sentence = self.data.sentence[index]  
        word_labels = self.data.word_labels[index]  
        tokenized_sentence, labels = tokenize_and_preserve_labels(sentence, word_labels, self.tokenizer)
        
        tokenized_sentence = ["[CLS]"] + tokenized_sentence + ["[SEP]"] # add special tokens
        labels.insert(0, "0") # add outside label for [CLS] token
        labels.insert(-1, "0") # add outside label for [SEP] token

        maxlen = self.max_len

        if (len(tokenized_sentence) > maxlen):
          tokenized_sentence = tokenized_sentence[:maxlen]
          labels = labels[:maxlen]
        else:
          tokenized_sentence = tokenized_sentence + ['[PAD]'for _ in range(maxlen - len(tokenized_sentence))]
          labels = labels + ["0" for _ in range(maxlen - len(labels))]

        attn_mask = [1 if tok != '[PAD]' else 0 for tok in tokenized_sentence]
        
        ids = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)

        label_ids = [label2id[label] for label in labels]
        
        
        return {
              'ids': torch.tensor(ids, dtype=torch.long),
              'mask': torch.tensor(attn_mask, dtype=torch.long),
              #'token_type_ids': torch.tensor(token_ids, dtype=torch.long),
              'targets': torch.tensor(label_ids, dtype=torch.long)
        } 
    
    def __len__(self):
        return self.len
  #this function is used to tokenize the sentence and preserve the labels for each word in the sentence. It also adds the special tokens [CLS] and [SEP] to the sentence and the outside label for these tokens. It also pads the sentence and the labels to the max length of the sentence. It also creates the attention mask for the sentence. It also converts the tokens to ids and the labels to ids.

In [29]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
#this is used to implement kfold cross validation

In [30]:
train_size = 0.99
train_dataset = data.sample(frac=train_size,random_state=200)
test_dataset = data.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(data.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = dataset(train_dataset, tokenizer, MAX_LEN)
testing_set = dataset(test_dataset, tokenizer, MAX_LEN)

#this is used to create the training and testing dataset

FULL Dataset: (5000, 2)
TRAIN Dataset: (4950, 2)
TEST Dataset: (50, 2)


In [31]:
training_set[10]


{'ids': tensor([  102, 27677,   195,  4606,  2835,  3114, 15709,   806, 16731,   108,
         12376, 30937,   806,  7565,   806,  4192, 15709,   806,  5838,  1061,
          6949,   103,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,  

In [32]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

#this is used to create the training and testing dataloader

In [33]:
model = BertForTokenClassification.from_pretrained('downstream-german-bert', 
                                                   num_labels=len(id2label),
                                                   id2label=id2label,
                                                   label2id=label2id)
model.to(device)

#this is used to create the model for the token classification task. It uses the pretrained model that was saved earlier. It also uses the id2label and label2id dictionaries that were created earlier.

Some weights of BertForTokenClassification were not initialized from the model checkpoint at downstream-german-bert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31102, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [34]:
ids = training_set[0]["ids"].unsqueeze(0)
mask = training_set[0]["mask"].unsqueeze(0)
targets = training_set[0]["targets"].unsqueeze(0)
ids = ids.to(device)
mask = mask.to(device)
targets = targets.to(device)
outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
initial_loss = outputs[0]
initial_loss

#this is used to check the initial loss of the model

tensor(3.1844, device='cuda:0', grad_fn=<NllLossBackward0>)

In [35]:
tr_logits = outputs[1]
tr_logits.shape

torch.Size([1, 128, 37])

In [36]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
#this is used to create the optimizer for the model

In [37]:


def train(epoch):
    tr_loss, tr_accuracy = 0, 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []

    model.train()
    
    for idx, batch in enumerate(training_loader):
        
        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss += loss.item()

        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)
        
        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")
           

        flattened_targets = targets.view(-1) 
        active_logits = tr_logits.view(-1, model.num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) 

        active_accuracy = mask.view(-1) == 1 
        targets = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)
        
        tr_preds.extend(predictions)
        tr_labels.extend(targets)
        
        tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
    
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")
#this is used to train the model for each epoch. It also calculates the training loss and training accuracy for each epoch

In [38]:
for epoch in range(EPOCHS):
    print(f"Training epoch: {epoch + 1}")
    train(epoch)

#iterate over the number of epochs and train the model for each epoch

Training epoch: 1
Training loss per 100 training steps: 3.4841644763946533
Training loss per 100 training steps: 0.5849278569221497
Training loss per 100 training steps: 0.43911974057925873
Training loss per 100 training steps: 0.3568561789502337
Training loss per 100 training steps: 0.30545895401750717
Training loss per 100 training steps: 0.2699214471002182
Training loss per 100 training steps: 0.24272437155643437
Training loss per 100 training steps: 0.22119858281716132
Training loss per 100 training steps: 0.20418251159550918
Training loss per 100 training steps: 0.18997048091073082
Training loss per 100 training steps: 0.1781149003924532
Training loss per 100 training steps: 0.16857497302982805
Training loss per 100 training steps: 0.15934016878059837
Training loss epoch: 0.1565909390826747
Training accuracy epoch: 0.77303344123693
Training epoch: 2
Training loss per 100 training steps: 0.034165408462285995
Training loss per 100 training steps: 0.048387292136281436
Training loss p

In [39]:
model.push_to_hub("codern/downstream-german-bert")
tokenizer.push_to_hub("codern/downstream-german-bert")

#save the model for future use

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/codern/downstream-german-bert/commit/bac3f1fc402547018953e18772abd3b47c72bef7', commit_message='Upload tokenizer', commit_description='', oid='bac3f1fc402547018953e18772abd3b47c72bef7', pr_url=None, pr_revision=None, pr_num=None)

In [40]:
def valid(model, testing_loader):

    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
    
    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['ids'].to(device, dtype = torch.long)
            mask = batch['mask'].to(device, dtype = torch.long)
            targets = batch['targets'].to(device, dtype = torch.long)
            
            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs.loss, outputs.logits
            
            eval_loss += loss.item()

            nb_eval_steps += 1
            nb_eval_examples += targets.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            flattened_targets = targets.view(-1) 
            active_logits = eval_logits.view(-1, model.num_labels) 
            flattened_predictions = torch.argmax(active_logits, axis=1)

            active_accuracy = mask.view(-1) == 1
            targets = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(targets)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
    
    #print(eval_labels)
    #print(eval_preds)

    labels = [id2label[id.item()] for id in eval_labels]
    predictions = [id2label[id.item()] for id in eval_preds]

    #print(labels)
    #print(predictions)
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions

#this is used to evaluate the model on the validation dataset. It also calculates the validation loss and validation accuracy for each epoch

In [41]:
labels, predictions = valid(model, testing_loader)


Validation loss per 100 evaluation steps: 0.183653324842453
Validation Loss: 0.03702047369501088
Validation Accuracy: 0.9621718426406788
