In [None]:
!pip install transformers seqeval[gpu]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting seqeval[gpu]
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

# Import Libraries


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertConfig, BertForTokenClassification

In [None]:
from torch import cuda
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


# Loading and preprocessing the data

In [None]:
# Location of the '*'-separated, one-word-per-row CSV on the mounted Google Drive.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/Restaurant_NER/dataset/restaurant-bert.csv"

# Forward-fill missing cells from the row above. `.ffill()` replaces the deprecated
# `fillna(method="ffill")` spelling and fills exactly the same way.
data = pd.read_csv(DATA_PATH, sep='*', encoding='unicode_escape').ffill()
data.head(10)

Unnamed: 0,Sentence #,Word,POS,Tag
0,1,good,JJ,B-FOOD
1,1,food,NN,I-FOOD
2,1,and,CC,O
3,1,friendly,RB,B-RORG
4,1,service,NN,I-RORG
5,1,the,DT,O
6,1,restaurant,NN,O
7,1,is,VBZ,O
8,1,on,IN,O
9,1,upper,JJ,O


In [None]:
# Count the non-missing entries in every column (the frame holds one row per word/tag pair).
data.count()

In [None]:
# Inspect the distinct NER tags and how often each of them occurs.
tag_column = data["Tag"]
print(f"Number of tags: {len(tag_column.unique())}")
frequencies = tag_column.value_counts()
frequencies

Number of tags: 7


O         817864
B-FOOD     33130
B-RORG     27502
B-RLOC     19666
I-FOOD      7408
I-RLOC      6523
I-RORG      4209
Name: Tag, dtype: int64

In [None]:
# There are 3 entity categories, each with a "beginning" (B-) and an "inside" (I-) variant,
# plus the "outside" tag O. Sum the B-/I- counts per category and print the categories by
# frequency (highest to lowest).
tags = {}
for tag, count in zip(frequencies.index, frequencies):
    if tag == "O":
        continue
    # Strip only the two-character "B-"/"I-" prefix so the full category name is kept
    # (slicing with [2:5] cut the names down to their first three letters).
    category = tag[2:]
    tags[category] = tags.get(category, 0) + count

print(sorted(tags.items(), key=lambda x: x[1], reverse=True))

[('FOO', 40538), ('ROR', 31711), ('RLO', 26189)]


In [None]:
# Add a "sentence" column: every row receives the space-joined words of its sentence.
data['sentence'] = data.groupby('Sentence #')['Word'].transform(' '.join)
# Add a "word_labels" column: every row receives the comma-joined tags of its sentence.
data['word_labels'] = data.groupby('Sentence #')['Tag'].transform(','.join)
data.head()

Unnamed: 0,Sentence #,Word,POS,Tag,sentence,word_labels
0,1,good,JJ,B-FOOD,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."
1,1,food,NN,I-FOOD,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."
2,1,and,CC,O,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."
3,1,friendly,RB,B-RORG,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."
4,1,service,NN,I-RORG,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."


In [None]:
# Build the two lookup tables between tag strings and integer ids.
# Ids are assigned by enumerating data.Tag.unique().
id2label = dict(enumerate(data.Tag.unique()))
label2id = {label: idx for idx, label in id2label.items()}
label2id

{'B-FOOD': 0,
 'I-FOOD': 1,
 'O': 2,
 'B-RORG': 3,
 'I-RORG': 4,
 'B-RLOC': 5,
 'I-RLOC': 6}

In [None]:
# Keep only the sentence text and its label string, collapse the per-word rows into
# one row per distinct (sentence, word_labels) pair, and renumber the index from 0.
sentence_columns = ["sentence", "word_labels"]
data = data.loc[:, sentence_columns].drop_duplicates()
data = data.reset_index(drop=True)
data.head()

Unnamed: 0,sentence,word_labels
0,good food and friendly service the restaurant ...,"B-FOOD,I-FOOD,O,B-RORG,I-RORG,O,O,O,O,O,O,B-RL..."
1,so we look on the site and it says closes at 1...,"O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,..."
2,letic is a nice restaurant tucked away in the ...,"O,O,O,B-RORG,I-RORG,O,O,O,O,O,O,O,O,O,O,O,O,O,..."
3,we all living in phuket usually have our place...,"O,O,O,B-RLOC,I-RLOC,O,O,O,O,O,B-FOOD,I-FOOD,O,..."
4,definitely one of the best places in bangkok a...,"O,O,O,O,O,O,B-RLOC,I-RLOC,O,O,O,O,O,O,O,O,O,O,..."


# Cross-check a sentence against its labels

In [None]:
# Number of distinct (sentence, word_labels) rows left after de-duplication
len(data)

18684

In [None]:
# Spot-check: the sentence text stored in the row at position 41
data.iloc[41].sentence

'beautiful spot with spectacular food and lovely staff slightly off the beaten track but well worth the journey highly recommend and summarization on 9 restaurant is relaxed vibe'

In [None]:
# Spot-check: the comma-separated tags stored in the row at position 41
data.iloc[41].word_labels

'B-RORG,O,O,O,O,O,B-RORG,B-RORG,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,B-RORG,B-RLOC'

# Preparing the dataset & dataloader

In [None]:
# Fixed hyper-parameters for tokenization and fine-tuning.
MAX_LEN = 128            # every example is truncated/padded to this many word pieces
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 2
EPOCHS = 1
LEARNING_RATE = 1e-05
MAX_GRAD_NORM = 10       # max_norm handed to clip_grad_norm_ during training
SEED = 42

# Seed NumPy and PyTorch so that weight initialisation and batch shuffling repeat
# from one "Restart & Run All" to the next.
np.random.seed(SEED)
torch.manual_seed(SEED)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
def tokenize_and_preserve_labels(sentence, text_labels, tokenizer):
    """
    Tokenize a sentence word by word and repeat each word's label per word piece.

    Word-piece tokenization can split one word into several sub-tokens, which
    breaks the one-label-per-word alignment. Tokenizing each whitespace-separated
    word on its own lets every resulting piece inherit that word's label.

    Args:
        sentence: whitespace-separated words; surrounding whitespace is ignored.
        text_labels: comma-separated labels, one per word.
        tokenizer: object exposing ``tokenize(word)`` that returns a list of pieces.

    Returns:
        ``(pieces, piece_labels)`` - two lists of equal length. Words and labels
        are paired with ``zip``, so any surplus on either side is dropped.
    """
    words = sentence.strip().split()
    word_tags = text_labels.split(",")

    pieces = []
    piece_labels = []
    for current_word, current_tag in zip(words, word_tags):
        sub_tokens = tokenizer.tokenize(current_word)
        pieces += sub_tokens
        # one copy of the word's label for each piece the word was split into
        piece_labels += [current_tag] * len(sub_tokens)

    return pieces, piece_labels

In [None]:
# Dataset class that turns rows of the sentence/label dataframe into PyTorch tensors
class dataset(Dataset):
    """Map-style dataset producing fixed-length inputs for token classification.

    Each item is a dict with three ``torch.long`` tensors of length ``max_len``:
    ``ids`` (token ids), ``mask`` (1 for real tokens, 0 for ``[PAD]``) and
    ``targets`` (label ids looked up in the module-level ``label2id``).

    Args:
        dataframe: object exposing ``sentence`` and ``word_labels`` columns that can be
            indexed with ``0 .. len-1`` (e.g. a DataFrame after ``reset_index(drop=True)``).
        tokenizer: handed to ``tokenize_and_preserve_labels`` and used for
            ``convert_tokens_to_ids``.
        max_len: length of every returned tensor, counting ``[CLS]`` and ``[SEP]``;
            expected to be at least 2.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, index):
        """Return the encoded example stored at ``index``."""
        # step 1: tokenize word by word (each word piece inherits its word's label)
        sentence = self.data.sentence[index]
        word_labels = self.data.word_labels[index]
        tokenized_sentence, labels = tokenize_and_preserve_labels(sentence, word_labels, self.tokenizer)

        # step 2: truncate the word pieces first, leaving room for [CLS] and [SEP], so a
        # long sentence still ends with [SEP] instead of having it cut off
        max_pieces = max(self.max_len - 2, 0)
        tokenized_sentence = tokenized_sentence[:max_pieces]
        labels = labels[:max_pieces]

        # step 3: add the special tokens with an outside label on each side.
        # (list.insert(-1, "O") would insert *before* the last label and shift the final
        # word's label onto [SEP]; plain concatenation keeps tokens and labels aligned.)
        tokenized_sentence = ["[CLS]"] + tokenized_sentence + ["[SEP]"]
        labels = ["O"] + labels + ["O"]

        # step 4: pad up to max_len
        # NOTE(review): padded positions are labelled "O" rather than an ignore index -
        # confirm whether they are meant to contribute to the training loss.
        pad_len = self.max_len - len(tokenized_sentence)
        tokenized_sentence = tokenized_sentence + ["[PAD]"] * pad_len
        labels = labels + ["O"] * pad_len

        # step 5: attention mask - 1 for every non-padding token
        attn_mask = [1 if tok != '[PAD]' else 0 for tok in tokenized_sentence]

        # step 6: convert tokens and labels to integer ids
        ids = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)
        label_ids = [label2id[label] for label in labels]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(attn_mask, dtype=torch.long),
            'targets': torch.tensor(label_ids, dtype=torch.long)
        }

    def __len__(self):
        """Number of rows in the wrapped dataframe."""
        return self.len

In [None]:
# Split the sentences 80/20 into a training and a test frame (fixed random_state for a repeatable split)
train_size = 0.8
train_dataset = data.sample(frac=train_size, random_state=200)
# everything that was not sampled for training becomes the test frame
test_dataset = data.drop(index=train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print(f"FULL Dataset: {data.shape}")
print(f"TRAIN Dataset: {train_dataset.shape}")
print(f"TEST Dataset: {test_dataset.shape}")

# wrap both frames so that indexing yields encoded tensors
training_set = dataset(train_dataset, tokenizer, MAX_LEN)
testing_set = dataset(test_dataset, tokenizer, MAX_LEN)

FULL Dataset: (18684, 2)
TRAIN Dataset: (14947, 2)
TEST Dataset: (3737, 2)


In [None]:
# Inspect the first encoded training example (a dict of 'ids', 'mask' and 'targets' tensors)
training_set[0]

{'ids': tensor([  101,  2054,  1037,  6919,  4292,  1037,  7273,  2806, 27387, 22704,
          2369,  1037,  2235,  2697,  2007,  7564,  1997,  2300,  3669, 23697,
          2015,  1998,  2317, 27083,  3909,  2058,  2009,  1998,  1996,  2833,
          2003, 12090,  1045, 16755,  1996, 25482, 20130, 14380, 19673, 16521,
          1998,  7680,  7849,  3989,  2006, 14916, 24601, 24728,  2063, 23308,
          3347,  2012, 20704, 11921,  5342,  9497, 11460, 24164,  2854,  2011,
          2061,  8873,  9834,  2003, 12090,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,  

In [None]:
# Token ids of the first training example
training_set[0]["ids"]

tensor([  101,  2054,  1037,  6919,  4292,  1037,  7273,  2806, 27387, 22704,
         2369,  1037,  2235,  2697,  2007,  7564,  1997,  2300,  3669, 23697,
         2015,  1998,  2317, 27083,  3909,  2058,  2009,  1998,  1996,  2833,
         2003, 12090,  1045, 16755,  1996, 25482, 20130, 14380, 19673, 16521,
         1998,  7680,  7849,  3989,  2006, 14916, 24601, 24728,  2063, 23308,
         3347,  2012, 20704, 11921,  5342,  9497, 11460, 24164,  2854,  2011,
         2061,  8873,  9834,  2003, 12090,   102,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0])

In [None]:
# Show the first 30 word pieces of the first training example next to their labels
first_example = training_set[0]
first_tokens = tokenizer.convert_ids_to_tokens(first_example["ids"][:30])
for token, label in zip(first_tokens, first_example["targets"][:30]):
    print(f"{token:10}  {id2label[label.item()]}")

[CLS]       O
what        O
a           O
wonderful   B-RORG
setting     O
a           O
thai        O
style       O
pagoda      O
nestled     O
behind      O
a           O
small       B-RLOC
lake        O
with        O
plenty      O
of          O
water       O
##li        O
##llie      O
##s         O
and         O
white       O
cranes      O
flying      O
over        O
it          O
and         O
the         O
food        O


In [None]:
# Build the PyTorch data loaders for the training and test sets
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=True, num_workers=0)

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

# Defining the model


In [None]:
from torch import cuda
# Resolve the target device again before building the model: GPU if available, else CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [None]:
# Load the pretrained BERT encoder with a token-classification head sized to our tag set;
# passing both mappings stores the tag names in the model config.
model = BertForTokenClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: 

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

# Training the model

In [None]:
# Sanity check: push the first training example through the model as a batch of one
# and look at the loss before any fine-tuning.
first_example = training_set[0]
ids = first_example["ids"].unsqueeze(0).to(device)
mask = first_example["mask"].unsqueeze(0).to(device)
targets = first_example["targets"].unsqueeze(0).to(device)
outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
initial_loss = outputs.loss
initial_loss

tensor(1.9041, device='cuda:0', grad_fn=<NllLossBackward0>)

In [None]:
# Check the logits shape: (batch_size, sequence_length, num_labels)
tr_logits = outputs.logits
tr_logits.shape

torch.Size([1, 128, 7])

In [None]:
# Adam over every model parameter (encoder and classification head) at the configured learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Training function: one pass over the 80% training split to fine-tune the BERT model
def train(epoch):
    """Run one fine-tuning epoch over ``training_loader`` and print loss and accuracy.

    Uses the module-level ``model``, ``training_loader``, ``optimizer``, ``device`` and
    ``MAX_GRAD_NORM``. Accuracy is measured only on positions whose attention mask is 1
    (this includes the [CLS] and [SEP] positions).

    Args:
        epoch: index of the current epoch; accepted for the caller's convenience and
            not used inside the function.

    Returns:
        None. The running mean loss is printed every 100 steps and the epoch averages
        are printed at the end.
    """
    tr_loss, tr_accuracy = 0, 0
    nb_tr_steps = 0
    # put model in training mode
    model.train()

    for idx, batch in enumerate(training_loader):

        ids = batch['ids'].to(device, dtype=torch.long)
        mask = batch['mask'].to(device, dtype=torch.long)
        targets = batch['targets'].to(device, dtype=torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss += loss.item()
        nb_tr_steps += 1

        if idx % 100 == 0:
            loss_step = tr_loss / nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")

        # compute training accuracy on the non-padded positions only
        active_positions = mask.view(-1) == 1                            # (batch_size * seq_len,)
        flattened_logits = tr_logits.view(-1, model.num_labels)          # (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(flattened_logits, dim=1)    # (batch_size * seq_len,)
        active_targets = torch.masked_select(targets.view(-1), active_positions)
        active_predictions = torch.masked_select(flattened_predictions, active_positions)
        tr_accuracy += accuracy_score(active_targets.cpu().numpy(), active_predictions.cpu().numpy())

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping must happen after backward() (when this step's gradients
        # exist) and before step(); clipping ahead of zero_grad() only touched stale
        # gradients that were about to be discarded
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )

        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")

In [None]:
# Fine-tune for EPOCHS passes over the training loader
for epoch in range(EPOCHS):
    print(f"Training epoch: {epoch + 1}")
    train(epoch)

Training epoch: 1
Training loss per 100 training steps: 1.8862504959106445
Training loss per 100 training steps: 0.35634562314146817
Training loss per 100 training steps: 0.26139279540201915
Training loss per 100 training steps: 0.21450505093779676
Training loss per 100 training steps: 0.18435453602166246
Training loss per 100 training steps: 0.16299920457744313
Training loss per 100 training steps: 0.14638282133331415
Training loss per 100 training steps: 0.13329195313835876
Training loss per 100 training steps: 0.12371004873261172
Training loss per 100 training steps: 0.11523981640874083
Training loss per 100 training steps: 0.10786391592646886
Training loss per 100 training steps: 0.10156655785662344
Training loss per 100 training steps: 0.0963542761989124
Training loss per 100 training steps: 0.09158830768353579
Training loss per 100 training steps: 0.08743688832505804
Training loss per 100 training steps: 0.08379305141458634
Training loss per 100 training steps: 0.0803381911723837

# Validation Code

In [None]:
def valid(model, testing_loader):
    """Evaluate ``model`` on ``testing_loader`` and return gold and predicted tag names.

    Only positions whose attention mask is 1 are kept (this includes the [CLS] and
    [SEP] positions). The running mean loss is printed every 100 steps; the mean loss
    and mean per-batch accuracy are printed at the end.

    Args:
        model: token-classification model whose output exposes ``loss`` and ``logits``.
        testing_loader: iterable of batches with ``ids``, ``mask`` and ``targets`` tensors.

    Returns:
        ``(labels, predictions)`` - two flat lists of tag strings (via the module-level
        ``id2label``), concatenated over all batches in iteration order.
    """
    # put model in evaluation mode
    model.eval()

    running_loss = 0
    running_accuracy = 0
    step_count = 0
    gold_ids, predicted_ids = [], []

    with torch.no_grad():
        for step, batch in enumerate(testing_loader):
            ids = batch['ids'].to(device, dtype=torch.long)
            mask = batch['mask'].to(device, dtype=torch.long)
            targets = batch['targets'].to(device, dtype=torch.long)

            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            running_loss += outputs.loss.item()
            step_count += 1

            if step % 100 == 0:
                print(f"Validation loss per 100 evaluation steps: {running_loss / step_count}")

            # flatten to (batch_size * seq_len,) and keep the non-padded positions only
            keep = mask.view(-1) == 1
            batch_predictions = torch.argmax(outputs.logits.view(-1, model.num_labels), axis=1)
            gold = torch.masked_select(targets.view(-1), keep)
            guessed = torch.masked_select(batch_predictions, keep)

            gold_ids.extend(gold.tolist())
            predicted_ids.extend(guessed.tolist())

            running_accuracy += accuracy_score(gold.cpu().numpy(), guessed.cpu().numpy())

    # map integer ids back to tag strings
    labels = [id2label[label_id] for label_id in gold_ids]
    predictions = [id2label[label_id] for label_id in predicted_ids]

    eval_loss = running_loss / step_count
    eval_accuracy = running_accuracy / step_count
    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions

In [None]:
# Evaluate on the test loader and keep the flat lists of gold and predicted tag strings
labels, predictions = valid(model, testing_loader)

Validation loss per 100 evaluation steps: 0.00021007763280067593
Validation loss per 100 evaluation steps: 0.006773041897380752
Validation loss per 100 evaluation steps: 0.006799044705225405
Validation loss per 100 evaluation steps: 0.007697627075214521
Validation loss per 100 evaluation steps: 0.0078703230600357
Validation loss per 100 evaluation steps: 0.008080389259042434
Validation loss per 100 evaluation steps: 0.007718464253128804
Validation loss per 100 evaluation steps: 0.007831641979369908
Validation loss per 100 evaluation steps: 0.00770274435854389
Validation loss per 100 evaluation steps: 0.007864413472567194
Validation loss per 100 evaluation steps: 0.007784455136604631
Validation loss per 100 evaluation steps: 0.008020388534175001
Validation loss per 100 evaluation steps: 0.00798526837013933
Validation loss per 100 evaluation steps: 0.007751428431983508
Validation loss per 100 evaluation steps: 0.007791243392285618
Validation loss per 100 evaluation steps: 0.0077687405270

# Evaluation Metrics

In [None]:
from seqeval.metrics import classification_report

# Per-category precision/recall/F1; each flat tag list is wrapped in an outer list,
# so the whole test set is passed to seqeval as a single sequence
print(classification_report([labels], [predictions]))

              precision    recall  f1-score   support

        FOOD       0.97      0.98      0.98      8369
        RLOC       0.98      0.98      0.98      4284
        RORG       0.98      0.99      0.99      5759

   micro avg       0.98      0.98      0.98     18412
   macro avg       0.98      0.98      0.98     18412
weighted avg       0.98      0.98      0.98     18412



# Save the model

In [None]:
import os

directory = "/content/drive/MyDrive/Colab Notebooks/Restaurant_NER/Model"

# create the folder (and any missing parents); exist_ok=True makes this a no-op when
# it is already there, without a separate check-then-create step
os.makedirs(directory, exist_ok=True)

# save vocabulary of the tokenizer
tokenizer.save_vocabulary(directory)
# save the model weights and its configuration file
model.save_pretrained(directory)
print('All files saved')
print('This tutorial is completed')

All files saved
This tutorial is completed


# Check the output on a sample sentence

In [None]:
from transformers import pipeline

# Wrap the fine-tuned model (moved to the CPU) and the tokenizer in a token-classification
# pipeline; the "simple" aggregation strategy merges adjacent tokens of the same entity
# into one group.
pipe = pipeline(
    task="token-classification",
    model=model.to("cpu"),
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
pipe("The restaurant is on upper Night Market close to the railway road.")

[{'entity_group': 'RLOC',
  'score': 0.9963379,
  'word': 'market',
  'start': None,
  'end': None},
 {'entity_group': 'RLOC',
  'score': 0.9930742,
  'word': 'close to',
  'start': None,
  'end': None}]