In [1]:
# Required packages
import torch
import re
import csv

In [2]:
# Check if GPU is configured
print(torch.rand(5, 3))

tensor([[0.3223, 0.5575, 0.4429],
        [0.4668, 0.1532, 0.1979],
        [0.0133, 0.1692, 0.7453],
        [0.8223, 0.9694, 0.0355],
        [0.8007, 0.3288, 0.9550]])


In [3]:
train_path = "/DEVEL/code/data/annotation/v1.0.0.9019/42/cpes_rasa_vpv_5k.csv"
test_path = "/DEVEL/code/data/annotation/v1.0.0.9019/42/cpes_rasa_vpv_1k.csv"
num_epochs = 10
num_decay = 0.05

#### Load training set

In [4]:
raw_text = ""
with open(train_path) as csvfile:
    raw_csv = csv.reader(csvfile, delimiter=',')
    for row in raw_csv:
        raw_text = raw_text + "\n" + row[1]

# remove last empty line
train_text = raw_text.split("\n")[1:]

In [5]:
raw_text2 = ""
with open(test_path) as csvfile:
    raw_csv = csv.reader(csvfile, delimiter=',')
    for row in raw_csv:
        raw_text2 = raw_text2 + "\n" + row[1]

# remove last empty line
test_text = raw_text2.split("\n")[1:]

In [6]:

def get_tokens_with_entities(raw_text: str):
    # split the text by spaces only if the space does not occur between square brackets
    # we do not want to split "multi-word" entity value yet
    raw_tokens = re.split(r"\s(?![^\[]*\])", raw_text)

    # a regex for matching the annotation according to our notation [entity_value](entity_name)
    entity_value_pattern = r"\[(?P<value>.+?)\]\((?P<entity>.+?)\)"
    entity_value_pattern_compiled = re.compile(entity_value_pattern, flags=re.I|re.M)

    tokens_with_entities = []

    for raw_token in raw_tokens:
        match = entity_value_pattern_compiled.match(raw_token)
        if match:
            raw_entity_name, raw_entity_value = match.group("entity"), match.group("value")

            # we prefix the name of entity differently
            # B- indicates beginning of an entity
            # I- indicates the token is not a new entity itself but rather a part of existing one
            for i, raw_entity_token in enumerate(re.split("\s", raw_entity_value)):
                entity_prefix = "B" if i == 0 else "I"
                entity_name = f"{entity_prefix}-{raw_entity_name}"
                tokens_with_entities.append((raw_entity_token, entity_name))
        else:
            tokens_with_entities.append((raw_token, "O"))

    return tokens_with_entities

In [7]:
print(get_tokens_with_entities("adobe [acrobat](cpe_product) x (10.1)"))
print(get_tokens_with_entities("red hat [wildfly core](cpe_product) 2.0.0 alpha 2"))
print(get_tokens_with_entities(test_text[0]))
print(get_tokens_with_entities(test_text[1]))


[('adobe', 'O'), ('acrobat', 'B-cpe_product'), ('x', 'O'), ('(10.1)', 'O')]
[('red', 'O'), ('hat', 'O'), ('wildfly', 'B-cpe_product'), ('core', 'I-cpe_product'), ('2.0.0', 'O'), ('alpha', 'O'), ('2', 'O')]
[('discourse', 'B-cpe_vendor'), ('message', 'B-cpe_product'), ('bus', 'I-cpe_product'), ('1.0.15', 'B-cpe_version'), ('for', 'O'), ('ruby', 'O')]
[('nisuta', 'B-cpe_vendor'), ('ns-wir150ne', 'B-cpe_product'), ('firmware', 'I-cpe_product'), ('5.07.41', 'B-cpe_version')]


In [8]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [9]:
class NERDataMaker:
    def __init__(self, texts):
        self.unique_entities = []
        self.processed_texts = []

        temp_processed_texts = []
        for text in texts:
            tokens_with_entities = get_tokens_with_entities(text)
            for _, ent in tokens_with_entities:
                if ent not in self.unique_entities:
                    self.unique_entities.append(ent)
            temp_processed_texts.append(tokens_with_entities)

        self.unique_entities.sort(key=lambda ent: ent if ent != "O" else "")

        for tokens_with_entities in temp_processed_texts:
            self.processed_texts.append([(t, self.unique_entities.index(ent)) for t, ent in tokens_with_entities])

    @property
    def id2label(self):
        return dict(enumerate(self.unique_entities))

    @property
    def label2id(self):
        return {v:k for k, v in self.id2label.items()}

    def __len__(self):
        return len(self.processed_texts)

    def __getitem__(self, idx):
        def _process_tokens_for_one_text(id, tokens_with_encoded_entities):
            ner_tags = []
            tokens = []
            for t, ent in tokens_with_encoded_entities:
                ner_tags.append(ent)
                tokens.append(t)

            return {
                "id": id,
                "ner_tags": ner_tags,
                "tokens": tokens
            }

        tokens_with_encoded_entities = self.processed_texts[idx]
        if isinstance(idx, int):
            return _process_tokens_for_one_text(idx, tokens_with_encoded_entities)
        else:
            return [_process_tokens_for_one_text(i+idx.start, tee) for i, tee in enumerate(tokens_with_encoded_entities)]

    def as_hf_dataset(self, tokenizer):
        from datasets import Dataset, Features, Value, ClassLabel, Sequence
        def tokenize_and_align_labels(examples):
            tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

            labels = []
            for i, label in enumerate(examples[f"ner_tags"]):
                word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
                previous_word_idx = None
                label_ids = []
                for word_idx in word_ids:  # Set the special tokens to -100.
                    if word_idx is None:
                        label_ids.append(-100)
                    elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                        label_ids.append(label[word_idx])
                    else:
                        label_ids.append(-100)
                    previous_word_idx = word_idx
                labels.append(label_ids)

            tokenized_inputs["labels"] = labels
            return tokenized_inputs

        ids, ner_tags, tokens = [], [], []
        for i, pt in enumerate(self.processed_texts):
            ids.append(i)
            pt_tokens,pt_tags = list(zip(*pt))
            ner_tags.append(pt_tags)
            tokens.append(pt_tokens)
        data = {
            "id": ids,
            "ner_tags": ner_tags,
            "tokens": tokens
        }
        features = Features({
            "tokens": Sequence(Value("string")),
            "ner_tags": Sequence(ClassLabel(names=dm.unique_entities)),
            "id": Value("int32")
        })
        ds = Dataset.from_dict(data, features)
        tokenized_ds = ds.map(tokenize_and_align_labels, batched=True)
        return tokenized_ds

In [10]:
# Create Training NER Data Object
dm = NERDataMaker(train_text)
print(f"total examples = {len(dm)}")
print(dm[0:3])

total examples = 5000
[{'id': 0, 'ner_tags': [2, 1, 4, 3], 'tokens': ['netgear', 'r6700', 'firmware', '1.0.2.6']}, {'id': 1, 'ner_tags': [2, 1, 4, 3, 0, 0], 'tokens': ['agentevolution', 'impress', 'listings', '2.1.1', 'for', 'wordpress']}, {'id': 2, 'ner_tags': [2, 1, 4, 4, 4, 3], 'tokens': ['dell', 'supportassist', 'for', 'home', 'pcs', '2.1']}]


In [11]:
# Create NER Data Object
dm_test = NERDataMaker(test_text)
print(f"total examples = {len(dm_test)}")
print(dm_test[0:3])

total examples = 500
[{'id': 0, 'ner_tags': [2, 1, 4, 3, 0, 0], 'tokens': ['discourse', 'message', 'bus', '1.0.15', 'for', 'ruby']}, {'id': 1, 'ner_tags': [2, 1, 4, 3], 'tokens': ['nisuta', 'ns-wir150ne', 'firmware', '5.07.41']}, {'id': 2, 'ner_tags': [2, 1, 4, 4, 3, 0], 'tokens': ['wpgmaps', 'wp', 'google', 'maps', '6.4.08', 'wordpress']}]


## Model training
For this demo, I’ll use distilbert-base-uncased model. The dm object contains few properties which we pass to the AutoModelForTokenClassification.from_pretrained method.

In [13]:
from transformers import AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
model = AutoModelForTokenClassification.from_pretrained("distilbert-base-uncased", num_labels=len(dm.unique_entities), id2label=dm.id2label, label2id=dm.label2id)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN t

In [14]:
train_ds = dm.as_hf_dataset(tokenizer=tokenizer)
test_ds = dm_test.as_hf_dataset(tokenizer=tokenizer)

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [15]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=num_epochs,
    weight_decay=num_decay,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds, 
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

The following columns in the training set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 5000
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 3130


  0%|          | 0/3130 [00:00<?, ?it/s]

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.0355323925614357, 'eval_runtime': 0.595, 'eval_samples_per_second': 840.37, 'eval_steps_per_second': 53.784, 'epoch': 1.0}


Saving model checkpoint to ./results\checkpoint-500
Configuration saved in ./results\checkpoint-500\config.json


{'loss': 0.135, 'learning_rate': 1.6805111821086264e-05, 'epoch': 1.6}


Model weights saved in ./results\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-500\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.01891344040632248, 'eval_runtime': 0.6, 'eval_samples_per_second': 833.333, 'eval_steps_per_second': 53.333, 'epoch': 2.0}


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.016959795728325844, 'eval_runtime': 0.5973, 'eval_samples_per_second': 837.16, 'eval_steps_per_second': 53.578, 'epoch': 3.0}


Saving model checkpoint to ./results\checkpoint-1000
Configuration saved in ./results\checkpoint-1000\config.json


{'loss': 0.0203, 'learning_rate': 1.3610223642172523e-05, 'epoch': 3.19}


Model weights saved in ./results\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-1000\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-1000\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.013904373161494732, 'eval_runtime': 0.599, 'eval_samples_per_second': 834.678, 'eval_steps_per_second': 53.419, 'epoch': 4.0}


Saving model checkpoint to ./results\checkpoint-1500
Configuration saved in ./results\checkpoint-1500\config.json


{'loss': 0.009, 'learning_rate': 1.0415335463258786e-05, 'epoch': 4.79}


Model weights saved in ./results\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-1500\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-1500\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.018952112644910812, 'eval_runtime': 0.5992, 'eval_samples_per_second': 834.44, 'eval_steps_per_second': 53.404, 'epoch': 5.0}


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.01712842658162117, 'eval_runtime': 0.594, 'eval_samples_per_second': 841.763, 'eval_steps_per_second': 53.873, 'epoch': 6.0}


Saving model checkpoint to ./results\checkpoint-2000
Configuration saved in ./results\checkpoint-2000\config.json


{'loss': 0.0043, 'learning_rate': 7.220447284345049e-06, 'epoch': 6.39}


Model weights saved in ./results\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-2000\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-2000\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.019965549930930138, 'eval_runtime': 0.597, 'eval_samples_per_second': 837.572, 'eval_steps_per_second': 53.605, 'epoch': 7.0}


Saving model checkpoint to ./results\checkpoint-2500
Configuration saved in ./results\checkpoint-2500\config.json


{'loss': 0.0019, 'learning_rate': 4.02555910543131e-06, 'epoch': 7.99}


Model weights saved in ./results\checkpoint-2500\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-2500\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-2500\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.01687655970454216, 'eval_runtime': 0.6103, 'eval_samples_per_second': 819.307, 'eval_steps_per_second': 52.436, 'epoch': 8.0}


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.01773788034915924, 'eval_runtime': 0.599, 'eval_samples_per_second': 834.722, 'eval_steps_per_second': 53.422, 'epoch': 9.0}


Saving model checkpoint to ./results\checkpoint-3000
Configuration saved in ./results\checkpoint-3000\config.json


{'loss': 0.0014, 'learning_rate': 8.306709265175719e-07, 'epoch': 9.58}


Model weights saved in ./results\checkpoint-3000\pytorch_model.bin
tokenizer config file saved in ./results\checkpoint-3000\tokenizer_config.json
Special tokens file saved in ./results\checkpoint-3000\special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 16


  0%|          | 0/32 [00:00<?, ?it/s]



Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_loss': 0.018553731963038445, 'eval_runtime': 0.596, 'eval_samples_per_second': 838.885, 'eval_steps_per_second': 53.689, 'epoch': 10.0}
{'train_runtime': 338.3635, 'train_samples_per_second': 147.77, 'train_steps_per_second': 9.25, 'train_loss': 0.027474704291969063, 'epoch': 10.0}


TrainOutput(global_step=3130, training_loss=0.027474704291969063, metrics={'train_runtime': 338.3635, 'train_samples_per_second': 147.77, 'train_steps_per_second': 9.25, 'train_loss': 0.027474704291969063, 'epoch': 10.0})

In [16]:
model.save_pretrained("models/ner_rasa_vpv_v2")
tokenizer.save_pretrained("models/ner_rasa_vpv_v2/tokenizer")

Configuration saved in models/ner_rasa_vpv_v1\config.json
Model weights saved in models/ner_rasa_vpv_v1\pytorch_model.bin


# INFERENCE

In [17]:
from transformers import pipeline
pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=0) # pass device=0 if using gpu
pipe("""softing uagates 1.73 for wordpress""")

[{'entity_group': 'cpe_vendor',
  'score': 0.99989367,
  'word': 'soft',
  'start': 0,
  'end': 4},
 {'entity_group': 'cpe_vendor',
  'score': 0.5673068,
  'word': '##ing',
  'start': 4,
  'end': 7},
 {'entity_group': 'cpe_product',
  'score': 0.99985415,
  'word': 'uagates',
  'start': 8,
  'end': 15},
 {'entity_group': 'cpe_version',
  'score': 0.99989915,
  'word': '1',
  'start': 16,
  'end': 17}]

In [18]:
pipe("""microsoft visual c++ 2013 redistributable (x64) - 12.0.30501""")

[{'entity_group': 'cpe_vendor',
  'score': 0.9998944,
  'word': 'microsoft',
  'start': 0,
  'end': 9},
 {'entity_group': 'cpe_product',
  'score': 0.9955368,
  'word': 'visual c + + 2013 redistributable',
  'start': 10,
  'end': 41},
 {'entity_group': 'cpe_version',
  'score': 0.9998672,
  'word': '12',
  'start': 50,
  'end': 52},
 {'entity_group': 'cpe_version',
  'score': 0.99964035,
  'word': '0',
  'start': 53,
  'end': 54}]

In [19]:
pipe("""google chrome 32.0.1670.5""")

[{'entity_group': 'cpe_vendor',
  'score': 0.99989796,
  'word': 'google',
  'start': 0,
  'end': 6},
 {'entity_group': 'cpe_product',
  'score': 0.9998331,
  'word': 'chrome',
  'start': 7,
  'end': 13},
 {'entity_group': 'cpe_version',
  'score': 0.99989355,
  'word': '32',
  'start': 14,
  'end': 16},
 {'entity_group': 'cpe_version',
  'score': 0.999882,
  'word': '0',
  'start': 17,
  'end': 18},
 {'entity_group': 'cpe_version',
  'score': 0.9970709,
  'word': '1670',
  'start': 19,
  'end': 23}]

In [20]:
pipe("cool house technology ewelink 4.3.0 for android")

[{'entity_group': 'cpe_vendor',
  'score': 0.99988973,
  'word': 'cool',
  'start': 0,
  'end': 4},
 {'entity_group': 'cpe_product',
  'score': 0.99972516,
  'word': 'ewelink',
  'start': 22,
  'end': 29},
 {'entity_group': 'cpe_version',
  'score': 0.99990356,
  'word': '4',
  'start': 30,
  'end': 31},
 {'entity_group': 'cpe_version',
  'score': 0.9997198,
  'word': '3',
  'start': 32,
  'end': 33}]

In [21]:
pipe("fastball productions fastball 2.5.3 for joomla")

[{'entity_group': 'cpe_vendor',
  'score': 0.999835,
  'word': 'fast',
  'start': 0,
  'end': 4},
 {'entity_group': 'cpe_product',
  'score': 0.9998336,
  'word': 'fastball',
  'start': 21,
  'end': 29},
 {'entity_group': 'cpe_version',
  'score': 0.9999032,
  'word': '2',
  'start': 30,
  'end': 31},
 {'entity_group': 'cpe_version',
  'score': 0.99985814,
  'word': '5',
  'start': 32,
  'end': 33}]

In [22]:

pipe("""Microsoft Visual C++ 2013 Redistributable (x64) - 12.0.30501""")
# pipe("""microsoft visual c++ 2013 redistributable (x64) - 12.0.30501""")

# pipe("""google chrome 32.0.1670.5""")
# pipe("""draw.io 2.6.3 for confluence""")


# pipe("""progress sitefinity 9.2""")
# pipe("bitnami containers 7.30.1-debian-10-r40 for laravel")

# pipe("cool house technology ewelink 4.3.0 for android")
# pipe("fastball productions fastball 2.5.3 for joomla")

[{'entity_group': 'cpe_vendor',
  'score': 0.9998944,
  'word': 'microsoft',
  'start': 0,
  'end': 9},
 {'entity_group': 'cpe_product',
  'score': 0.9955368,
  'word': 'visual c + + 2013 redistributable',
  'start': 10,
  'end': 41},
 {'entity_group': 'cpe_version',
  'score': 0.9998672,
  'word': '12',
  'start': 50,
  'end': 52},
 {'entity_group': 'cpe_version',
  'score': 0.99964035,
  'word': '0',
  'start': 53,
  'end': 54}]