In [1]:
# aaron e
# ae-314

# Demo of my finely tuned token classification model for Named Entity Recognition
# Trained on local db: manu/wnut_17 vs. tutorial dataset wnut_17 because of datasets version 4.0 changes
# Changed hyperparameters: learning rate, warmup_ratio. LR changed from 2 e-5 to 1 e-9 and added warmup_ratio = 0.1
# This demo notebook includes a few examples of the model being used for NER
# NOTE: install transformers directly from source to avoid import issues because of version changes

#!pip install -U transformers
! pip install git+https://github.com/huggingface/transformers.git

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-bxr5fh5_
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-bxr5fh5_
  Resolved https://github.com/huggingface/transformers.git to commit 307c5238546ba1675daabc46050c63ffde25f8e6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


## Local Inference on GPU
Model page: https://huggingface.co/ae-314/token_classification_wnut_model

⚠️ If the generated code snippets do not work, please open an issue on the [model repo](https://huggingface.co/ae-314/token_classification_wnut_model) and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏

In [2]:
# High-level route: build a token-classification pipeline for the fine-tuned
# checkpoint; the resulting `pipe` is called directly on raw text.
from transformers import pipeline

pipe = pipeline(
    task="token-classification",
    model="ae-314/token_classification_wnut_model",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/266M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

In [10]:
# First demo sentence: a person's name, a quoted book title and a society located in a city.
example_1 = (
    "Agnes Arber was a botanist and the author of "
    "'Herbals:Their Origin and Evolution', "
    "as well as being a Fellow of the Royal Society in London."
)

In [11]:
# Run the pipeline on the first example. The displayed result is a list with one dict
# per tagged token, each carrying 'entity', 'score', 'index', 'word', 'start' and 'end';
# words appear as lower-cased sub-word pieces (e.g. 'ar' + '##ber').
# In the recorded output the name is tagged as a person and 'london' as a location,
# while the closing '.' is labelled I-creative-work with a low score (~0.51).
pipe(example_1)

[{'entity': 'B-person',
  'score': np.float32(0.9910617),
  'index': 1,
  'word': 'agnes',
  'start': 0,
  'end': 5},
 {'entity': 'I-person',
  'score': np.float32(0.98781735),
  'index': 2,
  'word': 'ar',
  'start': 6,
  'end': 8},
 {'entity': 'I-person',
  'score': np.float32(0.83477294),
  'index': 3,
  'word': '##ber',
  'start': 8,
  'end': 11},
 {'entity': 'B-location',
  'score': np.float32(0.7598351),
  'index': 32,
  'word': 'london',
  'start': 133,
  'end': 139},
 {'entity': 'I-creative-work',
  'score': np.float32(0.5088698),
  'index': 33,
  'word': '.',
  'start': 139,
  'end': 140}]

In [12]:
# Lower-level route: load the tokenizer and the model explicitly and run the
# forward pass by hand instead of going through `pipeline`.
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ae-314/token_classification_wnut_model")
model = AutoModelForTokenClassification.from_pretrained("ae-314/token_classification_wnut_model")

# Encode the first example as PyTorch tensors ("pt") to feed the model.
inputs = tokenizer(example_1, return_tensors="pt")

# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    logits = model(**inputs).logits

In [13]:
# Turn the raw logits into human-readable labels: take the best-scoring class id
# along the last (third) axis, then look each id up in the model's id -> label table.

predictions = logits.argmax(dim=2)
label_names = model.config.id2label
predicted_token_class = [label_names[label_id] for label_id in predictions[0].tolist()]
predicted_token_class

['O',
 'B-person',
 'I-person',
 'I-person',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-location',
 'I-creative-work',
 'I-creative-work']

In [14]:
# Second demo sentence: a three-part person name and a university named after a city.
example_2 = (
    "Jules Richard Petri invented the Petri dish "
    "during his time at Berlin University."
)

# Tag the second example. In the recorded output all four pieces of the name
# ('jules', 'richard', 'pet', '##ri') are tagged as a person and
# 'berlin' / 'university' as B-location / I-location.
pipe(example_2)

[{'entity': 'B-person',
  'score': np.float32(0.9978757),
  'index': 1,
  'word': 'jules',
  'start': 0,
  'end': 5},
 {'entity': 'I-person',
  'score': np.float32(0.9973246),
  'index': 2,
  'word': 'richard',
  'start': 6,
  'end': 13},
 {'entity': 'I-person',
  'score': np.float32(0.99879277),
  'index': 3,
  'word': 'pet',
  'start': 14,
  'end': 17},
 {'entity': 'I-person',
  'score': np.float32(0.9937058),
  'index': 4,
  'word': '##ri',
  'start': 17,
  'end': 19},
 {'entity': 'B-location',
  'score': np.float32(0.97920716),
  'index': 14,
  'word': 'berlin',
  'start': 63,
  'end': 69},
 {'entity': 'I-location',
  'score': np.float32(0.90893704),
  'index': 15,
  'word': 'university',
  'start': 70,
  'end': 80}]

In [23]:
# Third demo sentence: an organisation with its acronym, a year and a country.
example_3 = (
    "The World Wildlife Fund (WWF) was founded in 1961, "
    "is focused on conservation, "
    "and is based in Switzerland."
)

# Tag the third example. In the recorded output only the acronym 'wwf' (B-group,
# score ~0.61) and 'switzerland' (B-location) are tagged; the spelled-out
# organisation name is not.
pipe(example_3)

[{'entity': 'B-group',
  'score': np.float32(0.60621166),
  'index': 6,
  'word': 'wwf',
  'start': 25,
  'end': 28},
 {'entity': 'B-location',
  'score': np.float32(0.9889181),
  'index': 22,
  'word': 'switzerland',
  'start': 95,
  'end': 106}]

In [26]:
# Fourth demo sentence: a sports team name and the city it is based in.
example_4 = (
    "The Golden State Warriors are an American professional basketball team "
    "based in San Francisco."
)

# Tag the fourth example. In the recorded output the team name gets mixed tags
# ('the' B-group, 'golden' B-location, 'state'/'warriors' I-group),
# 'san' / 'francisco' are B-location / I-location, and the closing '.' is
# labelled I-creative-work.
pipe(example_4)

[{'entity': 'B-group',
  'score': np.float32(0.8838734),
  'index': 1,
  'word': 'the',
  'start': 0,
  'end': 3},
 {'entity': 'B-location',
  'score': np.float32(0.73068917),
  'index': 2,
  'word': 'golden',
  'start': 4,
  'end': 10},
 {'entity': 'I-group',
  'score': np.float32(0.5931818),
  'index': 3,
  'word': 'state',
  'start': 11,
  'end': 16},
 {'entity': 'I-group',
  'score': np.float32(0.9798933),
  'index': 4,
  'word': 'warriors',
  'start': 17,
  'end': 25},
 {'entity': 'B-location',
  'score': np.float32(0.99818534),
  'index': 13,
  'word': 'san',
  'start': 80,
  'end': 83},
 {'entity': 'I-location',
  'score': np.float32(0.99061126),
  'index': 14,
  'word': 'francisco',
  'start': 84,
  'end': 93},
 {'entity': 'I-creative-work',
  'score': np.float32(0.85991144),
  'index': 15,
  'word': '.',
  'start': 93,
  'end': 94}]