In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForTokenClassification, pipeline
import torch

In [4]:
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-3-mini-4k-instruct",
    device_map="mps",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [5]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")

tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

In [6]:
prompt = "Why did the chicken cross the morbius strip? <|assistant|>"

In [10]:
input_ids = tokenizer(prompt, return_tensors="pt").to("mps")

In [11]:
type(input_ids)

transformers.tokenization_utils_base.BatchEncoding

In [13]:
input_ids["input_ids"]

tensor([[ 3750,  1258,   278,   521, 21475,  4891,   278,  3036, 29890,  2482,
         17820, 29973, 29871, 32001]], device='mps:0')

In [14]:
op = model.generate(
    input_ids["input_ids"],
    max_new_tokens=25,
    use_cache=False,
    pad_token_id=tokenizer.eos_token_id
)

You are not running the flash-attention implementation, expect numerical differences.


In [15]:
type(op)

torch.Tensor

In [16]:
tokenizer.decode(op[0])

'Why did the chicken cross the morbius strip? <|assistant|> The chicken crossed the morbius strip to get to the other side, just like it would cross any other road or'

In [23]:
for i in input_ids["input_ids"][0]:
    print(tokenizer.decode(i))

Why
did
the
ch
icken
cross
the
mor
b
ius
strip
?

<|assistant|>


In [4]:
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

Device set to use mps:0


In [94]:
text = """Trump was particularly peeved by Musk insinuating the president was tied to the late sex offender Jeffrey Epstein, claiming Trump was “in the Epstein files.”"""

# https://www.politico.com/news/2025/06/08/musk-trump-ceasefire-call-00393527

In [95]:
results = ner_pipeline(text)

In [96]:
type(results)

list

In [97]:
len(results)

11

In [98]:
results[0]

{'entity': 'B-PER',
 'score': 0.999616,
 'index': 1,
 'word': 'Trump',
 'start': 0,
 'end': 5}

In [99]:
for i in results:
    print(i["start"], i["end"], i["entity"])

0 5 B-PER
33 35 B-PER
35 37 B-PER
98 105 B-PER
106 107 I-PER
107 109 I-PER
109 113 I-PER
124 129 B-PER
142 143 B-PER
143 145 B-PER
145 149 B-PER


In [120]:
entity_dict = {}

In [121]:
i = 0
while i < len(results):
    beg = results[i]["start"]
    term = results[i]["end"]
    ent = results[i]["entity"]

    if i + 1 >= len(results):
        entity_dict[ent].append((beg, term))
        break

    while i + 1 < len(results) and term == results[i+1]["start"]:
        term = results[i+1]["end"]
        i += 1

    # entity_dict[ent].append((beg, term))
    entity_dict[(beg, term)] = ent
    i += 1

In [122]:
entity_dict

{(0, 5): 'B-PER',
 (33, 37): 'B-PER',
 (98, 105): 'B-PER',
 (106, 113): 'I-PER',
 (124, 129): 'B-PER',
 (142, 149): 'B-PER'}

In [124]:
redac = ""

In [125]:
end = 0
max_end_ind = 0
for key, value in entity_dict.items():
    redac += text[end:key[0]] + f"<{value}>" + " "
    end = key[1] + 1
    max_end_ind = max(max_end_ind, end)
redac += text[max_end_ind:]

In [126]:
print(text)
print(redac)


Trump was particularly peeved by Musk insinuating the president was tied to the late sex offender Jeffrey Epstein, claiming Trump was “in the Epstein files.”
<B-PER> was particularly peeved by <B-PER> insinuating the president was tied to the late sex offender <B-PER> <I-PER>  claiming <B-PER> was “in the <B-PER> files.”
