In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load model
# Checkpoint identifier handed to both from_pretrained() calls below, so the
# tokenizer and the model always come from the same checkpoint.
model_name = "cahya/bert-base-indonesian-NER"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Setup NER pipeline
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",  # Group tokens into entities
)

# Load data (the relative path is resolved against the current working
# directory, not against this file's location).
df = pd.read_csv("../data/kompas.csv")

# Get only 1 row for testing
row = df.iloc[0]

# Concatenate the title and description into a single text for NER.
# pandas reads empty CSV cells as NaN, and formatting NaN into a string yields
# the literal word "nan"; skip missing fields so that never reaches the model.
# When both fields are present the result is exactly "<title>. <description>".
text = ". ".join(
    str(row[column])
    for column in ("title", "description")
    if pd.notna(row[column])
)
print(text)

# Apply NER (nothing to tag when both fields were missing)
entities = ner_pipeline(text) if text else []

# See the results: one line per entity as "<word> -> <label> (<score>)"
for ent in entities:
    print(f"{ent['word']} -> {ent['entity_group']} ({ent['score']:.2f})")