# In [None]:
import random
from pathlib import Path

import joblib
import pandas as pd
import spacy
from spacy.training import offsets_to_biluo_tags
from spacy.training.example import Example

# Load dataset (expects the columns 'Sentence', 'Entity' and 'Label')
df = pd.read_csv('../data/dataset.csv')

# Convert dataset into SpaCy training format:
#   (text, {"entities": [(start_char, end_char, label)]})
# Each CSV row yields one training tuple carrying a single entity span.
TRAIN_DATA = []
skipped_rows = 0
for _, row in df.iterrows():
    text = row['Sentence']
    entity = row['Entity']

    # Empty cells are read by pandas as NaN (a float), and an empty entity
    # would produce a zero-length span; neither can be turned into a span.
    if not isinstance(text, str) or not isinstance(entity, str) or not entity:
        skipped_rows += 1
        continue

    # Look the entity up once. str.find returns -1 instead of raising, so a
    # single mismatched row no longer aborts the whole run.
    # NOTE: only the first occurrence of the entity in the sentence is used.
    start = text.find(entity)
    if start == -1:
        skipped_rows += 1
        continue

    entities = [(start, start + len(entity), row['Label'])]
    TRAIN_DATA.append((text, {"entities": entities}))

if skipped_rows:
    print(f"Skipped {skipped_rows} row(s): entity missing or not found in sentence")

# Start from an empty English pipeline and attach a fresh NER component
nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")

# Register the label of every annotated span with the NER component.
# The spans are flattened lazily; the label is the third item of each span.
annotated_labels = (
    span[2]
    for _text, annots in TRAIN_DATA
    for span in annots["entities"]
)
for label in annotated_labels:
    ner.add_label(label)

# Train the model
# Number of full passes over the training data. A single pass (the previous
# behaviour) leaves a freshly initialised model barely trained.
N_EPOCHS = 10

# Build the Example objects once so they can be reused in every epoch.
examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in TRAIN_DATA
]

# `begin_training` is deprecated in spaCy v3; `initialize` is its replacement.
# It returns the optimizer, which is passed explicitly to every update below.
optimizer = nlp.initialize()

for epoch in range(N_EPOCHS):
    # Reshuffle each epoch so the updates do not always follow CSV row order.
    random.shuffle(examples)
    losses = {}
    for example in examples:
        nlp.update([example], sgd=optimizer, losses=losses)
    print(f"Epoch {epoch + 1}/{N_EPOCHS} - losses: {losses}")

# Save trained model
MODEL_DIR = Path("../models")
SPACY_MODEL_PATH = MODEL_DIR / "ner_model"    # directory written by nlp.to_disk
PICKLE_PATH = MODEL_DIR / "ner_model.pkl"     # single file written by joblib

# Make sure the output directory exists before anything is written into it.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Earlier versions of this script passed the .pkl path to nlp.to_disk, which
# creates a directory there; joblib cannot then write a file at that path.
if PICKLE_PATH.is_dir():
    raise IsADirectoryError(
        f"{PICKLE_PATH} is a directory (probably left over from an earlier run); "
        "remove it and run again."
    )

# nlp.to_disk writes a *directory*, so it must not share a path with the
# pickle file. The directory can be loaded back with spacy.load(...).
nlp.to_disk(SPACY_MODEL_PATH)

# Also keep the pickled pipeline at the original .pkl location.
joblib.dump(nlp, PICKLE_PATH)

print("✅ Model training complete! Model saved in /models/ner_model.pkl")
