# In [21]:
import csv
import random

import spacy
from spacy.tokens import Doc
from spacy.training import Example
from spacy.training import offsets_to_biluo_tags

# Names shared by the rest of the script.
# LABELS serves two purposes below: its entries are the column names used
# when reading the CSV file, and they are registered as entity labels on the
# NER component.
LABELS = ["Rating", "Location"]
# Directory the pipeline is written to at the end of the script.
MODEL_DIR = "model"

# Start from an empty English pipeline (tokenizer only, no components yet).
nlp = spacy.blank("en")

# Load the training data from the csv file.
# Each row is read as two columns named after the entries of LABELS: the first
# column supplies the entity label, the second the text carrying that label.
# NOTE(review): every row is treated as data; if the file starts with a
# header row it will be turned into a training example too -- confirm.
train_data = []
# newline="" is what the csv module expects so that line breaks inside quoted
# fields are parsed correctly.
with open("data_copy.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, fieldnames=LABELS, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # Split the row into NER label and text
        label = row[LABELS[0]]
        text = row[LABELS[1]]
        # A row with fewer than two columns yields None for the missing
        # field; skip such rows (and empty values) instead of crashing in
        # the tokenizer.
        if not label or not text:
            continue
        # The whole text is annotated as a single entity, so leading or
        # trailing whitespace would end up inside (or misalign) the span.
        text = text.strip()
        if not text:
            continue
        # Convert the label and text to a SpaCy Example object
        doc = nlp.make_doc(text)
        entities = [(0, len(text), label)]
        # offsets_to_biluo_tags marks tokens it cannot align with the
        # character offsets as "-"; such rows are left out of training.
        biluo_tags = offsets_to_biluo_tags(doc, entities)
        if "-" in biluo_tags:
            continue
        # Only "entities" is passed: the "tags" key of Example.from_dict is
        # the fine-grained part-of-speech annotation, not BILUO NER tags.
        example = Example.from_dict(doc, {"entities": entities})
        train_data.append(example)

# Add the named entity recognizer to the pipeline
ner = nlp.add_pipe("ner")

# Register every label the recognizer has to know before training: the
# configured LABELS plus any label that occurs in the training examples.
# The labels of the examples come from the first CSV column, which is not
# guaranteed to contain only the values listed in LABELS.
data_labels = {ent.label_ for example in train_data for ent in example.reference.ents}
for label in [*LABELS, *sorted(data_labels.difference(LABELS))]:
    ner.add_label(label)

# Train only the named entity recognizer; any other pipeline component is
# disabled for the duration of the block.
with nlp.select_pipes(enable="ner"):
    # Language.initialize replaces the deprecated begin_training. When
    # examples are available they are handed over so the component can be
    # set up from the real data; an empty data set falls back to the
    # argument-less call.
    if train_data:
        optimizer = nlp.initialize(lambda: train_data)
    else:
        optimizer = nlp.initialize()
    for epoch in range(10):
        # Visit the examples in a new order each epoch so the updates do not
        # always follow the order of the CSV file.
        random.shuffle(train_data)
        for example in train_data:
            nlp.update([example], sgd=optimizer)

# Run the trained pipeline on one sample sentence and show each predicted
# entity as a (label, text) pair.
doc = nlp("I had dinner at a 4-star Italian restaurant in New York.")
predicted = [(span.label_, span.text) for span in doc.ents]
print(predicted)

# Write the pipeline to MODEL_DIR on disk.
nlp.to_disk(MODEL_DIR)


# Output:
# [('I', 'I-Amenity'), ('had', 'B-Cuisine'), ('dinner', 'I-Location'), ('at', 'I-Hours'), ('a', 'I-Hours'), ('4', 'I-Location'), ('-', 'I-Amenity'), ('star', 'I-Rating'), ('Italian', 'I-Restaurant_Name'), ('restaurant', 'I-Amenity'), ('in', 'B-Location'), ('New', 'O'), ('York', 'B-Amenity'), ('.', 'B-Cuisine')]
