In [1]:
import pandas as pd
import numpy as np

import spacy
from spacy.training.example import Example

In [None]:
# call trained model
nlp = spacy.load("ner_trained")
doc = nlp("I love when restaurants think using fancy expensive ingrediants makes the food fine cuisine, even with no idea how to use them.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

doc = nlp("The waiters are sweet, the food is tasty and the bill is never too large.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

In [2]:
nlp=spacy.blank("en")
nlp.create_pipe('ner')
nlp.add_pipe('ner')

<spacy.pipeline.ner.EntityRecognizer at 0x7fc683a87820>

In [3]:
path = 'data/restaurants_laptop_train.csv'

df = pd.read_csv(path)
print(df.shape)
df.head()

(3602, 5)


Unnamed: 0,text,aspect_term,from,to,polarity
0,But the staff was so horrible to us.,staff,8,13,-1
1,"To be completely fair, the only redeeming fact...",food,57,61,1
2,"The food is uniformly exceptional, with a very...",food,4,8,1
3,"The food is uniformly exceptional, with a very...",kitchen,55,62,1
4,"The food is uniformly exceptional, with a very...",menu,141,145,0


In [4]:
label = 'aspect'
def process_data(df):
    data = []
    text = df.text.tolist()
    from_pos = df['from'].tolist()
    to_pos = df['to'].tolist()
    
    for i in range(len(df)):
        d = (text[i], {'entities': [(int(from_pos[i]), int(to_pos[i]), 'aspect')]})
        data.append(d)
    return data
        
train_data = process_data(df[:3000])

In [5]:
train_data[0]

('But the staff was so horrible to us.', {'entities': [(8, 13, 'aspect')]})

In [6]:
ner=nlp.get_pipe('ner')

In [7]:
ner.add_label(label)

1

In [8]:
optimizer = nlp.resume_training()
move_names = list(ner.move_names)

In [9]:
from spacy.util import minibatch, compounding
import random
nlp.begin_training()

sizes = compounding(1.0, 4.0, 1.001)
# Training for 30 iterations     
for itn in range(30):
    # shuffle examples before training
    random.shuffle(train_data)
    # batch up the examples using spaCy's minibatch
    batches = minibatch(train_data, size=sizes)
    # ictionary to store losses
    losses = {}
    for batch in batches:
        texts, annotations = zip(*batch)
        # # Calling update() over the iteration
        # nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
        #print(annotations[0])
        doc = nlp.make_doc(texts[0])
        example = Example.from_dict(doc, annotations[0])
        # Update the model
        nlp.update([example], losses=losses, drop=0.3)
        #print("Losses", losses)


  d_xhat = N * dY - sum_dy - dist * var ** (-1.0) * sum_dy_dist


In [10]:
doc = nlp("I love when restaurants think using fancy expensive ingrediants makes the food fine cuisine, even with no idea how to use them.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

doc = nlp("The waiters are sweet, the food is tasty and the bill is never too large.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('cuisine', 'aspect')]
Entities [('waiters', 'aspect'), ('food', 'aspect'), ('bill', 'aspect')]


In [11]:
test_data = df[3001:].text.unique()
num_of_aspects = len(df[3001:])
num_correct_aspect = 0
num_wrong_aspect = 0

for test in test_data:
    text_df = df[df.text == test]
    expected_aspects = text_df.aspect_term.tolist()
    
    doc = nlp(test)
    predicted_aspects = [ent.text for ent in doc.ents]
    
    for aspect in predicted_aspects:
        if aspect in expected_aspects:
            num_correct_aspect += 1
        else:
            num_wrong_aspect += 1

num_unidentified_aspect = num_of_aspects - num_correct_aspect
print('Total number of aspects: ', num_of_aspects)
print('Number of correctly identified aspects: ', num_correct_aspect)
print('Number of misclassified aspect: ', num_wrong_aspect)
print('Number of unidentified aspect: ', num_unidentified_aspect)

Total number of aspects:  601
Number of correctly identified aspects:  301
Number of misclassified aspect:  51
Number of unidentified aspect:  300


In [13]:
nlp.to_disk('ner_trained')

In [14]:
# call trained model
nlp = spacy.load("ner_trained")
doc = nlp("I love when restaurants think using fancy expensive ingrediants makes the food fine cuisine, even with no idea how to use them.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

doc = nlp("The waiters are sweet, the food is tasty and the bill is never too large.")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('cuisine', 'aspect')]
Entities [('waiters', 'aspect'), ('food', 'aspect'), ('bill', 'aspect')]
