In [1]:
import sys

# Put the parent directory on the import path.
# NOTE(review): presumably so the local `spacy_annotator` package resolves
# without being installed -- confirm against the repository layout.
# The membership check keeps the cell idempotent: re-running it does not
# append a duplicate ".." entry.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
import pandas as pd
import spacy_annotator as spa

## Example

In [3]:
# Two short sample sentences used as annotation input.
sample_texts = [
    "New york is lovely, Milan is nice, but london is amazing!",
    "Stockholm is too cold. Ingrid Bergman says so.",
]

# Build the frame and mirror its row index into an explicit "index" column.
df = pd.DataFrame({"text": sample_texts}).assign(index=lambda frame: frame.index)

In [4]:
# Preview the input frame; .head() keeps the rendered output bounded if more
# sample rows are added later (for the current two rows the display is the same).
df.head()

Unnamed: 0,text,index
0,"New york is lovely, Milan is nice, but london ...",0
1,Stockholm is too cold. Ingrid Bergman says so.,1


### Annotation without model

In [5]:
# Entity labels to annotate; no model is passed to this annotator.
entity_labels = ["GPE", "PERSON"]
annotator = spa.Annotator(labels=entity_labels)

In [6]:
# Display the usage instructions exposed by the annotator.
annotator.instructions


            [1mInstructions[0m 

            For each entity type, input must be a DELIMITER separated string. 

            If no entities in text, leave as is and press submit.
            Similarly, if no entities for a particular label, leave as is. 

            Buttons: 

            	 * submit inserts new annotation (or overwrites existing one if one is present). 

            	 * skip moves forward and leaves empty string (or existing annotation if one is present). 

            	 * finish terminates the annotation session.
            


In [7]:
# Start the annotation widgets over the "text" column of `df` and keep the
# returned frame.
# NOTE(review): the annotations appear to come from what is typed into the
# widgets, so this cell's result is not reproducible from code alone -- confirm.
df_labels = annotator.annotate(df=df, col_text="text")

HTML(value='-1 examples annotated, 3 examples left')

Text(value='', description='GPE', layout=Layout(width='auto'), placeholder='ent one, ent two, ent three')

Text(value='', description='PERSON', layout=Layout(width='auto'), placeholder='ent one, ent two, ent three')

HBox(children=(Button(button_style='success', description='submit', style=ButtonStyle()), Button(button_style=…

Output()

In [8]:
# Preview the annotated frame; .head() keeps the rendered output bounded for
# larger inputs (for the current two rows the display is the same).
df_labels.head()

Unnamed: 0,text,index,annotations
0,"New york is lovely, Milan is nice, but london ...",0,"(New york is lovely, Milan is nice, but london..."
1,Stockholm is too cold. Ingrid Bergman says so.,1,(Stockholm is too cold. Ingrid Bergman says so...


In [10]:
# Inspect the annotation stored for the row labelled 1: a (text, {"entities": [...]})
# tuple whose entity entries are (start, end, label) character offsets.
df_labels["annotations"][1]

('Stockholm is too cold. Ingrid Bergman says so.',
 {'entities': [(23, 37, 'PERSON')]})

### Annotation with model

In [11]:
import spacy

In [12]:
# Name of the spaCy pipeline to load.
model_name = "en_core_web_sm"
nlp = spacy.load(model_name)

In [13]:
# Same entity labels, this time with the loaded pipeline passed as `model`.
# This rebinds `annotator`, replacing the model-free instance created earlier.
entity_labels = ["GPE", "PERSON"]
annotator = spa.Annotator(labels=entity_labels, model=nlp)

In [14]:
# Run the annotation widgets again, now with shuffle=True; this overwrites the
# `df_labels` produced by the model-free run.
# NOTE(review): no seed is passed here, so the row order of the result may
# differ between runs -- confirm whether `annotate` accepts a seed.
df_labels = annotator.annotate(df=df, col_text="text", shuffle=True)

HTML(value='-1 examples annotated, 3 examples left')

Text(value='', description='GPE', layout=Layout(width='auto'), placeholder='ent one, ent two, ent three')

Text(value='', description='PERSON', layout=Layout(width='auto'), placeholder='ent one, ent two, ent three')

HBox(children=(Button(button_style='success', description='submit', style=ButtonStyle()), Button(button_style=…

Output()

In [15]:
# Preview the shuffled, annotated frame; .head() keeps the rendered output
# bounded for larger inputs (for the current two rows the display is the same).
df_labels.head()

Unnamed: 0,text,index,annotations
0,Stockholm is too cold. Ingrid Bergman says so.,1,(Stockholm is too cold. Ingrid Bergman says so...
1,"New york is lovely, Milan is nice, but london ...",0,"(New york is lovely, Milan is nice, but london..."


In [16]:
# Inspect the annotation for the row labelled 1 in the shuffled result
# (the "New york ..." text in the frame displayed by the previous cell).
# NOTE(review): [1] on an integer-indexed Series is presumably a label lookup,
# not a positional one -- confirm if the index ever changes type.
df_labels["annotations"][1]

('New york is lovely, Milan is nice, but london is amazing!',
 {'entities': [(0, 8, 'GPE'), (20, 25, 'GPE'), (39, 45, 'GPE')]})