# Cross-lingual Transfer Learning for NER Tagging


### Read Danish Data in `CoNLL` format

datasets:
- `danish-train.conll`
- `danish-test.conll`
- `danish-dev.conll`

format:
- `danish_word [tab] BIO_tag-NER_tag`
- `\n` between sentences
- entity-types:
    - `LOC`
    - `MISC`
    - `ORG`
    - `PER`

In [3]:
import os
from os import path
import spacy 
from spacy.util import minibatch, compounding
from random import shuffle, seed
import numpy as np
import torch
from spacy.training import Example
from copy import deepcopy
import random

In [4]:
#define function for reading data
def read_data(file):
    """Read a CoNLL-style NER dataset into a list of sentences.

    Args:
        file: an iterable of text lines (e.g. an open file handle); every
            non-blank line holds tab-separated fields (token, bio_tag) and
            a blank line ends the current sentence
    Output:
        a list of sentences
        each sentence made up of a list of tuples with (token,bio_tag)
        an input that contains no lines at all yields [[]]
    """

    data = []
    sentence = []
    for line in file:
        fields = line.strip()
        if fields:
            sentence.append(tuple(fields.split("\t")))
        else:
            data.append(sentence)
            sentence = []
    # Flush the last sentence when the input does not end with a blank line.
    # Checking only `len(data) == 0` silently dropped that sentence whenever
    # earlier sentences had already been collected. `not data` keeps the
    # established [[]] result for an empty input.
    if sentence or not data:
        data.append(sentence)
    return data
            

import io


#test for reading a file with a single line
test_string = "Berlingske\tB-ORG"
assert read_data(io.StringIO(test_string)) == [[("Berlingske","B-ORG")]]


#test for reading an empty file
assert read_data(io.StringIO(""))==[[]]


#test for reading files with line breaker at the end
# (the input now really ends with a line break; before, this call repeated
# the single-line test above and never exercised the trailing-newline case)
read_data(io.StringIO(test_string + "\n"))

[[('Berlingske', 'B-ORG')]]

### Convert Data into spaCy format


In [5]:
# Load the three Danish splits. Each handle is closed as soon as the file has
# been parsed instead of staying open for the rest of the session.
# NOTE(review): assumes the CoNLL files are UTF-8 encoded — confirm.
with open("data/danish-train.conll", encoding="utf-8") as conll_file:
    danish_train = read_data(conll_file)
with open("data/danish-test.conll", encoding="utf-8") as conll_file:
    danish_test = read_data(conll_file)
with open("data/danish-dev.conll", encoding="utf-8") as conll_file:
    danish_dev = read_data(conll_file)


def get_spacy_ner_data(data):
    """Convert token-level BIO annotations into character-offset entity spans.

    Args:
        data: a list of sentences, each a list of (token, tag) tuples where
            tag is "O" or "<B|I>-<LABEL>"
    Output:
        a list of (text, {"entities": [(start, end, label), ...]}) tuples;
        text is the tokens joined by single spaces, start/end are character
        offsets into text with end exclusive

    An "I" tag that does not directly continue an entity with the same label
    (first token of a sentence, after an "O" token, or after an entity with a
    different label) starts a new entity instead of raising IndexError or
    stretching the previous entity across the gap.
    """
    spacy_ner_data = []

    for sent in data:
        ind = 0  # character offset of the current token inside the joined text
        entities = []
        inside = False  # True while the previous token belonged to entities[-1]
        full_sentence = " ".join([pair[0] for pair in sent])
        for token, tag in sent:
            # slicing (not indexing) so an empty tag is treated like "O"
            iob_tag, label = tag[:1], tag[2:]
            token_end = ind + len(token)
            if iob_tag == "I" and inside and entities[-1][2] == label:
                # extend the open entity up to the end of this token
                start = entities[-1][0]
                entities[-1] = (start, token_end, label)
            elif iob_tag in ("B", "I"):
                entities.append((ind, token_end, label))
                inside = True
            else:
                inside = False
            # +1 accounts for the single space separating tokens in the text
            ind = token_end + 1
        spacy_ner_data.append((full_sentence,{"entities":entities}))

    return spacy_ner_data



# Convert every Danish split (train, test, dev — in that order) with
# get_spacy_ner_data.
danish_train_spacy, danish_test_spacy, danish_dev_spacy = map(
    get_spacy_ner_data, (danish_train, danish_test, danish_dev)
)


### Build Evaluation System with `Recall`, `Precision` and `f-score`

In [40]:
def evaluate(system_spacy_data, gold_spacy_data):
    """Score predicted entities against gold entities by exact (start, end, label) match.

    Args:
        system_spacy_data: predictions as a list of
            (text, {"entities": [(start, end, label), ...]}) tuples
        gold_spacy_data: gold annotations in the same format; sentences are
            paired with the predictions by position
    Output:
        (precision, recall, fscore) as percentages; a metric whose
        denominator is zero (no predicted / no gold entities) is reported
        as 0.0 instead of raising ZeroDivisionError
    """
    n_sys = 0
    n_gold = 0
    n_correct = 0
    for sys_sent, gold_sent in zip(system_spacy_data,gold_spacy_data):
        sys_entities = sys_sent[1]["entities"]
        gold_entities = gold_sent[1]["entities"]
        n_sys += len(sys_entities)
        n_gold += len(gold_entities)
        # an entity counts as correct only if span and label both match
        n_correct += len(set(sys_entities) & set(gold_entities))
    precision = n_correct / n_sys * 100 if n_sys else 0.0
    recall = n_correct / n_gold * 100 if n_gold else 0.0
    fscore = 0.0 if n_correct == 0 else 2 * precision*recall / (precision+recall)
    return precision,recall,fscore

# Toy system output: three predicted entities spread over two sentences.
sys_data = [("word1 word2 word3 word4",{"entities":[(0,5,"PER"),(12,17,"LOC")]}),
            ("word1 word2 word3 word4",{"entities":[(6,11,"ORG")]})]

# Toy gold standard: two entities; only (12,17,"LOC") also occurs in sys_data,
# because the PER spans differ in their end offset (5 vs 6).
gold_data = [("word1 word2 word3 word4",{"entities":[(0,6,"PER"),(12,17,"LOC")]}),
             ("word1 word2 word3 word4",{"entities":[]})]

precision, recall, fscore = evaluate(sys_data,gold_data)

# 1 correct entity out of 3 predicted and out of 2 gold entities.
assert precision == 1.0/3 * 100
assert recall == 1.0/2 * 100
assert fscore == 2*1.0/3*1.0/2 / (1.0/3 + 1.0/2) * 100

### Initialize the `NER_MODEL`
- Define `init_model` function:
    - `spacy_train_data`: NER-annotated data in `spaCy` format
    - `language`: language code, either `en` or `da`

- Changes needed for `spaCy-3.0`
    - create blank `text-preprocessing` pipeline with `spacy.blank` called `model`
    - add `ner` submodel to that pipeline (`model`) using `add_pipe()`
    - include all entity types appearing in `spacy_train_data` using `add_label`



In [36]:
def init_model(spacy_train_data, language):
    """Create a blank spaCy pipeline with a freshly initialized `ner` component.

    Args:
        spacy_train_data: list of (text, {"entities": [(start, end, label), ...]})
            tuples; only the entity labels are read here
        language: language code handed to spacy.blank
    Output:
        (model, optimizer): the initialized pipeline and the optimizer
        returned by model.initialize()
    """
    #create a blank spacy model with specified language
    model = spacy.blank(language)

    #setting seed for reproducibility
    seed(0)
    np.random.seed(0)
    spacy.util.fix_random_seed(0)
    torch.manual_seed(0)

    ner = model.add_pipe("ner")

    #add all ne types in training data to ner component
    # (deduplicated in first-seen order so add_label runs once per label
    # instead of once per entity occurrence)
    labels = dict.fromkeys(
        entity[2]
        for _, annotation in spacy_train_data
        for entity in annotation.get("entities")
    )
    for label in labels:
        ner.add_label(label)

    # Make sure we're only training the NER component of the pipeline
    other_pipes = [pipe for pipe in model.pipe_names if pipe != "ner"]
    # select_pipes replaces the deprecated disable_pipes in spaCy 3
    model.select_pipes(disable=other_pipes)

    # Start training so that we can use the model to annotate data
    # (initialize replaces the deprecated begin_training in spaCy 3)
    optimizer = model.initialize()
    return model, optimizer

# Build an untrained Danish pipeline from the training labels; the optimizer
# returned alongside it is discarded here.
danish_untrained_model, _ = init_model(danish_train_spacy,"da")

### Define an `Annotation Function` to make inference with `NER_MODEL` (to be trained)

In [15]:
def annotate(spacy_data,model):
    """Run `model` on every sentence and collect the entities it predicts.

    Args:
        spacy_data: list of (text, annotation) tuples; the annotation part
            is ignored
        model: callable that maps a text to a doc whose `ents` expose
            `start_char`, `end_char` and `label_`
    Output:
        list of (text, {"entities": [(start_char, end_char, label_), ...]})
        tuples, one per input sentence and in the same order
    """
    def predicted_spans(text):
        # one model call per sentence
        return [(e.start_char, e.end_char, e.label_) for e in model(text).ents]

    return [(text, {"entities": predicted_spans(text)}) for text, _gold in spacy_data]

### Train the `NER MODEL` initialized in the previous step

[LINK TO EXAMPLE CODE WALK THRU](https://github.com/explosion/spaCy/blob/v2.x/examples/training/train_ner.py)


Steps:
- Convert `sent,annotation` tuples into `Example` objects
- Shuffle the data at the start of each epoch
- Batch up data with `spacy.util.minibatch`:
    - fixed size
    - varying size: provide a generator to the `size` argument
    - compounding size of batches with `compounding`

In [37]:
def train(spacy_train_data, spacy_dev_data, epochs, language):
    """Train a fresh NER model and report loss and dev-set scores per epoch.

    Args:
        spacy_train_data: training data as (text, annotation) tuples
        spacy_dev_data: development data in the same format, scored after
            every epoch
        epochs: number of passes over the training data
        language: language code forwarded to init_model
    Output:
        the model as it stands after the final epoch
    """
    model, _optimizer = init_model(spacy_train_data,language)

    # shuffle a private copy so the caller's list keeps its order
    shuffled = deepcopy(spacy_train_data)

    for epoch in range(epochs):
        losses = {}
        random.shuffle(shuffled)

        for batch in minibatch(shuffled,size=5):
            examples = [
                Example.from_dict(model.make_doc(text), annotation)
                for text, annotation in batch
            ]
            # losses is updated in place and accumulates over the epoch
            model.update(examples, losses=losses, drop=0.1)

        print("Losses", losses)

        # score the current state of the model on the dev data
        print("Loss for epoch %u: %.4f" % (epoch+1, losses["ner"]))
        dev_predictions = annotate(spacy_dev_data, model)
        p, r, f = evaluate(dev_predictions,spacy_dev_data)
        print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))
    return model

In [41]:
# Train for 20 epochs on the Danish training split; train() prints the loss
# and the dev-set scores after every epoch.
danish_model = train(danish_train_spacy,danish_dev_spacy,20,"da")
print()
print("Evaluating model on development set:")

# Annotate the dev split with the returned model and score it against gold.
danish_dev_spacy_sys = annotate(danish_dev_spacy, danish_model)

p, r, f = evaluate(danish_dev_spacy_sys,danish_dev_spacy)
print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))

Losses {'ner': np.float32(867.636)}
Loss for epoch 1: 867.6360
  PRECISION: 0.00%, RECALL: 0.00%, F-SCORE: 0.00%
Losses {'ner': np.float32(274.55267)}
Loss for epoch 2: 274.5527
  PRECISION: 43.66%, RECALL: 35.73%, F-SCORE: 39.30%
Losses {'ner': np.float32(119.764015)}
Loss for epoch 3: 119.7640
  PRECISION: 42.68%, RECALL: 39.48%, F-SCORE: 41.02%
Losses {'ner': np.float32(137.19289)}
Loss for epoch 4: 137.1929
  PRECISION: 44.06%, RECALL: 43.80%, F-SCORE: 43.93%
Losses {'ner': np.float32(112.6825)}
Loss for epoch 5: 112.6825
  PRECISION: 46.44%, RECALL: 39.48%, F-SCORE: 42.68%
Losses {'ner': np.float32(53.071445)}
Loss for epoch 6: 53.0714
  PRECISION: 46.05%, RECALL: 40.35%, F-SCORE: 43.01%
Losses {'ner': np.float32(52.874985)}
Loss for epoch 7: 52.8750
  PRECISION: 44.85%, RECALL: 35.16%, F-SCORE: 39.42%
Losses {'ner': np.float32(38.8294)}
Loss for epoch 8: 38.8294
  PRECISION: 49.44%, RECALL: 38.33%, F-SCORE: 43.18%
Losses {'ner': np.float32(25.486406)}
Loss for epoch 9: 25.4864
  

### Training with `Pocket Learning` (Documenting the best-performing model)

In [42]:
def train(spacy_train_data, spacy_dev_data, epochs,language):
    """Train a fresh NER model and return the best dev-set snapshot.

    Args:
        spacy_train_data: training data as (text, annotation) tuples
        spacy_dev_data: development data in the same format, scored after
            every epoch
        epochs: number of passes over the training data
        language: language code forwarded to init_model
    Output:
        a deep copy of the model taken after the epoch with the highest dev
        F-score (the earliest such epoch on ties); with epochs == 0 the
        untrained model is returned
    """
    # Initialize model and get optimizer
    model, optimizer = init_model(spacy_train_data,language)

    # Make sure we don't permute the original training data.
    spacy_train_data = deepcopy(spacy_train_data)
    best_f = 0
    best_model = None

    for itn in range(epochs):
        losses = {}
        random.shuffle(spacy_train_data)
        batches = minibatch(spacy_train_data,size=5)

        for batch in batches:
            # Update the model with every batch
            examples = [
                Example.from_dict(model.make_doc(text), annotation)
                for text, annotation in batch
            ]
            model.update(examples,
                         losses=losses, #update the losses in-place
                         drop=0.1)
        print("Loss for epoch %u: %.4f" % (itn+1, losses["ner"]))
        spacy_dev_sys = annotate(spacy_dev_data, model)
        p, r, f = evaluate(spacy_dev_sys,spacy_dev_data)
        print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))
        # Always keep the first snapshot: with `f > best_f` alone, a run whose
        # F-score never rises above 0 returned None and crashed the caller.
        if best_model is None or f > best_f:
            best_f = f
            best_model = deepcopy(model)
    # No epoch ran, so there is no snapshot; fall back to the untrained model.
    return best_model if best_model is not None else model

In [43]:
# Re-run the 20-epoch Danish training with the train() defined just above,
# which returns a deep copy of the best-scoring epoch's model.
danish_model = train(danish_train_spacy,danish_dev_spacy,20,"da")
print()
print("Evaluating model on development set:")
# Score the returned model on the dev split.
danish_dev_spacy_sys = annotate(danish_dev_spacy, danish_model)
p, r, f = evaluate(danish_dev_spacy_sys,danish_dev_spacy)
print("PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))

Loss for epoch 1: 867.6360
  PRECISION: 0.00%, RECALL: 0.00%, F-SCORE: 0.00%
Loss for epoch 2: 274.5527
  PRECISION: 43.66%, RECALL: 35.73%, F-SCORE: 39.30%
Loss for epoch 3: 119.7640
  PRECISION: 42.68%, RECALL: 39.48%, F-SCORE: 41.02%
Loss for epoch 4: 137.1929
  PRECISION: 44.06%, RECALL: 43.80%, F-SCORE: 43.93%
Loss for epoch 5: 112.6825
  PRECISION: 46.44%, RECALL: 39.48%, F-SCORE: 42.68%
Loss for epoch 6: 53.0714
  PRECISION: 46.05%, RECALL: 40.35%, F-SCORE: 43.01%
Loss for epoch 7: 52.8750
  PRECISION: 44.85%, RECALL: 35.16%, F-SCORE: 39.42%
Loss for epoch 8: 38.8294
  PRECISION: 49.44%, RECALL: 38.33%, F-SCORE: 43.18%
Loss for epoch 9: 25.4864
  PRECISION: 40.29%, RECALL: 40.06%, F-SCORE: 40.17%
Loss for epoch 10: 56.0251
  PRECISION: 44.51%, RECALL: 44.38%, F-SCORE: 44.44%
Loss for epoch 11: 18.2236
  PRECISION: 46.23%, RECALL: 40.63%, F-SCORE: 43.25%
Loss for epoch 12: 7.5617
  PRECISION: 46.32%, RECALL: 43.52%, F-SCORE: 44.87%
Loss for epoch 13: 10.2623
  PRECISION: 48.61%, 

### Add pretrained bilingual embeddings during initialization

In [44]:
from spacy.vocab import Vocab

def init_model(spacy_train_data, language):
    """Create a blank spaCy pipeline whose `ner` component uses the vocab stored in data/vocab.

    Args:
        spacy_train_data: list of (text, {"entities": [(start, end, label), ...]})
            tuples; only the entity labels are read here
        language: language code handed to spacy.blank
    Output:
        (model, optimizer): the initialized pipeline and the optimizer
        returned by model.initialize()
    """
    model = spacy.blank(language)#config={"paths":{"vectors":"data/vocab"}})

    # fixed seeds for reproducibility
    seed(0)
    np.random.seed(0)
    spacy.util.fix_random_seed(0)
    torch.manual_seed(0)

    #load pretrained bilingual embeddings
    model.vocab.from_disk("data/vocab")

    #add config argument to add.pipe to ensure the actual usage of embeddings
    ner = model.add_pipe("ner",
                         config={"model":{"tok2vec":{"pretrained_vectors":True}}})

    #add all entity types to ner component
    # (deduplicated in first-seen order so add_label runs once per label
    # instead of once per entity occurrence)
    labels = dict.fromkeys(
        ent[2]
        for _, annotations in spacy_train_data
        for ent in annotations.get("entities")
    )
    for label in labels:
        ner.add_label(label)

    # Make sure we're only training the NER component of the pipeline
    other_pipes = [pipe for pipe in model.pipe_names if pipe != "ner"]
    # select_pipes replaces the deprecated disable_pipes in spaCy 3
    model.select_pipes(disable=other_pipes)

    # Start training so that we can use the model to annotate data
    # (initialize replaces the deprecated begin_training in spaCy 3)
    optimizer = model.initialize()

    return model, optimizer

In [45]:
# Train the Danish model again; train() calls init_model, so this run uses
# the init_model defined just above, which loads data/vocab.
danish_model = train(danish_train_spacy,danish_dev_spacy,20,"da")
print()
print("Evaluating model on development set:")
# Score the returned model on the dev split.
danish_dev_spacy_sys = annotate(danish_dev_spacy, danish_model)
p, r, f = evaluate(danish_dev_spacy_sys,danish_dev_spacy)
print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))

Loss for epoch 1: 910.9559
  PRECISION: 39.84%, RECALL: 14.12%, F-SCORE: 20.85%
Loss for epoch 2: 181.7382
  PRECISION: 44.98%, RECALL: 37.46%, F-SCORE: 40.88%
Loss for epoch 3: 117.4945
  PRECISION: 47.73%, RECALL: 42.36%, F-SCORE: 44.89%
Loss for epoch 4: 98.7817
  PRECISION: 50.81%, RECALL: 45.24%, F-SCORE: 47.87%
Loss for epoch 5: 61.0618
  PRECISION: 47.38%, RECALL: 49.57%, F-SCORE: 48.45%
Loss for epoch 6: 42.8465
  PRECISION: 46.51%, RECALL: 49.86%, F-SCORE: 48.12%
Loss for epoch 7: 29.6811
  PRECISION: 51.34%, RECALL: 44.09%, F-SCORE: 47.44%
Loss for epoch 8: 40.5796
  PRECISION: 48.80%, RECALL: 46.97%, F-SCORE: 47.87%
Loss for epoch 9: 27.2266
  PRECISION: 51.62%, RECALL: 45.82%, F-SCORE: 48.55%
Loss for epoch 10: 91.1609
  PRECISION: 47.80%, RECALL: 43.80%, F-SCORE: 45.71%
Loss for epoch 11: 30.1131
  PRECISION: 50.75%, RECALL: 48.70%, F-SCORE: 49.71%
Loss for epoch 12: 14.3707
  PRECISION: 56.47%, RECALL: 45.24%, F-SCORE: 50.24%
Loss for epoch 13: 7.8904
  PRECISION: 51.72%,

### Transfer-learning: 
Train an English NER System and fine-tune on Danish

In [46]:
#Load english training data
# Each handle is closed as soon as the file has been parsed instead of
# staying open for the rest of the session.
# NOTE(review): assumes the CoNLL files are UTF-8 encoded — confirm.
with open(path.join("data","english-train.conll"), encoding="utf-8") as conll_file:
    english_train = read_data(conll_file)
with open(path.join("data","english-dev.conll"), encoding="utf-8") as conll_file:
    english_dev = read_data(conll_file)

#Convert data into spaCy format
english_spacy_train = get_spacy_ner_data(english_train)
english_spacy_dev = get_spacy_ner_data(english_dev)

#Train english NER model
english_model = train(english_spacy_train,english_spacy_dev,5,"en")

Loss for epoch 1: 11073.0596
  PRECISION: 87.16%, RECALL: 86.00%, F-SCORE: 86.57%
Loss for epoch 2: 5694.5972
  PRECISION: 86.91%, RECALL: 87.39%, F-SCORE: 87.15%
Loss for epoch 3: 4177.0112
  PRECISION: 87.80%, RECALL: 87.78%, F-SCORE: 87.79%
Loss for epoch 4: 3278.9580
  PRECISION: 90.00%, RECALL: 88.93%, F-SCORE: 89.46%
Loss for epoch 5: 2964.8284
  PRECISION: 90.23%, RECALL: 89.38%, F-SCORE: 89.80%


In [47]:
def retrain(spacy_train_data, spacy_dev_data, epochs,model, keep_best=False):
    """Continue training a copy of an existing model on new data.

    Args:
        spacy_train_data: training data as (text, annotation) tuples
        spacy_dev_data: development data in the same format, scored after
            every epoch
        epochs: number of passes over the training data
        model: an already trained pipeline; it is deep-copied, so the
            caller's model is left untouched
        keep_best: when True, return a snapshot of the epoch with the highest
            dev F-score (the same selection the pocket-learning train() does)
            instead of the final-epoch model; defaults to False, which keeps
            the previous behaviour
    Output:
        the fine-tuned copy of `model`
    """
    # Make sure we don't modify the original training data.
    spacy_train_data = deepcopy(spacy_train_data)

    model = deepcopy(model)
    best_f = None
    best_model = None

    # same training loop as in train(), minus the model initialization
    for itn in range(epochs):
        losses = {}
        random.shuffle(spacy_train_data)
        batches = minibatch(spacy_train_data,size=5)

        for batch in batches:
            # Update the model with every batch
            examples = [
                Example.from_dict(model.make_doc(text), annotation)
                for text, annotation in batch
            ]
            model.update(examples,
                         losses=losses,
                         drop=0.1)

        # Evaluate model
        print("Loss for epoch %u: %.4f" % (itn+1, losses["ner"]))
        spacy_dev_sys = annotate(spacy_dev_data, model)
        p, r, f = evaluate(spacy_dev_sys,spacy_dev_data)
        print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))
        if keep_best and (best_model is None or f > best_f):
            best_f = f
            best_model = deepcopy(model)
    # best_model stays None when keep_best is off or no epoch ran
    return best_model if best_model is not None else model

# Fine-tune a copy of the English model on the Danish training data for 20 epochs.
transfer_model = retrain(danish_train_spacy, danish_dev_spacy, 20,english_model)

# NOTE(review): danish_model was last assigned by the run that uses the
# data/vocab-loading init_model — confirm that "basic" is the intended label.
print("Evaluating basic Danish model on test set:")
danish_spacy_test_sys_basic = annotate(danish_test_spacy, danish_model)
p, r, f = evaluate(danish_spacy_test_sys_basic,danish_test_spacy)
print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))
print()

# Same test-set evaluation for the model fine-tuned from English.
print("Evaluating basic transfer model on test set:")
danish_spacy_test_sys_transfer = annotate(danish_test_spacy, transfer_model)
p, r, f = evaluate(danish_spacy_test_sys_transfer,danish_test_spacy)
print("  PRECISION: %.2f%%, RECALL: %.2f%%, F-SCORE: %.2f%%" % (p,r,f))

Loss for epoch 1: 217.5671
  PRECISION: 56.18%, RECALL: 55.04%, F-SCORE: 55.60%
Loss for epoch 2: 93.6002
  PRECISION: 58.76%, RECALL: 59.94%, F-SCORE: 59.34%
Loss for epoch 3: 52.7189
  PRECISION: 62.99%, RECALL: 60.81%, F-SCORE: 61.88%
Loss for epoch 4: 31.0359
  PRECISION: 62.73%, RECALL: 59.65%, F-SCORE: 61.15%
Loss for epoch 5: 20.0590
  PRECISION: 63.98%, RECALL: 59.37%, F-SCORE: 61.58%
Loss for epoch 6: 14.4064
  PRECISION: 60.59%, RECALL: 59.37%, F-SCORE: 59.97%
Loss for epoch 7: 4.3865
  PRECISION: 61.77%, RECALL: 58.21%, F-SCORE: 59.94%
Loss for epoch 8: 4.5780
  PRECISION: 61.89%, RECALL: 58.50%, F-SCORE: 60.15%
Loss for epoch 9: 1.9884
  PRECISION: 59.47%, RECALL: 57.93%, F-SCORE: 58.69%
Loss for epoch 10: 1.9681
  PRECISION: 59.59%, RECALL: 59.08%, F-SCORE: 59.33%
Loss for epoch 11: 6.7378
  PRECISION: 60.00%, RECALL: 58.79%, F-SCORE: 59.39%
Loss for epoch 12: 7.9623
  PRECISION: 59.42%, RECALL: 59.08%, F-SCORE: 59.25%
Loss for epoch 13: 3.0925
  PRECISION: 60.29%, RECALL: