In [1]:
# seqeval supplies the f1_score metric imported in the next cell.
# NOTE(review): the version is not pinned here — consider pinning it so reruns install the same release.
!pip install seqeval

You are using pip version 10.0.1, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [2]:
import pandas as pd
import numpy as np
import os
import json
from seqeval.metrics import f1_score

import sagemaker
from sagemaker.tensorflow.serving import Model
from sagemaker.tensorflow import TensorFlowPredictor

In [3]:
# Name of the SageMaker endpoint: used as the endpoint name when deploying,
# and to look the endpoint up when it is not deployed from this notebook.
ENDPOINT_NAME = 'ner-bilstm-v8'

# S3 location of the trained model artifact; only read when DEPLOY_MODEL is True.
MODEL_DIR = 's3://sagemaker-eu-west-1-087816224558/tensorflow-training-2020-05-29-19-35-10-455/output/model.tar.gz'

# Set to True to deploy the artifact above to an endpoint; leave False to skip deployment.
DEPLOY_MODEL = False

In [4]:
if DEPLOY_MODEL:
    # Session, execution role and default bucket of the current SageMaker environment.
    sagemaker_session = sagemaker.Session()
    role = sagemaker.get_execution_role()
    bucket = sagemaker_session.default_bucket()

    # Wrap the trained artifact in a TensorFlow Serving model object.
    model = Model(
        model_data=MODEL_DIR,
        role=role,
    )

    # Deploy to a single instance; `predictor` is the object used for inference later on.
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='ml.m4.xlarge',
        endpoint_name=ENDPOINT_NAME,
    )

### Load test set and tag2idx

In [5]:
# Load the tag -> index mapping. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open("utils/objects/tag2idx.json", "r") as tag_file:
    tag2idx = json.load(tag_file)

# The CSV is read without a header row (header=None), so columns are addressed by position.
test = pd.read_csv("../data/processed/bilstm_test.csv", encoding='latin', header=None, names=None)

# Labels: every column except the last 45. Inputs: the columns from position 45 onward.
# NOTE(review): the two slices only partition a row cleanly if it has exactly 90 columns — confirm.
test_y = test.iloc[:,:-45].values
test_X = test.iloc[:,45:].values

### Create predictor from endpoint and compute predictions

In [6]:
# When no model was deployed by this notebook, build the predictor from the endpoint name instead.
if not DEPLOY_MODEL:
    predictor = TensorFlowPredictor(ENDPOINT_NAME)

# Send the full test input matrix to the endpoint in a single call.
predictions = predictor.predict(test_X)
# The response is indexed by 'predictions'; that entry is converted to a numpy array.
# NOTE(review): presumably one score vector per token (pred2label takes an argmax over each) — confirm the shape.
test_pred = np.array(predictions['predictions'])

### Decode tags from test and predictions sets

In [7]:
# Invert the mapping so that indices can be turned back into tag strings.
idx2tag = {index: tag for tag, index in tag2idx.items()}

# Decode the label indices of every test row into tag names.
test_labels = [[idx2tag[tag_index] for tag_index in row] for row in test_y]

def pred2label(pred):
    """Turn per-token score vectors into tag strings.

    For every token, the index of the highest score is looked up in the
    notebook-level ``idx2tag`` mapping, and any "PAD" in the resulting tag
    is replaced by "O".

    Args:
        pred: iterable of sentences, each an iterable of per-token score vectors.

    Returns:
        A list with one list of tag strings per sentence.
    """
    return [
        [idx2tag[np.argmax(scores)].replace("PAD", "O") for scores in sentence]
        for sentence in pred
    ]
    
pred_labels = pred2label(test_pred)

# Sanity check: same number of sentences, and the first sentence has the same length in both sets.
assert len(test_labels) == len(pred_labels)
assert len(test_labels[0]) == len(pred_labels[0])

# Show the second sentence of each set for a quick visual comparison.
print('Test labels example:', test_labels[1], sep='\n')
print('Predicted labels example:', pred_labels[1], sep='\n')

Test labels example:
['O', 'O', 'B-geo', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'B-org', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-org', 'I-org', 'I-org', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
Predicted labels example:
['O', 'O', 'B-geo', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-geo', 'O', 'O', 'O', 'B-org', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-org', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


### Compute F1 Score

In [8]:
# seqeval's f1_score takes the gold labels first and the predictions second.
# The arguments were previously passed the other way round.
test_f1 = f1_score(test_labels, pred_labels)
print(f"Test F1-Score: {test_f1}")

Test F1-Score: 0.7875852170792969
