In [8]:
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score

In [10]:
# Read the two metric-description datasets from disk. Each file is parsed as
# JSON; later cells read the 'metrics' entry of each parsed object.
with open('./input/metrics_dataset-traffic-test.json', 'r') as traffic_file:
    training_data_a = json.loads(traffic_file.read())

with open('./input/metrics_dataset-domesticDeclarations.json', 'r') as declarations_file:
    training_data_b = json.loads(declarations_file.read())

In [11]:
# Combine the 'metrics' entries of both datasets into a NEW list.
# Building a fresh list (instead of extending training_data_a['metrics'] in
# place) keeps the loaded datasets untouched, so re-running this cell does not
# append the second dataset again.
data = [*training_data_a['metrics'], *training_data_b['metrics']]

In [12]:
# Tags that are kept when building the expected annotations.
useful_tags = ['TSE', 'TEE', 'TBE', 'CE', 'AttributeName', 'AttributeValue', 'AGR', 'GBC', 'FDE']

# One (description, ["<slot value>: <tag>", ...]) pair per phrase, keeping only
# the slots whose tag is listed in useful_tags.
cleaned_evaluation_data = [
    (
        phrase['description'],
        [
            f"{slot_value}: {tag}"
            for tag, slot_value in phrase['slots'].items()
            if tag in useful_tags
        ],
    )
    for phrase in data
]

In [13]:
from transformers import AutoModelForTokenClassification, AutoModelForSequenceClassification, AutoTokenizer
from ppinat.ppiparser.PPIDecoder import PPIDecoder

# Tokenizer comes from the base checkpoint; the fine-tuned models are loaded
# from local directories under ./ppinat/models.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Sequence classifier passed to the decoder as its third argument.
text_model = AutoModelForSequenceClassification.from_pretrained(
    './ppinat/models/TextClassification'
)

# Token classifiers, grouped below under the keys "time", "count" and "data".
time_model = AutoModelForTokenClassification.from_pretrained('./ppinat/models/TimeModel')
count_model = AutoModelForTokenClassification.from_pretrained('./ppinat/models/CountModel')
data_model = AutoModelForTokenClassification.from_pretrained('./ppinat/models/DataModel')

model = {
    "time": time_model,
    "count": count_model,
    "data": data_model,
}
decoder = PPIDecoder(model, tokenizer, text_model)

In [14]:
# Run the decoder over every evaluation phrase, collecting the expected
# annotation string ("; "-joined slots) next to the decoder's prediction.
expected_outputs = []
predicted_outputs = []
for description, slot_labels in cleaned_evaluation_data:
    expected_outputs.append("; ".join(slot_labels))
    predicted_outputs.append(decoder.predict_annotation(description))

In [7]:
from hermetrics.levenshtein import Levenshtein
import numpy as np

def compute_slot_metric(preds, real, similarity=None):
    """Average per-phrase slot similarity for string-encoded annotations.

    Each phrase is a string of slots joined by "; ", and each slot is written
    as "<text>: <tag>". The tag is taken after the LAST ": " so that slot text
    which itself contains ": " is still parsed correctly.

    For every expected slot the score is the similarity between its text and
    the text of the predicted slot carrying the same tag. The score is 0 when
    no predicted slot has that tag; when several do, the last one wins.
    Predicted slots without a ": " separator are ignored. Slot scores are
    averaged per phrase and the phrase averages are averaged overall.

    NOTE: a later cell of this notebook defines another ``compute_slot_metric``
    that expects annotation objects instead of strings; whichever cell ran
    last is the one bound to the name.

    Args:
        preds: predicted phrase strings, aligned with ``real`` (extra items in
            the longer sequence are ignored, as with ``zip``).
        real: expected phrase strings.
        similarity: optional ``f(expected_text, predicted_text) -> number``.
            Defaults to ``1 - Levenshtein().normalized_distance(...)``.

    Returns:
        Mean of the per-phrase means. Phrases with no parseable expected slot
        (e.g. an empty string) are skipped; ``nan`` is returned when nothing
        could be scored.
    """
    if similarity is None:
        def similarity(expected_text, predicted_text):
            return 1 - Levenshtein().normalized_distance(expected_text, predicted_text)

    phrase_scores = []
    for real_phrase, pred_phrase in zip(real, preds):
        # rpartition yields (text, separator, tag); separator is "" when the
        # slot is malformed, which replaces the old blanket try/except.
        predicted = [slot.rpartition(": ") for slot in pred_phrase.split("; ")]

        slot_scores = []
        for real_slot in real_phrase.split("; "):
            real_text, real_sep, real_tag = real_slot.rpartition(": ")
            if not real_sep:
                # Nothing to evaluate (empty or malformed expected slot).
                continue
            score = 0
            for pred_text, pred_sep, pred_tag in predicted:
                if pred_sep and pred_tag == real_tag:
                    score = similarity(real_text, pred_text)
            slot_scores.append(score)

        if slot_scores:
            phrase_scores.append(np.mean(slot_scores))

    if not phrase_scores:
        return float("nan")
    return np.mean(phrase_scores)

In [40]:
def compute_slot_metric_per_tag(preds, real, tags=None, similarity=None):
    """Mean slot similarity per tag for string-encoded annotations.

    Each phrase is a string of slots joined by "; ", each slot written as
    "<text>: <tag>" (the tag is taken after the LAST ": ").

    Every expected slot contributes exactly ONE score to its tag: the
    similarity with the predicted slot carrying the same tag (last one wins if
    there are several), or 0 when no predicted slot has that tag. Predicted
    slots without a ": " separator are ignored rather than being counted as an
    additional miss.

    Args:
        preds: predicted phrase strings, aligned with ``real``.
        real: expected phrase strings.
        tags: tags to report. Defaults to the notebook-level ``useful_tags``.
            An expected tag that is not listed is added to the result instead
            of raising ``KeyError``.
        similarity: optional ``f(expected_text, predicted_text) -> number``.
            Defaults to ``1 - Levenshtein().normalized_distance(...)``.

    Returns:
        Dict mapping each tag to the mean of its scores; ``nan`` for tags that
        never appear in ``real``.
    """
    if similarity is None:
        def similarity(expected_text, predicted_text):
            return 1 - Levenshtein().normalized_distance(expected_text, predicted_text)

    tracked_tags = useful_tags if tags is None else tags
    scores_by_tag = {tag: [] for tag in tracked_tags}

    for real_phrase, pred_phrase in zip(real, preds):
        # (text, separator, tag) triples; separator is "" for malformed slots.
        predicted = [slot.rpartition(": ") for slot in pred_phrase.split("; ")]

        for real_slot in real_phrase.split("; "):
            real_text, real_sep, real_tag = real_slot.rpartition(": ")
            if not real_sep:
                # Empty or malformed expected slot: nothing to score.
                continue
            score = 0
            for pred_text, pred_sep, pred_tag in predicted:
                if pred_sep and pred_tag == real_tag:
                    score = similarity(real_text, pred_text)
            scores_by_tag.setdefault(real_tag, []).append(score)

    # Avoid np.mean([]) (RuntimeWarning) for tags with no samples.
    return {
        tag: (np.mean(scores) if scores else float("nan"))
        for tag, scores in scores_by_tag.items()
    }


In [41]:
def compute_slot_accuracy_per_tag(preds, real, tags=None):
    """Exact-match slot accuracy per tag for string-encoded annotations.

    Each phrase is a string of slots joined by "; ", each slot written as
    "<text>: <tag>" (the tag is taken after the LAST ": ").

    Every expected slot contributes exactly ONE score to its tag: 1 when the
    predicted slot with the same tag has identical text (last one wins if
    there are several), otherwise 0. Predicted slots without a ": " separator
    are ignored rather than being counted as an additional miss.

    Args:
        preds: predicted phrase strings, aligned with ``real``.
        real: expected phrase strings.
        tags: tags to report. Defaults to the notebook-level ``useful_tags``.
            An expected tag that is not listed is added to the result instead
            of raising ``KeyError``.

    Returns:
        Dict mapping each tag to the mean of its 0/1 scores; ``nan`` for tags
        that never appear in ``real``.
    """
    tracked_tags = useful_tags if tags is None else tags
    hits_by_tag = {tag: [] for tag in tracked_tags}

    for real_phrase, pred_phrase in zip(real, preds):
        # (text, separator, tag) triples; separator is "" for malformed slots.
        predicted = [slot.rpartition(": ") for slot in pred_phrase.split("; ")]

        for real_slot in real_phrase.split("; "):
            real_text, real_sep, real_tag = real_slot.rpartition(": ")
            if not real_sep:
                # Empty or malformed expected slot: nothing to score.
                continue
            hit = 0
            for pred_text, pred_sep, pred_tag in predicted:
                if pred_sep and pred_tag == real_tag:
                    hit = 1 if pred_text == real_text else 0
            hits_by_tag.setdefault(real_tag, []).append(hit)

    # Avoid np.mean([]) (RuntimeWarning) for tags with no samples.
    return {
        tag: (np.mean(hits) if hits else float("nan"))
        for tag, hits in hits_by_tag.items()
    }


# PPIBOT old models

In [16]:
import numpy as np
def compute_slot_accuracy(preds, real):
    """Average per-phrase exact-match slot accuracy.

    Expected phrases are strings of slots joined by "; ", each slot written as
    "<text>: <tag>"; the tag is taken after the LAST ": " so slot text that
    itself contains ": " is parsed correctly. Predictions are objects that
    expose ``get_chunk_by_tag(tag)``, returning ``None`` or an object whose
    ``text()`` gives the predicted slot text.

    An expected slot scores 1 when a chunk exists for its tag and the chunk
    text equals the expected text, otherwise 0. Slot scores are averaged per
    phrase and the phrase averages are averaged overall.

    Args:
        preds: predicted annotation objects, aligned with ``real``.
        real: expected phrase strings.

    Returns:
        Mean of the per-phrase means. Phrases with no parseable expected slot
        (e.g. an empty string) are skipped; ``nan`` is returned when nothing
        could be scored.
    """
    phrase_scores = []
    for real_phrase, pred_phrase in zip(real, preds):
        slot_scores = []
        for real_slot in real_phrase.split("; "):
            real_text, separator, real_tag = real_slot.rpartition(": ")
            if not separator:
                # Empty or malformed expected slot: nothing to score.
                continue
            predicted_chunk = pred_phrase.get_chunk_by_tag(real_tag)
            matched = predicted_chunk is not None and predicted_chunk.text() == real_text
            slot_scores.append(1 if matched else 0)

        if slot_scores:
            phrase_scores.append(np.mean(slot_scores))

    if not phrase_scores:
        return float("nan")
    return np.mean(phrase_scores)

Slot accuracy: 0.3660130718954248


In [18]:
from hermetrics.levenshtein import Levenshtein
import numpy as np

def compute_slot_metric(preds, real, similarity=None):
    """Average per-phrase slot similarity for annotation-object predictions.

    Expected phrases are strings of slots joined by "; ", each slot written as
    "<text>: <tag>"; the tag is taken after the LAST ": " so slot text that
    itself contains ": " is parsed correctly. Predictions are objects that
    expose ``get_chunk_by_tag(tag)``, returning ``None`` or an object whose
    ``text()`` gives the predicted slot text.

    An expected slot scores the similarity between its text and the predicted
    chunk text, or 0 when no chunk exists for its tag. Slot scores are
    averaged per phrase and the phrase averages are averaged overall.

    NOTE: an earlier cell of this notebook defines a ``compute_slot_metric``
    that works on string predictions; running this cell shadows it.

    Args:
        preds: predicted annotation objects, aligned with ``real``.
        real: expected phrase strings.
        similarity: optional ``f(expected_text, predicted_text) -> number``.
            Defaults to ``1 - Levenshtein().normalized_distance(...)``.

    Returns:
        Mean of the per-phrase means. Phrases with no parseable expected slot
        (e.g. an empty string) are skipped; ``nan`` is returned when nothing
        could be scored.
    """
    if similarity is None:
        def similarity(expected_text, predicted_text):
            return 1 - Levenshtein().normalized_distance(expected_text, predicted_text)

    phrase_scores = []
    for real_phrase, pred_phrase in zip(real, preds):
        slot_scores = []
        for real_slot in real_phrase.split("; "):
            real_text, separator, real_tag = real_slot.rpartition(": ")
            if not separator:
                # Empty or malformed expected slot: nothing to score.
                continue
            predicted_chunk = pred_phrase.get_chunk_by_tag(real_tag)
            if predicted_chunk is None:
                slot_scores.append(0)
            else:
                slot_scores.append(similarity(real_text, predicted_chunk.text()))

        if slot_scores:
            phrase_scores.append(np.mean(slot_scores))

    if not phrase_scores:
        return float("nan")
    return np.mean(phrase_scores)

In [20]:
# Report both aggregate scores for the decoder predictions.
slot_accuracy = compute_slot_accuracy(predicted_outputs, expected_outputs)
print(f"Slot accuracy: {slot_accuracy}")
slot_metric = compute_slot_metric(predicted_outputs, expected_outputs)
print(f"Slot metric: {slot_metric}")

Slot accuracy: 0.3660130718954248
Slot metric: 0.47673013780966694
