# Knowledge Graph Construction
> Relation and entity extraction from text

In [None]:
#| default_exp ml.kg.cons

In [None]:
#| hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export
from typing import TypeAlias, Iterable, List, Set
import numpy as np

In [None]:
#|export

Entity: TypeAlias = tuple[str, str]
Relation: TypeAlias = str
Triplet: TypeAlias = tuple[Entity, Relation, Entity]

In [None]:
#|export

def evaluate_joint_er_extraction(references: Iterable[Triplet], predictions: Iterable[Triplet]) -> dict[str, float]:
    """
    Score predicted triplets against reference triplets using exact matching.

    A prediction counts as a true positive only when the whole triplet
    (head entity, relation, tail entity) is equal to a reference triplet.
    Duplicate predictions are collapsed before scoring.

    Example: [(('John', 'PERSON'), 'works_at', ('Google', 'ORG'))]

    Args:
        references: Gold triplets. Any iterable is accepted, including
            generators; it must not contain duplicates.
        predictions: Predicted triplets. Any iterable is accepted.

    Returns:
        A dict with keys 'precision', 'recall' and 'f1'. A metric whose
        denominator is zero is reported as 0.0.

    Raises:
        AssertionError: If `references` contains duplicate triplets.
    """
    # Materialise first: `references` is only promised to be an Iterable, so
    # it may have no len() and may be exhausted after a single pass.
    reference_list = list(references)
    reference_set = set(reference_list)
    prediction_set = set(predictions)

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is kept for existing callers.
    if len(reference_list) != len(reference_set):
        raise AssertionError("Duplicates found in references")

    true_pos = len(reference_set & prediction_set)
    false_pos = len(prediction_set - reference_set)
    false_neg = len(reference_set - prediction_set)

    # Guard every division so empty inputs yield 0.0 instead of raising.
    precision = true_pos / (true_pos + false_pos) if true_pos + false_pos > 0 else 0.0
    recall = true_pos / (true_pos + false_neg) if true_pos + false_neg > 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0.0

    return {
        'precision': precision,
        'recall': recall,
        'f1': f1_score
    }

In [None]:
# Gold triplets for the worked example.
references = [
    (('John', 'PERSON'), 'works_at', ('Google', 'ORG')),
    (('Mike', 'PERSON'), 'lives_in', ('Paris', 'LOC')),
]
# Predictions: the first triplet matches exactly, the second differs in its tail entity.
predictions = [
    (('John', 'PERSON'), 'works_at', ('Google', 'ORG')),
    (('Mike', 'PERSON'), 'lives_in', ('New York', 'LOC')),
]

# One hit, one false positive and one false negative.
metrics = evaluate_joint_er_extraction(references, predictions)
print(metrics)

{'precision': 0.5, 'recall': 0.5, 'f1': 0.5}


In [None]:
#| hide
# Run nbdev's export step for this notebook.
import nbdev
nbdev.nbdev_export()