In [73]:
import pandas as pd

In [45]:
df = pd.read_csv('articles_ner.csv')

In [46]:
df = df.iloc[:30]


In [47]:
texts = df['articles']

In [36]:
print(df.shape)

(30, 5)


In [None]:
import json 
from itertools import groupby
import spacy 
nlp = spacy.load('en_core_web_sm')

In [None]:
def doc_to_spans(doc):
    tokens = [(tok.text, tok.idx, tok.ent_type_) for tok in doc]
    results = []
    entities = set()
    for entity, group in groupby(tokens, key=lambda t: t[-1]):
        if not entity:
            continue
        group = list(group)
        _, start, _ = group[0]
        word, last, _ = group[-1]
        text = ' '.join(item[0] for item in group)
        end = last + len(word)
        results.append({
            'from_name': 'label',
            'to_name': 'text',
            'type': 'labels',
            'value': {
                'start': start,
                'end': end,
                'text': text,
                'labels': [entity]
            }
        })
        entities.add(entity)

    return results, entities

In [None]:
text = []
for idx, row in df.iterrows():
    text.append(row['articles'])


In [None]:
print(text)




In [48]:

# Prepare Label Studio tasks in import JSON format with the model predictions:
entities = set()
tasks = []
for text in texts:
    doc = nlp(text)
    spans, ents = doc_to_spans(doc)
    entities |= ents
    predictions = [{'model_version': 'en_core_web_sm', 'result': spans}]
    tasks.append({
        'data': {'text': text},
        'predictions': predictions
    })

# Save Label Studio tasks.json
print(f'Save {len(tasks)} tasks to "tasks.json"')
with open('tasks.json', mode='w') as f:
    json.dump(tasks, f, indent=2)
    
# Save class labels as a txt file
print('Named entities are saved to "named_entities.txt"')
with open('named_entities.txt', mode='w') as f:
    f.write('\n'.join(sorted(entities)))



Save 30 tasks to "tasks.json"
Named entities are saved to "named_entities.txt"


In [4]:
import json

def span_match(pred_span, gold_span):
    """ Check if predicted span matches gold span with label. """
    return (pred_span['value']['start'] == gold_span['value']['start'] and
            pred_span['value']['end'] == gold_span['value']['end'] and
            set(pred_span['value']['labels']) == set(gold_span['value']['labels']))  # Ensure labels match

tasks = json.load(open('annotations1.json'))

true_positive = 0
false_positive = 0
false_negative = 0
true_negative = 0

for task in tasks:
    full_text = task['data']['text']  # Full document text
    total_text_length = len(full_text)

    # Extract gold standard annotations
    gold_result = task['annotations'][0]['result']
    for r in gold_result:
        r.pop('id', None)

    # Extract model predictions
    prediction_result = task['annotations'][0]['prediction']['result']
    for r in prediction_result:
        r.pop('id', None)

    matched_gold_indices = set()
    predicted_spans = set()
    gold_spans = set()

    # Track spans along with labels
    for pred in prediction_result:
        predicted_spans.add((pred['value']['start'], pred['value']['end'], tuple(pred['value']['labels'])))
    
    for gold in gold_result:
        gold_spans.add((gold['value']['start'], gold['value']['end'], tuple(gold['value']['labels'])))

    # True Positives and False Positives
    for pred in prediction_result:
        found_match = False
        for i, gold in enumerate(gold_result):
            if i not in matched_gold_indices and span_match(pred, gold):
                true_positive += 1
                matched_gold_indices.add(i)
                found_match = True
                break
        if not found_match:
            false_positive += 1  # Wrong span OR wrong label

    # False Negatives: Gold spans that were not matched
    false_negative += len(gold_result) - len(matched_gold_indices)

    # True Negatives: Text portions where no labels exist
    total_annotated_chars = sum(end - start for start, end, _ in gold_spans)
    total_predicted_chars = sum(end - start for start, end, _ in predicted_spans)

    tn_characters = total_text_length - (total_annotated_chars + total_predicted_chars)
    true_negative += max(0, tn_characters)  # Ensure TN is non-negative

# Compute metrics
precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

print(f"True Positives: {true_positive}")
print(f"False Positives: {false_positive}")
print(f"False Negatives: {false_negative}")
print(f"True Negatives: {true_negative}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


True Positives: 1492
False Positives: 366
False Negatives: 454
True Negatives: 44745
Precision: 0.80
Recall: 0.77
F1 Score: 0.78
