# Import Modules and Init Formatting Functions

In [1]:
import pandas as pd

import torch
from transformers import pipeline


def format_results(results, precision=3):
    """Turn the first prediction list in ``results`` into a ``{label: score}`` dict.

    Args:
        results: A sequence whose first element is an iterable of dicts, each
            holding a ``"label"`` and a numeric ``"score"`` entry.
        precision: Number of decimal places each score is rounded to.

    Returns:
        A dict mapping every label to its rounded score, in iteration order.
    """
    rounded_scores = {}
    for prediction in results[0]:
        rounded_scores[prediction["label"]] = round(prediction["score"], precision)
    return rounded_scores


def format_zeroshot(result, top_k):
    """Reshape a zero-shot result into the nested list format used by the fine-tuned model.

    The zero-shot pipeline output is a dict with parallel ``"labels"`` and
    ``"scores"`` lists. According to the transformers documentation the labels
    are ordered by likelihood, so keeping the first ``top_k`` entries keeps the
    ``top_k`` best predictions.

    Args:
        result: Dict with ``"labels"`` and ``"scores"`` lists of equal length.
        top_k: Maximum number of leading predictions to keep. ``None`` keeps
            all of them. If fewer than ``top_k`` predictions exist, all
            available ones are returned instead of raising ``IndexError``.

    Returns:
        ``[[{"label": ..., "score": ...}, ...]]`` — a single-element outer list
        so the value can be passed straight to ``format_results``.
    """
    # Slice by top_k rather than a hard-coded 3 so the parameter is honoured.
    kept_labels = result["labels"][:top_k]
    kept_scores = result["scores"][:top_k]
    return [[{"label": label, "score": score} for label, score in zip(kept_labels, kept_scores)]]

# Init Global Vars

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE IS", DEVICE)

# Number of highest-scoring classes to report per input.
TOP_K = 3

# Candidate class labels handed to the zero-shot classifier.
LABELS = [
    "Beruf",
    "Herkunft",
    "Kompositionelles Motiv",
    "Rufname",
    "Rufnamenmuster",
    "Wohnstätte",
    "Übername",
]

DEVICE IS cpu


# Apply Fine-tuned Model to Examples

In [3]:
# Build a text-classification pipeline from the fine-tuned model stored on disk.
# TOP_K controls how many of the best-scoring labels each prediction reports.
finetuned_pipeline = pipeline(
    task="text-classification",
    model="../models/fine-tuned/gbert-base-dfd-motives",
    top_k=TOP_K,
    device=DEVICE,
)

In [4]:
# Example inputs; the zero-shot cell further down reuses this list.
texts = ["Hamburger", "Brookshire"]

# Classify each example with the fine-tuned model and print its rounded top scores.
for name in texts:
    print(name, format_results(finetuned_pipeline(name)))

Hamburger {'Übername': 0.373, 'Beruf': 0.264, 'Wohnstätte': 0.26}
Brookshire {'Herkunft': 0.994, 'Beruf': 0.002, 'Wohnstätte': 0.001}


# Zero-shot Approach

In [5]:
# Build a zero-shot classification pipeline from the multilingual NLI checkpoint.
zeroshot_pipeline = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
    device=DEVICE,
)

In [6]:
# Score every example against LABELS with the zero-shot model, then print the
# results in the same format as the fine-tuned model's output.
for text in texts:
    classification_results = zeroshot_pipeline(text, LABELS)
    top_predictions = format_zeroshot(classification_results, TOP_K)
    print(text, format_results(top_predictions))

Hamburger {'Wohnstätte': 0.282, 'Herkunft': 0.21, 'Rufnamenmuster': 0.209}
Brookshire {'Rufnamenmuster': 0.337, 'Wohnstätte': 0.23, 'Herkunft': 0.157}
