# Visualizing GlossLM results

In [3]:
import pandas as pd
import json
import sys
import os
import yaml
import tqdm
from dotenv import load_dotenv
import datasets
import numpy as np

import zeno_client

## Authenticate and Create a Project

In [4]:
# load the dotenv environment
load_dotenv()

client = zeno_client.ZenoClient(os.environ.get("ZENO_API_KEY"))

In [5]:
project = client.create_project(name="GlossLM-segmented", 
                                view="space-separated-values",
                                metrics=[
                                    zeno_client.ZenoMetric(name="Average MER", type="mean", columns=["MER"]),
                                    zeno_client.ZenoMetric(name="Average WER", type="mean", columns=["WER"])
                                ])

Successfully updated project.
Access your project at  https://hub.zenoml.com/project/ltjuatja/GlossLM-segmented


## Obtaining Data and Outputs

Separate segmented and unsegmented test data

In [6]:
dataset = datasets.load_dataset("lecslab/glosslm-split")
dataset = dataset.filter(lambda x: x["is_segmented"] == 'yes')
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['transcription', 'glosses', 'translation', 'glottocode', 'id', 'source', 'metalang_glottocode', 'is_segmented', 'language', 'metalang'],
        num_rows: 184336
    })
    train_ID: Dataset({
        features: ['transcription', 'glosses', 'translation', 'glottocode', 'id', 'source', 'metalang_glottocode', 'is_segmented', 'language', 'metalang'],
        num_rows: 52464
    })
    eval_ID: Dataset({
        features: ['transcription', 'glosses', 'translation', 'glottocode', 'id', 'source', 'metalang_glottocode', 'is_segmented', 'language', 'metalang'],
        num_rows: 5569
    })
    test_ID: Dataset({
        features: ['transcription', 'glosses', 'translation', 'glottocode', 'id', 'source', 'metalang_glottocode', 'is_segmented', 'language', 'metalang'],
        num_rows: 5970
    })
    train_OOD: Dataset({
        features: ['transcription', 'glosses', 'translation', 'glottocode', 'id', 'source', 'metalang_glottocode', 'is_segm

In [7]:
import re

zeno_data = {x: list() for x in ["data", "label", "glottocode", "id", "source", "language", "split"]}
for split in ["test_ID", "test_OOD"]:
    for x in dataset[split]:
        zeno_data["id"].append(x['id'])
        zeno_data["data"].append(f"id {x['id']}\ntranscription {re.sub('-', '- ', x['transcription'])}")
        zeno_data["label"].append(f"gloss {re.sub('-', '- ', x['glosses'])}")
        zeno_data["source"].append(x["source"])
        zeno_data["glottocode"].append(x["glottocode"])
        zeno_data["language"].append(x["language"])
        zeno_data["split"].append(split)

df = pd.DataFrame(zeno_data)

In [8]:
print(df.head())

                                                data   
0  id st_test_arap1274_0\ntranscription 'oh hiiho...  \
1  id st_test_arap1274_1\ntranscription nuhu' tih...   
2  id st_test_arap1274_2\ntranscription nehe' neb...   
3  id st_test_arap1274_3\ntranscription nooxeihi'...   
4  id st_test_arap1274_4\ntranscription beet- bet...   

                                               label glottocode   
0  gloss but 3.IMPERF.NEG- say.s.t.- pers.PL you ...   arap1274  \
1  gloss this when.PAST- speak- 3PL IC.tell.the.t...   arap1274   
2  gloss this my.grandfather PAST.IMPERF- pause w...   arap1274   
3  gloss maybe tomorrow FUT- REDUP- tell.a.story-...   arap1274   
4  gloss want.to- dance- 2S.SUBJ why.in.the.heck!...   arap1274   

                   id         source language    split  
0  st_test_arap1274_0  sigmorphon_st  Arapaho  test_ID  
1  st_test_arap1274_1  sigmorphon_st  Arapaho  test_ID  
2  st_test_arap1274_2  sigmorphon_st  Arapaho  test_ID  
3  st_test_arap1274_3  sigmorpho

In [9]:
project.upload_dataset(df, id_column="id", data_column="data", label_column="label")

  0%|          | 0/2 [00:00<?, ?it/s]

Successfully uploaded data


# Specifying system outputs


Add system predictions

In [10]:
df_system_id = pd.read_csv("/home/ltjuatja/glosslm/preds/byt5-translation-all/test_ID-preds.csv")
df_system_ood = pd.read_csv("/home/ltjuatja/glosslm/preds/byt5-translation-all/test_OOD-preds.csv")
df_system = pd.concat([df_system_id, df_system_ood])
df_system = df_system.loc[df_system["is_segmented"] == "yes"]

df_system.head()

Unnamed: 0,ID,pred,is_segmented,glottocode
0,st_test_arap1274_0,but 3.IMPERF.NEG-say.s.t.-pers.PL you know,yes,arap1274
2,st_test_arap1274_1,this when.PAST-speak-3PL IC.corrrect-3PL,yes,arap1274
4,st_test_arap1274_2,this 1S-grandfather PAST.IMPERF-pause what-say...,yes,arap1274
6,st_test_arap1274_3,maybe tomorrow FUT-REDUP-tell.a.story-3S Wayne...,yes,arap1274
8,st_test_arap1274_4,want.to-dance-2S.SUBJ why.in.the.heck!? it.is....,yes,arap1274


In [11]:
test_dataset = datasets.concatenate_datasets([dataset["test_ID"], dataset["test_OOD"]])

In [19]:
from jiwer import wer

def eval_error_rate(pred: list[str], gold: list[str]) -> float:
    prediction = ' '.join(pred)
    reference = ' '.join(gold)
    return wer(reference, prediction)

zeno_system_data = {x: list() for x in ["id", "output", "MER", "WER"]}
zeno_system_data["id"] = df_system["ID"]
preds = df_system['pred'].tolist()
zeno_system_data["output"] = [re.sub('-', '- ', str(pred)) for pred in preds]
pred_morphs = [re.split(r"\s|-", str(pred)) for pred in preds]
pred_words = [str(pred).split() for pred in preds]
gold_labels = test_dataset['glosses']
gold_morphs = [re.split(r"\s|-", str(gold_label)) for gold_label in gold_labels]
gold_words = [str(gold_label).split() for gold_label in gold_labels]
for pred, gold in zip(pred_morphs, gold_morphs):
    zeno_system_data["MER"].append(eval_error_rate(pred, gold))
for pred, gold in zip(pred_words, gold_words):
    zeno_system_data["WER"].append(eval_error_rate(pred, gold))

df_sys_outputs = pd.DataFrame(zeno_system_data)
df_sys_outputs.head()

Unnamed: 0,id,output,MER,WER
0,st_test_arap1274_0,gloss but 3.IMPERF.NEG- say.s.t.- pers.PL you ...,0.0,0.0
2,st_test_arap1274_1,gloss this when.PAST- speak- 3PL IC.corrrect- 3PL,0.166667,0.333333
4,st_test_arap1274_2,gloss this 1S- grandfather PAST.IMPERF- pause ...,0.181818,0.2
6,st_test_arap1274_3,gloss maybe tomorrow FUT- REDUP- tell.a.story-...,0.0,0.0
8,st_test_arap1274_4,gloss want.to- dance- 2S.SUBJ why.in.the.heck!...,0.0,0.0


In [20]:
project.upload_system(df_sys_outputs, name="byt5-translation-all-base", id_column="id", output_column="output")

  0%|          | 0/1 [00:00<?, ?it/s]

Successfully uploaded system
