In [1]:
import os
from momaapi import MOMA
from tqdm import tqdm

In [2]:
# Build the MOMA API handle used by every cell below.
# NOTE: dir_moma is an absolute, machine-specific path; adjust it for your environment.
moma = MOMA(
    dir_moma="/data/dir_moma",
    paradigm="standard",
)

In [3]:
# Walk every activity of every split and, for each sub-activity annotation,
# record its class name <-> class id mapping and count occurrences per class name.
sact2id, id2sact, sact_count = {}, {}, {}
for split in ("train", "val", "test"):
    ids_act = moma.get_ids_act(split=split)
    # One progress bar per split, advancing once per activity annotation.
    for ann_act in tqdm(moma.get_anns_act(ids_act=ids_act)):
        for ann_sact in moma.get_anns_sact(ids_sact=ann_act.ids_sact):
            cname, cid = ann_sact.cname, ann_sact.cid
            sact2id[cname] = cid
            id2sact[cid] = cname
            # Missing names start at 0, so the first sighting yields a count of 1.
            sact_count[cname] = sact_count.get(cname, 0) + 1

100%|██████████| 904/904 [00:14<00:00, 63.54it/s] 
100%|██████████| 226/226 [00:06<00:00, 37.43it/s] 
100%|██████████| 282/282 [00:05<00:00, 48.32it/s] 


In [4]:
# Write the sub-activity taxonomy, one "<cid>. <cname>" line per class,
# ordered by the sorted (cid, cname) pairs.
sorted_id2sact = sorted(id2sact.items())
with open("sact_taxonomy.txt", "w") as f:
    f.writelines(f"{cid}. {cname}\n" for cid, cname in sorted_id2sact)

In [5]:
# Invert the taxonomy's sub-activity -> activity mapping so that each activity
# class lists its sub-activity classes (in the mapping's iteration order).
act_to_sact = {}
sact_to_act = moma.taxonomy["sact_to_act"]

for sact, act in sact_to_act.items():
    act_to_sact.setdefault(act, []).append(sact)

# For every activity class, write its sub-activity classes together with each
# one's share (in percent) of the sub-activity instances counted for that class.
with open("act_to_sact.txt", "w") as f:
    for act, sact_list in act_to_sact.items():
        print(f"< {act} >", file=f)
        # A taxonomy entry that never occurred in the annotations has no key in
        # sact_count / sact2id; treat it as 0 occurrences instead of raising KeyError.
        n_sact = sum(sact_count.get(sact, 0) for sact in sact_list)
        for sact in sact_list:
            sact_cid = sact2id.get(sact, "?")  # "?" marks a class id that was never observed
            count = sact_count.get(sact, 0)
            # n_sact is 0 only when none of this class's sub-activities were counted.
            share = (count / n_sact) * 100. if n_sact else 0.0
            print(f"    {sact_cid}. {sact} ({share:.2f})", file=f)
        print("", file=f)

In [6]:
# Gather the activity ids of all three splits into a single list.
ids_act_train = moma.get_ids_act(split="train")
ids_act_val = moma.get_ids_act(split="val")
ids_act_test = moma.get_ids_act(split="test")
ids_act = ids_act_train + ids_act_val + ids_act_test

# For each activity class, collect one string per activity instance of the form
# "<cid> -> <cid> -> ... -> <cid> (<activity id>)".
cls2seq = {}
for act in moma.get_anns_act(ids_act=ids_act):
    anns_sact = moma.get_anns_sact(ids_sact=act.ids_sact)
    sact_seq = " -> ".join(f"{ann.cid}" for ann in anns_sact)
    # The activity id is appended only after a last sub-activity; an activity
    # without sub-activities keeps an empty string.
    if len(anns_sact) > 0:
        sact_seq += f" ({act.id})"
    cls2seq.setdefault(act.cname, []).append(sact_seq)

# Write a "< class >" header followed by that class's sequences, one per line,
# and separate consecutive classes with three newlines.
with open("sact_list_per_class.txt", "w") as f:
    for act_cname, sequences in cls2seq.items():
        print(f"< {act_cname} >", *sequences, sep="\n", file=f)
        print("\n\n", file=f)