# Trim LabelStudio annotations

> "to match trimmed audio"

- branch: master
- hidden: true
- comments: false
- categories: [labelstudio, trim, csv]

In [2]:
def slurpfile(filename) -> str:
    """Return the full text of *filename* with surrounding whitespace removed.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content, stripped of leading and trailing whitespace
        (including the trailing newline most editors add).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(filename, encoding="utf-8") as inf:
        return inf.read().strip()

In [52]:
# Notebook configuration. These values are specific to one machine/server and
# need adjusting before the notebook is run anywhere else.
output_dir = "/tmp/textgrid_cut"
wav_dir = "/Users/joregan/Playing/hsi/audio"
# Base URL of the API; endpoint paths are appended to it directly, so the
# trailing slash is required.
host = "http://130.237.3.107:8080/api/"
# The token is read from a local file instead of being written in the notebook.
api_token: str = slurpfile("label_studio_mine")

In [53]:
from pathlib import Path

# Path versions of the configured directories; wav_path is later joined with
# an audio file name using the "/" operator.
output_path = Path(output_dir)
wav_path = Path(wav_dir)

In [5]:
import requests
import json
from pathlib import Path

# HTTP headers shared by the API helper functions; the token is sent in the
# form "Token <api_token>".
headers = {
    "Authorization": f"Token {api_token}"
}

In [6]:
def get_task(task_id):
    """Fetch a single task record from the API.

    Issues a GET request to ``{host}tasks/{task_id}`` with the module-level
    ``headers``.

    Args:
        task_id: Identifier of the task to fetch.

    Returns:
        The decoded JSON body of the response, or an empty dict when the
        server answers with any status other than 200.
    """
    response = requests.get(f"{host}tasks/{task_id}", headers=headers)
    if response.status_code == 200:
        return json.loads(response.text)
    # Any non-200 answer is reported to the caller as "no data".
    return {}

In [7]:
def get_annotation(annot_it):
    """Fetch a single annotation record from the API.

    Issues a GET request to ``{host}annotations/{annot_it}`` with the
    module-level ``headers``.

    Args:
        annot_it: Identifier of the annotation to fetch.

    Returns:
        The decoded JSON body of the response.

    Raises:
        AssertionError: If the server answers with any status other than 200.
    """
    ep = f"{host}annotations/{annot_it}"
    req = requests.get(ep, headers=headers)
    if req.status_code != 200:
        # Raised explicitly instead of via an `assert` statement so the check
        # is not removed when Python runs with -O; the exception type is kept
        # so existing error handling is unaffected.
        raise AssertionError(f"GET {ep} returned HTTP status {req.status_code}")
    return json.loads(req.text)

In [8]:
# Annotation 264 is the working example for the rest of the notebook.
data = get_annotation(264)

In [10]:
def combine_labels(data):
    """Merge the entries of ``data["result"]`` that share the same ``id``.

    The first entry seen for an id is stored as-is (the same object, not a
    copy). Each later entry with that id contributes either its ``"text"``
    or, if it has no text, its ``"labels"`` to the stored entry's
    ``"value"``, overwriting what was there.

    Args:
        data: Mapping that may contain a ``"result"`` list of entries, each
            with an ``"id"`` and a ``"value"`` mapping.

    Returns:
        Dict mapping each id to its merged entry; empty when ``data`` has no
        ``"result"`` key.
    """
    merged = {}
    if "result" not in data:
        return merged

    for entry in data["result"]:
        key = entry["id"]
        if key not in merged:
            merged[key] = entry
            continue
        incoming = entry["value"]
        # Only one field is copied per entry; "text" takes precedence.
        for field in ("text", "labels"):
            if field in incoming:
                merged[key]["value"][field] = incoming[field]
                break
    return merged

In [15]:
from pydub import AudioSegment

In [16]:
# Phoneme conversion table, one entry per line: a source symbol, a single
# space, and the code it is converted to. It is parsed into the
# espeak_to_cmudict dict further down; when a symbol is listed more than once
# the first entry is the one that is kept.
# NOTE(review): judging by the names used in this notebook, the left column
# is eSpeak-style IPA and the right column is CMUdict phone codes - confirm.
MAPPING = """
ɑː AA
æ AE
ə AH
ɐ AH
ʌ AH
ɔː AO
aʊ AW
aɪ AY
b B
tʃ CH
d D
ð DH
ɛ EH
ɚ ER
ɜː ER
eɪ EY
f F
ɡ G
h HH
ɪ IH
i IY
iː IY
dʒ JH
k K
l L
m M
n N
ŋ NG
oʊ OW
ɔɪ OY
p P
ɹ R
s S
ʃ SH
t T
θ TH
ʊ UH
uː UW
v V
w W
j Y
z Z
ʒ ZH
ɾ D
"""

In [17]:
# Build the symbol -> code lookup table from the MAPPING text.
espeak_to_cmudict = {}
for line in MAPPING.split("\n"):
    if not line:
        continue
    line = line.strip()
    parts = line.split(" ")

    if len(parts) == 2:
        k, v = parts
        # setdefault keeps the first code seen for a repeated symbol.
        espeak_to_cmudict.setdefault(k, v)
    else:
        # Anything that is not exactly "<symbol> <code>" is reported and skipped.
        print(line)

In [18]:
import re

# Longest symbols first, so that in the alternation a multi-character symbol
# (e.g. "iː") is tried before a shorter symbol it starts with (e.g. "i").
cmudict_keys = espeak_to_cmudict.keys()
cmudict_keys = sorted(cmudict_keys, key=len, reverse=True)
# Each key is escaped so it is always matched literally, even if a symbol
# that is a regex metacharacter is ever added to the table.
espeak_regex = re.compile("(" + "|".join(map(re.escape, cmudict_keys)) + ")")

def cmudictify(espeak):
    """Convert a phoneme string to space-separated codes from the table.

    The stress marks "ˈ" and "ˌ" are removed first. The remaining string is
    scanned for symbols that are keys of ``espeak_to_cmudict``; characters
    that match no key are skipped without any error.

    Args:
        espeak: Phoneme string for one word.

    Returns:
        The mapped codes joined by single spaces (empty string when nothing
        matches).
    """
    espeak = espeak.replace("ˈ", "").replace("ˌ", "")
    return " ".join(espeak_to_cmudict[x] for x in espeak_regex.findall(espeak))

In [19]:
def normword(text):
    """Return *text* lower-cased, without leading/trailing ``,.;:!?``."""
    return text.strip(",.;:!?").lower()

def normphon(phon):
    """Return *phon* without leading/trailing ``,.;:!?`` (case is kept)."""
    return phon.strip(",.;:!?")

def make_lexicon(text, phon):
    """Pair every word of *text* with the converted form of its pronunciation.

    Both strings are split on single spaces and paired position by position.
    Words are normalised with ``normword``; pronunciations are normalised
    with ``normphon`` and converted with ``cmudictify``.

    Args:
        text: Orthographic transcript.
        phon: Phonetic transcript, optionally wrapped in ``/.../``.

    Returns:
        List of unique ``(word, pronunciation)`` tuples, in order of first
        appearance.

    Raises:
        AssertionError: If the two transcripts do not contain the same number
            of space-separated items.
    """
    if phon.startswith("/") and phon.endswith("/"):
        phon = phon[1:-1]
    words = [normword(x) for x in text.split(" ")]
    phonwords = [cmudictify(normphon(x)) for x in phon.split(" ")]
    if len(words) != len(phonwords):
        # Raised explicitly instead of via an `assert` statement so the check
        # is not removed when Python runs with -O; the exception type is kept
        # so existing error handling is unaffected.
        raise AssertionError(
            f"{len(words)} words but {len(phonwords)} pronunciations"
        )
    # dict.fromkeys removes duplicates like set() does, but keeps a stable,
    # reproducible order instead of an arbitrary one.
    return list(dict.fromkeys(zip(words, phonwords)))

In [21]:
# Merge the example annotation's result entries by id.
combined = combine_labels(data)

In [49]:
def simplify_combined_mine(combined):
    """Reduce merged annotation entries to flat segment dicts.

    Only entries whose ``value["labels"]`` contains ``"Speech"`` are kept.
    ``value["text"]`` must hold one or two strings:

    * one string: used unchanged as ``"text"``;
    * two strings: the second must, once stripped, start and end with ``/``
      and becomes ``"phone"``; the first, stripped, becomes ``"text"``.

    Any other shape is reported with ``print("Error", ...)`` and skipped.

    Args:
        combined: Dict of merged entries, each with a ``"value"`` mapping
            holding ``"labels"``, ``"start"``, ``"end"`` and ``"text"``.

    Returns:
        List of dicts with ``"start"``, ``"end"``, ``"text"`` and, when
        available, ``"phone"``.
    """
    simplified = []
    for entry in combined.values():
        val = entry["value"]
        if "Speech" not in val["labels"]:
            continue

        segment = {"start": val["start"], "end": val["end"]}
        text_parts = val["text"]
        count = len(text_parts)

        if count == 1:
            segment["text"] = text_parts[0]
        elif count == 2:
            phone = text_parts[1].strip()
            if not (phone.startswith("/") and phone.endswith("/")):
                print("Error", val)
                continue
            segment["text"] = text_parts[0].strip()
            segment["phone"] = phone
        else:
            print("Error", val)
            continue

        simplified.append(segment)

    return simplified


In [70]:
def strip_comment(text):
    """Remove every ``[...]`` section from *text*.

    Everything from a ``[`` up to and including the next ``]`` is dropped.
    Brackets do not nest: the first ``]`` closes the section. If a ``[`` is
    never closed, the rest of the text is dropped. A ``]`` outside a section
    is kept.

    Args:
        text: String that may contain bracketed comments.

    Returns:
        The text with the bracketed sections removed.
    """
    kept = []
    inside = False
    for ch in text:
        if inside:
            # Stay inside until the closing bracket, which is dropped too.
            inside = ch != "]"
        elif ch == "[":
            inside = True
        else:
            kept.append(ch)
    return "".join(kept)

In [45]:
def get_audio_name(data):
    """Return the file name of the audio attached to an annotation's task.

    The task referenced by ``data["task"]`` is fetched with ``get_task`` and
    the last ``/``-separated component of its ``data.audio`` value is
    returned.

    Args:
        data: Annotation mapping containing a ``"task"`` id.

    Returns:
        The audio file name without any leading path.

    Raises:
        ValueError: If the fetched task has no ``data.audio`` entry, which
            includes the case where ``get_task`` returned an empty dict.
    """
    task = data["task"]
    task_data = get_task(task)
    # Without this check a missing entry surfaced as an UnboundLocalError on
    # the return statement, which says nothing about the actual cause.
    if "data" not in task_data or "audio" not in task_data["data"]:
        raise ValueError(f"No audio reference found for task {task!r}")
    return task_data["data"]["audio"].split("/")[-1]

In [39]:
def get_person(data):
    """Return an identifier for whoever created the annotation.

    When ``data["created_username"]`` contains ``", "``, the part after the
    last ``", "`` is returned. Otherwise the function falls back to the
    ``"completed_by"`` value.

    Args:
        data: Annotation mapping.

    Returns:
        The trailing part of ``created_username``, or the ``completed_by``
        value. ``None`` only if ``created_username`` has no ``", "`` and
        there is no ``completed_by`` entry.

    Raises:
        KeyError: If neither ``created_username`` nor ``completed_by`` is
            present.
    """
    if "created_username" in data:
        username = data["created_username"]
        if ", " in username:
            return username.split(", ")[-1]
        # A username without ", " used to fall off the end of the function
        # and yield None; use the same fallback as when it is absent.
        return data.get("completed_by")
    return data["completed_by"]

In [50]:
# Per-person simplifier functions, keyed by the value get_person() returns.
# The "1" entry doubles as the default for anyone not listed.
simplifiers = {
    "1": simplify_combined_mine,
}

def simplify_data(data):
    """Simplify an annotation with the simplifier registered for its author.

    Args:
        data: Annotation mapping, as accepted by ``get_person`` and
            ``combine_labels``.

    Returns:
        Whatever the selected simplifier returns for the merged entries.
    """
    who = get_person(data)
    # The previous test was inverted: it indexed simplifiers[who] exactly
    # when `who` was missing, so any unlisted person raised KeyError.
    simplifier = simplifiers.get(who, simplifiers["1"])
    combined = combine_labels(data)
    return simplifier(combined)

In [57]:
# Simplified segments of the example annotation.
s = simplify_data(data)

In [58]:
# Inspect one segment; its start/end values are reused for the clip below.
s[3]

{'start': 37.494264366980374,
 'end': 39.66077668153498,
 'text': "yeah, but it's been like that the entire time",
 'phone': '/jˈæ bət ɪts bˌɪn lˈaɪk ðæt ðɪ ɛntˈaɪɚ tˈaɪm./'}

In [61]:
# Extra encoder arguments: 16-bit little-endian PCM, one channel, 16000 Hz.
# NOTE(review): presumably forwarded to ffmpeg by pydub's export() - confirm.
PARAMS = ["-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000"]

# Load the recording that belongs to the example annotation's task.
audio_seg = AudioSegment.from_file(wav_path / get_audio_name(data))
# Cut out one segment. The bounds are the start/end values of s[3] multiplied
# by 1000 (pydub slices by milliseconds, so those values are presumably
# seconds).
clip = audio_seg[int(37.494264366980374 * 1000):int(39.66077668153498 * 1000)]
# Write the clip as headerless raw samples; this file is what the
# pocketsphinx command below reads.
clip.export("/tmp/ps.raw", format="s16le", parameters=PARAMS)

<_io.BufferedRandom name='/tmp/ps.raw'>

In [64]:
# Align the exported clip with its transcript using the pocketsphinx
# command-line tool (the text is that of s[3] with the comma removed).
!/opt/homebrew/bin/pocketsphinx align /tmp/ps.raw "yeah but it's been like that the entire time"

{"b":0.000,"d":2.170,"p":1.000,"t":"yeah but it's been like that the entire time","w":[{"b":0.000,"d":0.180,"p":0.978,"t":"yeah"},{"b":0.180,"d":0.250,"p":0.952,"t":"but"},{"b":0.430,"d":0.180,"p":0.983,"t":"it's"},{"b":0.610,"d":0.150,"p":0.981,"t":"been"},{"b":0.760,"d":0.190,"p":0.973,"t":"like"},{"b":0.950,"d":0.260,"p":0.973,"t":"that"},{"b":1.210,"d":0.190,"p":0.973,"t":"the(2)"},{"b":1.400,"d":0.340,"p":0.953,"t":"entire"},{"b":1.740,"d":0.420,"p":0.945,"t":"time"}]}
