In [1]:
def slurpfile(filename) -> str:
    """Return the full text of ``filename`` with surrounding whitespace removed.

    Args:
        filename: path of the text file to read (opened in text mode with the
            platform default encoding).

    Returns:
        The file contents after ``str.strip()``.
    """
    with open(filename) as handle:
        contents = handle.read()
    return contents.strip()

In [8]:
# Base URL that every request in this notebook is built from (e.g. f"{host}projects").
host = "http://130.237.3.107:8080/api/"
# API token, read from the local file "label_studio_mine"; it is sent in the
# "Authorization: Token ..." header built further down.
api_token: str = slurpfile("label_studio_mine")
# Directory in which the TextGrid files to upload are looked up.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
input_dir = "/Users/joregan/Desktop/breath_corrected/"

In [3]:
import requests
import json
from pathlib import Path

# Shared request headers: token authentication using `api_token`.
# Passed as `headers=` to the GET helpers and copied by post_results.
headers = {
    "Authorization": f"Token {api_token}"
}

In [4]:
def get_projects():
    """Fetch the project listing from ``{host}projects``.

    Sends a GET request with the module-level ``headers`` and decodes the
    response body as JSON.

    Returns:
        The decoded JSON document (``get_project_id_from_name`` reads its
        ``"results"`` key).

    Raises:
        AssertionError: if the response status code is not 200.
    """
    response = requests.get(f"{host}projects", headers=headers)
    assert response.status_code == 200
    return json.loads(response.text)

In [5]:
def get_project_id_from_name(name):
    """Return the id of the first listed project whose title equals ``name``.

    Titles and ``name`` are compared after stripping surrounding whitespace.
    Only the entries under ``"results"`` of a single ``get_projects()`` call
    are searched.

    Args:
        name: project title to look for.

    Returns:
        The ``"id"`` of the first matching project, or ``None`` if no title
        matches.
    """
    listing = get_projects()
    matching_ids = (
        project["id"]
        for project in listing["results"]
        if project["title"].strip() == name.strip()
    )
    return next(matching_ids, None)

In [9]:
# Look up the numeric id of the project titled "Main 6".
get_project_id_from_name("Main 6")

7

In [10]:
def get_tasks(projectid):
    """Fetch the task listing of one project from ``{host}tasks``.

    Args:
        projectid: project id, sent as the ``project`` query parameter.

    Returns:
        The decoded JSON document (``index_task_filestem_to_id`` reads its
        ``"tasks"`` key).

    Raises:
        AssertionError: if the response status code is not 200.
    """
    query = {"project": projectid}
    response = requests.get(f"{host}tasks", headers=headers, params=query)
    assert response.status_code == 200
    return json.loads(response.text)

In [11]:
def index_task_filestem_to_id(tasks_data):
    """Map each task's audio file name to its task id.

    The file name is the last ``/``-separated component of the task's path
    (extension included, e.g. ``"x_main.wav"``). The path is taken from
    ``task["storage_filename"]`` when that holds a non-empty value, otherwise
    from ``task["data"]["audio"]``.

    Args:
        tasks_data: dict with a ``"tasks"`` list of task dicts, as returned by
            ``get_tasks``.

    Returns:
        dict mapping file name -> task id. Tasks for which no path can be
        found are left out. If two tasks share a file name, the later one wins.

    Raises:
        KeyError: if ``tasks_data`` has no ``"tasks"`` key, or a task that has
            a path has no ``"id"``.
    """
    mapping = {}
    for task in tasks_data["tasks"]:
        # Fall back to data["audio"] when "storage_filename" is absent *or*
        # present-but-empty; testing only for key presence would silently drop
        # tasks whose "storage_filename" is None.
        # NOTE(review): presumably the server sends storage_filename=null for
        # tasks that were not imported from a storage — confirm.
        raw_path = task.get("storage_filename") or (task.get("data") or {}).get("audio")
        if not raw_path:
            continue
        file_name = raw_path.split("/")[-1]
        mapping[file_name] = task["id"]
    return mapping

In [22]:
# Look up the numeric id of the project titled "Speaker 3".
get_project_id_from_name("Speaker 3")

1

In [43]:
# Fetch the task listing of project id 8.
# NOTE(review): the id is hard-coded; the lookups recorded above returned 7 and 1,
# so confirm which project title id 8 corresponds to.
tasks = get_tasks(8)

In [44]:
# Build the file name -> task id lookup from the fetched task listing.
mapping = index_task_filestem_to_id(tasks)

In [45]:
# Display the lookup to check which audio files the project contains.
mapping

{'hsi_7_0719_209_001_main.wav': 107,
 'hsi_7_0719_209_002_main.wav': 108,
 'hsi_7_0719_209_003_main.wav': 109,
 'hsi_7_0719_210_001_main.wav': 110,
 'hsi_7_0719_210_002_main.wav': 111,
 'hsi_7_0719_210_003_main.wav': 112,
 'hsi_7_0719_211_002_main.wav': 113,
 'hsi_7_0719_211_004_main.wav': 114,
 'hsi_7_0719_222_002_main.wav': 115,
 'hsi_7_0719_222_004_main.wav': 116,
 'hsi_7_0719_227_002_main.wav': 117,
 'hsi_7_0719_227_003_main.wav': 118}

In [25]:
import json
import uuid
from praatio import textgrid


# TextGrid interval code -> label name placed in the "labels" result.
# Intervals whose code is not a key here are skipped by tg_to_result.
labels = {
    "n": "Noise",
    "spn": "Noise",
    "b": "Breath",
    "ct": "Cross-talk",
    "sp": "Speech",
}

# TextGrid interval code -> text placed in the "textarea" result
# (the empty string for "sp").
texts = {
    "n": "noise",
    "spn": "spn",
    "b": "breath",
    "ct": "crosstalk",
    "sp": "",
}

def tg_to_result(tgfile):
    """Convert one TextGrid file into a flat list of result dicts.

    Reads the tier named "annot", or the tier named "words" when the grid has
    no "annot" tier. Every interval whose stripped label is a key of the
    module-level ``labels`` table produces two dicts that share one generated
    id: a ``"labels"`` region carrying the mapped label name, then a
    ``"textarea"`` region carrying the mapped text from ``texts``. Intervals
    with an empty or unmapped label are skipped.

    Args:
        tgfile: path of the TextGrid file, handed to praatio's
            ``textgrid.openTextgrid`` (second argument ``False`` — presumably
            "do not include empty intervals"; confirm against the installed
            praatio version).

    Returns:
        list of dicts, two per kept interval, in tier order.
    """
    grid = textgrid.openTextgrid(tgfile, False)
    tier_key = "annot" if "annot" in grid.tierNames else "words"

    results = []
    for interval in grid.getTier(tier_key).entries:
        code = interval.label.strip()
        if code == "" or code not in labels:
            continue

        region_label = labels[code]
        region_text = texts[code]
        # Short random id; the label dict and the text dict of one interval
        # carry the same id.
        region_id = str(uuid.uuid4())[:6]

        results.append({
            "value": {
                "start": interval.start,
                "end": interval.end,
                "channel": 0,
                "labels": [region_label],
            },
            "from_name": "labels",
            "to_name": "audio",
            "type": "labels",
            "id": region_id,
        })
        results.append({
            "value": {
                "start": interval.start,
                "end": interval.end,
                "channel": 0,
                "text": [region_text],
            },
            "from_name": "transcription",
            "to_name": "audio",
            "type": "textarea",
            "id": region_id,
        })

    return results

In [13]:
def post_results(id, task, project, results):
    """PATCH an existing annotation with a new ``result`` list.

    The request goes to ``{host}annotations/{id}/`` with ``taskID`` and
    ``project`` in the query string, using a copy of the module-level
    ``headers`` plus a JSON content type.

    Args:
        id: id of the annotation to update (part of the URL path).
        task: task id, sent as the ``taskID`` query parameter.
        project: project id, sent both in the query string and in the body.
        results: list of result dicts, e.g. the output of ``tg_to_result``.

    Returns:
        The ``requests`` response object; the status code is not checked here.
    """
    url = f"{host}annotations/{id}/?taskID={task}&project={project}"

    # Work on a copy so the shared headers dict is not modified.
    request_headers = dict(headers)
    request_headers["Content-type"] = "application/json"

    payload = json.dumps({
        "was_cancelled": False,
        "ground_truth": False,
        "project": project,
        "draft_id": 0,
        "parent_prediction": None,
        "parent_annotation": None,
        "result": results,
    })
    return requests.patch(url, data=payload, headers=request_headers)

In [24]:
# Convert a single TextGrid from input_dir as a trial run.
# `input_dir` ends with "/", so plain concatenation yields a valid path.
file = f"{input_dir}hsi_4_0717_211_002_main.TextGrid"
data = tg_to_result(file)

In [26]:
# Update annotation 263 (task 77, project 5) with the converted result and
# print the raw response body for inspection.
r = post_results(263, 77, 5, data)
print(r.text)

{"id":263,"result":[{"value":{"start":2.9165205467865554,"end":3.3149967793985606,"channel":0,"labels":["Speech"]},"from_name":"labels","to_name":"audio","type":"labels","id":"076fe1"},{"value":{"start":2.9165205467865554,"end":3.3149967793985606,"channel":0,"text":["speech"]},"from_name":"transcription","to_name":"audio","type":"textarea","id":"076fe1"},{"value":{"start":9.3286217690192,"end":9.9,"channel":0,"labels":["Noise"]},"from_name":"labels","to_name":"audio","type":"labels","id":"6187c3"},{"value":{"start":9.3286217690192,"end":9.9,"channel":0,"text":["noise"]},"from_name":"transcription","to_name":"audio","type":"textarea","id":"6187c3"},{"value":{"start":9.9,"end":16.705529883479887,"channel":0,"labels":["Cross-talk"]},"from_name":"labels","to_name":"audio","type":"labels","id":"efecf6"},{"value":{"start":9.9,"end":16.705529883479887,"channel":0,"text":["crosstalk"]},"from_name":"transcription","to_name":"audio","type":"textarea","id":"efecf6"},{"value":{"start":16.705529883

In [46]:
from pathlib import Path

# Dry run: print "task id, wav name" for every task in `mapping` that has a
# TextGrid with the matching name in input_dir. Nothing is uploaded here.
for fn in mapping:
    # TextGrid name = wav name with the extension swapped.
    tgfile = fn.replace(".wav", ".TextGrid")
    if (Path(input_dir) / tgfile).exists():
        print(mapping[fn], fn)


107 hsi_7_0719_209_001_main.wav
108 hsi_7_0719_209_002_main.wav
109 hsi_7_0719_209_003_main.wav
110 hsi_7_0719_210_001_main.wav
111 hsi_7_0719_210_002_main.wav
112 hsi_7_0719_210_003_main.wav
113 hsi_7_0719_211_002_main.wav
114 hsi_7_0719_211_004_main.wav
115 hsi_7_0719_222_002_main.wav
116 hsi_7_0719_222_004_main.wav
117 hsi_7_0719_227_002_main.wav
118 hsi_7_0719_227_003_main.wav


In [None]:
# Convert and upload the TextGrid of every task in `mapping` that has one in input_dir.
# NOTE(review): `count` is passed to post_results as the annotation id, starting at
# 230 and incremented after each upload. This assumes the target annotations already
# exist with consecutive ids in the iteration order of `mapping` — confirm against
# the server before running, otherwise the wrong annotations may be modified.
count = 230
for task in mapping:
    # Despite its name, `jsonfile` holds the TextGrid file name derived from the wav name.
    jsonfile = task.replace(".wav", ".TextGrid")
    file = f"{input_dir}{jsonfile}"
    # Tasks without a matching TextGrid are skipped and do not consume an id.
    if not (Path(input_dir) / jsonfile).exists():
        continue
    data = tg_to_result(file)
    # 8 is the hard-coded project id (the same value passed to get_tasks above).
    r = post_results(count, mapping[task], 8, data)
    count += 1
    # The response body is printed as-is; the status code is not checked.
    print(r.text)

In [136]:
# Assign every key of `mapping` a consecutive number starting at 99, in
# iteration order.
# NOTE(review): `tmap` is not read by any cell visible here; presumably the
# numbers are meant as annotation ids — confirm before relying on them.
tmap = {}
count = 99
for task in mapping:
    tmap[task] = count
    count += 1


In [140]:
from pathlib import Path

# For every *.TextGrid in the directory below, print the matching wav name and
# its task id when that wav name is a key of `mapping`.
# NOTE(review): the recorded output lists *_inter.wav names, which do not appear
# in the `mapping` displayed earlier in this notebook; the cell was apparently run
# against a different `mapping`. Re-run on a fresh kernel to confirm.
# NOTE(review): absolute, machine-specific path.
for file in Path("/Users/joregan/Playing/hsi_ctmedit/textgrid").glob("*.TextGrid"):
    wavfile = file.stem + ".wav"
    if wavfile in mapping:
        print(wavfile, mapping[wavfile])

hsi_7_0719_227_002_inter.wav 69
hsi_7_0719_209_003_inter.wav 61
hsi_7_0719_227_003_inter.wav 70
hsi_7_0719_211_004_inter.wav 66
hsi_6_0718_209_001_inter.wav 59
hsi_7_0719_222_002_inter.wav 67
hsi_7_0719_209_001_inter.wav 60
hsi_7_0719_222_004_inter.wav 68
hsi_7_0719_211_002_inter.wav 65
hsi_7_0719_210_002_inter.wav 63
hsi_7_0719_210_003_inter.wav 64
hsi_5_0718_209_001_inter.wav 56
hsi_5_0718_209_003_inter.wav 58
hsi_7_0719_210_001_inter.wav 62
hsi_5_0718_209_002_inter.wav 57
