# Write textgrid from Label Studio

> "For local editing"

- hidden: true
- branch: master
- categories: [labelstudio, textgrid, hsi]

In [2]:
def slurpfile(filename) -> str:
    """Return the full text of ``filename`` with surrounding whitespace removed.

    Args:
        filename: Path of the file to read (opened in text mode with the
            platform's default encoding).

    Returns:
        The file contents, stripped of leading and trailing whitespace.
    """
    with open(filename) as handle:
        contents = handle.read()
    return contents.strip()

In [3]:
# Base URL of the Label Studio REST API; endpoint paths are appended directly
# to it, so the trailing slash is required.
host = "http://130.237.3.107:8080/api/"
# API token, read from the local file "label_studio_mine".
api_token: str = slurpfile("label_studio_mine")
# Directory holding the TextGrid files read later in this notebook; file names
# are appended directly, so the trailing slash is required.
input_dir = "/Users/joregan/Desktop/breath_corrected/"

In [4]:
import requests
import json
from pathlib import Path

# Authentication header sent with the Label Studio API requests made below.
headers = {
    "Authorization": f"Token {api_token}"
}

In [5]:
def get_tasks(projectid):
    """Fetch the task listing of one Label Studio project.

    Sends ``GET {host}tasks`` with ``project=projectid`` as a query parameter,
    authenticated with the module-level ``headers``.

    Args:
        projectid: Project id, sent as the ``project`` query parameter.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
    """
    # NOTE(review): the endpoint may paginate its results; confirm that a
    # single request returns every task of the project.
    req = requests.get(f"{host}tasks", headers=headers, params={"project": projectid})
    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let an error response reach the JSON parser.
    if req.status_code != 200:
        raise RuntimeError(
            f"GET {req.url} returned HTTP {req.status_code}: {req.text[:200]}"
        )
    return json.loads(req.text)

In [9]:
def get_annotation(annotationid):
    """Fetch a single annotation from Label Studio.

    Sends ``GET {host}annotations/{annotationid}/``, authenticated with the
    module-level ``headers``.

    Args:
        annotationid: Id of the annotation to retrieve.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
    """
    req = requests.get(f"{host}annotations/{annotationid}/", headers=headers)
    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let an error response reach the JSON parser.
    if req.status_code != 200:
        raise RuntimeError(
            f"GET {req.url} returned HTTP {req.status_code}: {req.text[:200]}"
        )
    return json.loads(req.text)

In [6]:
def index_task_filestem_to_id(tasks_data):
    """Build a lookup from audio file name to task id.

    For every task in ``tasks_data["tasks"]`` the path is taken from
    ``storage_filename`` when that key is present, otherwise from
    ``data.audio``. Tasks whose path is empty or ``None`` are skipped. The
    key is the last ``/``-separated component of the path (extension
    included); when two tasks share a file name, the later one wins.

    Args:
        tasks_data: Dict with a ``"tasks"`` list of task dicts.

    Returns:
        Dict mapping file name to task id.
    """
    name_to_id = {}
    for entry in tasks_data["tasks"]:
        entry_id = entry["id"]
        raw_path = (
            entry["storage_filename"]
            if "storage_filename" in entry
            else entry["data"]["audio"]
        )
        if raw_path:
            name_to_id[raw_path.rsplit("/", 1)[-1]] = entry_id
    return name_to_id

In [8]:
# Fetch the task listing for project 6.
tasks = get_tasks(6)

In [10]:
# Fetch annotation 243 so its structure can be inspected.
working = get_annotation(243)

In [11]:
working

{'id': 243,
 'result': [{'original_length': 481.813333,
   'value': {'start': 4.2,
    'end': 4.927364191818334,
    'channel': 0,
    'text': ['Strange.', '/stɹˈeɪndʒ./']},
   'id': 'bopaxzo3iF',
   'from_name': 'transcription',
   'to_name': 'audio',
   'type': 'textarea',
   'origin': 'manual'},
  {'original_length': 481.813333,
   'value': {'start': 4.2,
    'end': 4.927364191818334,
    'channel': 0,
    'labels': ['Speech']},
   'id': 'bopaxzo3iF',
   'from_name': 'labels',
   'to_name': 'audio',
   'type': 'labels',
   'origin': 'manual'},
  {'original_length': 481.813333,
   'value': {'start': 18.032415263020983,
    'end': 19.2,
    'channel': 0,
    'text': ['Oy, sorry.']},
   'id': 'uqhJZcl--o',
   'from_name': 'transcription',
   'to_name': 'audio',
   'type': 'textarea',
   'origin': 'manual'},
  {'original_length': 481.813333,
   'value': {'start': 18.032415263020983,
    'end': 19.2,
    'channel': 0,
    'labels': ['Speech']},
   'id': 'uqhJZcl--o',
   'from_name': 'lab

In [8]:
# Index the fetched tasks by audio file name.
mapping = index_task_filestem_to_id(tasks)

In [None]:
# get_annotation(243)

In [9]:
from collections import defaultdict


def process_annotation(data):
    """Begin processing an annotation dict (unfinished).

    Prints a message and returns when ``data`` has no ``"id"`` key. Otherwise
    it prepares an empty ``layers`` mapping and splits ``data["data"]["audio"]``
    on its last ``/``. Nothing is done with either value yet, so the function
    currently returns ``None`` on every path.

    Args:
        data: Dict expected to be an annotation.

    Returns:
        Always ``None``.
    """
    if "id" not in data:
        print("Missing id, probably not an annotation")
        return

    # Not filled in yet; kept as the intended per-layer accumulator.
    layers = defaultdict(list)

    has_audio = "data" in data and "audio" in data["data"]
    if not has_audio:
        return None

    # Splits on the last "/" only; the result is not used yet.
    audio = data["data"]["audio"].rsplit("/", maxsplit=1)
    return None

    


    


{'hsi_6_0718_209_001_main.wav': 96,
 'hsi_6_0718_209_002_main.wav': 97,
 'hsi_6_0718_209_003_main.wav': 98,
 'hsi_6_0718_210_001_main.wav': 99,
 'hsi_6_0718_210_002_main.wav': 100,
 'hsi_6_0718_211_001_main.wav': 101,
 'hsi_6_0718_211_002_main.wav': 102,
 'hsi_6_0718_222_001_main.wav': 103,
 'hsi_6_0718_222_003_main.wav': 104,
 'hsi_6_0718_227_001_main.wav': 105,
 'hsi_6_0718_227_002_main.wav': 106}

In [25]:
import json
import uuid
from praatio import textgrid


def tg_to_result(tgfile):
    """Convert a TextGrid tier into a list of Label Studio result dicts.

    Reads the ``annot`` tier, or the ``words`` tier when the TextGrid has no
    ``annot`` tier. Entries whose stripped label is empty, or is not a key of
    ``labels``, are skipped. Every remaining entry produces two dicts that
    share one id and the same start/end/channel: a ``labels`` region followed
    by a ``textarea`` region.

    ``labels`` and ``texts`` are not defined in this function; they are
    presumably module-level mappings keyed by the tier label (verify where
    they are defined).

    Args:
        tgfile: Path of the TextGrid file to read.

    Returns:
        List of result dicts, two per kept entry, in tier order.
    """
    outputs = []
    tg = textgrid.openTextgrid(tgfile, False)
    tiername = "annot" if "annot" in tg.tierNames else "words"

    for entry in tg.getTier(tiername).entries:
        key = entry.label.strip()
        if key == "" or key not in labels:
            continue

        label = labels[key]
        transcript = texts[key]

        # One short id, shared by both dicts generated for this entry.
        region_id = str(uuid.uuid4())[:6]
        span = {"start": entry.start, "end": entry.end, "channel": 0}

        outputs.extend([
            {
                "value": {**span, "labels": [label]},
                "from_name": "labels",
                "to_name": "audio",
                "type": "labels",
                "id": region_id,
            },
            {
                "value": {**span, "text": [transcript]},
                "from_name": "transcription",
                "to_name": "audio",
                "type": "textarea",
                "id": region_id,
            },
        ])

    return outputs

In [13]:
def post_results(id, task, project, results):
    """Send a PATCH that sets the ``result`` of an existing annotation.

    The request goes to ``{host}annotations/{id}/`` with ``taskID`` and
    ``project`` in the query string, a JSON body, and the module-level
    ``headers`` plus a JSON content type. The response status is not checked.

    Args:
        id: Id of the annotation to update.
        task: Task id, sent as the ``taskID`` query parameter.
        project: Project id, sent both in the query string and in the body.
        results: List of result dicts to store as the annotation's ``result``.

    Returns:
        The ``requests`` response object, unchecked.
    """
    payload = {
        "was_cancelled": False,
        "ground_truth": False,
        "project": project,
        "draft_id": 0,
        "parent_prediction": None,
        "parent_annotation": None,
        "result": results,
    }
    # Copy the shared headers so the module-level dict is left untouched.
    request_headers = {**headers, "Content-type": "application/json"}
    url = f"{host}annotations/{id}/?taskID={task}&project={project}"
    return requests.patch(url, data=json.dumps(payload), headers=request_headers)

In [24]:
# Convert one TextGrid from input_dir into Label Studio result dicts.
file = f"{input_dir}hsi_4_0717_211_002_main.TextGrid"
data = tg_to_result(file)

In [None]:
# PATCH annotation 263 (task 77, project 5) with the converted results and
# print the response body returned by the server.
r = post_results(263, 77, 5, data)
print(r.text)