In [42]:
import re
import os
import json


def parse_input(input_text):
    """Split a raw text into an identifier and a decoded JSON payload.

    The text is cut at every "```json" or "```" fence marker and pieces that
    are blank after trimming whitespace are dropped. The first remaining
    piece becomes the id; the last remaining piece is decoded as JSON.

    Args:
        input_text: Raw text to split.

    Returns:
        A dict with the keys "id" (the first piece, a str) and "content"
        (the decoded JSON value of the last piece).

    Raises:
        IndexError: If no non-blank piece is left after splitting.
        json.JSONDecodeError: If the last piece is not valid JSON.
    """
    chunks = []
    for piece in re.split(r"```json|```", input_text):
        trimmed = piece.strip()
        if trimmed:
            chunks.append(trimmed)

    identifier = chunks[0]
    payload = json.loads(chunks[-1])
    return {"id": identifier, "content": payload}


# Parse every file in "tmp" into a {"id", "content"} record.
signs = []
count = 0  # number of files whose content could not be read or parsed
failed_files = []  # (filename, error) pairs, kept so failures can be inspected

for filename in sorted(os.listdir("tmp")):
    # Explicit UTF-8: the parsed entries contain accented characters, and the
    # default encoding of open() depends on the platform locale.
    with open(f"tmp/{filename}", "r", encoding="utf-8") as f:
        try:
            filecontent = f.read()
            signs.append(parse_input(filecontent))
        except Exception as e:
            # Best effort: an unreadable or malformed file is counted and
            # remembered instead of aborting the whole load.
            count += 1
            failed_files.append((filename, repr(e)))

print(count, len(signs))

7 6358


In [44]:
import json


# Number of signs seen for each composite index string.
count = dict()
# Number of signs seen for each content length.
count_length = dict()
# Signs bucketed by their composite index string.
sign_groups = dict()


def create_index(
    index_list_to_use,
    hand,
    shape,
    hand_orientation,
    palm_orientation,
    fingers_orientation,
    location,
    movement,
):
    """Build the index string of one hand from the selected features.

    The result has the form "<hand>(<v1>,<v2>,...,)": every selected value is
    followed by a comma, so a trailing comma precedes the closing parenthesis
    whenever at least one feature is selected.

    Args:
        index_list_to_use: Container of feature names to include. Recognised
            names are "shape", "hand_orientation", "palm_orientation",
            "fingers_orientation", "location" and "movement".
        hand: Prefix string placed before the opening parenthesis.
        shape, hand_orientation, palm_orientation, fingers_orientation,
        location, movement: Feature values; each is emitted only when its
            name is in index_list_to_use, always in this fixed order.

    Returns:
        The index string.
    """
    prefix = hand + "("

    # Fixed emission order, independent of the order of index_list_to_use.
    ordered_features = (
        ("shape", shape),
        ("hand_orientation", hand_orientation),
        ("palm_orientation", palm_orientation),
        ("fingers_orientation", fingers_orientation),
        ("location", location),
        ("movement", movement),
    )
    selected = "".join(
        f"{value}," for name, value in ordered_features if name in index_list_to_use
    )

    return prefix + selected + ")"


def _hand_features(frame, hand_key, default_location):
    """Extract the feature values of one hand from a single content entry.

    Missing keys fall back to the same defaults for both hands, except for
    the location, whose default is supplied by the caller.

    Args:
        frame: One entry of a sign's content; must offer ``.get``.
        hand_key: Key of the hand to read ("right" or "left").
        default_location: Value used when the hand has no "location".

    Returns:
        A tuple (shape, hand_orientation, palm_orientation,
        fingers_orientation, location, movement), in the positional order
        create_index expects after its ``hand`` argument.
    """
    hand = frame.get(hand_key, {})
    movements = hand.get("movement", [])
    # Only the first movement takes part in the index; an absent, empty or
    # falsy first movement is represented as "nil".
    first_movement = movements[0] if len(movements) > 0 else None
    return (
        hand.get("shape", "default"),
        hand.get("hand_orientation", "any"),
        hand.get("palm_orientation", "default"),
        hand.get("fingers_orientation", "any"),
        hand.get("location", default_location),
        first_movement or "nil",
    )


def parse_signs(index_list_to_use, length):
    """Group the module-level ``signs`` by a composite index of hand features.

    For each sign, the first ``length`` entries of its content are each turned
    into a right-hand index followed by a left-hand index (see create_index);
    the concatenation of all of them is the sign's composite index.

    Side effects on module-level dicts:
        count_length: incremented for the sign's content length. This happens
            before the entries are processed, so a sign that is skipped later
            is still counted here.
        count: incremented for the sign's composite index.
        sign_groups: the sign is appended to the list of its composite index.

    A sign whose processing raises is skipped and reported on stdout together
    with the start of its id.

    Args:
        index_list_to_use: Feature names forwarded to create_index.
        length: Maximum number of content entries considered per sign.
    """
    for sign in signs:
        try:
            content = sign["content"]
            count_length[len(content)] = count_length.get(len(content), 0) + 1

            parts = []
            for c in content[:length]:
                # The default location differs per hand: "chest" for the
                # right hand, "default" for the left hand.
                parts.append(
                    create_index(
                        index_list_to_use, "r", *_hand_features(c, "right", "chest")
                    )
                )
                parts.append(
                    create_index(
                        index_list_to_use, "l", *_hand_features(c, "left", "default")
                    )
                )
            indexes = "".join(parts)

            count[indexes] = count.get(indexes, 0) + 1
            sign_groups.setdefault(indexes, []).append(sign)
        except Exception as e:
            # Name the offending sign: the exception text alone does not say
            # which entry is malformed.
            sign_id = sign.get("id", "<unknown>") if isinstance(sign, dict) else sign
            print(f"Skipping sign {str(sign_id)[:80]!r}: {e}")


# Group on every available feature, using at most the first five content
# entries of each sign.
parse_signs(
    index_list_to_use=[
        "shape",
        "hand_orientation",
        "palm_orientation",
        "fingers_orientation",
        "location",
        "movement",
    ],
    length=5,
)

'str' object has no attribute 'get'


In [45]:

# Rank the composite index strings from most to least frequent and list them.
ranked = sorted(count.items(), key=lambda item: item[1], reverse=True)

for k, v in ranked:
    print(k, v)

# counter: index strings that occur exactly once; total: sum of all counts.
counter = sum(1 for _, occurrences in ranked if occurrences == 1)
total = sum(occurrences for _, occurrences in ranked)

print(counter, total)

r(5,any,left,any,chest,front,)l(5,any,right,any,default,front,) 16
r(X,any,left,any,index,nil,)l(X,any,back,any,default,nil,) 11
r(L,any,left,any,chin,index_left,)l(default,any,default,any,default,nil,) 9
r(open_united,horizontal,back,any,chest_left,right,)l(default,any,default,any,default,nil,)r(open_united,horizontal,back,any,chest_right,nil,)l(default,any,default,any,default,nil,) 9
r(open_thumb_index_united,vertical,front,any,chest,right,)l(open_thumb_index_united,vertical,front,any,default,left,) 8
r(I,any,front,any,chest,left,)l(default,any,default,any,default,nil,) 7
r(U,any,back,any,mouth,front,)l(default,any,default,any,default,nil,) 7
r(1,any,front,any,chest,down,)l(default,any,default,any,default,nil,) 7
r(open,horizontal,left,any,chest,right,)l(open,horizontal,right,any,default,left,) 6
r(D,any,left,any,forehead,front,)l(default,any,default,any,default,nil,) 6
r(closed_thumb_index_curved,horizontal,left,any,chest,down,)l(closed_thumb_index_curved,horizontal,right,any,defaul

In [48]:
# Composite index string of the group to inspect.
group = "r(open_thumb,any,down,any,hand,right,)l(open,any,down,any,default,nil,)"

# ensure_ascii=False keeps accented characters readable in the dump.
matching_signs = sign_groups[group]
print(json.dumps(matching_signs, ensure_ascii=False, indent=4))

[
    {
        "id": "DOENÇA (1) (sinal usado em: SP, RJ, MS, PR, PB, SC, CE, RS) (Inglês: disease, sickness, illness, ailment): S. f. Falta de saúde. Enfermidade. Indisposição. Molestia. Processo mórbido definido e com sintomas característicos, que pode afetar o corpo todo, uma ou várias de suas partes. Ex.: A vacinação infantil imuniza o organismo contra várias doenças, e é muito importante que os pais se lembrem sempre de levar os seus filhos para a vacinação nas épocas apropriadas. (Mão esquerda aberta, palma para baixo; mão direita aberta, palma para baixo, polegar tocando a palma esquerda e dedos direitos tocando o dorso da mão esquerda. Oscilar os dedos direitos sobre a mão esquerda.)doença",
        "content": [
            {
                "right": {
                    "shape": "open_thumb",
                    "palm_orientation": "down",
                    "location": "hand",
                    "movement": [
                        "right",
                        "left"

In [52]:
from PIL import Image
import os


def find_text_image(text, directory="raw_texts"):
    """Return the name of the first file in ``directory`` that contains ``text``.

    Files are scanned in sorted name order and read as UTF-8; entries that
    are not regular files are ignored.

    Args:
        text: Substring to look for in each file's content.
        directory: Folder to scan. Defaults to "raw_texts".

    Returns:
        The matching file's name (not its full path), or None when no file
        contains ``text``.
    """
    for text_path in sorted(os.listdir(directory)):
        full_path = os.path.join(directory, text_path)
        if not os.path.isfile(full_path):
            continue
        # The context manager closes the handle, and the explicit encoding
        # keeps non-ASCII search strings from depending on the locale.
        with open(full_path, "r", encoding="utf-8") as text_file:
            if text in text_file.read():
                return text_path
    return None


def open_image(image_path):
    """Open the image at ``image_path`` and show it via ``Image.show()``.

    An I/O failure while opening or showing the image is reported on stdout
    instead of being raised.

    Args:
        image_path: Path of the image file to display.
    """
    failure_message = "Error: Unable to open image. Make sure the path is correct and you have permission to access the file."
    try:
        Image.open(image_path).show()
    except OSError:  # IOError is an alias of OSError in Python 3
        print(failure_message)


# Look up the raw text file that mentions the entry, then show the image
# that shares its base name.
founded_path = find_text_image("DOENÇA (")

if founded_path is None:
    # find_text_image yields None when nothing matches; calling .replace on
    # it would raise AttributeError.
    print("No raw text file contains the requested entry; nothing to display.")
else:
    image_path = f"images/{founded_path.replace('.txt', '')}.png"
    open_image(image_path)

FileNotFoundError: [Errno 2] No such file or directory: 'texts'