In [None]:
import os
import json
from pathlib import Path
from openai import OpenAI


In [None]:
# Mount Google Drive (Colab only) so that the /content/drive/MyDrive/... paths
# used by the cells below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Templates übersetzen

In [None]:

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

# Folder the *_0shot.json inputs are read from.
INPUT_DIR = Path("/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final_1/permutation_0")
# Folder the translated copies are written to (created on demand).
# NOTE(review): "tepmplates_test" looks like a typo for "templates_test"; left unchanged
# because results may already exist under this name.
OUTPUT_DIR = Path("/content/drive/MyDrive/master_thesis/data/multilingual_data/factual/tepmplates_test")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Target languages: language code -> English language name used inside the prompt.
# The insertion order is the order in which the translations are requested.
TARGET_LANGS = dict(
    de="German",
    fr="French",
    it="Italian",
    pt="Portuguese",
    hi="Hindi",
    es="Spanish",
    th="Thai",
)

# Translation helper backed by gpt-4o
def _parse_translated_list(raw, expected_len):
    """Extract a JSON list of exactly ``expected_len`` strings from a model reply.

    The reply may carry text around the list (for example a Markdown code fence), so
    only the span from the first "[" to the last "]" is parsed.

    Returns:
        The list of strings, or ``None`` if the reply does not contain such a list.
    """
    text = (raw or "").strip()
    start, end = text.find("["), text.rfind("]")
    if start == -1 or end < start:
        return None
    try:
        parsed = json.loads(text[start:end + 1])
    except ValueError:
        return None
    if not isinstance(parsed, list) or len(parsed) != expected_len:
        # A list of the wrong length cannot be aligned with the English templates.
        return None
    if not all(isinstance(item, str) for item in parsed):
        return None
    return parsed


def translate_templates_with_gpt(templates, target_language):
    """Translate a list of English prompt templates with gpt-4o.

    Args:
        templates: list of English template strings.
        target_language: English name of the target language (e.g. "German").

    Returns:
        A list with one translated string per input template. If the reply cannot be
        read as a JSON list of matching length, a message is printed and a list of
        empty strings of the same length is returned instead.

    Raises:
        Whatever the OpenAI client raises for a failed request (not caught here).
    """
    prompt = (
        f"Translate the following English prompt templates into {target_language}. "
        f"Only return a JSON list of translated strings.\n\n"
        f"{json.dumps(templates, ensure_ascii=False, indent=2)}"
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.2
    )

    translated = _parse_translated_list(
        response.choices[0].message.content, len(templates)
    )
    if translated is not None:
        return translated

    print(
        "Fehler beim Parsen der GPT-Antwort: "
        f"keine JSON-Liste mit {len(templates)} Strings"
    )
    return [""] * len(templates)

# Only the *_0shot.json files are processed
for file in INPUT_DIR.glob("*_0shot.json"):
    data = json.loads(file.read_text(encoding="utf-8"))

    english_templates = data.get("prompt_templates", [])
    if english_templates:
        # One GPT request per target language; results are stored next to the originals
        for lang_code, lang_name in TARGET_LANGS.items():
            print(f"Übersetze {file.name} nach {lang_name}...")
            translations = translate_templates_with_gpt(english_templates, lang_name)
            data[f"prompt_templates_{lang_code}"] = translations

        # Write the enriched file under the same name into the output folder
        output_path = OUTPUT_DIR / file.name
        output_path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8"
        )

        print(f"Gespeichert: {output_path.name}")
    else:
        print(f"Keine prompt_templates in {file.name}")

## Linguistic data

### antonym

In [None]:
import json
import time
import re
from pathlib import Path
from openai import OpenAI
from tqdm import tqdm

"""
verarbeitet nur adjective_antonym.json
neuer Prompt erzwingt echte Adjektive, 1 Token, Klein­schreibung
"""

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Input file and output file of this cell (the output folder is created on demand).
INPUT_FILE  = Path("/content/drive/MyDrive/master_thesis/data/linguistic_data/zero_shot_linguistic/zero_shot_best_template/adjective_antonym.json")
OUTPUT_FILE = Path("/content/drive/MyDrive/master_thesis/data/multilingual_data/linguistic/gpt_linguistic_final/adjective_antonym.json")
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Language code -> English language name that is inserted into the prompts.
LANGS = {
    "de": "German", "fr": "French", "it": "Italian", "pt": "Portuguese",
    "hi": "Hindi",  "es": "Spanish", "th": "Thai"
}

# Prompt template per relation name. It is filled via str.format with the
# placeholders {w1}, {w2} (the English word pair) and {lang} (target language name).
REL_PROMPT = {
    "adj_antonym": (
        # Context
        "You are a professional linguist specialising in {lang}. "
        "Translate the **English adjective pair** “{w1} → {w2}” into {lang}. "
        # Format
        "Return **exactly two words**, lowercase, comma-separated, no extra text. "
        # Rules
        "• Both outputs **must be adjectives in positive/base form** (no nouns, no verbs). "
        "• Provide **one token per word** – no spaces, hyphens or clitics. "
        "• Preserve all mandatory diacritics for {lang}. "
        "• If no direct one-word adjective exists, choose the best single-word approximation; "
        "do **not** fall back to verbs or nouns."
    )
}

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)


def _clean(txt: str) -> str:
    """Trim whitespace and surrounding punctuation from a model reply.

    Characters are kept from the first to the last letter, combining mark, digit or
    underscore. The previous ``\\W``-based regex also removed trailing combining
    marks, because Python's ``\\w`` does not match them; that cut Hindi/Thai vowel
    signs and tone marks off the end of a translation.
    """
    import unicodedata  # local import: this cell does not import unicodedata

    def keep(ch: str) -> bool:
        return ch == "_" or unicodedata.category(ch)[0] in "LMN"

    text = txt.strip()
    start, end = 0, len(text)
    while start < end and not keep(text[start]):
        start += 1
    while end > start and not keep(text[end - 1]):
        end -= 1
    return text[start:end]

def _first_token(txt: str) -> str:
    """Return the text before the first whitespace, comma, semicolon or arrow."""
    delimiter = re.search(r"[\s,;→]", txt)
    return txt if delimiter is None else txt[:delimiter.start()]

def translate_word(word: str, lc: str, retries: int = 3) -> str:
    """Translate a single word into the language with code ``lc`` via gpt-4o.

    Args:
        word: word to translate; an empty value yields "" without a request.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        The cleaned first token of the reply, or ``word`` itself if every attempt
        raised an exception.
    """
    if not word:
        return ""

    language = LANGS[lc]
    messages = [{
        "role": "user",
        "content": (
            f"Translate the word '{word}' into {language} and output only the translation, "
            "no extra words, no quotes."
        ),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            answer = _clean(reply.choices[0].message.content)
            return _first_token(answer)
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    return word

def translate_pair(w1: str, w2: str, rel: str, lc: str, retries: int = 3):
    """Translate an English word pair with the relation-specific prompt.

    Args:
        w1, w2: the English pair.
        rel: key into ``REL_PROMPT``.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        A 2-tuple with the cleaned first token of each translated word. If no reply
        splits into exactly two comma-separated parts, both words are translated
        separately with ``translate_word``.
    """
    messages = [{
        "role": "user",
        "content": REL_PROMPT[rel].format(w1=w1, w2=w2, lang=LANGS[lc]),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            stripped = (piece.strip() for piece in reply.choices[0].message.content.split(","))
            pieces = list(filter(None, stripped))
            if len(pieces) == 2:
                first, second = (_first_token(_clean(piece)) for piece in pieces)
                return first, second
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    # Fallback: word-by-word translation
    return translate_word(w1, lc), translate_word(w2, lc)

# Load the input file
data = json.loads(INPUT_FILE.read_text("utf-8"))

# Rebuild every sample: English pair first, then one subject_/object_ pair per language
new_samples = []
for s in tqdm(data.get("samples", []), desc="Samples"):
    subj_en = s["subject"]
    obj_en = s["object"]
    entry = dict(subject=subj_en, object=obj_en)

    for lc in LANGS:
        subj_tr, obj_tr = translate_pair(subj_en, obj_en, "adj_antonym", lc)
        entry.update({f"subject_{lc}": subj_tr, f"object_{lc}": obj_tr})

    new_samples.append(entry)

data["samples"] = new_samples

# Persist the result as UTF-8 JSON without ASCII escaping
OUTPUT_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
print("\n adjective_antonym.json erfolgreich aktualisiert.")


### comparative

In [None]:
import json
import time
import re
from pathlib import Path
from openai import OpenAI
from tqdm import tqdm

"""

• verarbeitet nur adjective_comparative.json
• neuer Prompt lässt 1- oder 2-Wort-Komparative zu
"""

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Input file and output file of this cell (the output folder is created on demand).
INPUT_FILE  = Path("/content/drive/MyDrive/master_thesis/data/linguistic_data/zero_shot_linguistic/zero_shot_best_template/adjective_comparative.json")
OUTPUT_FILE = Path("/content/drive/MyDrive/master_thesis/data/multilingual_data/linguistic/gpt_linguistic_final/adjective_comparative.json")
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Language code -> English language name that is inserted into the prompts.
LANGS = {
    "de": "German", "fr": "French", "it": "Italian", "pt": "Portuguese",
    "hi": "Hindi",  "es": "Spanish", "th": "Thai"
}

# Prompt template per relation name. It is filled via str.format with the
# placeholders {w1}, {w2} (the English word pair) and {lang} (target language name).
REL_PROMPT = {
    "adj_comparative": (
        # Context
        "You are a professional linguist specialising in {lang}. "
        "Translate the **English adjective pair** “{w1} → {w2}” into {lang}. "
        # Format
        "Return exactly two items, comma-separated, no extra text: "
        "1) the base adjective, 2) the comparative expression. "
        # Rules
        "• Each item may contain one **or two** words (e.g. “plus grand”, “más grande”). "
        "• Both items must be adjectives; do not output nouns or verbs. "
        "• Preserve all diacritics and write in lowercase unless {lang} orthography requires otherwise. "
        "• Do not add intensifiers like 'very'; no articles or extra words."
    )
}

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)

def _clean(txt: str) -> str:
    """Trim whitespace and surrounding punctuation from a model reply.

    Characters are kept from the first to the last letter, combining mark, digit or
    underscore. The previous ``\\W``-based regex also removed trailing combining
    marks, because Python's ``\\w`` does not match them; that cut Hindi/Thai vowel
    signs and tone marks off the end of a translation.
    """
    import unicodedata  # local import: this cell does not import unicodedata

    def keep(ch: str) -> bool:
        return ch == "_" or unicodedata.category(ch)[0] in "LMN"

    text = txt.strip()
    start, end = 0, len(text)
    while start < end and not keep(text[start]):
        start += 1
    while end > start and not keep(text[end - 1]):
        end -= 1
    return text[start:end]

def _first_token(txt: str) -> str:
    """First token before whitespace/comma/semicolon/arrow (hyphens are kept)."""
    delimiter = re.search(r"[\s,;→]", txt)
    return txt if delimiter is None else txt[:delimiter.start()]

def translate_word(word: str, lc: str, retries: int = 3) -> str:
    """Translate a single word into the language with code ``lc`` via gpt-4o.

    Args:
        word: word to translate; an empty value yields "" without a request.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        The cleaned first token of the reply, or ``word`` itself if every attempt
        raised an exception.
    """
    if not word:
        return ""

    language = LANGS[lc]
    messages = [{
        "role": "user",
        "content": (
            f"Translate the word '{word}' into {language} and output only the translation, "
            "no extra words, no quotes."
        ),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            answer = _clean(reply.choices[0].message.content)
            return _first_token(answer)
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    return word

def translate_pair(w1: str, w2: str, rel: str, lc: str, retries: int = 3):
    """Translate an English adjective pair with the relation-specific prompt.

    Args:
        w1, w2: the English pair.
        rel: key into ``REL_PROMPT``; a parsed reply is only accepted for
            "adj_comparative".
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        A 2-tuple of the two cleaned reply items (each may contain several words).
        If no reply is accepted, both words are translated separately with
        ``translate_word``.
    """
    messages = [{
        "role": "user",
        "content": REL_PROMPT[rel].format(w1=w1, w2=w2, lang=LANGS[lc]),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            pieces = []
            for chunk in reply.choices[0].message.content.split(","):
                chunk = chunk.strip()
                if chunk:
                    pieces.append(chunk)
            if len(pieces) == 2 and rel == "adj_comparative":
                base, comparative = pieces
                return _clean(base), _clean(comparative)
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    # Fallback: word-by-word translation
    return translate_word(w1, lc), translate_word(w2, lc)

data = json.loads(INPUT_FILE.read_text("utf-8"))


def translate_template(template: str, lc: str, retries: int = 3) -> str:
    """Translate one complete prompt template into the language with code ``lc``.

    ``translate_word`` keeps only the first token of the reply, which would cut a
    multi-word template down to its first word. Templates therefore get their own
    request whose whole reply (whitespace-trimmed only) is returned.

    Returns:
        The translated template, "" for an empty template, or the untranslated
        template if no attempt produced a non-empty reply.
    """
    if not template:
        return ""
    prompt = (
        f"Translate the following prompt template into {LANGS[lc]}. "
        "Keep any placeholders in curly braces exactly as they are and output only "
        "the translation, no extra words, no quotes.\n\n"
        + template
    )
    for a in range(retries):
        try:
            r = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )
            text = (r.choices[0].message.content or "").strip()
            if text:
                return text
        except Exception:
            time.sleep(2 * (a + 1))
    return template


# Carry the prompt templates over into all languages: every template becomes a dict
# with the English text under "en" plus one entry per language code.
data["prompt_templates"] = [
    {"en": en_tpl, **{lc: translate_template(en_tpl, lc) for lc in LANGS}}
    for en_tpl in (
        tpl if isinstance(tpl, str) else tpl.get("en", "")
        for tpl in data.get("prompt_templates", [])
    )
]

# Rebuild every sample: English pair first, then one subject_/object_ pair per language
new_samples = []
for s in tqdm(data.get("samples", []), desc="Samples"):
    subj_en, obj_en = s["subject"], s["object"]
    entry = {"subject": subj_en, "object": obj_en}

    for lc in LANGS:
        subj_tr, obj_tr = translate_pair(subj_en, obj_en, "adj_comparative", lc)
        entry[f"subject_{lc}"] = subj_tr
        entry[f"object_{lc}"]  = obj_tr

    new_samples.append(entry)

data["samples"] = new_samples
OUTPUT_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
print("\n adjective_comparative.json erfolgreich aktualisiert.")


### superlative

In [None]:
import json
import time
import re
from pathlib import Path
from openai import OpenAI
from tqdm import tqdm

"""
verarbeitet ausschließlich adjective_superlative.json
Prompt deckt alle Sprachen ab, mit deutschem Spezialfall
superlativ darf 1 oder 2 Wörter sein (z. B. „more dull“)
"""

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Input file and output file of this cell (the output folder is created on demand).
INPUT_FILE  = Path(
    "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
    "zero_shot_linguistic/zero_shot_best_template/adjective_superlative.json"
)
OUTPUT_FILE = Path(
    "/content/drive/MyDrive/master_thesis/data/multilingual_data/"
    "linguistic/gpt_linguistic_final/adjective_superlative.json"
)
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

# Language code -> English language name that is inserted into the prompts.
LANGS = {
    "de": "German", "fr": "French", "it": "Italian", "pt": "Portuguese",
    "hi": "Hindi",  "es": "Spanish", "th": "Thai"
}

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)

def build_super_prompt(w1: str, w2: str, lc: str) -> str:
    """Build the superlative prompt for one English pair and one target language.

    The shared instructions are followed by a German-specific rule set when ``lc``
    is "de" and by a generic rule set for every other language code. ``{w1}``,
    ``{w2}`` and ``{lang}`` are filled in last; ``lang`` is looked up in ``LANGS``.
    """
    german_rules = (
        "• Allowed superlatives: attributive (stamm + -ste/-sten) **or** "
        "prädikativ (“am” + stamm + -sten). "
        "• Do not include articles like “der/die/das”. "
        "• Use the same word stem; no other adjectives."
    )
    generic_rules = (
        "• Omit definite articles (le, la, el, il, o …). "
        "• Use the correct irregular form when it exists "
        "(e.g. ‘best’ = ‘meilleur’ in French)."
    )
    template = "".join([
        "You are a professional linguist specialising in {lang}. ",
        "Translate the **English adjective pair** “{w1} → {w2}” into {lang}. ",
        "Return exactly two items, comma-separated, no extra text: ",
        "1) the base adjective, 2) its superlative expression. ",
        "Rules: ",
        "• The superlative may contain one **or two** words ",
        "(e.g. “plus grand”, “más grande”). ",
        "• Both items must be adjectives – no nouns or verbs. ",
        "• Preserve all diacritics and write in lowercase unless ",
        "{lang} orthography requires capitals. ",
        german_rules if lc == "de" else generic_rules,
    ])
    return template.format(w1=w1, w2=w2, lang=LANGS[lc])

def _clean(txt: str) -> str:
    """Trim whitespace and surrounding punctuation from a model reply.

    Characters are kept from the first to the last letter, combining mark, digit or
    underscore. The previous ``\\W``-based regex also removed trailing combining
    marks, because Python's ``\\w`` does not match them; that cut Hindi/Thai vowel
    signs and tone marks off the end of a translation.
    """
    import unicodedata  # local import: this cell does not import unicodedata

    def keep(ch: str) -> bool:
        return ch == "_" or unicodedata.category(ch)[0] in "LMN"

    text = txt.strip()
    start, end = 0, len(text)
    while start < end and not keep(text[start]):
        start += 1
    while end > start and not keep(text[end - 1]):
        end -= 1
    return text[start:end]

def _first_token(txt: str) -> str:
    """Return the text before the first whitespace, comma, semicolon or arrow."""
    delimiter = re.search(r"[\s,;→]", txt)
    return txt if delimiter is None else txt[:delimiter.start()]

def translate_word(word: str, lc: str, retries: int = 3) -> str:
    """Translate a single word into the language with code ``lc`` via gpt-4o.

    Args:
        word: word to translate; an empty value yields "" without a request.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        The cleaned first token of the reply, or ``word`` itself if every attempt
        raised an exception.
    """
    if not word:
        return ""

    language = LANGS[lc]
    messages = [{
        "role": "user",
        "content": (
            f"Translate the word '{word}' into {language} and output only the translation, "
            "no extra words, no quotes."
        ),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            answer = _clean(reply.choices[0].message.content)
            return _first_token(answer)
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    return word

def translate_pair(w1: str, w2: str, lc: str, retries: int = 3):
    """Translate an English base/superlative pair into the language ``lc``.

    The prompt comes from ``build_super_prompt``. A reply is accepted when it splits
    into exactly two non-empty comma-separated items.

    Returns:
        A 2-tuple of the two cleaned reply items (each may contain several words).
        If no reply is accepted, both words are translated separately with
        ``translate_word``.
    """
    messages = [{"role": "user", "content": build_super_prompt(w1, w2, lc)}]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            pieces = []
            for chunk in reply.choices[0].message.content.split(","):
                chunk = chunk.strip()
                if chunk:
                    pieces.append(chunk)
            if len(pieces) == 2:
                base, superlative = pieces
                return _clean(base), _clean(superlative)
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    # Fallback: word-by-word translation
    return translate_word(w1, lc), translate_word(w2, lc)

data = json.loads(INPUT_FILE.read_text("utf-8"))


def translate_template(template: str, lc: str, retries: int = 3) -> str:
    """Translate one complete prompt template into the language with code ``lc``.

    ``translate_word`` keeps only the first token of the reply, which would cut a
    multi-word template down to its first word. Templates therefore get their own
    request whose whole reply (whitespace-trimmed only) is returned.

    Returns:
        The translated template, "" for an empty template, or the untranslated
        template if no attempt produced a non-empty reply.
    """
    if not template:
        return ""
    prompt = (
        f"Translate the following prompt template into {LANGS[lc]}. "
        "Keep any placeholders in curly braces exactly as they are and output only "
        "the translation, no extra words, no quotes.\n\n"
        + template
    )
    for a in range(retries):
        try:
            r = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )
            text = (r.choices[0].message.content or "").strip()
            if text:
                return text
        except Exception:
            time.sleep(2 * (a + 1))
    return template


# Translate the prompt templates into all languages: every template becomes a dict
# with the English text under "en" plus one entry per language code.
data["prompt_templates"] = [
    {"en": en_tpl, **{lc: translate_template(en_tpl, lc) for lc in LANGS}}
    for en_tpl in (
        tpl if isinstance(tpl, str) else tpl.get("en", "")
        for tpl in data.get("prompt_templates", [])
    )
]

# Add subject_<lc>/object_<lc> to every sample in place
for s in tqdm(data["samples"], desc="Samples"):
    base_en, sup_en = s["subject"], s["object"]
    for lc in LANGS:
        s[f"subject_{lc}"], s[f"object_{lc}"] = translate_pair(base_en, sup_en, lc)

OUTPUT_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
print(" adjective_superlative.json erfolgreich aktualisiert.")


### verb past tense

In [None]:
"""
übersetzt ALLE Samples in 7 Sprachen, ohne englische Fallbacks
nutzt neue Prompt-Regeln für de/hi/th
"""

import json, re, time
from pathlib import Path
from openai import OpenAI
from tqdm import tqdm

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
BASE    = "/content/drive/MyDrive/master_thesis/data"

# Input file and output file of this cell (the output folder is created on demand).
IN_F  = Path(f"{BASE}/linguistic_data/zero_shot_linguistic/zero_shot_best_template/verb_past_tense.json")
OUT_F = Path(f"{BASE}/multilingual_data/linguistic/gpt_linguistic_final/verb_past_tense.json")
OUT_F.parent.mkdir(parents=True, exist_ok=True)

# Language code -> English language name that is inserted into the prompts.
LANGS = {
    "de": "German", "fr": "French", "it": "Italian", "pt": "Portuguese",
    "hi": "Hindi",  "es": "Spanish", "th": "Thai"
}
# German verb prefixes that post_process treats as separable.
SEP_PREFIXES = ("ab","an","auf","aus","ein","mit","nach","vor",
                "weg","zu","zurück","zusammen","los","weiter","fest")

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)

def build_prompt(inf, past, lc):
    """Build the verb-pair translation prompt for the language with code ``lc``.

    The common instruction text is filled with the language name (from ``LANGS``)
    and the English infinitive/past pair; "de", "hi" and "th" get one additional
    language-specific sentence appended, every other code gets none.
    """
    language = LANGS[lc]
    head = "".join((
        "You are a professional linguist specialising in {lang}. ",
        "Translate the English verb pair “{inf} (infinitive) → {past} (simple past)” into {lang}. ",
        "Return exactly ONE line in the form:\n<infinitive>|<past>\n",
        "No extra text. Use lowercase, keep diacritics, no particles or auxiliaries.",
    )).format(lang=language, inf=inf, past=past)

    if lc == "de":
        tail = " For separable verbs include the prefix in the past form, joined with a hyphen (gab-auf)."
    elif lc == "hi":
        tail = " Hindi infinitive ends with “न” (e.g. पूछन); past masc. singular is bare root (e.g. पूछ)."
    elif lc == "th":
        tail = " Thai verbs do not inflect; output “<verb>|<verb แล้ว>”, with a space before แล้ว."
    else:
        tail = ""
    return head + tail

def post_process(inf, past, lc):
    """Normalise a translated (infinitive, past) pair for the language ``lc``.

    Both forms are lower-cased first, then:
      * "de": if the infinitive starts with one of ``SEP_PREFIXES`` and the past form
        contains no hyphen yet, the prefix is attached as ``<past>-<prefix>``.
      * "hi": a trailing "ना"/"न" of the infinitive becomes "न" and a trailing "ा"
        of the past form is dropped.
      * "th": the past form is rebuilt as ``<infinitive> แล้ว``.
    Other language codes are only lower-cased.

    Returns:
        The ``(infinitive, past)`` tuple.
    """
    inf, past = inf.lower(), past.lower()
    if lc == "de":
        # Longest prefix wins; otherwise "zu" would shadow "zurück" and "zusammen".
        # NOTE(review): this is a purely orthographic test, so every infinitive that
        # merely starts with these letters is treated as separable — confirm.
        pref = max((p for p in SEP_PREFIXES if inf.startswith(p)), key=len, default="")
        if pref and "-" not in past:
            words = past.split()
            if len(words) > 1 and words[-1] == pref:
                # Reply was e.g. "gab auf": join the existing prefix with a hyphen
                # instead of appending it a second time ("gab auf-auf").
                past = " ".join(words[:-1]) + "-" + pref
            else:
                past = f"{past}-{pref}"
    elif lc == "hi":
        # NOTE(review): mirrors the prompt rule that the infinitive ends in "न";
        # confirm this shortened form is what downstream code expects.
        inf  = re.sub("ना?$", "न", inf)
        past = re.sub("ा?$",  "", past)
    elif lc == "th":
        past = inf + " แล้ว"
    return inf, past

def translate_pair(inf_en, past_en, lc, retries=4):
    """Translate an English (infinitive, past) verb pair into the language ``lc``.

    The reply is expected as ``<infinitive>|<past>``; a comma or newline is accepted
    as a fallback separator. Both parts are stripped of surrounding whitespace
    (previously only the "|" branch did so) and passed through ``post_process``.

    Args:
        inf_en, past_en: the English pair.
        lc: language code understood by ``build_prompt``/``post_process``.
        retries: maximum number of requests.

    Returns:
        The ``(infinitive, past)`` tuple returned by ``post_process``.

    Raises:
        RuntimeError: if no attempt yields two non-empty parts. The last exception
            caught while trying (if any) is attached as ``__cause__``.
    """
    prompt = build_prompt(inf_en, past_en, lc)
    last_error = None
    for attempt in range(retries):
        try:
            rsp = client.chat.completions.create(
                model="gpt-4o",
                temperature=0,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=25
            )
            raw = rsp.choices[0].message.content.strip()
            separator = r"\|" if "|" in raw else r"[,\n]"
            parts = [p.strip() for p in re.split(separator, raw, maxsplit=1)]
            if len(parts) == 2 and all(parts):
                return post_process(*parts, lc)
        except Exception as exc:
            last_error = exc
            # Linear back-off, but do not wait after the final attempt.
            if attempt < retries - 1:
                time.sleep(2 * (attempt + 1))
    raise RuntimeError(f"GPT-Übersetzung fehlgeschlagen für {inf_en} → {lc}") from last_error

# Load the input file
data = json.loads(IN_F.read_text("utf-8"))

# Add subject_<lc>/object_<lc> to every sample in place
for s in tqdm(data["samples"], desc="Übersetze Samples"):
    inf_en = s["subject"]
    past_en = s["object"]
    for lc in LANGS:
        inf_tr, past_tr = translate_pair(inf_en, past_en, lc)
        s.update({f"subject_{lc}": inf_tr, f"object_{lc}": past_tr})

# Persist the result as UTF-8 JSON without ASCII escaping
OUT_F.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
print(" Alle Samples übersetzt & gespeichert →", OUT_F)


### word last letter/first letter

In [None]:

"""
Word-Letter-Relationen (first / last)
übersetzt pro Sprache nur das Subjekt
 Objekt wird aus der Übersetzung gebildet:
  – first-letter.json
  – last-letter.json
"""

import json
import re
import time
from pathlib import Path

from openai import OpenAI
from tqdm import tqdm

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
BASE    = "/content/drive/MyDrive/master_thesis/data"

# Input folder and output folder of this cell (the output folder is created on demand).
IN_DIR  = Path(f"{BASE}/linguistic_data/zero_shot_linguistic/zero_shot_best_template")
OUT_DIR = Path(f"{BASE}/multilingual_data/linguistic/gpt_linguistic_final")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# File name -> which letter of the subject becomes the object ("first" or "last").
FILES = {
    "word_first_letter.json": "first",
    "word_last_letter.json":  "last"
}

# Language code -> English language name that is inserted into the prompts.
LANGS = {
    "de": "German", "fr": "French", "it": "Italian", "pt": "Portuguese",
    "hi": "Hindi",  "es": "Spanish", "th": "Thai"
}

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)

def clean(txt: str) -> str:
    """Trim whitespace and surrounding punctuation from a string.

    Characters are kept from the first to the last letter, combining mark, digit or
    underscore. The previous ``\\W``-based regex also removed trailing combining
    marks, because Python's ``\\w`` does not match them; that cut Hindi/Thai vowel
    signs and tone marks off the end of a translated word.
    """
    import unicodedata  # local import: this cell does not import unicodedata

    def keep(ch: str) -> bool:
        return ch == "_" or unicodedata.category(ch)[0] in "LMN"

    text = txt.strip()
    start, end = 0, len(text)
    while start < end and not keep(text[start]):
        start += 1
    while end > start and not keep(text[end - 1]):
        end -= 1
    return text[start:end]

def first_char(word: str) -> str:
    """First letter, digit or underscore of ``word`` ("" if there is none).

    Combining marks are skipped, so the result is the same character the old
    regex-based ``clean`` produced.
    """
    import unicodedata

    for ch in clean(word):
        if ch == "_" or unicodedata.category(ch)[0] in "LN":
            return ch
    return ""

def last_char(word: str) -> str:
    """Last letter, digit or underscore of ``word`` ("" if there is none).

    Combining marks are skipped, so the result is the same character the old
    regex-based ``clean`` produced.
    """
    import unicodedata

    for ch in reversed(clean(word)):
        if ch == "_" or unicodedata.category(ch)[0] in "LN":
            return ch
    return ""

def translate_word(word: str, lc: str, retries: int = 3) -> str:
    """Translate a single word into the language with code ``lc`` via gpt-4o.

    Args:
        word: word to translate.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        The first whitespace-separated token of the cleaned reply, or ``word``
        itself if every attempt raised an exception (an empty reply counts as one).
    """
    language = LANGS[lc]
    messages = [{
        "role": "user",
        "content": (
            f"Translate the word '{word}' into {language} and output only the translation, "
            "no extra words, no quotes."
        ),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            tokens = clean(reply.choices[0].message.content).split()
            return tokens[0]  # first token only
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    return word

def translate_template(template: str, lc: str, retries: int = 3) -> str:
    """Translate one complete prompt template into the language with code ``lc``.

    ``translate_word`` keeps only the first token of the reply, which would cut a
    multi-word template down to its first word. Templates therefore get their own
    request whose whole reply (whitespace-trimmed only) is returned.

    Returns:
        The translated template, "" for an empty template, or the untranslated
        template if no attempt produced a non-empty reply.
    """
    if not template:
        return ""
    prompt = (
        f"Translate the following prompt template into {LANGS[lc]}. "
        "Keep any placeholders in curly braces exactly as they are and output only "
        "the translation, no extra words, no quotes.\n\n"
        + template
    )
    for a in range(retries):
        try:
            r = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )
            text = (r.choices[0].message.content or "").strip()
            if text:
                return text
        except Exception:
            time.sleep(2 * (a + 1))
    return template


for fname, mode in FILES.items():
    src = IN_DIR  / fname
    dst = OUT_DIR / fname
    if not src.exists():
        print(f"{fname} fehlt – übersprungen.")
        continue

    data = json.loads(src.read_text("utf-8"))

    # Every template becomes a dict with the English text under "en" plus one
    # translated entry per language code.
    data["prompt_templates"] = [
        {"en": en_tpl, **{lc: translate_template(en_tpl, lc) for lc in LANGS}}
        for en_tpl in (
            tpl if isinstance(tpl, str) else tpl.get("en", "")
            for tpl in data.get("prompt_templates", [])
        )
    ]

    for samp in tqdm(data["samples"], desc=f"{fname}"):
        subj_en = samp["subject"]

        # English subject stays as it is; its object is derived from it
        if mode == "first":
            samp["object"] = first_char(subj_en)
        else:
            samp["object"] = last_char(subj_en)

        # All target languages: translate the subject, derive the object from it
        for lc in LANGS:
            subj_tr = translate_word(subj_en, lc)
            samp[f"subject_{lc}"] = subj_tr.upper()

            if mode == "first":
                samp[f"object_{lc}"] = first_char(subj_tr).upper()
            else:
                samp[f"object_{lc}"] = last_char(subj_tr).upper()

    dst.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
    print(f" {fname} fertig → {dst}")

print("\n  First-/Last-Letter-Dateien erfolgreich aktualisiert.")



hi/thai

In [None]:
"""
Word‑Letter‑Relationen (first / last)
übersetzt NUR THAI und HINDI (alle anderen Sprachen werden ignoriert)
Objekt wird aus der Übersetzung gebildet
LANGS enthält jetzt ausschließlich "hi" und "th".
Extraktion des sichtbaren Buchstabens nutzt Unicode‑Kategorie "L".

 – first-letter.json
  – last-letter.json

"""

import json
import re
import time
import unicodedata
from pathlib import Path

from openai import OpenAI
from tqdm import tqdm

import os

# Take the key from the OPENAI_API_KEY environment variable so it is never stored in
# the notebook; the empty-string fallback equals the former hard-coded placeholder.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
BASE    = "/content/drive/MyDrive/master_thesis/data"

# Input folder and output folder of this cell (the output folder is created on demand).
IN_DIR  = Path(f"{BASE}/linguistic_data/zero_shot_linguistic/zero_shot_best_template")
OUT_DIR = Path(f"{BASE}/multilingual_data/linguistic/gpt_linguistic_final_last_letter_hi_thai")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# File name -> which letter of the subject becomes the object ("first" or "last").
FILES = {
    "word_first_letter.json": "first",
    "word_last_letter.json":  "last"
}

# Thai and Hindi only
LANGS = {"hi": "Hindi", "th": "Thai"}

# OpenAI client shared by the translation helpers of this cell.
client = OpenAI(api_key=API_KEY)


def clean(txt: str) -> str:
    """Trim whitespace and surrounding punctuation from a string.

    Characters are kept from the first to the last letter, combining mark, digit or
    underscore. The previous ``\\W``-based regex also removed trailing combining
    marks, because Python's ``\\w`` does not match them; that cut Hindi/Thai vowel
    signs and tone marks off the end of a translated word.
    """
    def keep(ch: str) -> bool:
        return ch == "_" or unicodedata.category(ch)[0] in "LMN"

    text = txt.strip()
    start, end = 0, len(text)
    while start < end and not keep(text[start]):
        start += 1
    while end > start and not keep(text[end - 1]):
        end -= 1
    return text[start:end]


def visible_letters(word: str):
    """All characters of the cleaned word whose Unicode category starts with 'L'."""
    return [c for c in clean(word) if unicodedata.category(c).startswith("L")]


def first_visible_letter(word: str) -> str:
    """First letter-category character of ``word`` ("" if there is none)."""
    letters = visible_letters(word)
    return letters[0] if letters else ""


def last_visible_letter(word: str) -> str:
    """Last letter-category character of ``word`` ("" if there is none)."""
    letters = visible_letters(word)
    return letters[-1] if letters else ""


def translate_word(word: str, lc: str, retries: int = 3) -> str:
    """Translate a single word into the language with code ``lc`` via gpt-4o.

    Args:
        word: word to translate.
        lc: key into ``LANGS``.
        retries: maximum number of requests.

    Returns:
        The first whitespace-separated token of the cleaned reply, or ``word``
        itself if every attempt raised an exception (an empty reply counts as one).
    """
    language = LANGS[lc]
    messages = [{
        "role": "user",
        "content": (
            f"Translate the word '{word}' into {language} and output only the translation, "
            "no extra words, no quotes."
        ),
    }]

    for attempt in range(retries):
        try:
            reply = client.chat.completions.create(
                model="gpt-4o", messages=messages, temperature=0
            )
            tokens = clean(reply.choices[0].message.content).split()
            return tokens[0]
        except Exception:
            # Linear back-off: 2 s, 4 s, 6 s, ...
            time.sleep(2 * (attempt + 1))

    return word

def translate_template(template: str, lc: str, retries: int = 3) -> str:
    """Translate one complete prompt template into the language with code ``lc``.

    ``translate_word`` keeps only the first token of the reply, which would cut a
    multi-word template down to its first word. Templates therefore get their own
    request whose whole reply (whitespace-trimmed only) is returned.

    Returns:
        The translated template, "" for an empty template, or the untranslated
        template if no attempt produced a non-empty reply.
    """
    if not template:
        return ""
    prompt = (
        f"Translate the following prompt template into {LANGS[lc]}. "
        "Keep any placeholders in curly braces exactly as they are and output only "
        "the translation, no extra words, no quotes.\n\n"
        + template
    )
    for a in range(retries):
        try:
            r = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )
            text = (r.choices[0].message.content or "").strip()
            if text:
                return text
        except Exception:
            time.sleep(2 * (a + 1))
    return template


for fname, mode in FILES.items():
    src = IN_DIR  / fname
    dst = OUT_DIR / fname
    if not src.exists():
        print(f"  {fname} fehlt – übersprungen.")
        continue

    data = json.loads(src.read_text("utf-8"))

    # Translate the prompt templates (hi + th only): every template becomes a dict
    # with the English text under "en" plus one entry per language code.
    data["prompt_templates"] = [
        {"en": en_tpl, **{lc: translate_template(en_tpl, lc) for lc in LANGS}}
        for en_tpl in (
            tpl if isinstance(tpl, str) else tpl.get("en", "")
            for tpl in data.get("prompt_templates", [])
        )
    ]

    for samp in tqdm(data["samples"], desc=f"{fname}"):
        subj_en = samp["subject"]

        # English object taken from the English subject (overwrites an existing one).
        # Slicing instead of indexing yields "" for an empty subject instead of
        # raising IndexError.
        if mode == "first":
            samp["object"] = subj_en[:1]
        else:
            samp["object"] = subj_en[-1:]

        # Thai and Hindi only: translate the subject, derive the object from it
        for lc in LANGS:
            subj_tr = translate_word(subj_en, lc)
            samp[f"subject_{lc}"] = subj_tr.upper()

            if mode == "first":
                samp[f"object_{lc}"] = first_visible_letter(subj_tr).upper()
            else:
                samp[f"object_{lc}"] = last_visible_letter(subj_tr).upper()

    dst.write_text(json.dumps(data, ensure_ascii=False, indent=2), "utf-8")
    print(f" {fname} fertig → {dst}")

print("\n  First-/Last-Letter‑Dateien (nur Thai & Hindi) erfolgreich aktualisiert.")
