In [None]:
# --- EvaCun Translation Environment bootstrap (run first) ---
# Clones and uses your EvaCun-Colab-Notebook repo so paths/imports resolve in Colab or local Jupyter.
import os, sys, subprocess
from pathlib import Path

# Detect Colab: IN_COLAB ends up True only when `google.colab` imports cleanly.
try:
    import google.colab  # type: ignore
except Exception:
    IN_COLAB = False
else:
    IN_COLAB = True

# Defaults to your repo; override with EVACUN_APP_REPO_URL if needed
DEFAULT_URL = "https://github.com/ancient-world-citation-analysis/EvaCun-Colab-Notebook.git"
REPO_URL = os.environ.get("EVACUN_APP_REPO_URL", DEFAULT_URL).strip()
REPO_NAME = os.environ.get("EVACUN_APP_REPO_NAME", "EvaCun-Colab-Notebook").strip()


def default_repo_dir(cwd, repo_name):
    """Return the directory the repo should live in when no override is given.

    This cell ends with ``os.chdir(REPO_DIR)``, so on a second run the current
    working directory already *is* the repo. Blindly appending ``repo_name``
    would then yield a nested ``<repo>/<repo>`` path and trigger a second clone.

    Args:
        cwd: Current working directory (str or Path).
        repo_name: Directory name of the repository checkout.

    Returns:
        ``cwd`` itself when its final component equals ``repo_name``,
        otherwise ``cwd / repo_name``.
    """
    cwd = Path(cwd)
    if cwd.name == repo_name:
        return cwd
    return cwd / repo_name


# EVACUN_APP_REPO_DIR, when set, always wins over the computed default.
REPO_DIR = Path(os.getenv("EVACUN_APP_REPO_DIR", default_repo_dir(Path.cwd(), REPO_NAME))).resolve()

# Data layout
DATA_DIR = REPO_DIR / "data"
INPUT_DIR = DATA_DIR / "input"
OUTPUT_DIR = DATA_DIR / "outputs"

# Clone if missing/empty.
# This check must run BEFORE the data directories are created: creating
# REPO_DIR/data/... first would make REPO_DIR exist and be non-empty, so the
# clone would never happen on a fresh machine (and `git clone` refuses to
# clone into a non-empty directory anyway).
if not REPO_DIR.exists() or not any(REPO_DIR.iterdir()):
    print(f"Cloning {REPO_URL} into {REPO_DIR} ...")
    subprocess.check_call(["git", "clone", "--depth", "1", REPO_URL, str(REPO_DIR)])

# Now that the checkout is in place, make sure the data folders exist.
INPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Install the checkout as a package when it ships packaging metadata
# (either a pyproject.toml or a setup.py at the repo root).
pyproject = REPO_DIR / "pyproject.toml"
setup_py = REPO_DIR / "setup.py"
if any(marker.exists() for marker in (pyproject, setup_py)):
    print("Installing EvaCun app repo (editable if possible)...")
    _pip_install = [sys.executable, "-m", "pip", "install"]
    try:
        subprocess.check_call(_pip_install + ["-e", str(REPO_DIR)])
    except subprocess.CalledProcessError:
        # Editable install failed; retry as a regular install.
        subprocess.check_call(_pip_install + [str(REPO_DIR)])

# Make the repo importable and the working directory, so that relative
# imports and relative paths resolve against the checkout.
_repo_path = str(REPO_DIR)
if _repo_path not in sys.path:
    sys.path.insert(0, _repo_path)
os.chdir(REPO_DIR)

for _label, _location in (
    ("Working dir:", REPO_DIR),
    ("INPUT_DIR:", INPUT_DIR),
    ("OUTPUT_DIR:", OUTPUT_DIR),
):
    print(_label, _location)

# Install extra deps from whichever requirements files the repo provides.
# A failed install is reported as a warning and does not stop the cell.
for req_name in ("requirements-colab.txt", "requirements.txt"):
    req_path = REPO_DIR / req_name
    if not req_path.exists():
        continue
    print(f"Installing dependencies from {req_name} ...")
    _pip_cmd = [sys.executable, "-m", "pip", "install", "-r", str(req_path)]
    try:
        subprocess.check_call(_pip_cmd)
    except subprocess.CalledProcessError as err:
        print(f"Warning: failed to install from {req_name}: {err}")

# Convenience helpers for I/O
from pathlib import Path as _Path
import pandas as _pd

def in_input(*parts):
    """Return the path formed by joining ``parts`` onto ``INPUT_DIR``."""
    target = INPUT_DIR.joinpath(*parts)
    return target
def in_output(*parts):
    """Return the path formed by joining ``parts`` onto ``OUTPUT_DIR``."""
    target = OUTPUT_DIR.joinpath(*parts)
    return target

def read_csv(path, **kwargs):
    """Read a CSV file into a DataFrame, announcing the path first.

    Args:
        path: Location of the CSV file (str or path-like).
        **kwargs: Forwarded unchanged to ``pandas.read_csv``.

    Returns:
        Whatever ``pandas.read_csv`` returns for the given arguments.
    """
    csv_path = _Path(path)
    print("Reading CSV:", csv_path)
    return _pd.read_csv(csv_path, **kwargs)

def write_csv(df, path, **kwargs):
    """Write ``df`` to ``path`` as CSV, creating parent directories as needed.

    Args:
        df: Object exposing a pandas-style ``to_csv(path, **kwargs)`` method.
        path: Destination file (str or path-like).
        **kwargs: Forwarded to ``df.to_csv``. ``index`` defaults to ``False``
            but may be overridden by the caller; previously passing ``index``
            raised ``TypeError`` because it collided with a hard-coded keyword.

    Returns:
        The return value of ``df.to_csv``.
    """
    path = _Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Keep the historical default (no index column) without blocking overrides.
    kwargs.setdefault("index", False)
    print("Writing CSV:", path)
    return df.to_csv(path, **kwargs)

print("EvaCun translation bootstrap complete.")


In [None]:
# --- Fetch EvaCun datasets from Zenodo (pin to a VERSION DOI for reproducibility) ---
import json, hashlib
from pathlib import Path
from urllib.request import urlopen, urlretrieve

ZENODO_DOI_VERSION = "10.5281/zenodo.XXXXXXX"  # <-- replace with your VERSION DOI after publishing

# Download target: <current working directory>/data/input.
# NOTE(review): this rebinds DATA_DIR, which the bootstrap cell defines as
# REPO_DIR / "data"; from here on the name refers to the input folder.
DATA_DIR = Path.cwd().joinpath("data", "input").resolve()
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Wanted filename -> download URL. Every URL starts as None and is filled in
# from the Zenodo record.
NEEDED = dict.fromkeys(
    [
        "akkadian_train.txt",
        "transcription_train.txt",
        "english_train.txt",
        "akkadian_validation.txt",
        "transcription_validation.txt",
        "english_validation.txt",
        # Optional: checksums file if uploaded
        "SHA256SUMS.txt",
    ]
)

def zenodo_record_from_doi(doi: str) -> dict:
    """Fetch the Zenodo record JSON that a DOI resolves to.

    The DOI is opened through https://doi.org/, the final URL after redirects
    is read back, and its last path segment is used as the record id for the
    Zenodo records API.

    Args:
        doi: DOI string, e.g. ``"10.5281/zenodo.1234567"``.

    Returns:
        The decoded JSON body of ``https://zenodo.org/api/records/<id>``.
    """
    with urlopen(f"https://doi.org/{doi}") as doi_response:
        landing_url = doi_response.geturl()
    # Last path component of the landing URL, ignoring any trailing slash.
    record_id = landing_url.rstrip("/").rpartition("/")[2]
    with urlopen(f"https://zenodo.org/api/records/{record_id}") as api_response:
        payload = api_response.read()
    return json.loads(payload)

# Fail early with an actionable message instead of an opaque HTTP error from
# the DOI resolver when the placeholder DOI has not been replaced yet.
if "XXXXXXX" in ZENODO_DOI_VERSION:
    raise ValueError(
        "ZENODO_DOI_VERSION is still the placeholder "
        f"({ZENODO_DOI_VERSION!r}); set it to the published version DOI."
    )

rec = zenodo_record_from_doi(ZENODO_DOI_VERSION)

# Discover direct file URLs
for f in rec.get("files", []):
    name = f.get("key")
    if name in NEEDED:
        NEEDED[name] = f["links"]["self"]

# Download files if missing
for fname, url in NEEDED.items():
    if url is None:
        # The checksum file is optional; every other entry is expected.
        if fname != "SHA256SUMS.txt":
            print(f"! Not found in Zenodo record: {fname}")
        continue
    dest = DATA_DIR / fname
    if dest.exists() and dest.stat().st_size > 0:
        print(f"✓ Exists: {fname}")
        continue
    print(f"↓ Downloading: {fname}")
    # Download to a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated file that later runs treat as "Exists".
    partial = dest.with_name(dest.name + ".part")
    try:
        urlretrieve(url, partial)
        partial.replace(dest)
    finally:
        if partial.exists():
            partial.unlink()

# Optional: verify checksums if SHA256SUMS.txt present.
# Expected line format is the one produced by `sha256sum`:
#   "<hex digest>  <filename>"  or  "<hex digest> *<filename>" (binary mode).
sumfile = DATA_DIR / "SHA256SUMS.txt"
if sumfile.exists():
    expected = {}
    for line in sumfile.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        fields = line.split(None, 1)
        if len(fields) != 2:
            # A digest without a filename (or vice versa) cannot be checked.
            print(f"Skipping malformed checksum line: {line!r}")
            continue
        sha, name = fields
        name = name.strip()
        if name.startswith("*"):
            # Binary-mode marker from `sha256sum -b`, not part of the filename.
            name = name[1:]
        # hexdigest() is lowercase; normalise so uppercase digests still match.
        expected[name] = sha.lower()
    for name, sha in expected.items():
        fp = DATA_DIR / name
        if not fp.exists():
            print(f"Checksum missing file: {name}")
            continue
        # Hash in 1 MiB chunks so large corpora are not loaded into memory at once.
        digest = hashlib.sha256()
        with fp.open("rb") as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b""):
                digest.update(chunk)
        h = digest.hexdigest()
        print(f"{name}: {'OK' if h == sha else 'MISMATCH'}")
print("Zenodo dataset ready in:", DATA_DIR)


**Welcome to the notebook of "Translating Akkadian to English using NLP"!**

Please follow the instructions in the sections below to get your Akkadian input translated into English.

In [None]:
# Please run this section once to prepare your environment to translate Akkadian to English.
# Each clone is skipped when its target directory already exists, so re-running this cell is safe.
# Clones are shallow (--depth 1): only the current files are needed, not the repository history.

![ -d Akkademia ] || git clone --depth 1 https://github.com/gaigutherz/Akkademia.git
# The model checkpoints are stored in the repo as split parts; join them back into single files.
!cat Akkademia/trans_result.LR_0.1.MAX_TOKENS_4000/checkpoint_best.pt.* > Akkademia/trans_result.LR_0.1.MAX_TOKENS_4000/checkpoint_best.pt
!cat Akkademia/not_divided_by_three_dots_result.LR_0.1.MAX_TOKENS_4000/checkpoint_best.pt.* > Akkademia/not_divided_by_three_dots_result.LR_0.1.MAX_TOKENS_4000/checkpoint_best.pt
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install sentencepiece
![ -d fairseq ] || git clone --depth 1 https://github.com/pytorch/fairseq
%pip install ./fairseq
!chmod +x fairseq/fairseq_cli/interactive.py
# Drop the "-u" flag from the script's shebang line.
!sed -i 's/#!\/usr\/bin\/env python3 -u/#!\/usr\/bin\/env python3/g' fairseq/fairseq_cli/interactive.py

Cloning into 'Akkademia'...
remote: Enumerating objects: 10658, done.[K
remote: Counting objects: 100% (152/152), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 10658 (delta 99), reused 140 (delta 91), pack-reused 10506[K
Receiving objects: 100% (10658/10658), 3.28 GiB | 30.45 MiB/s, done.
Resolving deltas: 100% (9870/9870), done.
Updating files: 100% (7380/7380), done.
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
Cloning into 'fairseq'...
remote: Enumerating objects: 34850, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 34850 (delta 2), reused 13 (delta 0), pack-reused 34832[K
Receiving o

In [None]:
# In this section you can translate one transliteration sentence to English.
# The script prompts you to enter the sentence after it starts.
# Please try to run it first with the sample input, and then you can run it again with input of your own.
# NOTE(review): the instruction on the next line is cut off after "broken" in the original notebook;
# presumably it warns against input containing broken (damaged) passages. Confirm and complete it.
# Make sure not to use broken
# Sample input is: {m}-aš-šur—BAD₃—PAB
!python Akkademia/akkadian/translate_transliteration.py

Please enter a transliteration sentence for translation
Traceback (most recent call last):

^C


In [None]:
# In this section you can translate one cuneiform sentence to English.
# Run it first with the sample input below; then run it again with input of your own.
# NOTE(review): presumably the script prompts for the sentence after it starts. Confirm.

# Sample input is:  ▁ . . . 𒉻 𒇻 𒇻
!python Akkademia/akkadian/translate_cuneiform.py

In [None]:
# In this section you can translate a transliteration file to English.
# Run it first with the sample file below; then run it again with files of your own.
# The file you use must be located in the "Akkademia" directory.
# Please be patient: translating long files can take a few minutes.

# Sample file is: input.tr
!python Akkademia/akkadian/translate_from_transliteration.py

In [None]:
# In this section you can translate a cuneiform file to English.
# Run it first with the sample file below; then run it again with files of your own.
# The file you use must be located in the "Akkademia" directory.
# Please be patient: translating long files can take a few minutes.

# Sample file is: input.ak
!python Akkademia/akkadian/translate_from_cuneiform.py