In [1]:
# Mount Google Drive at /content/drive; the project folder used below lives under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt

Collecting accelerate==0.32.1 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 1))
  Downloading accelerate-0.32.1-py3-none-any.whl.metadata (18 kB)
Collecting amrlib==0.8.0 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 2))
  Downloading amrlib-0.8.0-py3-none-any.whl.metadata (5.6 kB)
Collecting cached-property==1.5.2 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 3))
  Downloading cached_property-1.5.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting ConfigArgParse==1.7 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 4))
  Downloading ConfigArgParse-1.7-py3-none-any.whl.metadata (23 kB)
Collecting datasets==2.20.0 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 5))
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate==0.4.3 (from -r /content/drive/MyDrive/amr-tst-indo-new/requirements.txt (line 7))
  Downloading evaluate-0.4.3-py3-non

In [3]:
import os
import sys
import gradio as gr
import penman
from huggingface_hub import snapshot_download

In [4]:
# Location of the AMR-TST project folder on Google Drive; adjust to your own Drive layout.
PROJECT_PATH = "/content/drive/MyDrive/amr-tst-indo-new"

# Make the project's modules importable. The membership check keeps this cell
# idempotent: re-running it does not append duplicate entries to sys.path.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# Sanity check: list the project folder (raises if the folder is not visible).
os.listdir(PROJECT_PATH)


['LICENSE',
 'text_to_amr.py',
 'convert_to_amr_with_pointers.py',
 'amr_to_text.py',
 'utils.py',
 'amr_tst_new.py',
 'AMRBART-id',
 'hasil_paralel_cl_formal-to-informal_0.txt',
 '__pycache__',
 'requirements.txt',
 '(formal-informal inference) AMR-parse-gen - 7 mar - eks 3 - st-join-cl-gen.ipynb',
 'taufiq-indo-amr-generation-gold-v3.sen.join.cl-uncased',
 'Gradio.ipynb']

In [5]:
# Impor modul dari proyek AMR-TST
from text_to_amr import TextToAMR
from amr_to_text import AMRToTextWithTaufiqMethod
from amr_tst_new import AMRTST



In [6]:
# Names of the AMR parsing model and the AMR-to-text generation model
amr_parsing_model_name = "mbart-en-id-smaller-indo-amr-parsing-translated-nafkhan"
amr_gen_model_name = "taufiq-indo-amr-generation-gold-v3.sen.join.cl-uncased"

# Local folder of each model, both rooted at PROJECT_PATH
parsing_model_path = "/".join([PROJECT_PATH, "AMRBART-id", "models", amr_parsing_model_name])
generation_model_path = "/".join([PROJECT_PATH, amr_gen_model_name])

In [7]:
# Fetch the AMR parsing model repo into parsing_model_path, skipping files whose
# names match the log/checkpoint patterns.
# NOTE(review): the original comment says this only downloads when the model is
# not yet available; that depends on snapshot_download's own behavior — confirm.
snapshot_download(
    repo_id=f"abdiharyadi/{amr_parsing_model_name}",
    local_dir=parsing_model_path,
    ignore_patterns=["*log*", "*checkpoint*"]
)

# Fetch the AMR generation model repo into generation_model_path, restricted to
# files matching "*checkpoint-3*". As the last expression of the cell, the value
# returned by this call is what the cell displays.
snapshot_download(
    repo_id=f"atikaistiqomah/{amr_gen_model_name}",
    local_dir=generation_model_path,
    allow_patterns=["*checkpoint-3*"]
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

Fetching 88 files:   0%|          | 0/88 [00:00<?, ?it/s]

'/content/drive/MyDrive/amr-tst-indo-new/taufiq-indo-amr-generation-gold-v3.sen.join.cl-uncased'

In [8]:
# Load models: the text-to-AMR parser is selected by model name, while the
# AMR-to-text generator is pointed at the "checkpoint-3" subfolder of the
# generation model directory.
t2a = TextToAMR(model_name=amr_parsing_model_name)
a2t = AMRToTextWithTaufiqMethod(
    model_path=os.path.join(generation_model_path, "checkpoint-3"),
    lowercase=True
)

# Initialize the AMR-TST pipeline from the parser and the generator
amr_tst = AMRTST(t2a=t2a, a2t=a2t)

Running on the CPU


You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


In [9]:
# Pretty-print an AMR string through a penman decode/encode round trip
def format_amr_penman(amr_text):
    """
    Re-serialize an AMR string with `penman` so it is easier to read.

    Args:
    - `amr_text`: AMR string produced by the model.

    Returns:
    - The AMR re-encoded by `penman` with `indent=4`, or `amr_text` unchanged
      when decoding/encoding raises.
    """
    try:
        # Round trip: decode the text into a graph, then encode it with indentation
        pretty_amr = penman.encode(penman.decode(amr_text), indent=4)
    except Exception as e:
        # Best effort: report the failure and fall back to the original AMR text
        print(f"⚠️ Gagal memformat AMR: {e}")
        return amr_text
    return pretty_amr

# Gradio callback: convert formal text to informal text
def formal_to_informal(text):
    """
    Run the AMR-TST pipeline on one formal sentence.

    Args:
    - `text`: the formal input text from the Gradio textbox.

    Returns:
    - A tuple `(amr, informal_text)`: the source AMR formatted by
      `format_amr_penman` and the generated target text.
      Blank or `None` input yields `("", "")` without calling the model.
      If the pipeline raises, returns `("Error: <message>", "")` so the UI
      shows the failure instead of crashing.
    """
    # Nothing to convert: skip the model call for empty/whitespace-only input
    if not text or not str(text).strip():
        return "", ""

    try:
        # The pipeline takes a list of sentences; only the per-sentence infos are used
        _, infos = amr_tst([text])

        # Inference details for the single input sentence
        single_info = infos.to_list()[0]
        g_src_str = single_info["source_amr"]  # AMR output
        x_tgt = single_info["target_text"]  # informal text

        # Format the AMR with `penman`
        return format_amr_penman(g_src_str), x_tgt
    except Exception as e:
        return f"Error: {e}", ""

In [10]:
# Gradio Interface: one input textbox wired to formal_to_informal, and two output
# textboxes, one for each value of the (AMR, informal text) pair the callback returns.
iface = gr.Interface(
    fn=formal_to_informal,
    inputs=gr.Textbox(label="Input Teks Formal"),
    outputs=[
        gr.Textbox(label="AMR"),
        gr.Textbox(label="Teks Informal")
    ],
    title="AMR-TST Formal to Informal",
    description="Masukkan teks formal untuk dikonversi ke teks informal."
)

In [11]:
# Launch the Gradio app in Colab; share=True is passed to request a shareable link
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://583ca22e6bdf043fb9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Hugging Face Space

In [None]:
# Log in to the Hugging Face Hub from the notebook before working with the Space repo
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# !rm -rf /content/amr-tst-inference  # Delete if it already exists

In [None]:
# Clone the Space repository into the current working directory
!git clone https://huggingface.co/spaces/atikaistiqomah/amr-tst-inference

Cloning into 'amr-tst-inference'...
remote: Enumerating objects: 4, done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 4 (from 1)[K
Unpacking objects: 100% (4/4), 1.28 KiB | 1.28 MiB/s, done.


In [None]:
# !git init
# !git remote add origin https://huggingface.co/spaces/atikaistiqomah/amr-tst-inference

In [None]:
# Switch into the cloned Space repository; the git commands below run from here
%cd /content/amr-tst-inference

/content/amr-tst-inference


In [None]:
# Copy every non-hidden entry of the Drive project folder into the Space repository.
# NOTE(review): this also copies __pycache__ and the .ipynb notebooks, which then
# get committed by `git add .` — confirm that is intended.
!cp -r /content/drive/MyDrive/amr-tst-indo-new/* /content/amr-tst-inference/

In [None]:
# Set the global git author identity used for the commit below
!git config --global user.email "istiqomahatika.ai@gmail.com"
!git config --global user.name "atikaistiqomah"

In [None]:
!pwd  # Check the current directory
!ls -lah  # Show the folder contents

/content/amr-tst-inference
total 4.1M
drwxr-xr-x 5 root root 4.0K Mar  9 09:56  .
drwxr-xr-x 1 root root 4.0K Mar  9 09:55  ..
drwx------ 6 root root 4.0K Mar  9 09:56  AMRBART-id
-rw------- 1 root root  16K Mar  9 09:56  amr_to_text.py
-rw------- 1 root root 2.3K Mar  9 09:56  amr_tst_new.py
-rw------- 1 root root 4.4K Mar  9 09:56  convert_to_amr_with_pointers.py
-rw------- 1 root root 3.1M Mar  9 09:56 '(formal-informal inference) AMR-parse-gen - 7 mar - eks 3 - st-join-cl-gen.ipynb'
drwxr-xr-x 8 root root 4.0K Mar  9 09:55  .git
-rw-r--r-- 1 root root 1.5K Mar  9 09:55  .gitattributes
-rw------- 1 root root 954K Mar  9 09:56  Gradio.ipynb
-rw------- 1 root root 1.1K Mar  9 09:56  LICENSE
drwx------ 2 root root 4.0K Mar  9 09:56  __pycache__
-rw-r--r-- 1 root root  235 Mar  9 09:55  README.md
-rw------- 1 root root  847 Mar  9 09:56  requirements.txt
-rw------- 1 root root  16K Mar  9 09:56  text_to_amr.py
-rw------- 1 root root  12K Mar  9 09:56  utils.py


In [None]:
# Stage everything in the current directory
!git add .

In [None]:
# Commit the staged files
!git commit -m "Reinitialize repository"

[main 1160c44] Reinitialize repository
 82 files changed, 123655 insertions(+)
 create mode 100644 (formal-informal inference) AMR-parse-gen - 7 mar - eks 3 - st-join-cl-gen.ipynb
 create mode 100644 AMRBART-id/.gitignore
 create mode 100644 AMRBART-id/LICENSE
 create mode 100644 AMRBART-id/README.md
 create mode 100644 AMRBART-id/ds/wrete/inference.jsonl
 create mode 100644 AMRBART-id/fine-tune/common/additional-tokens.json
 create mode 100644 AMRBART-id/fine-tune/common/constant.py
 create mode 100644 AMRBART-id/fine-tune/common/new-additional-tokens.json
 create mode 100644 AMRBART-id/fine-tune/common/options.py
 create mode 100644 AMRBART-id/fine-tune/common/penman_interface.py
 create mode 100644 AMRBART-id/fine-tune/common/postprocessing.py
 create mode 100644 AMRBART-id/fine-tune/common/utils.py
 create mode 100644 AMRBART-id/fine-tune/continue_train_a2t_without_eval_second_config.sh
 create mode 100644 AMRBART-id/fine-tune/continue_train_without_eval.sh
 create mode 100644 AMRB

In [None]:
# Push the main branch to the `origin` remote
!git push origin main

Enumerating objects: 96, done.
Counting objects:   1% (1/96)Counting objects:   2% (2/96)Counting objects:   3% (3/96)Counting objects:   4% (4/96)Counting objects:   5% (5/96)Counting objects:   6% (6/96)Counting objects:   7% (7/96)Counting objects:   8% (8/96)Counting objects:   9% (9/96)Counting objects:  10% (10/96)Counting objects:  11% (11/96)Counting objects:  12% (12/96)Counting objects:  13% (13/96)Counting objects:  14% (14/96)Counting objects:  15% (15/96)Counting objects:  16% (16/96)Counting objects:  17% (17/96)Counting objects:  18% (18/96)Counting objects:  19% (19/96)Counting objects:  20% (20/96)Counting objects:  21% (21/96)Counting objects:  22% (22/96)Counting objects:  23% (23/96)Counting objects:  25% (24/96)Counting objects:  26% (25/96)Counting objects:  27% (26/96)Counting objects:  28% (27/96)Counting objects:  29% (28/96)Counting objects:  30% (29/96)Counting objects:  31% (30/96)Counting objects:  32% (31/96)Counting objects: