<a href="https://colab.research.google.com/github/agungfirdaus717-ux/torentotgd/blob/main/TranslateSrtsubsV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SRT Translate ke Indonesia

In [None]:
# 🇮🇩 SRT Translator ke Bahasa Indonesia
# Jalankan di Google Colab atau Python dengan dependency terinstal

# === Install Dependencies (jika perlu di Colab) ===
# !pip -q install transformers sentencepiece srt chardet tqdm

# === Imports ===
import re
import srt
import chardet
from tqdm import tqdm
from pathlib import Path
from transformers import pipeline
try:
    from google.colab import files
except ImportError:
    files = None  # lets the script run outside Colab as well


# === Load Translation Model (EN → ID) ===
# The pipeline is built once at module level; translate_srt() reuses it for
# every subtitle block.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")
print("✅ Model siap dipakai")


# === Utils: File Handling ===
def read_file_guess_encoding(path: str):
    """
    Read a text file, detecting its encoding automatically.

    The raw bytes are handed to ``chardet.detect``; when it reports no
    encoding, UTF-8 is assumed. If decoding with the chosen encoding fails
    (invalid bytes, or a codec name Python does not know), the bytes are
    decoded as UTF-8 with undecodable bytes dropped.

    Args:
        path: Location of the file to read.

    Returns:
        A ``(decoded_text, encoding)`` tuple, where ``encoding`` is the name
        of the codec that was actually used for decoding.
    """
    # Context manager so the handle is closed even if the read fails.
    with open(path, "rb") as handle:
        raw = handle.read()

    enc = chardet.detect(raw).get("encoding") or "utf-8"

    try:
        return raw.decode(enc), enc
    except (UnicodeError, LookupError):
        # Wrong guess (UnicodeError) or unknown codec name (LookupError):
        # fall back to a best-effort UTF-8 decode instead of failing.
        return raw.decode("utf-8", errors="ignore"), "utf-8"


def write_text(path: str, text: str):
    """Write *text* to *path* as UTF-8, leaving line endings untouched."""
    # Binary mode writes the encoded bytes verbatim, so no newline
    # translation takes place.
    with open(path, "wb") as sink:
        sink.write(text.encode("utf-8"))


def parse_srt(text: str):
    """Turn raw SRT text into a list of Subtitle objects."""
    parsed = srt.parse(text)
    return [*parsed]


def serialize_srt(subs):
    """Render a list of Subtitle objects back into a single SRT string."""
    composed = srt.compose(subs)
    return composed


# === Text Cleaning & Style ===
# Phrase normalisations applied for every style, in this order.
_COMMON_REPLACEMENTS = {
    "kamu semua": "kalian",
    "apa yang": "apa",
    "itu adalah": "itu",
    "mari kita": "ayo",
    "jangan khawatir": "tenang saja",
    "bagaimana kalau": "gimana kalau",
}

# Per-style word swaps. "simple" and "natural" use identical rules, so they
# share one table.
_CASUAL_REPLACEMENTS = {"saya": "aku", "kami": "kita", "tidak": "nggak"}
_STYLE_REPLACEMENTS = {
    "simple": _CASUAL_REPLACEMENTS,
    "natural": _CASUAL_REPLACEMENTS,
    "formal": {"aku": "saya", "nggak": "tidak", "kita": "kami"},
    "colloquial": {
        "saya": "gue",
        "aku": "gue",
        "tidak": "nggak",
        "ingin": "pengen",
        "ayo": "yuk",
    },
}


def _match_case(original: str, replacement: str) -> str:
    """Give *replacement* the casing (ALL CAPS / Capitalised) of *original*."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def _replace_words(text: str, table: dict) -> str:
    """Apply each table entry to *text* as a whole-word, case-insensitive swap."""
    for old, new in table.items():
        # Word boundaries keep e.g. "ingin" from matching inside "dingin".
        pattern = rf"\b{re.escape(old)}\b"
        text = re.sub(
            pattern,
            lambda m, new=new: _match_case(m.group(0), new),
            text,
            flags=re.IGNORECASE,
        )
    return text


def clean_text(text: str, style: str = "natural") -> str:
    """
    Tidy a translated subtitle line and adapt its register.

    Steps: trim and collapse whitespace, upper-case the first character,
    apply the common phrase normalisations, then apply the word swaps of the
    requested style. Replacements match whole words only and ignore case;
    the casing of the matched text (capitalised or all caps) is carried over
    to the replacement, so a sentence-initial word stays capitalised.

    Args:
        text: The translated text to clean.
        style: One of "simple", "natural", "formal" or "colloquial". Any
            other value applies only the common normalisations.

    Returns:
        The cleaned text; an empty string stays empty.
    """
    text = re.sub(r"\s+", " ", text.strip())

    if text:
        text = text[0].upper() + text[1:]

    text = _replace_words(text, _COMMON_REPLACEMENTS)
    # Common phrases run first so their output (e.g. "ayo") can still be
    # restyled by the style table (e.g. "ayo" -> "yuk").
    return _replace_words(text, _STYLE_REPLACEMENTS.get(style, {}))


# === Main Execution ===
def translate_srt(file_path: str, style: str = "natural") -> str:
    """
    Translate an .srt file from English to Indonesian.

    Each subtitle's content is sent to the module-level ``translator`` and
    the result is passed through ``clean_text``. A block whose translation
    fails keeps its original text; the number of such failures and the first
    error are printed once the loop finishes, so a failing model does not go
    unnoticed. The result is written next to the input file as
    ``<stem>_<style>.srt``.

    Args:
        file_path: Path of the source .srt file.
        style: Register for ``clean_text``: "simple", "natural", "formal"
            or "colloquial".

    Returns:
        The path of the translated file, as a string.
    """
    raw_text, _encoding = read_file_guess_encoding(file_path)
    subs = parse_srt(raw_text)

    failed = 0
    first_error = None
    for sub in tqdm(subs):
        source = sub.content
        if not source.strip():
            continue  # nothing to translate in a blank block

        # Only the model call is guarded; the block keeps its original text
        # when the call fails.
        try:
            result = translator(source, max_length=512)
            raw_translation = result[0]["translation_text"]
        except Exception as exc:
            failed += 1
            if first_error is None:
                first_error = exc
            continue

        sub.content = clean_text(raw_translation, style)

    if failed:
        print(
            f"⚠️ {failed} blok gagal diterjemahkan (teks asli dipakai). "
            f"Error pertama: {first_error!r}"
        )

    # Save the result alongside the source file.
    source_path = Path(file_path)
    output_path = source_path.with_name(f"{source_path.stem}_{style}.srt")
    write_text(output_path, serialize_srt(subs))
    return str(output_path)


if __name__ == "__main__":
    if files:  # running inside Colab: ask the user to upload a file
        uploaded = files.upload()
        # Only the first uploaded file is used. The mapping may be empty
        # (presumably when the upload dialog is dismissed), so avoid [0].
        srt_path = next(iter(uploaded), None)
        if srt_path is None:
            raise SystemExit("❌ Tidak ada file yang diunggah.")
    else:
        # Strip stray whitespace and surrounding quotes from a pasted path.
        srt_path = input("Masukkan path file .srt: ").strip().strip("\"'")
        if not srt_path:
            raise SystemExit("❌ Path file .srt kosong.")

    print("📂 File dipakai:", srt_path)
    output_file = translate_srt(srt_path, style="colloquial")
    print("✅ Selesai! File tersimpan:", output_file)
