<a href="https://colab.research.google.com/github/agungfirdaus717-ux/torentotgd/blob/main/SRTTranslatorSubsV222.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Translator SRT ke Bahasa Indonesia

In [None]:
# 🇮🇩 SRT subtitle translator: English → Indonesian (Colab only)

# === Install Dependencies ===
!pip -q install transformers sentencepiece srt chardet tqdm

# === Imports ===
import re
import srt
import chardet
from tqdm import tqdm
from pathlib import Path
from transformers import pipeline
from google.colab import files

# === Load Translation Model (EN → ID) ===
# Build the translation pipeline once at module level; translate_srt_colab()
# reuses this single `translator` object for every subtitle block.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")
print("✅ Model siap dipakai")


# === Utils: File Handling ===
def read_file_guess_encoding(path: str):
    """Read a text file whose encoding is unknown.

    The raw bytes are handed to ``chardet`` to guess the encoding. If no guess
    is produced, UTF-8 is assumed.

    Args:
        path: Location of the file to read.

    Returns:
        A ``(text, encoding)`` tuple: the decoded contents and the name of the
        encoding that was actually used to decode them.
    """
    # Use a context manager so the handle is closed even if reading fails.
    with open(path, "rb") as handle:
        raw = handle.read()

    enc = chardet.detect(raw).get("encoding") or "utf-8"

    try:
        return raw.decode(enc), enc
    except (LookupError, UnicodeError):
        # LookupError: the guessed codec name is unknown to Python.
        # UnicodeError: the guess was wrong and the bytes do not decode.
        # Either way, fall back to a lossy UTF-8 decode rather than failing.
        return raw.decode("utf-8", errors="ignore"), "utf-8"


def write_text(path: str, text: str):
    """Save *text* at *path* as UTF-8 without translating newline characters."""
    destination = Path(path)
    # newline="" writes line endings exactly as they appear in *text*.
    with destination.open("w", encoding="utf-8", newline="") as sink:
        sink.write(text)


def parse_srt(text: str):
    """Parse SRT-formatted *text* and return the resulting items as a list."""
    parsed_items = srt.parse(text)
    # Materialize the parser's iterable so callers get a concrete list.
    return [*parsed_items]


def serialize_srt(subs):
    """Turn the subtitle items in *subs* back into SRT text via ``srt.compose``."""
    composed = srt.compose(subs)
    return composed


# === Text Cleaning & Style ===
def clean_text(text: str, style: str = "natural") -> str:
    """Normalize a translated subtitle line and adapt its register.

    Steps, in order:
      1. Strip the text and collapse every whitespace run to a single space.
      2. Apply the phrase replacements shared by all styles.
      3. Apply the word replacements for the requested *style*.
      4. Upper-case the first character.

    Replacements match whole words only, so a short word is never rewritten
    inside a longer one (e.g. "saya" inside "sayang"). Matching ignores case
    and the replacement copies the casing of the text it replaces, so a
    sentence-initial "Saya" is handled as well.

    Args:
        text: The text to clean.
        style: One of "simple", "natural", "formal" or "colloquial". Any other
            value applies only the shared replacements.

    Returns:
        The cleaned text; an empty string stays empty.
    """
    common_rules = (
        ("kamu semua", "kalian"),
        ("apa yang", "apa"),
        ("itu adalah", "itu"),
        ("mari kita", "ayo"),
        ("jangan khawatir", "tenang saja"),
        ("bagaimana kalau", "gimana kalau"),
    )
    # "simple" and "natural" deliberately share one rule set.
    casual_rules = (
        ("saya", "aku"),
        ("kami", "kita"),
        ("tidak", "nggak"),
    )
    style_rules = {
        "simple": casual_rules,
        "natural": casual_rules,
        "formal": (
            ("aku", "saya"),
            ("nggak", "tidak"),
            ("kita", "kami"),
        ),
        "colloquial": (
            ("saya", "gue"),
            ("aku", "gue"),
            ("tidak", "nggak"),
            ("ingin", "pengen"),
            ("ayo", "yuk"),
        ),
    }

    def _match_case(found: str, new: str) -> str:
        # Carry the casing of the matched text over to its replacement.
        if len(found) > 1 and found.isupper():
            return new.upper()
        if found[:1].isupper():
            return new[:1].upper() + new[1:]
        return new

    def _swap(source: str, old: str, new: str) -> str:
        # The lookarounds require a non-word character (or the string edge) on
        # both sides, so only whole words/phrases are replaced.
        pattern = rf"(?<!\w){re.escape(old)}(?!\w)"
        return re.sub(
            pattern,
            lambda m: _match_case(m.group(0), new),
            source,
            flags=re.IGNORECASE,
        )

    text = re.sub(r"\s+", " ", text.strip())

    # Rules run sequentially, so a later rule can act on the output of an
    # earlier one (e.g. "mari kita" -> "ayo" -> "yuk" in colloquial style).
    for old, new in (*common_rules, *style_rules.get(style, ())):
        text = _swap(text, old, new)

    # Slicing keeps this safe for the empty string.
    return text[:1].upper() + text[1:]


# === Main Execution (Colab Only) ===
def translate_srt_colab(style: str = "colloquial"):
    """Upload an SRT file in Colab, translate each cue, and save the result.

    The first uploaded file is decoded, parsed, and every subtitle's content
    is passed through the module-level ``translator`` and then ``clean_text``.
    The translated subtitles are written to
    ``/content/<original stem>_<style>.srt``.

    A cue whose translation raises is left with its original content; the
    number of such cues is reported once the loop finishes.

    Args:
        style: Register passed on to ``clean_text`` and used as the suffix of
            the output file name.

    Returns:
        None. Progress and the output location are printed.
    """
    uploaded = files.upload()
    if not uploaded:
        # Nothing was uploaded, so there is no file name to pick below.
        print("⚠️ Tidak ada file yang diunggah.")
        return

    # Only the first uploaded file is processed.
    srt_path = next(iter(uploaded))
    print("📂 File dipakai:", srt_path)

    raw_text, _ = read_file_guess_encoding(srt_path)
    subs = parse_srt(raw_text)

    failed = 0
    for sub in tqdm(subs):
        if not sub.content.strip():
            # Blank cue: nothing to translate, keep it untouched.
            continue
        try:
            result = translator(sub.content, max_length=512)
            sub.content = clean_text(result[0]["translation_text"], style)
        except Exception:
            # Best effort: keep the original text for this cue, but count the
            # failure so it is not hidden from the user.
            failed += 1

    if failed:
        print(f"⚠️ {failed} blok gagal diterjemahkan dan dibiarkan apa adanya.")

    output_path = Path("/content") / f"{Path(srt_path).stem}_{style}.srt"
    write_text(output_path, serialize_srt(subs))

    print("✅ Selesai! File tersimpan:", output_path)


# === Run in Colab ===
# Styles handled by clean_text: "simple", "natural", "formal", "colloquial".
translate_srt_colab(style="colloquial")  # change the style as needed
