<a href="https://colab.research.google.com/github/agungfirdaus717-ux/torentotgd/blob/main/SrtTranslator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SRT Translator ke Bahasa Indonesia

In [None]:
# 🇮🇩 SRT Translator ke Bahasa Indonesia (Gratis, dengan Style)
# Gunakan di Google Colab

# ⬇️ Install dependencies
!pip -q install transformers sentencepiece srt chardet tqdm

# ⚙️ Import & Load Model
from transformers import pipeline
import srt, chardet, pathlib, re
from tqdm import tqdm

# Load the English -> Indonesian translation model once; it is reused for
# every subtitle block further down.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-en-id",
)
print("✅ Model siap dipakai")

# 📥 Fungsi Baca & Tulis SRT
def read_file_guess_encoding(path):
    """Read a text file whose encoding is not known in advance.

    The raw bytes are handed to ``chardet.detect``; when it reports no
    encoding, UTF-8 is assumed. If decoding with the chosen encoding fails,
    the bytes are decoded as UTF-8 with undecodable bytes dropped.

    Args:
        path: Path of the file to read.

    Returns:
        A ``(text, encoding)`` tuple, where ``encoding`` is the name of the
        encoding that was actually used to decode ``text``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "rb") as fh:
        raw = fh.read()

    enc = chardet.detect(raw).get("encoding") or "utf-8"
    try:
        return raw.decode(enc), enc
    except (LookupError, UnicodeError):
        # LookupError: Python has no codec for the detected name.
        # UnicodeError: the guess was wrong for these bytes.
        # Best-effort fallback: keep what can be decoded as UTF-8.
        return raw.decode("utf-8", errors="ignore"), "utf-8"

def write_text(path, text):
    """Write ``text`` to ``path`` as UTF-8, replacing any existing file.

    Args:
        path: Destination file path.
        text: The string to write.
    """
    # newline="" disables newline translation, so the line endings already
    # present in `text` are written out unchanged.
    out = open(path, "w", encoding="utf-8", newline="")
    try:
        out.write(text)
    finally:
        out.close()

def serialize_srt(subs):
    """Turn subtitle objects back into SRT text.

    Args:
        subs: The subtitle objects to serialise.

    Returns:
        Whatever ``srt.compose`` produces for ``subs``.
    """
    composed = srt.compose(subs)
    return composed

def parse_srt(text):
    """Parse SRT-formatted text into a list of subtitle objects.

    Args:
        text: The SRT document as a string.

    Returns:
        A list holding every item produced by ``srt.parse(text)``.
    """
    parsed = srt.parse(text)
    # Materialise the result so callers can index it and iterate it twice.
    return list(parsed)

# 📂 Upload File
from google.colab import files

# Open the Colab file picker; the result maps uploaded file names to contents.
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty result surfaces as an opaque IndexError.
    raise RuntimeError(
        "Tidak ada file yang diunggah; jalankan ulang sel ini dan pilih file .srt."
    )

# Only the first uploaded file is processed.
srt_path = next(iter(uploaded))
print("Menggunakan file upload:", srt_path)

# 🧹 Formatter & Style
# Whole-word substitutions applied for each output style.
_STYLE_REPLACEMENTS = {
    "simple": {"saya": "aku", "kami": "kita"},
    "natural": {"saya": "aku"},
    "formal": {"aku": "saya", "nggak": "tidak"},
}


def clean_text(t, style="simple"):
    """Tidy a translated subtitle and apply a pronoun/wording style.

    Steps, in order:
      1. Strip the ends and collapse every whitespace run (line breaks
         included) into a single space.
      2. Apply the word substitutions for ``style``.
      3. Upper-case the first character.

    Substitutions match whole words only, so words that merely contain a
    target ("sayang", "kamis", "akun") are left intact. Matching is
    case-insensitive and the replacement keeps the matched word's
    capitalisation ("Saya" -> "Aku", "SAYA" -> "AKU").

    Args:
        t: The text to clean.
        style: One of "simple", "natural" or "formal". Any other value
            skips the substitution step.

    Returns:
        The cleaned text; an empty string stays empty.
    """
    t = re.sub(r"\s+", " ", t.strip())

    mapping = _STYLE_REPLACEMENTS.get(style)
    if mapping:
        # One alternation pattern => a single pass, so a replacement can never
        # be re-matched by a later rule.
        pattern = r"\b(?:" + "|".join(re.escape(word) for word in mapping) + r")\b"

        def _swap(match):
            found = match.group(0)
            replacement = mapping[found.lower()]
            if len(found) > 1 and found.isupper():
                return replacement.upper()
            if found[0].isupper():
                return replacement[0].upper() + replacement[1:]
            return replacement

        t = re.sub(pattern, _swap, t, flags=re.IGNORECASE)

    # Capitalise last so a swapped sentence-initial word is covered as well.
    if t:
        t = t[0].upper() + t[1:]

    return t

# 🧠 Jalankan Terjemahan
style = "simple"  #@param ["simple", "natural", "formal"]

# The detected encoding is returned as well, but it is not used below:
# the output file is always written as UTF-8 by write_text().
raw_text, encoding = read_file_guess_encoding(srt_path)
subs = parse_srt(raw_text)

failed = 0
for number, sub in enumerate(tqdm(subs), start=1):
    source = sub.content
    try:
        result = translator(source, max_length=512)
        text_id = result[0]["translation_text"]
    except Exception as err:
        # Best-effort: keep the untranslated text for this block, but report
        # the failure instead of hiding it. tqdm.write keeps the progress bar
        # intact while printing.
        failed += 1
        tqdm.write(f"⚠️ Gagal menerjemahkan blok {number}: {err!r}")
        text_id = source
    # Write the cleaned translation straight back onto the subtitle object.
    sub.content = clean_text(text_id, style=style)

if failed:
    print(f"⚠️ {failed} blok gagal diterjemahkan dan dibiarkan apa adanya")

out_text = serialize_srt(subs)
# "<input path without its extension>_<STYLE>.srt"
out_path = pathlib.Path(srt_path).with_suffix("").as_posix() + f"_{style.upper()}.srt"
write_text(out_path, out_text)

print("✅ Selesai! File tersimpan sebagai:", out_path)

# 💾 Download the result
files.download(out_path)
