<a href="https://colab.research.google.com/github/agungfirdaus717-ux/torentotgd/blob/main/SrtTranslatorv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SRT Translator to Indonesian

In [None]:
# 🔧 Install the libraries once at the start
!pip install transformers sentencepiece sacremoses srt chardet ipywidgets

import srt, chardet, re
from transformers import pipeline
import ipywidgets as widgets
from IPython.display import display
from google.colab import files

# 🚀 Free EN→ID model. The pipeline is built once at module level and the
# same object is reused for every subtitle by translate_srt_file().
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")

# ======================
# Subtitle formatter
# ======================
# Whole-word substitutions applied for each style. Keys are lowercase; the
# casing of the matched word is carried over to its replacement.
_STYLE_WORDS = {
    "simple": {"saya": "aku", "tidak": "nggak"},
    "natural": {
        "saya": "aku",
        "tidak": "nggak",
        "benar": "bener",
        "sebaiknya": "mending",
    },
    "formal": {"aku": "saya", "nggak": "tidak", "bener": "benar"},
}

# A sentence starts at the beginning of the text or after ".", "!" or "?"
# followed by whitespace. Requiring the whitespace keeps "3.5", "..." and
# "?!" intact instead of splitting them apart.
_SENTENCE_START = re.compile(r"(^|[.!?]\s+)(\w)")


def _swap_words(text, mapping):
    """Replace whole words in *text* according to *mapping*, preserving case."""
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(word) for word in mapping) + r")\b",
        re.IGNORECASE,
    )

    def _swap(match):
        found = match.group(0)
        # .get() guards against exotic case-folding matches whose lower()
        # form is not a key; such a match is left untouched.
        replacement = mapping.get(found.lower(), found)
        if found.isupper():
            return replacement.upper()
        if found[0].isupper():
            return replacement[0].upper() + replacement[1:]
        return replacement

    return pattern.sub(_swap, text)


def format_subtitle_text(t, style="simple", max_len=42):
    """Clean up a translated subtitle and wrap it into short lines.

    Steps, in order:
      1. Collapse whitespace, drop spaces before ``, . ! ?`` and shorten runs
         of three or more dots to ``...``.
      2. Capitalise the first letter of every sentence.
      3. Apply the word substitutions of *style* (whole words only, so
         "melakukan" or "sayang" are never touched by the aku/saya swap).
      4. Greedily wrap the text so no line exceeds *max_len* characters;
         a single word longer than *max_len* is kept whole on its own line.

    Args:
        t: The text to format.
        style: ``"simple"``, ``"natural"`` or ``"formal"``. Any other value
            skips the substitution step.
        max_len: Maximum number of characters per output line.

    Returns:
        The formatted text with lines joined by ``"\\n"`` (``""`` for blank
        input).
    """
    import textwrap

    t = t.strip()
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"\s+([,.!?])", r"\1", t)
    t = re.sub(r"\.{3,}", "...", t)

    # Capitalise sentence starts in place rather than splitting on every
    # punctuation mark, which used to turn "..." into ". . .".
    t = _SENTENCE_START.sub(lambda m: m.group(1) + m.group(2).upper(), t)

    words = _STYLE_WORDS.get(style)
    if words:
        t = _swap_words(t, words)

    # Wrap on spaces only; never split inside a word or at a hyphen.
    lines = textwrap.wrap(
        t,
        width=max(1, max_len),
        break_long_words=False,
        break_on_hyphens=False,
    )
    return "\n".join(lines)

# ======================
# Translation function
# ======================
def translate_srt_file(input_path, output_path, style="simple"):
    """Translate every cue of an SRT file and write the result as UTF-8.

    The input encoding is guessed with chardet. Each non-blank cue is run
    through the module-level ``translator`` pipeline, formatted with
    ``format_subtitle_text`` and written to *output_path*; the finished file
    is then handed to ``files.download``.

    Args:
        input_path: Path of the source ``.srt`` file.
        output_path: Path the translated ``.srt`` file is written to.
        style: Style name forwarded to ``format_subtitle_text``.
    """
    # Read the raw bytes inside a context manager so the handle is closed.
    with open(input_path, "rb") as f:
        raw = f.read()

    # chardet reports None when it cannot guess (e.g. an empty file);
    # fall back to UTF-8 instead of the platform's locale default.
    enc = chardet.detect(raw)["encoding"] or "utf-8"

    with open(input_path, "r", encoding=enc) as f:
        subs = list(srt.parse(f.read()))

    for sub in subs:
        # Nothing to translate in a blank cue; leave it as it is.
        if not sub.content.strip():
            continue
        translated = translator(sub.content, max_length=512)[0]['translation_text']
        sub.content = format_subtitle_text(translated, style=style)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(srt.compose(subs))

    print(f"✅ Subtitle selesai → {output_path}")
    files.download(output_path)  # automatically download to the user's PC

# ======================
# 🔘 Choose file & style
# ======================
# Ask the user for the .srt file; the result is a mapping keyed by file name.
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty result (presumably a dismissed upload
    # dialog) would surface as an opaque IndexError on the next line.
    raise RuntimeError(
        "No file was uploaded; re-run this cell and choose an .srt file."
    )
# Only the first uploaded file is translated.
file_name = next(iter(uploaded))

# Dropdown that selects the style passed to translate_srt_file().
style_dropdown = widgets.Dropdown(
    options=["simple", "natural", "formal"],
    value="natural",
    description="Style:",
)
display(style_dropdown)

# Run the translation
def run_translate(_):
    """Button click handler: translate the uploaded file with the chosen style.

    The output name is the input name with ``_id`` inserted before the
    extension (``movie.srt`` -> ``movie_id.srt``). Only the final extension
    is touched, and a name without one gets ``.srt`` appended, so the output
    path always differs from the input path.

    Args:
        _: The clicked button, passed by ipywidgets; unused.
    """
    import os

    stem, ext = os.path.splitext(file_name)
    out_name = f"{stem}_id{ext or '.srt'}"
    translate_srt_file(file_name, out_name, style=style_dropdown.value)

# Render a button whose click handler is run_translate.
button = widgets.Button(description="🚀 Translate SRT")
button.on_click(run_translate)
display(button)
