<a href="https://colab.research.google.com/github/agungfirdaus717-ux/torentotgd/blob/main/srtTranslatorv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SRT Translator Ke Indonesia

In [None]:
# 🔧 Install library sekali di awal
!pip install transformers sentencepiece sacremoses srt chardet ipywidgets

import srt, chardet, re
from transformers import pipeline
import ipywidgets as widgets
from IPython.display import display

# 🚀 Load the free EN→ID translation model.
# Builds a Hugging Face "translation" pipeline backed by the
# Helsinki-NLP/opus-mt-en-id checkpoint; the resulting callable is used as a
# module-level global by translate_text().
# NOTE(review): presumably the model weights are downloaded on first run —
# confirm against the transformers documentation.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")

# ======================
# Formatter Subtitle
# ======================
# Phrase-level simplifications applied regardless of the chosen style.
_GENERAL_REPLACEMENTS = {
    "itu adalah": "itu",
    "hal tersebut": "itu",
    "milik saya": "punyaku",
    "milik anda": "punyamu",
}

# Per-style word substitutions that shift the register of the output.
_STYLE_REPLACEMENTS = {
    "simple": {"saya": "aku", "tidak": "nggak"},
    "natural": {
        "saya": "aku",
        "tidak": "nggak",
        "benar": "bener",
        "sebaiknya": "mending",
    },
    "formal": {"aku": "saya", "nggak": "tidak", "bener": "benar"},
}

# A letter at the very start of the text, or right after sentence-ending
# punctuation followed by whitespace.  Requiring the whitespace keeps
# ellipses ("..."), decimals ("3.5") and "?!" runs intact.
_SENTENCE_START = re.compile(r"(^|[.!?]\s+)([^\W\d_])")


def _replace_words(text, mapping):
    """Replace whole words/phrases from *mapping*, case-insensitively."""
    if not mapping:
        return text
    # Longest keys first so a phrase wins over any shorter key it contains.
    keys = sorted(mapping, key=len, reverse=True)
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(k) for k in keys) + r")\b",
        re.IGNORECASE,
    )

    def _swap(match):
        found = match.group(0)
        replacement = mapping.get(found.lower())
        if replacement is None:
            # Exotic case-folding match (e.g. dotted capital I): leave as is.
            return found
        if len(found) > 1 and found.isupper():
            return replacement.upper()
        if found[0].isupper():
            return replacement[0].upper() + replacement[1:]
        return replacement

    return pattern.sub(_swap, text)


def _capitalize_sentences(text):
    """Upper-case the first letter of the text and of each new sentence."""
    return _SENTENCE_START.sub(
        lambda m: m.group(1) + m.group(2).upper(), text
    )


def _wrap_words(words, max_len):
    """Greedily pack *words* into lines of at most *max_len* characters."""
    lines, current = [], ""
    for word in words:
        candidate = f"{current} {word}" if current else word
        # A word longer than max_len still gets a line of its own; it is
        # never split and never produces an empty line before it.
        if current and len(candidate) > max_len:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    return lines


def format_subtitle_text(t, style="simple", max_len=42):
    """Clean up translated text and lay it out as a subtitle cue.

    Steps, in order:
      1. Trim, collapse whitespace, drop spaces before ``, . ! ?`` and
         shorten runs of three or more dots to ``...``.
      2. Apply the general phrase simplifications (whole phrases only,
         case-insensitive, original capitalisation kept).
      3. Capitalise the first letter of every sentence.
      4. Apply the word substitutions for ``style`` (whole words only, so
         e.g. "sayang" or "mengaku" are never altered).
      5. Wrap to ``max_len`` characters per line; if that yields more than
         two lines they are merged into two, which may exceed ``max_len``.

    Args:
        t: Text to format.
        style: "simple", "natural" or "formal"; any other value skips the
            style substitutions.
        max_len: Target maximum number of characters per line.

    Returns:
        The formatted text with at most two lines separated by a newline.
        Empty or whitespace-only input yields an empty string.
    """
    t = re.sub(r"\s+", " ", t.strip())
    t = re.sub(r"\s+([,.!?])", r"\1", t)
    t = re.sub(r"\.{3,}", "...", t)

    t = _replace_words(t, _GENERAL_REPLACEMENTS)
    t = _capitalize_sentences(t)
    t = _replace_words(t, _STYLE_REPLACEMENTS.get(style, {}))

    lines = _wrap_words(t.split(), max_len)
    if len(lines) > 2:
        # Subtitles show at most two lines: fold the wrapped lines in half.
        half = len(lines) // 2
        lines = [" ".join(lines[:half]), " ".join(lines[half:])]
    return "\n".join(lines)

# ======================
# Translate
# ======================
def translate_text(text, style="simple"):
    """Translate one piece of text and format it as a subtitle cue.

    Args:
        text: Source text to translate.
        style: Style name forwarded to format_subtitle_text().

    Returns:
        The formatted translation, or an empty string when ``text`` is
        empty or whitespace-only (the model is not called in that case).
    """
    # Nothing to translate: skip the model call instead of feeding it an
    # empty prompt.
    if not text or not text.strip():
        return ""
    result = translator(text, max_length=512)[0]['translation_text']
    return format_subtitle_text(result, style=style)

def translate_srt_file(input_path, output_path, style="simple"):
    """Translate every cue of an SRT file and write the result as UTF-8.

    The input encoding is guessed with chardet from the raw bytes; the
    file is then parsed with ``srt``, each cue's content is replaced by
    its translation, and the cues are composed into ``output_path``.

    Args:
        input_path: Path of the SRT file to read.
        output_path: Path of the translated SRT file to write.
        style: Style name forwarded to translate_text().
    """
    # Read the bytes inside a context manager so the handle is closed.
    with open(input_path, "rb") as f:
        raw = f.read()
    # chardet reports None when it cannot guess (e.g. an empty file);
    # fall back to UTF-8 rather than the platform's locale encoding.
    enc = chardet.detect(raw)["encoding"] or "utf-8"

    with open(input_path, "r", encoding=enc) as f:
        subs = list(srt.parse(f.read()))

    # Cues are updated in place; timing and order are left untouched.
    for sub in subs:
        sub.content = translate_text(sub.content, style=style)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(srt.compose(subs))

    print(f"✅ Subtitle selesai diterjemahkan → {output_path}")

# ======================
# 🔘 Interactive dropdown
# ======================
# Dropdown widget offering the three style names that format_subtitle_text()
# recognises; "natural" is preselected.
# NOTE(review): nothing in this cell reads style_dropdown.value — presumably
# a later cell passes it to translate_srt_file(); confirm.
style_dropdown = widgets.Dropdown(
    options=["simple", "natural", "formal"],
    value="natural",
    description="Style:",
)

# Render the widget in the notebook output.
display(style_dropdown)
