In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; MODEL_DIR in the next cell lives under this mount point.
drive.mount('/content/drive')

In [None]:
from transformers import MT5ForConditionalGeneration, MT5Tokenizer
import torch
import os

# ======================
# MODEL PATH
# ======================
# Directory on the mounted Drive where the best fine-tuned checkpoint was saved.
MODEL_DIR = "/content/drive/MyDrive/UAS_DEEPL/mt5_19/best_mt5_paraphrase_simple"

# Explicit check instead of `assert`: assertions are removed when Python runs
# with -O, `exists()` would also accept a regular file, and the error should
# name the path that was looked up.
if not os.path.isdir(MODEL_DIR):
    raise FileNotFoundError(f"Model directory not found: {MODEL_DIR}")

# ======================
# DEVICE
# ======================
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ======================
# LOAD MODEL
# ======================
# Model weights and tokenizer are both read from the same checkpoint directory.
model = MT5ForConditionalGeneration.from_pretrained(MODEL_DIR)
tokenizer = MT5Tokenizer.from_pretrained(MODEL_DIR)

model.to(device)
model.eval()  # inference mode (disables dropout)

print("Model loaded")

# ======================
# INFERENCE CONFIG
# ======================
# Keyword arguments forwarded unchanged to `model.generate` on every attempt.
GEN_CFG_INFER = dict(
    do_sample=True,
    num_beams=2,
    temperature=0.9,
    top_p=0.9,
    no_repeat_ngram_size=3,
    repetition_penalty=1.25,
)

# A candidate is accepted when its similarity to the input lies in
# [SIM_LOW, SIM_HIGH]; MAX_TRIES caps the number of generation attempts.
SIM_LOW, SIM_HIGH = 0.70, 0.85
MAX_TRIES = 2

In [None]:
import torch.nn.functional as F

class ParaphraseInference:
    """Generate a paraphrase, retrying until it is neither too close nor too far.

    Every candidate produced by ``model.generate`` is scored against the input
    with the cosine similarity of mean-pooled encoder states. A candidate whose
    score lies inside ``[SIM_LOW, SIM_HIGH]`` is returned immediately; if no
    attempt lands in that band, the least similar candidate is returned.

    The class reads the module-level settings ``GEN_CFG_INFER``, ``SIM_LOW``,
    ``SIM_HIGH`` and ``MAX_TRIES`` at call time.
    """

    def __init__(self, model, tokenizer, device):
        """Keep references to the model, its tokenizer and the target device."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def _sentence_embedding(self, text):
        """Embed ``text`` by averaging the encoder's last hidden state.

        The text is tokenized with truncation at 128 tokens and the result is
        averaged over ``dim=1`` (the token axis of the encoder output), so the
        returned tensor keeps the leading batch dimension.
        """
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.encoder(**inputs)

        # mean pooling
        return outputs.last_hidden_state.mean(dim=1)

    def _similarity(self, a, b):
        """Return the cosine similarity (a Python float) between two texts."""
        emb_a = self._sentence_embedding(a)
        emb_b = self._sentence_embedding(b)
        return F.cosine_similarity(emb_a, emb_b).item()

    def predict(self, text, max_length=64):
        """Return a paraphrase of ``text``.

        Args:
            text: Sentence to paraphrase.
            max_length: ``max_length`` passed to ``model.generate``.

        Returns:
            The first candidate whose similarity to ``text`` is within
            ``[SIM_LOW, SIM_HIGH]``; otherwise the candidate with the lowest
            similarity among the ``MAX_TRIES`` attempts. ``None`` only when
            ``MAX_TRIES`` is not positive, i.e. nothing was generated.
        """
        # Neither the tokenized input nor the source embedding changes between
        # attempts, so compute them once instead of once per retry.
        inputs = self.tokenizer(
            text,
            return_tensors="pt"
        ).to(self.device)
        source_emb = self._sentence_embedding(text)

        best_output = None
        best_sim = float("inf")

        for _ in range(MAX_TRIES):
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_length=max_length,
                    **GEN_CFG_INFER
                )

            candidate = self.tokenizer.decode(
                outputs[0],
                skip_special_tokens=True
            )

            candidate_emb = self._sentence_embedding(candidate)
            sim = F.cosine_similarity(source_emb, candidate_emb).item()

            # Inside the band: different enough from the input, yet still close
            # in meaning. Above SIM_HIGH the candidate is too similar.
            if SIM_LOW <= sim <= SIM_HIGH:
                return candidate

            # Keep the least similar candidate as the fallback. The first
            # candidate is always stored, so a model that just echoes the input
            # (similarity >= 1.0) still yields a string rather than None.
            if best_output is None or sim < best_sim:
                best_sim = sim
                best_output = candidate

        return best_output


In [None]:
# Shared inference helper; the Flask endpoint defined later calls this same instance.
inferencer = ParaphraseInference(model, tokenizer, device)

# Smoke test: paraphrase one Indonesian sentence and print the result.
demo_paraphrase = inferencer.predict("Ketika Jepang mendarat di Indonesia pada Maret 1942.")
print(demo_paraphrase)

In [None]:
import os
import threading, time
from getpass import getpass

from flask import Flask, request, jsonify
from pyngrok import ngrok

app = Flask(__name__)


@app.route("/paraphrase", methods=["POST"])
def paraphrase():
    """Paraphrase the ``text`` field of a JSON POST body.

    Responds with ``{"input": ..., "paraphrase": ...}`` on success, or with a
    JSON ``{"error": ...}`` payload and status 400 when the body is not a JSON
    object, lacks ``text``, or ``text`` is not a non-empty string.
    """
    # force=True parses the body regardless of Content-Type; silent=True makes
    # malformed JSON come back as None so the client gets a JSON error below
    # instead of the framework's default error page.
    data = request.get_json(force=True, silent=True)

    # A JSON scalar or array would otherwise break the membership test or the
    # subscript below and surface as a 500.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    if "text" not in data:
        return jsonify({"error": "Missing field 'text'"}), 400

    text = data["text"]
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Field 'text' must be a non-empty string"}), 400

    result = inferencer.predict(text)

    return jsonify({
        "input": text,
        "paraphrase": result
    })

def run_flask():
    """Serve ``app`` on all interfaces, port 5000, with debug and the reloader off.

    Blocks the calling thread until the server stops.
    """
    server_options = {
        "host": "0.0.0.0",
        "port": 5000,
        "debug": False,
        "use_reloader": False,
    }
    app.run(**server_options)

# SECURITY: the ngrok authtoken is a secret and must not be stored in the
# notebook. Read it from the environment, or prompt for it without echoing.
# A token was previously committed in this cell; revoke it in the ngrok
# dashboard and issue a new one.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_token:
    raise RuntimeError("An ngrok authtoken is required to open the public tunnel")

# Run Flask in a background daemon thread so this cell can continue.
threading.Thread(target=run_flask, daemon=True).start()
time.sleep(3)  # give the server a moment to start before opening the tunnel

# Best-effort cleanup of an ngrok process left over from an earlier run.
# Only ordinary errors are ignored, and they are reported rather than hidden.
try:
    ngrok.kill()
except Exception as exc:
    print("Could not stop previous ngrok process:", exc)

# Authenticate ngrok and expose local port 5000.
ngrok.set_auth_token(ngrok_token)

public_url = ngrok.connect(5000)
print("PUBLIC URL:", public_url)
