### Step 1 — Load semua artefak yang dibutuhkan
Kita ambil vocab, label encoder, dan config dari fase sebelumnya. Bobot model terlatih baru dimuat di Step 3, setelah arsitekturnya dibangun ulang.

In [1]:
import torch
import torch.nn as nn
import pickle, json
import numpy as np

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load .pkl artifacts that come from a trusted source.

# Load vocab (token -> index mapping consumed by encode_text).
with open("artifacts/vocab/word2idx.pkl", "rb") as f:
    word2idx = pickle.load(f)

# Load label encoder (predict_final calls its inverse_transform on the class index).
with open("artifacts/labels/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# Load config. The encoding is pinned to UTF-8 so the file is read the same way
# on every platform instead of depending on the locale's default encoding.
with open("artifacts/config/config.json", encoding="utf-8") as f:
    config = json.load(f)

# Values read from config.json; a missing key raises KeyError here.
MAX_LEN = config["max_len"]
VOCAB_SIZE = config["vocab_size"]
EMBED_DIM = config["embedding_dim"]
HIDDEN_SIZE = config["hidden_size"]
NUM_CLASSES = config["num_classes"]
best_type = config["best_model_type"]  # compared against "gru" / "lstm" when the model is built

### Step 2 — Bangun ulang arsitektur model
Strukturnya harus SAMA persis kayak waktu training.

In [2]:
# Embedding weights stored as a NumPy array; the classifier classes below read
# this module-level array when they build their embedding layer.
embedding_matrix = np.load("artifacts/embedding/embedding_matrix.npy")
# Overrides the EMBED_DIM read from config.json with the matrix's second
# dimension, which is the value later passed as the recurrent layer's input size.
EMBED_DIM = embedding_matrix.shape[1]

In [3]:
class LSTMClassifier(nn.Module):
    """Text classifier: embedding -> LSTM -> linear output layer.

    The embedding layer is initialised from the module-level
    ``embedding_matrix``. ``vocab_size`` is accepted but not read, and
    ``embed_dim`` is used only as the LSTM input size.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size, num_classes):
        super().__init__()
        pretrained_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        # freeze=False leaves the embedding weights trainable (fine-tuning);
        # use True instead to keep them fixed.
        self.emb = nn.Embedding.from_pretrained(pretrained_weights, freeze=False)
        self.lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the linear layer's output for the LSTM's final hidden state.

        ``x`` is passed straight to the embedding layer, so it must hold
        integer token indices (encode_text produces a (1, MAX_LEN) tensor).
        """
        embedded = self.emb(x)
        _, (final_hidden, _) = self.lstm(embedded)
        # Index -1 picks the last entry along the first axis of the hidden state.
        last_state = final_hidden[-1]
        return self.fc(last_state)


class GRUClassifier(nn.Module):
    """Text classifier: embedding -> GRU -> linear output layer.

    The embedding layer is initialised from the module-level
    ``embedding_matrix``. ``vocab_size`` is accepted but not read, and
    ``embed_dim`` is used only as the GRU input size.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size, num_classes):
        super().__init__()
        pretrained_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        # freeze=False leaves the embedding weights trainable (fine-tuning);
        # use True instead to keep them fixed.
        self.emb = nn.Embedding.from_pretrained(pretrained_weights, freeze=False)
        self.gru = nn.GRU(input_size=embed_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the linear layer's output for the GRU's final hidden state.

        ``x`` is passed straight to the embedding layer, so it must hold
        integer token indices (encode_text produces a (1, MAX_LEN) tensor).
        """
        embedded = self.emb(x)
        _, final_hidden = self.gru(embedded)
        # Index -1 picks the last entry along the first axis of the hidden state.
        last_state = final_hidden[-1]
        return self.fc(last_state)

### Step 3 — Load bobot model (.pth) dari fase training
Model langsung bisa dipakai tanpa training ulang.

In [4]:
# Map the model type stored in config.json to the class that rebuilds it.
model_classes = {"gru": GRUClassifier, "lstm": LSTMClassifier}
if best_type not in model_classes:
    raise ValueError("Unknown model type in config.json")

final_model = model_classes[best_type](VOCAB_SIZE, EMBED_DIM, HIDDEN_SIZE, NUM_CLASSES).to(device)

# map_location places the saved tensors on the active device, so a checkpoint
# saved on GPU can still be loaded on a CPU-only machine.
final_model.load_state_dict(
    torch.load("artifacts/model_final/final_model.pth", map_location=device)
)
# Switch to evaluation mode; as the last expression this also displays the model.
final_model.eval()

LSTMClassifier(
  (emb): Embedding(19560, 300)
  (lstm): LSTM(300, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=9, bias=True)
)

### Step 4 — Preprocess judul baru agar sama kayak preprocessing di fase 1
Kalau beda, model bisa ngaco.

In [5]:
import re

def clean_text(text):
    """Normalise a raw title into lowercase, space-separated ASCII tokens.

    Lowercases the text, replaces every character that is not an ASCII letter,
    digit, or whitespace with a space, collapses whitespace runs into a single
    space, and strips the ends.
    """
    lowered = text.lower()
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", " ", lowered)
    return re.sub(r"\s+", " ", alnum_only).strip()

def encode_text(text):
    """Convert a raw title into a (1, MAX_LEN) tensor of token indices.

    The text is cleaned with clean_text and split on whitespace. Tokens missing
    from word2idx map to the "<UNK>" index. Short sequences are right-padded
    with the "<PAD>" index, and longer ones are cut to the first MAX_LEN tokens.
    The returned tensor has dtype long and is moved to ``device``.
    """
    token_ids = [
        word2idx.get(token, word2idx["<UNK>"])
        for token in clean_text(text).split()
    ]

    missing = MAX_LEN - len(token_ids)
    if missing > 0:
        token_ids.extend([word2idx["<PAD>"]] * missing)
    else:
        token_ids = token_ids[:MAX_LEN]

    # Wrap in an outer list so the result carries a batch dimension of size 1.
    batch = torch.tensor([token_ids], dtype=torch.long)
    return batch.to(device)

### Step 5 — Fungsi prediksi dengan model final (LSTM atau GRU)
Fungsi untuk coba tes judul baru kapan saja.

In [6]:
def predict_final(text):
    """Predict the label of a single title with the loaded final model.

    Encodes the text with encode_text, runs final_model without gradient
    tracking, takes the index of the highest score along dim 1, and converts
    that index to its label via label_encoder.inverse_transform.
    """
    encoded = encode_text(text)
    with torch.no_grad():
        scores = final_model(encoded)
    class_index = scores.argmax(dim=1).item()
    return label_encoder.inverse_transform([class_index])[0]

### Step 6 — Tes prediksi dengan judul berita baru

In [7]:
# Sample headline used to try out the loaded model.
judul = "Putri KW Enggan Terbebani SEA Games dan World Tour Finals"
print("Input :", judul)

# The prediction runs after the input has been echoed, then is printed on its own line.
predicted_label = predict_final(judul)
print("Model Prediction :", predicted_label)

Input : Putri KW Enggan Terbebani SEA Games dan World Tour Finals
Model Prediction : sport
