In [5]:
import json

# Load a JSON file and return its parsed content
def load_and_check_json(filepath):
    """Read ``filepath`` and return the decoded JSON document.

    The file is decoded as ``utf-8-sig`` rather than with the platform
    default encoding, so the result does not depend on the machine's locale
    and a leading UTF-8 byte-order mark (which ``json.load`` rejects) is
    skipped transparently.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load``, or ``None`` when the file
        could not be opened, decoded or parsed. A file whose content is the
        JSON literal ``null`` also yields ``None``.
    """
    try:
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            return json.load(f)
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None
        print(f"Terjadi kesalahan: {e}")
        return None

# Example call: load the tokenizer file (the loader returns None on failure)
data = load_and_check_json("tokenizer.json")


In [7]:
def validate_tokenizer_json(tokenizer_json):
    """Check that a loaded tokenizer JSON document has the expected layout.

    The document must be a dictionary with a ``config`` dictionary holding
    the tokenizer settings and a ``word_index`` dictionary.

    Args:
        tokenizer_json: The object obtained by parsing the tokenizer file.

    Returns:
        None. The function only signals problems by raising.

    Raises:
        ValueError: If the document or its ``config`` / ``word_index``
            sections are not dictionaries, or if a required key is missing.
            Structural problems are always reported as ``ValueError`` so a
            caller needs to catch only one exception type.
    """
    # Reject non-dict documents up front; otherwise the subscripting below
    # could fail with a TypeError instead of a validation error.
    if not isinstance(tokenizer_json, dict):
        raise ValueError("File JSON harus berupa dictionary.")

    required_keys = ['config', 'word_index']
    for key in required_keys:
        if key not in tokenizer_json:
            raise ValueError(f"File JSON tidak memiliki kunci '{key}'.")

    # Validate the config section. It must be a dict: a None value would raise
    # TypeError on the membership test, and a list or string could satisfy it
    # by accident (element / substring match).
    config = tokenizer_json['config']
    if not isinstance(config, dict):
        raise ValueError("Bagian 'config' harus berupa dictionary.")

    config_keys = ['num_words', 'filters', 'lower', 'split', 'char_level', 'oov_token']
    for key in config_keys:
        if key not in config:
            raise ValueError(f"Bagian 'config' tidak memiliki kunci '{key}'.")

    # Validate word_index
    if not isinstance(tokenizer_json['word_index'], dict):
        raise ValueError("Bagian 'word_index' harus berupa dictionary.")

# Example usage: load the tokenizer file and validate its structure
try:
    tokenizer_json = load_and_check_json("tokenizer.json")
    # Compare against None (the loader's failure value) instead of relying on
    # truthiness: an empty JSON object is falsy and would otherwise skip
    # validation without printing anything.
    if tokenizer_json is not None:
        validate_tokenizer_json(tokenizer_json)
        print("File JSON valid!")
except ValueError as e:
    print(f"Kesalahan validasi: {e}")


File JSON valid!


In [8]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from utility import load_tokenizer_from_json

# Turn raw input texts into fixed-length integer sequences
def preprocess_input(texts, tokenizer, maxlen=100):
    """Tokenize ``texts`` and pad/truncate every sequence to ``maxlen``.

    Args:
        texts: Texts to encode; passed unchanged to
            ``tokenizer.texts_to_sequences``.
        tokenizer: Object providing ``texts_to_sequences``.
        maxlen: Target length of every sequence (default 100).

    Returns:
        The result of ``pad_sequences`` with padding and truncation both
        applied at the end of each sequence (``'post'``).
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(texts),  # texts -> lists of token ids
        maxlen=maxlen,
        padding='post',
        truncating='post',
    )

# Load the trained model and the tokenizer
model = load_model("best_model-2.keras")
tokenizer = load_tokenizer_from_json("tokenizer.json")

# Load the test set.
# SECURITY: allow_pickle=True lets np.load unpickle object arrays, and
# unpickling can execute arbitrary code. Only load .npy files that come from
# a trusted source.
X_test = np.load("x_test.npy", allow_pickle=True)
y_test = np.load("y_test.npy", allow_pickle=True)  # assumed to be numeric labels (0 or 1) — TODO confirm

# Tokenize and pad the test inputs
# NOTE(review): maxlen=100 presumably matches the length used in training — confirm
X_test_processed = preprocess_input(X_test, tokenizer, maxlen=100)

# Run the model on the test set
predictions = model.predict(X_test_processed)

# Confidence score = model output for label 1.
# NOTE(review): the axis label below calls label 1 "Negatif" — confirm against
# the label encoding used in training.
if predictions.ndim == 2 and predictions.shape[1] == 1:
    # Single output unit: there is no column 1, so indexing it would raise
    # IndexError. Assumed to be a sigmoid probability of label 1 — TODO confirm.
    confidence_scores = predictions[:, 0]
else:
    confidence_scores = predictions[:, 1]

# Plot the distribution of the confidence scores
plt.figure(figsize=(10, 6))
plt.hist(confidence_scores, bins=20, alpha=0.7, color='blue', edgecolor='black')
plt.title("Distribusi Confidence Score untuk Dataset Uji")
plt.xlabel("Confidence Score (Kelas Negatif)")
plt.ylabel("Frekuensi")
plt.grid(axis='y')
plt.show()

# Summary statistics of the confidence scores
mean_confidence = np.mean(confidence_scores)
std_confidence = np.std(confidence_scores)
print(f"Rata-rata Confidence Score: {mean_confidence:.2f}")
print(f"Standar Deviasi Confidence Score: {std_confidence:.2f}")

ValueError: Object arrays cannot be loaded when allow_pickle=False