In [28]:
import re
import tensorflow as tf

# Read the saved vocabulary: one token per line, surrounding whitespace stripped
with open('vocab.txt', 'r') as vocab_file:
    vocabulary = list(map(str.strip, vocab_file))

# Build the text-to-token-id layer and install the vocabulary read above
# instead of adapting the layer on a corpus. Every input is padded/truncated
# to 20 token ids.
# NOTE(review): max_tokens / output_sequence_length presumably have to match
# the values used when the model was trained — confirm against the training code.
vectorize_layer = tf.keras.layers.TextVectorization(
    max_tokens=1001,
    output_sequence_length=20,
)
vectorize_layer.set_vocabulary(vocabulary)

# Restore the trained Keras model from its HDF5 file
loaded_model = tf.keras.models.load_model('saved_model.h5')

def preprocess_text(text):
    """Normalize a raw job title before it is vectorized.

    The text is lowercased, every character that is neither a word character
    (letters, digits, underscore) nor whitespace is deleted, runs of
    whitespace are collapsed to a single space, and leading/trailing
    whitespace is removed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    # Punctuation is deleted outright (not replaced by a space), so
    # "part-time" becomes "parttime".
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    return re.sub(r'\s+', ' ', without_punctuation).strip()
# Job titles to classify; the trailing comment on each entry is the expected class.
# Keep the trailing comma: without it, two adjacent string literals would be
# silently concatenated into one entry.
test_jobs = [
    "Kami membutuhkan Hacker untuk Membobol Sistem",  # Illegal
]

# Clean every title with preprocess_text
test_jobs_cleaned = list(map(preprocess_text, test_jobs))

# Convert the cleaned titles into fixed-length token-id sequences
test_jobs_sequences = vectorize_layer(test_jobs_cleaned)

# Run the loaded model on the sequences
predictions = loaded_model.predict(test_jobs_sequences)

# Turn each prediction into a label: a first output value at or above the
# threshold is reported as "Legal", anything below it as "Illegal".
threshold = 0.5
results = []
for job, prediction in zip(test_jobs, predictions):
    if prediction[0] >= threshold:
        label = "Legal"
    else:
        label = "Illegal"
    results.append((job, label, prediction[0]))

# Print one report per title: original text, label, and value to two decimals
for job, label, prob in results:
    print(f"Job Title: {job}\nPredicted Label: {label} (Probability: {prob:.2f})\n")


Job Title: Kami membutuhkan Hacker untuk Membobol Sistem
Predicted Label: Illegal (Probability: 0.00)



In [29]:
# Debug dump of the intermediate values from the prediction cell above, so the
# cleaning, vectorizing and model-output stages can each be inspected.
print("Cleaned Text:", test_jobs_cleaned)
# Token ids produced by vectorize_layer, converted to a NumPy array for printing
print("Vectorized Sequences:", test_jobs_sequences.numpy())
# Model output exactly as returned by predict(), before the threshold is applied
print("Raw Predictions:", predictions)
# NOTE(review): this prints the entire vocabulary list; the "(Python)" labels
# look like they are meant for comparison with another implementation — confirm.
print("Vocabulary (Python):", vocabulary)
# Same array as "Vectorized Sequences" above, printed again under a different label
print("Vectorized Sequences (Python):", test_jobs_sequences.numpy())



Cleaned Text: ['kami membutuhkan hacker untuk membobol sistem']
Vectorized Sequences: [[  7   6 439 223   1 370   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]]
Raw Predictions: [[0.00139521]]
Vocabulary (Python): ['', '[UNK]', 'lowongan', 'segera', 'staff', 'dibuka', 'membutuhkan', 'kami', 'admin', 'dicari', 'cepat', 'dibutuhkan', 'butuh', 'sales', 'sebagai', 'bergabung', 'peluang', 'kerja', 'marketing', 'tersedia', 'posisi', 'time', 'operator', 'manager', 'jasa', 'crew', 'part', 'full', 'freelance', 'service', 'media', 'dan', 'staf', 'customer', 'live', 'supervisor', 'produksi', 'host', 'tukang', 'online', 'kasir', 'specialist', 'officer', 'creator', 'accounting', 'spv', 'content', 'store', 'social', 'cook', 'karyawan', 'digital', 'toko', 'outlet', 'guru', 'teknisi', 'barista', 'finance', 'desain', 'tenaga', 'ilegal', 'head', 'data', 'asisten', 'area', 'penjaga', 'office', 'penjual', 'mesin', 'konten', 'kitchen', 'grafis', 'editor', 'spg', 'designer', 'barang', 'support', 