In [1]:
import re
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Locations of the saved artifacts, relative to the notebook's working directory.
TOKENIZER_PATH = '../model/tokenizer.pkl'
MODEL_PATH = '../model/latest'


# Load the tokenizer object that predict_text uses to turn text into sequences.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so
# TOKENIZER_PATH must only ever point at a trusted, locally produced file.
with open(TOKENIZER_PATH, 'rb') as f:
    tokenizer = pickle.load(f)

# Load the saved Keras model that predict_text calls .predict() on.
model = load_model(MODEL_PATH)

def basic_cleaning(text):
    """Reduce ``text`` to its alphabetic words of three or more letters.

    The input is stringified, every maximal run of ASCII letters is taken as
    a word (digits, punctuation and whitespace act as separators and are
    dropped), words of one or two letters are discarded, and the remaining
    words are joined with single spaces in their original order.
    """
    kept = []
    for word in re.findall("[a-zA-Z]+", str(text)):
        # Skip one- and two-letter fragments.
        if len(word) <= 2:
            continue
        kept.append(word)
    return ' '.join(kept)

def predict_text(text, maxlen=50):
    """Classify a single message into one of four categories.

    The text is normalised with ``basic_cleaning``, converted to an integer
    sequence by the module-level ``tokenizer``, padded/truncated to ``maxlen``
    by ``pad_sequences`` and passed to the module-level ``model``.

    Args:
        text: Message to classify; anything ``basic_cleaning`` accepts.
        maxlen: Sequence length handed to ``pad_sequences``. Defaults to 50,
            the value this notebook has always used. Presumably it has to
            match the input length the model was trained with (TODO confirm).

    Returns:
        A ``(label, score)`` tuple: the label string of the highest-scoring
        class (``None`` if that index has no entry in the label table) and
        the model's output value for that class.
    """
    labels = {
        0: 'PENIPUAN',
        1: 'JUDI ONLINE',
        2: 'KTA/PINJAMAN ONLINE',
        3: 'LAIN-LAIN'
    }

    cleaned_text = basic_cleaning(text)
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    padded_sequences = pad_sequences(sequences, maxlen=maxlen)

    # One message in, so the 2-D prediction has a single row; take it.
    scores = model.predict(padded_sequences)[0]
    # Find the winning class once and reuse it for both label and score.
    best = int(np.argmax(scores))
    return labels.get(best), scores[best]

In [2]:
# Example: promotional offer SMS (free YouTube access plus data quota).
# In the recorded run the top class is 'LAIN-LAIN' at only ~0.39, i.e. a
# low-confidence prediction.
predict_text("Gratis akses Youtube + 1GB selama setahun hanya cukup isi pulsa 25rb/bln tanpa potong pulsa, balas sms ini ketik UL1")

('LAIN-LAIN', 0.3933378)

In [3]:
# Example: a plain greeting ("How are you, boss?").
# NOTE(review): the recorded run labels this 'PENIPUAN' (~0.67), which looks
# like a false positive on benign text -- worth checking the model/training data.
predict_text("Apa kabar bos?")

('PENIPUAN', 0.6701171)

In [4]:
# Example: online-gambling site advertisement; the recorded run returns
# 'JUDI ONLINE' (~0.95).
# NOTE(review): the sample embeds what looks like a real contact number --
# consider redacting before sharing the notebook.
predict_text("Cari Link Resmi Mudah Menang? Supertaipan99.com Solusinya Dengan Winrate Di Atas Rata2 & Tarikan Meja Hingga Ratusan juta! Info Lanjut WA:+62 823-3409-6248")

('JUDI ONLINE', 0.94886315)

In [5]:
# Example: unsecured-loan (KTA) offer; the recorded run returns
# 'KTA/PINJAMAN ONLINE' (~0.96).
# NOTE(review): the sample embeds names and what look like real phone
# numbers -- consider redacting before sharing the notebook.
predict_text("Apply KTA 2O-3OOjt, cicilan 1-3 thn gak ribet buat KTA Cukup lampirkan Copy KTP dan Copy CC INFO LANJUT Call/Wa Bagas : 085283908813 Nisfa : 081214403218")

('KTA/PINJAMAN ONLINE', 0.964355)

In [6]:
# Example: message consisting only of a bank account number and holder name;
# the recorded run returns 'PENIPUAN' (~0.97).
# NOTE(review): this looks like a real account number and personal name --
# consider redacting before sharing the notebook.
predict_text("Nmr reknya: 5631-01-017240-123. BRI a/n Chairul Alam.")

('PENIPUAN', 0.9709929)

In [7]:
# Example: multipurpose-financing offer worded as coming from a telco partner;
# the recorded run returns 'KTA/PINJAMAN ONLINE' with a moderate score (~0.66).
predict_text("Plgn Yth,raih peluang pembiayaan multiguna sd Rp15jt dr mitra Telkomsel. Syarat mdh, dgn KTP&rek bank Anda. S&K: tsel.me/dj_tac Balas sms ini (gratis) ketik: CL")

('KTA/PINJAMAN ONLINE', 0.65789175)