In [1]:
from os import path
import pandas as pd
import pickle
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import sys

In [2]:
# Locations are resolved against the parent of the current working directory,
# so the notebook must be launched from a direct subfolder of the project root.
dataset_path = f"{path.abspath('../')}/datasets"
model_path = f"{path.abspath('../')}/models"

# Held-out evaluation split; later cells read its "Pesan" and "Kategori" columns.
data = pd.read_csv(f"{dataset_path}/test.csv")

In [3]:
def _load_pickle(file_path):
    """Unpickle and return the object stored at ``file_path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (or fails) instead of lingering until garbage collection.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in the
    file. Only point this at artifacts produced by the project's own training
    pipeline, never at files from an untrusted source.
    """
    with open(file_path, "rb") as handle:
        return pickle.load(handle)


# Trained classifiers plus the preprocessing objects they were fitted with.
nb_model = _load_pickle(model_path + "/naive_bayes.pkl")
svm_model = _load_pickle(model_path + "/svm.pkl")
le = _load_pickle(model_path + "/label_encoder.pkl")
vectorizer = _load_pickle(model_path + "/vectorizer.pkl")

In [4]:
# Replace the textual class labels with the integer ids produced by the fitted
# label encoder. The column is overwritten in place, so without the guard a
# second run of this cell would hand the already-encoded integers back to
# le.transform, which rejects values it was not fitted on.
# Assumes the raw labels are non-numeric strings (the reports below list
# 'ham' / 'spam') -- TODO confirm if the label set ever changes.
if not pd.api.types.is_numeric_dtype(data['Kategori']):
    data['Kategori'] = le.transform(data['Kategori'])

In [5]:
# Targets: the "Kategori" column as it stands after the label-encoding cell.
y_test = data["Kategori"]

# Features: message texts pushed through the vectorizer loaded from disk
# (transform only -- nothing is re-fitted on the test split).
X_test = vectorizer.transform(data["Pesan"])

In [6]:
print("Naive Bayes")
y_pred_nb = nb_model.predict(X_test)

# Pin the label set to every class the encoder knows (LabelEncoder ids run
# from 0 to n_classes - 1). Without `labels`, classification_report raises when
# a class is missing from both y_test and the predictions, because
# `target_names` no longer matches the number of observed classes, and the
# confusion matrix would silently change shape.
encoded_labels = list(range(len(le.classes_)))

report = classification_report(
    y_test, y_pred_nb, labels=encoded_labels, target_names=le.classes_
)
confusion_mat = confusion_matrix(y_test, y_pred_nb, labels=encoded_labels)
print(report)
print(confusion_mat)

Naive Bayes
              precision    recall  f1-score   support

         ham       0.98      0.98      0.98       254
        spam       0.98      0.98      0.98       274

    accuracy                           0.98       528
   macro avg       0.98      0.98      0.98       528
weighted avg       0.98      0.98      0.98       528

[[249   5]
 [  5 269]]


In [7]:
print("\nSVM")
y_pred_svm = svm_model.predict(X_test)

# Pin the label set to every class the encoder knows (LabelEncoder ids run
# from 0 to n_classes - 1). Without `labels`, classification_report raises when
# a class is missing from both y_test and the predictions, because
# `target_names` no longer matches the number of observed classes, and the
# confusion matrix would silently change shape.
encoded_labels = list(range(len(le.classes_)))

report = classification_report(
    y_test, y_pred_svm, labels=encoded_labels, target_names=le.classes_
)
confusion_mat = confusion_matrix(y_test, y_pred_svm, labels=encoded_labels)
print(report)
print(confusion_mat)


SVM
              precision    recall  f1-score   support

         ham       0.99      0.98      0.99       254
        spam       0.98      0.99      0.99       274

    accuracy                           0.99       528
   macro avg       0.99      0.99      0.99       528
weighted avg       0.99      0.99      0.99       528

[[249   5]
 [  2 272]]


In [8]:
# Minimum accuracy each model must reach on the test split; the gate cell
# below compares both scores against this value.
ACC_THRESHOLD = 0.95

# Score both models against the same ground truth, Naive Bayes first.
nb_acc, svm_acc = (
    accuracy_score(y_test, predictions)
    for predictions in (y_pred_nb, y_pred_svm)
)

print(
    f"Naive Bayes model accuracy: {nb_acc}",
    f"SVM model accuracy: {svm_acc}",
    sep="\n",
)

Naive Bayes model accuracy: 0.9810606060606061
SVM model accuracy: 0.9867424242424242


In [9]:
# Quality gate: abort as soon as either model scores under the threshold.
if any(score < ACC_THRESHOLD for score in (nb_acc, svm_acc)):
    print(f"Model accuracy below threshold ({ACC_THRESHOLD}), failing pipeline.")
    sys.exit(1)  # Non-zero exit code fails the Jenkins stage