In [1]:
import os
import gc
import pickle
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from gensim.models import KeyedVectors
from transformers import AutoTokenizer, AutoModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import MaxAbsScaler
from imblearn.under_sampling import RandomUnderSampler
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU.")

FEATURE_DIR = "features"
MODEL_DIR = "models"
RESULT_FILE = "results.csv"

os.makedirs(FEATURE_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

GPU not available. Using CPU.


In [9]:
data = pd.read_csv('all_agree.csv') 
titles = data['title'].astype(str)
labels = data['label']  

def preprocess_text(text):
    text = text.lower()
    return text

titles = titles.apply(preprocess_text)

In [10]:
rus = RandomUnderSampler(random_state=42)
titles_resampled, labels_resampled = rus.fit_resample(titles.values.reshape(-1, 1), labels)
titles_resampled = pd.Series(titles_resampled.flatten())
labels_resampled = pd.Series(labels_resampled)

X_train, X_test, y_train, y_test = train_test_split(titles_resampled, labels_resampled, test_size=0.2, random_state=42)

In [11]:
print("Jumlah data sebelum resampling:")
print(labels.value_counts())

print("\nJumlah data setelah resampling:")
print(labels_resampled.value_counts())

Jumlah data sebelum resampling:
label
non-clickbait    5297
clickbait        3316
Name: count, dtype: int64

Jumlah data setelah resampling:
label
clickbait        3316
non-clickbait    3316
Name: count, dtype: int64


In [13]:
def normalize_for_naive_bayes(features):
    scaler = MinMaxScaler()
    return scaler.fit_transform(features)

def extract_tfidf(X_train, X_test, max_features=500):
    vectorizer = TfidfVectorizer(max_features=max_features)
    train_features = vectorizer.fit_transform(X_train).toarray()
    test_features = vectorizer.transform(X_test).toarray()
    return train_features, test_features

def extract_bow(X_train, X_test, max_features=500):
    vectorizer = CountVectorizer(max_features=max_features)
    train_features = vectorizer.fit_transform(X_train).toarray()
    test_features = vectorizer.transform(X_test).toarray()
    return train_features, test_features

def extract_glove(X_train, X_test, glove_path):
    glove_model = {}
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            split_line = line.split()
            word = split_line[0]
            embedding = np.array([float(val) for val in split_line[1:]])
            glove_model[word] = embedding
    def embed(texts):
        return np.array([
            np.mean([glove_model[word] for word in text.split() if word in glove_model]
                    or [np.zeros(300)], axis=0)
            for text in texts
        ])
    train_features = embed(X_train)
    test_features = embed(X_test)
    return train_features, test_features

def extract_fasttext(X_train, X_test, fasttext_path):
    fasttext_model = KeyedVectors.load_word2vec_format(fasttext_path)
    tokenized_train = [text.split() for text in X_train]
    tokenized_test = [text.split() for text in X_test]
    train_features = np.array([
        np.mean([fasttext_model[word] for word in words if word in fasttext_model]
                or [np.zeros(fasttext_model.vector_size)], axis=0)
        for words in tokenized_train
    ])
    test_features = np.array([
        np.mean([fasttext_model[word] for word in words if word in fasttext_model]
                or [np.zeros(fasttext_model.vector_size)], axis=0)
        for words in tokenized_test
    ])
    return train_features, test_features

def extract_indobert(X_train, X_test, model_name='indobenchmark/indobert-base-p2'):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)
    def embed(texts):
        tokenized = tokenizer(list(texts), padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**tokenized)
        return outputs.last_hidden_state[:, 0, :].cpu().numpy()  # CLS token
    train_features = embed(X_train)
    test_features = embed(X_test)
    return train_features, test_features

In [14]:
def save_feature(name, train_features, test_features):
    np.save(os.path.join(FEATURE_DIR, f"{name}_train.npy"), train_features)
    np.save(os.path.join(FEATURE_DIR, f"{name}_test.npy"), test_features)

def save_model(name, model):
    with open(os.path.join(MODEL_DIR, f"{name}.pkl"), 'wb') as f:
        pickle.dump(model, f)

def load_feature(name):
    train_features = np.load(f"{FEATURE_DIR}/{name}_train.npy")
    test_features = np.load(f"{FEATURE_DIR}/{name}_test.npy")
    return train_features, test_features

In [15]:
def evaluate_model(X_train, X_test, y_train, y_test, model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    accuracy = accuracy_score(y_test, y_pred)
    return precision, recall, f1, accuracy

In [16]:
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Naive Bayes': MultinomialNB(),
    'SVM': SVC(kernel='linear', probability=True, random_state=42),
    'KNN': KNeighborsClassifier(n_neighbors=5)
}

feature_extractors = {
    'TF-IDF': extract_tfidf,
    'Bag of Words (BOW)': extract_bow,
    'GloVe': lambda X_train, X_test: extract_glove(X_train, X_test, glove_path='glove.6B.300d.txt'),
    'FastText': lambda X_train, X_test: extract_fasttext(X_train, X_test, fasttext_path='cc.id.300.vec'),
    'IndoBERT': extract_indobert
}

In [27]:
from sklearn.model_selection import GridSearchCV

def tune_model(model, param_grid, X_train, y_train):
    """
    Melakukan hyperparameter tuning menggunakan GridSearchCV.
    """
    grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_, grid_search.best_params_

param_grids = {
    'Random Forest': {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [5, 10, 15, 20, 25],
        'min_samples_split': [2, 5, 10]
    },
    'Naive Bayes': {
        'alpha': [0, 0.1, 0.01, 0.001, 1.0],
        'fit_prior': [True, False]
    },
    'SVM': {
        'C': [1, 10, 100],
        'kernel': ['linear', 'poly', 'sigmoid', 'rbf'],
        'gamma': [0.1, 0.01, 0.001, 0.0001]
    },
    'KNN': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance']
    }
}


In [28]:
if not os.path.exists(RESULT_FILE):
    with open(RESULT_FILE, 'w') as f:
        f.write('Feature Extraction,Model,Precision,Recall,F1-Score,Accuracy\n')

In [29]:
for feature_name, extractor in feature_extractors.items():
    print(f"Processing Feature Extraction: {feature_name}")
    feature_train_path = f"{FEATURE_DIR}/{feature_name}_train.npy"
    feature_test_path = f"{FEATURE_DIR}/{feature_name}_test.npy"

    if not os.path.exists(feature_train_path):
        X_train_features, X_test_features = extractor(X_train, X_test)
        save_feature(feature_name, X_train_features, X_test_features)
    else:
        X_train_features, X_test_features = load_feature(feature_name)
    print(f"Features for {feature_name} ready!")

    for model_name, model in models.items():
        print(f"  Training Model: {model_name} with {feature_name}")

        # Tuning Hyperparameters
        if model_name in param_grids:
            param_grid = param_grids[model_name]
            if model_name == 'Naive Bayes':
                X_train_normalized = normalize_for_naive_bayes(X_train_features)
                X_test_normalized = normalize_for_naive_bayes(X_test_features)
                tuned_model, best_params = tune_model(model, param_grid, X_train_normalized, y_train)
            else:
                tuned_model, best_params = tune_model(model, param_grid, X_train_features, y_train)
            
            print(f"Best parameters for {model_name} with {feature_name}: {best_params}")
        else:
            tuned_model = model

        # Evaluasi model
        if model_name == 'Naive Bayes':
            precision, recall, f1, accuracy = evaluate_model(
                normalize_for_naive_bayes(X_train_features),
                normalize_for_naive_bayes(X_test_features),
                y_train, y_test, tuned_model
            )
        else:
            precision, recall, f1, accuracy = evaluate_model(
                X_train_features, X_test_features, y_train, y_test, tuned_model
            )

        # Simpan model
        save_model(f"{feature_name}_{model_name}", tuned_model)

        # Simpan hasil ke file
        with open(RESULT_FILE, 'a') as f:
            f.write(f"{feature_name},{model_name},{precision},{recall},{f1},{accuracy}\n")

        print(f"Results for {feature_name} with {model_name}:")
        print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}, Accuracy: {accuracy:.4f}")
        print("=" * 50)

    del X_train_features, X_test_features
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()


Processing Feature Extraction: TF-IDF
Features for TF-IDF ready!
  Training Model: Random Forest with TF-IDF
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with TF-IDF: {'max_depth': 25, 'min_samples_split': 2, 'n_estimators': 100}
Results for TF-IDF with Random Forest:
Precision: 0.8202, Recall: 0.7973, F1-Score: 0.7941, Accuracy: 0.7973
  Training Model: Naive Bayes with TF-IDF
Fitting 3 folds for each of 10 candidates, totalling 30 fits




Best parameters for Naive Bayes with TF-IDF: {'alpha': 0, 'fit_prior': True}
Results for TF-IDF with Naive Bayes:
Precision: 0.8031, Recall: 0.8011, F1-Score: 0.8009, Accuracy: 0.8011
  Training Model: SVM with TF-IDF
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with TF-IDF: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Results for TF-IDF with SVM:
Precision: 0.8396, Recall: 0.8350, F1-Score: 0.8346, Accuracy: 0.8350
  Training Model: KNN with TF-IDF
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with TF-IDF: {'n_neighbors': 3, 'weights': 'uniform'}
Results for TF-IDF with KNN:
Precision: 0.7130, Recall: 0.6903, F1-Score: 0.6830, Accuracy: 0.6903
Processing Feature Extraction: Bag of Words (BOW)
Features for Bag of Words (BOW) ready!
  Training Model: Random Forest with Bag of Words (BOW)
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with Bag of Words (BOW): {'ma



Best parameters for Naive Bayes with Bag of Words (BOW): {'alpha': 0, 'fit_prior': True}
Results for Bag of Words (BOW) with Naive Bayes:
Precision: 0.8164, Recall: 0.8161, F1-Score: 0.8161, Accuracy: 0.8161
  Training Model: SVM with Bag of Words (BOW)
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with Bag of Words (BOW): {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Results for Bag of Words (BOW) with SVM:
Precision: 0.8368, Recall: 0.8297, F1-Score: 0.8290, Accuracy: 0.8297
  Training Model: KNN with Bag of Words (BOW)
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with Bag of Words (BOW): {'n_neighbors': 3, 'weights': 'distance'}
Results for Bag of Words (BOW) with KNN:
Precision: 0.7081, Recall: 0.6797, F1-Score: 0.6699, Accuracy: 0.6797
Processing Feature Extraction: GloVe
Features for GloVe ready!
  Training Model: Random Forest with GloVe
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best 



Best parameters for Naive Bayes with GloVe: {'alpha': 0, 'fit_prior': False}
Results for GloVe with Naive Bayes:
Precision: 0.7362, Recall: 0.7280, F1-Score: 0.7261, Accuracy: 0.7280
  Training Model: SVM with GloVe
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with GloVe: {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Results for GloVe with SVM:
Precision: 0.7838, Recall: 0.7830, F1-Score: 0.7829, Accuracy: 0.7830
  Training Model: KNN with GloVe
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with GloVe: {'n_neighbors': 3, 'weights': 'distance'}
Results for GloVe with KNN:
Precision: 0.7069, Recall: 0.6956, F1-Score: 0.6904, Accuracy: 0.6956
Processing Feature Extraction: FastText
Features for FastText ready!
  Training Model: Random Forest with FastText
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with FastText: {'max_depth': 25, 'min_samples_split': 10, 'n_estim



Best parameters for Naive Bayes with FastText: {'alpha': 0, 'fit_prior': True}
Results for FastText with Naive Bayes:
Precision: 0.7788, Recall: 0.7589, F1-Score: 0.7551, Accuracy: 0.7589
  Training Model: SVM with FastText
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with FastText: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
Results for FastText with SVM:
Precision: 0.8448, Recall: 0.8433, F1-Score: 0.8432, Accuracy: 0.8433
  Training Model: KNN with FastText
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with FastText: {'n_neighbors': 7, 'weights': 'distance'}
Results for FastText with KNN:
Precision: 0.7808, Recall: 0.7777, F1-Score: 0.7768, Accuracy: 0.7777
Processing Feature Extraction: IndoBERT
Features for IndoBERT ready!
  Training Model: Random Forest with IndoBERT
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with IndoBERT: {'max_depth': 15, 'min_sample



Best parameters for Naive Bayes with IndoBERT: {'alpha': 0, 'fit_prior': True}
Results for IndoBERT with Naive Bayes:
Precision: 0.8291, Recall: 0.8289, F1-Score: 0.8289, Accuracy: 0.8289
  Training Model: SVM with IndoBERT
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with IndoBERT: {'C': 10, 'gamma': 0.001, 'kernel': 'poly'}
Results for IndoBERT with SVM:
Precision: 0.9213, Recall: 0.9209, F1-Score: 0.9209, Accuracy: 0.9209
  Training Model: KNN with IndoBERT
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with IndoBERT: {'n_neighbors': 7, 'weights': 'distance'}
Results for IndoBERT with KNN:
Precision: 0.8861, Recall: 0.8855, F1-Score: 0.8854, Accuracy: 0.8855


In [34]:
import pandas as pd

RESULT_FILE = 'results_tuning.csv'

# Fungsi untuk menyimpan hasil ke file CSV
def save_results_to_csv(feature_name, model_name, precision, recall, f1, accuracy, best_params):
    # Cek apakah file sudah ada
    try:
        results_df = pd.read_csv(RESULT_FILE)
    except FileNotFoundError:
        # Jika file belum ada, buat DataFrame baru
        results_df = pd.DataFrame(columns=[
            'Feature Extraction', 'Model', 'Precision', 'Recall', 'F1-Score', 'Accuracy', 'Best Params'
        ])

    # Tambahkan hasil baru ke DataFrame
    new_result = {
        'Feature Extraction': feature_name,
        'Model': model_name,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Accuracy': accuracy,
        'Best Params': str(best_params)
    }
    results_df = pd.concat([results_df, pd.DataFrame([new_result])], ignore_index=True)

    # Simpan kembali ke CSV
    results_df.to_csv(RESULT_FILE, index=False)

In [35]:
# Modifikasi bagian loop untuk menyimpan hasil ke file CSV
for feature_name, extractor in feature_extractors.items():
    print(f"Processing Feature Extraction: {feature_name}")
    feature_train_path = f"{FEATURE_DIR}/{feature_name}_train.npy"
    feature_test_path = f"{FEATURE_DIR}/{feature_name}_test.npy"

    if not os.path.exists(feature_train_path):
        X_train_features, X_test_features = extractor(X_train, X_test)
        save_feature(feature_name, X_train_features, X_test_features)
    else:
        X_train_features, X_test_features = load_feature(feature_name)
    print(f"Features for {feature_name} ready!")

    for model_name, model in models.items():
        print(f"  Training Model: {model_name} with {feature_name}")

        # Tuning Hyperparameters
        best_params = None
        if model_name in param_grids:
            param_grid = param_grids[model_name]
            if model_name == 'Naive Bayes':
                X_train_normalized = normalize_for_naive_bayes(X_train_features)
                X_test_normalized = normalize_for_naive_bayes(X_test_features)
                tuned_model, best_params = tune_model(model, param_grid, X_train_normalized, y_train)
            else:
                tuned_model, best_params = tune_model(model, param_grid, X_train_features, y_train)
            
            print(f"Best parameters for {model_name} with {feature_name}: {best_params}")
        else:
            tuned_model = model

        # Evaluasi model
        if model_name == 'Naive Bayes':
            precision, recall, f1, accuracy = evaluate_model(
                normalize_for_naive_bayes(X_train_features),
                normalize_for_naive_bayes(X_test_features),
                y_train, y_test, tuned_model
            )
        else:
            precision, recall, f1, accuracy = evaluate_model(
                X_train_features, X_test_features, y_train, y_test, tuned_model
            )

        # Simpan hasil ke CSV
        save_results_to_csv(
            feature_name, model_name, precision, recall, f1, accuracy, best_params
        )

        print(f"Results for {feature_name} with {model_name}:")
        print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}, Accuracy: {accuracy:.4f}")
        print("=" * 50)

    del X_train_features, X_test_features
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()


Processing Feature Extraction: TF-IDF
Features for TF-IDF ready!
  Training Model: Random Forest with TF-IDF
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with TF-IDF: {'max_depth': 25, 'min_samples_split': 2, 'n_estimators': 100}
Results for TF-IDF with Random Forest:
Precision: 0.8202, Recall: 0.7973, F1-Score: 0.7941, Accuracy: 0.7973
  Training Model: Naive Bayes with TF-IDF
Fitting 3 folds for each of 10 candidates, totalling 30 fits




Best parameters for Naive Bayes with TF-IDF: {'alpha': 0, 'fit_prior': True}
Results for TF-IDF with Naive Bayes:
Precision: 0.8031, Recall: 0.8011, F1-Score: 0.8009, Accuracy: 0.8011
  Training Model: SVM with TF-IDF
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with TF-IDF: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Results for TF-IDF with SVM:
Precision: 0.8396, Recall: 0.8350, F1-Score: 0.8346, Accuracy: 0.8350
  Training Model: KNN with TF-IDF
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with TF-IDF: {'n_neighbors': 3, 'weights': 'uniform'}
Results for TF-IDF with KNN:
Precision: 0.7130, Recall: 0.6903, F1-Score: 0.6830, Accuracy: 0.6903
Processing Feature Extraction: Bag of Words (BOW)
Features for Bag of Words (BOW) ready!
  Training Model: Random Forest with Bag of Words (BOW)
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with Bag of Words (BOW): {'ma



Best parameters for Naive Bayes with Bag of Words (BOW): {'alpha': 0, 'fit_prior': True}
Results for Bag of Words (BOW) with Naive Bayes:
Precision: 0.8164, Recall: 0.8161, F1-Score: 0.8161, Accuracy: 0.8161
  Training Model: SVM with Bag of Words (BOW)
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with Bag of Words (BOW): {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Results for Bag of Words (BOW) with SVM:
Precision: 0.8368, Recall: 0.8297, F1-Score: 0.8290, Accuracy: 0.8297
  Training Model: KNN with Bag of Words (BOW)
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with Bag of Words (BOW): {'n_neighbors': 3, 'weights': 'distance'}
Results for Bag of Words (BOW) with KNN:
Precision: 0.7081, Recall: 0.6797, F1-Score: 0.6699, Accuracy: 0.6797
Processing Feature Extraction: GloVe
Features for GloVe ready!
  Training Model: Random Forest with GloVe
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best 



Best parameters for Naive Bayes with GloVe: {'alpha': 0, 'fit_prior': False}
Results for GloVe with Naive Bayes:
Precision: 0.7362, Recall: 0.7280, F1-Score: 0.7261, Accuracy: 0.7280
  Training Model: SVM with GloVe
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with GloVe: {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Results for GloVe with SVM:
Precision: 0.7838, Recall: 0.7830, F1-Score: 0.7829, Accuracy: 0.7830
  Training Model: KNN with GloVe
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with GloVe: {'n_neighbors': 3, 'weights': 'distance'}
Results for GloVe with KNN:
Precision: 0.7069, Recall: 0.6956, F1-Score: 0.6904, Accuracy: 0.6956
Processing Feature Extraction: FastText
Features for FastText ready!
  Training Model: Random Forest with FastText
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with FastText: {'max_depth': 25, 'min_samples_split': 10, 'n_estim



Best parameters for Naive Bayes with FastText: {'alpha': 0, 'fit_prior': True}
Results for FastText with Naive Bayes:
Precision: 0.7788, Recall: 0.7589, F1-Score: 0.7551, Accuracy: 0.7589
  Training Model: SVM with FastText
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with FastText: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
Results for FastText with SVM:
Precision: 0.8448, Recall: 0.8433, F1-Score: 0.8432, Accuracy: 0.8433
  Training Model: KNN with FastText
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with FastText: {'n_neighbors': 7, 'weights': 'distance'}
Results for FastText with KNN:
Precision: 0.7808, Recall: 0.7777, F1-Score: 0.7768, Accuracy: 0.7777
Processing Feature Extraction: IndoBERT
Features for IndoBERT ready!
  Training Model: Random Forest with IndoBERT
Fitting 3 folds for each of 60 candidates, totalling 180 fits
Best parameters for Random Forest with IndoBERT: {'max_depth': 15, 'min_sample



Best parameters for Naive Bayes with IndoBERT: {'alpha': 0, 'fit_prior': True}
Results for IndoBERT with Naive Bayes:
Precision: 0.8291, Recall: 0.8289, F1-Score: 0.8289, Accuracy: 0.8289
  Training Model: SVM with IndoBERT
Fitting 3 folds for each of 48 candidates, totalling 144 fits
Best parameters for SVM with IndoBERT: {'C': 10, 'gamma': 0.001, 'kernel': 'poly'}
Results for IndoBERT with SVM:
Precision: 0.9213, Recall: 0.9209, F1-Score: 0.9209, Accuracy: 0.9209
  Training Model: KNN with IndoBERT
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for KNN with IndoBERT: {'n_neighbors': 7, 'weights': 'distance'}
Results for IndoBERT with KNN:
Precision: 0.8861, Recall: 0.8855, F1-Score: 0.8854, Accuracy: 0.8855
