In [None]:
import pandas as pd
import numpy as np
import torch
import re
import tqdm
import xgboost as xgb

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking
from tensorflow.keras.optimizers import Adam

from sklearn.utils import shuffle
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import matthews_corrcoef,accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split

from tqdm import tqdm
from tqdm.keras import TqdmCallback
from transformers import AutoTokenizer, AutoModel, RobertaModel, RobertaTokenizer

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf
print("GPU Available:", tf.config.list_physical_devices('GPU'))

GPU Available: []


# Read Dataset

In [None]:
# embedding = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/specter_embedding.csv')
metadata = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/combined_with_nan.csv')

mtdt = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/mtdt_embedding.csv')
txt = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/txt_embedding.csv')

train_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/test.csv')
sample = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Gammafest 2025/sample_submission.csv')

Preparation

In [None]:
txt_embed = txt.drop(columns=['Unnamed: 0'])
mtdt_embed = mtdt.drop(columns=['Unnamed: 0', 'paper_id'])

In [None]:
data_embed = pd.concat([txt_embed, mtdt_embed], axis=1)
data_embed

Unnamed: 0,paper_id,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,...,mtdt_758,mtdt_759,mtdt_760,mtdt_761,mtdt_762,mtdt_763,mtdt_764,mtdt_765,mtdt_766,mtdt_767
0,p0000,-0.307896,0.233469,-0.795748,-0.020367,-0.135363,0.108508,1.538086,1.053744,0.992476,...,0.361256,0.309300,0.940870,0.438250,-0.097464,-0.177169,0.318410,0.378203,-0.130136,0.547858
1,p0001,0.084872,0.392815,-0.504959,-0.021132,-0.114442,0.422108,0.588093,0.723649,0.348245,...,0.345426,-0.708377,0.869000,0.223402,-0.416547,0.202308,-0.492604,0.643108,-0.323697,0.918231
2,p0002,-0.028009,0.680236,-0.821320,0.205567,0.157409,1.311699,-0.855878,0.208994,1.789679,...,-0.080286,-0.225180,0.869219,0.469897,0.418345,0.023076,0.550739,0.508122,-0.765913,1.141435
3,p0003,-0.462517,1.123400,0.482200,0.050586,0.252007,-0.154191,0.874468,1.153361,0.787142,...,0.327962,-1.480308,0.990188,1.004630,-1.131652,0.046745,0.652956,0.394007,0.403567,0.913617
4,p0004,-0.686830,1.164063,0.139771,-0.061362,-0.311246,0.477096,1.062190,0.358880,0.112960,...,0.267060,-0.313841,0.427698,0.400070,-0.242398,0.235142,0.353829,0.140394,0.488866,0.461436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4349,p4349,-0.228524,0.531688,-0.480547,0.038501,-0.109922,-0.126173,0.954068,0.801017,0.071041,...,-0.106525,-1.055142,0.591994,-0.062940,-0.504965,0.337807,0.331830,0.070886,0.197972,0.950505
4350,p4350,-0.760740,0.784616,-0.522071,-0.182086,-0.432741,0.345590,-0.227202,0.635391,0.787055,...,0.678842,-0.207159,1.074868,0.143668,-0.590506,0.170782,0.200454,-0.447116,-0.931619,0.611930
4351,p4351,0.003420,0.617399,-0.119199,-0.603085,0.375625,-0.284949,0.108157,-0.025868,0.097006,...,0.406650,0.062788,0.820162,0.283859,-0.373997,-0.174518,0.225618,-0.155076,-0.131684,0.226827
4352,p4352,-0.822705,0.531300,0.687420,-0.998790,0.647437,-0.258114,0.966562,0.221419,0.495811,...,0.824986,-0.652105,0.342011,1.322424,-0.120471,0.523878,0.667718,0.203245,0.513689,0.717953


In [None]:
for col in data_embed.select_dtypes(include='float').columns:
    data_embed[col] = data_embed[col].astype('float32')

In [None]:
embedding = data_embed

# Prepare dt_train and dt_test

In [None]:
train_df_paper = train_df.merge(metadata, how='left', left_on='paper', right_on='paper_id') \
                   .rename(columns={'publication_year': 'paper_year', 'publication_date': 'paper_date'}) \
                   .drop(columns=['paper_id', 'content', 'doi', 'title', 'cited_by_count', 'type', 'authors', 'concepts'])

train_df_new = train_df_paper.merge(metadata, how='left', left_on='referenced_paper', right_on='paper_id') \
                   .rename(columns={'publication_year': 'ref_paper_year', 'publication_date': 'ref_paper_date'}) \
                   .drop(columns=['paper_id', 'content', 'doi', 'title', 'cited_by_count', 'type', 'authors', 'concepts'])

In [None]:
train_df_new['paper_date'] = pd.to_datetime(train_df_new['paper_date'], format='mixed', errors='coerce')
train_df_new['ref_paper_date'] = pd.to_datetime(train_df_new['ref_paper_date'], format='mixed', errors='coerce')

In [None]:
# Filter baris yang valid: paper_date > ref_paper_date
train_df_cleaned = train_df_new[train_df_new['paper_date'] > train_df_new['ref_paper_date']].reset_index(drop=True)

In [None]:
dt_train = train_df_cleaned.copy()
dt_train = dt_train[['paper', 'referenced_paper', 'is_referenced']]

In [None]:
test_df_paper = test_df.merge(metadata, how='left', left_on='paper', right_on='paper_id') \
                   .rename(columns={'publication_year': 'paper_year', 'publication_date': 'paper_date'}) \
                   .drop(columns=['paper_id', 'content', 'doi', 'title', 'cited_by_count', 'type', 'authors', 'concepts'])

test_df_new = test_df_paper.merge(metadata, how='left', left_on='referenced_paper', right_on='paper_id') \
                   .rename(columns={'publication_year': 'ref_paper_year', 'publication_date': 'ref_paper_date'}) \
                   .drop(columns=['paper_id', 'content', 'doi', 'title', 'cited_by_count', 'type', 'authors', 'concepts'])

In [None]:
test_df_new['paper_date'] = pd.to_datetime(test_df_new['paper_date'], format='mixed', errors='coerce')
test_df_new['ref_paper_date'] = pd.to_datetime(test_df_new['ref_paper_date'], format='mixed', errors='coerce')

In [None]:
test_df_cleaned = test_df_new[test_df_new['paper_date'] > test_df_new['ref_paper_date']].reset_index(drop=True)
test_df_0 = test_df_new[test_df_new['paper_date'] <= test_df_new['ref_paper_date']].reset_index(drop=True)

# Handle Imbalance Data

In [None]:
dt_train

Unnamed: 0,paper,referenced_paper,is_referenced
0,p2128,p3728,0
1,p1298,p3760,0
2,p0211,p1808,0
3,p0843,p2964,0
4,p1606,p3627,0
...,...,...,...
266002,p2256,p3507,0
266003,p1032,p0730,0
266004,p0719,p2382,1
266005,p4213,p0457,0


In [None]:
# Balancing the Imbalance Data
df_majority = dt_train[dt_train['is_referenced'] == 0]
df_minority = dt_train[dt_train['is_referenced'] == 1]
df_majority_sampled = df_majority.sample(n = 6500, random_state = 2025)
df_balanced = pd.concat([df_majority_sampled, df_minority], axis = 0).sample(frac = 1, random_state = 2025).reset_index(drop=True)

In [None]:
df_balanced

Unnamed: 0,paper,referenced_paper,is_referenced
0,p2483,p0981,0
1,p0725,p1704,1
2,p0782,p2732,0
3,p3064,p1405,1
4,p3658,p0586,1
...,...,...,...
10592,p3921,p1024,0
10593,p3560,p0702,1
10594,p3509,p1847,0
10595,p0080,p0829,0


# Merging Train & Embedded

In [None]:
batch_size = 5000
num_batches = (len(df_balanced) + batch_size - 1) // batch_size

merged_batches = []

embedding_cols = [col for col in embedding.columns if col != 'paper_id']

embedding_paper = embedding.rename(columns={'paper_id': 'paper'})
embedding_ref = embedding.rename(columns={'paper_id': 'referenced_paper'})
embedding_ref = embedding_ref.rename(columns={col: f"{col}_ref" for col in embedding_cols})

for i in range(num_batches):
    start = i * batch_size
    end = min((i + 1) * batch_size, len(df_balanced))

    batch = df_balanced.iloc[start:end].copy()

    # Merge embedding
    batch = batch.merge(embedding_paper, on='paper', how='left')
    batch = batch.merge(embedding_ref, on='referenced_paper', how='left')

    # Buat fitur baru
    for col in embedding_cols:
        ref_col = f"{col}_ref"
        batch[f"{col}_abs_diff"] = (batch[col] - batch[ref_col]).abs()
        batch[f"{col}_mul"] = batch[col] * batch[ref_col]

    merged_batches.append(batch)
    print(f"✅ Batch {i+1}/{num_batches} selesai digabung & dihitung.")

# Gabung semua batch
train_df_with_embed = pd.concat(merged_batches, ignore_index=True)

✅ Batch 1/3 selesai digabung & dihitung.
✅ Batch 2/3 selesai digabung & dihitung.
✅ Batch 3/3 selesai digabung & dihitung.


In [None]:
train_df_with_embed

Unnamed: 0,paper,referenced_paper,is_referenced,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,...,mtdt_763_abs_diff,mtdt_763_mul,mtdt_764_abs_diff,mtdt_764_mul,mtdt_765_abs_diff,mtdt_765_mul,mtdt_766_abs_diff,mtdt_766_mul,mtdt_767_abs_diff,mtdt_767_mul
0,p2483,p0981,0,-0.216859,0.467223,-0.809789,-0.034501,-0.247385,0.353072,0.601202,...,0.382217,-0.030982,0.664769,0.615720,1.276122,-0.373337,0.972628,-0.223302,0.133626,0.024774
1,p0725,p1704,1,-0.285272,0.571589,0.877238,-0.182401,0.297220,-0.856047,0.312085,...,0.044452,0.024835,0.300502,-0.022573,0.030306,0.015065,0.420410,0.857826,0.184417,1.397279
2,p0782,p2732,0,-0.300498,0.542462,-0.038007,0.172063,-0.082823,-0.189893,1.093386,...,0.159195,0.006307,1.092071,-0.031410,0.740996,0.125553,1.379470,-0.397638,1.055579,0.147415
3,p3064,p1405,1,-0.182774,0.585294,-0.077243,0.590000,-0.663280,0.238440,1.134478,...,0.106910,0.019728,0.750646,-0.069042,0.367496,0.652594,0.325389,0.000663,0.389469,0.927421
4,p3658,p0586,1,-0.819823,-0.249222,-0.032340,-0.164345,-0.367130,-0.228473,1.458627,...,0.293247,0.014278,0.249919,0.090094,0.036655,0.218296,0.005848,0.000493,0.169910,0.768086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10592,p3921,p1024,0,0.358312,0.315352,-0.748817,0.715001,-0.558609,0.414044,0.549428,...,0.458871,0.028271,0.141301,0.027548,0.635540,-0.099873,0.309719,-0.023383,0.384165,0.802947
10593,p3560,p0702,1,-0.374393,0.867002,-0.417435,0.470116,-0.285632,-0.120061,0.670968,...,1.029470,-0.254251,0.133620,0.015410,0.325031,0.117755,0.462013,0.137234,0.540410,1.466624
10594,p3509,p1847,0,-0.195052,0.496944,0.581423,-0.196783,-0.316882,-0.612991,1.147740,...,0.182327,-0.002791,0.178938,0.050734,0.469848,0.705918,0.429587,0.214241,1.370306,-0.360105
10595,p0080,p0829,0,0.073932,0.379445,-0.236112,0.018178,-0.769028,0.346248,-1.124123,...,1.061936,-0.277933,0.003037,0.875896,1.521550,-0.520755,0.511617,-0.016631,1.681394,0.308604


# Modeling

In [None]:
X = train_df_with_embed.drop(columns=['paper', 'referenced_paper', 'is_referenced'])
y = train_df_with_embed['is_referenced']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2025, stratify = y
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8477, 6144), (2120, 6144), (8477,), (2120,))

In [None]:
class LSTMClassifier:
    def __init__(self, input_shape, epochs=5, batch_size=32, patience=3):
        self.input_shape = input_shape
        self.epochs = epochs
        self.batch_size = batch_size
        self.patience = patience
        self.model = None

    def fit(self, X, y):
        X = X.reshape((-1, 1, self.input_shape))

        self.model = Sequential([
            Masking(mask_value=0., input_shape=(1, self.input_shape)),
            LSTM(16),
            Dense(1, activation='sigmoid')
        ])
        self.model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

        early_stop = EarlyStopping(
            monitor='loss',
            patience=self.patience,
            restore_best_weights=True,
            verbose=1
        )

        self.model.fit(
            X, y,
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=[early_stop],
            verbose=1
        )

    def predict(self, X):
        X = X.reshape((-1, 1, self.input_shape))
        proba = self.model.predict(X, verbose=0)
        return (proba > 0.5).astype(int).flatten()

    def predict_proba(self, X):
        X = X.reshape((-1, 1, self.input_shape))
        return self.model.predict(X, verbose=0).flatten()

In [None]:
# TSSE-BIM Boosting-style ensemble using custom base model (e.g., LSTM)
class TSSEBIMBoost:
    def __init__(self, n_estimators=5, base_model_class=None, input_shape=None, **base_model_kwargs):
        self.n_estimators = n_estimators
        self.base_model_class = base_model_class
        self.input_shape = input_shape
        self.base_model_kwargs = base_model_kwargs
        self.classifiers = []
        self.weights = []

    def _bin_samples(self, probs, bins):
        return np.digitize(probs, bins) - 1

    def _assign_weight(self, fp, fn, fp_max, fn_max):
        return (1 - fp / (fp_max + 1e-10)) * (1 - fn / (fn_max + 1e-10))

    def fit(self, X, y):
        X, y = shuffle(X, y, random_state=42)
        pos_idx = np.where(y == 1)[0]
        neg_idx = np.where(y == 0)[0]
        k = len(pos_idx)
        n_bins = k if k > 0 else 1
        bins = np.linspace(0, 1, n_bins + 1)

        pred_probs_all = np.zeros(len(y))

        for t in range(self.n_estimators):
            if t > 0:
                pred_probs_all = np.zeros(len(y))
                total_weight = sum(self.weights)
                for clf, w in zip(self.classifiers, self.weights):
                    pred_probs_all += w * clf.predict_proba(X)
                pred_probs_all /= total_weight
            else:
                pred_probs_all[:] = 0.5

            neg_probs = pred_probs_all[neg_idx]
            bin_assignments = self._bin_samples(neg_probs, bins)
            neg_sampled_idx = []

            for i in range(n_bins):
                bin_indices = neg_idx[bin_assignments == i]
                if len(bin_indices) > 0:
                    neg_sampled = np.random.choice(bin_indices, 1, replace=False)
                    neg_sampled_idx.extend(neg_sampled)

            train_idx = np.concatenate([pos_idx, neg_sampled_idx])
            clf = self.base_model_class(self.input_shape)
            clf.fit(X[train_idx], y[train_idx])
            y_pred = clf.predict(X)

            tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
            fp_max = max([fp] + [confusion_matrix(y, c.predict(X)).ravel()[1] for c in self.classifiers]) if self.classifiers else fp
            fn_max = max([fn] + [confusion_matrix(y, c.predict(X)).ravel()[2] for c in self.classifiers]) if self.classifiers else fn

            weight = self._assign_weight(fp, fn, fp_max, fn_max)
            self.classifiers.append(clf)
            self.weights.append(weight)

    def predict(self, X):
        total_weight = sum(self.weights)
        preds = sum(w * clf.predict(X) for clf, w in zip(self.classifiers, self.weights))
        return (preds / total_weight) > 0.5

    def predict_proba(self, X):
        total_weight = sum(self.weights)
        probs = sum(w * clf.predict_proba(X) for clf, w in zip(self.classifiers, self.weights))
        return probs / total_weight

In [None]:
X_train = np.array(X)
y_train = np.array(y)

boost_model = TSSEBIMBoost(
    n_estimators=5,
    base_model_class=LSTMClassifier,
    input_shape=X.shape[1],
    base_model_kwargs={
        'epochs': 20,
        'batch_size': 32,
        'patience': 3
    }
)

# Latih model
boost_model.fit(X_train, y_train)

Epoch 1/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9607 - loss: 0.2303
Epoch 2/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0907
Epoch 3/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0525
Epoch 4/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 1.0000 - loss: 0.0344
Epoch 5/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.9996 - loss: 0.0258
Restoring model weights from the end of the best epoch: 5.
Epoch 1/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.9996 - loss: 0.1311
Epoch 2/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.9996 - loss: 0.0534
Epoch 3/5
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.9

In [None]:
# Label Predict
y_pred = boost_model.predict(X_test)

y_proba = boost_model.predict_proba(X_test)

print(classification_report(y, y_pred))

AttributeError: 'DataFrame' object has no attribute 'reshape'

In [None]:
# XGBoost
xgb_model = XGBClassifier(
    tree_method='gpu_hist',
    predictor='gpu_predictor',
    gpu_id=0,
    n_estimators=100,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=2025,
)

xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

print("⚡ XGBoost")
print(classification_report(y_test, y_pred_xgb))

⚡ XGBoost
              precision    recall  f1-score   support

           0       0.89      0.92      0.90      1300
           1       0.87      0.81      0.84       820

    accuracy                           0.88      2120
   macro avg       0.88      0.87      0.87      2120
weighted avg       0.88      0.88      0.88      2120



In [None]:
mcc_xgb = matthews_corrcoef(y_test, y_pred_xgb)

In [None]:
print("Accuracy:")
print(f"XGB : {accuracy_score(y_test, y_pred_xgb):.4f}")

print("")

print("📈 Matthews Correlation Coefficient")
print(f"XGB : {mcc_xgb:.4f}")

Accuracy:
XGB : 0.8807

📈 Matthews Correlation Coefficient
XGB : 0.7464


# With LSTM

In [None]:
n_samples_train = X_train.shape[0]
n_samples_test = X_test.shape[0]
timesteps = 3
features = X_train.shape[1] // timesteps

In [None]:
X_train_lstm = X_train.values.reshape((n_samples_train, timesteps, features))
X_test_lstm = X_test.values.reshape((n_samples_test, timesteps, features))

In [None]:
# LSTM Model
model = models.Sequential([
    layers.LSTM(64, return_sequences=True, input_shape=(timesteps, features)),
    layers.Dropout(0.6),
    layers.LSTM(32, return_sequences=False),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])


In [None]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Earlystopping
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    mode='min'
)

In [None]:
history = model.fit(
    X_train_lstm, y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test_lstm, y_test),
    verbose=0,
    callbacks=[TqdmCallback(verbose=1), early_stop]
)

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

In [None]:
import joblib
joblib.dump(xgb_model, 'model_xgb_1.pkl')

['model_xgb_1.pkl']

In [None]:
model.save('model_lstm.keras')
model.save_weights('lstm_weights_base.weights.h5')

# Finetune XGB

In [None]:
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 5),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 5),
        'use_label_encoder': False,
        'eval_metric': 'logloss',
        'random_state': 2025,
        'tree_method': 'gpu_hist',
        'predictor': 'gpu_predictor',
        'gpu_id': 0,
    }

    model = XGBClassifier(**params)
    model.fit(X_tr, y_tr)
    preds = model.predict(X_val)
    f1 = f1_score(y_val, preds, average='macro')
    return f1

In [None]:
# Buat dan jalankan studi Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-04-24 00:44:23,620] A new study created in memory with name: no-name-1cf1c5b8-95df-4a49-9e9a-1c1905b01597
[I 2025-04-24 00:45:09,465] Trial 0 finished with value: 0.8578267500341141 and parameters: {'n_estimators': 110, 'max_depth': 9, 'learning_rate': 0.046625566159516435, 'subsample': 0.7172159673513914, 'colsample_bytree': 0.9309305461266135, 'gamma': 4.221026470577533, 'reg_alpha': 4.446368231954924, 'reg_lambda': 2.23864468148598}. Best is trial 0 with value: 0.8578267500341141.
[I 2025-04-24 00:45:44,851] Trial 1 finished with value: 0.8051022753390025 and parameters: {'n_estimators': 74, 'max_depth': 8, 'learning_rate': 0.012538981461005239, 'subsample': 0.5507791639587509, 'colsample_bytree': 0.8052991397535261, 'gamma': 3.038567376502248, 'reg_alpha': 4.363953544060398, 'reg_lambda': 0.5630683341598353}. Best is trial 0 with value: 0.8578267500341141.
[I 2025-04-24 00:46:12,113] Trial 2 finished with value: 0.8547019002886254 and parameters: {'n_estimators': 181, 'max_

In [None]:
# Tampilkan hasil terbaik
print("✅ Best trial:")
print(study.best_trial)

✅ Best trial:
FrozenTrial(number=47, state=1, values=[0.877924134456201], datetime_start=datetime.datetime(2025, 4, 24, 1, 9, 8, 889132), datetime_complete=datetime.datetime(2025, 4, 24, 1, 9, 31, 985494), params={'n_estimators': 276, 'max_depth': 3, 'learning_rate': 0.1757199345744328, 'subsample': 0.9696270609661942, 'colsample_bytree': 0.5246063036711618, 'gamma': 0.5624570862750969, 'reg_alpha': 0.724073392826929, 'reg_lambda': 0.24576713709293393}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=300, log=False, low=50, step=1), 'max_depth': IntDistribution(high=15, log=False, low=3, step=1), 'learning_rate': FloatDistribution(high=0.3, log=False, low=0.01, step=None), 'subsample': FloatDistribution(high=1.0, log=False, low=0.5, step=None), 'colsample_bytree': FloatDistribution(high=1.0, log=False, low=0.5, step=None), 'gamma': FloatDistribution(high=5.0, log=False, low=0.0, step=None), 'reg_alpha': FloatDistribution(high=

In [None]:
best_params = study.best_params
best_params.update({
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'random_state': 2025,
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
    'gpu_id': 0,
})

In [None]:
best_model = XGBClassifier(**best_params)
best_model.fit(X_train, y_train)

In [None]:
# Prediksi dan evaluasi
y_pred = best_model.predict(X_test)

from sklearn.metrics import classification_report
print("\n⚡ Final XGBoost (GPU) Evaluation:")
print(classification_report(y_test, y_pred))


⚡ Final XGBoost (GPU) Evaluation:
              precision    recall  f1-score   support

           0       0.90      0.93      0.91      1300
           1       0.88      0.84      0.86       820

    accuracy                           0.89      2120
   macro avg       0.89      0.88      0.89      2120
weighted avg       0.89      0.89      0.89      2120



In [None]:
# Save Best Params
import joblib
joblib.dump(best_model, 'best_xgb_2.pkl')

['best_xgb_2.pkl']

# LSTM FINETUNE

In [None]:
from optuna.integration import TFKerasPruningCallback

def create_lstm_model(trial):
    model = models.Sequential()

    # Param tuning
    units1 = trial.suggest_int('units1', 32, 128)
    dropout1 = trial.suggest_float('dropout1', 0.3, 0.7)
    units2 = trial.suggest_int('units2', 16, 64)
    dropout2 = trial.suggest_float('dropout2', 0.3, 0.7)
    dense_units = trial.suggest_int('dense_units', 32, 128)
    dense_dropout = trial.suggest_float('dense_dropout', 0.3, 0.7)
    l2_reg = trial.suggest_float('l2_reg', 1e-4, 1e-2, log=True)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)

    model.add(layers.LSTM(units1, return_sequences=True, input_shape=(timesteps, features)))
    model.add(layers.Dropout(dropout1))
    model.add(layers.LSTM(units2, return_sequences=False))
    model.add(layers.Dropout(dropout2))
    model.add(layers.Dense(dense_units, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_reg)))
    model.add(layers.Dropout(dense_dropout))
    model.add(layers.Dense(1, activation='sigmoid'))

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

def objective(trial):
    model = create_lstm_model(trial)

    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )

    model.fit(
        X_train_lstm, y_train,
        validation_data=(X_test_lstm, y_test),
        epochs=30,
        batch_size=32,
        verbose=0,
        callbacks=[early_stop]
    )

    # Predict & compute f1_score
    y_pred_prob = model.predict(X_test_lstm)
    y_pred = (y_pred_prob > 0.5).astype(int)
    f1 = f1_score(y_test, y_pred, average='binary')
    return f1  # Karena kita pengen maximize f1

In [None]:
# Jalankan Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

[I 2025-04-24 01:12:35,693] A new study created in memory with name: no-name-14794d7f-7840-462f-8e40-32fcdc8127e8


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


[I 2025-04-24 01:13:19,982] Trial 0 finished with value: 0.8690176322418136 and parameters: {'units1': 95, 'dropout1': 0.610513312880824, 'units2': 60, 'dropout2': 0.5395985692679546, 'dense_units': 96, 'dense_dropout': 0.3531041483293317, 'l2_reg': 0.0003290165341517008, 'lr': 0.00030263723632762226}. Best is trial 0 with value: 0.8690176322418136.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:13:51,516] Trial 1 finished with value: 0.8716094032549728 and parameters: {'units1': 67, 'dropout1': 0.6317110749587823, 'units2': 28, 'dropout2': 0.6630067013167328, 'dense_units': 58, 'dense_dropout': 0.42036158506865906, 'l2_reg': 0.0007238895094497908, 'lr': 0.00012687345707440136}. Best is trial 0 with value: 0.8690176322418136.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:14:27,771] Trial 2 finished with value: 0.8637212196639701 and parameters: {'units1': 57, 'dropout1': 0.4502560781018847, 'units2': 32, 'dropout2': 0.40897074694455593, 'dense_units': 61, 'dense_dropout': 0.6060670082738182, 'l2_reg': 0.00012105185254313688, 'lr': 0.0017330726504821848}. Best is trial 2 with value: 0.8637212196639701.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:14:45,616] Trial 3 finished with value: 0.8475648323845667 and parameters: {'units1': 127, 'dropout1': 0.6424828250360002, 'units2': 45, 'dropout2': 0.4467104075315053, 'dense_units': 118, 'dense_dropout': 0.44861310689961553, 'l2_reg': 0.0046969726009269495, 'lr': 0.0019420965206452037}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


[I 2025-04-24 01:15:19,363] Trial 4 finished with value: 0.8660933660933661 and parameters: {'units1': 109, 'dropout1': 0.698835890020539, 'units2': 18, 'dropout2': 0.44291243092837845, 'dense_units': 115, 'dense_dropout': 0.42694360237144435, 'l2_reg': 0.0001710434715235898, 'lr': 0.0003379067703117713}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:15:47,622] Trial 5 finished with value: 0.8707653701380176 and parameters: {'units1': 34, 'dropout1': 0.44161728535192135, 'units2': 18, 'dropout2': 0.4016294183808602, 'dense_units': 96, 'dense_dropout': 0.3338389185191607, 'l2_reg': 0.00026164634429516864, 'lr': 0.0002473896255733787}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:16:11,558] Trial 6 finished with value: 0.8694610778443114 and parameters: {'units1': 114, 'dropout1': 0.4677796596332582, 'units2': 35, 'dropout2': 0.44550957718782364, 'dense_units': 121, 'dense_dropout': 0.40663678080271953, 'l2_reg': 0.0008129712713967738, 'lr': 0.0008498089374201877}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:16:35,907] Trial 7 finished with value: 0.857486470234516 and parameters: {'units1': 52, 'dropout1': 0.55421871755588, 'units2': 36, 'dropout2': 0.46079165053956966, 'dense_units': 76, 'dense_dropout': 0.5991872330242758, 'l2_reg': 0.004629131191343523, 'lr': 0.003194753104383214}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


[I 2025-04-24 01:16:58,483] Trial 8 finished with value: 0.8552709946396665 and parameters: {'units1': 72, 'dropout1': 0.5845996763064358, 'units2': 23, 'dropout2': 0.38116058994785806, 'dense_units': 61, 'dense_dropout': 0.6194818312052772, 'l2_reg': 0.0015079187430515329, 'lr': 0.0023985157539415437}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:17:28,078] Trial 9 finished with value: 0.8645707376058042 and parameters: {'units1': 106, 'dropout1': 0.38321467768502554, 'units2': 36, 'dropout2': 0.31010299988788226, 'dense_units': 69, 'dense_dropout': 0.4814011916024321, 'l2_reg': 0.0011252501710552773, 'lr': 0.0031157764624590383}. Best is trial 3 with value: 0.8475648323845667.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step


[I 2025-04-24 01:17:50,324] Trial 10 finished with value: 0.8443649373881932 and parameters: {'units1': 128, 'dropout1': 0.328064406233726, 'units2': 50, 'dropout2': 0.5603051009176592, 'dense_units': 35, 'dense_dropout': 0.6908592140036788, 'l2_reg': 0.008950986448289365, 'lr': 0.009392265143890964}. Best is trial 10 with value: 0.8443649373881932.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:18:18,848] Trial 11 finished with value: 0.8383838383838383 and parameters: {'units1': 128, 'dropout1': 0.31579046123770455, 'units2': 50, 'dropout2': 0.5648756212817794, 'dense_units': 34, 'dense_dropout': 0.6945249224149157, 'l2_reg': 0.009802714753817474, 'lr': 0.009604981048057275}. Best is trial 11 with value: 0.8383838383838383.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:18:47,600] Trial 12 finished with value: 0.8238276299112801 and parameters: {'units1': 127, 'dropout1': 0.3026058364786072, 'units2': 50, 'dropout2': 0.5794781100349495, 'dense_units': 32, 'dense_dropout': 0.6935093531119139, 'l2_reg': 0.009889196751726791, 'lr': 0.009743170562809675}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:19:15,280] Trial 13 finished with value: 0.8301435406698564 and parameters: {'units1': 93, 'dropout1': 0.3024164658298171, 'units2': 56, 'dropout2': 0.6249437636513379, 'dense_units': 33, 'dense_dropout': 0.6965213430797922, 'l2_reg': 0.008569159322080296, 'lr': 0.008726565819618353}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


[I 2025-04-24 01:19:39,318] Trial 14 finished with value: 0.855072463768116 and parameters: {'units1': 92, 'dropout1': 0.38074427187474624, 'units2': 61, 'dropout2': 0.6673073760664477, 'dense_units': 45, 'dense_dropout': 0.5505604019459379, 'l2_reg': 0.0029643672031155945, 'lr': 0.0055752574590825065}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:20:06,430] Trial 15 finished with value: 0.857312018946122 and parameters: {'units1': 90, 'dropout1': 0.37642769525168585, 'units2': 53, 'dropout2': 0.6222598709051608, 'dense_units': 47, 'dense_dropout': 0.6540306793556278, 'l2_reg': 0.002401102981873755, 'lr': 0.004918994244086839}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


[I 2025-04-24 01:20:37,216] Trial 16 finished with value: 0.863822326125073 and parameters: {'units1': 82, 'dropout1': 0.3137570955062217, 'units2': 43, 'dropout2': 0.6072800632409683, 'dense_units': 47, 'dense_dropout': 0.5437232762174395, 'l2_reg': 0.00564882499894872, 'lr': 0.0009359158808199661}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:21:04,578] Trial 17 finished with value: 0.8395945140131187 and parameters: {'units1': 114, 'dropout1': 0.5134445474408157, 'units2': 56, 'dropout2': 0.6934085717350829, 'dense_units': 32, 'dense_dropout': 0.6497977709537283, 'l2_reg': 0.0066988545452829696, 'lr': 0.005184126616237167}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:21:35,403] Trial 18 finished with value: 0.8345410628019324 and parameters: {'units1': 105, 'dropout1': 0.3642053230035758, 'units2': 47, 'dropout2': 0.5017564424797103, 'dense_units': 87, 'dense_dropout': 0.5582066007335718, 'l2_reg': 0.002582502165200636, 'lr': 0.009936624403826845}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:21:57,657] Trial 19 finished with value: 0.8709288299155609 and parameters: {'units1': 35, 'dropout1': 0.42127103408223604, 'units2': 64, 'dropout2': 0.602445363168278, 'dense_units': 43, 'dense_dropout': 0.647126614185581, 'l2_reg': 0.0005079103390426012, 'lr': 0.0005187740569238247}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:22:16,977] Trial 20 finished with value: 0.8683886838868389 and parameters: {'units1': 81, 'dropout1': 0.3525889305476765, 'units2': 56, 'dropout2': 0.5066581177288402, 'dense_units': 52, 'dense_dropout': 0.5151548869158299, 'l2_reg': 0.0016867146061609111, 'lr': 0.0013396593935543469}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:22:48,178] Trial 21 finished with value: 0.8423076923076923 and parameters: {'units1': 101, 'dropout1': 0.3068052912306007, 'units2': 46, 'dropout2': 0.5290692887951395, 'dense_units': 86, 'dense_dropout': 0.5690923731116067, 'l2_reg': 0.003860147936738979, 'lr': 0.0066727227939739195}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:23:25,941] Trial 22 finished with value: 0.8518057285180572 and parameters: {'units1': 119, 'dropout1': 0.34765771178239246, 'units2': 41, 'dropout2': 0.583289174371991, 'dense_units': 102, 'dense_dropout': 0.694944307998272, 'l2_reg': 0.002736026329642624, 'lr': 0.009527100488077216}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:23:44,000] Trial 23 finished with value: 0.8489028213166144 and parameters: {'units1': 101, 'dropout1': 0.4037269006951159, 'units2': 50, 'dropout2': 0.6417054249604938, 'dense_units': 82, 'dense_dropout': 0.6476872019333838, 'l2_reg': 0.007283858922304792, 'lr': 0.0040233952677060335}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


[I 2025-04-24 01:24:18,144] Trial 24 finished with value: 0.854630715123095 and parameters: {'units1': 116, 'dropout1': 0.3523044948328384, 'units2': 56, 'dropout2': 0.4910237934809573, 'dense_units': 72, 'dense_dropout': 0.5802055041440923, 'l2_reg': 0.009817605196480268, 'lr': 0.006870622471499311}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:24:47,987] Trial 25 finished with value: 0.8460575719649562 and parameters: {'units1': 88, 'dropout1': 0.3027926526005794, 'units2': 48, 'dropout2': 0.5025552963971223, 'dense_units': 108, 'dense_dropout': 0.5095776134614688, 'l2_reg': 0.0035198320829670443, 'lr': 0.006852085778340903}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:25:15,961] Trial 26 finished with value: 0.8546475358702433 and parameters: {'units1': 100, 'dropout1': 0.5053958655837542, 'units2': 53, 'dropout2': 0.5722716751848889, 'dense_units': 88, 'dense_dropout': 0.6174713400839027, 'l2_reg': 0.0019191393814039105, 'lr': 0.003551840360345015}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:25:39,317] Trial 27 finished with value: 0.8419737663960025 and parameters: {'units1': 121, 'dropout1': 0.344906583672296, 'units2': 42, 'dropout2': 0.5306867764373757, 'dense_units': 128, 'dense_dropout': 0.6622781609725253, 'l2_reg': 0.006298642842433657, 'lr': 0.009993267388731934}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


[I 2025-04-24 01:26:08,335] Trial 28 finished with value: 0.8473748473748474 and parameters: {'units1': 109, 'dropout1': 0.39649912821106514, 'units2': 60, 'dropout2': 0.6332901409370442, 'dense_units': 39, 'dense_dropout': 0.47938457846472543, 'l2_reg': 0.00470902249079871, 'lr': 0.006916417955561353}. Best is trial 12 with value: 0.8238276299112801.


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


[I 2025-04-24 01:26:55,422] Trial 29 finished with value: 0.8587223587223587 and parameters: {'units1': 97, 'dropout1': 0.47946828069627173, 'units2': 64, 'dropout2': 0.5461022085159606, 'dense_units': 92, 'dense_dropout': 0.35879395135103953, 'l2_reg': 0.0021444153185149755, 'lr': 0.00454801053250985}. Best is trial 12 with value: 0.8238276299112801.


In [None]:
# Print best result
print("✅ Best trial:")
print(study.best_trial)

✅ Best trial:
FrozenTrial(number=12, state=1, values=[0.8238276299112801], datetime_start=datetime.datetime(2025, 4, 24, 1, 18, 18, 849440), datetime_complete=datetime.datetime(2025, 4, 24, 1, 18, 47, 600487), params={'units1': 127, 'dropout1': 0.3026058364786072, 'units2': 50, 'dropout2': 0.5794781100349495, 'dense_units': 32, 'dense_dropout': 0.6935093531119139, 'l2_reg': 0.009889196751726791, 'lr': 0.009743170562809675}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'units1': IntDistribution(high=128, log=False, low=32, step=1), 'dropout1': FloatDistribution(high=0.7, log=False, low=0.3, step=None), 'units2': IntDistribution(high=64, log=False, low=16, step=1), 'dropout2': FloatDistribution(high=0.7, log=False, low=0.3, step=None), 'dense_units': IntDistribution(high=128, log=False, low=32, step=1), 'dense_dropout': FloatDistribution(high=0.7, log=False, low=0.3, step=None), 'l2_reg': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'lr': Float

In [None]:
# Train ulang pakai best params
final_model = create_lstm_model(study.best_trial)
final_model.fit(
    X_train_lstm, y_train,
    validation_data=(X_test_lstm, y_test),
    epochs=30,
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
)

Epoch 1/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.6015 - loss: 0.7621 - val_accuracy: 0.6623 - val_loss: 0.5202
Epoch 2/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.7153 - loss: 0.5724 - val_accuracy: 0.7976 - val_loss: 0.4977
Epoch 3/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7066 - loss: 0.5842 - val_accuracy: 0.8505 - val_loss: 0.4429
Epoch 4/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7494 - loss: 0.5382 - val_accuracy: 0.8792 - val_loss: 0.3838
Epoch 5/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.7877 - loss: 0.4908 - val_accuracy: 0.8642 - val_loss: 0.3892
Epoch 6/30
[1m265/265[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.7866 - loss: 0.4993 - val_accuracy: 0.8665 - val_loss: 0.3859
Epoch 7/30
[1m265/265[0m

<keras.src.callbacks.history.History at 0x78c9d0debf90>

In [None]:
# Evaluasi akhir
y_pred_final = (final_model.predict(X_test_lstm) > 0.5).astype(int)
print("⚡ Final Tuned LSTM")
print(classification_report(y_test, y_pred_final))

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
⚡ Final Tuned LSTM
              precision    recall  f1-score   support

           0       0.90      0.89      0.90      1300
           1       0.83      0.84      0.84       820

    accuracy                           0.87      2120
   macro avg       0.87      0.87      0.87      2120
weighted avg       0.87      0.87      0.87      2120



In [None]:
final_model.save('model_lstm_tuning.keras')
final_model.save_weights('lstm_weights_tune.weights.h5')