# INFORMASI
### CNN
### Fine Tuning pada data Twitter, pergunakan weights nya lalu fine-tune kembali pada data YouTube

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, SpatialDropout1D, Conv1D, GlobalMaxPooling1D
from sklearn.model_selection import train_test_split

# --- Konfigurasi ---
MAX_NB_WORDS = 5000
MAX_SEQUENCE_LENGTH = 50
EMBEDDING_DIM = 100

# --- Load Data Twitter ---
df_twitter = pd.read_csv('final_data_twitter.csv')
df_twitter = df_twitter.dropna(subset=['clean_text_ML_2'])

In [2]:
# --- Tokenisasi ---
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, lower=True)
tokenizer.fit_on_texts(df_twitter['clean_text_ML_2'].values)

X_tw = tokenizer.texts_to_sequences(df_twitter['clean_text_ML_2'].values)
X_tw = pad_sequences(X_tw, maxlen=MAX_SEQUENCE_LENGTH)
y_tw = df_twitter['cyberbullying'].values

# Split Data Twitter
X_temp_tw, X_test_tw, y_temp_tw, y_test_tw = train_test_split(
    X_tw, y_tw, test_size=0.2, random_state=42
)
X_train_tw, X_val_tw, y_train_tw, y_val_tw = train_test_split(
    X_temp_tw, y_temp_tw, test_size=1/8, random_state=42
)

In [3]:
from tensorflow.keras.metrics import Precision, Recall

# --- Custom F1 Metric ---
def f1_metric(y_true, y_pred):
    y_pred = tf.round(y_pred)
    tp = tf.reduce_sum(tf.cast(y_true * y_pred, 'float32'))
    fp = tf.reduce_sum(tf.cast((1 - y_true) * y_pred, 'float32'))
    fn = tf.reduce_sum(tf.cast(y_true * (1 - y_pred), 'float32'))
    precision = tp / (tp + fp + 1e-7)
    recall = tp / (tp + fn + 1e-7)
    return 2 * precision * recall / (precision + recall + 1e-7)

# --- Membangun Model CNN ---
def create_model():
    model = Sequential()
    model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
    model.add(SpatialDropout1D(0.2))
    model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            'accuracy',
            Precision(name="precision"),
            Recall(name="recall"),
            f1_metric
        ]
    )
    return model

In [4]:
# Train Model pada Twitter
model_tw = create_model()
print("Training Model CNN pada Data Twitter...")
model_tw.fit(
    X_train_tw, y_train_tw,
    epochs=8,
    batch_size=64,
    validation_data=(X_val_tw, y_val_tw),
    verbose=1
)


Training Model CNN pada Data Twitter...
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x1e3785d6770>

In [5]:
model_tw.evaluate(X_test_tw, y_test_tw, verbose=1)



[0.8009842038154602,
 0.794773280620575,
 0.7629151344299316,
 0.7490941882133484,
 0.7521436214447021]

In [6]:
# --- Simpan Weights ---
model_tw.save_weights('twitter_weights.h5')
print("Bobot model Twitter berhasil disimpan.")

Bobot model Twitter berhasil disimpan.


In [7]:
# --- Load Data YouTube ---
df_yt = pd.read_csv('final_data_yt.csv')
df_yt = df_yt.dropna(subset=['clean_text_ML_2'])

# Preprocessing YouTube
X_yt = tokenizer.texts_to_sequences(df_yt['clean_text_ML_2'].values)
X_yt = pad_sequences(X_yt, maxlen=MAX_SEQUENCE_LENGTH)
y_yt = df_yt['cyberbullying'].values

# Split Data YouTube
X_temp_yt, X_test_yt, y_temp_yt, y_test_yt = train_test_split(
    X_yt, y_yt, test_size=0.2, random_state=42
)
X_train_yt, X_val_yt, y_train_yt, y_val_yt = train_test_split(
    X_temp_yt, y_temp_yt, test_size=1/8, random_state=42
)

In [8]:
# --- Load Model untuk Fine-Tuning ---
model_finetune = create_model()

# Build model sebelum load weights
dummy_input = np.zeros((1, MAX_SEQUENCE_LENGTH))
model_finetune(dummy_input)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.49288136]], dtype=float32)>

In [9]:
model_finetune.load_weights('twitter_weights.h5')
print("\nBobot Twitter dimuat. Mulai Fine-tuning pada Data YouTube...")



Bobot Twitter dimuat. Mulai Fine-tuning pada Data YouTube...


In [10]:

from tensorflow.keras.callbacks import EarlyStopping
es = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

# --- Fine-Tuning ---
model_finetune.fit(
    X_train_yt, y_train_yt,
    epochs=8,
    batch_size=64,
    validation_data=(X_val_yt, y_val_yt),
    callbacks=[es],
    verbose=1
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x1e4938db280>

In [11]:
model_finetune.evaluate(X_test_yt, y_test_yt, verbose=1)



[1.0471127033233643,
 0.6230769157409668,
 0.42500001192092896,
 0.39534884691238403,
 0.32740095257759094]