In [4]:
# =========================
# 1) SETUP & LOAD DATA
# =========================
!pip install -q nltk scikit-learn tensorflow
!pip install -q transformers datasets accelerate

import os
import re
import numpy as np
import pandas as pd
import nltk
import torch

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
    roc_auc_score
)

from datasets import Dataset
from transformers import (
    ElectraTokenizer,
    ElectraForSequenceClassification,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from itertools import product

os.environ["WANDB_DISABLED"] = "true"

nltk.download("stopwords")
nltk.download("wordnet")

if torch.cuda.is_available():
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

# Load dataset
df = pd.read_csv("balanced_ai_human_prompts.csv")
df["generated"] = df["generated"].astype(int)   # ensure 0/1
print(df.columns)
df.head()


Using GPU: Tesla T4
Index(['text', 'generated'], dtype='object')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Unnamed: 0,text,generated
0,"Machine learning, a subset of artificial intel...",1
1,"A decision tree, a prominent machine learning ...",1
2,"Education, a cornerstone of societal progress,...",1
3,"Computers, the backbone of modern technology, ...",1
4,"Chess, a timeless game of strategy and intelle...",1


# Preprocessing (for LSTM)

In [5]:
# =========================
# 2) PREPROCESSING (LSTM)
# =========================
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    text = str(text).lower()                           # lowercase
    text = re.sub(r"[^a-zA-Z\s]", "", text)           # keep only letters and spaces
    tokens = [w for w in text.split() if w not in stop_words]
    tokens = [lemmatizer.lemmatize(w) for w in tokens]
    return " ".join(tokens)

df["clean_text"] = df["text"].astype(str).apply(preprocess_text)
df[["text", "clean_text", "generated"]].head()

# Common metrics function to reuse later
def print_binary_metrics(y_true, y_pred, y_scores, model_name="Model"):
    acc  = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec  = recall_score(y_true, y_pred)
    f1   = f1_score(y_true, y_pred)
    auc  = roc_auc_score(y_true, y_scores)

    print(f"{model_name} Test Accuracy : {acc:.4f}")
    print(f"{model_name} Precision    : {prec:.4f}")
    print(f"{model_name} Recall       : {rec:.4f}")
    print(f"{model_name} F1-score     : {f1:.4f}")
    print(f"{model_name} ROC-AUC      : {auc:.4f}\n")

    print(f"{model_name} Classification Report:")
    print(classification_report(y_true, y_pred, target_names=["human", "ai"]))

    print(f"{model_name} Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("=" * 60)


# LSTM – Baseline Model + Metrics

In [6]:
# =========================
# 3) LSTM BASELINE + METRICS
# =========================
MAX_WORDS = 20000
MAX_LEN   = 400

tokenizer_lstm = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer_lstm.fit_on_texts(df["clean_text"])

sequences = tokenizer_lstm.texts_to_sequences(df["clean_text"])
padded_sequences = pad_sequences(sequences, maxlen=MAX_LEN, padding="post")

y = df["generated"].values

X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

EMBED_DIM = 128
LSTM_UNITS = 128

def build_lstm_baseline():
    model = Sequential([
        Embedding(input_dim=MAX_WORDS, output_dim=EMBED_DIM, input_length=MAX_LEN),
        LSTM(LSTM_UNITS, return_sequences=False),
        Dropout(0.3),
        Dense(64, activation="relu"),
        Dropout(0.3),
        Dense(1, activation="sigmoid")
    ])
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=["accuracy"]
    )
    return model

lstm_baseline = build_lstm_baseline()
lstm_baseline.summary()

history = lstm_baseline.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=32,
    verbose=1
)

# Metrics
y_prob_lstm = lstm_baseline.predict(X_test).ravel()
y_pred_lstm = (y_prob_lstm >= 0.5).astype(int)

print_binary_metrics(y_test, y_pred_lstm, y_prob_lstm, model_name="LSTM Baseline")




Epoch 1/5
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 27ms/step - accuracy: 0.4905 - loss: 0.6933 - val_accuracy: 0.5705 - val_loss: 0.6845
Epoch 2/5
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.6877 - loss: 0.6107 - val_accuracy: 0.9000 - val_loss: 0.2725
Epoch 3/5
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8908 - loss: 0.2998 - val_accuracy: 0.9341 - val_loss: 0.2091
Epoch 4/5
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.9129 - loss: 0.2589 - val_accuracy: 0.9432 - val_loss: 0.1882
Epoch 5/5
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9102 - loss: 0.2536 - val_accuracy: 0.9182 - val_loss: 0.2320
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
LSTM Baseline Test Accuracy : 0.9127
LSTM Baseline Precision    : 1.0000
LSTM Baseline Recall       : 0.8255
LSTM Baseline F1-

# LSTM – Hyperparameter Tuning + Metrics

In [7]:
# ======================================
# 4) LSTM HYPERPARAMETER TUNING + METRICS
# ======================================
param_grid_lstm = {
    "lstm_units":    [64, 128],
    "dropout_rate":  [0.3, 0.5],
    "learning_rate": [0.001, 0.0005],
    "batch_size":    [32, 64],
    "epochs":        [5, 8]
}

def build_lstm_tuned(lstm_units, dropout_rate, learning_rate):
    model = Sequential([
        Embedding(MAX_WORDS, EMBED_DIM, input_length=MAX_LEN),
        LSTM(lstm_units, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(lstm_units // 2, return_sequences=False),
        Dropout(dropout_rate),
        Dense(64, activation="relu"),
        Dropout(dropout_rate),
        Dense(1, activation="sigmoid")
    ])

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"]
    )
    return model

best_acc_lstm = 0.0
best_config_lstm = None
best_lstm_model = None
results_lstm = []

for units in param_grid_lstm["lstm_units"]:
    for dr in param_grid_lstm["dropout_rate"]:
        for lr in param_grid_lstm["learning_rate"]:
            for bs in param_grid_lstm["batch_size"]:
                for ep in param_grid_lstm["epochs"]:
                    print(f"\n[GRID] units={units}, dropout={dr}, lr={lr}, batch={bs}, epochs={ep}")
                    model = build_lstm_tuned(units, dr, lr)

                    early_stop = EarlyStopping(
                        monitor="val_loss",
                        patience=2,
                        restore_best_weights=True
                    )

                    model.fit(
                        X_train, y_train,
                        validation_split=0.2,
                        batch_size=bs,
                        epochs=ep,
                        callbacks=[early_stop],
                        verbose=0
                    )

                    loss, acc = model.evaluate(X_test, y_test, verbose=0)
                    print(f"   -> Test Accuracy: {acc:.4f}")

                    results_lstm.append({
                        "units": units,
                        "dropout": dr,
                        "lr": lr,
                        "batch": bs,
                        "epochs": ep,
                        "accuracy": acc
                    })

                    if acc > best_acc_lstm:
                        best_acc_lstm = acc
                        best_config_lstm = {
                            "units": units,
                            "dropout": dr,
                            "lr": lr,
                            "batch": bs,
                            "epochs": ep
                        }
                        best_lstm_model = model

print("\nBest LSTM config:", best_config_lstm)
print("Best LSTM accuracy:", best_acc_lstm)

# Metrics for best LSTM
y_prob_lstm_best = best_lstm_model.predict(X_test).ravel()
y_pred_lstm_best = (y_prob_lstm_best >= 0.5).astype(int)

print_binary_metrics(y_test, y_pred_lstm_best, y_prob_lstm_best, model_name="LSTM Tuned")



[GRID] units=64, dropout=0.3, lr=0.001, batch=32, epochs=5




   -> Test Accuracy: 0.5618

[GRID] units=64, dropout=0.3, lr=0.001, batch=32, epochs=8
   -> Test Accuracy: 0.9673

[GRID] units=64, dropout=0.3, lr=0.001, batch=64, epochs=5
   -> Test Accuracy: 0.9800

[GRID] units=64, dropout=0.3, lr=0.001, batch=64, epochs=8
   -> Test Accuracy: 0.5600

[GRID] units=64, dropout=0.3, lr=0.0005, batch=32, epochs=5
   -> Test Accuracy: 0.9145

[GRID] units=64, dropout=0.3, lr=0.0005, batch=32, epochs=8
   -> Test Accuracy: 0.5600

[GRID] units=64, dropout=0.3, lr=0.0005, batch=64, epochs=5
   -> Test Accuracy: 0.5618

[GRID] units=64, dropout=0.3, lr=0.0005, batch=64, epochs=8
   -> Test Accuracy: 0.9673

[GRID] units=64, dropout=0.5, lr=0.001, batch=32, epochs=5
   -> Test Accuracy: 0.5873

[GRID] units=64, dropout=0.5, lr=0.001, batch=32, epochs=8
   -> Test Accuracy: 0.5673

[GRID] units=64, dropout=0.5, lr=0.001, batch=64, epochs=5
   -> Test Accuracy: 0.5582

[GRID] units=64, dropout=0.5, lr=0.001, batch=64, epochs=8
   -> Test Accuracy: 0.5636


# ELECTRA – Baseline + Metrics

In [8]:
# =========================
# 5) ELECTRA BASELINE + METRICS
# =========================

df_elec = df[["text", "generated"]].copy()
df_elec.rename(columns={"generated": "label"}, inplace=True)

train_df_elec, test_df_elec = train_test_split(
    df_elec, test_size=0.2, random_state=42, stratify=df_elec["label"]
)

train_ds_elec = Dataset.from_pandas(train_df_elec.reset_index(drop=True))
test_ds_elec  = Dataset.from_pandas(test_df_elec.reset_index(drop=True))

tokenizer_elec = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")

def tokenize_elec(batch):
    return tokenizer_elec(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=256
    )

train_ds_elec = train_ds_elec.map(tokenize_elec, batched=True)
test_ds_elec  = test_ds_elec.map(tokenize_elec, batched=True)

train_ds_elec = train_ds_elec.remove_columns(["text"])
test_ds_elec  = test_ds_elec.remove_columns(["text"])

train_ds_elec.set_format(type="torch")
test_ds_elec.set_format(type="torch")

def compute_metrics_acc(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

model_elec = ElectraForSequenceClassification.from_pretrained(
    "google/electra-small-discriminator",
    num_labels=2
)

training_args_elec = TrainingArguments(
    output_dir="electra_baseline",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,

    # OLD-VERSION COMPATIBLE ARGUMENTS:
    eval_strategy="epoch",      # instead of evaluation_strategy
    logging_steps=100,          # use classic logging
    save_strategy="no"
)


trainer_elec = Trainer(
    model=model_elec,
    args=training_args_elec,
    train_dataset=train_ds_elec,
    eval_dataset=test_ds_elec,
    compute_metrics=compute_metrics_acc
)


trainer_elec.train()

# Metrics
pred_output_elec = trainer_elec.predict(test_ds_elec)
logits_elec = pred_output_elec.predictions
y_true_elec = pred_output_elec.label_ids

y_pred_elec = np.argmax(logits_elec, axis=1)
y_scores_elec = logits_elec[:, 1]  # positive class

print_binary_metrics(y_true_elec, y_pred_elec, y_scores_elec, model_name="ELECTRA Baseline")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/2200 [00:00<?, ? examples/s]

Map:   0%|          | 0/550 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


model.safetensors:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4297,0.161209,0.998182
2,0.1744,0.095592,0.998182


ELECTRA Baseline Test Accuracy : 0.9982
ELECTRA Baseline Precision    : 1.0000
ELECTRA Baseline Recall       : 0.9964
ELECTRA Baseline F1-score     : 0.9982
ELECTRA Baseline ROC-AUC      : 0.9991

ELECTRA Baseline Classification Report:
              precision    recall  f1-score   support

       human       1.00      1.00      1.00       275
          ai       1.00      1.00      1.00       275

    accuracy                           1.00       550
   macro avg       1.00      1.00      1.00       550
weighted avg       1.00      1.00      1.00       550

ELECTRA Baseline Confusion Matrix:
[[275   0]
 [  1 274]]


# ELECTRA – Hyperparameter Tuning + Metrics

In [9]:
# ======================================
# 6) ELECTRA HYPERPARAMETER TUNING + METRICS
# ======================================
learning_rates_elec = [2e-5, 3e-5, 5e-5]
batch_sizes_elec    = [8, 16]
epochs_list_elec    = [2, 3]
weight_decays_elec  = [0.0, 0.01]

results_elec = []
best_acc_elec = 0.0
best_cfg_elec = None
best_model_elec = None

for lr, bs, ep, wd in product(
    learning_rates_elec, batch_sizes_elec, epochs_list_elec, weight_decays_elec
):
    print(f"\n[ELECTRA GRID] lr={lr}, batch={bs}, epochs={ep}, wd={wd}")

    model_tmp = ElectraForSequenceClassification.from_pretrained(
        "google/electra-small-discriminator",
        num_labels=2
    )



    args_tmp = TrainingArguments(
        output_dir="electra_tuned_tmp",
        learning_rate=lr,
        per_device_train_batch_size=bs,
        per_device_eval_batch_size=bs,
        num_train_epochs=ep,
        weight_decay=wd,
        eval_strategy="epoch",
        logging_strategy="epoch",
        save_strategy="no"
    )

    trainer_tmp = Trainer(
        model=model_tmp,
        args=args_tmp,
        train_dataset=train_ds_elec,
        eval_dataset=test_ds_elec,
        compute_metrics=compute_metrics_acc
    )

    trainer_tmp.train()
    eval_res = trainer_tmp.evaluate()
    acc = eval_res["eval_accuracy"]
    print(f"   -> Accuracy: {acc:.4f}")

    results_elec.append({
        "lr": lr,
        "batch_size": bs,
        "epochs": ep,
        "weight_decay": wd,
        "accuracy": acc
    })

    if acc > best_acc_elec:
        best_acc_elec = acc
        best_cfg_elec = {"lr": lr, "batch_size": bs, "epochs": ep, "weight_decay": wd}
        best_model_elec = model_tmp

print("\nBest ELECTRA config:", best_cfg_elec)
print("Best ELECTRA accuracy:", best_acc_elec)

# Metrics for best ELECTRA
trainer_elec_best = Trainer(
    model=best_model_elec,
    args=TrainingArguments(
        output_dir="electra_best_final",
        learning_rate=best_cfg_elec["lr"],
        per_device_train_batch_size=best_cfg_elec["batch_size"],
        per_device_eval_batch_size=best_cfg_elec["batch_size"],
        num_train_epochs=best_cfg_elec["epochs"],
        weight_decay=best_cfg_elec["weight_decay"],
        eval_strategy="epoch",
        logging_strategy="epoch",
        save_strategy="no"
    ),
    train_dataset=train_ds_elec,
    eval_dataset=test_ds_elec,
    compute_metrics=compute_metrics_acc
)

pred_output_elec_best = trainer_elec_best.predict(test_ds_elec)
logits_elec_best = pred_output_elec_best.predictions
y_true_elec_best = pred_output_elec_best.label_ids

y_pred_elec_best = np.argmax(logits_elec_best, axis=1)
y_scores_elec_best = logits_elec_best[:, 1]

print_binary_metrics(
    y_true_elec_best,
    y_pred_elec_best,
    y_scores_elec_best,
    model_name="ELECTRA Tuned"
)



[ELECTRA GRID] lr=2e-05, batch=8, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2302,0.047027,0.998182
2,0.0364,0.027931,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=8, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2302,0.047032,0.998182
2,0.0364,0.027935,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=8, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2217,0.038662,0.998182
2,0.026,0.018864,0.998182
3,0.0158,0.016641,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=8, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2222,0.037123,0.998182
2,0.0258,0.01841,0.998182
3,0.0158,0.016243,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=16, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3623,0.145858,0.998182
2,0.1181,0.084233,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=16, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3621,0.153941,0.998182
2,0.1238,0.089536,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=16, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3502,0.128847,0.998182
2,0.0893,0.051915,0.998182
3,0.0509,0.041519,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=2e-05, batch=16, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3501,0.121072,0.998182
2,0.0851,0.04905,0.998182
3,0.0494,0.039585,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=8, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.165,0.023146,0.998182
2,0.0174,0.016477,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=8, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1634,0.024709,0.998182
2,0.0175,0.016818,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=8, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1585,0.021637,0.998182
2,0.0135,0.014077,0.998182
3,0.0098,0.01352,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=8, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1602,0.020235,0.998182
2,0.0135,0.013856,0.998182
3,0.0098,0.013317,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=16, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2774,0.070408,0.998182
2,0.0569,0.040893,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=16, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2783,0.075535,0.998182
2,0.0593,0.042917,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=16, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2679,0.061381,0.998182
2,0.0417,0.026127,0.998182
3,0.0238,0.021922,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=3e-05, batch=16, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2673,0.057563,0.998182
2,0.0407,0.02541,0.998182
3,0.0237,0.021411,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=8, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1071,0.014366,0.998182
2,0.0095,0.013121,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=8, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1043,0.014564,0.998182
2,0.0094,0.013291,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=8, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1022,0.013948,0.998182
2,0.0084,0.013154,0.998182
3,0.0076,0.013215,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=8, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1049,0.013769,0.998182
2,0.0084,0.012999,0.998182
3,0.0076,0.013113,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=16, epochs=2, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1858,0.028602,0.998182
2,0.0234,0.019196,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=16, epochs=2, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1863,0.030996,0.998182
2,0.0226,0.019538,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=16, epochs=3, wd=0.0


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1805,0.026384,0.998182
2,0.0169,0.015152,0.998182
3,0.0111,0.014154,0.998182


   -> Accuracy: 0.9982

[ELECTRA GRID] lr=5e-05, batch=16, epochs=3, wd=0.01


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.18,0.026254,0.996364
2,0.0182,0.014966,0.998182
3,0.0114,0.014008,0.998182


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


   -> Accuracy: 0.9982

Best ELECTRA config: {'lr': 2e-05, 'batch_size': 8, 'epochs': 2, 'weight_decay': 0.0}
Best ELECTRA accuracy: 0.9981818181818182


ELECTRA Tuned Test Accuracy : 0.9982
ELECTRA Tuned Precision    : 1.0000
ELECTRA Tuned Recall       : 0.9964
ELECTRA Tuned F1-score     : 0.9982
ELECTRA Tuned ROC-AUC      : 0.9986

ELECTRA Tuned Classification Report:
              precision    recall  f1-score   support

       human       1.00      1.00      1.00       275
          ai       1.00      1.00      1.00       275

    accuracy                           1.00       550
   macro avg       1.00      1.00      1.00       550
weighted avg       1.00      1.00      1.00       550

ELECTRA Tuned Confusion Matrix:
[[275   0]
 [  1 274]]


# DistilBERT – Baseline + Metrics

In [10]:
# =========================
# 7) DISTILBERT BASELINE + METRICS
# =========================
df_distil = df[["text", "generated"]].copy()
df_distil.rename(columns={"generated": "label"}, inplace=True)

train_df_distil, test_df_distil = train_test_split(
    df_distil, test_size=0.2, random_state=42, stratify=df_distil["label"]
)

train_ds_distil = Dataset.from_pandas(train_df_distil.reset_index(drop=True))
test_ds_distil  = Dataset.from_pandas(test_df_distil.reset_index(drop=True))

tokenizer_distil = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_distil(batch):
    return tokenizer_distil(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=256
    )

train_ds_distil = train_ds_distil.map(tokenize_distil, batched=True)
test_ds_distil  = test_ds_distil.map(tokenize_distil, batched=True)

train_ds_distil = train_ds_distil.remove_columns(["text"])
test_ds_distil  = test_ds_distil.remove_columns(["text"])

train_ds_distil.set_format(type="torch")
test_ds_distil.set_format(type="torch")

model_distil = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2
)

training_args_distil = TrainingArguments(
    output_dir="distilbert_baseline",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="no"
)

trainer_distil = Trainer(
    model=model_distil,
    args=training_args_distil,
    train_dataset=train_ds_distil,
    eval_dataset=test_ds_distil,
    compute_metrics=compute_metrics_acc
)

trainer_distil.train()

# Metrics
pred_output_distil = trainer_distil.predict(test_ds_distil)
logits_distil = pred_output_distil.predictions
y_true_distil = pred_output_distil.label_ids

y_pred_distil = np.argmax(logits_distil, axis=1)
y_scores_distil = logits_distil[:, 1]

print_binary_metrics(
    y_true_distil,
    y_pred_distil,
    y_scores_distil,
    model_name="DistilBERT Baseline"
)

# Optional quick predictor
def predict_distil(text):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_distil.to(device)

    tokens = tokenizer_distil(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256
    ).to(device)

    with torch.no_grad():
        logits = model_distil(**tokens).logits

    pred = torch.argmax(logits, dim=1).item()
    return "human" if pred == 0 else "ai"

print(predict_distil("This essay is written by a student."))
print(predict_distil("In this paper, artificial intelligence enables efficient processing."))


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/2200 [00:00<?, ? examples/s]

Map:   0%|          | 0/550 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0488,0.013376,0.998182
2,0.0072,0.014068,0.998182
3,0.0058,0.013972,0.998182


DistilBERT Baseline Test Accuracy : 0.9982
DistilBERT Baseline Precision    : 1.0000
DistilBERT Baseline Recall       : 0.9964
DistilBERT Baseline F1-score     : 0.9982
DistilBERT Baseline ROC-AUC      : 0.9994

DistilBERT Baseline Classification Report:
              precision    recall  f1-score   support

       human       1.00      1.00      1.00       275
          ai       1.00      1.00      1.00       275

    accuracy                           1.00       550
   macro avg       1.00      1.00      1.00       550
weighted avg       1.00      1.00      1.00       550

DistilBERT Baseline Confusion Matrix:
[[275   0]
 [  1 274]]
ai
ai


# DistilBERT – Fast Tuning + Metrics

In [11]:
# ======================================
# 8) DISTILBERT FAST TUNING + METRICS
# ======================================
learning_rates_distil = [1e-5, 2e-5, 3e-5, 5e-5]
batch_size_distil     = 16
epochs_distil         = 2
weight_decay_distil   = 0.0

results_distil = []
best_acc_distil = 0.0
best_cfg_distil = None
best_model_distil = None

for lr in learning_rates_distil:
    print(f"\n[DISTILBERT GRID] lr={lr}")

    model_tmp = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=2
    )

    args_tmp = TrainingArguments(
        output_dir="distilbert_fast_tune",
        learning_rate=lr,
        per_device_train_batch_size=batch_size_distil,
        per_device_eval_batch_size=batch_size_distil,
        num_train_epochs=epochs_distil,
        weight_decay=weight_decay_distil,
        eval_strategy="epoch",
        logging_strategy="epoch",
        save_strategy="no"
    )

    trainer_tmp = Trainer(
        model=model_tmp,
        args=args_tmp,
        train_dataset=train_ds_distil,
        eval_dataset=test_ds_distil,
        compute_metrics=compute_metrics_acc
    )

    trainer_tmp.train()
    eval_res = trainer_tmp.evaluate()
    acc = eval_res["eval_accuracy"]
    print(f"   -> Accuracy: {acc:.4f}")

    results_distil.append({
        "lr": lr,
        "batch_size": batch_size_distil,
        "epochs": epochs_distil,
        "weight_decay": weight_decay_distil,
        "accuracy": acc
    })

    if acc > best_acc_distil:
        best_acc_distil = acc
        best_cfg_distil = {
            "lr": lr,
            "batch_size": batch_size_distil,
            "epochs": epochs_distil,
            "weight_decay": weight_decay_distil
        }
        best_model_distil = model_tmp

print("\nBest DistilBERT config:", best_cfg_distil)
print("Best DistilBERT accuracy:", best_acc_distil)

# Metrics for best DistilBERT
trainer_distil_best = Trainer(
    model=best_model_distil,
    args=TrainingArguments(
        output_dir="distilbert_best_final",
        learning_rate=best_cfg_distil["lr"],
        per_device_train_batch_size=best_cfg_distil["batch_size"],
        per_device_eval_batch_size=best_cfg_distil["batch_size"],
        num_train_epochs=best_cfg_distil["epochs"],
        weight_decay=best_cfg_distil["weight_decay"],
        eval_strategy="epoch",
        logging_strategy="epoch",
        save_strategy="no"
    ),
    train_dataset=train_ds_distil,
    eval_dataset=test_ds_distil,
    compute_metrics=compute_metrics_acc
)

pred_output_distil_best = trainer_distil_best.predict(test_ds_distil)
logits_distil_best = pred_output_distil_best.predictions
y_true_distil_best = pred_output_distil_best.label_ids

y_pred_distil_best = np.argmax(logits_distil_best, axis=1)
y_scores_distil_best = logits_distil_best[:, 1]

print_binary_metrics(
    y_true_distil_best,
    y_pred_distil_best,
    y_scores_distil_best,
    model_name="DistilBERT Tuned"
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



[DISTILBERT GRID] lr=1e-05


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1322,0.017758,0.998182
2,0.012,0.014816,0.998182


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


   -> Accuracy: 0.9982

[DISTILBERT GRID] lr=2e-05


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0724,0.013378,0.998182
2,0.007,0.01316,0.998182


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


   -> Accuracy: 0.9982

[DISTILBERT GRID] lr=3e-05


Epoch,Training Loss,Validation Loss,Accuracy
1,0.054,0.013348,0.998182
2,0.0058,0.013543,0.998182


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


   -> Accuracy: 0.9982

[DISTILBERT GRID] lr=5e-05


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0435,0.012864,0.998182
2,0.007,0.013001,0.998182


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


   -> Accuracy: 0.9982

Best DistilBERT config: {'lr': 1e-05, 'batch_size': 16, 'epochs': 2, 'weight_decay': 0.0}
Best DistilBERT accuracy: 0.9981818181818182


DistilBERT Tuned Test Accuracy : 0.9982
DistilBERT Tuned Precision    : 1.0000
DistilBERT Tuned Recall       : 0.9964
DistilBERT Tuned F1-score     : 0.9982
DistilBERT Tuned ROC-AUC      : 0.9996

DistilBERT Tuned Classification Report:
              precision    recall  f1-score   support

       human       1.00      1.00      1.00       275
          ai       1.00      1.00      1.00       275

    accuracy                           1.00       550
   macro avg       1.00      1.00      1.00       550
weighted avg       1.00      1.00      1.00       550

DistilBERT Tuned Confusion Matrix:
[[275   0]
 [  1 274]]


# LIME - SHAP

In [21]:
# ============================================
# EXPLAINABILITY FOR BEST MODEL: TUNED LSTM
# LIME (PNG) + SHAP (TEXT EXPLANATION)
# ============================================

!pip install -q lime shap

import numpy as np
import tensorflow as tf
import shap
from lime.lime_text import LimeTextExplainer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# 1) Prepare text data (same source as training)
X_text = df["text"].astype(str).values       # raw essays
y      = df["generated"].values              # 0 = Human, 1 = AI

X_train_text, X_test_text, y_train_text, y_test_text = train_test_split(
    X_text,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("Train texts:", len(X_train_text))
print("Test texts:", len(X_test_text))

# 2) Helper: encode texts exactly like during LSTM training
def encode_texts_lstm(text_list):
    cleaned = [preprocess_text(t) for t in text_list]
    seqs = tokenizer_lstm.texts_to_sequences(cleaned)
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        seqs, maxlen=MAX_LEN, padding="post"
    )
    return padded

# 3) Prediction wrapper for explainability
class_names = ["Human", "AI"]

def predict_proba_lstm(text_list):
    padded = encode_texts_lstm(text_list)
    probs  = best_lstm_model.predict(padded)

    # If output is (N,1), convert to (N,2)
    if probs.shape[1] == 1:
        probs = np.hstack([1 - probs, probs])

    return probs

# ==========================
# 4) LIME – bar plot (saved as PNG)
# ==========================

explainer_lime = LimeTextExplainer(class_names=class_names)

sample_idx = 0  # choose any test essay index
sample_text = X_test_text[sample_idx]
sample_label = y_test_text[sample_idx]

print("\n=== LIME – Tuned LSTM sample (index", sample_idx, ") ===")
print(sample_text[:500], "...\n")
print("True label:", class_names[sample_label])
print("Model probabilities:", predict_proba_lstm([sample_text])[0])

lime_exp = explainer_lime.explain_instance(
    sample_text,
    predict_proba_lstm,
    num_features=15,
    labels=[0, 1]
)

# Bar plot for predicted class
pred_class = int(np.argmax(predict_proba_lstm([sample_text])[0]))
fig = lime_exp.as_pyplot_figure(label=pred_class)
plt.title("LIME Word Importance – Tuned LSTM")
plt.tight_layout()
fig.savefig("lime_barplot.png", dpi=300)
plt.close(fig)

print("Saved LIME bar plot as lime_barplot.png")

# ==========================
# 5) SHAP – text explanation (screenshot from notebook)
# ==========================

shap.initjs()

masker = shap.maskers.Text()
explainer_shap = shap.Explainer(predict_proba_lstm, masker)

# Use one test essay for SHAP explanation
shap_texts = X_test_text[:1].tolist()

print("\nComputing SHAP values for Tuned LSTM on 1 sample...")
shap_values = explainer_shap(shap_texts)

print("\n=== SHAP – Tuned LSTM – Sample 0 ===")
shap.plots.text(shap_values[0])  # Take a screenshot of this output for the report


Train texts: 2200
Test texts: 550

=== LIME – Tuned LSTM sample (index 0 ) ===
Throughout the past century car usage has exploded from a luxury available only to the highest echelon of society to a tool almost essential to life. Although cars are convenient there are many downsides to having them. Some reasons a society that limits car use could be beneficial are: expenses, environmental costs, and health degredation. Hopefully together we can curb car usage and lead the way for a cheaper, greener, and healthier future.

Its a known fact that cars are expensive, but when y ...

True label: Human
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
Model probabilities: [0.98331046 0.01668955]
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
Saved LIME bar plot as lime_barplot.png



Computing SHAP values for Tuned LSTM on 1 sample...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m

  0%|          | 0/498 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52

PartitionExplainer explainer: 2it [00:10, 10.12s/it]               


=== SHAP – Tuned LSTM – Sample 0 ===



