In [None]:
# Clone the model repository from the Hugging Face Hub into ./bot-detection-mbert;
# a later cell loads pytorch_model.bin and scaler.pkl from that directory.
!git clone https://huggingface.co/yazidsupriadi/bot-detection-mbert

Cloning into 'bot-detection-mbert'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 47 (delta 13), reused 0 (delta 0), pack-reused 3 (from 1)[K
Unpacking objects: 100% (47/47), 1.60 MiB | 4.38 MiB/s, done.


In [2]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
import gradio as gr
import numpy as np
import joblib

# ======== Model Definition ========
class mBERTBotClassifier(nn.Module):
    """Binary classifier that fuses a BERT text embedding with numeric features.

    The hidden state at sequence position 0 ([CLS]) is projected to 128
    dimensions, the numeric feature vector is projected to 32 dimensions, and
    the concatenation of both is mapped to a single logit per example.

    Args:
        bert_model: Encoder exposing ``config.hidden_size`` and returning an
            object with a ``last_hidden_state`` tensor when called with
            ``input_ids`` and ``attention_mask``.
        num_numerical_features: Width of the numeric feature vector.
    """

    def __init__(self, bert_model, num_numerical_features):
        super().__init__()
        # Attribute names double as state_dict keys; keep them stable so saved
        # checkpoints continue to load.
        self.bert = bert_model
        self.dropout = nn.Dropout(0.3)
        self.fc_text = nn.Linear(self.bert.config.hidden_size, 128)
        self.fc_numeric = nn.Linear(num_numerical_features, 32)
        self.classifier = nn.Linear(128 + 32, 1)

    def forward(self, input_ids, attention_mask, numerical_features):
        """Return raw (pre-sigmoid) logits with shape ``(batch,)``.

        Args:
            input_ids: Token id tensor passed to the encoder.
            attention_mask: Attention mask tensor passed to the encoder.
            numerical_features: Tensor whose last dimension equals
                ``num_numerical_features``.
        """
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = bert_output.last_hidden_state[:, 0, :]  # [CLS]
        text_feat = self.dropout(self.fc_text(pooled_output))
        numeric_feat = self.fc_numeric(numerical_features)
        combined = torch.cat((text_feat, numeric_feat), dim=1)
        logits = self.classifier(combined)
        # Squeeze only the trailing singleton dimension. A bare squeeze() would
        # also drop the batch dimension when batch size is 1 and yield a 0-d
        # tensor, which breaks losses/metrics expecting shape (batch,).
        return logits.squeeze(-1)

# ======== Load Assets ========
# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
bert_model = AutoModel.from_pretrained("bert-base-multilingual-cased")

model = mBERTBotClassifier(bert_model=bert_model, num_numerical_features=5)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint in the cloned repository cannot run arbitrary code.
model.load_state_dict(
    torch.load(
        "./bot-detection-mbert/pytorch_model.bin",
        map_location=device,
        weights_only=True,
    )
)
model.to(device)
model.eval()  # inference mode: disables dropout

# NOTE(review): joblib.load unpickles arbitrary Python objects; only load this
# file from a repository you trust.
scaler = joblib.load("./bot-detection-mbert/scaler.pkl")

# ======== Predict Function ========
def predict(username, text, favorite_count, retweet_count, reply_count, quote_count, tweet_per_day, model_choice):
    """Classify one account as bot or human and format the result as Markdown.

    Args:
        username: Account handle; a leading ``@`` is accepted and stripped.
        text: Tweet text fed to the tokenizer (truncated to 128 tokens).
        favorite_count, retweet_count, reply_count, quote_count: Engagement
            counts for the tweet.
        tweet_per_day: Posting frequency of the account.
        model_choice: Model name shown in the report (display only).

    Returns:
        A Markdown string with the predicted label and the confidence in that
        label, expressed as a percentage.
    """
    # The UI placeholder suggests typing "@akun_x"; strip the "@" so the report
    # does not render "@@akun_x".
    handle = str(username or "").strip().lstrip("@")

    # Tokenize text input
    tokens = tokenizer(text or "", padding=True, truncation=True, max_length=128, return_tensors='pt')
    input_ids = tokens['input_ids'].to(device)
    attention_mask = tokens['attention_mask'].to(device)

    # Scale numeric features.
    # NOTE(review): the column order presumably matches the order the scaler
    # was fitted with -- confirm against the training code.
    numeric = np.array([[tweet_per_day, favorite_count, retweet_count, reply_count, quote_count]])
    numeric_scaled = scaler.transform(numeric)
    numeric_tensor = torch.tensor(numeric_scaled, dtype=torch.float32).to(device)

    # Predict: the model returns a single raw logit for this one-example batch.
    with torch.no_grad():
        logit = model(input_ids, attention_mask, numeric_tensor)
        prob = torch.sigmoid(logit).item()  # probability of the "bot" class

    prob = float(max(1e-6, min(1 - 1e-6, prob)))  # clamp for stability

    is_bot = prob > 0.5
    label = "🤖 Bot" if is_bot else "👤 Human"
    verdict = 'bot otomatis' if is_bot else 'akun manusia'
    # Report confidence in the *predicted* label: for a "Human" verdict that is
    # 1 - P(bot), not P(bot).
    confidence_pct = (prob if is_bot else 1.0 - prob) * 100.0

    # Format output
    return f"""
### 🔍 Hasil Prediksi Akun
**Username:** @{handle}
**Model:** {model_choice}

**Prediksi:** {label}
**Confidence:** {confidence_pct:.2f}%

🧠 Berdasarkan analisis teks dan aktivitas akun, sistem memperkirakan akun ini **{verdict}** dengan tingkat keyakinan **{confidence_pct:.2f}%**.
"""

# ======== Gradio Interface ========
# Build the Gradio UI. Input order must match the positional parameters of
# predict(): username, text, favorite, retweet, reply, quote, tweet_per_day,
# model_choice.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Username", placeholder="@akun_x"),
        gr.Textbox(label="Teks Tweet"),
        gr.Slider(0, 10000, step=1, label="Favorite Count"),
        gr.Slider(0, 10000, step=1, label="Retweet Count"),
        gr.Slider(0, 10000, step=1, label="Reply Count"),
        gr.Slider(0, 10000, step=1, label="Quote Count"),
        gr.Slider(0.0, 100.0, step=0.1, label="Tweet Per Day"),
        # Pre-select the only option; without a default the callback receives
        # None and the report shows "Model: None".
        gr.Radio(["mBERT"], value="mBERT", label="Pilih Model")
    ],
    outputs=gr.Markdown(label="Hasil Prediksi"),
    title="🤖 Deteksi Akun Bot (mBERT + Fitur Numerik)",
    description=(
        "Prediksi apakah akun termasuk **bot atau manusia** berdasarkan teks tweet dan "
        "fitur aktivitas seperti retweet, reply, dan quote count."
    ),
    allow_flagging="never"
)

if __name__ == "__main__":
    demo.launch()


KeyboardInterrupt: 

In [1]:
# Clone the model repository from the Hugging Face Hub into ./mbert_gru_bot;
# a later cell loads model_gru.pt and scaler.pkl from that directory.
!git clone https://huggingface.co/yazidsupriadi/mbert_gru_bot

Cloning into 'mbert_gru_bot'...
remote: Enumerating objects: 55, done.[K
remote: Counting objects: 100% (52/52), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 55 (delta 18), reused 0 (delta 0), pack-reused 3 (from 1)[K
Unpacking objects: 100% (55/55), 91.94 KiB | 3.28 MiB/s, done.


In [4]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
import gradio as gr
import numpy as np
import joblib

# ======== Model Definition ========
class mBERT_GRU(nn.Module):
    """Bot classifier: mBERT token states -> bidirectional GRU, fused with numeric features.

    The encoder is always run under ``torch.no_grad()``. The GRU output at the
    last sequence position is concatenated with a 32-dim projection of the
    numeric features and passed through two linear layers and a sigmoid.

    Args:
        numeric_input_dim: Width of the numeric feature vector.
    """

    def __init__(self, numeric_input_dim):
        super().__init__()
        # Attribute names are state_dict keys; they must stay as they are.
        self.bert = AutoModel.from_pretrained("bert-base-multilingual-cased")
        self.gru = nn.GRU(
            self.bert.config.hidden_size,
            128,
            batch_first=True,
            bidirectional=True,
        )

        self.numeric_fc = nn.Linear(numeric_input_dim, 32)

        # 128 * 2: forward and backward GRU directions are concatenated.
        self.fc1 = nn.Linear(128 * 2 + 32, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask, numeric_feats):
        """Return sigmoid probabilities with shape ``(batch, 1)``."""
        # No gradients are tracked through the encoder.
        with torch.no_grad():
            encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
            token_states = encoded.last_hidden_state

        sequence_out = self.gru(token_states)[0]
        last_step = sequence_out[:, -1]  # take the final time step

        numeric_branch = self.numeric_fc(numeric_feats).relu()

        fused = torch.cat([last_step, numeric_branch], dim=1)

        hidden = self.fc1(fused).relu()
        return self.sigmoid(self.fc2(hidden))

# ======== Load Assets ========
# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")

model = mBERT_GRU(numeric_input_dim=5)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint in the cloned repository cannot run arbitrary code.
model.load_state_dict(
    torch.load(
        "./mbert_gru_bot/model_gru.pt",
        map_location=device,
        weights_only=True,
    )
)
model.to(device)
model.eval()  # inference mode

# NOTE(review): joblib.load unpickles arbitrary Python objects; only load this
# file from a repository you trust.
scaler = joblib.load("./mbert_gru_bot/scaler.pkl")

# ======== Predict Function ========
def predict(username, text, favorite_count, retweet_count, reply_count, quote_count, tweet_per_day, model_choice):
    """Classify one account as bot or human and return a plain-text report.

    Args:
        username: Account handle; a leading ``@`` is accepted and stripped.
        text: Tweet text fed to the tokenizer (truncated to 128 tokens).
        favorite_count, retweet_count, reply_count, quote_count: Engagement
            counts for the tweet.
        tweet_per_day: Posting frequency of the account.
        model_choice: Model name shown in the report (display only).

    Returns:
        A multi-line string with the predicted label and the confidence in that
        label as a value between 0 and 1.
    """
    # The UI placeholder suggests typing "@akun_x"; strip the "@" so the report
    # does not show "@@akun_x".
    handle = str(username or "").strip().lstrip("@")

    # Tokenize text input
    tokens = tokenizer(text or "", padding=True, truncation=True, max_length=128, return_tensors='pt')
    input_ids = tokens['input_ids'].to(device)
    attention_mask = tokens['attention_mask'].to(device)

    # Scale numeric features.
    # NOTE(review): the column order presumably matches the order the scaler
    # was fitted with -- confirm against the training code.
    numeric = np.array([[tweet_per_day, favorite_count, retweet_count, reply_count, quote_count]])
    numeric_scaled = scaler.transform(numeric)
    numeric_tensor = torch.tensor(numeric_scaled, dtype=torch.float32).to(device)

    # Predict: the model already applies a sigmoid, so this is P(bot).
    with torch.no_grad():
        prob = model(input_ids, attention_mask, numeric_tensor).item()

    prob = max(1e-6, min(1 - 1e-6, prob))  # avoid 0 or 1
    is_bot = prob > 0.5
    label = "Bot" if is_bot else "Human"
    # Confidence refers to the predicted label: for "Human" it is 1 - P(bot).
    confidence = prob if is_bot else 1.0 - prob

    return f"""👤 Username: @{handle}
📦 Model: {model_choice}
🧠 Prediction: {label}
🔢 Confidence: {confidence:.4f}"""

# ======== Gradio Interface ========
# Build the Gradio UI. Input order must match the positional parameters of
# predict().
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Username", placeholder="@akun_x"),
        gr.Textbox(label="Teks Tweet"),
        gr.Slider(0, 10000, step=1, label="Favorite Count"),
        gr.Slider(0, 10000, step=1, label="Retweet Count"),
        gr.Slider(0, 10000, step=1, label="Reply Count"),
        gr.Slider(0, 10000, step=1, label="Quote Count"),
        gr.Slider(0.0, 100.0, step=0.1, label="Tweet Per Day"),
        # Pre-select the only option; without a default the callback receives
        # None and the report shows "Model: None".
        gr.Radio(["mBERT + GRU"], value="mBERT + GRU", label="Pilih Model")
    ],
    outputs=gr.Textbox(label="Hasil Prediksi"),
    title="Deteksi Akun Bot (mBERT + GRU + Fitur Numerik)",
    description="Prediksi apakah sebuah akun merupakan bot berdasarkan teks tweet dan fitur aktivitas.",
    allow_flagging="never"
)

if __name__ == "__main__":
    demo.launch()


model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]



It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://86b627f98eb1f9b3b1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
# Clone the model repository from the Hugging Face Hub into ./indo_gru_bot;
# a later cell loads indobert_gru_bot_detection.pth and scaler.pkl from it.
!git clone https://huggingface.co/yazidsupriadi/indo_gru_bot/

Cloning into 'indo_gru_bot'...
remote: Enumerating objects: 277, done.[K
remote: Counting objects: 100% (274/274), done.[K
remote: Compressing objects: 100% (274/274), done.[K
remote: Total 277 (delta 109), reused 0 (delta 0), pack-reused 3 (from 1)[K
Receiving objects: 100% (277/277), 363.74 KiB | 4.60 MiB/s, done.
Resolving deltas: 100% (109/109), done.
Filtering content: 100% (6/6), 1.88 GiB | 38.34 MiB/s, done.


In [None]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel
import gradio as gr
import numpy as np
import joblib

# ======== Model Definition IndoBERT + GRU ========
class IndoBERT_GRU(nn.Module):
    """Bot classifier: IndoBERT token states -> unidirectional GRU, fused with numeric features.

    The encoder is always run under ``torch.no_grad()``. The GRU output at the
    last sequence position is concatenated with a 32-dim projection of the
    numeric features and passed through two linear layers and a sigmoid.

    Args:
        numeric_input_dim: Width of the numeric feature vector.
    """

    def __init__(self, numeric_input_dim):
        super().__init__()
        # Attribute names are state_dict keys; they must stay as they are.
        self.bert = AutoModel.from_pretrained("indobenchmark/indobert-base-p1")
        # Single-direction GRU only.
        self.gru = nn.GRU(
            self.bert.config.hidden_size,
            128,
            batch_first=True,
        )

        self.numeric_fc = nn.Linear(numeric_input_dim, 32)
        self.fc1 = nn.Linear(128 + 32, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask, numeric_feats):
        """Return sigmoid probabilities with shape ``(batch, 1)``."""
        # No gradients are tracked through the encoder.
        with torch.no_grad():
            encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
            token_states = encoded.last_hidden_state

        sequence_out = self.gru(token_states)[0]
        final_state = sequence_out[:, -1]  # take the output at the last time step

        numeric_branch = self.numeric_fc(numeric_feats).relu()

        fused = torch.cat([final_state, numeric_branch], dim=1)
        hidden = self.fc1(fused).relu()
        return self.sigmoid(self.fc2(hidden))

# ======== Load Assets ========
# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p1")

model = IndoBERT_GRU(numeric_input_dim=5)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint in the cloned repository cannot run arbitrary code.
model.load_state_dict(
    torch.load(
        "./indo_gru_bot/indobert_gru_bot_detection.pth",
        map_location=device,
        weights_only=True,
    )
)
model.to(device)
model.eval()  # inference mode

# NOTE(review): joblib.load unpickles arbitrary Python objects; only load this
# file from a repository you trust.
scaler = joblib.load("./indo_gru_bot/scaler.pkl")

# ======== Predict Function ========
def predict(username, text, favorite_count, retweet_count, reply_count, quote_count, tweet_per_day, model_choice):
    """Classify one account as bot or human and return a plain-text report.

    Args:
        username: Account handle; a leading ``@`` is accepted and stripped.
        text: Tweet text fed to the tokenizer (truncated to 128 tokens).
        favorite_count, retweet_count, reply_count, quote_count: Engagement
            counts for the tweet.
        tweet_per_day: Posting frequency of the account.
        model_choice: Model name shown in the report (display only).

    Returns:
        A multi-line string with the predicted label and the confidence in that
        label as a value between 0 and 1.
    """
    # The UI placeholder suggests typing "@akun_x"; strip the "@" so the report
    # does not show "@@akun_x".
    handle = str(username or "").strip().lstrip("@")

    # Tokenize text input
    tokens = tokenizer(text or "", padding=True, truncation=True, max_length=128, return_tensors='pt')
    input_ids = tokens['input_ids'].to(device)
    attention_mask = tokens['attention_mask'].to(device)

    # Scale numeric features.
    # NOTE(review): the column order presumably matches the order the scaler
    # was fitted with -- confirm against the training code.
    numeric = np.array([[tweet_per_day, favorite_count, retweet_count, reply_count, quote_count]])
    numeric_scaled = scaler.transform(numeric)
    numeric_tensor = torch.tensor(numeric_scaled, dtype=torch.float32).to(device)

    # Predict: the model already applies a sigmoid, so this is P(bot).
    with torch.no_grad():
        prob = model(input_ids, attention_mask, numeric_tensor).item()

    prob = max(1e-6, min(1 - 1e-6, prob))  # avoid 0 or 1
    is_bot = prob > 0.5
    label = "Bot" if is_bot else "Human"
    # Confidence refers to the predicted label: for "Human" it is 1 - P(bot).
    confidence = prob if is_bot else 1.0 - prob

    return f"""👤 Username: @{handle}
📦 Model: {model_choice}
🧠 Prediction: {label}
🔢 Confidence: {confidence:.4f}"""

# ======== Gradio Interface ========
# Build the Gradio UI. Input order must match the positional parameters of
# predict().
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Username", placeholder="@akun_x"),
        gr.Textbox(label="Teks Tweet"),
        gr.Slider(0, 10000, step=1, label="Favorite Count"),
        gr.Slider(0, 10000, step=1, label="Retweet Count"),
        gr.Slider(0, 10000, step=1, label="Reply Count"),
        gr.Slider(0, 10000, step=1, label="Quote Count"),
        gr.Slider(0.0, 100.0, step=0.1, label="Tweet Per Day"),
        # Pre-select the only option; without a default the callback receives
        # None and the report shows "Model: None".
        gr.Radio(["IndoBERT + GRU"], value="IndoBERT + GRU", label="Pilih Model")
    ],
    outputs=gr.Textbox(label="Hasil Prediksi"),
    title="Deteksi Akun Bot (IndoBERT + GRU + Fitur Numerik)",
    description="Prediksi apakah sebuah akun merupakan bot berdasarkan teks tweet dan fitur aktivitas.",
    allow_flagging="never"
)

if __name__ == "__main__":
    demo.launch()


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bfa27b346ca907e71e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
