In [None]:
# Set up Git LFS before cloning so that large files stored via LFS in the
# repositories below are downloaded together with the regular git objects.
!git lfs install

Git LFS initialized.


In [None]:
# Fetch the LSTM bot-detector repository from the Hugging Face Hub. Later cells
# read vocab.pkl, scaler.pkl, label_encoder.pkl and model.pth from ./bot-detector-lstm.
!git clone https://huggingface.co/yazidsupriadi/bot-detector-lstm

Cloning into 'bot-detector-lstm'...
remote: Enumerating objects: 60, done.[K
remote: Counting objects: 100% (57/57), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 60 (delta 14), reused 0 (delta 0), pack-reused 3 (from 1)[K
Unpacking objects: 100% (60/60), 218.91 KiB | 7.82 MiB/s, done.


In [None]:
# Fetch the GRU repository from the Hugging Face Hub. A later cell loads
# gru_bot/best_gru_bot_model.pth from this checkout.
!git clone https://huggingface.co/yazidsupriadi/gru_bot

Cloning into 'gru_bot'...
remote: Enumerating objects: 216, done.[K
remote: Counting objects: 100% (213/213), done.[K
remote: Compressing objects: 100% (213/213), done.[K
remote: Total 216 (delta 84), reused 0 (delta 0), pack-reused 3 (from 1)[K
Receiving objects: 100% (216/216), 263.24 KiB | 11.44 MiB/s, done.
Resolving deltas: 100% (84/84), done.
Filtering content: 100% (3/3), 45.31 MiB | 30.21 MiB/s, done.


In [None]:
import torch
import torch.nn as nn
import gradio as gr
import pickle
import re
import numpy as np
from torch.nn.utils.rnn import pad_sequence

# === Define Model ===
class BotDetector(nn.Module):
    """LSTM classifier that fuses tweet text with numeric account features.

    Token ids go through an embedding and a single LSTM layer; the final hidden
    state is projected to 64 features.  The numeric features are projected to
    32 features.  Both projections are concatenated (96 values) and passed to a
    small feed-forward head that returns raw, un-normalised logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        num_numeric: Number of numeric features per sample.
        output_dim: Number of logits produced per sample.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_numeric, output_dim):
        super().__init__()
        text_width = 64
        numeric_width = 32

        # Id 0 is declared as padding, so its embedding row is a zero vector.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc_text = nn.Linear(hidden_dim, text_width)
        self.fc_numeric = nn.Linear(num_numeric, numeric_width)
        # The leading ReLU activates the concatenated projections.  Its position
        # also fixes the Sequential indices of the Linear layers (1 and 3), which
        # are part of the state-dict keys.
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(text_width + numeric_width, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
        )

    def forward(self, x_text, x_num):
        """Return class logits for a batch.

        Args:
            x_text: Integer token ids, batch-first (the LSTM is built with
                ``batch_first=True``).
            x_num: Float numeric features with ``num_numeric`` columns.

        Returns:
            Tensor of logits with ``output_dim`` columns, one row per sample.
        """
        embedded = self.embedding(x_text)
        _, (hidden, _) = self.lstm(embedded)
        # hidden[-1] is the final hidden state of the last LSTM layer.
        text_features = self.fc_text(hidden[-1])
        numeric_features = self.fc_numeric(x_num)
        fused = torch.cat([text_features, numeric_features], dim=1)
        return self.classifier(fused)

# === Load assets ===
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# These files come from the repository cloned above; only run this against a
# source you trust.
with open("./bot-detector-lstm/vocab.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)  # token -> id mapping consumed by encode()

with open("./bot-detector-lstm/scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)  # provides .transform() for the numeric features

with open("./bot-detector-lstm/label_encoder.pkl", "rb") as encoder_file:
    le = pickle.load(encoder_file)  # provides .classes_ with the class labels

# This cell always runs inference on the CPU.
device = torch.device("cpu")

# Positional arguments: vocab_size, embed_dim, hidden_dim, num_numeric, output_dim.
# They have to describe the same architecture as the checkpoint loaded below.
model = BotDetector(len(vocab), 100, 128, 5, len(le.classes_))
model.load_state_dict(torch.load("./bot-detector-lstm/model.pth", map_location=device))
model.eval()  # switch to inference mode

# === Preprocessing ===
def clean_text(text):
    """Strip URLs, @mentions and #hashtags from a tweet.

    The input is converted with ``str()`` first.  Patterns are removed in the
    order URL, mention, hashtag; finally leading and trailing whitespace is
    trimmed.  Whitespace left between the remaining words is not collapsed.
    """
    cleaned = str(text)
    for pattern in (r"http\S+", r"@\w+", r"#\w+"):
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned.strip()

def tokenize(text):
    """Split ``text`` on runs of whitespace and return the tokens as a list."""
    tokens = text.split()
    return tokens

def encode(tokens):
    """Convert tokens to vocabulary ids.

    Each token is looked up in the module-level ``vocab`` mapping; tokens that
    are not present are assigned id 1.
    """
    unknown_id = 1
    ids = []
    for token in tokens:
        ids.append(vocab.get(token, unknown_id))
    return ids

# === Predict Function ===
def predict_bot(text, favorite_count, retweet_count, reply_count, quote_count, tweet_per_day):
    """Classify a single tweet with the LSTM model and format the result.

    Args:
        text: Raw tweet text.  It is cleaned, tokenized and encoded with the
            module-level ``clean_text``, ``tokenize`` and ``encode`` helpers.
        favorite_count: Numeric feature, first column given to the scaler.
        retweet_count: Numeric feature, second column.
        reply_count: Numeric feature, third column.
        quote_count: Numeric feature, fourth column.
        tweet_per_day: Numeric feature, fifth column.

    Returns:
        A string of the form ``"<label> (confidence: <p>)"`` where ``<label>``
        is the entry of ``le.classes_`` with the highest softmax probability
        and ``<p>`` is that probability with two decimals.

    Notes:
        * Text that is empty after cleaning (for example a tweet consisting
          only of a URL or a mention) is represented by a single padding id
          instead of a zero-length sequence.
        * A numeric argument that is ``None`` is treated as 0.
    """
    token_ids = encode(tokenize(clean_text(text)))
    if not token_ids:
        # torch.tensor([]) would be a zero-length float tensor, which cannot be
        # used as embedding indices.  Feed one padding id (0) so the model
        # still produces a prediction from the numeric features.
        token_ids = [0]
    # Explicit long dtype: embedding indices must be integers.
    text_input = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(device)

    raw_features = (favorite_count, retweet_count, reply_count, quote_count, tweet_per_day)
    # A cleared number field may arrive as None; map it to 0 rather than
    # handing an object-dtype array to the scaler.
    numeric_input = np.array([[0.0 if value is None else float(value) for value in raw_features]])
    numeric_scaled = scaler.transform(numeric_input)
    numeric_tensor = torch.tensor(numeric_scaled, dtype=torch.float32).to(device)

    with torch.no_grad():
        logits = model(text_input, numeric_tensor)
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    pred_idx = int(np.argmax(probs))
    label = le.classes_[pred_idx]
    confidence = probs[pred_idx]
    return f"{label} (confidence: {confidence:.2f})"

# === Gradio UI ===
# Input widgets, listed in the same order as predict_bot's parameters.
inputs = [
    gr.Textbox(label="Tweet Text", placeholder="Masukkan teks tweet di sini..."),
    *(
        gr.Number(label=field_label, value=default)
        for field_label, default in (
            ("Favorite Count", 0),
            ("Retweet Count", 0),
            ("Reply Count", 0),
            ("Quote Count", 0),
            ("Tweet Per Day", 1),
        )
    ),
]

# predict_bot returns one formatted string, shown in this text box.
output = gr.Textbox(label="Hasil Prediksi")

gr.Interface(
    title="Bot Detector with LSTM",
    description="Masukkan teks dan fitur numerik untuk memprediksi apakah tweet berasal dari bot atau manusia.",
    fn=predict_bot,
    inputs=inputs,
    outputs=output,
).launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f27ecc612f3a2da985.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import zlib

import gradio as gr
import joblib
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# ===== Model Definitions =====
class LSTMBotDetector(nn.Module):
    """Single-layer LSTM classifier over tweet text plus numeric features.

    The final LSTM hidden state is mapped to 64 features and the numeric
    features to 32; the 96 concatenated values go through
    ReLU -> Linear(96, 32) -> ReLU -> Linear(32, output_dim) and are returned
    as raw logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        num_numeric: Number of numeric features per sample.
        output_dim: Number of logits produced per sample.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_numeric, output_dim):
        super().__init__()
        text_branch_dim, numeric_branch_dim = 64, 32

        # padding_idx=0 keeps the embedding of id 0 at zero.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc_text = nn.Linear(hidden_dim, text_branch_dim)
        self.fc_numeric = nn.Linear(num_numeric, numeric_branch_dim)
        # Layer positions inside the Sequential (Linear at 1 and 3) determine
        # the state-dict key names, so the order must not change.
        head_layers = [
            nn.ReLU(),
            nn.Linear(text_branch_dim + numeric_branch_dim, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x_text, x_num):
        """Compute logits from token ids ``x_text`` and numeric features ``x_num``.

        ``x_text`` is batch-first (the LSTM uses ``batch_first=True``); the
        result has one row per sample and ``output_dim`` columns.
        """
        lstm_result = self.lstm(self.embedding(x_text))
        final_hidden = lstm_result[1][0]
        # Index -1 selects the final hidden state of the last LSTM layer.
        text_branch = self.fc_text(final_hidden[-1])
        numeric_branch = self.fc_numeric(x_num)
        return self.classifier(torch.cat([text_branch, numeric_branch], dim=1))

class GRUBotDetector(nn.Module):
    """Bidirectional-GRU classifier over tweet text plus numeric features.

    Text branch: embedding -> dropout -> bidirectional GRU -> concatenation of
    the two final hidden states -> batch norm -> Linear to 64 features.
    Numeric branch: batch norm -> Linear to 32 features.
    The 96 concatenated features pass through
    Linear(96, 128) -> ReLU -> BatchNorm -> Dropout -> Linear(128, output_dim)
    and are returned as raw logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the GRU hidden state per direction.
        num_numeric: Number of numeric features per sample.
        output_dim: Number of logits produced per sample.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_numeric, output_dim):
        super().__init__()
        # Two directions, each contributing hidden_dim values.
        gru_out_dim = hidden_dim * 2

        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.gru = nn.GRU(embed_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.3)
        self.bn_text = nn.BatchNorm1d(gru_out_dim)
        self.fc_text = nn.Linear(gru_out_dim, 64)
        self.bn_num = nn.BatchNorm1d(num_numeric)
        self.fc_numeric = nn.Linear(num_numeric, 32)
        # Positions 0, 2 and 4 carry parameters/buffers and therefore appear in
        # the state-dict keys; keep the layer order as is.
        self.classifier = nn.Sequential(
            nn.Linear(64 + 32, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, output_dim),
        )

    def forward(self, x_text, x_num):
        """Compute logits from token ids ``x_text`` and numeric features ``x_num``.

        ``x_text`` is batch-first (the GRU uses ``batch_first=True``); the
        result has one row per sample and ``output_dim`` columns.
        """
        embedded = self.dropout(self.embedding(x_text))
        _, final_states = self.gru(embedded)
        # The last two entries of the GRU's final-state tensor are joined
        # feature-wise (one per direction of the single bidirectional layer).
        both_directions = torch.cat([final_states[-2], final_states[-1]], dim=1)
        text_features = self.fc_text(self.bn_text(both_directions))
        numeric_features = self.fc_numeric(self.bn_num(x_num))
        fused = torch.cat([text_features, numeric_features], dim=1)
        return self.classifier(fused)

# ===== Load Scaler & Label Encoder =====
# Both artefacts are read from the LSTM repository checkout and are used for
# whichever model is selected at prediction time.
scaler = joblib.load("./bot-detector-lstm/scaler.pkl")
le = joblib.load("./bot-detector-lstm/label_encoder.pkl")

# ===== Config =====
# Prefer the GPU when one is available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Architecture hyper-parameters; they have to describe the same architecture
# as the checkpoints loaded below.
vocab_size_gru = vocab_size_lstm = 36380
embedding_dim, hidden_dim = 100, 128
num_numerical_features = 5
num_classes = len(le.classes_)

# ===== Load Models =====
gru_model = GRUBotDetector(
    vocab_size_gru, embedding_dim, hidden_dim, num_numerical_features, num_classes
)
gru_model.to(device)
gru_model.load_state_dict(torch.load("gru_bot/best_gru_bot_model.pth", map_location=device))
gru_model.eval()  # inference mode: dropout off, batch norm uses running statistics

lstm_model = LSTMBotDetector(
    vocab_size_lstm, embedding_dim, hidden_dim, num_numerical_features, num_classes
)
lstm_model.to(device)
lstm_model.load_state_dict(torch.load("bot-detector-lstm/model.pth", map_location=device))
lstm_model.eval()

# ===== Preprocessing =====
def clean_text(text):
    """Return ``text`` converted to lower case; nothing else is normalised."""
    lowered = text.lower()
    return lowered

def tokenize(text):
    """Return the whitespace-separated pieces of ``text`` as a list."""
    pieces = text.split()
    return pieces

def encode(tokens, vocab_size, vocab=None):
    """Map tokens to integer ids.

    Args:
        tokens: Iterable of string tokens.
        vocab_size: Size of the embedding table; hashed ids fall in
            ``[0, vocab_size)``.
        vocab: Optional token -> id mapping.  When given, ids are looked up in
            it and unknown tokens get id 1; ``vocab_size`` is not used in that
            case.

    Returns:
        List of integer ids, one per token.

    Without ``vocab`` the id is a CRC-32 checksum of the UTF-8 token reduced
    modulo ``vocab_size``.  CRC-32 replaces the built-in ``hash()``, whose
    value for strings is salted per interpreter process, so the same tweet
    used to receive different ids (and predictions) after every kernel restart.

    NOTE(review): hashed ids presumably do not coincide with the ids the
    checkpoints were trained with; pass the training vocabulary via ``vocab``
    to get matching ids - confirm which vocabulary each model expects.
    """
    if vocab is not None:
        unknown_id = 1
        return [vocab.get(token, unknown_id) for token in tokens]
    # A modulo result is already below vocab_size, so no extra clamping is needed.
    return [
        zlib.crc32(token.encode("utf-8", "surrogatepass")) % vocab_size
        for token in tokens
    ]

# ===== Prediction Function =====
def predict_bot(username, text, fav, rt, reply, quote, tpd, model_choice):
    """Classify one tweet with the selected model.

    Args:
        username: Only echoed back in the summary string; it is not a model input.
        text: Raw tweet text; lower-cased, tokenized and encoded with the
            module-level helpers.
        fav, rt, reply, quote, tpd: Numeric features, passed to the scaler in
            this order.
        model_choice: ``"GRU"`` selects the GRU model; any other value selects
            the LSTM model.

    Returns:
        A pair ``(probabilities, summary)``: a dict mapping every entry of
        ``le.classes_`` to its softmax probability, and the string
        ``"<label> (username: <username>)"``.
    """
    use_gru = model_choice == "GRU"
    active_model = gru_model if use_gru else lstm_model
    active_vocab_size = vocab_size_gru if use_gru else vocab_size_lstm

    token_ids = encode(tokenize(clean_text(text)), active_vocab_size)
    encoded = torch.tensor([token_ids], dtype=torch.long).to(device)

    # Right-pad sequences shorter than five tokens (F.pad fills with 0, the
    # padding id of both models).
    shortfall = 5 - encoded.shape[1]
    if shortfall > 0:
        encoded = F.pad(encoded, (0, shortfall))

    scaled = scaler.transform(np.array([[fav, rt, reply, quote, tpd]]))
    numeric_tensor = torch.tensor(scaled, dtype=torch.float32).to(device)

    with torch.no_grad():
        logits = active_model(encoded, numeric_tensor)
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    label = le.inverse_transform([np.argmax(probs)])[0]

    class_probs = {}
    for class_idx in range(len(probs)):
        class_probs[le.classes_[class_idx]] = float(probs[class_idx])
    return class_probs, f"{label} (username: {username})"


# ===== Gradio Interface =====
# Widgets are listed in the same order as predict_bot's parameters.
inputs = [
    gr.Textbox(label="Username"),  # extra input, echoed back in the result text
    gr.Textbox(label="Teks Tweet"),
    *(
        gr.Slider(0, 10000, step=1, label=count_name)
        for count_name in ("favorite_count", "retweet_count", "reply_count", "quote_count")
    ),
    gr.Slider(0, 100, step=0.1, label="tweet_per_day"),
    gr.Radio(["GRU", "LSTM"], label="Pilih Model"),
]

# predict_bot returns (class -> probability mapping, summary string), one value
# for each of these two output components.
outputs = [
    gr.Label(num_top_classes=len(le.classes_), label="Probabilitas Tiap Kelas"),
    gr.Textbox(label="Prediksi Label"),
]

demo = gr.Interface(
    title="Deteksi Bot dengan GRU & LSTM",
    fn=predict_bot,
    inputs=inputs,
    outputs=outputs,
)
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a1b9a499db18575dd5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


