In [13]:
from mastodon import Mastodon
import pandas as pd
import datetime
import time
import numpy as np
from tensorflow.keras.models import load_model

# Mastodon credentials Please replace with your own credentials The code will not work without valid credentials
mastodonInstanceUrl = "https://mastodon.social"
mastodonClientKey    = ""
mastodonClientSecret = ""
mastodonAccessToken  = ""

mastodonClient = Mastodon( client_id=     mastodonClientKey, client_secret= mastodonClientSecret, access_token=  mastodonAccessToken, api_base_url=  mastodonInstanceUrl)
botModel = load_model("my_model.h5", compile=False)
print("Model loaded successfully")
candidateList       = []
processedUsernames  = set()

while len(candidateList) < 20:
    print("Fetching a batch of Mastodon statuses")
    mastodonStatuses = mastodonClient.timeline_public(limit=20)
    print("Retrieved", len(mastodonStatuses), "statuses")

    for mastodonStatus in mastodonStatuses:
        mastodonAccount = mastodonStatus.get("account", {})
        mastodonUsername = mastodonAccount.get("acct", "")
        if mastodonUsername in processedUsernames:
            continue
        processedUsernames.add(mastodonUsername)
        createdAtStr = mastodonAccount.get("created_at")
        if createdAtStr:
            createdDt = pd.to_datetime(createdAtStr)
            nowUtc   = datetime.datetime.now(datetime.timezone.utc)
            if createdDt.tzinfo is None:
                createdDt = createdDt.replace(tzinfo=datetime.timezone.utc)
            else:
                createdDt = createdDt.tz_convert(datetime.timezone.utc)
            accountAgeDays = (nowUtc - createdDt).days
        else:
            accountAgeDays = 0

        statusCount    = mastodonAccount.get("statuses_count", 0)
        followingCount = mastodonAccount.get("following_count", 0)
        followerCount  = mastodonAccount.get("followers_count", 0)
        avatarUrl      = mastodonAccount.get("avatar", "")
        headerUrl      = mastodonAccount.get("header", "")
        hasAvatar     = 1 if avatarUrl and "default" not in avatarUrl.lower() else 0
        hasBackground = 1 if headerUrl and "default" not in headerUrl.lower() else 0
        featureVector = [accountAgeDays, 0, 0, statusCount, followingCount, followerCount, 0, hasAvatar, hasBackground, 0]
        inputArray = np.array([featureVector], dtype=np.float32)
        predictionValues = botModel.predict(inputArray)
        botProbability   = predictionValues[0][0]
        print("User:", mastodonUsername, "Probability:", botProbability)

        if botProbability > 0.6:
            candidateList.append({
                "username":       mastodonUsername,
                "bot_probability": botProbability
            })
            print("Added candidate", mastodonUsername, "(total so far:", len(candidateList), ")")
            if len(candidateList) >= 20:
                break

    if len(candidateList) < 20:
        print("Only found", len(candidateList), "candidates; waiting 2 seconds")
        time.sleep(2)



Model loaded successfully
Fetching a batch of Mastodon statuses
Retrieved 20 statuses
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
User: notasnark Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
User: quester@mstdn.games Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
User: WashPost@flipboard.com Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
User: misryoum Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
User: FrieMa_DeRiaRike@pixelfed.social Probability: 0.0031096796
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
User: jessica.brezende@threads.net Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
User: FIPMondePlaysNow@social.nocle.fr Probability: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
User: phuki

KeyboardInterrupt: 

In [12]:
import requests

# BlueSky credentials Please replace with your own credentials The code will not work without valid credentials
blueSkyHandle      = ""
blueSkyAppPassword = ""

responseSession = requests.post( "https://bsky.social/xrpc/com.atproto.server.createSession", json={  "identifier": blueSkyHandle, "password":   blueSkyAppPassword })
responseSession.raise_for_status()
blueSkyJwt = responseSession.json().get("accessJwt")
blueSkyHeaders = { "Authorization": "Bearer " + blueSkyJwt}
botDetectorModel = load_model("my_model.h5", compile=False)
print("Model loaded successfully")
botCandidates   = []
seenUserHandles = set()

while len(botCandidates) < 20:
    responseFeed = requests.get(  "https://public.api.bsky.app/xrpc/app.bsky.feed.getTimeline", params={"limit": 20},  headers=blueSkyHeaders)
    responseFeed.raise_for_status()
    feedList = responseFeed.json().get("feed", [])
    for feedItem in feedList:
        authorInfo = feedItem["post"]["author"]
        handleOnly = authorInfo.get("handle", "")
        fullUserId = "bsky/" + handleOnly
        if fullUserId in seenUserHandles:
            continue
        seenUserHandles.add(fullUserId)
        createdAtStr = authorInfo.get("createdAt", None)
        if createdAtStr:
            createdDatetime = pd.to_datetime(createdAtStr)
            if createdDatetime.tzinfo is None:
                createdDatetime = createdDatetime.tz_localize("UTC")
            else:
                createdDatetime = createdDatetime.tz_convert("UTC")
            nowUtc     = datetime.datetime.now(datetime.timezone.utc)
            ageInDays  = (nowUtc - createdDatetime).days
        else:
            ageInDays = 0
        featureVector = [ageInDays, 0, 0, authorInfo.get("postCount", 0), authorInfo.get("followingCount", 0), authorInfo.get("followerCount", 0), 0, 1 if "bsky-cdn" in authorInfo.get("avatar", "").lower() else 0, 1 if "bsky-cdn" in authorInfo.get("banner", "").lower() else 0, 0]
        inputArray     = np.array([featureVector], dtype=np.float32)
        predictionVals = botDetectorModel.predict(inputArray)
        botProb        = predictionVals[0][0]
        print("User:", fullUserId, "Bot probability:", botProb)
        if botProb > 0.6:
            botCandidates.append({
                "username":        fullUserId,
                "bot_probability": botProb
            })
            print("Added candidate", fullUserId, "total so far:", len(botCandidates))
            if len(botCandidates) >= 20:
                break

    if len(botCandidates) < 20:
        print("Found", len(botCandidates), "candidates so far. Waiting 2 seconds")
        time.sleep(2)
        

Model loaded successfully.


HTTPError: 401 Client Error: Unauthorized for url: https://public.api.bsky.app/xrpc/app.bsky.feed.getTimeline?limit=20

In [18]:

df_candidates = pd.DataFrame(bot_candidates)
df_candidates = df_candidates.sort_values(by="bot_probability", ascending=False)
print("\nFinal list of bot candidate accounts (sorted by bot probability):")
print(df_candidates)
output_file = "mastodon_bot_candidates.csv"
df_candidates.to_csv(output_file, index=False)
print(f"\nSaved the candidate accounts to {output_file}")



Final list of bot candidate accounts (sorted by bot probability):
                        username  bot_probability
0           Xibanya@mastodon.xyz         1.000000
2         abcfeeds@rssfeed.media         1.000000
3    panchromaholic@mastodon.art         1.000000
4          paco@infosec.exchange         1.000000
11  thegibson@masto.hackers.town         1.000000
5    henkka@some.henkkalaukka.fi         1.000000
6        brie@do.crimes.brie.gay         1.000000
9          savjee@mastodon.world         1.000000
17              Ask_aubry@mas.to         1.000000
15                   ChonoRyusei         1.000000
18                     nunu_bdls         0.995006
1           algernon@journa.host         0.993116
10                       fishass         0.991790
16                violetexecella         0.991007
19                   Fengzhi0411         0.985594
12                 pencil_is_pen         0.963696
13                      XiYanyan         0.956736
14                        Zumimi 