# Simplistic Collection and Labeling Practices Limit the Generalizability of Benchmark Datasets for Twitter Bot Detection

Analysis of benchmark datasets for bot detection.

## Setup

In [1]:
import bson
import datetime
import itertools
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from matplotlib.ticker import MaxNLocator
import pandas as pd
import pyreadr
import random
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score
from sklearn import tree, ensemble
import sys
import time
import importlib


In [531]:
from data_accessor import balance_dataset, load_twibot_20, load_twibot_22, load_dataset, load_cresci, load_cresci2017, load_cresci2015, load_caverlee, load_pan19, load_cresci2017_tweets, load_cresci2015_tweets, load_midterm, load_gilani_derived_bands, load_yang, load_yang_tweets, load_cresci_stock_tweets, tweets_to_countvectorized_df, load_spammers, get_intraclass_labels, load_human_dataset_list, load_fake_followers, load_other_bots, get_shared_cols
from fit_and_score import fit, score, fit_and_score, mean_performance_sdt, kfold_cv, analyze_dataset, train_test_fit_and_score, train_test_fit_and_score_clf, nonnumeric, plot_metrics, calculate_accuracy, analyze_twibot
from leave_one_dataset_out import leave_dataset_out, leave_dataset_out_botometer
from plotting import heatmap_train_on_one_test_on_another, process_tick_label
from preprocess import drop_and_one_hot, extract_users, load_json, preprocess_users, COLUMNS_TO_DROP, DUMMY_COLUMNS
from print_table import print_totoa_matrix, print_single_dataset_score_table, get_max_score, print_single_dataset_score_table_without_sota, print_intratype_test, print_leave_one_out_table
from train_on_one_test_on_another import train_on_one_test_on_another, train_on_one_test_on_another_botometer_combined, train_on_one_test_on_another_botometer_combined, train_test_botometer_combined



In [259]:
# Root of the project checkout; every data path in this notebook is built from it.
PROJ_PATH = "/nfs/sloanlab003/projects/bot_ensemble_proj/bot-detection"


def namestr(obj, namespace):
    """Return every name in `namespace` that is bound to `obj` itself.

    Matching is by identity (`is`), not equality, so an equal copy of `obj`
    stored under another name is not reported.
    """
    return [name for name in namespace if namespace[name] is obj]


def get_dataset_name(df):
    """Return the first module-level variable name bound to `df`.

    Returns the string "Unknown" when no global variable refers to `df`.
    """
    try:
        return namestr(df, globals())[0]
    except IndexError:
        # Only the "no matching name" case is expected; anything else
        # (e.g. KeyboardInterrupt) should propagate instead of being hidden.
        return "Unknown"


## Load datasets and calculate scores

## Profile data

In [224]:
# Only referenced by the commented-out per-band averaging for gilani below.
max_depth = 4

## BENCHMARK DATASETS
# twibot
# NOTE(review): the original called `load_twibot`, which the import cell does not
# provide (it imports `load_twibot_20` / `load_twibot_22`). `load_twibot_20` matches
# the Twibot-20 files loaded here — confirm it still returns (df, one_hot, labels).
twibot_df, twibot_2020_one_hot, twibot_labels = load_twibot_20(PROJ_PATH + "/data/Twibot-20/train.json")
twibot_test, twibot_2020_one_hot_test, twibot_labels_test = load_twibot_20(PROJ_PATH + "/data/Twibot-20/test.json")
# Twibot has separate train and test files, so it is scored with analyze_twibot
# rather than analyze_dataset.
twibot_scores = analyze_twibot(twibot_2020_one_hot, twibot_2020_one_hot_test, twibot_labels, twibot_labels_test)

# botometer-feedback
bf, botometer_feedback_2019_one_hot, bf_labels = load_dataset(PROJ_PATH + "/data/botometer-feedback-2019_tweets.json", PROJ_PATH + "/data/botometer-feedback-2019.tsv")
botometer_feedback_scores = analyze_dataset(botometer_feedback_2019_one_hot, bf_labels, silent=True)

#rtbust
rtbust, cresci_rtbust_2019_one_hot, rtbust_labels = load_dataset(PROJ_PATH + "/data/cresci-rtbust-2019_tweets.json", PROJ_PATH + "/data/cresci-rtbust-2019.tsv")
rtbust_scores = analyze_dataset(cresci_rtbust_2019_one_hot, rtbust_labels, silent=True)

#gilani-2017
gilani, gilani_2017_one_hot, gilani_labels = load_dataset(PROJ_PATH + "/data/gilani-2017_tweets.json", PROJ_PATH + "/data/gilani-2017.tsv")
gilani_scores = analyze_dataset(gilani_2017_one_hot, gilani_labels, silent=True)

#gilani-2017 with derived features from original paper
gilani_derived_dfs, gilani_derived_labels = load_gilani_derived_bands(PROJ_PATH + "/data/gilani_derived/classification_processed/")
# Alternative kept for reference: score each band separately and average.
#gilani_derived_scores = [analyze_dataset(gilani_derived_df, gilani_derived_label, silent=True) for gilani_derived_df, gilani_derived_label in zip(gilani_derived_dfs, gilani_derived_labels)]
#gilani_derived_scores_avg = [[sum([strat[depth_ind][scr_ind] for strat in gilani_derived_scores])/4 for scr_ind in range(5)] for depth_ind in range(max_depth)]
# Current approach: concatenate all bands and score them as one dataset.
gilani_derived_scores = analyze_dataset(pd.concat(gilani_derived_dfs), pd.concat(gilani_derived_labels), silent=True)


#cresci-2017
# The "{}" in the path is left unformatted here and handed to the loader as-is.
cresci2017, cresci_2017_one_hot, cresci2017_labels = load_cresci2017(PROJ_PATH + "/data/cresci-2017/{}.csv/users.csv")
cresci2017_scores = analyze_dataset(cresci_2017_one_hot, cresci2017_labels, silent=True)

#cresci-2015
cresci2015, cresci_2015_one_hot, cresci2015_labels = load_cresci2015(PROJ_PATH + "/data/cresci-2015/{}/users.csv")
cresci2015_scores = analyze_dataset(cresci_2015_one_hot, cresci2015_labels, silent=True)

#caverlee-2011
# load_caverlee returns two values (features, labels) rather than three.
caverlee_2011, caverlee_2011_labels = load_caverlee(PROJ_PATH + "/data/social_honeypot_icwsm_2011/")
caverlee_2011_scores = analyze_dataset(caverlee_2011, caverlee_2011_labels, silent=True)

#cresci-stock-2018
cresci_stock, cresci_stock_2018_one_hot, cresci_stock_labels = load_dataset(PROJ_PATH + "/data/cresci-stock-2018_tweets.json", PROJ_PATH + "/data/cresci-stock-2018.tsv")
cresci_stock_scores = analyze_dataset(cresci_stock_2018_one_hot, cresci_stock_labels, silent=True)

### SINGLE-CLASS DATASETS
# These are only loaded in this cell; no scores are computed for them here.
# bot-wiki
botwiki_df, botwiki_one_hot, botwiki_labels = load_dataset(PROJ_PATH + "/data/botwiki-2019_tweets.json", PROJ_PATH + "/data/botwiki-2019.tsv")
# celebrity
celebrity_df, celebrity_one_hot, celebrity_labels = load_dataset(PROJ_PATH + "/data/celebrity-2019_tweets.json", PROJ_PATH + "/data/celebrity-2019.tsv")
# political-bots
political_bots_df, political_bots_one_hot, political_bots_labels = load_dataset(PROJ_PATH + "/data/political-bots-2019/political-bots-2019_tweets.json", PROJ_PATH + "/data/political-bots-2019/political-bots-2019.tsv")
# pronbots
pronbots_df, pronbots_one_hot, pronbots_labels = load_dataset(PROJ_PATH + "/data/pronbots-2019_tweets.json", PROJ_PATH + "/data/pronbots-2019.tsv")
# vendor purchased
vendor_purchased_df, vendor_purchased_one_hot, vendor_purchased_labels = load_dataset(PROJ_PATH + "/data/vendor-purchased-2019_tweets.json", PROJ_PATH + "/data/vendor-purchased-2019.tsv")
#verified
verified_df, verified_one_hot, verified_labels = load_dataset(PROJ_PATH + "/data/verified-2019_tweets.json", PROJ_PATH + "/data/verified-2019.tsv")


Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.19653. Execution time: 0.09122705459594727 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.2896469. Execution time: 0.0930182933807373 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.3837435. Execution time: 0.09387660026550293 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.4826028. Execution time: 0.09876298904418945 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.6107624. Execution time: 0.07801270484924316 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.692383. Execution time: 0.08152556419372559 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.7761953. Execution time: 0.08350086212158203 s
Starting kfold_cv at 10/10/22 17:49:52
Finished kfold_cv at 1665438592.8622482. Execution time: 0.08582735061645508 s
Starting kfold_cv at 10/10/22 17:49:53
Finished kfold_cv at 

In [222]:
# Load the midterm-2018 processed user objects (JSON) with their labels (TSV),
# then score the `midterm_2018_one_hot` features.
# This is the only analyze_dataset call in the notebook that passes kfold=True.
midterm, midterm_2018_one_hot, midterm_labels = load_midterm(PROJ_PATH + "/data/midterm-2018/midterm-2018_processed_user_objects.json", PROJ_PATH + "/data/midterm-2018/midterm-2018.tsv")
midterm_scores = analyze_dataset(midterm_2018_one_hot, midterm_labels, kfold=True, silent=True)

Starting kfold_cv at 10/10/22 17:26:23
Finished kfold_cv at 1665437297.8509936. Execution time: 114.37081527709961 s
Starting kfold_cv at 10/10/22 17:28:17
Finished kfold_cv at 1665437437.4996521. Execution time: 139.64831042289734 s
Starting kfold_cv at 10/10/22 17:30:37
Finished kfold_cv at 1665437597.1995146. Execution time: 159.6995084285736 s
Starting kfold_cv at 10/10/22 17:33:17
Finished kfold_cv at 1665437774.573369. Execution time: 177.37332701683044 s


In [648]:
# Load yang-2013 from its data directory.
yang, yang_2013_one_hot, yang_labels = load_yang(PROJ_PATH + "/data/yang-2013/")
# Copy `followings_count` into a `friends_count` column, the name the
# train-on-one/test-on-another cell selects for every dataset. The original
# column is kept, so both names are present as features when scoring.
yang_2013_one_hot['friends_count'] = yang_2013_one_hot['followings_count']
yang_scores = analyze_dataset(yang_2013_one_hot, yang_labels)

Starting kfold_cv at 10/13/22 11:11:41
Fold 0 in progress
|--- followers_count <= 250.50
|   |--- weights: [349.00, 460.00] class: 1
|--- followers_count >  250.50
|   |--- weights: [7649.00, 341.00] class: 0

Accuracy: 0.9173103134938664
Precision 0.5422885572139303
Recall: 0.5477386934673367
F1: 0.545
Balanced accuracy: 0.7508923237566454
Fold 1 in progress
|--- followers_count <= 295.50
|   |--- weights: [424.00, 492.00] class: 1
|--- followers_count >  295.50
|   |--- weights: [7563.00, 320.00] class: 0

Accuracy: 0.9182189913675602
Precision 0.5175438596491229
Recall: 0.6276595744680851
F1: 0.5673076923076923
Balanced accuracy: 0.7865073828624578
Fold 2 in progress
|--- followers_count <= 241.50
|   |--- weights: [327.00, 465.00] class: 1
|--- followers_count >  241.50
|   |--- weights: [7661.00, 346.00] class: 0

Accuracy: 0.9145842798727851
Precision 0.5026737967914439
Recall: 0.4973544973544973
F1: 0.4999999999999999
Balanced accuracy: 0.7255659166692964
Fold 3 in progress
|---

Accuracy: 0.9527487505679236
Precision 0.8466666666666667
Recall: 0.6105769230769231
F1: 0.7094972067039106
Balanced accuracy: 0.7995182658535644
Fold 3 in progress
|--- followers_count <= 305.50
|   |--- statuses_count <= 31.50
|   |   |--- followings_count <= 198.00
|   |   |   |--- weights: [39.00, 8.00] class: 0
|   |   |--- followings_count >  198.00
|   |   |   |--- weights: [44.00, 447.00] class: 1
|   |--- statuses_count >  31.50
|   |   |--- friends_count <= 720.50
|   |   |   |--- weights: [317.00, 17.00] class: 0
|   |   |--- friends_count >  720.50
|   |   |   |--- weights: [38.00, 36.00] class: 0
|--- followers_count >  305.50
|   |--- background_url_http://s.twimg.com/a/1288470193/images/themes/theme1/bg.png <= 0.50
|   |   |--- followers_count <= 1123.50
|   |   |   |--- weights: [1348.00, 204.00] class: 0
|   |   |--- followers_count >  1123.50
|   |   |   |--- weights: [6204.00, 72.00] class: 0
|   |--- background_url_http://s.twimg.com/a/1288470193/images/themes/theme

Accuracy: 0.9582008178100864
Precision 0.9122807017543859
Recall: 0.5591397849462365
F1: 0.6933333333333332
Balanced accuracy: 0.7770885028949545
Fold 3 in progress
|--- followers_count <= 305.50
|   |--- statuses_count <= 27.50
|   |   |--- friends_count <= 198.00
|   |   |   |--- friends_count <= 151.00
|   |   |   |   |--- weights: [28.00, 2.00] class: 0
|   |   |   |--- friends_count >  151.00
|   |   |   |   |--- weights: [7.00, 7.00] class: 0
|   |   |--- friends_count >  198.00
|   |   |   |--- account creation time <= 1264300982272.00
|   |   |   |   |--- weights: [33.00, 424.00] class: 1
|   |   |   |--- account creation time >  1264300982272.00
|   |   |   |   |--- weights: [12.00, 3.00] class: 0
|   |--- statuses_count >  27.50
|   |   |--- followings_count <= 700.00
|   |   |   |--- background_url_http://s.twimg.com/a/1288374569/images/themes/theme1/bg.png <= 0.50
|   |   |   |   |--- weights: [325.00, 16.00] class: 0
|   |   |   |--- background_url_http://s.twimg.com/a/128

In [659]:
# Score a class-balanced version of yang-2013.
# NOTE(review): how balance_dataset equalizes the classes is not visible in this
# notebook — check data_accessor before relying on it.
yang_balanced, yang_labels_balanced = balance_dataset(yang_2013_one_hot, yang_labels)
yang_balanced_scores = analyze_dataset(yang_balanced, yang_labels_balanced)

Starting kfold_cv at 10/13/22 13:33:49
Fold 0 in progress
|--- statuses_count <= 103.50
|   |--- weights: [130.00, 673.00] class: 1
|--- statuses_count >  103.50
|   |--- weights: [674.00, 122.00] class: 0

Accuracy: 0.8229426433915212
Precision 0.8602150537634409
Recall: 0.7804878048780488
F1: 0.8184143222506394
Balanced accuracy: 0.8239173718267795
Fold 1 in progress
|--- statuses_count <= 156.50
|   |--- weights: [153.00, 705.00] class: 1
|--- statuses_count >  156.50
|   |--- weights: [641.00, 100.00] class: 0

Accuracy: 0.8329177057356608
Precision 0.82
Recall: 0.841025641025641
F1: 0.830379746835443
Balanced accuracy: 0.8331341797361215
Fold 2 in progress
|--- statuses_count <= 155.50
|   |--- weights: [142.00, 697.00] class: 1
|--- statuses_count >  155.50
|   |--- weights: [652.00, 108.00] class: 0

Accuracy: 0.8229426433915212
Precision 0.7844036697247706
Recall: 0.8769230769230769
F1: 0.8280871670702179
Balanced accuracy: 0.8243838685586258
Fold 3 in progress
|--- statuses_co

Accuracy: 0.8628428927680798
Precision 0.8686868686868687
Recall: 0.8557213930348259
F1: 0.862155388471178
Balanced accuracy: 0.8628606965174129
Fold 3 in progress
|--- statuses_count <= 156.50
|   |--- followers_count <= 1417.50
|   |   |--- followers_count <= 323.50
|   |   |   |--- weights: [18.00, 495.00] class: 1
|   |   |--- followers_count >  323.50
|   |   |   |--- weights: [57.00, 186.00] class: 1
|   |--- followers_count >  1417.50
|   |   |--- account creation time <= 1265086234624.00
|   |   |   |--- weights: [75.00, 12.00] class: 0
|   |   |--- account creation time >  1265086234624.00
|   |   |   |--- weights: [4.00, 4.00] class: 0
|--- statuses_count >  156.50
|   |--- followers_count <= 1479.50
|   |   |--- statuses_count <= 694.00
|   |   |   |--- weights: [72.00, 61.00] class: 0
|   |   |--- statuses_count >  694.00
|   |   |   |--- weights: [83.00, 10.00] class: 0
|   |--- followers_count >  1479.50
|   |   |--- background_url_http://s.twimg.com/a/1288305442/images/t

Accuracy: 0.8728179551122195
Precision 0.8571428571428571
Recall: 0.8795811518324608
F1: 0.8682170542635659
Balanced accuracy: 0.8731239092495637
Fold 3 in progress
|--- statuses_count <= 156.50
|   |--- followers_count <= 1417.50
|   |   |--- followers_count <= 371.00
|   |   |   |--- friends_count <= 261.00
|   |   |   |   |--- weights: [10.00, 29.00] class: 1
|   |   |   |--- friends_count >  261.00
|   |   |   |   |--- weights: [9.00, 470.00] class: 1
|   |   |--- followers_count >  371.00
|   |   |   |--- timezone_Pacific Time (US & Canada) <= 0.50
|   |   |   |   |--- weights: [42.00, 184.00] class: 1
|   |   |   |--- timezone_Pacific Time (US & Canada) >  0.50
|   |   |   |   |--- weights: [9.00, 2.00] class: 0
|   |--- followers_count >  1417.50
|   |   |--- statuses_count <= 141.50
|   |   |   |--- account creation time <= 1267022495744.00
|   |   |   |   |--- weights: [70.00, 12.00] class: 0
|   |   |   |--- account creation time >  1267022495744.00
|   |   |   |   |--- weigh

## Tweets data

In [11]:
# Load the yang-2013 tweet features and labels from the same data directory as
# the profile data, then score them.
yang_tweets, yang_2013_tweets_labels = load_yang_tweets(PROJ_PATH + "/data/yang-2013/")
yang_tweets_scores = analyze_dataset(yang_tweets, yang_2013_tweets_labels)


Starting kfold_cv at 10/09/22 18:33:47
Finished kfold_cv at 1665354850.2980096. Execution time: 22.648863077163696 s
Starting kfold_cv at 10/09/22 18:34:10
Finished kfold_cv at 1665354885.2695007. Execution time: 34.97130537033081 s
Starting kfold_cv at 10/09/22 18:34:45
Finished kfold_cv at 1665354932.1829548. Execution time: 46.91325569152832 s
Starting kfold_cv at 10/09/22 18:35:32
Finished kfold_cv at 1665354990.595978. Execution time: 58.41265058517456 s


In [12]:
# Load the PAN-19 English training split and score it.
# The "{}" in the XML path is passed through unformatted for load_pan19 to fill
# in; the second argument is the truth file holding the labels.
pan_2019_df, pan19_labels = load_pan19(PROJ_PATH + "/data/pan19/pan19-author-profiling-training-2019-02-18/en/{}.xml", PROJ_PATH + "/data/pan19/pan19-author-profiling-training-2019-02-18/en/truth.txt")
pan19_scores = analyze_dataset(pan_2019_df, pan19_labels)


Starting kfold_cv at 10/09/22 18:36:38
Finished kfold_cv at 1665355003.04464. Execution time: 4.932588338851929 s
Starting kfold_cv at 10/09/22 18:36:43
Finished kfold_cv at 1665355010.2420366. Execution time: 7.197096586227417 s
Starting kfold_cv at 10/09/22 18:36:50
Finished kfold_cv at 1665355019.6079922. Execution time: 9.365800142288208 s
Starting kfold_cv at 10/09/22 18:36:59
Finished kfold_cv at 1665355031.132208. Execution time: 11.524060726165771 s


In [13]:
# Load cresci-2017 tweet features and labels, then score them.
# The "{}" in the path is passed through unformatted for the loader to fill in.
cresci_2017_tweets, cresci_2017_tweets_labels = load_cresci2017_tweets(PROJ_PATH + "/data/cresci-2017/{}.csv/tweets.csv")
cresci_2017_tweets_scores = analyze_dataset(cresci_2017_tweets, cresci_2017_tweets_labels, silent=True)


  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


Starting kfold_cv at 10/09/22 18:38:46
Finished kfold_cv at 1665355177.767371. Execution time: 50.88655734062195 s
Starting kfold_cv at 10/09/22 18:39:37
Finished kfold_cv at 1665355256.6510406. Execution time: 78.88348150253296 s
Starting kfold_cv at 10/09/22 18:40:56
Finished kfold_cv at 1665355361.7824152. Execution time: 105.13099026679993 s
Starting kfold_cv at 10/09/22 18:42:41
Finished kfold_cv at 1665355493.635087. Execution time: 131.85222005844116 s


In [14]:
# Load cresci-2015 tweet features and labels, then score them.
# The "{}" in the path is passed through unformatted for the loader to fill in.
cresci_2015_tweets, cresci_2015_tweets_labels = load_cresci2015_tweets(PROJ_PATH + "/data/cresci-2015/{}/tweets.csv")
cresci_2015_tweets_scores = analyze_dataset(cresci_2015_tweets, cresci_2015_tweets_labels)


  exec(code_obj, self.user_global_ns, self.user_ns)


Starting kfold_cv at 10/09/22 18:45:39
Finished kfold_cv at 1665355562.6589656. Execution time: 23.49260425567627 s
Starting kfold_cv at 10/09/22 18:46:02
Finished kfold_cv at 1665355597.2574382. Execution time: 34.59829545021057 s
Starting kfold_cv at 10/09/22 18:46:37
Finished kfold_cv at 1665355639.3805707. Execution time: 42.122673988342285 s
Starting kfold_cv at 10/09/22 18:47:19
Finished kfold_cv at 1665355688.5380826. Execution time: 49.157062292099 s


In [15]:
# Load cresci-stock-2018 tweet features and labels, then score them.
# The loader takes three paths: the dataset's tweets JSON, its label TSV, and a
# "{}"-templated path under data/golbeck/tweets.
# NOTE(review): why a golbeck path feeds the cresci-stock loader is not visible
# here — confirm against load_cresci_stock_tweets.
cresci_stock_2018_tweets, cresci_stock_2018_tweets_labels = load_cresci_stock_tweets(PROJ_PATH + "/data/cresci-stock-2018_tweets.json", PROJ_PATH + "/data/cresci-stock-2018.tsv", PROJ_PATH + "/data/golbeck/tweets/{}_tweets.csv")
cresci_stock_2018_tweets_scores = analyze_dataset(cresci_stock_2018_tweets, cresci_stock_2018_tweets_labels)


Starting kfold_cv at 10/09/22 18:56:45
Finished kfold_cv at 1665356428.3787742. Execution time: 222.49842143058777 s
Starting kfold_cv at 10/09/22 19:00:28
Finished kfold_cv at 1665356784.0436714. Execution time: 355.6646194458008 s
Starting kfold_cv at 10/09/22 19:06:24
Finished kfold_cv at 1665357261.5308852. Execution time: 477.4868063926697 s
Starting kfold_cv at 10/09/22 19:14:21
Finished kfold_cv at 1665357841.073829. Execution time: 579.5425610542297 s


In [649]:
# Profile-feature frames used by the cross-dataset experiments below.
# `labels` must stay in the same order, since the two lists are zipped together.
datasets = [
    twibot_2020_one_hot,
    botometer_feedback_2019_one_hot,
    cresci_rtbust_2019_one_hot,
    gilani_2017_one_hot,
    cresci_2017_one_hot,
    cresci_2015_one_hot,
    cresci_stock_2018_one_hot,
    yang_2013_one_hot,
    caverlee_2011,
    midterm_2018_one_hot,
]
# Display names are derived from the variable name each frame is bound to.
dataset_names = [process_tick_label(get_dataset_name(frame)) for frame in datasets]
labels = [
    twibot_labels,
    bf_labels,
    rtbust_labels,
    gilani_labels,
    cresci2017_labels,
    cresci2015_labels,
    cresci_stock_labels,
    yang_labels,
    caverlee_2011_labels,
    midterm_labels,
]

# Per-dataset score lists, keyed by the name written to scores.csv.
# 'gilani-2017' uses the scores computed on the derived-feature bands.
scores = {
    'twibot-2020': twibot_scores,
    'feedback-2019': botometer_feedback_scores,
    'rtbust-2019': rtbust_scores,
    'pan-2019': pan19_scores,
    'midterm-2018': midterm_scores,
    'stock-2018': cresci_stock_scores,
    'gilani-2017': gilani_derived_scores,
    'cresci-2017': cresci2017_scores,
    'cresci-2015': cresci2015_scores,
    'yang-2013': yang_scores,
    'caverlee-2011': caverlee_2011_scores,
}

# Flatten each score list into one wide row: every entry is a 5-tuple
# (accuracy, precision, recall, f1, balanced accuracy) and its 1-based position
# becomes the numeric suffix of the column names.
rows = []
for dataset_label, score_list in scores.items():
    record = {'name': dataset_label}
    for position, (acc, prec, rec, f1, bal_acc) in enumerate(score_list, start=1):
        record[f'a{position}'] = acc        # accuracy
        record[f'p{position}'] = prec       # precision
        record[f'r{position}'] = rec        # recall
        record[f'f{position}'] = f1         # f1
        record[f'ba{position}'] = bal_acc   # balanced accuracy
    rows.append(record)

scores_df = pd.DataFrame(rows)
scores_df.to_csv("scores.csv", index=False)

## Trees used in "Three shallow decision trees that fit datasets well"

In [133]:
# Depth-1 decision tree on the cresci-2017 tweet features; the recorded output
# below splits on the "earthquake" feature.
# NOTE(review): the original line had a stray `, axis=1)` fragment that made it a
# SyntaxError — possibly left over from a `.drop(..., axis=1)` call. Confirm that
# no columns were meant to be dropped before scoring.
mean_performance_sdt(cresci_2017_tweets, cresci_2017_tweets_labels, depth=1)

Starting train_test_fit_and_score_clf at 10/10/22 13:24:36
|--- earthquake <= 0.50
|   |--- weights: [72.00, 7251.00] class: 1
|--- earthquake >  0.50
|   |--- weights: [797.00, 37.00] class: 0

Accuracy: 0.9887254901960785
Precision 0.9912806539509537
Recall: 0.9961664841182913
F1: 0.9937175635072385
Balanced accuracy: 0.9607000644890522
Finished train_test_fit_and_score_clf at 1665422688.3162985. Execution time: 12.011878490447998 s
Starting train_test_fit_and_score_clf at 10/10/22 13:24:48
|--- earthquake <= 0.50
|   |--- weights: [68.00, 7257.00] class: 1
|--- earthquake >  0.50
|   |--- weights: [798.00, 34.00] class: 0

Accuracy: 0.9852941176470589
Precision 0.989088925259138
Recall: 0.9945145364783324
F1: 0.9917943107221007
Balanced accuracy: 0.9511743189304105
Finished train_test_fit_and_score_clf at 1665422700.1377175. Execution time: 11.821343421936035 s
Starting train_test_fit_and_score_clf at 10/10/22 13:25:00
|--- earthquake <= 0.50
|   |--- weights: [69.00, 7255.00] class

(0.9867647058823529, 0.9867647058823529, 0.955248660181164)

In [134]:
# Depth-1 decision tree on the cresci-2017 profile features; the recorded output
# below splits on `favourites_count`. Left as the cell's final expression so the
# returned tuple is displayed.
mean_performance_sdt(cresci_2017_one_hot, cresci2017_labels, depth=1)

Starting train_test_fit_and_score_clf at 10/10/22 13:25:35
|--- favourites_count <= 16.50
|   |--- weights: [151.00, 8606.00] class: 1
|--- favourites_count >  16.50
|   |--- weights: [2607.00, 130.00] class: 0

Accuracy: 0.9808629088378567
Precision 0.9838932351587667
Recall: 0.9907321594068582
F1: 0.9873008543061649
Balanced accuracy: 0.9709247389213063
Finished train_test_fit_and_score_clf at 1665422736.9770334. Execution time: 1.136648416519165 s
Starting train_test_fit_and_score_clf at 10/10/22 13:25:36
|--- favourites_count <= 16.50
|   |--- weights: [144.00, 8577.00] class: 1
|--- favourites_count >  16.50
|   |--- weights: [2653.00, 120.00] class: 0

Accuracy: 0.9749478079331941
Precision 0.980986871887732
Recall: 0.9863450159308147
F1: 0.9836586472991374
Balanced accuracy: 0.9621533056020395
Finished train_test_fit_and_score_clf at 1665422738.1156442. Execution time: 1.1385843753814697 s
Starting train_test_fit_and_score_clf at 10/10/22 13:25:38
|--- favourites_count <= 16.50


(0.9764787752261658, 0.9764787752261658, 0.9658085615061196)

# Train on one, test on another

In [654]:
rows = []

# Feature columns are fixed lists rather than sets: indexing a DataFrame with a
# set is deprecated in pandas 1.5 and raises TypeError in pandas 2.0, and a list
# also gives every run the same column order.
# (An alternative, not used here, is get_shared_cols([train_on, test_on]).)
full_cols = ['listed_count', 'followers_count', 'friends_count', 'statuses_count']
# Any pair involving one of these datasets is compared without 'listed_count'.
reduced_cols = ['followers_count', 'friends_count', 'statuses_count']
reduced_col_datasets = {'caverlee_2011', 'yang_2013_one_hot'}

for train_on, train_on_labels in zip(datasets, labels):
    # Resolve each name once per iteration; get_dataset_name scans globals().
    train_name = get_dataset_name(train_on)
    for test_on, test_on_labels in zip(datasets, labels):
        test_name = get_dataset_name(test_on)
        print(train_name, test_name)

        if reduced_col_datasets & {train_name, test_name}:
            cols = reduced_cols
        else:
            cols = full_cols

        # A fresh, unfitted forest for every (train, test) pair.
        rf = ensemble.RandomForestClassifier()
        rows.append(train_on_one_test_on_another(train_on[cols], train_on_labels, test_on[cols], test_on_labels, train_name, test_name, method=rf))
train_on_one_test_on_another_performance = pd.DataFrame(rows)


twibot_2020_one_hot twibot_2020_one_hot
Starting train_test_fit_and_score_clf at 10/13/22 12:11:47
Finished train_test_fit_and_score_clf at 1665677509.0415745. Execution time: 1.5582056045532227 s
twibot_2020_one_hot botometer_feedback_2019_one_hot
twibot_2020_one_hot cresci_rtbust_2019_one_hot
twibot_2020_one_hot gilani_2017_one_hot
twibot_2020_one_hot cresci_2017_one_hot
twibot_2020_one_hot cresci_2015_one_hot
twibot_2020_one_hot cresci_stock_2018_one_hot
twibot_2020_one_hot yang_2013_one_hot
twibot_2020_one_hot caverlee_2011
twibot_2020_one_hot midterm_2018_one_hot
botometer_feedback_2019_one_hot twibot_2020_one_hot
botometer_feedback_2019_one_hot botometer_feedback_2019_one_hot
Starting train_test_fit_and_score_clf at 10/13/22 12:12:07
Finished train_test_fit_and_score_clf at 1665677528.1691306. Execution time: 0.2210693359375 s
botometer_feedback_2019_one_hot cresci_rtbust_2019_one_hot
botometer_feedback_2019_one_hot gilani_2017_one_hot
botometer_feedback_2019_one_hot cresci_2017_

  _warn_prf(average, modifier, msg_start, len(result))


yang_2013_one_hot cresci_2017_one_hot
yang_2013_one_hot cresci_2015_one_hot
yang_2013_one_hot cresci_stock_2018_one_hot
yang_2013_one_hot yang_2013_one_hot
Starting train_test_fit_and_score_clf at 10/13/22 12:13:19
Finished train_test_fit_and_score_clf at 1665677600.7264957. Execution time: 0.9769716262817383 s
yang_2013_one_hot caverlee_2011
yang_2013_one_hot midterm_2018_one_hot
caverlee_2011 twibot_2020_one_hot
caverlee_2011 botometer_feedback_2019_one_hot
caverlee_2011 cresci_rtbust_2019_one_hot
caverlee_2011 gilani_2017_one_hot
caverlee_2011 cresci_2017_one_hot
caverlee_2011 cresci_2015_one_hot
caverlee_2011 cresci_stock_2018_one_hot
caverlee_2011 yang_2013_one_hot
caverlee_2011 caverlee_2011
Starting train_test_fit_and_score_clf at 10/13/22 12:14:02
Finished train_test_fit_and_score_clf at 1665677647.0595117. Execution time: 4.060115575790405 s
caverlee_2011 midterm_2018_one_hot
midterm_2018_one_hot twibot_2020_one_hot
midterm_2018_one_hot botometer_feedback_2019_one_hot
midterm_

In [573]:
# Print the train-on-one/test-on-another results as a LaTeX TikZ matrix (see the
# recorded output) for metric key 'a'.
# NOTE(review): 'a' presumably means accuracy, matching the a/p/r/f/ba column
# prefixes written to scores.csv — confirm in print_table.
print_totoa_matrix(train_on_one_test_on_another_performance, 'a')


\begin{tikzpicture}[]
  \matrix[matrix of nodes,row sep=-\pgflinewidth, column sep=-.39em,
nodes={{rectangle}},
column 1/.style={{anchor=east}},]{
\data{\small{feedback-2019}} & |[fill={rgb,255:red,65;green,65;blue,209}, value=0.87]|&|[fill={rgb,255:red,224;green,224;blue,221}, value=0.56]|&|[fill={rgb,255:red,233;green,208;blue,182}, value=0.41]|&|[fill={rgb,255:red,248;green,240;blue,210}, value=0.47]|&|[fill={rgb,255:red,244;green,244;blue,223}, value=0.52]|&|[fill={rgb,255:red,252;green,250;blue,219}, value=0.49]|&|[fill={rgb,255:red,188;green,188;blue,219}, value=0.63]|&|[fill={rgb,255:red,168;green,168;blue,217}, value=0.67]|&|[fill={rgb,255:red,249;green,243;blue,214}, value=0.48]|&|[fill={rgb,255:red,208;green,152;blue,133}, value=0.3]| \\
\data{\small{caverlee-2011}} & |[fill={rgb,255:red,218;green,174;blue,152}, value=0.34]|&|[fill={rgb,255:red,30;green,30;blue,207}, value=0.94]|&|[fill={rgb,255:red,158;green,158;blue,216}, value=0.69]|&|[fill={rgb,255:red,102;green,102;blue,

In [574]:
# Print the train-on-one/test-on-another results as a LaTeX TikZ matrix (see the
# recorded output) for metric key 'f'.
# NOTE(review): 'f' presumably means F1, matching the a/p/r/f/ba column prefixes
# written to scores.csv — confirm in print_table.
print_totoa_matrix(train_on_one_test_on_another_performance, 'f')


\begin{tikzpicture}[]
  \matrix[matrix of nodes,row sep=-\pgflinewidth, column sep=-.39em,
nodes={{rectangle}},
column 1/.style={{anchor=east}},]{
\data{\small{feedback-2019}} & |[fill={rgb,255:red,98;green,98;blue,212}, value=0.61]|&|[fill={rgb,255:red,156;green,156;blue,216}, value=0.39]|&|[fill={rgb,255:red,217;green,217;blue,221}, value=0.15]|&|[fill={rgb,255:red,135;green,135;blue,215}, value=0.47]|&|[fill={rgb,255:red,194;green,194;blue,219}, value=0.24]|&|[fill={rgb,255:red,194;green,194;blue,219}, value=0.24]|&|[fill={rgb,255:red,166;green,166;blue,217}, value=0.35]|&|[fill={rgb,255:red,61;green,61;blue,209}, value=0.76]|&|[fill={rgb,255:red,194;green,194;blue,219}, value=0.24]|&|[fill={rgb,255:red,207;green,207;blue,220}, value=0.19]| \\
\data{\small{caverlee-2011}} & |[fill={rgb,255:red,143;green,143;blue,215}, value=0.44]|&|[fill={rgb,255:red,12;green,12;blue,205}, value=0.95]|&|[fill={rgb,255:red,50;green,50;blue,208}, value=0.8]|&|[fill={rgb,255:red,38;green,38;blue,207}, 

In [655]:
# Print the train-on-one/test-on-another results as a LaTeX TikZ matrix (see the
# recorded output) for metric key 'ba'.
# NOTE(review): 'ba' presumably means balanced accuracy, and start_x/start_y look
# like layout offsets for the figure — confirm both in print_table.
print_totoa_matrix(train_on_one_test_on_another_performance, 'ba', start_x=-1.1, start_y=-2.6)


\begin{tikzpicture}[]
  \matrix[matrix of nodes,row sep=-\pgflinewidth, column sep=-.39em,
nodes={{rectangle}},
column 1/.style={{anchor=east}},]{
\data{\small{twibot-2020}} & |[fill={rgb,255:red,142;green,142;blue,215}, value=0.72]|&|[fill={rgb,255:red,244;green,244;blue,223}, value=0.52]|&|[fill={rgb,255:red,250;green,250;blue,223}, value=0.51]|&|[fill={rgb,255:red,240;green,240;blue,222}, value=0.53]|&|[fill={rgb,255:red,234;green,234;blue,222}, value=0.54]|&|[fill={rgb,255:red,244;green,244;blue,223}, value=0.52]|&|[fill={rgb,255:red,224;green,224;blue,221}, value=0.56]|&|[fill={rgb,255:red,254;green,254;blue,223}, value=0.5]|&|[fill={rgb,255:red,254;green,254;blue,223}, value=0.5]|&|[fill={rgb,255:red,252;green,250;blue,219}, value=0.49]| \\
\data{\small{feedback-2019}} & |[fill={rgb,255:red,240;green,240;blue,222}, value=0.53]|&|[fill={rgb,255:red,158;green,158;blue,216}, value=0.69]|&|[fill={rgb,255:red,244;green,244;blue,223}, value=0.52]|&|[fill={rgb,255:red,234;green,234;blue

# Leave-one-dataset-out

In [632]:
# One row per held-out dataset: out-of-sample scores and the matching in-sample
# scores, both as returned by leave_dataset_out.
oos_rows, is_rows = [], []

for held_out_idx in range(len(datasets)):
    # A fresh, unfitted forest for every held-out dataset.
    rf = ensemble.RandomForestClassifier()
    out_of_sample, in_sample = leave_dataset_out(
        held_out_idx, datasets, dataset_names, labels, method=rf
    )
    oos_rows.append(out_of_sample)
    is_rows.append(in_sample)

leave_one_out_scores = pd.DataFrame(oos_rows)
leave_one_out_scores_in_sample = pd.DataFrame(is_rows)

KeyboardInterrupt: 

In [634]:
# The year is the last four characters of the left-out dataset's name.
leave_one_out_scores['year'] = leave_one_out_scores['left_out'].str.slice(start=-4)
# Sort in place: newest year first, then by dataset name within a year.
leave_one_out_scores.sort_values(
    by=['year', 'left_out'],
    ascending=[False, True],
    inplace=True,
)

In [635]:
# Print the leave-one-dataset-out results as LaTeX table rows (see the recorded
# output), built from the out-of-sample and in-sample score frames.
print_leave_one_out_table(leave_one_out_scores, leave_one_out_scores_in_sample, random_forest=True)

\data{twibot-2020} & 0.80/0.84/0.78 & 0.52/0.44/0.55 \\
\data{feedback-2019} & 0.78/0.82/0.77 & 0.64/0.37/0.56 \\
\data{rtbust-2019} & 0.78/0.82/0.76 & 0.52/0.31/0.53 \\
\data{midterm-2018} & 0.78/0.82/0.77 & 0.77/0.85/0.77 \\
\data{stock-2018} & 0.76/0.81/0.75 & 0.55/0.49/0.56 \\
\data{cresci-2017} & 0.77/0.82/0.75 & 0.83/0.88/0.84 \\
\data{gilani-2017} & 0.79/0.84/0.78 & 0.58/0.22/0.52 \\
\data{cresci-2015} & 0.81/0.86/0.78 & 0.87/0.90/0.83 \\
\data{yang-2013} & 0.84/0.88/0.83 & 0.32/0.21/0.62 \\
\data{caverlee-2011} & 0.71/0.71/0.71 & 0.56/0.56/0.57 \\


## Analysis of each of the specialized classifiers in Botometer

In [261]:
def get_columns(df):
    """Return the columns of `df` whose names contain none of the excluded terms.

    A column is dropped when its name contains 'location', 'utc', 'time_zone'
    or 'lang' anywhere in it.
    """
    excluded_terms = ('location', 'utc', 'time_zone', 'lang')
    # One alternation pattern matches a name containing any excluded term.
    is_excluded = df.columns.str.contains('|'.join(excluded_terms))
    return df.columns[~is_excluded]



### humans

In [445]:
# Reload data_accessor so edits to the module are picked up without restarting
# the kernel. Names bound earlier with `from data_accessor import ...` still
# point at the old objects, so the function used next is re-imported explicitly.
importlib.reload(sys.modules['data_accessor'])
from data_accessor import load_human_dataset_list

In [446]:
# Load the human-account datasets as a list; its length is reported later as
# the number of human datasets.
human_df_list = load_human_dataset_list()
# NOTE(review): get_intraclass_labels presumably combines the frames and
# labels each row by the dataset it came from — confirm in data_accessor.
human_df, human_dataset_class_labels = get_intraclass_labels(human_df_list)


In [448]:
# Score how well the human accounts can be told apart by their intraclass
# label; progress is printed (silent=False) and precision/recall/F1 are
# not computed (prec_rec=False).
human_scores = analyze_dataset(
    human_df,
    human_dataset_class_labels,
    silent=False,
    prec_rec=False,
)


Starting kfold_cv at 10/10/22 22:37:14
Fold 0 in progress
|--- followers_count <= 20699.50
|   |--- weights: [15401.00, 278.00, 274.00, 427.00, 4855.00, 6223.00] class: 0
|--- followers_count >  20699.50
|   |--- weights: [46.00, 36.00, 4458.00, 700.00, 55.00, 249.00] class: 2

Accuracy: 0.5992000969579445
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.3242288261633394
Fold 1 in progress
|--- followers_count <= 13586.00
|   |--- weights: [15305.00, 261.00, 200.00, 430.00, 4849.00, 6144.00] class: 0
|--- followers_count >  13586.00
|   |--- weights: [57.00, 38.00, 4572.00, 701.00, 84.00, 361.00] class: 2

Accuracy: 0.6047751787662101
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.32551857370938825
Fold 2 in progress
|--- followers_count <= 21313.00
|   |--- weights: [15355.00, 258.00, 274.00, 455.00, 4887.00, 6216.00] class: 0
|--- followers_count >  21313.00
|   |--- weights: [37.00, 34.00, 4484.00, 684.00, 66.00, 252.00] class: 2

Accuracy: 0.600896860986547
Precision -1
Rec

Accuracy: 0.6749484910919888
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.42496009888013525
Fold 4 in progress
|--- followers_count <= 20808.50
|   |--- friends_count <= 197.50
|   |   |--- friends_count <= 0.50
|   |   |   |--- weights: [358.00, 2.00, 2.00, 19.00, 1469.00, 4.00] class: 4
|   |   |--- friends_count >  0.50
|   |   |   |--- weights: [11224.00, 67.00, 76.00, 53.00, 972.00, 1479.00] class: 0
|   |--- friends_count >  197.50
|   |   |--- statuses_count <= 13348.50
|   |   |   |--- weights: [3509.00, 128.00, 148.00, 89.00, 1476.00, 3318.00] class: 0
|   |   |--- statuses_count >  13348.50
|   |   |   |--- weights: [317.00, 66.00, 37.00, 273.00, 938.00, 1437.00] class: 5
|--- followers_count >  20808.50
|   |--- followers_count <= 207693.50
|   |   |--- statuses_count <= 16427.00
|   |   |   |--- weights: [36.00, 9.00, 504.00, 115.00, 8.00, 101.00] class: 2
|   |   |--- statuses_count >  16427.00
|   |   |   |--- weights: [8.00, 14.00, 178.00, 246.00, 41.00, 114.00] c

|--- followers_count <= 21313.00
|   |--- friends_count <= 206.50
|   |   |--- friends_count <= 0.50
|   |   |   |--- statuses_count <= 93.00
|   |   |   |   |--- weights: [132.00, 0.00, 0.00, 8.00, 2.00, 3.00] class: 0
|   |   |   |--- statuses_count >  93.00
|   |   |   |   |--- weights: [221.00, 2.00, 1.00, 8.00, 1483.00, 2.00] class: 4
|   |   |--- friends_count >  0.50
|   |   |   |--- friends_count <= 87.50
|   |   |   |   |--- weights: [7628.00, 34.00, 40.00, 29.00, 410.00, 610.00] class: 0
|   |   |   |--- friends_count >  87.50
|   |   |   |   |--- weights: [3717.00, 40.00, 51.00, 26.00, 603.00, 969.00] class: 0
|   |--- friends_count >  206.50
|   |   |--- statuses_count <= 13920.50
|   |   |   |--- followers_count <= 118.50
|   |   |   |   |--- weights: [253.00, 26.00, 0.00, 2.00, 313.00, 670.00] class: 5
|   |   |   |--- followers_count >  118.50
|   |   |   |   |--- weights: [3083.00, 105.00, 147.00, 90.00, 1157.00, 2621.00] class: 0
|   |   |--- statuses_count >  13920.50

### simple bots

In [450]:
# Keep only the caverlee-2011 rows labelled 1 and rename the listed columns
# to the snake_case names used elsewhere in this notebook.
# NOTE(review): 'NumerOfFollowings' is spelled exactly as in the original
# cell; presumably it matches the column header in the loaded data — confirm
# before "correcting" it.
simple_df = caverlee_2011[caverlee_2011_labels.values == 1].rename(
    columns={
        'CreatedAt': 'created_at',
        'NumerOfFollowings': 'friends_count',
        'NumberOfFollowers': 'followers_count',
        "NumberOfTweets": 'statuses_count',
    }
)


### spammers

In [451]:
# Load the spammer accounts in intradataset mode; load_spammers returns the
# feature frame, the per-row labels and the number of contributing datasets.
spammers_df, spammers_labels, spammers_n_datasets = load_spammers(intradataset=True)
# Score the frame loaded on the line above. This call previously passed
# `spammers_intradataset`, a name this cell never assigns: on a fresh kernel
# that is a NameError, and on a long-lived kernel it silently scores stale
# data. The fake-follower and other-bot cells score the frame they just
# loaded, and this cell now does the same.
spammers_scores = analyze_dataset(
    spammers_df,
    spammers_labels,
    silent=False,
    prec_rec=False,
)


Starting kfold_cv at 10/10/22 22:38:09
Fold 0 in progress
|--- favourites_count <= 11.50
|   |--- weights: [778.00, 2777.00, 361.00, 796.00, 80.00, 283.00, 894.00, 76.00] class: 1
|--- favourites_count >  11.50
|   |--- weights: [21.00, 12.00, 13.00, 13.00, 0.00, 49.00, 0.00, 14186.00] class: 7

Accuracy: 0.8411325206449076
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.2491969100473087
Fold 1 in progress
|--- favourites_count <= 11.50
|   |--- weights: [772.00, 2748.00, 358.00, 770.00, 77.00, 260.00, 903.00, 67.00] class: 1
|--- favourites_count >  11.50
|   |--- weights: [27.00, 13.00, 15.00, 12.00, 0.00, 45.00, 0.00, 14272.00] class: 7

Accuracy: 0.8299252850963429
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.24907631123049823
Fold 2 in progress
|--- favourites_count <= 13.50
|   |--- weights: [785.00, 2788.00, 364.00, 802.00, 88.00, 284.00, 905.00, 73.00] class: 1
|--- favourites_count >  13.50
|   |--- weights: [22.00, 11.00, 16.00, 7.00, 0.00, 42.00, 0.00, 14152.00] c

|--- favourites_count <= 13.50
|   |--- followers_count <= 16.50
|   |   |--- friends_count <= 23.50
|   |   |   |--- weights: [313.00, 9.00, 3.00, 1.00, 0.00, 65.00, 0.00, 30.00] class: 0
|   |   |--- friends_count >  23.50
|   |   |   |--- weights: [8.00, 2674.00, 2.00, 3.00, 0.00, 9.00, 0.00, 2.00] class: 1
|   |--- followers_count >  16.50
|   |   |--- lang_en <= 0.50
|   |   |   |--- weights: [375.00, 0.00, 1.00, 795.00, 58.00, 0.00, 0.00, 9.00] class: 3
|   |   |--- lang_en >  0.50
|   |   |   |--- weights: [69.00, 57.00, 353.00, 0.00, 21.00, 212.00, 915.00, 32.00] class: 6
|--- favourites_count >  13.50
|   |--- listed_count <= 4.50
|   |   |--- followers_count <= 512.00
|   |   |   |--- weights: [11.00, 3.00, 4.00, 0.00, 0.00, 2.00, 0.00, 14153.00] class: 7
|   |   |--- followers_count >  512.00
|   |   |   |--- weights: [2.00, 1.00, 1.00, 1.00, 0.00, 0.00, 0.00, 0.00] class: 0
|   |--- listed_count >  4.50
|   |   |--- favourites_count <= 53.00
|   |   |   |--- weights: [1.00,

|--- favourites_count <= 13.50
|   |--- followers_count <= 16.50
|   |   |--- friends_count <= 23.50
|   |   |   |--- time_zone_Athens <= 0.50
|   |   |   |   |--- weights: [1.00, 7.00, 4.00, 0.00, 0.00, 63.00, 0.00, 28.00] class: 5
|   |   |   |--- time_zone_Athens >  0.50
|   |   |   |   |--- weights: [319.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00] class: 0
|   |   |--- friends_count >  23.50
|   |   |   |--- lang_en <= 0.50
|   |   |   |   |--- weights: [8.00, 0.00, 1.00, 3.00, 0.00, 0.00, 0.00, 0.00] class: 0
|   |   |   |--- lang_en >  0.50
|   |   |   |   |--- weights: [0.00, 2704.00, 2.00, 0.00, 0.00, 7.00, 0.00, 1.00] class: 1
|   |--- followers_count >  16.50
|   |   |--- lang_en <= 0.50
|   |   |   |--- lang_it <= 0.50
|   |   |   |   |--- weights: [0.00, 0.00, 1.00, 766.00, 60.00, 0.00, 0.00, 9.00] class: 3
|   |   |   |--- lang_it >  0.50
|   |   |   |   |--- weights: [383.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00] class: 0
|   |   |--- lang_en >  0.50
|   |   |   |--

### fake followers

In [452]:
# Load the fake-follower accounts in intradataset mode, then score them
# quietly with the 'created_at' column removed from the features.
fake_followers_df, fake_followers_labels, ff_n_datasets = load_fake_followers(
    intradataset=True
)
ff_scores = analyze_dataset(
    fake_followers_df.drop(columns=['created_at']),
    fake_followers_labels,
    silent=True,
    prec_rec=False,
)


### self-declared

In [453]:
# Load botwiki-2019 from its tweets JSON and label TSV under PROJ_PATH.
# NOTE(review): the first returned value is bound to `botwiki_df` and the
# second to `self_declared_df`; the political-bots and financial-bots cells
# name the same positions `<name>` and `<name>_df` — confirm which of the two
# is the feature frame.
botwiki_df, self_declared_df, botwiki_labels = load_dataset(
    PROJ_PATH + "/data/botwiki-2019_tweets.json",
    PROJ_PATH + "/data/botwiki-2019.tsv",
)


### political bots

In [454]:
# Load political-bots-2019 from its tweets JSON and label TSV under PROJ_PATH.
political_bots, political_bots_df, political_bots_labels = load_dataset(
    PROJ_PATH + "/data/political-bots-2019/political-bots-2019_tweets.json",
    PROJ_PATH + "/data/political-bots-2019/political-bots-2019.tsv",
)


### other bots

In [455]:
# Re-execute the already-imported fit_and_score module so that edits made to
# it on disk take effect without restarting the kernel, then re-bind
# analyze_dataset to the reloaded module's definition. The reload must come
# before the from-import.
importlib.reload(sys.modules['fit_and_score'])
from fit_and_score import analyze_dataset

In [456]:
# Load the "other bots" accounts in intradataset mode and score them,
# printing progress and skipping precision/recall/F1.
other_bots, other_bots_labels, other_n_datasets = load_other_bots(
    intradataset=True
)
other_scores = analyze_dataset(
    other_bots,
    other_bots_labels,
    silent=False,
    prec_rec=False,
)


Starting kfold_cv at 10/10/22 22:38:21
Fold 0 in progress
|--- lang_it <= 0.50
|   |--- weights: [114.00, 860.00, 32.00] class: 1
|--- lang_it >  0.50
|   |--- weights: [2.00, 5.00, 252.00] class: 2

Accuracy: 0.8927444794952681
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.6135265700483091
Fold 1 in progress
|--- lang_it <= 0.50
|   |--- weights: [112.00, 867.00, 33.00] class: 1
|--- lang_it >  0.50
|   |--- weights: [2.00, 5.00, 246.00] class: 2

Accuracy: 0.889589905362776
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.6216216216216216
Fold 2 in progress
|--- lang_it <= 0.50
|   |--- weights: [106.00, 873.00, 36.00] class: 1
|--- lang_it >  0.50
|   |--- weights: [2.00, 5.00, 243.00] class: 2

Accuracy: 0.8801261829652997
Precision -1
Recall: -1
F1: -1
Balanced accuracy: 0.6351351351351351
Fold 3 in progress
|--- lang_it <= 0.50
|   |--- weights: [108.00, 873.00, 34.00] class: 1
|--- lang_it >  0.50
|   |--- weights: [1.00, 3.00, 246.00] class: 2

Accuracy: 0.870662460567

### financial bots

In [457]:
# Financial bots come from cresci-stock-2018: load its tweets JSON and label
# TSV from under PROJ_PATH.
financial_bots, financial_bots_df, financial_bots_labels = load_dataset(
    PROJ_PATH + "/data/cresci-stock-2018_tweets.json",
    PROJ_PATH + "/data/cresci-stock-2018.tsv",
)
# Rebind to an independent copy of the frame.
financial_bots_df = financial_bots_df.copy()


## Single-type specialized classifiers

In [458]:
# For every account type, pair the number of source datasets with the score
# list produced by analyze_dataset for that type.
scores_by_type = {
    'humans': (len(human_df_list), human_scores),
    'spammers': (spammers_n_datasets, spammers_scores),
    'fake followers': (ff_n_datasets, ff_scores),
    'other bots': (other_n_datasets, other_scores),
}

# Build one record per account type: 'n_datasets' first, then for each depth
# d = 1..max_depth the entries at positions 0 and 4 of that depth's score
# tuple, stored as 'a<d>' and 'ba<d>'.
# NOTE(review): positions 0 and 4 are presumably accuracy and balanced
# accuracy, matching the order of the printed metrics — confirm in
# fit_and_score.
intraclass_dict = {}
for type_name, (n_datasets, depth_scores) in scores_by_type.items():
    record = {'n_datasets': n_datasets}
    for depth in range(1, max_depth + 1):
        record[f'a{depth}'] = depth_scores[depth - 1][0]
        record[f'ba{depth}'] = depth_scores[depth - 1][4]
    intraclass_dict[type_name] = record

# Rows are account types; columns follow the key order built above.
intraclass_df = pd.DataFrame.from_dict(intraclass_dict, orient='index')

In [475]:
# Emit the LaTeX rows of the intra-type table (one row per account type in
# intraclass_df) for tree depths up to max_depth.
print_intratype_test(intraclass_df, max_depth)

\data{humans} & 0.67/0.43 & 3 & 6 \\
\data{spammers} & 0.97/0.75 & 4 & 7 \\
\data{fake followers} & 0.97/0.94 & 1 & 2 \\
\data{other bots} & 0.91/0.77 & 2 & 3 \\
