In [None]:
### Network analysis
# Centralities
# Clusters
# Bot detection

In [1]:
import pandas as pd

In [2]:
# Load the preprocessed data set from its pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
preprocessed_path = "preprocessed_data.pkl"
data = pd.read_pickle(preprocessed_path)

Bot detection

In [3]:
# Show how many rows carry each NodeType label (rendered as the cell output).
data.loc[:, "NodeType"].value_counts()

Neutral        96528
Unknown        37371
pro_Ukraine    27098
pro_Russian      559
Both              72
Name: NodeType, dtype: int64

In [79]:
# Draw a small random sample of "userid" values for every combination of
# stance (NodeType) and period (on/before vs. after the cutoff timestamp).
# The samples are the accounts that get sent to Botometer further down.
# NOTE(review): rows are sampled, not unique users - the same userid can be
# drawn more than once if it appears in several rows; confirm this is intended.
CUTOFF_TS = "2022-03-04 23:59:59+00:00"  # last instant that counts as "Before"
ACCOUNTS_PER_GROUP = 10
SAMPLE_SEED = 42  # fixed seed so Restart & Run All draws the same accounts

posted_before = data["tweetcreatedts"] <= CUTOFF_TS
posted_after = data["tweetcreatedts"] > CUTOFF_TS

groups = {}
for stance in ("pro_Ukraine", "pro_Russian"):
    has_stance = data["NodeType"] == stance
    for period, in_period in (("Before", posted_before), ("After", posted_after)):
        candidates = data.loc[has_stance & in_period, "userid"]
        # Series.sample raises ValueError when asked for more rows than exist,
        # so cap the request at the number of available candidates.
        n_drawn = min(ACCOUNTS_PER_GROUP, len(candidates))
        groups[stance + period] = candidates.sample(n_drawn, random_state=SAMPLE_SEED)

# Keep the individual names defined for any other cell that refers to them.
pro_UkraineBefore = groups["pro_UkraineBefore"]
pro_UkraineAfter = groups["pro_UkraineAfter"]
pro_RussianBefore = groups["pro_RussianBefore"]
pro_RussianAfter = groups["pro_RussianAfter"]

In [80]:
### Get results from botometer
# More info: https://github.com/IUNetSci/botometer-python
#
# Queries Botometer for every sampled account in `groups` and appends one CSV
# row per account: the group name, the account as yielded by Botometer, and
# the raw result dict (written via its string representation).

# !pip3 install botometer
# !pip3 install requests tweepy #dependencies for botometer
import botometer, csv
import os
from config import *  # expected to provide rapidapi_key and twitter_app_auth

RESULTS_PATH = 'group_results.csv'

bom = botometer.Botometer(wait_on_ratelimit=True,
                          rapidapi_key=rapidapi_key,
                          **twitter_app_auth)

# The analysis cell reads this file with pd.read_csv and accesses the columns
# "group" and "bot_scores" by name, so a header row must exist. Write it only
# when the file is new or empty, because the file is opened in append mode and
# may already contain results from an earlier run.
needs_header = not os.path.exists(RESULTS_PATH) or os.path.getsize(RESULTS_PATH) == 0

# newline="" is required by the csv module; without it rows containing
# newlines are mangled and Windows gets an extra blank line after every row.
with open(RESULTS_PATH, 'a', encoding="utf-8", newline="") as file:
    w = csv.writer(file)
    if needs_header:
        w.writerow(["group", "screen_name", "bot_scores"])

    group_results = dict()
    for group_name, accounts in groups.items():
        results = []
        for screen_name, result in bom.check_accounts_in(accounts):
            w.writerow([group_name, screen_name, result])
            results.append(result)
        group_results[group_name] = results
            

In [89]:
# Check results. If bot: add results to the attributes
#
# Reads the Botometer results written to group_results.csv, flags accounts as
# bots, and builds two dicts:
#   bot_scores      - per flagged account: raw English/universal scores + group
#   bot_score_means - per group: bot count, group size, bot-type counts and the
#                     mean overall English/universal raw score
import ast

BOT_TYPE_KEYS = ('astroturf', 'fake_follower', 'financial', 'other', 'self_declared', 'spammer')


def _dominant_bot_type(type_scores):
    """Return the name of the highest-scoring bot category.

    The 'overall' entry is an aggregate, not a category, so it is ignored.
    Ties go to the first category in the dict's iteration order.
    Raises ValueError when no category scores are present.
    """
    categories = {name: value for name, value in type_scores.items() if name != "overall"}
    return max(categories.items(), key=lambda item: item[1])[0]


bot_data = pd.read_csv("group_results.csv")
print(bot_data.shape)
# The results file is opened in append mode, so re-runs leave duplicate rows.
bot_data = bot_data.drop_duplicates()
print(bot_data.shape)
print(bot_data["group"].value_counts())
grouped_bot_data = bot_data.groupby(by="group")

sample_size = len(bot_data)/4

bot_scores = dict()
bot_score_means = dict()

for group, scores in grouped_bot_data:
    non_existing = 0  # rows without usable scores (e.g. error results)
    sum_en = 0
    sum_un = 0
    nbots = 0
    bot_types_en = dict.fromkeys(BOT_TYPE_KEYS, 0)
    bot_types_un = dict.fromkeys(BOT_TYPE_KEYS, 0)
    for result in scores["bot_scores"]:
        # Results were stored as the string form of a Python dict.
        result = ast.literal_eval(result)
        try:
            # Do every lookup before touching any counter, so a malformed
            # result can never be counted as a bot and as non-existing at once.
            english_benchmark = result["cap"]["english"]
            universal_benchmark = result["cap"]["universal"]
            english_all_scores = result["raw_scores"]["english"]
            universal_all_scores = result["raw_scores"]["universal"]
            english_score = english_all_scores["overall"]
            universal_score = universal_all_scores["overall"]

            # NOTE(review): an account is flagged when its overall raw score
            # reaches the "cap" value of the same result - confirm that this
            # is the intended bot criterion.
            is_bot = (english_score >= english_benchmark
                      or universal_score >= universal_benchmark)
            if is_bot:
                bot_type_en = _dominant_bot_type(english_all_scores)
                bot_type_un = _dominant_bot_type(universal_all_scores)
                screen_name = result["user"]["user_data"]["screen_name"]
        except (KeyError, TypeError, ValueError):
            # Result lacks the expected structure; skip it.
            non_existing += 1
            continue

        if is_bot:
            nbots += 1
            bot_types_en[bot_type_en] = bot_types_en.get(bot_type_en, 0) + 1
            bot_types_un[bot_type_un] = bot_types_un.get(bot_type_un, 0) + 1
            bot_scores[screen_name] = {"en": english_all_scores, "un": universal_all_scores, "group": group}

        sum_en += english_score
        sum_un += universal_score

    # Means are taken over the accounts that returned scores; guard against a
    # group in which none did.
    n_scored = len(scores) - non_existing
    mean_en = sum_en / n_scored if n_scored else float("nan")
    mean_un = sum_un / n_scored if n_scored else float("nan")

    # "bot_types" carries the English-model counts (same object as
    # "bot_types_en"); the universal-model counts are under "bot_types_un".
    bot_score_means[group] = {"nbots": nbots, "n": len(scores),
                              "bot_types": bot_types_en,
                              "bot_types_en": bot_types_en,
                              "bot_types_un": bot_types_un,
                              "en": mean_en, "un": mean_un}

# example result: {'cap':
#   {'english': 0.8995513244218455, 'universal': 0.8733944954488508},
#       'display_scores':
#           {'english': {'astroturf': 0.8, 'fake_follower': 1.9, 'financial': 0.2, 'other': 4.7, 'overall': 4.7, 'self_declared': 4.6, 'spammer': 0.8},
#           'universal': {'astroturf': 0.8, 'fake_follower': 1.2, 'financial': 0.2, 'other': 4.4, 'overall': 4.6, 'self_declared': 4.6, 'spammer': 0.8}},
#       'raw_scores':
#           {'english': {'astroturf': 0.17, 'fake_follower': 0.38, 'financial': 0.03, 'other': 0.94, 'overall': 0.94, 'self_declared': 0.93, 'spammer': 0.16},
#           'universal': {'astroturf': 0.15, 'fake_follower': 0.24, 'financial': 0.04, 'other': 0.87, 'overall': 0.91, 'self_declared': 0.91, 'spammer': 0.15}},
#   'user': {'majority_lang': 'en', 'user_data': {'id_str': '3380828067', 'screen_name': 'dsn_status'}}}

(570, 3)
(522, 3)
pro_UkraineAfter     139
pro_UkraineBefore    137
pro_RussianBefore    126
pro_RussianAfter     120
Name: group, dtype: int64


In [93]:
# Print a per-group summary of the bot detection results: how many sampled
# accounts were flagged, their share of the group, the bot-type counts and the
# mean overall scores of the English and universal models.
for group, scores in bot_score_means.items():
    sample_size = scores["n"]
    bots_percentage = scores["nbots"] / sample_size * 100
    print("Group:", group)
    print("Number of bots:", scores["nbots"], "out of", sample_size)
    print("Bots percentage in the sample:", round(bots_percentage), "%")
    # This line shows the per-type breakdown, so it gets its own label instead
    # of repeating "Number of bots:".
    print("Bot types:", scores["bot_types"])
    print("Mean score EN:", round(scores["en"], 4), "Mean score UNIVERSAL:", round(scores["un"], 4))
    print()

Group: pro_RussianAfter
Number of bots: 14 out of 120
Bots percentage in the sample: 12 %
Number of bots: {'astroturf': 10, 'fake_follower': 2, 'financial': 0, 'other': 2, 'self_declared': 0, 'spammer': 0}
Mean score EN: 0.3859 Mean score UNIVERSAL: 0.3746

Group: pro_RussianBefore
Number of bots: 9 out of 126
Bots percentage in the sample: 7 %
Number of bots: {'astroturf': 0, 'fake_follower': 4, 'financial': 0, 'other': 5, 'self_declared': 0, 'spammer': 0}
Mean score EN: 0.4205 Mean score UNIVERSAL: 0.3739

Group: pro_UkraineAfter
Number of bots: 25 out of 139
Bots percentage in the sample: 18 %
Number of bots: {'astroturf': 6, 'fake_follower': 2, 'financial': 0, 'other': 17, 'self_declared': 0, 'spammer': 0}
Mean score EN: 0.4848 Mean score UNIVERSAL: 0.3872

Group: pro_UkraineBefore
Number of bots: 22 out of 137
Bots percentage in the sample: 16 %
Number of bots: {'astroturf': 3, 'fake_follower': 1, 'financial': 0, 'other': 18, 'self_declared': 0, 'spammer': 0}
Mean score EN: 0.4731

In [95]:
# List every account flagged as a bot with its group, its highest-scoring
# English bot category and the full set of English raw scores.
# The loop variable is deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
for screen_name, scores in bot_scores.items():
    # 'overall' is an aggregate score, not a bot category - leave it out when
    # picking the dominant type.
    category_scores_en = {name: value for name, value in scores["en"].items() if name != "overall"}
    print("User:", screen_name, "Group:", scores["group"])
    print("Bot type EN:", max(category_scores_en.items(), key=lambda x: x[1]))
    print("Scores EN:", scores["en"])
    print()

User: Free_Libyan2022 Group: pro_RussianAfter
Bot type EN: ('other', 0.84)
Bot type EN: {'astroturf': 0.34, 'fake_follower': 0.5, 'financial': 0.35, 'other': 0.84, 'overall': 0.84, 'self_declared': 0.04, 'spammer': 0.08}

User: votebLOUbLOU Group: pro_RussianAfter
Bot type EN: ('astroturf', 0.93)
Bot type EN: {'astroturf': 0.93, 'fake_follower': 0.05, 'financial': 0.0, 'other': 0.23, 'overall': 0.93, 'self_declared': 0.0, 'spammer': 0.0}

User: iris65973995 Group: pro_RussianAfter
Bot type EN: ('fake_follower', 0.86)
Bot type EN: {'astroturf': 0.05, 'fake_follower': 0.86, 'financial': 0.45, 'other': 0.69, 'overall': 0.86, 'self_declared': 0.12, 'spammer': 0.7}

User: Iluvmy2dogs88 Group: pro_RussianAfter
Bot type EN: ('astroturf', 0.94)
Bot type EN: {'astroturf': 0.94, 'fake_follower': 0.24, 'financial': 0.0, 'other': 0.2, 'overall': 0.94, 'self_declared': 0.0, 'spammer': 0.09}

User: grinningbamE Group: pro_RussianAfter
Bot type EN: ('astroturf', 0.99)
Bot type EN: {'astroturf': 0.99,