In [None]:
### Network analysis
# Centralities
# Clusters
# Bot detection

In [1]:
import pandas as pd

In [2]:
# Load the pickled dataset that the following cells filter and sample from.
# The path is relative, so the file is looked up in the current working directory.
# NOTE: unpickling can execute code embedded in the file - only load a pickle
# that comes from a trusted source.
data = pd.read_pickle("preprocessed_data.pkl")

Bot detection

In [3]:
# Count how many rows carry each NodeType label (displayed as the cell output).
data["NodeType"].value_counts()

Neutral        96528
Unknown        37371
pro_Ukraine    27098
pro_Russian      559
Both              72
Name: NodeType, dtype: int64

In [15]:
# Sampling configuration for the bot check.
SAMPLE_SIZE = 10  # number of rows (user ids) drawn per group
# Fixed seed so a re-run draws the same accounts. Set to None to get a fresh
# draw on every run (e.g. when accumulating more accounts in group_results.csv).
SAMPLE_SEED = 42
# Rows with tweetcreatedts <= this value count as "Before", rows > it as "After".
CUTOFF_TS = "2022-03-04 23:59:59+00:00"


def _sample_userids(node_type, after_cutoff, n=SAMPLE_SIZE, seed=SAMPLE_SEED):
    """Return a random sample of ``userid`` values for one NodeType / period.

    Parameters
    ----------
    node_type : str
        Value of the ``NodeType`` column to select (e.g. ``"pro_Ukraine"``).
    after_cutoff : bool
        If True keep rows with ``tweetcreatedts > CUTOFF_TS``; otherwise keep
        rows with ``tweetcreatedts <= CUTOFF_TS``.
    n : int
        Maximum number of rows to draw. If fewer rows match, all of them are
        returned instead of raising.
    seed : int or None
        Passed to ``Series.sample(random_state=...)``.

    Returns
    -------
    pandas.Series
        The sampled ``userid`` values (original index preserved).
    """
    stance_mask = data["NodeType"] == node_type
    if after_cutoff:
        period_mask = data["tweetcreatedts"] > CUTOFF_TS
    else:
        period_mask = data["tweetcreatedts"] <= CUTOFF_TS
    candidates = data.loc[stance_mask & period_mask, "userid"]
    # NOTE(review): sampling is over rows, so the same userid can presumably be
    # drawn more than once if an account has several rows - confirm whether
    # de-duplicating before sampling is wanted.
    return candidates.sample(min(n, len(candidates)), random_state=seed)


pro_UkraineBefore = _sample_userids("pro_Ukraine", after_cutoff=False)
pro_UkraineAfter = _sample_userids("pro_Ukraine", after_cutoff=True)
pro_RussianBefore = _sample_userids("pro_Russian", after_cutoff=False)
pro_RussianAfter = _sample_userids("pro_Russian", after_cutoff=True)

# Group label -> sampled user ids; consumed by the botometer cell below.
groups = {
    "pro_UkraineBefore": pro_UkraineBefore,
    "pro_UkraineAfter": pro_UkraineAfter,
    "pro_RussianBefore": pro_RussianBefore,
    "pro_RussianAfter": pro_RussianAfter,
}

In [16]:
### Get results from botometer
# More info: https://github.com/IUNetSci/botometer-python

# !pip3 install botometer
# !pip3 install requests tweepy #dependencies for botometer
import os
import botometer, csv
# NOTE(review): wildcard import kept so that any other cell relying on names
# from config keeps working; this cell itself only needs `rapidapi_key` and
# `twitter_app_auth`, which are presumably defined there. Prefer an explicit
# `from config import rapidapi_key, twitter_app_auth` once that is confirmed.
from config import *

RESULTS_PATH = 'group_results.csv'
# Column names expected by the analysis cell that reads this file
# ("group" and "bot_scores"); the middle column holds the account identifier
# yielded by check_accounts_in.
RESULTS_HEADER = ["group", "account", "bot_scores"]

bom = botometer.Botometer(wait_on_ratelimit=True,
                          rapidapi_key=rapidapi_key,
                          **twitter_app_auth)

# The file is opened in append mode so repeated runs accumulate rows; the
# header must therefore be written only when the file is new or still empty.
needs_header = (not os.path.exists(RESULTS_PATH)) or os.path.getsize(RESULTS_PATH) == 0

# newline="" is required by the csv module so that no extra blank lines are
# written on platforms that translate "\n".
with open(RESULTS_PATH, 'a', encoding="utf-8", newline="") as file:
    w = csv.writer(file)
    if needs_header:
        w.writerow(RESULTS_HEADER)

    # Group label -> list of raw botometer results for the sampled accounts.
    group_results = dict()
    for group_name, accounts in groups.items():
        results = []
        for account, result in bom.check_accounts_in(accounts):
            # `result` is written via its str() form and parsed back later
            # with ast.literal_eval.
            w.writerow([group_name, account, result])
            results.append(result)
        group_results[group_name] = results
            

In [21]:
# Check results. If bot: add results to the attributes
import ast

bot_data = pd.read_csv("group_results.csv")
print(bot_data.shape)
print(bot_data["group"].value_counts())
bot_data = bot_data.groupby(by="group")

# screen_name -> {"en": raw english scores, "un": raw universal scores},
# filled only for accounts flagged as bots.
bot_scores = dict()
# group label -> {"en": mean english overall score, "un": mean universal overall score}
bot_score_means = dict()


def _score_result(result):
    """Extract the values needed from one parsed botometer result.

    Returns ``(english_score, universal_score, bot_entry)`` where ``bot_entry``
    is ``None`` for a non-bot and ``(screen_name, {"en": ..., "un": ...})`` for
    an account whose overall raw score reaches the corresponding ``cap`` value
    in at least one of the two models. Returns ``None`` when the result lacks
    the expected keys (e.g. an error record) so the caller can skip it.

    Everything is read here, before the caller touches any counter, so a
    failing lookup can never leave the per-group totals half updated.
    """
    try:
        english_benchmark = result["cap"]["english"]
        universal_benchmark = result["cap"]["universal"]
        english_score = result["raw_scores"]["english"]["overall"]
        universal_score = result["raw_scores"]["universal"]["overall"]

        # NOTE(review): the raw overall score is compared against the CAP value
        # as if it were a threshold - confirm this is the intended bot criterion.
        bot_en = english_score >= english_benchmark
        bot_uni = universal_score >= universal_benchmark

        bot_entry = None
        if bot_en or bot_uni:
            bot_entry = (
                result["user"]["user_data"]["screen_name"],
                {"en": result["raw_scores"]["english"],
                 "un": result["raw_scores"]["universal"]},
            )
        # Force the additions the caller will perform, so a non-numeric score
        # is reported here rather than after counters were changed.
        0 + english_score
        0 + universal_score
    except (KeyError, TypeError):
        # Missing keys / wrong shape: no usable scores for this account.
        return None
    return english_score, universal_score, bot_entry


for group, scores in bot_data:
    non_existing = 0
    sum_en = 0
    sum_un = 0
    nbots = 0
    for raw_result in scores["bot_scores"]:
        # The CSV stores str(dict); turn it back into a dict.
        result = ast.literal_eval(raw_result)
        extracted = _score_result(result)
        if extracted is None:
            non_existing += 1
            continue

        english_score, universal_score, bot_entry = extracted
        if bot_entry is not None:
            nbots += 1
            # add bot scores attribute to the node
            bot_scores[bot_entry[0]] = bot_entry[1]

        sum_en += english_score
        sum_un += universal_score

    # Average only over accounts that actually returned scores; a group with
    # no usable result gets NaN instead of raising ZeroDivisionError.
    n_valid = len(scores) - non_existing
    if n_valid > 0:
        mean_en = sum_en / n_valid
        mean_un = sum_un / n_valid
    else:
        mean_en = float("nan")
        mean_un = float("nan")

    bot_score_means[group] = {"en": mean_en, "un": mean_un}

    print("Group:", group)
    print("Number of bots:",nbots)
    print("Mean score EN:",round(mean_en, 4),"Mean score UNIVERSAL:",round(mean_un, 4))

# example result:
# {'cap': {'english': 0.8995513244218455, 'universal': 0.8733944954488508},
#  'display_scores':
#      {'english': {'astroturf': 0.8, 'fake_follower': 1.9, 'financial': 0.2, 'other': 4.7, 'overall': 4.7, 'self_declared': 4.6, 'spammer': 0.8},
#       'universal': {'astroturf': 0.8, 'fake_follower': 1.2, 'financial': 0.2, 'other': 4.4, 'overall': 4.6, 'self_declared': 4.6, 'spammer': 0.8}},
#  'raw_scores':
#      {'english': {'astroturf': 0.17, 'fake_follower': 0.38, 'financial': 0.03, 'other': 0.94, 'overall': 0.94, 'self_declared': 0.93, 'spammer': 0.16},
#       'universal': {'astroturf': 0.15, 'fake_follower': 0.24, 'financial': 0.04, 'other': 0.87, 'overall': 0.91, 'self_declared': 0.91, 'spammer': 0.15}},
#  'user': {'majority_lang': 'en', 'user_data': {'id_str': '3380828067', 'screen_name': 'dsn_status'}}}

(300, 3)
pro_UkraineBefore    75
pro_UkraineAfter     75
pro_RussianBefore    75
pro_RussianAfter     75
Name: group, dtype: int64
Group: pro_RussianAfter
Number of bots: 9
Mean score EN: 0.3645 Mean score UNIVERSAL: 0.3478
Group: pro_RussianBefore
Number of bots: 4
Mean score EN: 0.4216 Mean score UNIVERSAL: 0.3706
Group: pro_UkraineAfter
Number of bots: 13
Mean score EN: 0.4616 Mean score UNIVERSAL: 0.3769
Group: pro_UkraineBefore
Number of bots: 15
Mean score EN: 0.5144 Mean score UNIVERSAL: 0.4297
