In [2]:
import os
from tqdm import tqdm

from fyp.crypto import Crypto


In [3]:
crypto = Crypto()


In [4]:
base = '/its/home/ep396/Documents/FYP/'
name = "dataset"


In [5]:
def load_db(name, base):
    e = base + f"encrypted_{name}.db"
    d = base + f"decrypted_{name}.db"
    crypto.age_decrypt_file(e, d)


In [6]:
def unload_db(name, base):
    e = base + f"encrypted_{name}.db"
    d = base + f"decrypted_{name}.db"
    crypto.age_encrypt_file(d, e)

    os.remove(d)


In [7]:
load_db(name, base)


In [8]:
from fyp.db_dataset import Tweet
from fyp.db import User, UserInteractorRelationships


In [9]:
query_all = Tweet.select().dicts()


In [10]:
neutral = ["trans", "enby", "transgender", "nonbinary"]
non_neutral = ["genderist", "genderism", "gender cult", "adult human female", "#sexnotgender", "#istandwithjkRowling", "#sexmatters", "#biologynotbigotry", "#waronwomen", "#istandWithJKR", "gender critical", "#istandwithmayaforstater"]


# Suspicious Retweets

In [11]:
suspicious_retweet_count = []
no_likes = Tweet.select().where(Tweet.like_count == 0).dicts()

for tweet in tqdm(no_likes):
    if tweet["retweet_count"] > tweet["like_count"]:
        suspicious_retweet_count.append(tweet)


100%|██████████| 628292/628292 [00:00<00:00, 1818553.47it/s]


In [12]:
len(suspicious_retweet_count)


4420

In [13]:
sus_retweet = {"neutral":0, "non-neutral":0, "both":0, "neither":0}

for tweet in tqdm(suspicious_retweet_count):
    text = tweet["text"].lower()
    is_neutral = any(n in text for n in neutral)
    is_non_neutral = any(n in text for n in non_neutral)

    if is_neutral and is_non_neutral:
        sus_retweet["both"] += 1
    elif is_neutral:
        sus_retweet["neutral"] += 1
    elif is_non_neutral:
        sus_retweet["non-neutral"] += 1
    else:
        sus_retweet["neither"] += 1


100%|██████████| 4420/4420 [00:00<00:00, 116216.30it/s]


In [14]:
def get_ratio(arr):
    sum_ = sum(arr)
    return [item / sum_ for item in arr]


In [15]:
sus_retweet


{'neutral': 3719, 'non-neutral': 354, 'both': 101, 'neither': 246}

In [16]:
get_ratio([value for value in sus_retweet.values()])


[0.8414027149321267,
 0.08009049773755657,
 0.022850678733031673,
 0.055656108597285064]

In [17]:
from collections import Counter


In [18]:
unique_retweet_counter = Counter([tweet["author_id"] for tweet in suspicious_retweet_count])


In [19]:
len(unique_retweet_counter)


1982

In [20]:
sorted_suspicous_all_retweet = dict(sorted(unique_retweet_counter.items(), key=lambda x:x[1], reverse=True))


# Suspicious Times

In [21]:
from dateutil import parser
import datetime


In [22]:
times = {}

for tweet in tqdm(query_all):
    converted = parser.isoparse(tweet["created_at"]).replace(second=0, microsecond=0)
    if converted not in times:
        times[converted] = 1
    else:
        times[converted] += 1


100%|██████████| 1873093/1873093 [00:30<00:00, 60829.46it/s]


In [32]:
top_one_suspicous_times = dict(sorted(times.items(), key=lambda x:x[1], reverse=True)[:300])


In [33]:
one_minute = datetime.timedelta(minutes=1)
suspicous_times_tweets = {}

time_val = (one_minute * 2)

for tweet in tqdm(query_all):
    converted = parser.isoparse(tweet["created_at"]).replace(second=0, microsecond=0)
    converted_plus_one = parser.isoparse(tweet["created_at"]) + one_minute
    converted_minus_one = parser.isoparse(tweet["created_at"]) - one_minute
    if any(time <= (converted + time_val) and time >= (converted - time_val) for time in top_one_suspicous_times.keys()):
        if converted not in suspicous_times_tweets:
            suspicous_times_tweets[converted] = [tweet]
        else:
            suspicous_times_tweets[converted].append(tweet)


100%|██████████| 1873093/1873093 [04:42<00:00, 6636.04it/s] 


In [34]:
time_groups = [set([tweet["author_id"] for tweet in tweets]) for tweets in suspicous_times_tweets.values()]


In [35]:
occurances = {}

for i,s1 in enumerate(time_groups[:-1]):
    for j,s2 in enumerate(time_groups[i+1:]):
        new_set = frozenset(s1&s2)
        if len(new_set) > 1:
            if new_set not in occurances:
                occurances[new_set] = 1
            else:
                occurances[new_set] += 1


In [None]:
occurances_sorted = dict(sorted(occurances.items(), key=lambda x:x[1], reverse=True))
occurances_sorted


# Network Analysis


In [11]:
import json

f = open("suspicious.json")
sus_raw = json.load(f)
sus = [int(sus) for sus in sus_raw]
f.close()


In [12]:
twitter_id_to_anon = { crypto.fernet_decrypt(user.twitter_user_id) : user.id for user in tqdm(User.select()) }
anon_to_twitter_id = { val:key for key, val in twitter_id_to_anon.items() }


100%|██████████| 14745/14745 [00:01<00:00, 8911.51it/s]


---

In [38]:
unload_db(name, base)
