In [1]:
import os
import networkx as nx
from tqdm import tqdm
import pandas as pd
import numpy as np

from fyp.crypto import Crypto
from fyp.db  import User, UserInteractorRelationships


In [2]:
# Shared Crypto helper: used below to decrypt/encrypt the database file
# (age_decrypt_file / age_encrypt_file) and to decrypt the stored Twitter
# user ids (fernet_decrypt).
crypto = Crypto()


In [3]:
# Directory that holds encrypted_<name>.db. Set the FYP_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original
# location. os.path.join(..., "") guarantees a trailing separator, which
# load_db/unload_db rely on because they build paths by string concatenation.
base = os.path.join(os.environ.get("FYP_DATA_DIR", '/its/home/ep396/Documents/FYP/'), "")
# Database stem: the files are encrypted_<name>.db and decrypted_<name>.db.
name = "dataset"

def load_db(name, base):
    """Decrypt ``<base>encrypted_<name>.db`` into ``<base>decrypted_<name>.db``.

    Args:
        name: Database stem used in both file names.
        base: Path prefix prepended verbatim (no separator is inserted, so it
            must already end with one).

    The work is delegated to the module-level ``crypto`` helper, which is
    called with the encrypted path first and the decrypted path second.
    """
    encrypted_path = base + "encrypted_{}.db".format(name)
    decrypted_path = base + "decrypted_{}.db".format(name)
    crypto.age_decrypt_file(encrypted_path, decrypted_path)

def unload_db(name, base):
    """Re-encrypt the decrypted database and delete the decrypted file.

    Args:
        name: Database stem used in both file names.
        base: Path prefix prepended verbatim (no separator is inserted, so it
            must already end with one).

    The decrypted file is only removed after ``crypto.age_encrypt_file``
    returns, so an encryption error leaves it in place.
    """
    encrypted_path = base + "encrypted_{}.db".format(name)
    decrypted_path = base + "decrypted_{}.db".format(name)

    # Called with the decrypted path first and the encrypted path second.
    crypto.age_encrypt_file(decrypted_path, encrypted_path)
    os.remove(decrypted_path)


In [4]:
# Decrypt the database to disk (presumably the file the fyp.db models read --
# TODO confirm). The unload_db call in the last cell re-encrypts it and
# deletes the decrypted copy.
load_db(name, base)


In [5]:
from fyp.db_dataset import Tweet

In [6]:
# One row per distinct tweet author: only author_id is selected and the rows
# are grouped by it. .dicts() makes each row a dict keyed by "author_id".
all_twitter_user_ids_query = (
    Tweet
    .select(Tweet.author_id)
    .group_by(Tweet.author_id)
    .dicts()
)


In [7]:
# Decrypted Twitter user id -> User row id (the "anon" id, going by the
# mapping names), both coerced to int.
twitter_id_to_anon = {}
for user in tqdm(User.select()):
    twitter_id = int(crypto.fernet_decrypt(user.twitter_user_id))
    twitter_id_to_anon[twitter_id] = int(user.id)

# Reverse lookup: User row id -> decrypted Twitter user id.
anon_to_twitter_id = dict(
    (anon_id, twitter_id) for twitter_id, anon_id in twitter_id_to_anon.items()
)


100%|██████████| 14745/14745 [00:02<00:00, 5861.22it/s]


In [8]:
# Anonymised ids of every account that authored at least one tweet in the
# dataset. A missing author id raises KeyError rather than being skipped.
dataset_users = [
    twitter_id_to_anon[author["author_id"]]
    for author in tqdm(all_twitter_user_ids_query)
]


100%|██████████| 7281/7281 [00:00<00:00, 1243129.83it/s]


In [25]:
# Build the weighted edge list for the interaction graph and collect the
# dataset users that take part in heavy interactions.
MIN_COUNT_USERS_V2 = 50   # interaction-count threshold for the wider `users_v2` list
MIN_COUNT_USERS = 150     # interaction-count threshold for the stricter `users` list

pre_graph = []   # (interactor, user, {"weight": log(count)}) edge tuples
users = []       # dataset users on a relationship with count >= 150, first-seen order
users_v2 = []    # dataset users on a relationship with count >= 50, first-seen order

# Set mirrors give O(1) membership tests; scanning the lists for every row is
# quadratic. The lists themselves keep first-seen order, which determines the
# pair order produced by combinations() in later cells.
dataset_user_set = set(dataset_users)
seen_users = set()
seen_users_v2 = set()

for relation in tqdm(UserInteractorRelationships.select().dicts()):
    user = relation["user"]
    interactor = relation["interactor"]
    count = relation["count"]

    # Self-interactions are neither counted nor added to the graph.
    if user == interactor:
        continue

    # Only accounts that authored tweets in the dataset are candidates;
    # `user` is considered before `interactor`.
    candidates = [node for node in (user, interactor) if node in dataset_user_set]

    if count >= MIN_COUNT_USERS_V2:
        for node in candidates:
            if node not in seen_users_v2:
                seen_users_v2.add(node)
                users_v2.append(node)

    if count >= MIN_COUNT_USERS:
        for node in candidates:
            if node not in seen_users:
                seen_users.add(node)
                users.append(node)

    # Edge direction is interactor -> user. Log scaling means a relationship
    # with a single interaction gets weight 0.
    pre_graph.append((interactor, user, {"weight": np.log(count)}))


100%|██████████| 121028/121028 [00:02<00:00, 57485.98it/s]


In [26]:
# Directed interaction graph: one edge interactor -> user per relationship
# (self-interactions excluded), with "weight" = log(interaction count).
G = nx.DiGraph(pre_graph)

In [14]:
def phi(u, v):
    """Return the mean edge weight along the weighted shortest path u -> v.

    The path is the minimum total-"weight" path in the module-level graph
    ``G``; the result is that path's total weight divided by its edge count.

    Args:
        u: Source node in ``G``.
        v: Target node in ``G``.

    Returns:
        Total path weight / number of edges, or 0.0 when the path has no
        edges (``u == v``).

    Raises:
        Whatever networkx raises for a missing source node or an unreachable
        target (NodeNotFound / NetworkXNoPath); nothing is caught here.
    """
    # One Dijkstra run returns both the distance and the path. Calling
    # shortest_path_length and shortest_path separately repeats the search.
    total_weight, path = nx.single_source_dijkstra(G, source=u, target=v, weight="weight")
    hops = len(path) - 1

    if hops > 0:
        return total_weight / hops
    return 0.0


In [16]:
def interaction_graph_similarity(u, v):
    """Score how similar u and v are through their shared neighbours in ``G``.

    For every node n adjacent to both u and v (``G[u]`` and ``G[v]``), add
    ``phi(u, n) + phi(v, n)``; divide the total by the number of distinct
    nodes adjacent to either u or v.

    Raises:
        ZeroDivisionError: when neither node has any neighbours.
        Any error from ``G[u]`` / ``G[v]`` or ``phi`` propagates; the callers
        in this notebook catch KeyError and ZeroDivisionError.

    NOTE(review): the score is not bounded by 1 -- values above 1 appear in
    the results printed further down.
    """
    neighbours_u = set(G[u])
    neighbours_v = set(G[v])

    shared = neighbours_u & neighbours_v
    numerator = 0
    for n in shared:
        numerator += phi(u, n) + phi(v, n)

    return numerator / len(neighbours_u | neighbours_v)


In [18]:
from itertools import combinations

In [19]:
# Number of dataset users on at least one relationship with count >= 150.
len(users)

126

In [20]:
# Every unordered pair of the `users` candidates, in first-seen order;
# n users give n * (n - 1) / 2 pairs.
combos = list(combinations(users, 2))
len(combos)


7875

In [21]:
# Pairwise similarity for the `users` candidates, keyed by the (u, v) tuple.
similarity_scores = {}

for pair in tqdm(combos):
    try:
        similarity_scores[pair] = interaction_graph_similarity(*pair)
    except (KeyError, ZeroDivisionError):
        # A failed node lookup or a pair with no neighbours at all scores 0.
        similarity_scores[pair] = 0.0


100%|██████████| 7875/7875 [04:55<00:00, 26.65it/s] 


In [22]:
# Rank the pairs from most to least similar.
# NOTE(review): this displays every pair; consider slicing the result to keep
# the cell output short.
sorted(similarity_scores.items(), key=lambda x:x[1], reverse=True)


[((36913, 12062), 0.963457252632055),
 ((14616, 24937), 0.27465307216702745),
 ((25, 36913), 0.27465307216702745),
 ((47003, 8678), 0.14853153869141683),
 ((75445, 76355), 0.13427483528165787),
 ((2544, 47003), 0.12996509635498973),
 ((47003, 14365), 0.12996509635498973),
 ((36913, 14776), 0.12924850454918937),
 ((36913, 656), 0.12737533781659244),
 ((24937, 4967), 0.12206803207423442),
 ((36913, 4380), 0.11718531079126505),
 ((46992, 15480), 0.11299280121190393),
 ((14365, 8678), 0.11240224549620734),
 ((2544, 8678), 0.10944429166735978),
 ((50205, 14776), 0.1039720770839918),
 ((2544, 14365), 0.10143617276487003),
 ((47003, 15511), 0.10143617276487003),
 ((58686, 14776), 0.0999731510422998),
 ((22698, 14776), 0.09452007007635617),
 ((8678, 15511), 0.09041050181216677),
 ((14365, 15511), 0.08848687411403557),
 ((25, 12062), 0.08664339756999316),
 ((36913, 16914), 0.08370379342233218),
 ((2544, 15511), 0.08317766166719343),
 ((47003, 15835), 0.08208321875051983),
 ((36913, 15315), 0.07

In [23]:
# Spot-check the top-scoring pair (36913, 12062): which neighbours in G do
# the two nodes share?
vu = set(G[36913])
vv = set(G[12062])
vu.intersection(vv)


{27}

In [27]:
# Same pairwise scoring as for `users`, over the wider `users_v2` candidates
# (count >= 50). Note that this rebinds `combos`.
combos = list(combinations(users_v2, 2))
all_similarity_scores = {}

for pair in tqdm(combos):
    try:
        all_similarity_scores[pair] = interaction_graph_similarity(*pair)
    except (KeyError, ZeroDivisionError):
        # A failed node lookup or a pair with no neighbours at all scores 0.
        all_similarity_scores[pair] = 0.0


100%|██████████| 119316/119316 [1:04:34<00:00, 30.80it/s] 


In [41]:
# ((u, v), score) tuples ordered from highest to lowest score; the cell that
# builds result_users relies on this descending order.
all_result = sorted(all_similarity_scores.items(), key=lambda x:x[1], reverse=True)
all_result


[((49687, 49735), 1.92691450526411),
 ((9051, 7187), 1.1121363262031054),
 ((36913, 37457), 1.0989924596437632),
 ((9051, 6439), 0.9735068900911165),
 ((36913, 12062), 0.963457252632055),
 ((15095, 37457), 0.7327123292259292),
 ((23772, 37457), 0.7327123292259292),
 ((9051, 2395), 0.6953620643507975),
 ((9051, 14311), 0.6953620643507975),
 ((9051, 15918), 0.6953620643507975),
 ((58858, 65363), 0.6184601096741844),
 ((36913, 5517), 0.6179694123758117),
 ((22502, 37457), 0.6105936076882744),
 ((9051, 7399), 0.6084418063069478),
 ((9051, 15164), 0.6084418063069478),
 ((9051, 15654), 0.6084418063069478),
 ((22586, 37457), 0.5626021026138105),
 ((9051, 13348), 0.5560681631015527),
 ((24937, 27814), 0.5493061443340549),
 ((9051, 19838), 0.5408371611617313),
 ((9051, 19272), 0.5408371611617313),
 ((29315, 37457), 0.5233659494470924),
 ((24660, 37457), 0.5233659494470924),
 ((15080, 37457), 0.4579452057662058),
 ((22608, 37457), 0.4579452057662058),
 ((34660, 37457), 0.4579452057662058),
 ((37

In [43]:
# Both members of every pair scoring at least 0.5. A user is listed once per
# qualifying pair it appears in, so duplicates are expected.
result_users = []

for (u, v), r in all_result:
    # all_result is sorted in descending score order, so the first score
    # below the threshold ends the scan.
    if r < 0.5:
        break
    result_users.extend((u, v))


In [45]:
from collections import Counter


In [47]:
# How many qualifying pairs each user appears in, most frequent first
# (ties keep first-seen order).
counted_results = Counter(result_users)
sorted_counted_results = counted_results.most_common()
sorted_counted_results


[(9051, 11),
 (37457, 7),
 (36913, 3),
 (49687, 1),
 (49735, 1),
 (7187, 1),
 (6439, 1),
 (12062, 1),
 (15095, 1),
 (23772, 1),
 (2395, 1),
 (14311, 1),
 (15918, 1),
 (58858, 1),
 (65363, 1),
 (5517, 1),
 (22502, 1),
 (7399, 1),
 (15164, 1),
 (15654, 1),
 (22586, 1),
 (13348, 1),
 (24937, 1),
 (27814, 1),
 (19838, 1),
 (19272, 1),
 (29315, 1),
 (24660, 1)]

In [None]:
# Interactor ids of every relationship with count >= 365, heaviest first.
# NOTE(review): unlike the graph-building cell, self-interactions
# (user == interactor) are not filtered out here -- confirm this is intended.
suspicious_large_interaction = []

heavy_relations_query = (
    UserInteractorRelationships
    .select()
    .where(UserInteractorRelationships.count >= 365)
    .order_by(UserInteractorRelationships.count.desc())
    .dicts()
)

for idx, relation in enumerate(tqdm(heavy_relations_query), start=1):
    suspicious_large_interaction.append(relation["interactor"])
    print(idx, relation)


100%|██████████| 43/43 [00:00<00:00, 38073.69it/s]

1 {'id': 131952, 'user': 9213, 'interactor': 8765, 'count': 2540}
2 {'id': 122269, 'user': 7187, 'interactor': 75445, 'count': 2232}
3 {'id': 205419, 'user': 22698, 'interactor': 3915, 'count': 2016}
4 {'id': 205420, 'user': 22698, 'interactor': 6329, 'count': 1810}
5 {'id': 122265, 'user': 7187, 'interactor': 7187, 'count': 1791}
6 {'id': 131981, 'user': 9213, 'interactor': 389, 'count': 1495}
7 {'id': 172086, 'user': 15315, 'interactor': 49641, 'count': 1414}
8 {'id': 122270, 'user': 7187, 'interactor': 76355, 'count': 1323}
9 {'id': 205412, 'user': 22698, 'interactor': 16914, 'count': 1305}
10 {'id': 131954, 'user': 9213, 'interactor': 694, 'count': 1170}
11 {'id': 167426, 'user': 15087, 'interactor': 15087, 'count': 1159}
12 {'id': 74910, 'user': 1004, 'interactor': 1004, 'count': 1154}
13 {'id': 122264, 'user': 7187, 'interactor': 76192, 'count': 1152}
14 {'id': 154236, 'user': 14365, 'interactor': 1960, 'count': 1045}
15 {'id': 154286, 'user': 14365, 'interactor': 50276, 'count':




In [None]:
# Number of relationships with count >= 365 (one interactor id per relationship).
len(suspicious_large_interaction)

43

In [None]:
# Number of distinct interactors among those relationships.
len(set(suspicious_large_interaction))


42

In [None]:
# The interactor that occurs most often in the list, with its occurrence count.
Counter(suspicious_large_interaction).most_common(1)

[(36913, 2)]

In [None]:
import json

# Save the heavy-interaction interactor ids (duplicates included) as a JSON
# array in the current working directory.
with open("suspicious_network.json", "w") as json_file:
    json.dump(suspicious_large_interaction, json_file)


In [62]:
# Distinct users that appear in at least one pair scoring >= 0.5, in
# first-seen order (iterating a Counter yields its keys).
unique_sus_all = list(counted_results)
len(unique_sus_all)


28

In [60]:
import json

# Save the distinct users from the high-similarity pairs as a JSON array in
# the current working directory.
with open("suspicious_network_two.json", "w") as json_file:
    json.dump(unique_sus_all, json_file)


In [74]:
# Inspect node 22's adjacency in G and the log-count weight on each edge;
# a weight of 0.0 corresponds to a single interaction (log(1)).
G[22]

AtlasView({37: {'weight': 0.0}, 84: {'weight': 0.0}, 263: {'weight': 0.0}, 349: {'weight': 0.6931471805599453}, 376: {'weight': 0.0}, 698: {'weight': 0.0}, 1960: {'weight': 0.0}, 4152: {'weight': 0.0}, 5828: {'weight': 0.0}, 6098: {'weight': 0.0}, 6121: {'weight': 0.0}, 6124: {'weight': 0.0}, 6302: {'weight': 0.6931471805599453}, 6329: {'weight': 1.6094379124341003}, 6780: {'weight': 0.0}, 12211: {'weight': 1.0986122886681098}, 12824: {'weight': 0.0}, 13314: {'weight': 1.0986122886681098}, 14380: {'weight': 0.0}, 14396: {'weight': 0.0}, 14499: {'weight': 2.1972245773362196}, 14545: {'weight': 0.0}, 14664: {'weight': 0.0}, 15080: {'weight': 0.0}, 15157: {'weight': 0.0}, 15300: {'weight': 0.0}, 15360: {'weight': 1.0986122886681098}, 15517: {'weight': 0.0}, 15731: {'weight': 0.0}, 19275: {'weight': 0.0}, 19535: {'weight': 0.0}, 20863: {'weight': 1.791759469228055}, 21003: {'weight': 0.0}, 22845: {'weight': 0.0}, 23853: {'weight': 1.3862943611198906}, 23877: {'weight': 0.6931471805599453},

In [63]:
# Re-encrypt the database and delete the decrypted copy.
# NOTE(review): if an earlier cell fails, this cell never runs and the
# decrypted file stays on disk -- run it manually in that case.
unload_db(name, base)
