In [1]:
import os
import networkx as nx
from tqdm import tqdm
import pandas as pd
import numpy as np

from fyp.crypto import Crypto
from fyp.db  import User, UserInteractorRelationships


In [2]:
# Shared crypto helper: used in this notebook for age file encryption/decryption
# of the database and for Fernet decryption of stored Twitter user ids.
crypto = Crypto()


In [3]:
# Directory prefix under which the encrypted/decrypted database files live.
# NOTE(review): hardcoded, user-specific absolute path — consider making it configurable.
base = '/its/home/ep396/Documents/FYP/'
# Dataset label interpolated into the database filenames by load_db / unload_db.
name = "dataset"

def load_db(name, base):
    """Decrypt the dataset database with the notebook-level ``crypto`` helper.

    Builds the paths ``encrypted_{name}.db`` and ``decrypted_{name}.db`` inside
    ``base`` and passes them, in that order, to ``crypto.age_decrypt_file``
    (presumably source then destination — confirm against ``fyp.crypto``).

    Args:
        name: Dataset label embedded in both filenames.
        base: Directory containing the database files; a trailing path
            separator is optional.
    """
    # os.path.join adds the separator only when it is missing, so `base` works
    # with or without a trailing slash (plain concatenation silently produced
    # a wrong path such as ".../FYPencrypted_dataset.db").
    encrypted_path = os.path.join(base, f"encrypted_{name}.db")
    decrypted_path = os.path.join(base, f"decrypted_{name}.db")
    crypto.age_decrypt_file(encrypted_path, decrypted_path)

def unload_db(name, base):
    """Re-encrypt the dataset database and delete the decrypted copy.

    Builds the paths ``decrypted_{name}.db`` and ``encrypted_{name}.db`` inside
    ``base``, passes them, in that order, to ``crypto.age_encrypt_file``
    (presumably source then destination — confirm against ``fyp.crypto``), and
    then removes the decrypted file from disk.

    Args:
        name: Dataset label embedded in both filenames.
        base: Directory containing the database files; a trailing path
            separator is optional.
    """
    # os.path.join adds the separator only when it is missing, so `base` works
    # with or without a trailing slash.
    encrypted_path = os.path.join(base, f"encrypted_{name}.db")
    decrypted_path = os.path.join(base, f"decrypted_{name}.db")
    crypto.age_encrypt_file(decrypted_path, encrypted_path)

    # Only reached when encryption did not raise, so the decrypted copy is never
    # deleted without the encryption call having completed.
    os.remove(decrypted_path)


In [4]:
# Decrypt the dataset database before the queries below run
# (presumably the fyp.db models read from the decrypted file — confirm).
load_db(name, base)


In [5]:
from fyp.db_dataset import Tweet


In [6]:
# Map each decrypted Twitter user id to the row id of its User record.
twitter_id_to_anon = dict(
    (int(crypto.fernet_decrypt(row.twitter_user_id)), int(row.id))
    for row in tqdm(User.select())
)
# Reverse lookup: User row id -> decrypted Twitter user id.
anon_to_twitter_id = dict(
    zip(twitter_id_to_anon.values(), twitter_id_to_anon.keys())
)


100%|██████████| 14745/14745 [00:03<00:00, 4615.60it/s]


In [7]:
# Edge list for the interaction graph: one (interactor, user, attrs) triple per
# relationship row, with the row's count stored as the edge weight.
# Rows where a user interacts with themselves are dropped (no self-loops).
pre_graph = [
    (row["interactor"], row["user"], {"weight": row["count"]})
    for row in tqdm(UserInteractorRelationships.select().dicts())
    if row["user"] != row["interactor"]
]


100%|██████████| 121028/121028 [00:00<00:00, 580315.66it/s]


In [8]:
# Directed graph built from the edge list: edges run interactor -> user and
# carry the interaction count in their "weight" attribute.
G = nx.DiGraph(pre_graph)


In [9]:
# Unweighted PageRank: weight=None makes networkx treat every edge as weight 1,
# so only the existence of an interaction counts, not its recorded count.
# alpha is the damping factor (0.9 here instead of networkx's default 0.85).
pagerank_no_weight = nx.pagerank(G, weight=None, alpha=0.9)


In [10]:
# Show every node with its unweighted PageRank score, highest score first.
sorted(
    pagerank_no_weight.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)


[(22, 0.032938635146758606),
 (25, 0.023825895442134846),
 (20, 0.019975700181278297),
 (17, 0.01586638293758769),
 (16, 0.015288989524431758),
 (27, 0.010395399087589643),
 (26, 0.008982360547636088),
 (15602, 0.008610464118085137),
 (5, 0.008572881685749286),
 (21, 0.00787411803355292),
 (28, 0.007449759956149896),
 (6, 0.007233272500068555),
 (9, 0.006667378649785819),
 (13, 0.0064045078481691844),
 (23875, 0.005912733517504022),
 (698, 0.00577076642446586),
 (10, 0.005707917673333158),
 (19, 0.005383612710920481),
 (28514, 0.0051443103777262045),
 (14643, 0.004845573198058517),
 (12, 0.004660552600272632),
 (11686, 0.004353219660561848),
 (17651, 0.0043268715132462705),
 (2, 0.004213242800886492),
 (35347, 0.00413538659977213),
 (6121, 0.003962773875176903),
 (16646, 0.0037484622573800835),
 (23197, 0.0034959461034351417),
 (22944, 0.0031511464763993306),
 (33772, 0.003125856559762744),
 (22521, 0.0030898127481562283),
 (20761, 0.0030879601798262444),
 (29, 0.0027752836500043797),


In [11]:
# Weighted PageRank: each edge contributes according to its "weight" attribute
# (the interaction count set when the edge list was built). Damping factor 0.9.
pagerank_w_rank = nx.pagerank(G, weight="weight", alpha=0.9)


In [12]:
# Full ranking of (node, weighted PageRank score) pairs, highest score first.
all_sorted_results = sorted(
    pagerank_w_rank.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)
all_sorted_results


[(22, 0.052805455767940906),
 (25, 0.03317132886152742),
 (17, 0.03137005183946231),
 (16, 0.026175207105160005),
 (20, 0.019142487532239412),
 (27, 0.018913406216777253),
 (13, 0.013349483740401841),
 (12, 0.012917151204121109),
 (22698, 0.010832212580792755),
 (5, 0.010703763912459497),
 (21, 0.008766143726576003),
 (15602, 0.007921403203483255),
 (6, 0.007561259166001367),
 (26, 0.007355384054319593),
 (13821, 0.0072553540827923375),
 (10, 0.007126213266590877),
 (9, 0.006912833771504673),
 (14499, 0.00596304693712527),
 (23197, 0.005803398823214704),
 (28, 0.005670791564476904),
 (3915, 0.005440530054722164),
 (28514, 0.005389807883307646),
 (22944, 0.0053833348741109625),
 (29315, 0.005359458019950875),
 (2, 0.005243931145964984),
 (20863, 0.005148435640097382),
 (3883, 0.004984285533784013),
 (30, 0.004941887240526029),
 (678, 0.004805527813806071),
 (19, 0.004115592705425565),
 (9488, 0.004004152164755533),
 (14365, 0.0038427348922618555),
 (7243, 0.003804183711237434),
 (7833, 

In [13]:
import json


In [14]:
# Read the JSON list of ids from suspicious.json. The context manager closes
# the file even if parsing raises, which the bare open()/close() pair did not.
with open("suspicious.json") as f:
    sus_raw = json.load(f)
# Normalise every entry to int; a distinct loop name avoids shadowing `sus`.
sus = [int(raw_id) for raw_id in sus_raw]


In [15]:
# Read the JSON list of ids from suspicious_network.json. The context manager
# closes the file even if parsing raises, which the bare open()/close() pair
# did not.
with open("suspicious_network.json") as f:
    sus_raw = json.load(f)
# Normalise every entry to int.
sus_net_one = [int(raw_id) for raw_id in sus_raw]


In [16]:
# Read the JSON list of ids from suspicious_network_two.json. The context
# manager closes the file even if parsing raises, which the bare
# open()/close() pair did not.
with open("suspicious_network_two.json") as f:
    sus_raw = json.load(f)
# Normalise every entry to int.
sus_net_two = [int(raw_id) for raw_id in sus_raw]


In [17]:
# Distinct ids appearing in any of the three suspicious lists.
all_sus_users = set(sus_net_two) | (set(sus_net_one) | set(sus))
len(all_sus_users)


96

In [18]:
# Total number of entries across the three lists, duplicates included
# (compare with the size of the de-duplicated set to see the overlap).
sum(map(len, (sus, sus_net_one, sus_net_two)))


102

In [19]:
from collections import Counter

# How many times each id occurs across the three lists, most frequent first;
# ids with equal counts keep their first-seen order.
Counter(sus_net_two + sus_net_one + sus).most_common()


[(36913, 3),
 (7187, 2),
 (58858, 2),
 (50605, 2),
 (22698, 2),
 (49687, 1),
 (49735, 1),
 (9051, 1),
 (37457, 1),
 (6439, 1),
 (12062, 1),
 (15095, 1),
 (23772, 1),
 (2395, 1),
 (14311, 1),
 (15918, 1),
 (22502, 1),
 (7399, 1),
 (15164, 1),
 (15654, 1),
 (65363, 1),
 (5517, 1),
 (13348, 1),
 (19838, 1),
 (19272, 1),
 (22586, 1),
 (29315, 1),
 (24660, 1),
 (8765, 1),
 (75445, 1),
 (3915, 1),
 (6329, 1),
 (389, 1),
 (49641, 1),
 (76355, 1),
 (16914, 1),
 (694, 1),
 (15087, 1),
 (1004, 1),
 (76192, 1),
 (1960, 1),
 (50276, 1),
 (2204, 1),
 (53444, 1),
 (28021, 1),
 (199, 1),
 (1060, 1),
 (8678, 1),
 (58970, 1),
 (24937, 1),
 (14453, 1),
 (47003, 1),
 (9213, 1),
 (6196, 1),
 (14776, 1),
 (52764, 1),
 (47134, 1),
 (50904, 1),
 (45751, 1),
 (8739, 1),
 (2293, 1),
 (4380, 1),
 (15156, 1),
 (6334, 1),
 (76362, 1),
 (1062, 1),
 (5061, 1),
 (13144, 1),
 (27540, 1),
 (209, 1),
 (22646, 1),
 (15272, 1),
 (22441, 1),
 (6175, 1),
 (79282, 1),
 (16925, 1),
 (6751, 1),
 (8417, 1),
 (46807, 1),
 (2628

In [20]:
# Weighted PageRank score of every suspicious id. Direct indexing means an id
# that is not a node of the graph raises KeyError.
suspicious_rankings = {
    suspect: pagerank_w_rank[suspect]
    for suspect in all_sus_users
}


In [21]:
# Suspicious ids ordered by weighted PageRank score, highest first.
results = sorted(
    suspicious_rankings.items(),
    key=lambda suspect_score: suspect_score[1],
    reverse=True,
)
results


[(22698, 0.010832212580792755),
 (3915, 0.005440530054722164),
 (29315, 0.005359458019950875),
 (30, 0.004941887240526029),
 (6329, 0.003401041705455838),
 (9213, 0.0031092571892195743),
 (13314, 0.002789282282503237),
 (15087, 0.002768148123481331),
 (14311, 0.002350584776279505),
 (15315, 0.001981081232550488),
 (15272, 0.0014691306038686369),
 (22174, 0.0012444939756858459),
 (1060, 0.001219968574809135),
 (1960, 0.001216500285476196),
 (1004, 0.0010398670307756636),
 (15414, 0.0008310498538716543),
 (6334, 0.0008195500428032696),
 (7399, 0.0006151740960053192),
 (14776, 0.000604586189092213),
 (6175, 0.0005811357693472354),
 (8678, 0.0004526917328571475),
 (24660, 0.0004347290147867982),
 (6751, 0.0004327990733325259),
 (7187, 0.0004287187697267466),
 (37457, 0.00042576443272271766),
 (1945, 0.0003664535667367243),
 (8739, 0.00029567725604807385),
 (2293, 0.000290598088854286),
 (24937, 0.000285423689831394),
 (19838, 0.0002756031989645797),
 (209, 0.0002548595243351922),
 (15480, 

In [22]:
# Mean weighted PageRank score over the suspicious ids.
avg_sus = sum(score for _suspect, score in results) / len(results)


In [23]:
# Display the mean weighted PageRank score of the suspicious ids.
avg_sus


0.0006293615078049171

In [24]:
# Find where the suspicious average falls in the full ranking: print the
# 0-based position of the first entry scoring below the average, which equals
# the number of nodes scoring at or above it. Prints nothing if no entry is lower.
for position, (node, score) in enumerate(all_sorted_results):
    if score < avg_sus:
        print(position)
        break


216


In [25]:
# Total number of ranked nodes, to put the cut-off position in context.
len(all_sorted_results)

14745

In [None]:
# Analysis finished: re-encrypt the database and delete the decrypted copy from disk.
unload_db(name, base)
