Functions used to analyze our social network off Conti communications.

To generate network run conti_network.py

First, we import our necessary libraries and read our .gexf file into a nx.Graph. 

In [6]:
import networkx as nx
import json
import dateutil.parser
import datetime
from clean import time_parser
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statistics

G = nx.read_gexf('conti.gexf')

Sort users based off in-degrees and out-degrees 

In [7]:
def sort_in_degrees():
    indegrees = G.in_degree()
    sorted_nodes = sorted(indegrees, key=lambda x: x[1], reverse=True)
    return sorted_nodes

def sort_out_degrees():
    outdegrees = G.out_degree()
    sorted_nodes = sorted(outdegrees, key=lambda x: x[1], reverse=True)
    return sorted_nodes

top_senders = sort_out_degrees()
top_receivers = sort_in_degrees()
#print(top_receivers)

Message clustering based on 12 hour windows. Based off start- and end- times of messages. 

In [8]:
def message_cluster(message_lst):
    start = message_lst[0] + datetime.timedelta(hours=12)
    result = 1
    for i in range(len(message_lst)):
        if message_lst[i] > start:
            result+=1
            start = message_lst[i] + datetime.timedelta(hours=12)
    return result

Uses above function to find the number of interactions between a user and all other users.

In [14]:
def node_communication_frequency():
    #create a formula to determine number of interactions, not just messages
    #determine 12 hour differences between conversation 
    chat_logs = time_parser('logs/chat_logs.json')
    jabber_logs = time_parser('logs/jabber_logs.json')
    messages = {}
    with open('user_lists/users.txt') as f:
        users = f.read().splitlines()
    for i in users:user
        messages[i] = {}
    for i in users:
        for j in users:
            messages[i][j] = []
    for i in chat_logs:
        sender = i['from']
        receiver = i['to']
        messages[sender][receiver].append(i['ts'])
    for i in jabber_logs:
        sender = i['from']
        receiver = i['to']
        messages[sender][receiver].append(i['ts'])
    for i in messages.keys():
        for j in messages[i].keys():
            messages[i][j] = sorted(messages[i][j])
    interactions = {}
    for u in users:
        interactions[u] = {}
    for i in messages.keys():
        for j in messages[i].keys():
            if len(messages[i][j]) > 0: 
                interactions[i][j] = message_cluster(messages[i][j])
    return interactions

all_user_interactions = node_communication_frequency()
#print(all_user_interactions)
number_of_interactions = {}
for i in all_user_interactions.keys():
    number_of_interactions[i] = sum(all_user_interactions[i].values())
number_of_interactions = dict(sorted(number_of_interactions.items(), key=lambda item: item[1]))
#print(number_of_interactions)

Create histogram buckets based off running average between messages.

In [10]:
def create_buckets(message_lst, delta):
    start = message_lst[0] + datetime.timedelta(hours=delta)
    buckets = {}
    buckets[start] = 1
    for i in range(len(message_lst)):
        if message_lst[i] > start:
            start = message_lst[i] + datetime.timedelta(hours=delta)
            buckets[start] = 1
        else:
            buckets[start] += 1
    return buckets

Return a list of interaction time stamps of two users. 

In [15]:
def find_interaction(to, fro):
    chat_logs = time_parser('logs/chat_logs.json')
    jabber_logs = time_parser('logs/jabber_logs.json')
    messages = {}
    with open('user_lists/users.txt') as f:
        users = f.read().splitlines()
    for i in users:
        messages[i] = {}
    for i in users:
        for j in users:
            messages[i][j] = []
    for i in chat_logs:
        sender = i['from']
        receiver = i['to']
        messages[sender][receiver].append(i['ts'])
    for i in jabber_logs:
        sender = i['from']
        receiver = i['to']
        messages[sender][receiver].append(i['ts'])
    for i in messages.keys():
        for j in messages[i].keys():
            messages[i][j] = sorted(messages[i][j])
    return messages[to][fro]

    
user1 = 'professor'
user2 = 'target'
inter1 = find_interaction(user1, user2)
inter2 = find_interaction(user2, user1)

Finds the lifespan of each user. (Last interaction time - First interaction time)

In [16]:
def sorted_node_lifespan():
    user_frequency = {}
    with open('user_lists/users.txt') as f:
        users = f.read().splitlines()
    jabber_json = time_parser('logs/jabber_logs.json')
    chat_json = time_parser('logs/chat_logs.json')
    for u in users:
        user_frequency[u] = []
    for i in jabber_json:
        sender = i['from']
        receiver = i['to']
        user_frequency[sender].append(i['ts'])
        user_frequency[receiver].append(i['ts'])
    for i in chat_json:
        sender = i['from']
        receiver = i['to']
        user_frequency[sender].append(i['ts'])
        user_frequency[receiver].append(i['ts'])
    result = {}
    for u in users: 
        user_frequency[u] = sorted(user_frequency[u])
        result[u] = user_frequency[u][-1]-user_frequency[u][0]
    result = dict(sorted(result.items(), key=lambda item: item[1]))
    return result

top_lifespan_users = sorted_node_lifespan()
for i in top_lifespan_users:
    print(i, ": ", top_lifespan_users[i])

air :  0:00:00
badboy :  0:00:00
fff :  0:00:00
frost :  0:00:00
lom :  0:00:00
mops :  0:00:00
qwerqwerqwerqwer :  0:00:00
seven300 :  0:00:00
beny :  0:00:00.000474
mavelak :  0:00:00.000652
jora :  0:00:00.000698
packman :  0:00:00.000722
dantis :  0:00:00.000744
max17 :  0:00:00.000799
horse :  0:00:00.000958
def :  0:00:00.001225
vjud.q3mcco35auwcstmt.onion :  0:00:00.001285
ahtung :  0:00:00.001410
billgeizh :  0:00:00.001478
balzak :  0:00:00.001814
pin2 :  0:00:00.002135
nick :  0:00:00.002154
demetrius :  0:00:00.002472
mashroom :  0:00:00.002738
hookahplace :  0:00:00.006947
odw5mdwotufuxxrgw3pvqjjuze3e33bylylkl667h4nefwiimwqsumyd.onion :  0:00:00.020293
btdpnqwg2nqkqceetqnn7zlarduyjhvjwt :  0:00:00.022634
стов :  0:00:00.141095
efrain :  0:00:00.144245
twinq3mcco35auwcstmt.onion :  0:00:00.209371
saulgdmn :  0:00:00.528723
joynses :  0:00:00.594311
salmon21 :  0:00:00.803470
eleos :  0:00:01.276412
eleoslin :  0:00:01.287479
dylan :  0:00:01.335983
exploit.im :  0:00:01.9595

## Extracting Conversations
Extracts conversations between two users. Checks that users are both senders and receivers. Bases a 'conversation' unit off a running average between last sent message and next. If above conditions are met, then a conversation is added to the result output using start-time and end-time.

In [13]:
def extract_conversation(interaction_dict):
    interactions = []
    users = []
    for i in interaction_dict.keys():
        interactions += interaction_dict[i]
        users.append(i)
    user1 = users[0]
    user2 = users[1]
    interactions.sort()
    conversations = []
    init_interaction = interactions[0]
    if init_interaction in interaction_dict[user1]:
        sender2 = user2
    else:
        sender2 = user1
    check_users = False
    for i in range(len(interactions)-1):
        delta_new = interactions[i+1]-interactions[i]
        if interactions[i+1] in interaction_dict[sender2]:
            check_users = True
        if delta_new > datetime.timedelta(hours=8) and check_users:
            end_interaction = interactions[i+1]
            conversations.append([init_interaction, end_interaction])
            init_interaction = interactions[i+2]
    return conversations

both = {user1:inter1, user2:inter2}
print((extract_conversation(both)))

[[datetime.datetime(2020, 6, 22, 11, 24, 46, 377962), datetime.datetime(2020, 6, 24, 16, 23, 8, 117134)], [datetime.datetime(2020, 6, 24, 16, 23, 32, 513687), datetime.datetime(2020, 6, 28, 14, 9, 36, 393933)], [datetime.datetime(2020, 6, 29, 14, 19, 43, 863507), datetime.datetime(2020, 6, 29, 14, 19, 43, 863507)], [datetime.datetime(2020, 6, 30, 13, 46, 46, 983914), datetime.datetime(2020, 6, 30, 13, 46, 46, 983914)], [datetime.datetime(2020, 6, 30, 13, 50, 34, 678080), datetime.datetime(2020, 7, 6, 18, 33, 53, 626060)], [datetime.datetime(2020, 7, 6, 18, 34, 51, 798055), datetime.datetime(2020, 7, 7, 18, 16, 0, 947072)], [datetime.datetime(2020, 7, 7, 18, 16, 38, 460310), datetime.datetime(2020, 7, 8, 10, 40, 17, 129888)], [datetime.datetime(2020, 7, 9, 15, 18, 35, 592214), datetime.datetime(2020, 7, 9, 15, 18, 35, 592214)], [datetime.datetime(2020, 7, 9, 15, 18, 53, 607416), datetime.datetime(2020, 7, 15, 14, 18, 24, 900450)], [datetime.datetime(2020, 7, 20, 16, 53, 10, 269502), dat