Add nodes for all users

In [1]:
from network_analysis import sorted_node_lifespan, find_interaction, extract_conversation, node_communication_frequency
from conti_network import find_users
from clean import time_parser
import datetime 
import networkx as nx
# Users are read via find_users from the users file; each one becomes a node.
Users = find_users('user_lists/users.txt')

# NOTE: the filter below (drop admin/alarm accounts and users whose lifespan is
# under 30 days) is currently disabled, so every user returned above is kept.
# def clean_users():
#     users = sorted_node_lifespan()
#     return_users = []
#     for u in users.keys():
#         if users[u] >= datetime.timedelta(days = 30):
#             if 'admin' not in u and 'alarm' not in u:
#                 return_users.append(u)
#     return return_users

# Undirected graph; at this point it holds nodes only, no edges.
G = nx.Graph()
G.add_nodes_from(Users)



Find interactions

In [2]:
def all_chat_timestamps(chat_log_path='logs/chat_logs.json',
                        jabber_log_path='logs/jabber_logs.json',
                        users_path='user_lists/users.txt'):
    """Collect the sorted message timestamps for every (sender, receiver) pair.

    Both log files are read with ``time_parser``; every record it yields is
    accessed through its ``'from'``, ``'to'`` and ``'ts'`` keys.

    Args:
        chat_log_path: Path of the chat log passed to ``time_parser``.
        jabber_log_path: Path of the jabber log passed to ``time_parser``.
        users_path: Text file with one username per line.

    Returns:
        dict: ``messages[sender][receiver]`` is the ascending list of ``'ts'``
        values of the messages sent from ``sender`` to ``receiver``. Every user
        listed in ``users_path`` has an entry, possibly an empty dict.
    """
    # Seed one (initially empty) entry per listed user, in file order.
    with open(users_path) as f:
        messages = {user: {} for user in f.read().splitlines()}

    # Both logs share the same record layout, so they go through one loop.
    for log_path in (chat_log_path, jabber_log_path):
        for record in time_parser(log_path):
            # setdefault on the sender as well: a sender that is missing from
            # the users file used to raise KeyError, whereas unknown receivers
            # were already accepted.
            by_receiver = messages.setdefault(record['from'], {})
            by_receiver.setdefault(record['to'], []).append(record['ts'])

    # Timestamps from the two logs are interleaved, so sort each list.
    for by_receiver in messages.values():
        for timestamps in by_receiver.values():
            timestamps.sort()
    return messages
interactions = all_chat_timestamps()

Add edges based on meaningful conversations

In [3]:
def generate_edges(interactions, graph=None):
    """Add a weighted edge for every pair of users that held a conversation.

    For each ``sender -> receiver`` entry, the timestamps in both directions
    are handed to ``extract_conversation``; the length of its result becomes
    the edge weight. Pairs with a weight of 0 get no edge.

    Args:
        interactions (dict): ``interactions[sender][receiver]`` -> list of
            timestamps (the structure built by ``all_chat_timestamps``).
        graph: Graph that receives the edges. Defaults to the notebook-level
            ``G`` so existing ``generate_edges(interactions)`` calls still work.

    Returns:
        None. ``graph`` is modified in place.
    """
    if graph is None:
        graph = G
    for user1, sent in interactions.items():
        for user2, inter1 in sent.items():
            # Timestamps of the replies (user2 -> user1); empty if none exist.
            inter2 = interactions.get(user2, {}).get(user1, [])
            both = {user1: inter1, user2: inter2}
            wgt = len(extract_conversation(both))
            # TODO: decide how to treat messages a user sent to themselves
            # (user1 == user2); `both` then collapses to a single key and a
            # self-loop may be added.
            if wgt > 0:
                # On an undirected nx.Graph the pair is visited from both
                # directions, so the later call overwrites the earlier weight.
                # `key` is kept as-is; on a plain Graph it is stored as an
                # ordinary edge attribute named 'key'.
                graph.add_edge(user1, user2, key='edge', weight=wgt)
generate_edges(interactions)
#print(G)

In [22]:
def normalize(dictionary):
    """Min-max scale the values of ``dictionary`` into [0, 1], in place.

    Args:
        dictionary (dict): Maps keys to numeric values.

    Returns:
        dict: The same dict object, with every value replaced by
        ``(value - min) / (max - min)``. When all values are equal the range is
        zero, so every value is set to 0.0 instead of dividing by zero. An
        empty dict is returned unchanged.
    """
    if not dictionary:
        return dictionary
    n = min(dictionary.values())
    m = max(dictionary.values())
    span = m - n
    # Only values are reassigned, so iterating over the keys is safe.
    for key in dictionary:
        dictionary[key] = (dictionary[key] - n) / span if span else 0.0
    return dictionary

In [45]:
# Per-node graph metrics. Each one is min-max normalised so that they can be
# averaged into a single score later on.
degrees = G.degree()
degrees_dict = normalize(dict(degrees))
betweeness_centrality_dict = normalize(nx.betweenness_centrality(G))
# NOTE(review): degree centrality is degree / (n - 1), so after min-max
# normalisation it looks identical to degrees_dict -- confirm that counting
# degree twice in the score is intended.
degree_centrality_dict = normalize(nx.degree_centrality(G))
hubs_authorities = normalize(nx.hits(G)[0])

# Number of maximal cliques each user belongs to. The cliques are enumerated
# once and tallied, instead of re-enumerating them for every user through the
# deprecated nx.cliques_containing_node.
user_cliques = {node: 0 for node in Users}
for clique in nx.find_cliques(G):
    for member in clique:
        if member in user_cliques:
            user_cliques[member] += 1

# Clustering coefficient for all nodes in one call, restricted to Users.
all_clustering = nx.clustering(G)
user_clustering_coefficient = {node: all_clustering[node] for node in Users}

# Mean shortest-path length from each user to every node it can reach. The
# result of shortest_path_length includes the source itself at distance 0, so
# an isolated user gets 0.
# NOTE(review): a larger mean distance raises the social score computed from
# this value -- confirm that this direction is intended.
user_shortest_path = {}
for node in Users:
    shortest_path = nx.shortest_path_length(G, source=node).values()
    user_shortest_path[node] = sum(shortest_path) / len(shortest_path)

user_clustering_coefficient = normalize(user_clustering_coefficient)
user_shortest_path = normalize(user_shortest_path)
user_cliques = normalize(user_cliques)

  A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)


Calculate social score

In [46]:
# The normalised metrics that feed the score, in summation order.
_score_components = (
    degrees_dict,
    betweeness_centrality_dict,
    degree_centrality_dict,
    hubs_authorities,
    user_clustering_coefficient,
    user_shortest_path,
    user_cliques,
)

# Social score = mean of the metrics above, expressed as a percentage.
social_score = {}
for u in Users:
    total = _score_components[0][u]
    for component in _score_components[1:]:
        total = total + component[u]
    social_score[u] = (total / len(_score_components)) * 100

# Re-order the mapping from the lowest to the highest score.
social_score = dict(sorted(social_score.items(), key=lambda pair: pair[1]))

Create a pandas DataFrame to store the scores

In [53]:
import pandas as pd

# One row per user (index = username), ordered like social_score.
# The original call passed a bare `column` after a keyword argument, which is
# a SyntaxError; the score column is now named explicitly via `columns=`.
df = pd.DataFrame.from_dict(social_score, orient='index', columns=['Social Score'])
# Aligned on the username index; users missing from degrees_dict get NaN.
df["Degree"] = pd.Series(degrees_dict)
print(df)

                  0    Degree
0x00lord   0.000000  0.000000
ahtung     0.000000  0.000000
air        0.000000  0.000000
airbnb1    0.000000  0.000000
alaska     0.000000  0.000000
...             ...       ...
buza      43.509731  0.600000
bentley   45.270605  0.550000
mango     47.821786  0.635714
defender  59.936479  0.764286
stern     76.684920  1.000000

[449 rows x 2 columns]
