In [None]:
import pickle
import pandas as pd
import numpy as np
from collections import defaultdict
import math
import matplotlib.pyplot as plt
import networkx as nx

# 1. Load input

In [None]:
# Size tag of the extracted trace (presumably the number of trace lines read);
# it is embedded in the name of every "./extracted/*.p" file loaded below.
lines = 500_000

Load episode set $E$ with the users that retweeted each original tweet in the trace. 

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
# NOTE: pickle can execute arbitrary code; only load files produced by this project.
with open("./extracted/E" + str(lines) + ".p", "rb") as fh:
    E = pickle.load(fh)

In [None]:
# Remove repeated users from every episode while keeping first-appearance order
# (dict.fromkeys preserves insertion order and drops later duplicates).
for tweet, retweeters in E.items():
    E[tweet] = list(dict.fromkeys(retweeters))

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/D" + str(lines) + ".p", "rb") as fh:
    D = pickle.load(fh)

Load the set of original tweets denoted by $S$. 

The set of original tweets is denoted by $S$, where $|S|$ is the total number of original tweets.

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/S" + str(lines) + ".p", "rb") as fh:
    S = pickle.load(fh)

Load the set $U$ of unique users

In [None]:
# Open through a context manager so the file handle is closed after unpickling,
# then materialise the loaded collection as a list.
with open("./extracted/U" + str(lines) + ".p", "rb") as fh:
    U = list(pickle.load(fh))

Load $M_{ij}$ variables that count number of episodes where the ordered pair (i,j) appears

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/M" + str(lines) + ".p", "rb") as fh:
    M = pickle.load(fh)

Load $Q_{ij}$ results for the ordered pair (i,j) derived from the constrained algorithm

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/Q_constrained_" + str(lines) + ".p", "rb") as fh:
    Q = pickle.load(fh)

Load $s_{ij}$ derived from the constrained algorithm

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/s_constrained_" + str(lines) + ".p", "rb") as fh:
    s = pickle.load(fh)

Load $k_{ij}$ derived from Saito

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/k_saito_" + str(lines) + ".p", "rb") as fh:
    k = pickle.load(fh)

Load $Q_{ij}$ derived from Newman's method

In [None]:
# Open through a context manager so the file handle is closed after unpickling.
with open("./extracted/Q_newman_" + str(lines) + ".p", "rb") as fh:
    Q_newman = pickle.load(fh)

## Necessary functions

In [None]:
def flatten(obj):
    """Flatten one level of nesting into a single list.

    Parameters
    ----------
    obj : list or dict
        * list (or subclass) of iterables: the inner items are concatenated
          in order.
        * dict (or subclass such as defaultdict / OrderedDict) whose values are
          dicts: the inner dicts' values are concatenated in order.

    Returns
    -------
    list or None
        The flattened list, or None when ``obj`` is neither a list nor a dict.
    """
    # isinstance covers subclasses, so defaultdict no longer needs its own
    # branch and other dict/list subclasses are not silently ignored.
    if isinstance(obj, list):
        return [item for inner in obj for item in inner]
    if isinstance(obj, dict):
        return [value for key in obj for value in obj[key].values()]
    return None

In [None]:
def create_chain_graph(U,D):
    """Build the "chain" graph over the users in U.

    For every tweet in D, each user listed under one time key is linked to
    every user listed under the next time key (in the iteration order of
    ``D[tweet]``; presumably chronological -- confirm against how D is built).

    Parameters
    ----------
    U : iterable
        Nodes added to the graph up front (so isolated users are kept).
    D : mapping
        ``D[tweet][time]`` is an iterable of users.

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()
    G.add_nodes_from(U)
    for tweet in D:
        # Materialise the time keys once per tweet instead of rebuilding the
        # key list and searching it on every iteration.
        times = list(D[tweet])
        for time, next_time in zip(times, times[1:]):
            for u1 in D[tweet][time]:
                for u2 in D[tweet][next_time]:
                    G.add_edge(u1, u2)
    return G

In [None]:
def create_saito_graph(U, k, threshold=0.5):
    """Build a directed graph from the Saito weights ``k``.

    Parameters
    ----------
    U : iterable
        Nodes added to the graph up front (so isolated users are kept).
    k : mapping of mappings
        ``k[i][j]`` is the weight of the ordered pair (i, j).
    threshold : float, optional
        An edge i -> j is added only when ``k[i][j] > threshold``
        (default 0.5, the value previously hard-coded).

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()
    G.add_nodes_from(U)

    for i in k:
        for j in k[i]:
            if k[i][j] > threshold:
                G.add_edge(i, j)
    return G

In [None]:
def create_star_graph(U,E):
    """Build the "star" graph: in every episode the first user points to all others.

    U supplies the node set (added up front); E maps an episode key to its
    ordered list of users. Returns a networkx.DiGraph.
    """
    star = nx.DiGraph()
    star.add_nodes_from(U)
    for episode_key in E:
        users = E[episode_key]
        # The generator is empty for episodes with fewer than two users, so
        # users[0] is never evaluated for an empty episode.
        star.add_edges_from((users[0], follower) for follower in users[1:])
    return star

In [None]:
def create_our_graph(U, Q, threshold=0.5):
    """Build a directed graph from the constrained-algorithm values ``Q``.

    Parameters
    ----------
    U : iterable
        Nodes added to the graph up front (so isolated users are kept).
    Q : mapping of mappings
        ``Q[i][j]`` is the value for the ordered pair (i, j).
    threshold : float, optional
        An edge i -> j is added only when ``Q[i][j] > threshold``
        (default 0.5, the value previously hard-coded).

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()
    G.add_nodes_from(U)
    for i in Q:
        for j in Q[i]:
            if Q[i][j] > threshold:
                G.add_edge(i, j)
    return G

In [None]:
def create_newman_graph(U, Q, threshold=0.5):
    """Build a directed graph from the Newman-derived values ``Q``.

    Parameters
    ----------
    U : iterable
        Nodes added to the graph up front (so isolated users are kept).
    Q : mapping of mappings
        ``Q[i][j]`` is the value for the ordered pair (i, j).
    threshold : float, optional
        An edge i -> j is added only when ``Q[i][j] > threshold``
        (default 0.5, the value previously hard-coded).

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()
    G.add_nodes_from(U)
    for i in Q:
        for j in Q[i]:
            if Q[i][j] > threshold:
                G.add_edge(i, j)
    return G

In [None]:
def f_check(E, Q):
    '''
    Count the episodes that the thresholded graph cannot explain.

    An episode is feasible when every user after the first one has at least
    one incoming edge (``Q[i][j] > 0.5``) from a user ``i`` that appears
    earlier in the same episode. Feasibility is decided per user: several
    parents for one user do not compensate for another user having none.

    Parameters
    ----------
    E : mapping
        Episode key -> ordered list of users. Repeated users are considered
        only at their first appearance.
    Q : mapping of mappings
        ``Q[i][j]`` is the value for the ordered pair (i, j); missing pairs
        count as "no edge".

    Returns
    -------
    int
        Number of infeasible episodes.
    '''
    infeasible_episodes = 0
    for s in E:
        # Keep first appearances only, preserving order.
        users = list(dict.fromkeys(E[s]))
        for pos in range(1, len(users)):
            j = users[pos]
            has_parent = any(
                i in Q and j in Q[i] and Q[i][j] > 0.5
                for i in users[:pos]
            )
            if not has_parent:
                # One uncovered user is enough to make the episode infeasible.
                infeasible_episodes += 1
                break
    return infeasible_episodes

In [None]:
def longest_path(E, U, Q, k, graph_type):
    """Find the longest per-episode path for the given graph type.

    For every episode a fresh DiGraph containing all nodes of U is built:

    * 'ours' / 'newman': edge i -> j when i precedes j in the episode and
      ``Q[i][j] > 0.5``;
    * 'saito': same rule using ``k`` instead of ``Q``;
    * 'star': the first user of the episode points to every other user.

    Parameters
    ----------
    E : mapping
        Episode key -> ordered list of users.
    U : iterable
        Nodes added to every episode graph.
    Q, k : mapping of mappings
        Pair values; missing pairs count as "no edge".
    graph_type : str
        One of 'ours', 'newman', 'saito', 'star'. Any other value (including
        'chain') is not handled and yields ``(0, 0)``.

    Returns
    -------
    (int, list or int)
        Number of nodes on the longest path found and the path itself; the
        path is ``0`` when no episode was processed.
    """
    max_l = 0
    max_path = 0
    if graph_type == 'ours' or graph_type == 'newman':
        weights = Q
    elif graph_type == 'saito':
        weights = k
    elif graph_type == 'star':
        weights = None
    else:
        return max_l, max_path

    for s in E:
        users = E[s]
        G = nx.DiGraph()
        G.add_nodes_from(U)
        if weights is None:
            for j in users[1:]:
                G.add_edge(users[0], j)
        else:
            for j in users[1:]:
                # Predecessors are the users before j's first appearance.
                for i in users[:users.index(j)]:
                    # Membership checks avoid a KeyError for users without
                    # outgoing values (and avoid growing a defaultdict).
                    if i in weights and j in weights[i] and weights[i][j] > 0.5:
                        G.add_edge(i, j)
        # Compute the longest path once per episode and reuse it.
        path = nx.dag_longest_path(G)
        if len(path) > max_l:
            max_l = len(path)
            max_path = path
    return max_l, max_path

In [None]:
def plot_tweetgraph(s, E, S, Q, k, pos, graph_type):
    """Draw the graph of a single episode ``s`` for the given graph type.

    Parameters
    ----------
    s : hashable
        Episode (tweet) key used to index E, S and, for 'chain', the
        module-level ``D``.
    E : mapping
        Episode key -> ordered list of users.
    S : mapping
        ``S[s]`` is the node drawn in green (presumably the author of the
        original tweet -- confirm against how S is built).
    Q, k : mapping of mappings
        Pair values used for 'ours'/'newman' (Q) and 'saito' (k); an edge
        i -> j is drawn when i precedes j in the episode and the value
        exceeds 0.5. Missing pairs count as "no edge".
    pos : any
        Ignored: the layout is always recomputed with ``nx.spring_layout``.
    graph_type : str
        One of 'ours', 'newman', 'saito', 'star', 'chain'.
    """
    G = nx.DiGraph()
    if graph_type == 'ours' or graph_type == 'newman' or graph_type == 'saito':
        weights = k if graph_type == 'saito' else Q
        t = 0  # running counter used as the edge label
        for j in E[s][1:]:
            G.add_node(j)
            indx = E[s].index(j)
            for i in E[s][:indx]:
                G.add_node(i)
                if i in weights and j in weights[i] and weights[i][j] > 0.5:
                    G.add_edge(i, j, label=f'{t}')
                    t += 1
    elif graph_type == 'star':
        G.add_node(E[s][0])
        for j in E[s][1:]:
            G.add_edge(E[s][0], j)
            G.add_node(j)
    elif graph_type == 'chain':
        # Use the episode passed in as `s`; the previous code read a leaked
        # global loop variable and therefore plotted an unrelated tweet.
        times = list(D[s])
        for time, next_time in zip(times, times[1:]):
            for u1 in D[s][time]:
                for u2 in D[s][next_time]:
                    G.add_edge(u1, u2)

    # Highlight the node recorded in S for this episode.
    color_map = []
    for node in G:
        if node == S[s]:
            color_map.append('green')
        else:
            color_map.append('yellow')

    pos = nx.spring_layout(G)
    nx.draw_networkx_nodes(G, pos, node_color = color_map, cmap=plt.get_cmap('jet'), node_size = 300)
    nx.draw_networkx_edges(G, pos, edge_color='r', arrows=True)
    nx.draw_networkx_labels(G, pos, font_size=8)
    if graph_type == 'ours' or graph_type == 'saito':
        nx.draw_networkx_edge_labels(G, pos, font_size=8)
    plt.show()

# Statistics

## 1. number of infeasible episodes

In [None]:
# Result table: one list per metric, each ordered like the graph types below.
data = {
    'Graph Type with Lines: ' + str(lines): ['Ours', 'Saito', 'Star', 'Chain', 'Newman'],
}

In [None]:
# Feasibility is evaluated for the three weighted graphs only; the Star and
# Chain columns are filled with a hard-coded 0.
inf_ep_ours, inf_ep_saito, inf_ep_newman = (
    f_check(E, weights) for weights in (Q, k, Q_newman)
)
data['Infeasible Episodes'] = [inf_ep_ours, inf_ep_saito, 0, 0, inf_ep_newman]

## 2. Number of edges

In [None]:
# One graph per method, all built over the same node set U.
G_ours = create_our_graph(U, Q)
G_saito = create_saito_graph(U, k)
G_star = create_star_graph(U, E)
G_chain = create_chain_graph(U, D)
G_newman = create_newman_graph(U, Q_newman)

In [None]:
# Edge count of every graph, in table order.
edges_ours, edges_saito, edges_star, edges_chain, edges_newman = (
    len(graph.edges())
    for graph in (G_ours, G_saito, G_star, G_chain, G_newman)
)
data['Number of edges'] = [edges_ours, edges_saito, edges_star, edges_chain, edges_newman]

## 3. Average out degree

In [None]:
def _mean_out_degree(graph):
    """Sum of the out-degrees divided by the number of nodes."""
    return sum(entry[1] for entry in graph.out_degree()) / float(len(graph))


av_ours = _mean_out_degree(G_ours)
av_saito = _mean_out_degree(G_saito)
av_star = _mean_out_degree(G_star)
av_chain = _mean_out_degree(G_chain)
av_newman = _mean_out_degree(G_newman)

data['Average out degree'] = [av_ours, av_saito, av_star, av_chain, av_newman]

In [None]:
# Take the maximum over the degree values directly. Packing the (node, degree)
# pairs into a NumPy array coerces both columns to one dtype, so string node
# ids would turn the degrees into strings and give a lexicographic maximum.
degree_sequence = list(G_ours.out_degree())
max_degree_our = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_saito.out_degree())
max_degree_saito = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_star.out_degree())
max_degree_star = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_chain.out_degree())
max_degree_chain = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_newman.out_degree())
max_degree_newman = max(degree for _, degree in degree_sequence)

data['Max out degree'] = [max_degree_our, max_degree_saito, max_degree_star, max_degree_chain, max_degree_newman]

In [None]:
# Take the maximum over the degree values directly. Packing the (node, degree)
# pairs into a NumPy array coerces both columns to one dtype, so string node
# ids would turn the degrees into strings and give a lexicographic maximum.
# NOTE: the max_degree_* names are reused here and now hold in-degree maxima.
degree_sequence = list(G_ours.in_degree())
max_degree_our = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_saito.in_degree())
max_degree_saito = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_star.in_degree())
max_degree_star = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_chain.in_degree())
max_degree_chain = max(degree for _, degree in degree_sequence)

degree_sequence = list(G_newman.in_degree())
max_degree_newman = max(degree for _, degree in degree_sequence)

data['Max in degree'] = [max_degree_our, max_degree_saito, max_degree_star, max_degree_chain, max_degree_newman]

## 4. Graph diameter

The diameter of a graph $G$ is the largest shortest-path distance between any pair of vertices. Here it is computed only over ordered pairs that are connected by a directed path.

In [None]:
def avg_sh_path(G, graph_type):
    """Average shortest-path length and diameter over reachable node pairs.

    Parameters
    ----------
    G : networkx graph
        Graph to analyse.
    graph_type : str
        Unused; kept for call compatibility.

    Returns
    -------
    (float, int)
        * mean shortest-path length over ordered pairs (i, j), i != j, where
          j is reachable from i; ``0.0`` when no such pair exists;
        * the largest of those shortest-path lengths (``0`` when none exists).
    """
    total = 0
    pairs = 0
    diameter = 0
    # Consume the generator source by source instead of materialising the
    # whole all-pairs dictionary in memory.
    for source, distances in nx.all_pairs_shortest_path_length(G):
        for target, dist in distances.items():
            if target != source:
                total += dist
                pairs += 1
                if dist > diameter:
                    diameter = dist
    # Guard against graphs in which no node reaches another one.
    avg_spl = total / pairs if pairs else 0.0
    return avg_spl, diameter

In [None]:
# (average shortest path, diameter) for every graph, in table order.
(
    (avg_ours, d_ours),
    (avg_saito, d_saito),
    (avg_star, d_star),
    (avg_chain, d_chain),
    (avg_newman, d_newman),
) = (
    avg_sh_path(graph, label)
    for graph, label in (
        (G_ours, 'Ours'),
        (G_saito, 'Saito'),
        (G_star, 'Star'),
        (G_chain, 'Chain'),
        (G_newman, 'Newman'),
    )
)

In [None]:
# Add both path metrics to the result table (insertion order is preserved).
data.update({
    'Graph diameter': [d_ours, d_saito, d_star, d_chain, d_newman],
    'Average shortest path': [avg_ours, avg_saito, avg_star, avg_chain, avg_newman],
})

## 5. Number of connected components

In [None]:
def number_cc(G, graph_type):
    """Count the connected components of G that contain more than one node.

    ``graph_type`` is unused. Returns ``(scc, wcc)``: the number of
    non-trivial strongly and weakly connected components, respectively.
    """
    def _count_nontrivial(components):
        # Single-node components are skipped.
        return sum(1 for nodes in components if len(G.subgraph(nodes)) > 1)

    scc = _count_nontrivial(nx.strongly_connected_components(G))
    wcc = _count_nontrivial(nx.weakly_connected_components(G))
    return scc, wcc

In [None]:
# (strongly, weakly) connected component counts for every graph, in table order.
(
    (scc_ours, wcc_ours),
    (scc_saito, wcc_saito),
    (scc_star, wcc_star),
    (scc_chain, wcc_chain),
    (scc_newman, wcc_newman),
) = (
    number_cc(graph, label)
    for graph, label in (
        (G_ours, 'Ours'),
        (G_saito, 'Saito'),
        (G_star, 'Star'),
        (G_chain, 'Chain'),
        (G_newman, 'Newman'),
    )
)

data.update({
    'Number of scc': [scc_ours, scc_saito, scc_star, scc_chain, scc_newman],
    'Number of wcc': [wcc_ours, wcc_saito, wcc_star, wcc_chain, wcc_newman],
})

# Save results

In [None]:
# One row per graph type, one column per metric collected in `data`.
df = pd.DataFrame(data)
# mode='a' appends to an existing Results.csv, and header=True writes the
# column names again on every run, so the file accumulates one header row plus
# five data rows per execution.
df.to_csv('./Results.csv', mode='a', index = False, header=True)