In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import scipy
import os
import pandas as pd
from prettyprinter import pprint
from itertools import combinations

# Processed CSVs are read from <parent of the working directory>/data/processed.
project_root = os.path.dirname(os.getcwd())
path_data_processed = os.path.join(project_root, "data", "processed")


def read_processed_csv(file_name):
    """Load one CSV file from the processed-data directory into a DataFrame."""
    return pd.read_csv(os.path.join(path_data_processed, file_name))


df_participants = read_processed_csv("participants.csv")
df_events = read_processed_csv("events.csv")
df_event_attendance = read_processed_csv("event_attendance.csv")

In [None]:
# initialise empty graph
# A MultiGraph is undirected and can hold several edges between the same two
# nodes; the edge-building cell below adds one edge per attendee pair per event.
G = nx.MultiGraph()

In [None]:
# add nodes
# One node per participant row: ParticipantId becomes the node id and every
# other column is stored as a node attribute.
for _, participant in df_participants.iterrows():
    node_attributes = participant.to_dict()
    node_id = node_attributes.pop("ParticipantId")
    G.add_node(node_id, **node_attributes)

# Look the preview nodes up by insertion position rather than assuming the
# node ids are exactly 0..n-1 (which raises KeyError for any other id scheme).
node_ids = list(G.nodes)
if node_ids:
    print("First node is:\n")
    pprint(G.nodes[node_ids[0]])
    print("Last node is:\n")
    pprint(G.nodes[node_ids[-1]])

In [None]:
# add edges

# generate combinations
# For every event, list each unordered pair of attendees. The pairs are
# materialised into lists because itertools.combinations returns a one-shot
# iterator: previewing it (as done below) would otherwise exhaust the first
# event's pairs, and no edges would be created for that event.
combinations_output = (
    df_event_attendance
    .groupby("EventId")["EventParticipantId"]
    .apply(lambda participant_ids: list(combinations(participant_ids, 2)))
)

display(df_event_attendance.head())

# preview the attendee pairs of the first event
for combination in combinations_output.tolist()[0:1]:
    for edge in combination:
        print(edge)

# One edge per attendee pair per event, carrying the event's columns (minus
# EventId) as edge attributes. The EventId is taken from the groupby index,
# not from the loop position, so non-contiguous ids or events without any
# attendance rows cannot attach the wrong event's attributes.
for event_id, combination in combinations_output.items():
    edge_attributes = df_events.loc[df_events["EventId"] == event_id].to_dict("records")[0]
    del edge_attributes["EventId"]
    for edge in combination:
        G.add_edge(edge[0], edge[1], **edge_attributes)

In [None]:
def generate_subgraph_viz(G, primary_node_id_list, secondary_node_id_list=None, seed=None):
    """Draw the given nodes together with all of their direct neighbours.

    Parameters
    ----------
    G : networkx graph
        Graph to take the subgraph from.
    primary_node_id_list : iterable of node ids
        Nodes drawn in red.
    secondary_node_id_list : iterable of node ids, optional
        Nodes drawn in green. ``None`` (the default) means no secondary nodes.
    seed : int, optional
        Passed to ``nx.spring_layout`` so the layout can be reproduced.
        ``None`` (the default) keeps the layout random.

    Notes
    -----
    Neighbours that are in neither list are drawn in blue. A node present in
    both lists is drawn in red. Nodes are labelled with their
    "ParticipantName" attribute where it is set. ``G.neighbors`` is called
    for every listed node, so each of them must exist in ``G``.
    """
    primary_nodes = set(primary_node_id_list)
    # None instead of a mutable [] default; any iterable is accepted.
    secondary_nodes = set() if secondary_node_id_list is None else set(secondary_node_id_list)

    # Selected nodes plus their direct neighbours.
    subgraph_nodes = set()
    for node_id in primary_nodes | secondary_nodes:
        subgraph_nodes.add(node_id)
        subgraph_nodes.update(G.neighbors(node_id))

    subgraph = G.subgraph(subgraph_nodes)
    pos = nx.spring_layout(subgraph, seed=seed)

    # Colours are built in the subgraph's own iteration order, which is the
    # order nx.draw walks the nodes in.
    colour_map = []
    for node in subgraph:
        if node in primary_nodes:
            colour_map.append("red")
        elif node in secondary_nodes:
            colour_map.append("green")
        else:
            colour_map.append("blue")

    nx.draw(subgraph, pos=pos, node_color=colour_map)

    node_labels = nx.get_node_attributes(subgraph, "ParticipantName")
    nx.draw_networkx_labels(subgraph, pos, labels=node_labels)

#     edge_labels_original = nx.get_edge_attributes(subgraph,"EventDate")
#     edge_labels = dict(((key[0], key[1]), value) for (key, value) in edge_labels_original.items())

#     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels);
#     nx.draw_networkx_edge_labels(subgraph, pos, edge_labels=[1,2,3])

In [None]:
# who could we invite to the next event?
attendee_list = [1,20,50]
attendee_max_size = 6  # NOTE(review): not used in the cells visible here - confirm it is still needed
generate_subgraph_viz(G, attendee_list)

# generate candidate nodes, who are not known to attendees
attendee_neighbours = []
for attendee in attendee_list:
    attendee_neighbours.extend(G.neighbors(attendee))

# Build the exclusion set once instead of concatenating two lists for every
# node in the graph.
excluded_nodes = set(attendee_list) | set(attendee_neighbours)
candidate_nodes = [node for node in G if node not in excluded_nodes]

# pair candidate nodes
pairs_to_calculate_jaccard = [
    (attendee, candidate)
    for attendee in attendee_list
    for candidate in candidate_nodes
]

# nx.jaccard_coefficient is not implemented for multigraphs, so collapse the
# parallel edges into a simple Graph first.
G_normalGraph = nx.Graph(G)
# Materialise the result: jaccard_coefficient returns a one-shot iterator, and
# a consumed iterator would make any re-run of a later cell see no rows.
jaccard_coefficient_result = list(
    nx.jaccard_coefficient(G_normalGraph, pairs_to_calculate_jaccard)
)

In [None]:
# Split the (node A, node B, score) triples into one column per field.
jaccard_rows = list(jaccard_coefficient_result)

nodeA_list = [row[0] for row in jaccard_rows]
nodeB_list = [row[1] for row in jaccard_rows]
jaccard_result_list = [row[2] for row in jaccard_rows]

jaccard_columns = {
    "NodeA": nodeA_list,
    "NodeB": nodeB_list,
    "JaccardResult": jaccard_result_list,
}
df_jaccard_result = pd.DataFrame.from_dict(jaccard_columns)

In [None]:
# Sort once, then show the three highest- and the three lowest-scoring pairs.
# (The previous slice [-4:-1] left out the very last, i.e. lowest, row.)
df_jaccard_sorted = df_jaccard_result.sort_values(by="JaccardResult", ascending=False)
display(df_jaccard_sorted.head(3))
display(df_jaccard_sorted.tail(3))

In [None]:
# Draw the attendees (red) next to nodes 96, 78 and 0 (green) and everyone's neighbours (blue).
# NOTE(review): the three ids are hard-coded - presumably copied from the Jaccard ranking displayed above; confirm.
generate_subgraph_viz(G, attendee_list, [96, 78, 0])

In [None]:
# Draw the attendees (red) next to nodes 40, 41 and 42 (green) and everyone's neighbours (blue).
# NOTE(review): the three ids are hard-coded - presumably copied from the Jaccard ranking displayed above; confirm.
generate_subgraph_viz(G, attendee_list, [40, 41, 42])