In [1]:
import networkx as nx

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Silence every warning to keep cell output short. Warnings can be lengthy but are sometimes useful;
# comment out the filterwarnings line below if you want to see them.
import warnings
warnings.filterwarnings('ignore')

# Construct Networks

In [2]:
# Load the raw contact records as an integer array. Downstream cells read column 0 as the
# contact time and columns 1 and 2 as the IDs of the two individuals in contact.
data = np.loadtxt('Data/data1.dat', dtype='int')

In [3]:
def divide_dataset(data):
    '''
    Split the contact records into three consecutive time windows.

    Input: 
        data: the original dataset (12 days); an array-like whose rows are contacts and
              whose first column is the contact time in seconds
    Output: 
        data_list: a list of three partitioned datasets (4 days each), in chronological
                   order. Every entry is a 2-D array; a window without any contact is an
                   empty array of shape (0, n_columns), so column indexing such as
                   data_p[:, 1] keeps working downstream.
    '''
    
    seconds_per_day = 60*60*24
    time_division = [seconds_per_day * 4, seconds_per_day * 8]
    
    data = np.asarray(data)
    if data.ndim != 2:
        # An empty input has no column axis (assume the three columns time, ID1, ID2);
        # a single record may arrive as a 1-D row (e.g. np.loadtxt on a one-line file).
        data = data.reshape(0, 3) if data.size == 0 else np.atleast_2d(data)
    
    timestamps = data[:, 0]
    in_first = timestamps < time_division[0]
    in_second = ~in_first & (timestamps < time_division[1])
    in_third = ~(in_first | in_second) # everything else, as in an if/elif/else chain
    
    # Boolean-mask indexing returns new 2-D arrays and preserves the original row order
    data_list = [data[in_first], data[in_second], data[in_third]]
    
    return data_list

In [4]:
# Partition the full contact array into the three time windows
data_list = divide_dataset(data)

In [5]:
from collections import Counter

def process_dataset(data_list):
    '''
    Count how many contacts each pair of individuals has in every partition.

    Input: 
        data_list: a list of three partitioned datasets; each is a 2-D array whose
                   columns 1 and 2 hold the two individual IDs of a contact
    Output: 
        data_tuple: a list of three preprocessed datasets in the form of tuple: (ID1, ID2, weight). 
                    Contacts are undirected, so (a, b) and (b, a) are counted as the same
                    pair and reported once with ID1 <= ID2; weight is the number of
                    contacts of that pair. An empty partition yields an empty list.
    '''
    
    data_tuple = []
    
    for data_p in data_list:
        if len(data_p) == 0:
            # Nothing to count; also avoids column-indexing an empty 1-D array
            data_tuple.append([])
            continue
        
        # Normalise the orientation of each pair before counting. Otherwise (a, b) and
        # (b, a) become two weighted edges and, in an undirected graph, the second one
        # overwrites the weight of the first instead of adding to it.
        pair_counts = Counter(
            (id1, id2) if id1 <= id2 else (id2, id1)
            for id1, id2 in zip(data_p[:, 1], data_p[:, 2])
        )
        
        # Reorganise "(ID1, ID2): count" into (ID1, ID2, count) tuples, in first-seen order
        tuple_pairs = [(id1, id2, count) for (id1, id2), count in pair_counts.items()]
        
        data_tuple.append(tuple_pairs)
    
    return data_tuple

In [6]:
# Turn each partition into a list of (ID1, ID2, weight) tuples
data_tuple = process_dataset(data_list)

In [7]:
def get_network(data_tuple):
    '''
    Build one undirected weighted graph per partition, all sharing the same node set.

    Input: 
        data_tuple: a list of three preprocessed datasets in the form of tuple: (ID1, ID2, weight).
    Output: 
        G_list: a list of three undirected weighted networks. Every graph contains every
                individual that appears in any of the partitions, so an individual with
                no contact in a given partition is an isolated node in that graph.
    '''
    
    # Gather the individuals from the argument itself rather than from a module-level
    # variable, so the result depends only on what is passed in. This is done once,
    # outside the loop, because the node set is the same for every graph.
    individuals = set()
    for data_tuple_p in data_tuple:
        for id1, id2, _ in data_tuple_p:
            individuals.add(id1)
            individuals.add(id2)
    individuals = sorted(individuals) # fixed node order, independent of set iteration order
    
    G_list = []
    
    for data_tuple_p in data_tuple:
        G_p = nx.Graph()
        G_p.add_nodes_from(individuals) # Each individual is represented as a node in a graph
        G_p.add_weighted_edges_from(data_tuple_p)
        
        G_list.append(G_p)
    
    return G_list

In [8]:
# Build the per-partition graphs and unpack them into individual names for later cells
G_list = get_network(data_tuple)
G1, G2, G3 = G_list

# Add Department of Each Individual

In [9]:
# Load the department table as strings; downstream, column 0 is parsed as the individual
# ID and column 1 is used as the department label
data_department = np.loadtxt('Data/data2.txt', dtype = str)

In [10]:
# Map each individual ID (converted to int so it can match the integer node IDs) to its department
dict_department = {int(row[0]):row[1] for row in data_department}

In [11]:
# Store the department of each individual as the "department" node attribute of every graph
for G in G_list: 
    nx.set_node_attributes(G, dict_department, "department")

# Community Detection

In [46]:
import networkx.algorithms.community as nx_comm

def G_comm_dict(G, resolution = 0.5, seed = 0):
    '''
    Detect Louvain communities in G and label every node with its community index.

    Input:
        G: a networkx graph; it is modified in place (node attribute "comm" is set)
        resolution: resolution parameter passed to louvain_communities (default 0.5)
        seed: random seed passed to louvain_communities (default 0)
    Output:
        comm_dict: a dictionary "node: community index"; index 0 is the largest
                   community, 1 the second largest, and so on
    '''
    
    # Community detection
    comm = nx_comm.louvain_communities(G, resolution = resolution, seed = seed)
    
    # Rank the communities from largest to smallest; sorted() leaves "comm" untouched
    # and, being stable, keeps the detection order among communities of equal size
    ranked_comm = sorted(comm, key=len, reverse = True)
    
    # Create dict: every node maps to the rank of the community it belongs to
    comm_dict = {
        node: index
        for index, comm_set in enumerate(ranked_comm)
        for node in comm_set
    }
    
    # Add node attribute
    nx.set_node_attributes(G, comm_dict, "comm")
    
    return comm_dict

In [47]:
# Run community detection on every graph; each node receives a "comm" attribute
for G in G_list: 
    G_comm_dict(G)

# Inspect and Save Networks

In [48]:
# Attributes stored on node 513 in the first network
G1.nodes[513]

{'department': 'DSE', 'comm': 0}

In [49]:
# Attributes stored on node 513 in the second network
G2.nodes[513]

{'department': 'DSE', 'comm': 4}

In [50]:
# Attributes stored on node 513 in the third network
G3.nodes[513]

{'department': 'DSE', 'comm': 0}

In [51]:
# Distinct values of the "comm" node attribute in G1
set(nx.get_node_attributes(G1, 'comm').values())

{0, 1, 2, 3, 4, 5, 6, 7}

In [52]:
set(nx.get_node_attributes(G2, 'comm').values())

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}

In [53]:
set(nx.get_node_attributes(G3, 'comm').values())

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}

In [54]:
# nx.write_gpickle was deprecated in NetworkX 2.6 and removed in 3.0. It was a thin wrapper
# around pickle.dump, so pickling the graphs directly writes equivalent files and works
# on every NetworkX version. Load them back with pickle.load.
import pickle

for G, path in zip((G1, G2, G3), ("Data/G1.gpickle", "Data/G2.gpickle", "Data/G3.gpickle")):
    with open(path, "wb") as f:
        pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)