# Temporal networks

Abu Ibne Bayazid

Consider a human contact dataset (highschool_2011.csv) which consists of contacts between highschool students. Column 1 indicates the time and columns 2 and 3 indicates the node ids (you can ignore the rest of the columns). 

Write a function that takes as input the dataset and calculates the temporal correlation present in the dataset. 

In [126]:
import numpy as np

In [127]:
def createEdgelist(fname):
    T = 1
    current_time = 0
    edge_list = []
    edge = []
    with open('datasets/highschool_2011.csv') as fs:
        for line in fs:
            temp = line.strip().split('\t')
            time,u,v = int(temp[0]),int(temp[1]),int(temp[2])
            if time!=current_time:
                T+=1
                current_time = time
                edge_list.append(edge)
                edge = [(u,v)]
            else:    
                edge.append((u,v))
    
    return edge_list,T            

In [128]:
def getNeighbor(edge):
    neighbor = {}
    for u,v in edge:
        if u not in neighbor:
            neighbor[u] = [v]
        else:
            neighbor[u].append(v)
        if v not in neighbor:
            neighbor[v] = [u]
        else:
            neighbor[v].append(u)
    return neighbor            

In [129]:
def calculate_coefficient(edge_t,edge_t_1):
    # get_neighbors
    n_t = getNeighbor(edge_t) # neighbors at t
    n_t_1 = getNeighbor(edge_t_1) # neighbors at t+1
    for node in n_t:
        if node in n_t_1:
            coeff = len(set(n_t[node])&set(n_t_1[node]))/(len(set(n_t[node]))*len(set(n_t_1[node])))
            if node not in coefficient_node:
                coefficient_node[node]=coeff
            else:
                coefficient_node[node]+=coeff

In [130]:
def temporal_correlation_coefficient(edge_list,T):
    for i in range(1,len(edge_list)-1):
        calculate_coefficient(edge_list[i],edge_list[i+1])
    
    return np.mean([coefficient_node[i]/(T-1) for i in coefficient_node])

In [131]:
coefficient_node = {} 
edge_list,T = createEdgelist('datasets/highschool_2011.csv')
temporal_correlation_coefficient(edge_list,T)

0.041547143168433956

Write a function to create a null model of the network by randomly shuffling the time stamps of the edges. Typically, consider a random pair of edges and change their time stamps (repeat this step 1000 times). Input to the function should be the network only. Recalculate the temporal correlation in this null model. 

In [132]:
import random

In [133]:
def NullModel(edge_list,T):
    edge_list_null_model = edge_list.copy()
    for i in range(1000):
        x,y = random.randint(1,T),random.randint(1,T)
        edge_x = edge_list_null_model[x]
        edge_y = edge_list_null_model[y]
        a,b = random.randint(0,len(edge_x)-1),random.randint(0,len(edge_y)-1)
        temp = edge_x[a]
        del edge_x[a]
        edge_x.append(edge_y[b])
        del edge_y[b]
        edge_y.append(temp)
        
    return edge_list_null_model   

In [134]:
coefficient_node = {}
edge_list_null_model = NullModel(edge_list,T-1)
temporal_correlation_coefficient(edge_list_null_model,T)

0.03582840362578355

From the contact information provided in the dataset write a function to calculate the activity potential of each node i (F(i)). The function should take as input the network and return a dictionary of nodes and the corresponding activity potential. Now write a function to generate the network for next time step using the activity-driven network model. You can set the value of m (the number of links generated by each active node to 2). Note that a node i becomes active with a probability alpha\*F(i), alpha = 10

In [135]:
def calculateActivityPotential(fname):
    activity_potential = {}
    ego = {}
    nodes = []
    events = 0
    with open('datasets/highschool_2011.csv') as fs:
        for line in fs:
            events+=1
            temp = line.strip().split('\t')
            u,v = int(temp[1]),int(temp[2])
            if u not in activity_potential:
                activity_potential[u]=1
                ego[u] = [v]
                nodes.append(u)
            else:
                activity_potential[u]+=1
                if v not in ego[u]:
                    ego[u].append(v)
            if v not in activity_potential:
                activity_potential[v] = 1
                ego[v] = [u]
                nodes.append(v)
            else:
                activity_potential[v]+=1
                if u not in ego[v]:
                    ego[v].append(u)
                    
    return activity_potential,ego,nodes,events                

In [136]:
def getActiveNodes(activity_potential):
    activation_probability = [10*activity_potential[i]/events for i in activity_potential]
    active_nodes = []
    for i in range(len(activation_probability)):
        if random.random()<activation_probability[i]:
            active_nodes.append(i)
            
    return active_nodes        

In [137]:
def ActivityDrivenModel(fname):
    activity_potential,ego,nodes,events = calculateActivityPotential(fname)
    active_nodes = getActiveNodes(activity_potential)
    edges = []
    for i in active_nodes:
        for j in range(2):
            edges.append((i,random.randint(0,len(active_nodes))))
    return edges        

In [138]:
ActivityDrivenModel('datasets/highschool_2011.csv')

[(1, 7),
 (1, 0),
 (14, 17),
 (14, 0),
 (23, 10),
 (23, 17),
 (24, 14),
 (24, 17),
 (27, 15),
 (27, 18),
 (30, 18),
 (30, 10),
 (32, 1),
 (32, 15),
 (50, 15),
 (50, 13),
 (54, 6),
 (54, 14),
 (70, 1),
 (70, 2),
 (72, 8),
 (72, 9),
 (74, 11),
 (74, 12),
 (84, 17),
 (84, 8),
 (88, 1),
 (88, 1),
 (93, 6),
 (93, 5),
 (100, 0),
 (100, 2),
 (108, 16),
 (108, 18),
 (121, 3),
 (121, 2)]

Write a function to obtain the ego (immediate neighbors) for each node. Your function should take as input the network and the node id and return its ego.
Using the activity potential calculated previously write a function to generate the network for the next time step using the activity-driven network model with memory. Any node links with a previously contacted node with probability n/n+1 (n is the size of its ego) or with a new node with probability 1/n+1. Note that a node i becomes active with a probability alpha\*F(i), alpha = 10

In [139]:
def ActivityDrivenModelwithMemory(fname):
    activity_potential,ego,nodes,events = calculateActivityPotential(fname)
    active_nodes = getActiveNodes(activity_potential)
    edges = []
    for i in active_nodes:
        prob = len(ego[i])/(len(ego[i])+1)
        new_node = [j for j in range(len(activity_potential)) if j not in ego[i]]
        for j in range(2):
            if random.random()<prob:
                edges.append((i,random.choice(ego[i])))
            else:
                edges.append((i,random.choice(new_node)))
                
    return edges            

In [140]:
ActivityDrivenModelwithMemory('datasets/highschool_2011.csv')

[(4, 45),
 (4, 81),
 (22, 34),
 (22, 23),
 (23, 25),
 (23, 19),
 (27, 75),
 (27, 51),
 (33, 19),
 (33, 115),
 (34, 14),
 (34, 115),
 (37, 64),
 (37, 68),
 (47, 95),
 (47, 10),
 (50, 51),
 (50, 27),
 (53, 96),
 (53, 20),
 (54, 4),
 (54, 90),
 (60, 10),
 (60, 74),
 (61, 107),
 (61, 34),
 (67, 57),
 (67, 58),
 (75, 6),
 (75, 62),
 (78, 51),
 (78, 45),
 (100, 62),
 (100, 106),
 (103, 48),
 (103, 106),
 (106, 104),
 (106, 67),
 (108, 11),
 (108, 25)]