# Importing the Data 

In [15]:
import pickle
import pandas as pd
import numpy as np

with open('interactions.pickle', 'rb') as handle:
    my_dict = pickle.load(handle)

# Functionality 3 - Shortest Ordered Route

It takes in input:

* An interval of time
* A sequence of users p = [p_2, ..., p_n-1]
* Initial user p_1 and an end user p_n

Implement an algorithm that returns the shortest walk that goes from user p_j to p_n, and that visits in order the nodes in p. The choice of p_j and p_n can be done randomly (or if it improves the performance of the algorithm you can also define it in any other way)

Consider that:

* The algorithm needs to handle the case that the graph is not connected, thus not all the nodes in p are reachable from p_1. In such scenario, it is enough to let the program give in output the string "Not possible".
* That the graph is weighted
* Since we are dealing with walks, you can pass more than once on the same node p_i, but you have to preserve order. E.g.: if you pass through p_2 and you are going to p_3, you can pass through p_10, but once you will be in p_9, you will have to go back to p_10 as well.

In [18]:
def flatten_f(array_to_flatten):
    return [item for sublist in array_to_flatten for item in sublist]

In [17]:
def filter_by_timestamps_f(initial_time,final_time,dict_to_filter):
    '''
    Function that filters a graph and leaves only those edges with a timestamp within the interval of initial_time and final_time,
    defined by the user. Function currently creates a new dictionary but it can be modified so that it updates the existing dictionary.
    '''
    #Create new dictionary
    filtered_dictionary = {}
    for each_key in dict_to_filter: #For each key in the dictionary
        to_test = np.array(dict_to_filter[each_key])[:,1] #Extract only the timestamps 
        indexes_to_filter = flatten_f(np.where(np.bitwise_and(to_test>initial_time,to_test<final_time))) #Get indexes of the timestamps that are within the desired time intervals
        values_for_key = dict_to_filter[each_key] #Extracts all edges associated with a specific key
        
        #Redefine values of a particular key
        filtered_dictionary.update({each_key: [values_for_key[i] for i in indexes_to_filter]}) #Keeps only those edges that have timestamps within desired time intervals.
    return(filtered_dictionary)

In [18]:
import time

start = time.time()

filtered_dict = filter_by_timestamps_f(15000,15020,my_dict)

end = time.time()
print(end - start)

294.87537479400635


In [53]:
import collections
from collections import Counter

def standardize_weights(dict_to_standardize):
    #Create new dictionary
    standard_weighs_dictionary = {}
    for each_key in dict_to_standardize:
        standard_weighs_dictionary.update({each_key: list(collections.Counter(np.array(dict_to_standardize[each_key])[:,0]).items())})
    
    return(standard_weighs_dictionary) 

In [21]:
import time

start = time.time()

graph_with_merged_weights = standardize_weights(my_dict)

end = time.time()
print(end - start)

539.1119320392609


In [57]:
dict_test_2 = {
9: [[8, 1217567877, 10]],
13: [[1, 1217606247, 10], [23, 1217618560, 10], [11, 1217618799, 10]],
17: [[1, 1217617639, 10], [1, 1217618239, 10]],
48: [[2, 1217618182, 10]],
19: [[9, 1217618357, 10]],
}

In [58]:
dict_test_2 = standardize_weights(dict_test_2)
dict_test_2

{9: [(8, 1)],
 13: [(1, 1), (23, 1), (11, 1)],
 17: [(1, 2)],
 48: [(2, 1)],
 19: [(9, 1)]}

# Dijkstra Implementation

In [64]:
def dijkstra(graph_to_analyze, start_vertex):
    #****************************************1) Collect all vertexes in the graph**********************************
    #Collect all keys (vertexes) in the graph
    #Problem: There are other vertexes that are not keys since the graph is directed and also disconnected. We should consider them too. 
    related_nodes = list()
    for each_key in graph_to_analyze.keys():
        related_nodes.append([i for i in np.array(graph_to_analyze[each_key])[:,0]])
    
    #Union of sets: keys of the graph and vertexes that are not keys of the graph but still exist
    vertexes = set(graph_to_analyze.keys()).union(set(flatten_f(related_nodes)))
    
    #*************************************2) Track which vertexes are still unvisited******************************
    unvisited_vertexes = vertexes 
    #***********************************3) Keep track of the smallest distances to each node***********************
    #All vertexes are set to have an infinite distance at the beginning. Then they'll be updated
    default_distance = float('inf')
    smallest_dist_dict = {k: default_distance for k in vertexes}
    #Set distance of the starting vertex as zero
    smallest_dist_dict[start_vertex] = 0
    
    #*******4) Keep track of the parent with the smallest distance from which we reach each visited vertex*********
    parent_vertex = {k: 'nd' for k in vertexes}
    
    #Now while we still have vertexes that haven't been visited yet... keep rolling
    while(len(unvisited_vertexes)>0):
        #Create another dictionary with weights only for the unvisited vertexes
        #We use this to select the vertex with the smallest distance and then move to it... 9 to 7 in dict_test_2
        dict_unvisited_dist = {each_unvisited_node: smallest_dist_dict[each_unvisited_node] for each_unvisited_node in unvisited_vertexes}
        
        #We select, from the dictionary of unvisited vertexes, the vertex with the smallest weight
        selected_vertex = min(dict_unvisited_dist, key=dict_unvisited_dist.get)
        #We remove the selected vertex from unvisited as it is already visited. 
        unvisited_vertexes.remove(selected_vertex)
        
        #Now we have to update the distances from the selected vertex to each vertex that is related to it. 
        if(selected_vertex in graph_to_analyze.keys()): #If there are edges that go out from the selected vertex (aka is a key in graph) update distances of outgoing nodes
            for each_related_vertex, dist_related_vertex in graph_to_analyze[selected_vertex]:
                #If the distance from the selected vertex plus the dist to the related vertex is smaller than the already reported dist of relat vertex...update!
                combined_dist = smallest_dist_dict[selected_vertex] + (1/dist_related_vertex)
                if(combined_dist < smallest_dist_dict[each_related_vertex]):
                    #Update the just discovered shortest distance to the related vertex of the selected vertex
                    smallest_dist_dict[each_related_vertex] = combined_dist
                    #Keep track of who is the selected vertex that lead to the related_vertex with the shortest dist (aka as the parent)
                    parent_vertex[each_related_vertex] = selected_vertex
            print(len(unvisited_vertexes))
    
    return smallest_dist_dict, parent_vertex