# Importing the Data 

In [11]:
import pickle
import pandas as pd
import numpy as np

In [1]:
with open('interactions.pickle', 'rb') as handle:
    my_dict = pickle.load(handle)

# Functionality 3 - Shortest Ordered Route

It takes in input:

* An interval of time
* A sequence of users p = [p_2, ..., p_n-1]
* Initial user p_1 and an end user p_n

Implement an algorithm that returns the shortest walk that goes from user p_j to p_n, and that visits in order the nodes in p. The choice of p_j and p_n can be done randomly (or if it improves the performance of the algorithm you can also define it in any other way)

Consider that:

* The algorithm needs to handle the case that the graph is not connected, thus not all the nodes in p are reachable from p_1. In such scenario, it is enough to let the program give in output the string "Not possible".
* That the graph is weighted
* Since we are dealing with walks, you can pass more than once on the same node p_i, but you have to preserve order. E.g.: if you pass through p_2 and you are going to p_3, you can pass through p_10, but once you will be in p_9, you will have to go back to p_10 as well.

In [25]:
def flatten_f(array_to_flatten):
    return [item for sublist in array_to_flatten for item in sublist]

In [3]:
def filter_by_timestamps_f(initial_time,final_time,dict_to_filter):
    '''
    Function that filters a graph and leaves only those edges with a timestamp within the interval of initial_time and final_time,
    defined by the user. Function currently creates a new dictionary but it can be modified so that it updates the existing dictionary.
    '''
    #Create new dictionary
    filtered_dictionary = {}
    for each_key in dict_to_filter: #For each key in the dictionary
        to_test = np.array(dict_to_filter[each_key])[:,1] #Extract only the timestamps 
        indexes_to_filter = flatten_f(np.where(np.bitwise_and(to_test>initial_time,to_test<final_time))) #Get indexes of the timestamps that are within the desired time intervals
        values_for_key = dict_to_filter[each_key] #Extracts all edges associated with a specific key
        
        #Redefine values of a particular key
        filtered_dictionary.update({each_key: [values_for_key[i] for i in indexes_to_filter]}) #Keeps only those edges that have timestamps within desired time intervals.
    return(filtered_dictionary)

In [4]:
import time

start = time.time()

filtered_dict = filter_by_timestamps_f(15000,15020,my_dict)

end = time.time()
print(end - start)

125.3041479587555


In [4]:
import collections
from collections import Counter

def standardize_weights_f(dict_to_standardize):
    #Create new dictionary
    standard_weighs_dictionary = {}
    for each_key in dict_to_standardize:
        standard_weighs_dictionary.update({each_key: list(collections.Counter(np.array(dict_to_standardize[each_key])[:,0]).items())})
    
    return(standard_weighs_dictionary) 

In [6]:
import time

start = time.time()

graph_with_merged_weights = standardize_weights_f(filtered_dict)

end = time.time()
print(end - start)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

# Dijkstra Implementation filtered_dict

In [5]:
def dijkstra_f(graph_to_analyze, start_vertex):
    #****************************************1) Collect all vertexes in the graph**********************************
    #Collect all keys (vertexes) in the graph
    #Problem: There are other vertexes that are not keys since the graph is directed and also disconnected. We should consider them too. 
    related_nodes = list()
    for each_key in graph_to_analyze.keys():
        related_nodes.append([i for i in np.array(graph_to_analyze[each_key])[:,0]])
    
    #Union of sets: keys of the graph and vertexes that are not keys of the graph but still exist
    vertexes = set(graph_to_analyze.keys()).union(set(flatten_f(related_nodes)))
    
    #*************************************2) Track which vertexes are still unvisited******************************
    unvisited_vertexes = vertexes 
    #***********************************3) Keep track of the smallest distances to each node***********************
    #All vertexes are set to have an infinite distance at the beginning. Then they'll be updated
    default_distance = float('inf')
    smallest_dist_dict = {k: default_distance for k in vertexes}
    #Set distance of the starting vertex as zero
    smallest_dist_dict[start_vertex] = 0
    
    #*******4) Keep track of the parent with the smallest distance from which we reach each visited vertex*********
    parent_vertex = {k: 'nd' for k in vertexes}
    
    #Now while we still have vertexes that haven't been visited yet... keep rolling
    while(len(unvisited_vertexes)>0):
        #Create another dictionary with weights only for the unvisited vertexes
        #We use this to select the vertex with the smallest distance and then move to it... 9 to 7 in dict_test_2
        dict_unvisited_dist = {each_unvisited_node: smallest_dist_dict[each_unvisited_node] for each_unvisited_node in unvisited_vertexes}
        
        #We select, from the dictionary of unvisited vertexes, the vertex with the smallest weight
        selected_vertex = min(dict_unvisited_dist, key=dict_unvisited_dist.get)
        #We remove the selected vertex from unvisited as it is already visited. 
        unvisited_vertexes.remove(selected_vertex)
        
        #Now we have to update the distances from the selected vertex to each vertex that is related to it. 
        if(selected_vertex in graph_to_analyze.keys()): #If there are edges that go out from the selected vertex (aka is a key in graph) update distances of outgoing nodes
            for each_related_vertex, dist_related_vertex in graph_to_analyze[selected_vertex]:
                #If the distance from the selected vertex plus the dist to the related vertex is smaller than the already reported dist of relat vertex...update!
                combined_dist = smallest_dist_dict[selected_vertex] + (1/dist_related_vertex)
                if(combined_dist < smallest_dist_dict[each_related_vertex]):
                    #Update the just discovered shortest distance to the related vertex of the selected vertex
                    smallest_dist_dict[each_related_vertex] = combined_dist
                    #Keep track of who is the selected vertex that lead to the related_vertex with the shortest dist (aka as the parent)
                    parent_vertex[each_related_vertex] = selected_vertex
            #print(len(unvisited_vertexes))
    
    return smallest_dist_dict, parent_vertex

In [22]:
dict_test_2 = {
9: [[8, 1217567877, 10],[13, 1217567877, 10],[13, 1217567877, 10],[17, 1217567877, 10],[17, 1217567877, 10],[17, 1217567877, 10]],
13: [[1, 1217606247, 10], [23, 1217618560, 10], [11, 1217618799, 10]],
17: [[1, 1217617639, 10], [1, 1217618239, 10]],
48: [[2, 1217618182, 10]],
19: [[9, 1217618357, 10]],
23: [[19, 1217618357, 10],[19, 1217618357, 10],[19, 1217618357, 10],[48, 1217618357, 10]],    

}

In [23]:
dict_test_2 = standardize_weights_f(dict_test_2)
dict_test_2

{9: [(8, 1), (13, 2), (17, 3)],
 13: [(1, 1), (23, 1), (11, 1)],
 17: [(1, 2)],
 48: [(2, 1)],
 19: [(9, 1)],
 23: [(19, 3)]}

In [81]:
dict_test_2 = {
9: [[8,1],[13,2],[17,3]],
19: [[9,1]],
48: [[2,1]],
2: [[88,3]],
17: [[1,2],[99,4]],
13: [[11,1], [1,1], [23,3]],
23: [[48,7], [19,3]],
99: [[101,3],[88,10],[77,1]],
101: [[77, 3]]
}

In [24]:
dijkstra_f(dict_test_2,9)

({1: 0.8333333333333333,
  2: inf,
  8: 1.0,
  9: 0,
  11: 1.5,
  13: 0.5,
  48: inf,
  17: 0.3333333333333333,
  19: 1.8333333333333333,
  23: 1.5},
 {1: 17,
  2: 'nd',
  8: 9,
  9: 'nd',
  11: 13,
  13: 9,
  48: 'nd',
  17: 9,
  19: 23,
  23: 13})

# Function to rebuild the path

In [16]:
def reconstruct_path_f(graph_to_analyze,starting_vertex,ending_vertex):
    #Run Dijkstra from the starting_vertex. Gets dict of min distance from starting_vertex to each vertex that can be reached from it
    #and gets dict of the parent vertex that lead to each vertex that can be reached from the starting_vertex
    min_dist_dict, parent_dict = dijkstra_f(graph_to_analyze,starting_vertex) 
    
    #List to reconstruct the path that lead to each vertex
    path_to_goal = list()
    #path_to_goal.append(ending_vertex)
    #Define the goal vertex from which we will start constructing the path that lead to it from the starting_vertex
    goal = starting_vertex
    
    #While we haven't reached our goal vertex, keep rolling...unless it's impossible to construct a path between starting and ending vertex
    while(ending_vertex != goal):
        #If there's no parent vertex... it was not possible to reach ending_vertex from starting_vertex, so stop searching
        if parent_dict[ending_vertex] == 'nd':
            print("Impossible to reach vertex",ending_vertex,"from vertex",goal,"!")
            path_to_goal.clear()
            break #Stop searching
        else:
            #print(parent_dict[ending_vertex]) #Debug
            #Add the parent of the list to reconstruct the path that lead from starting_vertex to ending_vertex
            path_to_goal.append(parent_dict[ending_vertex])
            #Redefine parent vertex for next iteration and continue searching backwards
            ending_vertex = parent_dict[ending_vertex]
    return(path_to_goal[::-1])

In [136]:
zzz = reconstruct_path_f(dict_test_2,7,88)

Node 7 does not exist in the graph.


In [118]:
zzz

[19, 9, 17, 99, 88, 100]

In [132]:
dict_test_2.keys()

dict_keys([9, 19, 48, 2, 17, 13, 23, 99, 101])

# Functionality 3
Give a list and return the shortest walk between the starting_vertex and ending_vertex that visits in order the vertex in the list

In [7]:
dict_test_2 = {
9: [[8,1],[13,2],[17,3]],
19: [[9,1],[23,3]],
48: [[2,1]],
2: [[88,3]],
17: [[1,2],[99,4]],
13: [[11,1], [1,1], [23,3]],
23: [[19,3],[48,7]],
99: [[101,3],[88,10],[77,1]],
101: [[77, 3]]
}

In [None]:
#Receives a list of vertexes
#Breaks the list in couples of subsequent vertexes
#Finds shortest route between this couple of vertexes
#Collects the shortest route in a list
#At the end we find the shortest route that connnects first vertex with final vertex

In [9]:
def functionality_3(graph_to_analyze, interval_time, p1, pn, p): #Where interval of time has to have a default
    if isinstance(p, list):
        p.insert(0,p1) #Set the parameter p1 as the first element of the list p.
        p.insert(len(p),pn) #Set the parameter pn as the last element of the list p. 
        
        shortest_route = list() #Define a list to collect the shortest route between a couple of vertexes
        
        
    else:
        print("Input p is not a list. Please insert a object of type list.")

In [37]:
p1 = 19
p = [23,9,48]
pn = 88

p.insert(0,p1)
p.insert(len(p),pn)

shortest_route = list()
for i in range(0,len(p)-1):
    starting_vertex = p[i]
    ending_vertex = p[i+1]
    #print(starting_vertex,ending_vertex)
    path = reconstruct_path_f(dict_test_2,starting_vertex,ending_vertex)
    
    shortest_route.extend(path)

    if i == (len(p)-2):
        shortest_route.append(pn)

In [38]:
shortest_route

[19, 23, 19, 9, 13, 23, 48, 2, 88]

In [121]:
dijkstra_f(dict_test_2,23)

({1: 2.1666666666666665,
  2: inf,
  8: 2.333333333333333,
  9: 1.3333333333333333,
  11: 2.833333333333333,
  13: 1.8333333333333333,
  77: 2.5833333333333335,
  17: 1.6666666666666665,
  19: 0.3333333333333333,
  23: 0,
  88: 2.0166666666666666,
  99: 1.9166666666666665,
  101: 2.25,
  48: inf},
 {1: 17,
  2: 'nd',
  8: 9,
  9: 19,
  11: 13,
  13: 9,
  77: 101,
  17: 9,
  19: 23,
  23: 'nd',
  88: 99,
  99: 17,
  101: 99,
  48: 'nd'})