# Graphs

* Used in any social network
* Any time we're modeling users
* Recommendation engines (people you might know, people also watched, frequently bought with...)
* Location / Mapping
* Visual Hierarchy
* File system optimizations
* Peer to peer networking
* Web crawlers
* Shortest path problems
    * GPS navigation
    * Solving mazes
    * AI


![Alt Text](https://upload.wikimedia.org/wikipedia/commons/c/cf/Complete_graph_K5.svg)

* Vertex - a node
* Edge - connection between two nodes

# Types of graphs
* Trees are graphs in which each node is connected to another one by exactly one path

* Undirected - There is no direction associated with an edge (Ex: Facebook connections)
* Directed graph - There is a direction associated with an edge (Ex: Instagram followers) 

* Weighted graph - When we assign value to the edges

* Google maps - Weighted, directed graphs. 




# Representing graphs 

* Adjacency matrix 
* Adjacency list
* Hash table

Adjacency List | Adjacency Matrix
------------ | -------------
Can take up less space (in sparse graphs) | Takes up more space (in sparse graphs)
Faster to iterate over all edges | Slower to iterate over all edges
Can be slower to lookup specific edge| Faster to lookup specific edge

In [150]:
class Graph(object): 
    def __init__(self):
        self.adjacency_list = {}

    # Adding a vertex 
    # It should add a key to the adjacency list 
    # with the name of the vertex and set its vale to be
    # an empty array
    def add_vertex(self, node): 
        if node not in self.adjacency_list: 
            self.adjacency_list[node] = []
        
    
    # Adding an edge
    # This function should accept two vertices
    # The function should find in the adjacency list
    # the key of v1 and push v2 to the array, and find
    # the key of v2 and push v1 to the array
    def add_edge(self, v1, v2): 
        self.adjacency_list[v1].append(v2)
        self.adjacency_list[v2].append(v1)
        
    # Removing an edge
    # This function should accept two vertices
    # The function should find in the adjacency list
    # the key of v1 and remove v2 from the array, and find
    # the key of v2 and remove v1 from the array
    def remove_edge(self, v1, v2): 
        for i in range(len(self.adjacency_list[v1])): 
            if self.adjacency_list[v1][i] == v2:
                self.adjacency_list[v1].pop(i)
        for i in range(len(self.adjacency_list[v2])): 
            if self.adjacency_list[v2][i] == v1:
                self.adjacency_list[v2].pop(i)
    
    # Removing a vertex 
    # It should loop as long as there are any other vertices
    # in the adjacency list for that vertex
    # Inside of the loop, call our removeEdge function with 
    # the vertex we are removing and any values in the adjacency list 
    # for that vertex
    # Delete key in the adjacency list for that vertex 
    def remove_vertex(self, node): 
        for item in self.adjacency_list[node]: 
            self.remove_edge(node, item)
        self.adjacency_list.pop(node, None)

    
#     The function should accept a starting node
#     Create a list to store the end result, to be returned at the very end
#     Create an object to store visited vertices
#     Create a helper function which accepts a vertex
#     The helper function should return early if the vertex is empty
#     The helper function should place the vertex it accepts into the visited 
#     object and push that vertex into the result array
#     Loop over all of the values in the adjacency_list for that vertex
#     If any of those values have not been visited, recursively invoke the helper function with that vertex
    def DFS_rec(self, start):
        out = []
        visited = {}
        self.DFS_rec_helper(start, out, visited)
        return out 
    
    def DFS_rec_helper(self, vertex, out, visited):
        if not vertex: return 
        out += vertex
        visited[vertex] = True 
        for item in self.adjacency_list[vertex]: 
            if item not in visited: 
                self.DFS_rec_helper(item, out, visited)
                
    
    # The function should accept a starting node
    # Create a stack to help use keep track of vertices (use a list or array)
    # Create a list to store the end result, to be returned at the very end
    # Create an object to store visited vertices
    # Add the starting vertex to the stack, and mark as visited
    # While the stack has something in it:
    # Pop the next vertex from the stack
    # If that vertex hasn't been visited yet
    # Mark it as visited
    # Add it to the result list
    # Push all of its neighbors into the stack
    # Return the result array

    def DFS_iter(self, start):
        stack, result, visited = [start], [], {start: True}
        while stack: 
            curr = stack.pop()
            result += curr
            for neighbor in self.adjacency_list[curr]: 
                if neighbor not in visited: 
                    visited[neighbor] = True
                    stack += neighbor
        return result
    
    # This function should accept a starting vertex
    # * Create a queue (you can use an array) and place the starting vertex in it
    # * Create an array to store the nodes visited
    # * Create an object to store nodes visited
    # * Mark the starting vertex as visited
    # * Loop as long as there is anything in the queue
    # * Remove the first vertex from the queue and push it into the array that stores nodes visited
    # * Loop over each vertex in the adjacency list for the vertex you are visiting
    # * If it is not inside the object that stores nodes visited, mark it as visited and enqueue that vertex
    # * Once you have finished looping, return the array of visited nodes
    def DFS_iter(self, start):
        
        queue, result, visited = [start], [], {start: True}
        while queue: 
            curr = queue.pop(0)
            result += curr
            for neighbor in self.adjacency_list[curr]: 
                if neighbor not in visited: 
                    visited[neighbor] = True
                    queue += neighbor
        return result
        
        
        
        
        
        
        

In [151]:
g = Graph()

In [152]:
g

<__main__.Graph at 0x1118dd668>

In [153]:
g.add_vertex('A')
g.add_vertex('B')
g.add_vertex('C')
g.add_vertex('D')
g.add_vertex('E')
g.add_vertex('F')

In [154]:
g.adjacency_list

{'A': [], 'B': [], 'C': [], 'D': [], 'E': [], 'F': []}

In [155]:
g.add_edge('A', 'B')
g.add_edge('A', 'C')
g.add_edge('B', 'D')
g.add_edge('C', 'E')
g.add_edge('D', 'E')
g.add_edge('D', 'F')
g.add_edge('E', 'F')

In [156]:
g.adjacency_list

{'A': ['B', 'C'],
 'B': ['A', 'D'],
 'C': ['A', 'E'],
 'D': ['B', 'E', 'F'],
 'E': ['C', 'D', 'F'],
 'F': ['D', 'E']}

In [136]:
g.DFS_rec('A')

['A', 'B', 'D', 'E', 'C', 'F']

# Depth first traversal (Recursive)

## Pseudocode

```
DEF (vertex): 
    if vertex is empty
        return (this is base case)
    add vertex to results list
    mark vertex as visited
    for each neighbor in vertex's neighbors: 
        if neighbor is not visited: 
            recursively call DFS on neighbor
```

* The function should accept a starting node
* Create a list to store the end result, to be returned at the very end
* Create an object to store visited vertices
* Create a helper function which accepts a vertex
    * The helper function should return early if the vertex is empty
    * The helper function should place the vertex it accepts into the visited object and push that vertex into the result array
    * Loop over all of the values in the adjacency_list for that vertex
    * If any of those values have not been visited, recursively invoke the helper function with that vertex

# Depth first traversal (Iterative)
# (stack)

## Pseudocode

```
DFS_iter(self, start): 
    let s be a stack
    s.push(start)
    while s in not empty: 
        vertex = s.pop()
        if vertex is not labeled as discovered: 
            visit vertex (add to result list) 
            label vertex as discovered
            for each of vertex's neighbors, N do: 
                s.push(n)
```

* The function should accept a starting node
* Create a stack to help use keep track of vertices (use a list or array)
* Create a list to store the end result, to be returned at the very end
* Create an object to store visited vertices
* Add the starting vertex to the stack, and mark as visited
* While the stack has something in it: 
    * Pop the next vertex from the stack 
    * If that vertex hasn't been visited yet
        * Mark it as visited
        * Add it to the result list
        * Push all of its neighbors into the stack
* Return the result array

In [157]:
g.DFS_iter('A')

['A', 'C', 'E', 'F', 'D', 'B']

# Breadth first (queue)

* This function should accept a starting vertex
* Create a queue (you can use an array) and place the starting vertex in it
* Create an array to store the nodes visited
* Create an object to store nodes visited
* Mark the starting vertex as visited
* Loop as long as there is anything in the queue
* Remove the first vertex from the queue and push it into the array that stores nodes visited
* Loop over each vertex in the adjacency list for the vertex you are visiting
* If it is not inside the object that stores nodes visited, mark it as visited and enqueue that vertex
* Once you have finished looping, return the array of visited nodes

# Dijkstra's Algorithm 

* GPS - Finding fastest route
* Network routing 
* Biology - Used to model the spread of viruses 
* Airline tickets - Finding cheapest route 


In [16]:
class WeightedGraph(object):
    def __init__(self): 
        self.adjacency_list = {}
    
    def add_vertex(self, vertex): 
        if vertex not in self.adjacency_list: 
            self.adjacency_list[vertex] = []
    
    def add_edge(self, vertex1, vertex2, weight): 
        self.adjacency_list[vertex1].append({'node': vertex2, 'weight': weight})
        self.adjacency_list[vertex2].append({'node': vertex1, 'weight': weight})
        


In [17]:
g = WeightedGraph()

In [18]:
g.add_vertex('A')
g.add_vertex('B')
g.add_vertex('C')
g.add_edge('A', 'B', 9)
g.add_edge('A', 'C', 5)
g.add_edge('B', 'C', 7)

In [20]:
g.adjacency_list 


{'A': [{'node': 'B', 'weight': 9}, {'node': 'C', 'weight': 5}],
 'B': [{'node': 'A', 'weight': 9}, {'node': 'C', 'weight': 7}],
 'C': [{'node': 'A', 'weight': 5}, {'node': 'B', 'weight': 7}]}

# The approach

1. Every time we look to visit a new node, we pick the node with the smallest known distance to visit first
2. Once we've moved to the node we're going to visit, we look at each of its neighbors
3. For each neighboring node, we calculate the distance by summing the total edges that lead to the node we're checking from the starting node
4. If the new total distance to a node is less than the previous total, we store the new shorter distance for that node

# Dijkstra's pseudocode

* This function should accept a starting and ending vertex
* Create an object (we'll call it distances) and set each key to be every vertex in the adjacency list with the value of infinity, except for the starting vertex which should have a value of 0
* After setting a value in the distances object, add each vertex with a priority of infinity to the priority queue, except the starting vertex, which should have a priority of of 0 because that's where we begin
* Create another object called previous and set each key to be every vertex in the adjacency list with a value of null
* Start looping as long as there is anything in the priority queue
    * dequeue the distance to that vertex with the new lower distance
    * If the distance is less than what is currently stored in our distances object
        * Update the distances object with the new lower distance
        * Update the previous object to contain that vertex
        * enqueue the vertex with the total distance from the start node

In [100]:
from collections import deque
class WeightedGraph(object):
    def __init__(self): 
        self.adjacency_list = {}
    
    def add_vertex(self, vertex): 
        if vertex not in self.adjacency_list: 
            self.adjacency_list[vertex] = []
    
    def add_edge(self, vertex1, vertex2, weight): 
        self.adjacency_list[vertex1].append({'node': vertex2, 'weight': weight})
        self.adjacency_list[vertex2].append({'node': vertex1, 'weight': weight})

    def dijkstra(self, start, end): 
        nodes = PriorityQueue()
        distances = {}
        previous = {}
        
        # Build up initial state
        for vertex in self.adjacency_list: 
            if vertex == start: 
                distances[vertex] = 0
                nodes.enqueue(vertex, 0)
            else:
                distances[vertex] = float('inf')
                nodes.enqueue(vertex, float('inf'))
            previous[vertex] = None
        
        # as long as there is something to visit
        while nodes: 
            smallest = nodes.dequeue()
            print(smallest)
            if smallest == end:
                continue
                # We are done and need to build path 
            if smallest or distances[smallest] < float('inf'):
                print(smallest)
                for neighbor in self.adjacency_list[smallest]:
                    nextNode = self.adjacency_list[smallest]
#                     print(neighbor)  
                print('-----')
                
class PriorityQueue(object):
    def __init__(self):
        self.values = []
    
    def enqueue(self, val, priority):
        self.values.append([val, priority])
        self.sort()
        return self.values
        
    def dequeue(self):
        return deque.popleft()

    def sort(self):
        self.values.sort(key=lambda x: x[1])
                
                
                
                

In [101]:
g = WeightedGraph()
g.add_vertex('A')
g.add_vertex('B')
g.add_vertex('C')
g.add_vertex('D')
g.add_vertex('E')
g.add_vertex('F')

g.add_edge('A', 'B', 4)
g.add_edge('A', 'C', 2)
g.add_edge('B', 'E', 3)
g.add_edge('C', 'D', 2)
g.add_edge('C', 'F', 4)
g.add_edge('D', 'E', 3)
g.add_edge('D', 'F', 1)
g.add_edge('E', 'F', 1)
g.adjacency_list
g.dijkstra('A', 'B')


['A', 0]
['A', 0]


TypeError: unhashable type: 'list'

In [None]:
from collections import deque, namedtuple

inf = float('inf')
Edge = namedtuple('Edge', 'start, end, weight')

def make_edge(start, end, weight=1): 
    return Edge(start, end, weight)

class Graph: 
    def __init__(self, edges): 
        # check data 
        
        self.edges = [make_edge(*edge) from edge in edges]

[['Z', 1], ['B', 2], ['C', 3]]