# GRAPHS

## Directed graphs

DAG - directed acyclic graph (graph with <b>no cycles</b>)</br>

source = verices with no incoming edges</br>
sink = vertices with no outgoing edges</br>

topological order </br>

weakly connected - if replacing all of its directed edges with undirected edges produces an undirected graph that is connected</br></br>

connected - it contains a directed path
from U to V or a directed path from V to U for every pair of vertices U and V</br></br>

strongly connected - it contains a directed path from U to V and a directed path from V to U for every pair of vertices U
and V.</br></br>

## Undirected graphs </br>

connected - if there is a path between every pair of vertices</br>
disconnected - no path between each to vertices</br>

## Implementation

- adjacency matrix
- adjacency lists

# TREE

special type of graph </br>

undirected graph <b>without cycles</b>

In [37]:
# https://www.geeksforgeeks.org/topological-sorting/
# TOPOLOGICAL SORT
import collections
n = 6
nodes = [[5,2], [5,0], [4,0], [4,1], [2,3], [3,1]]

graph = collections.defaultdict(list)

for n1, n2 in nodes:
    graph[n1].append(n2)
    
visited = set()
stack = []

print(graph)


def helper(i):
    
    # mark vertex i as visited
    visited.add(i)
    
    # iterate over its adjacents
    for adj in graph[i]:
        if adj not in visited:
            helper(adj)
            
    stack.append(i)

for i in range(n):
    if i not in visited:
        helper(i)
        
print(stack[::-1])
print(visited)

defaultdict(<class 'list'>, {5: [2, 0], 4: [0, 1], 2: [3], 3: [1]})
[5, 4, 2, 3, 1, 0]
{0, 1, 2, 3, 4, 5}


In [33]:
# https://www.geeksforgeeks.org/detect-cycle-in-a-graph/
# DETECT A CYCLE IN DIRECTED GRAPH
# dfs + recursive stack to keep track of visited during the recursion + visited ofc
import collections
def solution():
    n = 6
    nodes = [[2,5], [5,0], [4,0], [4,1], [2,3], [3,1], [0,2]]

    graph = collections.defaultdict(list)

    for n1, n2 in nodes:
        graph[n1].append(n2)

    visited = set()
    recStack = [False for _ in range(n)]

    def isCycle(n):
        visited.add(n)
        recStack[n] = True

        for adj in graph[n]:
            if adj not in visited:
                if isCycle(adj):
                    return True

            elif recStack[adj] == True:
                return True
            
        recStack[n] = False

        return False




    for i in range(n):
        if i not in visited:
            if isCycle(i): # if there is a cycle
                return True

    return False
        

solution()

True

In [35]:
# https://www.geeksforgeeks.org/detect-cycle-undirected-graph/
# detect a cycle in undirected graph
# DFS + parent array for each node + visited ofc

def solution():
    n = 6
    nodes = [[2,5], [5,0], [4,0], [4,1], [2,3], [3,1], [0,2]]

    graph = collections.defaultdict(list)

    for n1, n2 in nodes:
        graph[n1].append(n2)
        graph[n2].append(n1)

    visited = set()

    def isCycle(node, parent):
        
        
        visited.add(node)
        
        for adj in graph[node]:
            if adj not in visited:
                if isCycle(adj, node): # node is the new parent for adj
                    return True
                
            elif parent != node:
                return True
    

        return False




    for i in range(n):
        if i not in visited:
            if isCycle(i, -1): # if there is a cycle (node, parent) for the first node parent we don't have a paerent so is -1
                return True

    return False
        

solution()

True

In [159]:
# if we don't know if there are cycles or not we can:
# 1. check the graph for cycles by using DFS
# 2. apply Kahn’s algorithm for Topological Sorting

# Kahn’s algorithm for Topological Sorting:
# 1. calculate the indegree of nodes
# 1.a keep a counter of visited nodes when poping from queue
# 2. push all nodes with 0 indegree into the queue
# 3. pop from queue and add to the topo_sort_res and increment the cnt and add nodes with 0 indegree to the queue and decrese
# 4. repeat until queue
# 5. if cnt != number of nodes => there is a cycle, return []
# 6. else return the topo_sort_res

# GET ALL TOPOLOGICAL SORTING

# SSSSSSSSSSS KKKKKKKKKKKKKK IIIIIIIIIIIIIII PPPPPPPPPPPPPPPPP


def toposort_kahn_all(nodes, n):
    
    graph = collections.defaultdict(list)
    indegree = [0 for i in range(n)]
    
    for n1, n2 in nodes:
        graph[n1].append(n2)  
        indegree[n2] += 1
    
    visited = [False for _ in range(n)]
    
            
    toposort = []
 


    def helper(visited, indegree, stack):
        nonlocal toposort
        
        flag = False
        
        for i in range(n):
            if not visited[i] and indegree[i] == 0:
                visited[i] = True
                stack.append(i)
                
                for adj in graph[i]:
                    indegree[adj] -= 1
                helper(visited, indegree, stack)
                
                #backtrack
                visited[i] = False
                stack.pop()
                for adj in graph[i]:
                    indegree[adj] += 1
                    
                flag = True
                
        # visited all vertices so print      
        if not flag:
            # toposort.append([stack[:]]) why this shit is not working?!
            print(stack)
                
    
    helper(visited, indegree, toposort)
    return toposort


toposort_kahn_all([[5,0], [5,2], [4,0], [4,1], [2,3], [3,1]], 6)

[4, 5, 0, 2, 3, 1]
[4, 5, 2, 0, 3, 1]
[4, 5, 2, 3, 0, 1]
[4, 5, 2, 3, 1, 0]
[5, 2, 3, 4, 0, 1]
[5, 2, 3, 4, 1, 0]
[5, 2, 4, 0, 3, 1]
[5, 2, 4, 3, 0, 1]
[5, 2, 4, 3, 1, 0]
[5, 4, 0, 2, 3, 1]
[5, 4, 2, 0, 3, 1]
[5, 4, 2, 3, 0, 1]
[5, 4, 2, 3, 1, 0]


[]

In [162]:
# DETECT A CYCLE IN DIRECTED GRAPH
# DOUBLE CHEKCK !!!!
# ADD INDEGREES

import collections
def f(nodes, n):
    graph = collections.defaultdict(list)
    indegree = [0 for _ in range(n)]
    
    for n1, n2 in nodes:
        graph[n1].append(n2)
        indegree[n2] += 1
        
    queue = []
    for idx,val in enumerate(indegree):
        if val == 0:
            queue.append(idx)
            
    cnt = 0
    #toposort = []
    while queue:
        node = queue.pop(0)
        cnt += 1
        #toposort.appedn(node)
        
        for nei in graph[node]:
            indegree[nei] -= 1
            if indegree[nei] == 0:
                queue.append(nei)
                
                
    return cnt == n
        

f([[0,2], [2,2], [0,1], [2,3]], 4)

False

In [20]:
# DETECT A CYCLE IN UNDIRECTED GRAPH
# using union find


# DETECT A CYCLE IN UNDIRECTED GRAPH

import collections
def f(nodes, n):
    
    parent = [i for i in range(n)]
    
    def find(x):
        if parent[x] != x:
            parent[x] = find(parent[x])
        return parent[x]
    
    
    def union(x, y):
        p1 = find(x)
        p2 = find(y)
        
        if p1 == p2:
            return True # there is a cycle
        
        else:
            parent[p1] = p2
            return False
       
    
    for n1, n2 in nodes:
        if union(n1, n2):
            return True #cycle
        
#     for node in range(n):
#         for adj in graph[node]:
#             if union(node, adj):
#                 return True # there is a cycle
    
    
    # CHECK IF GRAPH IS DISCONNECTED AND WE HAVE A COUPLE OF GROUPS       
    for node in range(n):
        parent[node] = find(node)
        
    print(parent)
            
    return False
            


# f([[0,2], [0,3], [2,1]], 4)
# f([[1,0], [0,2], [2,1], [0,3], [3,4]], 5)
f([[0,1], [1,2]], 3)

[2, 2, 2]


False

In [30]:
# FIND MINIMUM PATH IN WEIGHTED UNDIRECTED GRAPHS
# we also have prim's algo

# USE UNION FIND TO CHECK FOR CYCLES
# SORT BY EDGE COST/DISTANCE/WEIGHTS/WHATEVER
# ALWAYS GET THE SHORTEST EDGE COST => GREEDY APPROACH

def kruskal(edges, n):
    
    parent = [i for i in range(n)]
    
    def find(x):
        if parent[x] != x:
            parent[x] = find[parent[x]]
        return parent[x]
    
    def union(x, y):
        cycle = False
        parx = parent[x]
        pary = parent[y]
        
        if parx == pary:
            cycle = True
            return cycle
        
        else:
            parent[parx] = pary
            return cycle
        
        
    sorted_edges = sorted(edges, key=lambda x: x[2])
    
    print('sorted by edges', sorted_edges)
    
    mst = 0
    mst_edges = []
    for n1, n2, cost in sorted_edges:
        if not union(n1, n2):
            mst_edges.append((n1, n2, cost))
            mst += cost
       
    print('mst_edges', mst_edges)
            
    return mst



kruskal([[0,1,10], [0,2,6], [0,3,5], [1,3,15], [2,3,4]], 4)


sorted by edges [[2, 3, 4], [0, 3, 5], [0, 2, 6], [0, 1, 10], [1, 3, 15]]
mst_edges [(2, 3, 4), (0, 3, 5), (0, 1, 10)]


19

We have discussed Dijkstra’s algorithm and its implementation for adjacency matrix representation of graphs. The time complexity for the matrix representation is O(V^2). In this post, O(ELogV) algorithm for adjacency list representation is discussed.

In [112]:
# FIND MINIMUM PATH IN WEIGHTED DIRECTED GRAPHS

# Dijiktra works on both DIRECTED and UNDIRECTED 

# very, very similar to prim's algo for undirected weighted graphs
# GREEDY ALGO

# USING BFS
# The idea is to traverse all vertices of graph using BFS 
# and use a Min Heap to store the vertices not yet included in SPT

#  Dijkstra’s algorithm doesn’t work for graphs with negative weight edges. 
# For graphs with negative weight edges, Bellman–Ford algorithm can be used.

import collections, heapq
def dijikstra(nodes, n, src, dst):
    
    graph = collections.defaultdict(list)
    
    print(nodes)
    for n1, n2, c in nodes:
        graph[n1].append((c, n2))  # (cost, node2)
        
    print(graph)
    
    seen = set()
    distances = {node:float('inf') for node in range(n)}
    distances[src] = 0
    
    heap = [(0, src, [])]   # cost, node, path
    
    while heap:
        cost, n1, path = heapq.heappop(heap)
        
        if n1 not in seen:
            
            seen.add(n1)
            path = path + [n1]
            
            if n1 == dst:
                return [cost, path]
            
            for adj_cost, adj in graph[n1]:
                if adj not in seen:
                    if adj_cost + cost < distances[adj]:
                        distances[adj] = adj_cost + cost
                        heapq.heappush(heap, (adj_cost + cost, adj, path))
     
          
    print(distances)
    return None
                
        


dijikstra([[0,1,1], [0,2,2], [0,3,4], [2,3,1], [1,3,2]], 4, 0, 3)

[[0, 1, 1], [0, 2, 2], [0, 3, 4], [2, 3, 1], [1, 3, 2]]
defaultdict(<class 'list'>, {0: [(1, 1), (2, 2), (4, 3)], 2: [(1, 3)], 1: [(2, 3)]})


[3, [0, 1, 3]]

In [124]:
# TOPOLOGICAL SORT
# ONLY FOR DAG GRAPHS (DIRECTED ACYCLIC GRAPH):
# 1. DIRECTED GRAPH
# 2. NO CYCLES
# time complexity is the same as DFS which is O(V+E)


# implementation of topological sort when we know there are no cycles!!!
import collections
def topological_sort(nodes, n):
    
    print(nodes)
    
    graph = collections.defaultdict(list)
    for n1, n2 in nodes:
        graph[n1].append(n2)  
    
    visited = set()
    stack = []
    
    def helper(node):
        visited.add(node)
        
        for adj in graph[node]:
            if adj not in visited:
                helper(adj)
            
        stack.append(node)
    
    for nod in range(n):
        if nod not in visited:
            helper(nod)
            
    
    return stack[::-1]


topological_sort([[5,0], [5,2], [4,0], [4,1], [2,3], [3,1]], 6)


[[5, 0], [5, 2], [4, 0], [4, 1], [2, 3], [3, 1], [1, 5]]


[4, 1, 5, 2, 3, 0]

In [140]:
# if we don't know if there are cycles or not we can:
# 1. check the graph for cycles by using DFS
# 2. apply Kahn’s algorithm for Topological Sorting

# Kahn’s algorithm for Topological Sorting:
# 1. calculate the indegree of nodes
# 1.a keep a counter of visited nodes when poping from queue
# 2. push all nodes with 0 indegree into the queue
# 3. pop from queue and add to the topo_sort_res and increment the cnt and add nodes with 0 indegree to the queue and decrese
# 4. repeat until queue
# 5. if cnt != number of nodes => there is a cycle, return []
# 6. else return the topo_sort_res


def toposort_kahn(nodes, n):
    
    graph = collections.defaultdict(list)
    indegree = [0 for i in range(n)]
    
    for n1, n2 in nodes:
        graph[n1].append(n2)  
        indegree[n2] += 1
    
    print(indegree)
    queue = []
    
    for idx, value in enumerate(indegree):
        if value == 0:
            queue.append(idx)
            
    cnt = 0
    toposort = []
    print(queue)
    
    while queue:
        node = queue.pop(0)
        cnt += 1
        toposort.append(node)
        
        for adj in graph[node]:
            indegree[adj] -= 1
            if indegree[adj] == 0:
                queue.append(adj)
            
        
    print(toposort)
    print(cnt)

    if cnt != n: # there is a cycle
        return []
    
    return toposort


toposort_kahn([[5,0], [5,2], [4,0], [4,1], [2,3], [3,1]], 6)

[2, 2, 1, 1, 0, 0]
[4, 5]
[4, 5, 0, 2, 3, 1]
6


[4, 5, 0, 2, 3, 1]

[4, 5, 0, 2, 3, 1]
[4, 5, 2, 0, 3, 1]
[4, 5, 2, 3, 0, 1]
[4, 5, 2, 3, 1, 0]
[5, 2, 3, 4, 0, 1]
[5, 2, 3, 4, 1, 0]
[5, 2, 4, 0, 3, 1]
[5, 2, 4, 3, 0, 1]
[5, 2, 4, 3, 1, 0]
[5, 4, 0, 2, 3, 1]
[5, 4, 2, 0, 3, 1]
[5, 4, 2, 3, 0, 1]
[5, 4, 2, 3, 1, 0]


[]

In [186]:
# find if there is a path between 2 nodes

def find_path(nodes, n, src, dst):
    
    g = collections.defaultdict(set)
    for n1, n2 in nodes:
        g[n1].add(n2)
        
    print(g)
    
    def helper(src, dst, visited=set(), path=[]):
        nonlocal all_paths
        visited.add(src)
        path.append(src)
        if src == dst:
            all_paths.append(path[:])
            
        else:
            for adj in g[src]:
                if adj not in visited:
                    helper(adj, dst, visited, path)
                
        visited.remove(src)
        path.pop()


    all_paths = []
    helper(src, dst)
    
    print(all_paths)


find_path([[0,2], [2,0], [2,1], [0,1], [0,3], [1,3]], 4, 0, 1)

defaultdict(<class 'set'>, {0: {1, 2, 3}, 2: {0, 1}, 1: {3}})
[[0, 1], [0, 2, 1]]


In [None]:
import collections
def task(ids, ip, email):
    
    n = len(ids)
    
    ip_graph = collections.defaultdict(set) # phone: id
    email_graph = collections.defaultdict(set) # email:id
    
    
    for iip, iids in zip(ip, ids):
        ip_graph[iip].add(iids)
        
        
    for i_email, i_ids in zip(email, ids):
        email_graph[i_email].add(i_ids)
        
    print(ip_graph)
    print(email_graph)
    
    visited = set()
    groups = []
    visited = set()
    
   
    parent = [i for i in range(n+1)]
    
    print('parent', parent)
    
    def find(x):
        if parent[x] != x:
            parent[x] = find(parent[x])
        return parent[x]
    
    def union(x,y):
        px = find(x)
        py = find(y)
        
        if px != py:
            parent[px] = py
            
            
    for node, iip, iemail in zip(ids, ip, email):
        
        for nei in ip_graph[iip]:
            union(node, nei)
            
        for nei in email_graph[iemail]:
            union(node,nei)
            
    for node in range(n):
        parent[node] = find(node)
        
    parent = parent[1:]
        
    print(parent)
    
    rwtu
    
task([1,2,3,4], [1,1,2,3], ['A', 'B', 'A', 'C'])