# Exercise 4. Backtracking algorithms
#### Algorithms ans Data Structures

by Jędrzej Kopiszka, 145304
10.06.2020

In [24]:
from timeit import default_timer as timer #time measuring
import concurrent.futures as futures #setting maximum execution time
import pandas as pd # DataFrame for output representation
import numpy as np 
import random # randomized element search
import seaborn as sns # plotting
import matplotlib.pyplot as plt #plotting

## 1. Graph representation, connectivity check
We can store graphs in different representations. For this exercise I use two methods: edge list in format [[a,b],[c,d]] and neighborhood matrix

#### 1.1 Algorithms for different representation

In [25]:
#list of incidents
def list_of_incidents(directed, size, tab):
    final_tab=[[] for i in range(size)]
    if directed==True:
        for vertex in tab:
            final_tab[vertex[0]].append(vertex[1])
          
    else:
        for vertex in tab:
            final_tab[vertex[0]].append(vertex[1])
            final_tab[vertex[1]].append(vertex[0])
            
    return final_tab

In [26]:
#edge list
def edge_list(directed, size, tab):
    final_tab=[]
    if directed==True:
        final_tab=tab
    else:
        for vertex in tab:
            final_tab.append(vertex)
            final_tab.append([vertex[1], vertex[0]])
    return final_tab

In [27]:
#Neighborhood matrix (vertex matrix, adjacency matrix)
def neighborhood_matrix(directed, size, tab):
    final_tab = [[0 for j in range(size)] for i in range(size)]
    if directed==False:
        for vertex in tab:
            final_tab[vertex[0]][vertex[1]]=1
            final_tab[vertex[1]][vertex[0]]=1
    else:
        for vertex in tab:
            final_tab[vertex[0]][vertex[1]]=1   
    return final_tab

In [28]:
#Incident matrix
def incident_matrix(directed, size, tab):
    final_tab=[[0 for i in range(len(tab))] for i in range(size)]
    if directed==False:
        edge_num=0
        for vertex in tab:
            final_tab[vertex[0]][edge_num]=1
            final_tab[vertex[1]][edge_num]=1
            edge_num+=1
    else:
        edge_num=0
        for vertex in tab:
            final_tab[vertex[0]][edge_num]=-1
            final_tab[vertex[1]][edge_num]=1
            edge_num+=1
    return final_tab

#### 1.2 Neighborhood matrix to edge list converter

In [29]:
def neighborhood_to_list_of_edges(neighborhood):
    edge=[]
    for i in range(len(neighborhood)):
        for j in range(len(neighborhood)):
            if neighborhood[i][j]==1:
                edge.append([i,j])
    return edge

#### 1.3 Connectivity check - if graph is connected

In [30]:
#checking connectivity using list of edges representation
def check_connectivity(edges,n):
    visited = [0 for i in range(n)]
    s=[]
    vc=0
    s.insert(0,0)
    visited[0]=1
    while(len(s)!=0):
        v=s[0]
        s.pop(0)
        vc+=1
        for edge in edges:
            if edge[0]==v:
                if visited[edge[1]]==1:
                    continue
                else:
                    visited[edge[1]]=1
                    s.insert(0,edge[1])
            elif edge[1]==v:
                if visited[edge[0]]==1:
                    continue
                else:
                    visited[edge[0]]=1
                    s.insert(0,edge[0])
    if vc==n:
        return True
    else:
        return False

## 2. Eulerian circuit

#### 2.1 Generate connected graphs with eulerian cycle for different saturations

In [31]:
def generator_eulerian(n,saturation):
    verticies = list(np.arange(n))
    start = random.choice(verticies)
    verticies.remove(start)
    path=[start]
    for i in range(n-1):
        choice = random.choice(verticies)
        verticies.remove(choice)
        path.append(choice)
    path.append(start)
    edges=[]
    for i in range(len(path)-1):
        edges.append([path[i], path[i+1]])
    neighborhood = neighborhood_matrix(False, n, edges) 
    l=n
    stop=False
    for i in range(n):
        for j in range(i+1,n):
            for z in range(j+1, n):
                if neighborhood[i][j]!=1 and neighborhood[j][z]!=1 and neighborhood[z][i]!=1:
                    neighborhood[i][j]=1
                    neighborhood[j][i]=1
                    neighborhood[j][z]=1
                    neighborhood[z][j]=1
                    neighborhood[z][i]=1
                    neighborhood[i][z]=1
                    l+=3
                if  l>=(saturation*n*(n-1)*0.5):
                    stop=True
                    break
            if stop==True:
                break
        if stop==True:
            break         
    return neighborhood

#### 2.2 Find eulerian path - Fleury’s Algorithm

In [32]:
#find eulerian circuit
def find_start_vert(tmpGraph, n):
    for i in range(n):
        deg=0
        for j in range(n):
            if tmpGraph[i][j]==1:
                deg+=1
        if deg%2!=0:
            return i
    return 0

def dfs(prev,start,visited, tmpGraph, n):
    count=1
    visited[start]=True
    for u in range(n):
        if prev!=u:
            if visited[u]!=1:
                if tmpGraph[start][u]==1:
                    count+=dfs(start, u, visited, tmpGraph,n)
    return count

def isBridge(u,v,tmpGraph,n):
    deg=0
    for i in range(n):
        if tmpGraph[v][i]==1:
            deg+=1
    if deg>1:
        return False
    return True

def edgeCount(tmpGraph, n):
    count=0
    for i in range(n):
        for j in range(n):
            if tmpGraph[i][j]==1:
                count+=1
    return count

def fleury_algorithm(start,tmpGraph, n):
    edge=edgeCount(tmpGraph,n)
    v_count = n
    for v in range(n):
        if tmpGraph[start][v]==1:
            visited = [0 for i in range(n)] 
            if isBridge(start,v, tmpGraph, n)==1:
                v_count-=1
            cnt = dfs(start, v, visited, tmpGraph, n)
            if abs(v_count-cnt)<=2:
                return True
                if isBridge(v,start, tmpGraph, n)==1:
                    v_count-=1
                tmpGraph[start][v]=0
                tmpGraph[v][start]=0
                edge-=1
                fleury_algorithm(v, tmpGraph, n)

def find_eulerian_circuit(graph,n):
    tmpGraph = [[graph[i][j] for j in range(n)] for i in range(n)]
    fleury_algorithm(find_start_vert(tmpGraph, n), tmpGraph, n)

#### 2.3 Generate data for time-measuring

In [33]:
def genertae_data_for_eulerian():
    list_of_saturation=[0.2, 0.3, 0.4, 0.6, 0.8, 0.95]
    dict_of_eulerian_graphs = {}
    for n in list(np.arange(50,401, 50)):
        dict_of_eulerian_graphs[n]=[]
        print("\ngenerating for n= ",n, end=' with ')
        for i in range(len(list_of_saturation)): #generate saturation
            dict_of_eulerian_graphs[n].append([])
            print("sat:", end=' ')
            how_many=0
            while(how_many<10):
                graph = generator_eulerian(n,list_of_saturation[i]) #generate random graph, check connectivity
                if check_connectivity(neighborhood_to_list_of_edges(graph), n)==1:
                    dict_of_eulerian_graphs[n][i].append(graph) # add generated graph to dictionary
                    how_many+=1
            print(list_of_saturation[i], end=', ')
    return dict_of_eulerian_graphs

In [34]:
dict_of_eulerian = genertae_data_for_eulerian()


generating for n=  50 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  100 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  150 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  200 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  250 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  300 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  350 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  400 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 

#### 2.4 Measure the time of finding eulerian cycle in connected graph for different sizes and saturations

In [35]:
times_eulerian={}
for key in dict_of_eulerian.keys():
    times_eulerian[key]=[]
    print("\n Done for key", key, "saturation: ", end='')
    itera=0
    for saturation in dict_of_eulerian[key]:
        mean_time=0
        itera+=1
        for graph in saturation:
            def test():
                    start=timer()
                    a = find_eulerian_circuit(graph,key)
                    end = timer()
                    return end-start
            with futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(test)
                try:
                    resp = future.result(60*5)
                except futures.TimeoutError:
                    mean_time+=60*5
                else:
                    mean_time+=resp
                executor._threads.clear()
                futures.thread._threads_queues.clear()
        mean_time/=10
        times_eulerian[key].append(mean_time)
        print(itera, end=", ")


 Done for key 50 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 100 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 150 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 200 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 250 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 300 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 350 saturation: 1, 2, 3, 4, 5, 6, 
 Done for key 400 saturation: 1, 2, 3, 4, 5, 6, 

#### 2.5 Execution times - dataframe and plot

In [36]:
df_eulerian = pd.DataFrame.from_dict(orient='index',data=times_eulerian, columns=[0.2,0.3,0.4,0.6,0.8,0.95])
df_eulerian

Unnamed: 0,0.2,0.3,0.4,0.6,0.8,0.95
50,0.001017,0.00077,0.000678,0.00076,0.000788,0.000787
100,0.002186,0.002248,0.00227,0.002415,0.002845,0.002912
150,0.005164,0.004885,0.00564,0.005542,0.005678,0.005634
200,0.008819,0.00931,0.009885,0.008938,0.00944,0.010296
250,0.013277,0.013958,0.013709,0.013995,0.013741,0.015851
300,0.018917,0.020554,0.020338,0.022374,0.021665,0.022518
350,0.02601,0.026801,0.029146,0.027447,0.029336,0.029787
400,0.037306,0.034014,0.038614,0.040719,0.042464,0.042983


In [None]:
df_eulerian= df_eulerian.reset_index().melt('index', var_name='saturation',value_name='t=f(n)')

plt.figure(figsize=(8,5))
sns.swarmplot(x='index', y='t=f(n)', hue='saturation', data=df_eulerian)
plt.title("Execution time comparison of Fleury-s Algorithm on n elements with various saturation")
plt.ylabel("t=f(n) [s]")
plt.xlabel("n")
plt.show()

#### 2.6 Conclusions on graph representation

In the analysis, I use two graph representation methods: edge list and neighborhood matrix. 
Both methods are easily understandable and the most popular- unqualified readers should have no problem with understanding the representation or finding information about them - that is an advantage over the "incident matrix" and "list of incidents" whose representation may be confusing.

When it comes to performance - the biggest difference is in accessing or iterating through the representation when checking if a specific pair of vertices is already an edge. Edge list is accessed linearly meaning each time we need to go through the whole list to find if the list contains that pair. In neighborhood matrix, we can access specific pair by [i][j]==1 or [i][j]==0. That is much faster, especially works much better in generating random graphs which is done partially by looking for a pair that does not have an edge.

## 3. Hamiltonian cycle

#### 3.1  Random, connected graph with varius saturation - generator function

In [37]:
#generate connected for checking if is hamiltonian
def generate_connected(n,saturation):
    verticies = list(np.arange(n))
    start = random.choice(verticies)
    verticies.remove(start)
    path=[start]
    for i in range(n-1):
        choice = random.choice(verticies)
        verticies.remove(choice)
        path.append(choice)
    path.append(start)
    edges=[]
    for i in range(len(path)-1):
        edges.append([path[i], path[i+1]]) 
    every_permutation=[]
    for i in range(n):
        for j in range(i, n):
            if i!=j:
                every_permutation.append([i,j])
    while(len(edges)<((saturation*n*(n-1))/2)):
        a=random.choice(every_permutation)
        if a not in edges and [a[1],a[0]] not in edges:
            edges.append(a)
            every_permutation.remove(a)
            
    return edges

#### 3.2 Finding Hamiltonian cycle - backtracking algorithm

In [38]:
#checks if contains hamiltonian
def isValid(v,k, graph, path):
    if graph[path[k-1]][v]==0:
        return False
    for i in range(k):
        if path[i]==v:
            return False
    return True

def cycle_found(k, graph,n, path):
    if k==n:
        if graph[path[k-1]][path[0]]==1:
            return True
        else:
            return False
    for v in range(n):
        if isValid(v,k, graph,path)==True:
            path[k] = v
            if cycle_found(k+1, graph,n, path)==True:
                return True
            path[k]=-1
    return False


def hamilton_cycle(graph,n):
    path=[0 for i in range(n)]
    for i in range(n):
        path[i]=-1
    path[0]=0
    if cycle_found(1, graph,n, path)==False:
        return "yes"
    else:
        return "no"
    

#### 3.3 Generating random, connected graphs with different saturation and size, 10 graphs each combination

In [39]:
def genertae_data_for_hamilton():
    list_of_saturation=[0.2, 0.3, 0.4, 0.6, 0.8, 0.95]
    dict_of_connected_graphs = {}
    for n in list(np.arange(50,311, 20)):
        dict_of_connected_graphs[n]=[]
        print("\ngenerating for n= ",n, end=' with ')
        for i in range(len(list_of_saturation)): #generate saturation
            dict_of_connected_graphs[n].append([])
            print("sat:", end=' ')
            how_many=0
            while(how_many<10):
                graph = generate_connected(n,list_of_saturation[i]) #generate random graph
                if check_connectivity(graph, n)==True: #check connectivity of graph
                    graph = neighborhood_matrix(False, n, graph) #change representation to neighborhood matrix
                    dict_of_connected_graphs[n][i].append(graph) # add generated graph to dictionary
                    how_many+=1
            print(list_of_saturation[i], end=', ')
    return dict_of_connected_graphs

In [40]:
dict_of_graphs = genertae_data_for_hamilton() #generate data using the function above


generating for n=  50 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  70 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  90 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  110 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  130 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  150 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  170 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  190 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  210 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  230 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating for n=  250 with sat: 0.2, sat: 0.3, sat: 0.4, sat: 0.6, sat: 0.8, sat: 0.95, 
generating f

#### 3.4 Time-measuring for finding Hamiltonian cycle 

In [41]:
times_hamiltonian={}
for key in dict_of_graphs.keys():
    times_hamiltonian[key]=[]
    print("\nDone for key", key, "saturation: ", end='')
    itera=0
    for saturation in dict_of_graphs[key]:
        mean_time=0
        itera+=1
        for graph in saturation:
            def test():
                    start=timer()
                    a = hamilton_cycle(graph,key)
                    end = timer()
                    return end-start
            with futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(test)
                try:
                    resp = future.result(60*2)
                except futures.TimeoutError:
                    mean_time+=60*2
                else:
                    mean_time+=resp
                executor._threads.clear()
                futures.thread._threads_queues.clear()
        mean_time/=10
        times_hamiltonian[key].append(mean_time)
        print(itera, end=", ")


Done for key 50 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 70 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 90 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 110 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 130 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 150 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 170 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 190 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 210 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 230 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 250 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 270 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 290 saturation: 1, 2, 3, 4, 5, 6, 
Done for key 310 saturation: 1, 2, 3, 4, 5, 6, 

#### 3.5 Execution time - dataframe and plot

In [42]:
df_hamiltonian = pd.DataFrame.from_dict(orient='index',data=times_hamiltonian, columns=[0.2,0.3,0.4,0.6,0.8,0.95])
df_hamiltonian

Unnamed: 0,0.2,0.3,0.4,0.6,0.8,0.95
50,87.078406,36.565236,11.460041,0.035389,0.001128,0.001164
70,87.675911,27.936988,36.002368,0.004304,0.004323,0.003722
90,96.730796,50.656808,12.968772,0.005103,0.004478,0.005849
110,120.0,49.001494,7.235499,11.150174,0.008511,0.009292
130,100.639529,25.863429,32.302781,0.384193,0.077104,0.057024
150,120.0,61.796531,32.850287,0.565783,0.680956,0.893295
170,120.0,94.309682,41.911774,0.758952,1.20335,1.980078
190,120.0,65.972994,37.241952,1.509881,2.769658,2.137305
210,119.157395,96.463119,52.789942,5.150096,2.695839,4.64853
230,120.0,88.658774,21.428921,5.028411,5.46615,8.529591


In [None]:
df_hamiltonian = pd.DataFrame.from_dict(orient='index',data=a, columns=[0.2,0.3,0.4,0.6,0.8,0.95])
df_hamiltonian = df_hamiltonian.reset_index().melt('index', var_name='saturation',value_name='t=f(n)')

plt.figure(figsize=(8,5))
sns.swarmplot(x='index', y='t=f(n)', hue='saturation', data=df_hamiltonian)
plt.title("Execution time comparison of Fleury-s Algorithm on n elements with various saturation")
plt.ylabel("t=f(n) [s]")
plt.xlabel("n")
plt.show()


## 4. Conclusion regarding complexity and algorithms

Finding Euler cycle: For finding Euler circuit I used Fleury's Algorithm. The idea behind the algorithm is straightforward: start in selected vertex, next repeat: add "u" to the solution, select [u,v] and remove it (do not remove bridge if possible). I have chosen this algorithm mainly because of easy-to-understand steps, simple implementation. The downside of this algorithm is it's high running time - O(|E|^2) (we can improve it by using advanced Thorup algorithm - then O(|E|(log|E|^3) ). Another idea is to use Hierholzer's algorithm - O(|E|)

Finding Hamilton cycle: I used a backtracking approach. It's idea is to start from any vertex, we make selected vertex a root of the constructed tree. Then we select the next vertex. When selected vertex makes a cycle with any vertex other than root we reach the maximum step. We must backtrack one step, and start the search once again by selecting another vertex and backtrack and remove the successful path. Such approach can give us polynomial time complexity for the NP-hard problem.