# Loading the libraries

In [None]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-class-path /usr/share/java/postgresql-42.2.23.jar --jars /usr/share/java/postgresql-42.2.23.jar pyspark-shell'
import databricks.koalas as ks
import numpy as np
import pandas as pd
import networkx as nx
from heapq import heapify, heappop, heappush

# Splitting the lengthy edges

In [None]:
# JDBC connection options; passed as `options=` to ks.read_sql_query in loadMultiGraph.
# NOTE(review): the credentials are hard-coded here -- consider loading them from the
# environment or a secrets store instead of committing them with the notebook.
params = {'user': 'cristiano', 'password': 'cristiano'}

def loadMultiGraph():
    """Load the STREETSEGMENT table into a networkx MultiGraph.

    Each row becomes one edge between its two vertex ids, keyed by the
    segment id and carrying ``idedge`` and ``length`` as edge attributes.
    The ONEWAY column is selected by the query but is not used when the
    graph is built.

    Returns:
        The populated ``nx.MultiGraph``.
    """
    query = '''	select	EDGE.IDVERTEXORIG_FK,
										EDGE.IDVERTEXDEST_FK,
										EDGE.IDEDGE,
										EDGE.LENGTH,
										EDGE.ONEWAY
								from	STREETSEGMENT as EDGE '''
    segments = ks.read_sql_query(query, 'jdbc:postgresql:afterqualifying', options=params)

    graph = nx.MultiGraph()
    for segment in segments.itertuples():
        fields = segment._asdict()
        edgeId = fields['idedge']

        # The segment id doubles as the multigraph edge key so that parallel
        # segments between the same pair of vertices stay distinguishable.
        graph.add_edge(
            fields['idvertexdest_fk'],
            fields['idvertexorig_fk'],
            key=edgeId,
            idedge=edgeId,
            length=fields['length'],
        )

    return graph

#G = loadMultiGraph()

In [None]:
def reBuildGraph(G, edgesHeap, firstSplit):
    """Replace each edge that must be split by a chain of equal-length segments.

    Args:
        G: graph modified in place. Edges are removed and added by key, so a
            networkx multigraph whose edge keys are the integer edge ids is
            expected.
        edgesHeap: iterable of tuples
            ``(heapValue, u, v, idedge, lengthOriginal, numSplit)``. The last
            field is one more than the number of segments the edge must be
            split into.
        firstSplit: minimum number of segments an edge must need before it is
            rebuilt; edges below this threshold are left untouched.

    Returns:
        The same graph object ``G``.
    """
    for _, u, v, idedge, lengthOriginal, heapSplit in edgesHeap:
        # The number of segments the edge must be split into is one less than
        # the value stored in the heap. Keep it in its own variable: the
        # original code overwrote it with the raw heap value while unpacking,
        # which produced one segment too many.
        numSegments = heapSplit - 1
        if numSegments < firstSplit:
            continue

        segmentLength = lengthOriginal / numSegments

        # Shortest u-v distance before the rewrite, used as a sanity check below.
        oldDistance = nx.dijkstra_path_length(G, u, v, weight='length')

        G.remove_edge(u, v, key=int(idedge))

        # Chain numSegments - 1 intermediate vertices named "<idedge>_<i>";
        # each new edge reuses its end vertex name as key and idedge.
        vertexStart = u
        for i in range(1, numSegments):
            vertexEnd = str(idedge) + '_' + str(i)
            G.add_edge(vertexStart, vertexEnd, key=vertexEnd, idedge=vertexEnd, length=segmentLength)
            vertexStart = vertexEnd

        # The last segment closes the chain on the original end vertex.
        keyLast = str(idedge) + '_' + str(numSegments)
        G.add_edge(vertexStart, v, key=keyLast, idedge=keyLast, length=segmentLength)

        # Splitting must not change the shortest distance between u and v.
        newDistance = nx.dijkstra_path_length(G, u, v, weight='length')
        if round(oldDistance, 7) != round(newDistance, 7):
            print("ERROR IN DISTANCES:", oldDistance, newDistance)

    return G

In [None]:
def splitEdges(precision=1, G=None):
    """Simulate splitting edges and return the resulting segment lengths.

    Every edge starts unsplit. On each round the edge at the top of the heap
    is popped and pushed back prepared for one more split; the heap priority
    is the (negated) segment length the edge would have after that next split.

    Args:
        precision: multiplier on the number of edges; ``len(edges) * precision``
            split rounds are performed. ``0`` performs no splitting.
        G: optional pre-loaded graph exposing ``edges(data=True)`` with
            ``'idedge'`` and ``'length'`` edge attributes. When omitted the
            graph is loaded with ``loadMultiGraph()``; passing it avoids
            re-reading the database on repeated calls.

    Returns:
        Sorted list with the segment length of every edge after the splits
        (original length divided by its current number of segments).
    """
    if G is None:
        G = loadMultiGraph()

    firstSplit = 2
    # The value must be negative because the data structure is a min heap.
    edgesHeap = [
        (-1 * data['length'] / firstSplit, u, v, data['idedge'], data['length'], firstSplit)
        for u, v, data in G.edges(data=True)
    ]
    heapify(edgesHeap)

    if precision != 0:
        for _ in range(len(edgesHeap) * precision):
            _, u, v, idedge, lengthOriginal, numSplit = heappop(edgesHeap)

            # numSplit is prepared for the next time the edge may be split
            # (numSplit + 1); negated again because of the min heap.
            heapValue = -1 * lengthOriginal / (numSplit + 1)
            heappush(edgesHeap, (heapValue, u, v, idedge, lengthOriginal, numSplit + 1))

    #reBuildGraph(G, edgesHeap, firstSplit)

    # The stored split count is one ahead of the segments the edge currently has.
    return sorted(item[4] / (item[5] - 1) for item in edgesHeap)

# Build one column of sorted segment lengths per precision level
# (0 .. maxPrecision - 1) and summarise them side by side.
maxPrecision = 11

lengthColumns = []
for i in range(maxPrecision):
    # Each run yields a flat list of lengths; turn it into an (n, 1) column.
    newData = np.asarray(splitEdges(i)).reshape((-1, 1))
    lengthColumns.append(newData)

# Stitch all columns together at once: rows follow the sorted order of each
# run, columns are the precision levels.
data = np.concatenate(lengthColumns, axis=1)

kdf = ks.DataFrame(data=data, columns=range(maxPrecision))
kdf.describe()