# Loading the libraries

In [1]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-class-path /usr/share/java/postgresql-42.2.23.jar --jars /usr/share/java/postgresql-42.2.23.jar pyspark-shell'
import databricks.koalas as ks
import numpy as np
import pandas as pd
import networkx as nx
from heapq import heapify, heapreplace#, heappop, heappush

# Splitting the lengthy edges

In [2]:
# JDBC connection options handed to ks.read_sql_query.
# Credentials are taken from the PGUSER / PGPASSWORD environment variables (the names
# libpq uses) when they are set, so real secrets do not have to live in the notebook;
# the previous hard-coded development values remain as fallbacks.
params = {
    'user': os.environ.get('PGUSER', 'cristiano'),
    'password': os.environ.get('PGPASSWORD', 'cristiano'),
}

def loadMultiGraph():
    """Load the street segments from the database into an undirected multigraph.

    Reads every row of STREETSEGMENT through ``ks.read_sql_query`` (JDBC URL
    ``jdbc:postgresql:afterqualifying``, connection options from the module-level
    ``params``) and adds one edge per row to a ``nx.MultiGraph``.

    Each edge is keyed by its ``idedge`` and carries the attributes ``idedge`` and
    ``length``. The ONEWAY column is selected but not used here.

    Returns:
        The populated ``nx.MultiGraph``.
    """
    query = '''	select	EDGE.IDVERTEXORIG_FK,
										EDGE.IDVERTEXDEST_FK,
										EDGE.IDEDGE,
										EDGE.LENGTH,
										EDGE.ONEWAY
								from	STREETSEGMENT as EDGE '''
    segments = ks.read_sql_query(query, 'jdbc:postgresql:afterqualifying', options=params)

    graph = nx.MultiGraph()
    # The rows are named tuples, so the (lower-cased) column names are attributes.
    for segment in segments.itertuples():
        graph.add_edge(
            segment.idvertexdest_fk,
            segment.idvertexorig_fk,
            key=segment.idedge,
            idedge=segment.idedge,
            length=segment.length,
        )

    return graph

#G = loadMultiGraph()

In [3]:
def reBuildGraph(G, edgesHeap, firstSplit):
    """Replace every sufficiently split edge of ``G`` by a chain of equal-length edges.

    Args:
        G: graph to modify in place; must support ``remove_edge``/``add_edge`` with keys.
        edgesHeap: iterable of ``(heapValue, u, v, idedge, lengthOriginal, numSplit)``
            tuples. The stored ``numSplit`` is one more than the number of segments
            the edge must be split into.
        firstSplit: minimum number of segments for an edge to be rebuilt.

    Returns:
        The same graph object ``G``.

    For an edge split into ``n`` segments, the intermediate vertices are named
    ``"<idedge>_1"`` .. ``"<idedge>_<n-1>"`` and the new edges are keyed
    ``"<idedge>_1"`` .. ``"<idedge>_<n>"``. The shortest ``u``-``v`` distance is
    measured before and after the replacement and a message is printed if the two
    differ after rounding to 7 decimals.
    """
    for _, u, v, idedge, lengthOriginal, storedSplit in edgesHeap:
        # The heap stores one more than the number of segments to create.
        segments = storedSplit - 1
        if segments < firstSplit:
            continue

        segmentLength = lengthOriginal / segments
        distanceBefore = nx.dijkstra_path_length(G, u, v, weight='length')

        G.remove_edge(u, v, key=int(idedge))

        # Intermediate vertices double as the key/idedge of the edge that ends at them.
        innerVertices = [str(idedge) + '_' + str(n) for n in range(1, segments)]
        chainStarts = [u] + innerVertices
        for start, end in zip(chainStarts, innerVertices):
            G.add_edge(start, end, key=end, idedge=end, length=segmentLength)

        # The closing edge reaches the original endpoint v.
        lastKey = str(idedge) + '_' + str(segments)
        G.add_edge(chainStarts[-1], v, key=lastKey, idedge=lastKey, length=segmentLength)

        distanceAfter = nx.dijkstra_path_length(G, u, v, weight='length')
        if round(distanceBefore, 7) != round(distanceAfter, 7):
            print("ERROR IN DISTANCES:", distanceBefore, distanceAfter)

    return G

In [4]:
#TODO:  check that the graph with split edges has the correct number of edges and nodes
#       fix edges with length equal to zero
def splitEdges(precision=1, G=None):
    """Greedily split edges and return the resulting segment lengths, sorted ascending.

    Every edge starts as a single segment. At each step the heap yields the edge
    whose segments would be the longest after one more split, and that edge's
    segment count is increased by one. ``int(number_of_edges * precision)`` steps
    are performed in total; a value of 0 (or less) leaves all edges unsplit.

    Args:
        precision: average number of extra splits per edge. Fractional values are
            accepted; the number of steps is truncated to an integer.
        G: optional pre-loaded graph exposing ``edges(data=True)`` with ``idedge``
            and ``length`` attributes. When omitted the graph is loaded with
            ``loadMultiGraph()``, as before; passing it avoids reloading the
            database on every call.

    Returns:
        A sorted list with one entry per edge: the length of each segment of that
        edge (original length divided by its number of segments).
    """
    if G is None:
        G = loadMultiGraph()

    firstSplit = 2
    # Heap entries are (heapValue, u, v, idedge, lengthOriginal, numSplit), where
    # numSplit is the segment count the edge would have after its NEXT split.
    # The value must be negative because the data structure is a min heap.
    edgesHeap = [
        (-1 * attrs['length'] / firstSplit, u, v, attrs['idedge'], attrs['length'], firstSplit)
        for u, v, attrs in G.edges(data=True)
    ]
    heapify(edgesHeap)

    for _ in range(int(len(edgesHeap) * precision)):
        (_, u, v, idedge, lengthOriginal, numSplit) = edgesHeap[0]
        # Commit this split and queue the edge again with the value of the following one;
        # heapreplace pops the current top and pushes the new entry in one operation.
        nextValue = -1 * lengthOriginal / (numSplit + 1)
        heapreplace(edgesHeap, (nextValue, u, v, idedge, lengthOriginal, numSplit + 1))

    # The current number of segments is one less than the stored numSplit.
    return sorted(entry[4] / (entry[5] - 1) for entry in edgesHeap)

maxPrecision = 11

# One column of sorted segment lengths per precision level 0 .. maxPrecision - 1.
lengthColumns = []
for i in range(maxPrecision):
    newData = np.asarray(splitEdges(i))[:, np.newaxis]
    lengthColumns.append(newData)
data = np.concatenate(lengthColumns, axis=1)

# Summary statistics of the segment lengths, one column per precision level.
kdf = ks.DataFrame(data=data, columns=range(maxPrecision))
kdf.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
count,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0,414234.0
mean,92.96271,45.075977,30.999826,23.655028,19.107781,16.025906,13.816536,12.132299,10.813729,9.753599,8.87405
std,146.066331,19.611037,10.917364,7.015027,4.981219,3.767039,2.978081,2.432742,2.053806,1.772088,1.550624
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32.216905,32.216905,28.739134,21.902818,17.726432,14.892694,12.828191,11.284357,10.063547,9.078929,8.27488
50%,63.693004,47.655723,31.360723,23.52184,18.840663,15.675082,13.429604,11.760113,10.436336,9.393606,8.533775
75%,112.984492,57.5502,36.206172,26.674968,20.869037,17.151203,14.648848,12.677846,11.204332,10.039559,9.072773
max,27778.344016,86.705306,56.989738,42.934093,34.628541,29.089521,25.076034,22.057883,19.697932,17.791067,16.228343


In [5]:
def formatFloat(value, numDecimalPlaces=4):
    """Format a number as a fixed-point table cell prefixed with the `` & `` separator.

    Args:
        value: number to format.
        numDecimalPlaces: digits kept after the decimal point (default 4, the
            previously hard-coded value).

    Returns:
        The string ``" & "`` followed by ``value`` in fixed-point notation.
    """
    return " & " + "{:.{nDigits}f}".format(value, nDigits=numDecimalPlaces)

# Render the summary statistics as text, each value prefixed by formatFloat's " & ".
describeText = kdf.describe().to_string(float_format=formatFloat)
# Terminate every row with " \\" so the lines can be pasted into a LaTeX table.
stringKdf = describeText.replace("\n", " \\\\\n")
print(stringKdf)

                  0              1              2              3              4              5              6              7              8              9              10 \\
count  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000  & 414234.0000 \\
mean       & 92.9627      & 45.0760      & 30.9998      & 23.6550      & 19.1078      & 16.0259      & 13.8165      & 12.1323      & 10.8137       & 9.7536       & 8.8741 \\
std       & 146.0663      & 19.6110      & 10.9174       & 7.0150       & 4.9812       & 3.7670       & 2.9781       & 2.4327       & 2.0538       & 1.7721       & 1.5506 \\
min         & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000       & 0.0000 \\
25%        & 32.2169      & 32.2169      & 28.7391      & 21.9028      & 17.7264      & 14.8927      & 12.8282      & 11.2844     