# Loading the libraries

In [57]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-class-path /usr/share/java/postgresql-42.2.23.jar --jars /usr/share/java/postgresql-42.2.23.jar pyspark-shell'
import databricks.koalas as ks
import pandas as pd
import networkx as nx
import psycopg2 as pg
import psycopg2.extras as pgExtras
from datetime import datetime, timedelta

# Loading the data

In [58]:
# Database credentials shared by every psycopg2 / JDBC call in this notebook.
# They are read from the standard libpq environment variables when present so
# that secrets do not have to be stored in the notebook; the literals are kept
# only as fallbacks so existing local setups keep working unchanged.
params = {
	'user': os.environ.get('PGUSER', 'cristiano'),
	'password': os.environ.get('PGPASSWORD', 'cristiano'),
}

def loadMultiDigraph(kdf):
	"""Build a directed multigraph from the street-segment rows of ``kdf``.

	Each row adds an edge from ``idvertexorig_fk`` to ``idvertexdest_fk``
	carrying the ``idedge`` and ``length`` attributes. When the row's
	``oneway`` value is anything other than ``'YES'`` the reverse edge is
	added as well, except for self-loops, which are inserted only once.

	Returns the populated ``nx.MultiDiGraph``.
	"""
	digraph = nx.MultiDiGraph()
	for record in kdf.itertuples():
		fields = record._asdict()
		origin = fields['idvertexorig_fk']
		destination = fields['idvertexdest_fk']
		attrs = {'idedge': fields['idedge'], 'length': fields['length']}

		digraph.add_edge(origin, destination, **attrs)

		# Two-way streets get a mirrored edge; a self-loop has no distinct mirror.
		isTwoWay = fields['oneway'] != 'YES'
		if isTwoWay and origin != destination:
			digraph.add_edge(destination, origin, **attrs)

	return digraph

def loadMultiGraph():
	"""Load STREETSEGMENT and return ``(G, kdf)``.

	``kdf`` is the Koalas frame produced by the query (one row per street
	segment) and ``G`` is the undirected ``nx.MultiGraph`` built from it, with
	``idedge`` and ``length`` stored as edge attributes.
	"""
	edgeAttributes = ['idedge', 'length']
	kdf = ks.read_sql_query('''	select	EDGE.IDVERTEXORIG_FK,
										EDGE.IDVERTEXDEST_FK,
										EDGE.IDEDGE,
										EDGE.LENGTH,
										EDGE.ONEWAY--,
										--EDGE.UTILITYVALUE,
										--EDGE.PARKINGEXPENSES
								from	STREETSEGMENT as EDGE ''',
					'jdbc:postgresql:afterqualifying', options=params)

	# networkx works on pandas frames, so the Koalas frame is materialised locally.
	edgesPdf = kdf.to_pandas()
	G = nx.from_pandas_edgelist(edgesPdf, source='idvertexorig_fk', target='idvertexdest_fk',
								edge_attr=edgeAttributes, create_using=nx.MultiGraph())

	return G, kdf

#G, kdf = loadMultiGraph()
#D = loadMultiDigraph(kdf)
#kdf = None

# Deleting the smaller components

In [59]:
def deleteSmallerComponents(G):
	"""Delete from the database every vertex that is not in the largest component of ``G``.

	The connected components of ``G`` are ranked by size; the vertices of all
	but the largest one are removed from STREETINTERSECTION, together with
	every STREETSEGMENT row that starts or ends at one of them. Both deletes
	run in a single transaction. Database errors are printed, not raised.

	Side effect: adds the host/port/database entries to the module-level
	``params`` dict.
	"""
	components = sorted(nx.connected_components(G), key=len, reverse=True)
	verticesToDelete = [vertex for comp in components[1:] for vertex in comp]

	# One flat tuple per execution, matching the number of %s placeholders of
	# each statement (two for the segment delete, one for the vertex delete).
	segmentParams = [(vertex, vertex) for vertex in verticesToDelete]
	intersectionParams = [(vertex,) for vertex in verticesToDelete]

	params.update({'host':'localhost', 'port':'5432', 'database':'afterqualifying'})
	conn = None
	try:
		conn = pg.connect(**params)
		cur = conn.cursor()
		cur.executemany('delete from STREETSEGMENT where IDVERTEXORIG_FK = %s or IDVERTEXDEST_FK = %s', segmentParams)
		cur.executemany('delete from STREETINTERSECTION where IDVERTEX = %s', intersectionParams)
		conn.commit()
		cur.close()
	except Exception as error:
		# Best-effort maintenance step: report the failure and fall through to
		# the cleanup; closing without commit discards the partial work.
		print(error)
	finally:
		if conn is not None:
			conn.close()

#deleteSmallerComponents(G)

# Calculating and storing the shortest distances

In [60]:
def reopenConn(conn, cur, params):
    """Commit and close the given connection, then open a replacement.

    ``conn`` is committed, ``cur`` and ``conn`` are closed, and a new
    connection is opened with ``pg.connect(**params)``.

    Returns the new ``(connection, cursor)`` pair.
    """
    conn.commit()
    cur.close()
    conn.close()

    freshConn = pg.connect(**params)
    return freshConn, freshConn.cursor()

def storeShortestDistances():
	"""Recompute VERTICESPAIRSNEARBY from the street graph.

	All-pairs Dijkstra distances (weight ``length``) are computed on the
	undirected multigraph with a cutoff of 500.000001, the table is emptied
	and every (source, destination, distance) triple is inserted in batches.
	After each batch the connection is committed and reopened through
	``reopenConn``. Database errors are printed, not raised.

	Side effect: adds the host/port/database entries to the module-level
	``params`` dict.
	"""
	G = loadMultiGraph()[0]
	cutOffThreshold = 500.000001
	distances = dict(nx.all_pairs_dijkstra_path_length(G, cutoff=cutOffThreshold, weight='length'))
	# The graph is no longer needed once the distances are materialised.
	G = None

	insertSQL = 'insert into VERTICESPAIRSNEARBY (IDVERTEX1_FK, IDVERTEX2_FK, WALKINGDISTANCE) values %s'
	# Flush threshold; it is checked once per source vertex, so a batch may
	# exceed it by the number of vertices reachable from the last source.
	lengthToInsert = 100000

	params.update({'host':'localhost', 'port':'5432', 'database':'afterqualifying'})
	conn = None
	try:
		conn = pg.connect(**params)
		cur = conn.cursor()

		cur.execute('delete from VERTICESPAIRSNEARBY')

		distancesToInsert = []
		for source, reachable in distances.items():
			distancesToInsert.extend(
				(source, destination, walkingDistance)
				for destination, walkingDistance in reachable.items()
			)

			if len(distancesToInsert) > lengthToInsert:
				pgExtras.execute_values(cur, insertSQL, distancesToInsert)
				distancesToInsert = []
				conn, cur = reopenConn(conn, cur, params)

		# Flush whatever is left below the threshold.
		if distancesToInsert:
			pgExtras.execute_values(cur, insertSQL, distancesToInsert)

		conn.commit()
		cur.close()
	except Exception as error:
		print(error)
	finally:
		if conn is not None:
			conn.close()

#storeShortestDistances()

In [61]:
def distancesFromCenter(G):
	"""Measure every vertex's distance from the first center vertex of ``G``.

	The first vertex returned by ``nx.center`` is taken as the center. Hop
	counts and ``length``-weighted distances from it are each sorted in
	ascending order and written to ``centerDistances.txt``: the center on the
	first line, then one "vertex value" line per hop count, then one per
	weighted distance.

	Returns ``(distancesCenter, centerGraph)``: the sorted list of
	(vertex, weighted distance) pairs and the center vertex.
	"""
	centerGraph = nx.center(G, usebounds=True)[0]

	def byValue(pair):
		return pair[1]

	hopsCenter = sorted(nx.single_source_shortest_path_length(G, source=centerGraph).items(), key=byValue)
	distancesCenter = sorted(
		nx.single_source_dijkstra_path_length(G, source=centerGraph, weight='length').items(),
		key=byValue,
	)

	with open('centerDistances.txt', 'w') as f:
		f.write('%s' % centerGraph)
		# Hop counts first, weighted distances afterwards, in one pass.
		for vertex, value in hopsCenter + distancesCenter:
			f.write('\n%s %s' % (vertex, value))

	return distancesCenter, centerGraph

#distancesCenter, centerGraph = distancesFromCenter(G)

# Manipulating and storing trips data

In [62]:
def checkAppendZeroDate(number):
    """Return ``number`` as a string, left-padding 7-character values with '0'.

    The callers in this notebook pass the result to ``strptime`` with the
    ``%d%m%Y`` format, so a numeric date that lost its leading zero
    (1012017) is turned back into an 8-character string ('01012017').
    Values whose string form is not 7 characters long are returned unchanged.
    """
    # A whole-valued float (1012017.0) would stringify with a trailing '.0'
    # and never match the 7-character test; normalise it to an int first.
    if isinstance(number, float) and number.is_integer():
        number = int(number)

    retValue = str(number)
    if len(retValue) == 7:
        retValue = '0' + retValue

    return retValue

def checkAppendZero(number):
    """Return ``int(number)`` as a string, zero-padded to at least two characters."""
    return str(int(number)).zfill(2)

def toHour(number1, number2):
    """Format an hour (``number1``) and a minute (``number2``) as 'HH:MM'."""
    hours = checkAppendZero(number1)
    minutes = checkAppendZero(number2)
    return hours + ':' + minutes

def setTimestampsTrips():
    """Update TRIP.REASONDEPARTURE / REASONDESTINATION from the OD 2017 spreadsheet.

    The spreadsheet is read and restricted to the trips of every person who
    has at least one trip where MODO1..MODO4 equals 9. Each remaining row is
    matched to TRIP by driver id and by the HH24:MI part of the departure and
    arrival timestamps, and the two reason columns are updated in batches.
    Errors are printed, not raised.

    Returns the filtered Koalas frame.
    """
    # NOTE: ks.sql resolves `{kdf}` by variable name, so this local must keep
    # the name `kdf` until the query below has run.
    kdf = pd.read_excel('/home/cristiano/Dropbox/UFMG/Cristiano/Doutorado/Segunda Etapa da Qualificação/Pesquisas OD/OD 2017/Banco de dados/OD_2017.xlsx',
                        usecols=[   'ID_PESS', 'DATA', 'H_SAIDA', 'MIN_SAIDA', 'H_CHEG', 'MIN_CHEG', 'MOTIVO_O', 'MOTIVO_D',
                                     'MODO1', 'MODO2', 'MODO3', 'MODO4', 'MODOPRIN', 'CO_O_X', 'CO_O_Y', 'CO_D_X', 'CO_D_Y'])
    kdf = ks.from_pandas(kdf)

    kdf = ks.sql('''    select  ID_PESS, DATA, H_SAIDA, MIN_SAIDA, H_CHEG, MIN_CHEG, MOTIVO_O, MOTIVO_D, MODO1, MODO2, MODO3, MODO4, MODOPRIN, CO_O_X, CO_O_Y, CO_D_X, CO_D_Y
                        from    {kdf} OD
                        where   exists (    select  1
                                            from    {kdf} OD_SUBQUERY
                                            where   (OD_SUBQUERY.MODO1 = 9 or OD_SUBQUERY.MODO2 = 9 or OD_SUBQUERY.MODO3 = 9 or OD_SUBQUERY.MODO4 = 9) and
                                                    OD_SUBQUERY.ID_PESS = OD.ID_PESS
                                )
    ''')

    # Reuse the module-level credentials instead of repeating them here; a
    # copy is built so the shared dict is left untouched by this function.
    connParams = dict(params, host='localhost', port='5432', database='afterqualifying')
    conn = None
    try:
        conn = pg.connect(**connParams)
        cur = conn.cursor()

        lengthToUpdate = 10000
        tripsToUpdate = []
        updateSQL = ''' update  TRIP
                        set     REASONDEPARTURE = data.reasonDep,
                                REASONDESTINATION = data.reasonDest--,
                                --TIMESTAMPDEPARTURE = TO_TIMESTAMP(data.dataD||' '||TIMEDEPARTURE, 'DDMMYYYY HH24:MI'),
                                --TIMESTAMPARRIVAL = TO_TIMESTAMP(data.dataA||' '||TIMEARRIVAL, 'DDMMYYYY HH24:MI'),
                                --MODE1 = data.m1::integer,
                                --MODE2 = data.m2::integer,
                                --MODE3 = data.m3::integer,
                                --MODE4 = data.m4::integer,
                                --MAINMODE = data.mainmode::integer--,
                                --IDPLACEDEPARTURE = ORIGIN.IDPLACE,
                                --IDPLACEDESTINATION = DESTINATION.IDPLACE
                        from    (values %s) as data (idD, dataD, dataA, timestampD, timestampA, reasonDep, reasonDest, m1, m2, m3, m4, mainmode, oX, oY, dX, dY)--,
                                --PLACE as ORIGIN,
                                --PLACE as DESTINATION
                        where   IDDRIVER = data.idD and
                                to_char(TIMESTAMPDEPARTURE, 'HH24:MI') = data.timestampD and
                                to_char(TIMESTAMPARRIVAL, 'HH24:MI') = data.timestampA --and
                                --TIMEDEPARTURE = data.timestampD and
                                --TIMEARRIVAL = data.timestampA and
                                --DRIVINGDISTANCE is NULL and
                                --ORIGIN.GEOM = ST_Transform(ST_SetSRID(ST_MakePoint(data.oX, data.oY), 22523), 4326) and
                                --DESTINATION.GEOM = ST_Transform(ST_SetSRID(ST_MakePoint(data.dX, data.dY), 22523), 4326)'''

        for row in kdf.itertuples():
            dictRow = row._asdict()

            dateDeparture = dateArrival = checkAppendZeroDate(dictRow['DATA'])
            hourDeparture = toHour(dictRow['H_SAIDA'], dictRow['MIN_SAIDA'])
            hourArrival = toHour(dictRow['H_CHEG'], dictRow['MIN_CHEG'])
            # Zero-padded 'HH:MM' strings compare chronologically; an arrival
            # earlier than the departure is taken to be on the following day.
            if hourDeparture > hourArrival:
                nextDay = datetime.strptime(dateArrival, '%d%m%Y') + timedelta(days=1)
                dateArrival = nextDay.strftime("%d%m%Y")

            # Tuple order must match the column alias list of `data` in updateSQL.
            tripsToUpdate.append((  dictRow['ID_PESS'], dateDeparture, dateArrival, hourDeparture, hourArrival,
                                    dictRow['MOTIVO_O'], dictRow['MOTIVO_D'],
                                    dictRow['MODO1'], dictRow['MODO2'], dictRow['MODO3'], dictRow['MODO4'], dictRow['MODOPRIN'],
                                    dictRow['CO_O_X'], dictRow['CO_O_Y'], dictRow['CO_D_X'], dictRow['CO_D_Y']))

            if len(tripsToUpdate) > lengthToUpdate:
                pgExtras.execute_values(cur, updateSQL, tripsToUpdate)
                tripsToUpdate = []

        # Flush the final, partially filled batch.
        if tripsToUpdate:
            pgExtras.execute_values(cur, updateSQL, tripsToUpdate)

        conn.commit()
        cur.close()
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

    return kdf

def storePlacesOfTrip():
    """Insert into TRIP the spreadsheet trips of people with a mode-9 trip.

    The OD 2017 spreadsheet is read and restricted to the trips of every
    person who has at least one trip where MODO1..MODO4 equals 9. Each
    remaining row becomes one TRIP row, numbered from 1 in iteration order,
    with origin/destination points converted from SRID 22523 to 4326.
    Rows are sent in batches and committed once at the end. Errors are
    printed, not raised.

    Side effect: adds the host/port/database entries to the module-level
    ``params`` dict.

    Returns the complete (unfiltered) Koalas frame read from the spreadsheet.
    """
    # NOTE: ks.sql resolves `{kdf}` by variable name, so this local must keep its name.
    kdf = pd.read_excel('/home/cristiano/Dropbox/UFMG/Cristiano/Doutorado/Segunda Etapa da Qualificação/Pesquisas OD/OD 2017/Banco de dados/OD_2017.xlsx',
                        usecols=[   'ID_PESS', 'DATA', 'FE_VIA', 'TOT_VIAG',
                                    'CO_O_X', 'CO_O_Y', 'CO_D_X', 'CO_D_Y',
                                    'MODO1', 'MODO2', 'MODO3', 'MODO4',
                                    'H_SAIDA', 'MIN_SAIDA', 'ANDA_O', 'H_CHEG', 'MIN_CHEG', 'ANDA_D',
                                    'MODOPRIN', 'TP_ESAUTO', 'VL_EST'])
    kdf = ks.from_pandas(kdf)

    resultSQL = ks.sql('''  select  *
                            from    {kdf} OD
                            where   exists (    select  1
                                                from    {kdf} OD_SUBQUERY
                                                where   (OD_SUBQUERY.MODO1 = 9 or OD_SUBQUERY.MODO2 = 9 or OD_SUBQUERY.MODO3 = 9 or OD_SUBQUERY.MODO4 = 9) and
                                                        OD_SUBQUERY.ID_PESS = OD.ID_PESS
                                    )
    ''')

    # Statement and row template shared by every batch.
    insertSQL = '''insert into TRIP (IDTRIP, IDDRIVER, TRIPEXPANSIONFACTOR, GEOMDEPARTURE, GEOMDESTINATION,
                                                    TIMEDEPARTURE, TIMEARRIVAL, PARKINGEXPENSES, ISMONTHLYPARKING) values %s'''
    rowTemplate = '''(%s, %s, %s, ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 22523), 4326),
                                                    ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 22523), 4326), %s, %s, %s, %s)'''

    params.update({'host':'localhost', 'port':'5432', 'database':'afterqualifying'})
    conn = None
    try:
        conn = pg.connect(**params)
        cur = conn.cursor()

        lengthToInsert = 10000
        placesToInsert = []
        # The running row number doubles as the IDTRIP value.
        for countRows, row in enumerate(resultSQL.itertuples(), start=1):
            dictRow = row._asdict()
            isMonthlyParking = bool(dictRow['TP_ESAUTO'] == 7)
            departureTime = toHour(dictRow['H_SAIDA'], dictRow['MIN_SAIDA'])
            arrivalTime = toHour(dictRow['H_CHEG'], dictRow['MIN_CHEG'])
            placesToInsert.append((
                countRows, dictRow['ID_PESS'], dictRow['FE_VIA'],
                dictRow['CO_O_X'], dictRow['CO_O_Y'], dictRow['CO_D_X'], dictRow['CO_D_Y'],
                departureTime, arrivalTime, dictRow['VL_EST'], isMonthlyParking,
            ))

            if len(placesToInsert) > lengthToInsert:
                pgExtras.execute_values(cur, insertSQL, placesToInsert, template=rowTemplate)
                placesToInsert = []

        # Send the final, partially filled batch.
        if len(placesToInsert) > 0:
            pgExtras.execute_values(cur, insertSQL, placesToInsert, template=rowTemplate)

        conn.commit()
        cur.close()
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

    return kdf

#kdf = setTimestampsTrips()
#kdf = storePlacesOfTrip()

# Setting edges parameters related to the trips data

In [63]:
class Edge:
    """Mutable record describing one street segment and its accumulated trip figures."""

    def __init__(self, source, target, length, utilityValue, parkingExpenses, numTripsReached):
        # Geometry of the segment: its two end vertices and its length.
        self.source, self.target, self.length = source, target, length
        # Accumulators that the trip-processing functions add to.
        self.utilityValue, self.parkingExpenses, self.numTripsReached = (
            utilityValue, parkingExpenses, numTripsReached
        )

def createEdges(G, edge, vertexKey, posVertex, inverted=False):
    """Connect the temporary vertex ``vertexKey`` to the end vertices of ``edge``.

    ``posVertex`` is the relative position of the vertex along the edge
    (0 at ``edge.source``, 1 at ``edge.target``). Two edges are added,
    source -> vertexKey and vertexKey -> target, whose lengths split
    ``edge.length`` according to that position. With ``inverted=True`` the
    roles of source and target (and therefore the position) are mirrored,
    which is used for the reverse direction of a two-way street. For a
    self-loop a single edge is added, using the shorter way round.

    The ``edge`` object is never modified, so it stays consistent even if
    ``G.add_edge`` raises.

    Returns ``(edge.source, edge.target)``.
    """
    source, target = edge.source, edge.target
    if inverted:
        # On the reverse edge the position is measured from the other end.
        posVertex = 1 - posVertex
        source, target = target, source

    if source != target:
        G.add_edge(source, vertexKey, idedge=str(source) + '_' + vertexKey,
                   length=posVertex * edge.length)
        G.add_edge(vertexKey, target, idedge=str(target) + '_' + vertexKey,
                   length=(1 - posVertex) * edge.length)
    else:
        # Self-loop: one connection only, along the shorter side of the loop.
        G.add_edge(source, vertexKey, idedge=str(source) + '_' + vertexKey,
                   length=min(posVertex, (1 - posVertex)) * edge.length)

    # Callers always get the endpoints in the edge's own orientation.
    return edge.source, edge.target

def includeEdges(G, edge, vertexKey, posVertex):
    """Insert ``vertexKey`` into ``G`` at relative position ``posVertex`` on ``edge``.

    The vertex is added and connected through ``createEdges``. In a directed
    graph that also contains the reverse edge target -> source, a second,
    inverted pair of connecting edges is created, unless the edge is a
    self-loop (self-loops are handled as if they were undirected).

    Returns the ``(closestEdgeSource, closestEdgeTarget)`` pair reported by
    the last ``createEdges`` call.
    """
    G.add_node(vertexKey)
    endpoints = createEdges(G, edge, vertexKey, posVertex)

    if not G.is_directed():
        return endpoints

    # Two-way street in a directed graph: mirror the connection as well.
    if G.has_edge(edge.target, edge.source) and edge.source != edge.target:
        endpoints = createEdges(G, edge, vertexKey, posVertex, inverted=True)

    return endpoints

def removeEdgesVertex(G, closestEdgeSource, vertexKey, closestEdgeTarget):
    """Remove the temporary vertex ``vertexKey`` and its connecting edges from ``G``.

    The edge source -> vertexKey is always removed. The edge
    vertexKey -> target is removed if it is still present (for an undirected
    self-loop the first removal already took it). In a directed graph the
    mirrored pair vertexKey -> source and target -> vertexKey is removed too
    when both exist. Finally the vertex itself is removed.
    """
    G.remove_edge(closestEdgeSource, vertexKey)

    stillLinkedToTarget = G.has_edge(vertexKey, closestEdgeTarget)
    if stillLinkedToTarget:
        G.remove_edge(vertexKey, closestEdgeTarget)

        # Connections created for the reverse direction of a two-way street.
        mirrored = ((vertexKey, closestEdgeSource), (closestEdgeTarget, vertexKey))
        if G.is_directed() and all(G.has_edge(tail, head) for tail, head in mirrored):
            for tail, head in mirrored:
                G.remove_edge(tail, head)

    G.remove_node(vertexKey)

def setEdgesDictValues(edgesDict, keyEdge, tripExpansionFactor, parkingExpenses, isDestination):
    """Accumulate one trip's contribution on the edge ``edgesDict[keyEdge]``.

    ``tripExpansionFactor`` is always added to the edge's ``utilityValue``.
    When the trip ends here (``isDestination``) and a parking expense is
    known, the expense is added to ``parkingExpenses`` and
    ``numTripsReached`` is incremented, so an average can be derived later.

    Returns the same ``edgesDict`` object, updated in place.
    """
    edgeStats = edgesDict[keyEdge]
    edgeStats.utilityValue += tripExpansionFactor

    # A missing expense may arrive as None or as NaN (SQL NULL surfaced
    # through pandas); either would corrupt the running sum, so both are
    # skipped rather than counted as an observation.
    if isDestination and not pd.isna(parkingExpenses):
        edgeStats.parkingExpenses += parkingExpenses
        edgeStats.numTripsReached += 1

    return edgesDict

def updateEdgesDict(G, vertexKeySource, keyEdgeSource, cutOff, edgesDict, tripExpansionFactor, parkingExpenses, isDestination):
    """Credit a trip to every edge within ``cutOff`` of the temporary vertex.

    A Dijkstra search (weight ``length``) is run from ``vertexKeySource``.
    The edge the vertex sits on (``keyEdgeSource``) is credited once, then
    every edge joining a reached vertex to one of its predecessors is
    credited through ``setEdgesDictValues``. Edges whose string id contains
    ``vertexKeySource`` are skipped.

    Returns ``(edgesDict, distances)``, where ``distances`` maps each reached
    vertex to its distance from ``vertexKeySource``.
    """
    # Only the predecessor map is needed to enumerate the edges on the
    # shortest paths; the cutoff already restricts the search radius.
    predecessors, distances = nx.dijkstra_predecessor_and_distance(G, source=vertexKeySource, cutoff=cutOff, weight='length')

    edgesDict = setEdgesDictValues(edgesDict, keyEdgeSource, tripExpansionFactor, parkingExpenses, isDestination)

    for reached, preds in predecessors.items():
        for pred in preds:
            # In a multigraph several parallel edges may join the same pair of vertices.
            for attrs in G[pred][reached].values():
                reachedKey = attrs['idedge']
                attachedToSource = isinstance(reachedKey, str) and vertexKeySource in reachedKey
                if not attachedToSource:
                    edgesDict = setEdgesDictValues(edgesDict, reachedKey, tripExpansionFactor, parkingExpenses, isDestination)

    return edgesDict, distances

def setTripsAndEdgesValues(kdf, G, D):
    """Compute trip distances and per-edge trip statistics and store both.

    ``kdf`` holds the street segments (idedge, idvertexorig_fk,
    idvertexdest_fk, length), ``G`` is the undirected multigraph and ``D`` the
    directed multigraph of the same network. For every TRIP joined to its
    origin and destination PLACE:

    * temporary vertices 'originTrip' and 'destTrip' are inserted into both
      graphs at the places' positions on their edges;
    * the walking distance is taken on ``G`` and the driving distance on
      ``D`` (falling back to the walking distance when ``D`` has no path);
    * every edge within the cutoff of the origin and of the destination is
      credited through ``updateEdgesDict``;
    * the temporary vertices are removed again.

    TRIP is then updated with the distances, and STREETSEGMENT with each
    edge's utility value and average parking expense. Writes are batched and
    the connection is reopened after each batch. Errors are printed, not
    raised; if one occurs mid-trip the temporary vertices may remain in
    ``G``/``D``.

    Side effect: adds the host/port/database entries to the module-level
    ``params`` dict.

    Returns the dict mapping idedge to its ``Edge`` accumulator.
    """
    edgesDict = {}
    for row in kdf.itertuples():
        dictRow = row._asdict()
        edgesDict[dictRow['idedge']] = Edge(dictRow['idvertexorig_fk'], dictRow['idvertexdest_fk'], dictRow['length'], 0, 0, 0)

    params.update({'host':'localhost', 'port':'5432', 'database':'afterqualifying'})

    # Column aliases used in the query and to read the resulting rows.
    keyEdgeOrigin = 'orig_idedge'
    keyPosOrigin = 'posorigin'
    keyEdgeDest = 'dest_idedge'
    keyPosDest = 'posdest'
    keyIdDriver = 'iddriver'
    keyTripExpansionFactor = 'tripexpansionfactor'
    keyTimeDeparture = 'timedeparture'
    keyTimeArrival = 'timearrival'
    keyParkingExpenses = 'parkingexpenses'
    keyIsMonthlyParking = 'ismonthlyparking'
    kdf = ks.read_sql_query('   select  ORIGIN.IDEDGE_FK as ' + keyEdgeOrigin + ''',
                                        ORIGIN.POSITIONINEDGE as ''' + keyPosOrigin + ''',
                                        DESTINATION.IDEDGE_FK as ''' + keyEdgeDest + ''',
                                        DESTINATION.POSITIONINEDGE as ''' + keyPosDest + ''',
                                        TRIP.IDDRIVER as ''' + keyIdDriver + ''',
                                        TRIP.TRIPEXPANSIONFACTOR as ''' + keyTripExpansionFactor + ''',
                                        TRIP.TIMEDEPARTURE as ''' + keyTimeDeparture + ''',
                                        TRIP.TIMEARRIVAL as ''' + keyTimeArrival + ''',
                                        TRIP.PARKINGEXPENSES as ''' + keyParkingExpenses + ''',
                                        TRIP.ISMONTHLYPARKING as ''' + keyIsMonthlyParking + '''

                                from    PLACE ORIGIN,
                                        PLACE DESTINATION,
                                        TRIP

                                where   ORIGIN.IDPLACE = TRIP.IDPLACEDEPARTURE and
                                        DESTINATION.IDPLACE = TRIP.IDPLACEDESTINATION 
                            ''', 'jdbc:postgresql:afterqualifying', options=params)

    cutOff = 500.000001
    vertexKeyOrigin = 'originTrip'
    vertexKeyDest = 'destTrip'

    # Must exist before the try block: the finally clause inspects it even
    # when pg.connect itself fails.
    conn = None
    try:
        conn = pg.connect(**params)
        cur = conn.cursor()

        lengthToUpdate = 10000
        tripsToUpdate = []
        updateSQL = ''' update  TRIP
                        set     DRIVINGDISTANCE = data.drivingDist,
                                WALKINGDISTANCE = data.walkingDist
                        from    (values %s) as data (drivingDist, walkingDist, idPess, timeDepart, timeArriv)
                        where   IDDRIVER = data.idPess and
                                TIMEDEPARTURE = data.timeDepart and
                                TIMEARRIVAL = data.timeArriv  '''

        for row in kdf.itertuples():
            dictRow = row._asdict()

            # Only monthly parking expenses are accumulated on the edges.
            parkingExpenses = dictRow[keyParkingExpenses]
            if dictRow[keyIsMonthlyParking] == False:
                parkingExpenses = None

            # Insert the origin into both graphs and credit the edges around it.
            closestEdgeSourceOrigin, closestEdgeTargetOrigin = includeEdges(G, edgesDict[dictRow[keyEdgeOrigin]], vertexKeyOrigin, dictRow[keyPosOrigin])
            closestEdgeSourceOrigin, closestEdgeTargetOrigin = includeEdges(D, edgesDict[dictRow[keyEdgeOrigin]], vertexKeyOrigin, dictRow[keyPosOrigin])
            edgesDict, distances = updateEdgesDict(G, vertexKeyOrigin, dictRow[keyEdgeOrigin], cutOff, edgesDict, dictRow[keyTripExpansionFactor], parkingExpenses, False)

            closestEdgeSourceDest, closestEdgeTargetDest = includeEdges(G, edgesDict[dictRow[keyEdgeDest]], vertexKeyDest, dictRow[keyPosDest])
            closestEdgeSourceDest, closestEdgeTargetDest = includeEdges(D, edgesDict[dictRow[keyEdgeDest]], vertexKeyDest, dictRow[keyPosDest])

            # `distances` was computed before the destination vertex existed,
            # so it only contains it if the key was already present.
            if vertexKeyDest in distances:
                walkingDistance = distances[vertexKeyDest]
            else:
                walkingDistance = nx.dijkstra_path_length(G, source=vertexKeyOrigin, target=vertexKeyDest, weight='length')

            #A path may not exist when considering only the DiGraph from OpenStreetMap
            try:
                drivingDistance = nx.dijkstra_path_length(D, source=vertexKeyOrigin, target=vertexKeyDest, weight='length')
            except(nx.NetworkXNoPath):
                drivingDistance = walkingDistance

            # The origin is removed before searching from the destination.
            removeEdgesVertex(G, closestEdgeSourceOrigin, vertexKeyOrigin, closestEdgeTargetOrigin)
            removeEdgesVertex(D, closestEdgeSourceOrigin, vertexKeyOrigin, closestEdgeTargetOrigin)

            edgesDict, distances = updateEdgesDict(G, vertexKeyDest, dictRow[keyEdgeDest], cutOff, edgesDict, dictRow[keyTripExpansionFactor], parkingExpenses, True)
            removeEdgesVertex(G, closestEdgeSourceDest, vertexKeyDest, closestEdgeTargetDest)
            removeEdgesVertex(D, closestEdgeSourceDest, vertexKeyDest, closestEdgeTargetDest)

            tripsToUpdate.append((drivingDistance, walkingDistance, dictRow[keyIdDriver], dictRow[keyTimeDeparture], dictRow[keyTimeArrival]))
            if len(tripsToUpdate) > lengthToUpdate:
                pgExtras.execute_values(cur, updateSQL, tripsToUpdate)
                tripsToUpdate = []
                conn, cur = reopenConn(conn, cur, params)

        if len(tripsToUpdate) > 0:
            pgExtras.execute_values(cur, updateSQL, tripsToUpdate)

        tripsToUpdate = None
        edgesToUpdate = []
        updateSQL = ''' update  STREETSEGMENT
                        set     UTILITYVALUE = data.utilityV,
                                PARKINGEXPENSES = data.parkingE::double precision
                        from    (values %s) as data (utilityV, parkingE, idE)
                        where   IDEDGE = data.idE  '''

        for idEdge, edge in edgesDict.items():
            # Average expense over the trips that ended near the edge; NULL
            # when no such trip contributed a value.
            if edge.numTripsReached > 0:
                parkingExpensesValue = edge.parkingExpenses/edge.numTripsReached
            else:
                parkingExpensesValue = None

            edgesToUpdate.append((edge.utilityValue, parkingExpensesValue, idEdge))

            if len(edgesToUpdate) > lengthToUpdate:
                pgExtras.execute_values(cur, updateSQL, edgesToUpdate)
                edgesToUpdate = []
                conn, cur = reopenConn(conn, cur, params)

        if len(edgesToUpdate) > 0:
            pgExtras.execute_values(cur, updateSQL, edgesToUpdate)
        edgesToUpdate = None

        conn.commit()
        cur.close()
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()

    return edgesDict

#G, kdf = loadMultiGraph()
#D = loadMultiDigraph(kdf)
#edgesDict = setTripsAndEdgesValues(kdf, G, D)

In [64]:
def searchClosestParkingExpenseData(G, u, v, cutoffDistance, data):
	"""Find a parking expense to copy onto the edge described by ``data``.

	A Dijkstra search (weight ``length``) is run from ``u`` within
	``cutoffDistance``. The first edge met, in the iteration order of the
	predecessor map, that has a non-zero ``utilityvalue`` and a non-missing
	``parkingexpenses`` is the candidate. A second search is then run from
	``v``, limited to the distance at which the candidate's vertex was
	reached; if it also finds such an edge, that one wins.

	Returns ``(data['idedge'], parkingexpenses)``, or ``None`` when the search
	from ``u`` finds no suitable edge.
	"""
	def firstPricedEdge(source, cutoff):
		# Returns (edge attributes, vertex reached, distance map) for the first
		# priced edge met around `source`, or None when there is none.
		preds, dists = nx.dijkstra_predecessor_and_distance(G, source=source, cutoff=cutoff, weight='length')
		for reached, predList in preds.items():
			for pred in predList:
				# Parallel edges between the same two vertices are all inspected.
				for attrs in G[pred][reached].values():
					if attrs['utilityvalue'] != 0 and not pd.isna(attrs['parkingexpenses']):
						return attrs, reached, dists
		return None

	foundFromU = firstPricedEdge(u, cutoffDistance)
	if foundFromU is None:
		return None

	edgeFromU, reachedFromU, distancesFromU = foundFromU
	# Look from the other end vertex, no farther than the candidate already found.
	foundFromV = firstPricedEdge(v, distancesFromU[reachedFromU])
	chosen = edgeFromU if foundFromV is None else foundFromV[0]

	return (data['idedge'], chosen['parkingexpenses'])

def fillMonthlyParkingExpenses(distance=500):
	"""Propagate parking expenses to edges that have utility but no expense.

	While STREETSEGMENT still contains rows with UTILITYVALUE <> 0 and a NULL
	PARKINGEXPENSES, the whole edge table is reloaded into a multigraph and
	each such edge receives the expense found by
	``searchClosestParkingExpenseData`` within ``distance`` (a cutoff on the
	``length``-weighted path). Each round is written, committed and the
	connection reopened, so values filled in one round can serve as sources
	in the next.

	The loop stops as soon as a round fills nothing: the remaining edges have
	no priced edge within ``distance`` and repeating the round would never
	change that.

	Side effect: adds the host/port/database entries to the module-level
	``params`` dict.
	"""
	params.update({'host':'localhost', 'port':'5432', 'database':'afterqualifying'})

	# Loop-invariant statements, defined once.
	jdbcUrl = 'jdbc:postgresql:afterqualifying'
	pendingSQL = '''	select	1
									from	STREETSEGMENT as EDGE
									where	EDGE.UTILITYVALUE <> 0 and
											EDGE.PARKINGEXPENSES is NULL '''
	edgesSQL = '''	select	EDGE.IDVERTEXORIG_FK,
																	EDGE.IDVERTEXDEST_FK,
																	EDGE.IDEDGE,
																	EDGE.LENGTH,
																	EDGE.UTILITYVALUE,
																	EDGE.PARKINGEXPENSES
															from	STREETSEGMENT as EDGE '''
	updateSQL = ''' update  STREETSEGMENT
                        set     PARKINGEXPENSES = data.parkingE
                        from    (values %s) as data (idE, parkingE)
                        where   IDEDGE = data.idE  '''

	conn = pg.connect(**params)
	cur = conn.cursor()
	try:
		countWhile = 0
		#While there is edge with utility value but without monthly parking expenses
		while not ks.read_sql_query(pendingSQL, jdbcUrl, options=params).empty:

			G = nx.from_pandas_edgelist(ks.read_sql_query(edgesSQL, jdbcUrl, options=params).to_pandas(),
							'idvertexorig_fk', 'idvertexdest_fk', ['idedge', 'length', 'utilityvalue', 'parkingexpenses'], create_using=nx.MultiGraph())

			edgesToUpdate = []
			for u, v, data in G.edges(data=True):
				if data['utilityvalue'] != 0 and pd.isna(data['parkingexpenses']):
					tupleEdgeToUpdate = searchClosestParkingExpenseData(G, u, v, distance, data)
					if tupleEdgeToUpdate is not None:
						edgesToUpdate.append(tupleEdgeToUpdate)

			# No progress is possible: without this guard the loop condition
			# would stay true forever.
			if not edgesToUpdate:
				print("NO EDGE COULD BE FILLED WITHIN DISTANCE: ", distance)
				break

			pgExtras.execute_values(cur, updateSQL, edgesToUpdate)
			# Commit the round so the next read sees the new values.
			conn, cur = reopenConn(conn, cur, params)

			countWhile += 1
			print("NUMBER OF UPDATES RUN: ", countWhile, len(edgesToUpdate))
	finally:
		# Release the connection even when a round fails midway.
		cur.close()
		conn.close()

# Run the fill with a search cutoff of 30000 instead of the default 500. The
# cutoff is compared against 'length'-weighted path distances, so it is in the
# same unit as STREETSEGMENT.LENGTH (presumably metres — TODO confirm).
fillMonthlyParkingExpenses(30000)

NUMBER OF UPDATES RUN:  1 607
NUMBER OF UPDATES RUN:  2 1
