# Loading the libraries

In [1]:
import os
# NOTE(review): this is set before the koalas import below — presumably so that the
# Spark JVM started by koalas/pyspark is launched with the PostgreSQL JDBC driver
# on its classpath. Confirm before reordering these lines.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-class-path /usr/share/java/postgresql-42.2.23.jar --jars /usr/share/java/postgresql-42.2.23.jar pyspark-shell'
import databricks.koalas as ks
import networkx as nx
import psycopg2 as pg
import psycopg2.extras as pgExtras

# Loading the data

In [2]:
# Database credentials. They can be overridden with the standard libpq environment
# variables so that secrets do not have to live in the notebook; the literals are
# kept only as fallbacks so an unconfigured environment behaves exactly as before.
params = {
	'user': os.environ.get('PGUSER', 'cristiano'),
	'password': os.environ.get('PGPASSWORD', 'cristiano'),
}

# One row per street segment: origin vertex, destination vertex and segment length.
kdf = ks.read_sql_query('''	select	EDGE.IDVERTEXORIG_FK,
									EDGE.IDVERTEXDEST_FK,
									EDGE.LENGTH
							from	STREETSEGMENT as EDGE ''',
	'jdbc:postgresql:afterqualifying', options=params)

# The column names are lower case here because PostgreSQL folds unquoted identifiers
# to lower case. from_pandas_edgelist builds an undirected nx.Graph by default, so no
# additional .to_undirected() call (a deep copy of the whole graph) is needed.
G = nx.from_pandas_edgelist(kdf.to_pandas(), 'idvertexorig_fk', 'idvertexdest_fk', ['length'])
# Drop the reference to the koalas frame; only the graph is used from here on.
kdf = None

# Deleting the smaller components

In [3]:
def deleteSmallerComponents(graph=None):
	"""Delete from the database every vertex that is not in the largest connected component.

	All connected components except the largest one are collected, and for each
	of their vertices the rows in STREETSEGMENT that start or end at the vertex
	and the matching row in STREETINTERSECTION are deleted in one transaction.

	The graph object itself is left untouched, so it still contains the deleted
	vertices until it is rebuilt from the database.

	Args:
		graph: undirected networkx graph to analyse. Defaults to the
			module-level graph ``G`` when omitted.

	Returns:
		None. Errors raised while talking to the database are printed, not re-raised.
	"""
	if graph is None:
		graph = G

	components = sorted(nx.connected_components(graph), key=len, reverse=True)
	verticesToDelete = [vertex for comp in components[1:] for vertex in comp]
	if not verticesToDelete:
		# Zero or one component: nothing to delete, so do not open a connection.
		return

	# One flat parameter tuple per statement execution. The segment query has two
	# placeholders (origin and destination), so the vertex is passed twice as a
	# scalar, not as a nested 1-tuple.
	segmentParams = [(vertex, vertex) for vertex in verticesToDelete]
	vertexParams = [(vertex,) for vertex in verticesToDelete]

	# Build the connection settings locally instead of mutating the shared params dict.
	connParams = dict(params, host='localhost', port='5432', database='afterqualifying')
	conn = None
	try:
		conn = pg.connect(**connParams)
		with conn.cursor() as cur:
			# Segments are removed before intersections — presumably because
			# STREETSEGMENT references STREETINTERSECTION; confirm against the schema.
			cur.executemany('delete from STREETSEGMENT where IDVERTEXORIG_FK = %s or IDVERTEXDEST_FK = %s', segmentParams)
			cur.executemany('delete from STREETINTERSECTION where IDVERTEX = %s', vertexParams)
		conn.commit()
	except (Exception, pg.DatabaseError) as error:
		# Best-effort: report the problem; closing the connection below discards
		# the uncommitted transaction.
		print(error)
	finally:
		if conn is not None:
			conn.close()

#deleteSmallerComponents()

# Calculating and storing the shortest distances

In [4]:
def storeShortestDistances(G, cutOffThreshold=1000.000001, lengthToInsert=10000):
	"""Compute bounded all-pairs shortest distances and store them in VERTICESPAIRSNEARBY.

	For every vertex of ``G`` the Dijkstra distance (edge attribute ``'length'``)
	to each vertex reachable within ``cutOffThreshold`` is computed, and one row
	``(source, destination, distance)`` is inserted per pair. Rows are sent in
	batches and committed in a single transaction at the end.

	Args:
		G: networkx graph whose edges carry a ``'length'`` attribute.
		cutOffThreshold: maximum path length to search; farther pairs are not stored.
		lengthToInsert: batch size threshold. A batch is flushed once it holds
			more than this many rows (checked after each source vertex).

	Returns:
		None. Errors raised inside the database block are printed, not re-raised.
	"""
	insertSql = 'insert into VERTICESPAIRSNEARBY (IDVERTEX1_FK, IDVERTEX2_FK, WALKINGDISTANCE) values %s'

	# Lazy iterator of (source, {destination: distance}). It is consumed one source
	# at a time below, so the full all-pairs result is never held in memory.
	distances = nx.all_pairs_dijkstra_path_length(G, cutoff=cutOffThreshold, weight='length')

	# Build the connection settings locally instead of mutating the shared params dict.
	connParams = dict(params, host='localhost', port='5432', database='afterqualifying')
	conn = None
	try:
		conn = pg.connect(**connParams)
		with conn.cursor() as cur:
			distancesToInsert = []
			for source, reachable in distances:
				distancesToInsert.extend(
					(source, destination, distance)
					for destination, distance in reachable.items()
				)
				if len(distancesToInsert) > lengthToInsert:
					pgExtras.execute_values(cur, insertSql, distancesToInsert)
					distancesToInsert = []

			# Flush the final, partially filled batch; without this the rows
			# accumulated after the last full batch would never be stored.
			if distancesToInsert:
				pgExtras.execute_values(cur, insertSql, distancesToInsert)

		conn.commit()
	except (Exception, pg.DatabaseError) as error:
		# Best-effort: report the problem; closing the connection below discards
		# the uncommitted transaction.
		print(error)
	finally:
		if conn is not None:
			conn.close()

#storeShortestDistances(G)

In [11]:
def distancesFromCenter(G, outputPath='centerDistances.txt'):
	"""Find a center vertex of ``G`` and the weighted distance from it to every vertex.

	The first vertex returned by ``nx.center`` is taken as the center. Dijkstra
	distances from it (edge attribute ``'length'``) are sorted in ascending order
	and written to ``outputPath``: the first line holds the center vertex, and
	each following line holds ``<vertex> <distance>``.

	Args:
		G: networkx graph whose edges carry a ``'length'`` attribute.
		outputPath: path of the text file to (over)write.

	Returns:
		A tuple ``(distancesCenter, centerGraph)``: the list of
		``(vertex, distance)`` pairs sorted by distance, and the center vertex.
	"""
	# NOTE(review): no weight is passed to nx.center, so the center is presumably
	# chosen by hop count and not by 'length' — confirm this is intended.
	centerGraph = nx.center(G, usebounds=True)[0]
	dictDistancesCenter = nx.single_source_dijkstra_path_length(G, source=centerGraph, weight='length')
	distancesCenter = sorted(dictDistancesCenter.items(), key=lambda item: item[1])

	with open(outputPath, 'w') as f:
		# Wrap in a 1-tuple so a tuple-valued vertex id is formatted as a single value.
		f.write('%s' % (centerGraph,))
		for vertex, distance in distancesCenter:
			# Both values must be passed to % as one tuple; '%s %s' % a, b would
			# apply % to the first value only and raise a TypeError.
			f.write('\n%s %s' % (vertex, distance))

	return distancesCenter, centerGraph

# Run the center/distance computation on the module-level graph G and keep both results.
distancesCenter, centerGraph = distancesFromCenter(G)

KeyboardInterrupt: 