In [1]:
# Import libraries
import numpy as np
import geopandas as gpd
import momepy
import networkx as nx
# import pandas as pd
# import shapely
# import shapely.geometry as sg
# import matplotlib
# import matplotlib.pyplot as plt
# %matplotlib inline

from lmzintgraf_gp_pref_elicit import dataset, gaussian_process, acquisition_function
from lmzintgraf_gp_pref_elicit.gp_utilities import utils_ccs as utils_ccs
from lmzintgraf_gp_pref_elicit.gp_utilities import utils_data as utils_data
from lmzintgraf_gp_pref_elicit.gp_utilities import utils_experiment as utils_experiment
from lmzintgraf_gp_pref_elicit.gp_utilities import utils_parameters as utils_parameters
from lmzintgraf_gp_pref_elicit.gp_utilities import utils_user as utils_user

In [2]:
map = gpd.read_file("Sidewalk_width_crossings_small.geojson") #Read in the map with radius 250m and ~1000 nodes

# Objectives
objective1 = map['length']
objective2 = map['crossing']
objective3 = map['obstacle_free_width']

objectives = ('length', 'crossing')

In [3]:
# Create a NetworkX graph from the map
G = momepy.gdf_to_nx(map, approach='primal')
nodes = G.nodes
edges = G.edges


  gdf_network[length] = gdf_network.geometry.length


In [4]:
# print(nodes)

In [5]:
#Pick random ones or pick manually that make sense - to experiment
S = (120548.6120283842, 486088.19577846595)
T = (120798.0893320718, 486128.7633437495)

In [6]:
# Initialise the Gaussian process for 2 objectives
gp = gaussian_process.GPPairwise(num_objectives=2, std_noise=0.01, kernel_width=0.15,prior_mean_type='zero', seed=None)

In [7]:
P = [] #Pareto set
p = [] #paths computed by Dijkstra's algorithm
val_vector_p = [] #value vectors w.r.t. p, i.e., v^{p_1}, v^{p_2}

# Path initialisation
for i in objectives:
    p = nx.shortest_path(G, source=S, target=T, weight=i, method='dijkstra') #Dijkstra's algorithm
    P.append(p)

    val_obj1 = nx.path_weight(G, path=p, weight='length') #Returns total cost associated with the path and weight. In other words, it returns the value of the path.
    val_obj2 = nx.path_weight(G, path=p, weight='crossing')
    val_vector_p.append(np.array([val_obj1, val_obj2]))

In [8]:
val_vector_p

[array([671.64,   3.  ]), array([674.81,   3.  ])]

In [9]:
C = [min(val_vector_p[0][0], val_vector_p[1][0]), min(val_vector_p[0][1], val_vector_p[1][1])] #Candidate Targets, i.e., the most optimistic points
C

[671.6400000000003, 3.0]

In [10]:
# User ranking: Compare paths in P
user_preference = utils_user.UserPreference(num_objectives=2, std_noise=0.1)

In [20]:
add_noise = True
ground_utility = user_preference.get_preference(val_vector_p, add_noise=add_noise) #This is the ground-truth utility
print(ground_utility)

[1.2013037  0.83255437]


In [22]:
# Add the comparisons to GP
comparisons = dataset.DatasetPairwise(num_objectives=2)
preference_copy = np.copy(ground_utility)
comparisons.add_single_comparison(val_vector_p[np.argmax(ground_utility)], val_vector_p[np.argmin(ground_utility)]) #This way we are performing user ranking of their preferences
print(comparisons.datapoints)
gp.update(comparisons)

[[671.64   3.  ]
 [674.81   3.  ]]


In [13]:
# TODO: Find the path the user likes best and has the maximum a posteriori (MAP) estimate
# MAP is the mean from gaussian_process.get_predictive_params(val_vector_p)
#input should be a value vector (utility of a path goes via its value?)

In [14]:
p_star_index = np.argmax(gp_sample)
p_star_index

NameError: name 'gp_sample' is not defined

In [None]:
p_star = p[p_star_index]
p_star

In [None]:
#TODO: No idea if this is correct. I'm not sure what ccs_size should be. So I put 10, as well as for min_size, in order to run it quicker. Otherwise, it takes too much time and gets stuck.
input_domain = utils_ccs.get_pcs_grid(ccs_size=10, num_objectives=2, eucledian_dist=0.05, min_size=10, seed=123)
#Set eucledian_dist smaller to run the code faster
#Setting ccs_size and min_size to the same value is probably also a good idea.
input_domain

In [None]:
# Initialise the acquisition function
acq_fun = acquisition_function.DiscreteAcquirer(input_domain=input_domain, query_type='ranking', seed=123, acquisition_type='expected improvement')

In [None]:
#TODO: The next code cells are in a while-loop
# while C:

In [None]:
expected_improvement = acquisition_function.get_expected_improvement(input_domain, gp, acq_fun.history)

In [None]:
t_index = np.argmax(expected_improvement)
t_index

In [None]:
t = input_domain[t_index]
t

In [None]:
C

In [None]:
# Remove t from C
C = np.delete(C, np.where(np.all(C == t)))
C

In [None]:
# Inner-loop
# path = A(t, G, S, T)

In [None]:
#TODO: Line 15 of pseudocode is unclear to me how it should be in code. Also, which is correct:
# If v^p improves in the target region
# because you've identified a new value vector on the PCS. If you stop once the utility no longer improves, I think this can result in stopping prematurely. Specifically, imagine you have a current partial Pareto front of (10,0) and (0,10) the user model u((10,0)) is the current best. The target vector is (10,10)  and when you run DFS, you get one of the possible vectors in the target region. You get (1,9) out of the the call to DFS, and the u((1,9)) < u((10,0)) even after querying the user about it. Now here, you shouldn't stop, because the true best - (7,3) for example, is still possible to find.
#you're just not going to improve on that with a newly found vector
#So improving on the acquisition function by identifying a new point is impossible as
# 1) you were searching at an optimistic estimate (target), so the actually found value will be worse than the target
# 2) finding new points, and querying the user reduces uncertainty

In [None]:
P = P.append(p)

In [None]:
#TODO: More user ranking...
#Compare p to p∗ and add comparison to the GP ▷User ranking, i.e., is the new path preferred to the current, maximum one?


In [None]:
# if u(p) > u(p∗) then
# p∗ ← p
# end if


In [None]:
# Compute new candidate targets based on v^p and add to C #p from inner-loop I assume


In [None]:
# end if
# end while
# return p∗, vp
