In [1]:
from IPython.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

In [2]:
import numpy as np
from desdeo_emo.EAs import RVEA, NSGAIII
from desdeo_problem.testproblems.TestProblems import test_problem_builder
from desdeo_problem import DataProblem
from desdeo_tools.utilities import fast_non_dominated_sort, hypervolume_indicator

import matplotlib.pyplot as plt
import sklearn
from pyDOE import lhs
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ExpSineSquared, WhiteKernel, RationalQuadratic, DotProduct, ConstantKernel, Matern

## Assignment 4

## Task 1 

Implement probability of selection for single objective optimization. * (refer eqn.
12, 13, 14)

In [4]:
np.set_printoptions(precision=4,suppress=True) # easier to check values rn when we dont care too much about the exactness

In [5]:
# in minimization think probability of being wrong, in max probability of acceptance
# probability of sample A dominating sample B in max is P(A > B) is
def prob_selection(a, b, sigma_a, sigma_b):
    s = sigma_a / sigma_b
    m = (a - b) / sigma_b
    
    top = -2.5*m / np.sqrt(2 + 2*s**2)
    bot = 1 + np.exp(top)
    return 1 / bot

# b clearly dominates
a = np.array([1.,1.,3.])
b = np.array([2.,5.,1.])
sigma_a = 0.01
sigma_b = 0.01

prob_selection(a, b, sigma_a, sigma_b)

array([0., 0., 1.])

In [6]:
def prob_sel2(a, b, sigma_a, sigma_b):
    s = sigma_a / sigma_b
    m = (a - b) / sigma_b

    bot = 0.8*np.sqrt(2 + 2*s**2)
    return 0.5*(1 + np.tanh(m/bot))
    
    
    
# a1 clearly dominates b1, b1 clearly dominates a2
a = np.array([1.,1.,3.])
b = np.array([2.,5.,1.])
sigma_a = 1.
sigma_b = 1.

prob_sel2(a, b, sigma_a, sigma_b)

array([0.2227, 0.0067, 0.9241])

So we can see that probability of being wrong with first items (1 and 2) by choosing a is 0.22. Again for second items (1,5) thats 0.007.  And also with 3rd items (3,1) its 0.92. So we can clearly see its working properly.

## Task 2

Implement probability of dominance and ranking. * (refer eqn. 15, 17, 18, 22)

- Generate 50 samples from DTLZ2 problem (3 objectives, 5 decision variables) using LHS sampling
- Build Kriging surrogates using the generated samples
- Resample or generate 20 data points randomly
- Rank the 20 points using probability of dominance ranking

In [9]:
dtlz2_3 = test_problem_builder("DTLZ2", n_of_variables=5, n_of_objectives=3)

In [10]:
def create_samples(dec_dim, samples):
    # create samples 
    x = lhs(dec_dim, samples)     
    return np.asarray(x)

In [11]:
x = create_samples(5, 50)
print(x.shape)
eval_results = dtlz2_3.evaluate(x)
y = eval_results.objectives
print(y[:5])

(50, 5)
[[0.622  0.7431 1.0673]
 [0.1619 1.225  0.5548]
 [0.5454 0.8174 0.8826]
 [0.0292 0.0374 1.1344]
 [1.0229 0.7998 0.1999]]


In [12]:
print(y[:,0])

[0.622  0.1619 0.5454 0.0292 1.0229 1.0629 0.1616 0.3233 0.2553 0.2015
 0.164  0.9144 0.4622 0.1    0.3535 0.8779 0.0119 0.7184 0.1718 0.5466
 1.205  0.5222 0.0011 0.0037 0.3003 0.5981 0.4178 0.173  0.0484 0.792
 0.1383 0.9036 0.4067 0.7915 1.0321 0.7092 1.2004 0.0617 0.3289 0.0463
 0.1488 1.2565 1.0837 0.4299 0.7827 0.9642 0.2509 0.9557 0.6971 0.6054]


In [13]:
## build surrogates

kernel = 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 20.0), nu=1.5)
    
y1 = y[:,0]
gpr1 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y1)

y2 = y[:,1]
gpr2 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y2)

y3 = y[:,2]
gpr3 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y3)

In [14]:
## resampling

x_samples = create_samples(5, 20)

# predict
y1_sample, y1_std = gpr1.predict(x_samples, return_std=True) 
print(np.mean(y1_std))

# predict
y2_sample, y2_std = gpr2.predict(x_samples, return_std=True) 
print(np.mean(y2_std))
# predict
y3_sample, y3_std = gpr3.predict(x_samples, return_std=True) 
print(np.mean(y3_std))

y_samples = np.column_stack((y1_sample, y2_sample, y3_sample))
#y_samples.reshape(1,-1)
y_samples.shape

0.06816775838730102
0.06568298843432663
0.05839646889857207


(20, 3)

In [15]:
#print(y_samples)

In [16]:
def sum_of_rank_pos(n):
    return n*(n - 1)/2

sum_of_rank_pos(y_samples.shape[0])

190.0

In [17]:
# Papers example, just to test the code
A = [0.2, 0.9]  # tässä on a1 a2. kun lasketaan prod P (Ai < bi) i to n, tulee P(A < B)
B = [0.3, 0.5]
C = [0.5, 0.3]
D = [0.8, 0.2]
E = [0.85, 0.5]
F = [0.9, 0.95]

sols = np.array([A, B, C, D, E, F])
print(sols.shape)

(6, 2)


## DONE:

Single objective ranking works with uncertainty 0 and uncertainty 1. So prob_sel2 works
MO ranking with uncertainty 0 works

mo ranking with uncertainty works

## TODO:

rest of the assigment using some data

ask/find out about the bug with m.

In [44]:

def prob_sel(a, b, sigma_a, sigma_b):
    s = sigma_a / sigma_b
    m = (a - b) / sigma_b

    bot = 0.8*np.sqrt(2 + 2*s**2)
    res = 0.5*(1 + np.tanh(m/bot))
    res = np.prod(res)
    return res


def sof_prob_dom_multi(fit, j, sigma_a, sigma_b):
    summa = 0
    # iterate the sols
    for i in range(fit.shape[0]):
        for ii in range(fit.shape[1]):
            
            fits = fit[i][ii]
            if j[ii] == fits:
                s = prob_sel2(j[ii], fits, sigma_a, sigma_b)
                summa += 0.5*s
            elif j[ii] > fits:
                s = prob_sel2(j[ii], fits, sigma_a, sigma_b)
                summa += 0.5*s
            elif j[ii] < fits:
                s = prob_sel2(j[ii], fits, sigma_a, sigma_b)
                summa += 0.5*s
            else:
                print("should not get here")
        
    summa -= 0.5    
    return summa

###  Testing with paper example

In [45]:
s2 = []
sigma_a, sigma_b = 0.001, 0.001 # no uncertainty

# seems to work properly

for i in range(len(sols)):
    s2.append(sof_prob_dom_multi(sols, sols[i], sigma_a, sigma_b))
s2

[2.0, 1.75, 1.5, 1.5, 3.25, 5.0]

Same results as in the paper with 0 uncertainty.

In [46]:
np.sum(s2)

15.0

In [47]:
s3 = []
sigma_a, sigma_b = 1., .1 # uncertainty of 1. 

for i in range(len(sols)):
    s3.append(sof_prob_dom_multi(sols, sols[i], sigma_a, sigma_b))
s3

[2.4424476117753353,
 2.0712607555795635,
 2.069245942354567,
 2.323391690061394,
 2.7454916020811777,
 3.348162398147964]

In [48]:
np.sum(s3)

15.0

With sigma = 1, results are quite not like in the paper.. but

In [49]:
s4 = []
sigma_a, sigma_b = 0.1, 0.1 # uncertainty of 0.1. 

for i in range(len(sols)):
    s4.append(sof_prob_dom_multi(sols, sols[i], sigma_a, sigma_b))
s4

[2.2906164496671124,
 1.634060344553688,
 1.436565639361921,
 1.9073932584275828,
 3.1988455322854907,
 4.532518775704205]

with sigma 0.1, it gives pretty much the same results as in the paper.

Assuming it works fine enough to continue, although not sure what is the problem.

## Ranking y_samples

In [96]:
s = []
sigma_a, sigma_b = np.mean(y1_std), np.mean(y2_std) # what about y3_std?


for i in range(len(y_samples)):
    s.append(sof_prob_dom_multi(y_samples, y_samples[i], sigma_a, sigma_b) - 5) # need to subtract -5 to hit 190 of sum rank pos, why?
s

[9.969902948052436,
 8.824946374506299,
 10.793185979834501,
 7.793596512366614,
 11.144231260213015,
 9.69023281210676,
 5.867692513312983,
 6.3545436690189,
 12.034783384098912,
 10.03338939676105,
 11.02243523077399,
 9.7834697053731,
 4.953107938432389,
 7.898109177434787,
 10.333824946205178,
 11.485039980555257,
 6.925057748638848,
 11.563144307016799,
 12.965306238121173,
 10.563999877177014]

In [97]:
np.sum(s)

190.0

In [98]:
sum_of_rank_pos(y_samples.shape[0])

190.0

### Everything kinda works?

Ranking works, it does what is supposed to do if adding the extra subtract of 5, for the y_samples. That lets the sum of rank position to stay correct. Not sure if it is necessary to make these match or if there is a bug in the code and extra subtraction is not needed..