# Fitness: (Average of overheads, moving average of routing costs)

## Load data


In [None]:
%matplotlib inline 
%reload_ext autoreload
from notebooks_commons import get_raw_data

def read_data(index, load_from_db = False):
    """Load the RTX runs and their evaluation documents for one index.

    Usage note (from the original notebook): the first time the data of an
    index is needed, pass ``load_from_db=True`` to download it (this might
    take a while). Afterwards ``load_from_db=False`` reads the saved data
    from the local pickle file.

    :param index: name of the index to read from
    :param load_from_db: forwarded unchanged to ``get_raw_data``
    :return: tuple ``(rtx_runs, data)``, i.e. the first two elements of the
             value returned by ``get_raw_data``
    """
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("\nReading from index " + index)
    pair = get_raw_data(index, load_from_db)
    rtx_runs = pair[0]
    data = pair[1]
    return rtx_runs, data


# Here we specify the name of the index to read data from
# Naming scheme: index_<method>_<N>. The suffix N (500/700/800) presumably
# denotes the number of cars of the scenario (several index names contain
# "700cars"/"800cars") -- TODO confirm for the *_500 indices.

index_random_500 = "erik-gcp-random-rtx-new" 
index_novelty_500 = "erik-gcp-novelty-rtx-new-fixedeval"
index_nsga2_500 = "erik-gcp-nsga2-rtx-new"
index_mlr_500 = "erik-gcp-mlr-rtx-new" 

index_random_700 = "ilias-random-700cars" 
index_novelty_700 = "erik-gcp-novelty-rtx-new-fixedeval-700"
index_nsga2_700 = "erik-gcp-nsga2-700cars"
index_mlr_700 = "ilias-mlr-700cars" 

index_random_800 = "ilias-random-800cars" 
index_novelty_800 = "erik-gcp-novelty-rtx-new-fixedeval-800"
index_nsga2_800 = "erik-gcp-nsga2-800cars"
index_mlr_800 = "ilias-mlr-800cars" 

# Every read_data call below uses the default load_from_db=False, i.e. it
# relies on data that was already saved locally by an earlier download.

### Get Novelty data
novelty_rtx_runs_500, novelty_data_500 = read_data(index_novelty_500)
novelty_rtx_runs_700, novelty_data_700 = read_data(index_novelty_700)
novelty_rtx_runs_800, novelty_data_800 = read_data(index_novelty_800)

### Get NSGA2 data
nsga2_rtx_runs_500, nsga2_data_500 = read_data(index_nsga2_500)
nsga2_rtx_runs_700, nsga2_data_700 = read_data(index_nsga2_700)
nsga2_rtx_runs_800, nsga2_data_800 = read_data(index_nsga2_800)

### Get MLR data
mlr_rtx_runs_500, mlr_data_500 = read_data(index_mlr_500)
mlr_rtx_runs_700, mlr_data_700 = read_data(index_mlr_700)
mlr_rtx_runs_800, mlr_data_800 = read_data(index_mlr_800)

### Get RandomSearch data
random_rtx_runs_500, random_data_500 = read_data(index_random_500)
random_rtx_runs_700, random_data_700 = read_data(index_random_700)
random_rtx_runs_800, random_data_800 = read_data(index_random_800)


## Check what's in there

In [None]:
import pprint
from IPython.display import Markdown, display

def printmd(string, color=None):
    """Render ``string`` as Markdown in the notebook output, wrapped in a
    coloured ``<span>``.

    :param string: text (may contain Markdown) to display
    :param color: CSS colour name placed into the span's style attribute;
                  the value is inserted as-is, including the default None
    """
    display(Markdown("<span style='color:{}'>{}</span>".format(color, string)))
    
pp = pprint.PrettyPrinter(indent=4)

def check_data(rtx_runs, data):
    """Print a short overview of the loaded runs and their evaluations.

    Prints how many runs exist and which optimizer produced them, then one
    Markdown line per run with the number of evaluation documents, the seed
    and the run id.

    Side effect: ``rtx_runs`` is sorted in place by seed.

    :param rtx_runs: list of run dicts (keys used: "seed", "id", and
                     optionally "strategy" -> "optimizer_method")
    :param data: list of evaluation documents (keys used: "parent" and
                 "_source" -> "iteration" / "individual")
    """
    # sort according to seed
    rtx_runs.sort(key=lambda d : d["seed"])
    # default so that the summary line also works when there are no runs
    opt_method = "unknown"
    if len(rtx_runs) > 0:
        try:
            opt_method = rtx_runs[0]["strategy"]["optimizer_method"]
        except (KeyError, TypeError):
            # mlr does not store the opt method name in field strategy.optimizer_method
            opt_method = "MLR"
    print("There were " + str(len(rtx_runs)) + " runs performed by " + opt_method)

    for rtx_run in rtx_runs:
        data_for_run = [d for d in data if d["parent"] == rtx_run["id"]]
        # ordering by (iteration, individual) only matters for the optional
        # per-document debug output that follows this function
        data_for_run.sort(key=lambda d : (d["_source"]["iteration"], d["_source"]["individual"]))
        printmd(str(len(data_for_run)) + "\t\t| seed " + str(rtx_run["seed"])
                + " | id " + str(rtx_run["id"]), "red")

        #for d in data_for_run:
        #    s = d["_source"]
        #    overheads = s["payload"]["overheads"]
        #    routings = s["payload"]["routings"]
        #    printmd("Iteration " + str(s["iteration"]) + ", individual " 
        #            + str(s["individual"]) + " with configuration", "blue")        
        #    pp.pprint(s["knobs"])
        #    printmd("has " + str(len(overheads)) + " overheads and " 
        #            + str(len(routings)) + " routings", "green")


# Print an overview for every data set.
# Note: check_data sorts each *_rtx_runs list in place by seed.

# random search
check_data(random_rtx_runs_500, random_data_500)
check_data(random_rtx_runs_700, random_data_700)
check_data(random_rtx_runs_800, random_data_800)
    
# mlr
check_data(mlr_rtx_runs_500, mlr_data_500)
check_data(mlr_rtx_runs_700, mlr_data_700) 
check_data(mlr_rtx_runs_800, mlr_data_800)
    
# novelty
check_data(novelty_rtx_runs_500, novelty_data_500)
check_data(novelty_rtx_runs_700, novelty_data_700)
check_data(novelty_rtx_runs_800, novelty_data_800)

# nsga2
check_data(nsga2_rtx_runs_500, nsga2_data_500)
check_data(nsga2_rtx_runs_700, nsga2_data_700)
check_data(nsga2_rtx_runs_800, nsga2_data_800)

## Code for computing the hypervolume

In [None]:
#    https://ls11-www.cs.tu-dortmund.de/rudolph/hypervolume/start

#    Copyright (C) 2010 Simon Wessing
#    TU Dortmund University
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


__author__ = "Simon Wessing"


class HyperVolume:
    """
    Hypervolume computation based on variant 3 of the algorithm in the paper:
    C. M. Fonseca, L. Paquete, and M. Lopez-Ibanez. An improved dimension-sweep
    algorithm for the hypervolume indicator. In IEEE Congress on Evolutionary
    Computation, pages 1157-1163, Vancouver, Canada, July 2006.

    Minimization is implicitly assumed here!

    Usage: ``HyperVolume(referencePoint).compute(front)`` where ``front`` is a
    list of points (sequences with one value per objective).

    """

    def __init__(self, referencePoint):
        """Constructor.

        :param referencePoint: sequence with one value per objective; only
                               points that weakly dominate it contribute
        """
        self.referencePoint = referencePoint
        # replaced by a MultiList in preProcess()
        self.list = []


    def compute(self, front):
        """Returns the hypervolume that is dominated by a non-dominated front.

        Before the HV computation, front and reference point are translated, so
        that the reference point is [0, ..., 0].

        :param front: list of points; the list and its points are not modified
        :return: the hypervolume (0.0 if no point dominates the reference point)
        """

        def weaklyDominates(point, other):
            for i in range(len(point)):
                if point[i] > other[i]:
                    return False
            return True

        relevantPoints = []
        referencePoint = self.referencePoint
        dimensions = len(referencePoint)
        for point in front:
            # only consider points that dominate the reference point
            if weaklyDominates(point, referencePoint):
                relevantPoints.append(point)
        if any(referencePoint):
            # shift points so that referencePoint == [0, ..., 0]
            # this way the reference point doesn't have to be explicitly used
            # in the HV computation
            for j in range(len(relevantPoints)):
                relevantPoints[j] = [relevantPoints[j][i] - referencePoint[i] for i in range(dimensions)]
        self.preProcess(relevantPoints)
        bounds = [-1.0e308] * dimensions
        hyperVolume = self.hvRecursive(dimensions - 1, len(relevantPoints), bounds)
        return hyperVolume


    def hvRecursive(self, dimIndex, length, bounds):
        """Recursive call to hypervolume calculation.

        In contrast to the paper, the code assumes that the reference point
        is [0, ..., 0]. This allows the avoidance of a few operations.

        :param dimIndex: index of the dimension currently swept
        :param length: number of nodes currently in the list
        :param bounds: per-dimension bounds, updated in place during the sweep
        :return: hypervolume contribution for dimensions 0..dimIndex
        """
        hvol = 0.0
        sentinel = self.list.sentinel
        if length == 0:
            return hvol
        elif dimIndex == 0:
            # special case: only one dimension
            # why using hypervolume at all?
            return -sentinel.next[0].cargo[0]
        elif dimIndex == 1:
            # special case: two dimensions, end recursion
            q = sentinel.next[1]
            h = q.cargo[0]
            p = q.next[1]
            while p is not sentinel:
                pCargo = p.cargo
                hvol += h * (q.cargo[1] - pCargo[1])
                if pCargo[0] < h:
                    h = pCargo[0]
                q = p
                p = q.next[1]
            hvol += h * q.cargo[1]
            return hvol
        else:
            remove = self.list.remove
            reinsert = self.list.reinsert
            hvRecursive = self.hvRecursive
            p = sentinel
            q = p.prev[dimIndex]
            # the sentinel is the only node without cargo
            while q.cargo is not None:
                if q.ignore < dimIndex:
                    q.ignore = 0
                q = q.prev[dimIndex]
            q = p.prev[dimIndex]
            while length > 1 and (q.cargo[dimIndex] > bounds[dimIndex] or q.prev[dimIndex].cargo[dimIndex] >= bounds[dimIndex]):
                p = q
                remove(p, dimIndex, bounds)
                q = p.prev[dimIndex]
                length -= 1
            qArea = q.area
            qCargo = q.cargo
            qPrevDimIndex = q.prev[dimIndex]
            if length > 1:
                hvol = qPrevDimIndex.volume[dimIndex] + qPrevDimIndex.area[dimIndex] * (qCargo[dimIndex] - qPrevDimIndex.cargo[dimIndex])
            else:
                qArea[0] = 1
                qArea[1:dimIndex+1] = [qArea[i] * -qCargo[i] for i in range(dimIndex)]
            q.volume[dimIndex] = hvol
            if q.ignore >= dimIndex:
                qArea[dimIndex] = qPrevDimIndex.area[dimIndex]
            else:
                qArea[dimIndex] = hvRecursive(dimIndex - 1, length, bounds)
                if qArea[dimIndex] <= qPrevDimIndex.area[dimIndex]:
                    q.ignore = dimIndex
            while p is not sentinel:
                pCargoDimIndex = p.cargo[dimIndex]
                hvol += q.area[dimIndex] * (pCargoDimIndex - q.cargo[dimIndex])
                bounds[dimIndex] = pCargoDimIndex
                reinsert(p, dimIndex, bounds)
                length += 1
                q = p
                p = p.next[dimIndex]
                q.volume[dimIndex] = hvol
                if q.ignore >= dimIndex:
                    q.area[dimIndex] = q.prev[dimIndex].area[dimIndex]
                else:
                    q.area[dimIndex] = hvRecursive(dimIndex - 1, length, bounds)
                    if q.area[dimIndex] <= q.prev[dimIndex].area[dimIndex]:
                        q.ignore = dimIndex
            hvol -= q.area[dimIndex] * q.cargo[dimIndex]
            return hvol


    def preProcess(self, front):
        """Sets up the list data structure needed for calculation.

        Builds one node per point and links all nodes into one sorted list
        per dimension; the result is stored in ``self.list``.
        """
        dimensions = len(self.referencePoint)
        nodeList = MultiList(dimensions)
        nodes = [MultiList.Node(dimensions, point) for point in front]
        for i in range(dimensions):
            self.sortByDimension(nodes, i)
            nodeList.extend(nodes, i)
        self.list = nodeList


    def sortByDimension(self, nodes, i):
        """Sorts the list of nodes in place by the i-th value of the contained points.

        A key function is used instead of sorting (value, node) tuples: on
        ties the tuple comparison would fall back to comparing the nodes
        themselves, which raises TypeError on Python 3. The sort is stable,
        so nodes with equal i-th value keep their relative order.
        """
        nodes.sort(key=lambda node: node.cargo[i])
            
            
            
class MultiList: 
    """A special data structure needed by FonsecaHyperVolume. 
    
    It consists of several doubly linked lists that share common nodes. So, 
    every node has multiple predecessors and successors, one in every list.

    Each list is circular and anchored at a shared sentinel node whose
    cargo is None.

    """

    class Node: 
        """Node that is a member of ``numberLists`` linked lists at once."""
        
        def __init__(self, numberLists, cargo=None): 
            self.cargo = cargo 
            # successor / predecessor of this node, one entry per list
            self.next  = [None] * numberLists
            self.prev = [None] * numberLists
            # bookkeeping fields read and written by HyperVolume.hvRecursive
            self.ignore = 0
            self.area = [0.0] * numberLists
            self.volume = [0.0] * numberLists
    
        def __str__(self): 
            return str(self.cargo)
        
        
    def __init__(self, numberLists):  
        """Constructor. 
        
        Builds 'numberLists' doubly linked lists.

        """
        self.numberLists = numberLists
        self.sentinel = MultiList.Node(numberLists)
        # an empty list is a sentinel that points to itself
        self.sentinel.next = [self.sentinel] * numberLists
        self.sentinel.prev = [self.sentinel] * numberLists  
        
        
    def __str__(self):
        """Returns one line per list, each showing the cargo of its nodes."""
        strings = []
        for i in range(self.numberLists):
            currentList = []
            node = self.sentinel.next[i]
            while node is not self.sentinel:
                currentList.append(str(node))
                node = node.next[i]
            strings.append(str(currentList))
        return "".join(string + "\n" for string in strings)
    
    
    def __len__(self):
        """Returns the number of lists that are included in this MultiList."""
        return self.numberLists
    
    
    def getLength(self, i):
        """Returns the length of the i-th list."""
        length = 0
        sentinel = self.sentinel
        node = sentinel.next[i]
        while node is not sentinel:
            length += 1
            node = node.next[i]
        return length
            
            
    def append(self, node, index):
        """Appends a node to the end of the list at the given index."""
        lastButOne = self.sentinel.prev[index]
        node.next[index] = self.sentinel
        node.prev[index] = lastButOne
        # set the last element as the new one
        self.sentinel.prev[index] = node
        lastButOne.next[index] = node
        
        
    def extend(self, nodes, index):
        """Extends the list at the given index with the nodes (in order)."""
        for node in nodes:
            self.append(node, index)
        
        
    def remove(self, node, index, bounds): 
        """Removes and returns 'node' from all lists in [0, 'index'[.

        For each of these lists, bounds[i] is lowered to node.cargo[i] if
        that value is smaller. The node keeps its own next/prev links, which
        is what allows reinsert() to put it back later.
        """
        for i in range(index): 
            predecessor = node.prev[i]
            successor = node.next[i]
            predecessor.next[i] = successor
            successor.prev[i] = predecessor  
            if bounds[i] > node.cargo[i]:
                bounds[i] = node.cargo[i]
        return node
    
    
    def reinsert(self, node, index, bounds):
        """
        Inserts 'node' at the position it had in all lists in [0, 'index'[
        before it was removed. This method assumes that the next and previous 
        nodes of the node that is reinserted are in the list.

        bounds[i] is lowered to node.cargo[i] where that value is smaller.

        """
        for i in range(index): 
            node.prev[i].next[i] = node
            node.next[i].prev[i] = node
            if bounds[i] > node.cargo[i]:
                bounds[i] = node.cargo[i]         
     
    

print("Load HyperVolume module.")

## Compute fitness and store it

In [None]:
import sys
import numpy as np
import pylab 
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc("savefig", dpi=300)
# plt.rcParams["figure.figsize"] = [8,6]


## params 
# number of iterations processed per run for all optimizers except MLR
iterations_default = 10
# number of iterations processed per run for MLR (ten times the default)
iterations_mlr = iterations_default * 10
# expected number of overhead entries per (non-reused) evaluation;
# compute_fitness prints an error if an evaluation deviates from it
sample_size = 5000
# a run is only kept for further analysis if it has exactly this many documents
total_number_of_docs = 100
# enables the verbose per-iteration / per-individual output of compute_fitness
debug = True

## maintain iterations to consider
# module-level state, overwritten by set_iterations()
iterations = iterations_default
def set_iterations(opt_method):
    """Select how many iterations are processed for the given optimizer.

    Sets the module-level ``iterations`` to ``iterations_mlr`` when
    ``opt_method`` is "MLR" and to ``iterations_default`` otherwise.
    """
    global iterations
    iterations = iterations_mlr if opt_method == "MLR" else iterations_default
        
        
def print_individual(ind):
    """Print one individual: its position, its fitness and its configuration.

    :param ind: dict with the keys "iteration", "individual", "avg_overhead",
                "avg_routing" and "knobs" (a dict of knob name -> value)
    """
    print("Iteration: " + str(ind["iteration"]) 
                  + " | Individual: " + str(ind["individual"]) 
                  + " | Avg overhead: " + str(ind["avg_overhead"]) 
                  + " | Avg performance: " + str(ind["avg_routing"]))
    print("\tConfiguration:")
    # items() exists on Python 2 and 3; iteritems() is Python 2 only
    for knob, knob_value in ind["knobs"].items():
        print("\t\t" + str(knob) + ": " + str(knob_value))
         

def calculate_moving_average(aList):
    """Calculate the rolling/moving average of a sequence of numbers.

    The average is updated incrementally with every element, so the value
    returned after the last element is the cumulative mean of the whole
    sequence.

    :param aList: iterable of numbers
    :return: the cumulative mean as a float; 0.0 for an empty input
    """
    # float accumulator: with an int start value Python 2 would use integer
    # division for int inputs and silently truncate the average
    moving_avg = 0.0
    for count, value in enumerate(aList):
        moving_avg = (moving_avg * count + value) / (count + 1)
    return moving_avg


def compute_fitness(rtx_runs, data):
    """Compute the fitness of all individuals of the given runs.

    The fitness (average overhead, moving average of the routing costs) is
    stored in each individual under the keys "avg_overhead" and
    "avg_routing". Evaluations whose overheads equal [-1] are treated as
    reused: their fitness is taken from the averages stored in the payload.

    Side effects: updates the module-level worst/best objective values
    (worst_avg_overhead, worst_avg_routing, best_avg_overhead,
    best_avg_routing), sets the module-level ``iterations`` via
    set_iterations() and prints progress information.

    :param rtx_runs: list of run dicts (keys used: "id", "seed" and
                     optionally "strategy" -> "optimizer_method")
    :param data: list of evaluation documents (keys used: "parent", "_source")
    :return: dict mapping run id -> list of all individuals of that run.
             Only runs with exactly ``total_number_of_docs`` documents are
             included; other runs are reported and skipped.
    """
    global worst_avg_overhead
    global worst_avg_routing
    global best_avg_overhead
    global best_avg_routing

    run_to_all_individuals = dict()
    # counter how often evaluations have been reused over all rtx runs
    number_of_reuse_per_run = []

    # for each run
    for rtx_run in rtx_runs:
        rtx_run_id = rtx_run["id"]
        # get all documents = all evaluations
        all_documents = [d["_source"] for d in data if d["parent"] == rtx_run_id]
        try:
            opt_method = rtx_run["strategy"]["optimizer_method"]
        except (KeyError, TypeError):
            # mlr does not store the opt method name in field strategy.optimizer_method
            opt_method = "MLR"
        # set iterations
        set_iterations(opt_method)

        if debug:
            print("================================================================")
        print("Run: " + rtx_run_id + " | " + opt_method
              + " | seed: " + str(rtx_run["seed"])
              + " | number of docs/evals: " + str(len(all_documents))
              + " | iterations to process: " + str(iterations))
        if debug:
            print("================================================================")

        # counter how often evaluations have been reused for this rtx run
        number_of_reuse = 0
        # all individuals across all iterations
        all_individuals = []
        # for each iteration
        for i in range(0, iterations):
            # get documents of the given iteration for the given run
            documents_of_iteration = [d for d in all_documents if d["iteration"] == i]
            pop_size = len(documents_of_iteration)
            if debug:
                print("Iteration: " + str(i) + " | Population size: " + str(pop_size))

            # for each individual of the iteration
            for j in range(pop_size):
                new_inds = [d for d in documents_of_iteration if d["individual"] == j]
                if not new_inds:
                    # gap in the individual numbering: report it instead of
                    # failing with an IndexError below
                    print("ERROR: No individual with number " + str(j)
                          + " within iteration " + str(i))
                    continue
                if len(new_inds) > 1:
                    print("ERROR: More than one individual with the same number within the same iteration")
                individual = new_inds[0]
                payload = individual["payload"]
                # get overheads of individual
                overheads = payload["overheads"]
                # get routings of individual
                routings = payload["routings"]
                if overheads == [-1]:
                    # reused evaluation: take the averages stored in the payload
                    avg_overhead = payload["avg_overhead"]
                    moving_avg_routing = payload["avg_routing"]
                    number_of_reuse = number_of_reuse + 1
                else:
                    if len(overheads) != sample_size:
                        print("ERROR: Overhead entries " + str(len(overheads))
                              + "<> sample size " + str(sample_size))
                    avg_overhead = np.mean(overheads)
                    # calculate moving average of routings
                    moving_avg_routing = calculate_moving_average(routings)

                # store fitness values in the individual
                individual["avg_overhead"] = avg_overhead
                individual["avg_routing"] = moving_avg_routing

                # track worst and best objective values over all runs
                worst_avg_overhead = max(worst_avg_overhead, avg_overhead)
                best_avg_overhead = min(best_avg_overhead, avg_overhead)
                worst_avg_routing = max(worst_avg_routing, moving_avg_routing)
                best_avg_routing = min(best_avg_routing, moving_avg_routing)

                if debug:
                    print("Individual: " + str(individual["individual"])
                         + " | Overhead: " + str(individual["avg_overhead"])
                         + " | Routing: " + str(individual["avg_routing"]))

                all_individuals.append(individual)
                # next individual

            # next iteration
            if debug:
                print("")

        if len(all_documents) == total_number_of_docs:
            run_to_all_individuals[rtx_run_id] = all_individuals
        else:
            print("Skip run for further analysis, only " + str(len(all_documents)) + " documents/evaluations present.")
        number_of_reuse_per_run.append(number_of_reuse)
        # next run

    if number_of_reuse_per_run:
        print("Min | Average | Max number of reused evaluations per run: "
              + str(np.min(number_of_reuse_per_run)) + " | "
              + str(np.mean(number_of_reuse_per_run)) + " | "
              + str(np.max(number_of_reuse_per_run)) + " | "
              + "\n==========================================================")
    else:
        # np.min / np.max raise ValueError on an empty list
        print("No runs to process."
              + "\n==========================================================")
    return run_to_all_individuals
      

# worst and best objective values achieved in *all* runs
# The "worst" trackers start at the most negative finite float so that any
# observed value replaces them. (sys.float_info.min would be wrong here: it
# is the smallest *positive* float, not the most negative one.)
worst_avg_overhead = -sys.float_info.max
worst_avg_routing = -sys.float_info.max
best_avg_overhead = sys.float_info.max
best_avg_routing = sys.float_info.max

# compute fitness
# Each call returns a dict run id -> individuals and, as a side effect,
# updates the module-level worst_*/best_* objective values, so these end up
# covering all data sets processed below.

# random search 
random_run_inds_500 = compute_fitness(random_rtx_runs_500, random_data_500)
random_run_inds_700 = compute_fitness(random_rtx_runs_700, random_data_700)
random_run_inds_800 = compute_fitness(random_rtx_runs_800, random_data_800)

# mlr 
mlr_run_inds_500 = compute_fitness(mlr_rtx_runs_500, mlr_data_500)
mlr_run_inds_700 = compute_fitness(mlr_rtx_runs_700, mlr_data_700)
mlr_run_inds_800 = compute_fitness(mlr_rtx_runs_800, mlr_data_800)
    
# novelty
novelty_run_inds_500 = compute_fitness(novelty_rtx_runs_500, novelty_data_500)
novelty_run_inds_700 = compute_fitness(novelty_rtx_runs_700, novelty_data_700)
novelty_run_inds_800 = compute_fitness(novelty_rtx_runs_800, novelty_data_800)

# nsga2
nsga2_run_inds_500 = compute_fitness(nsga2_rtx_runs_500, nsga2_data_500)
nsga2_run_inds_700 = compute_fitness(nsga2_rtx_runs_700, nsga2_data_700)
nsga2_run_inds_800 = compute_fitness(nsga2_rtx_runs_800, nsga2_data_800)

## Analyze and plot data

In [None]:
import sys
import numpy as np
import pylab 
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc("savefig", dpi=300)
# plt.rcParams["figure.figsize"] = [8,6]


def get_pareto_front(Inds, maxX = False, maxY = False):
    """Return the individuals that lie on the Pareto frontier, in sorted order.

    X is the objective "avg_overhead" and Y is "avg_routing". By default
    both objectives are minimized; pass maxX=True and/or maxY=True to
    maximize the respective objective instead.
    Adapted from: http://oco-carbon.com/metrics/find-pareto-frontiers-in-python/

    Side effect: ``Inds`` is sorted in place.

    :param Inds: list of individuals (dicts with "avg_overhead" and "avg_routing")
    :param maxX: maximize avg_overhead instead of minimizing it
    :param maxY: maximize avg_routing instead of minimizing it
    :return: list of non-dominated individuals ordered from best to worst X;
             an empty list if ``Inds`` is empty
    """
    if not Inds:
        return []

    def sort_key(ind):
        # Best X first. Among equal X the best Y comes first, so that an
        # individual with the same X but a worse Y (which is dominated) can
        # never be picked up by the sweep below.
        x = ind["avg_overhead"]
        y = ind["avg_routing"]
        return (-x if maxX else x, -y if maxY else y)

    Inds.sort(key=sort_key)
    # Start the Pareto frontier with the first value in the sorted list
    p_front = [Inds[0]]
    # Loop through the sorted list
    for ind in Inds[1:]:
        last_y = p_front[-1]["avg_routing"]
        # strict comparison (Thomas: changed >= / <= to > / <): an individual
        # only joins the front if it strictly improves Y
        if maxY:
            improves = ind["avg_routing"] > last_y
        else:
            improves = ind["avg_routing"] < last_y
        if improves:
            p_front.append(ind)
    return p_front


def _plot_objective_evolution(all_individuals, objective_key, n_iterations,
                              title, ylabel, best_label):
    """Plot one objective of all individuals per iteration plus the best
    (minimum) value found up to each iteration."""
    fig, axes = plt.subplots()
    fig.suptitle(title, fontsize=16)
    plt.xlabel('Iteration')
    plt.ylabel(ylabel)
    x = []
    y = []
    best_over_iterations = []
    best_min = None
    for i in range(n_iterations):
        new_y = [el[objective_key] for el in all_individuals if el["iteration"] == i]
        y.extend(new_y)
        x.extend([i] * len(new_y))
        # an iteration without individuals keeps the previous best
        if new_y and (best_min is None or min(new_y) < best_min):
            best_min = min(new_y)
        # NaN (drawn as a gap) as long as no value has been seen at all
        best_over_iterations.append(float("nan") if best_min is None else best_min)

    plt.scatter(x, y, marker="+", color='black', label='individual')
    plt.scatter(list(range(n_iterations)), best_over_iterations, s=100,
                facecolors='none', edgecolors='r', label=best_label)
    plt.legend(loc='best')


def analyze_and_plot(rtx_runs, run_to_all_individuals, plot_data = False):
    """Compute the Pareto front and the hypervolume of every run.

    For each run in ``rtx_runs`` that has an entry in
    ``run_to_all_individuals``, the Pareto front is computed with
    get_pareto_front() (minimizing both objectives) and its hypervolume is
    printed. The reference point is the module-level pair
    (worst_avg_overhead, worst_avg_routing).

    Side effects: prints to the notebook, sets the module-level
    ``iterations`` via set_iterations() and, through get_pareto_front(),
    sorts the lists of individuals in place.

    :param rtx_runs: list of run dicts (keys used: "id", "seed" and
                     optionally "strategy" -> "optimizer_method")
    :param run_to_all_individuals: dict run id -> list of individuals; runs
                                   without an entry are reported as skipped
    :param plot_data: if True, additionally print the individuals of each
                      Pareto front and show the plots
    :return: list with the Pareto front of every analyzed run
    """
    printmd("===========================================================", "red")
    # pareto fronts of all runs
    pareto_fronts_of_all_runs = []

    opt_method = ""
    if len(rtx_runs) > 0:
        try:
            opt_method = rtx_runs[0]["strategy"]["optimizer_method"]
        except (KeyError, TypeError):
            # mlr does not store the opt method name in field strategy.optimizer_method
            opt_method = "MLR"

    # set iterations
    set_iterations(opt_method)

    # the per-iteration plots are only drawn for these optimizers
    plot_evolution = opt_method in ("NSGAII", "NoveltySearch", "MLR")

    # the reference point is identical for all runs
    reference_point = [worst_avg_overhead, worst_avg_routing]
    hyperVolume = HyperVolume(reference_point)

    # for each run
    for rtx_run in rtx_runs:
        rtx_run_id = rtx_run["id"]
        # get all individuals of the run
        all_individuals = run_to_all_individuals.get(rtx_run_id, None)
        if all_individuals is None:
            print("Encountered skipped run " + str(rtx_run_id))
            continue
        if not all_individuals:
            # nothing to build a Pareto front from
            print("Encountered run without individuals " + str(rtx_run_id))
            continue

        print("\nRun: " + rtx_run_id + " | method: " + opt_method
              + " | seed: " + str(rtx_run["seed"])
              + " | number of documents/evaluations: " + str(len(all_individuals)))

        # compute pareto front of the run ####################################################
        pareto_front_of_run = get_pareto_front(all_individuals)
        pareto_fronts_of_all_runs.append(pareto_front_of_run)
        if plot_data:
            print("\nPareto front of the run:")
            for p in pareto_front_of_run:
                print_individual(p)

        # compute hypervolume ####################################################
        print("Reference Point: " + str(reference_point))
        pareto_front_values = [[el["avg_overhead"], el["avg_routing"]] for el in pareto_front_of_run]
        hv = hyperVolume.compute(pareto_front_values)
        print("Hypervolume: " + str(hv))

        # plotting #################################################################
        if not plot_data:
            continue

        if plot_evolution:
            # plot all averages for all iterations ####################################################
            _plot_objective_evolution(all_individuals, "avg_overhead", iterations,
                                      'Evolution of average overheads',
                                      'Average Overhead',
                                      'pareto front for average overhead')
            _plot_objective_evolution(all_individuals, "avg_routing", iterations,
                                      'Evolution of average routing performance',
                                      'Average Routing Performance',
                                      'pareto front for average routing performance')

        # print all individuals and pareto front ##############################
        fig, axes = plt.subplots()
        fig.suptitle('All individuals and the pareto front', fontsize=16)
        avg_o = [el["avg_overhead"] for el in all_individuals]
        avg_p = [el["avg_routing"] for el in all_individuals]

        plt.ylabel('Average Routing Performance')
        plt.xlabel('Average Overhead')
        plt.scatter(avg_o, avg_p, marker="+", color='black', label='Individual')
        p_front_avg_o = [el["avg_overhead"] for el in pareto_front_of_run]
        p_front_avg_p = [el["avg_routing"] for el in pareto_front_of_run]
        if len(pareto_front_of_run) > 1:
            plt.plot(p_front_avg_o, p_front_avg_p, label="Pareto Front")
        else:
            # a single point cannot be drawn as a line
            plt.scatter(p_front_avg_o, p_front_avg_p, label="Pareto Front")
        plt.legend(loc='best')

        # print hypervolume ####################################################
        if plot_evolution:
            fig, axes = plt.subplots()
            fig.suptitle('Evolution of the Hypervolume', fontsize=16)
            plt.ylabel('Hypervolume')
            plt.xlabel('Iteration')
            y = []
            current_pareto_front = []
            for i in range(iterations):
                # candidates: individuals of this iteration plus the front so far
                candidates = [el for el in all_individuals if el["iteration"] == i]
                candidates.extend(current_pareto_front)
                if candidates:
                    current_pareto_front = get_pareto_front(candidates)
                current_pareto_front_values = [[el["avg_overhead"], el["avg_routing"]]
                                               for el in current_pareto_front]
                y.append(hyperVolume.compute(current_pareto_front_values))

            plt.plot(list(range(iterations)), y, color='black', label='Hypervolume')
            plt.legend(loc='best')

        plt.show()

    return pareto_fronts_of_all_runs


# analyze and plot data
# plot_data is left at its default (False), so only the textual summary
# (Pareto front size, reference point, hypervolume) is produced here.
# Each result is the list of Pareto fronts, one per analyzed run.

# random search
random_500_pfronts = analyze_and_plot(random_rtx_runs_500, random_run_inds_500)
random_700_pfronts = analyze_and_plot(random_rtx_runs_700, random_run_inds_700)
random_800_pfronts = analyze_and_plot(random_rtx_runs_800, random_run_inds_800)

# mlr
mlr_500_pfronts = analyze_and_plot(mlr_rtx_runs_500, mlr_run_inds_500)
mlr_700_pfronts = analyze_and_plot(mlr_rtx_runs_700, mlr_run_inds_700)
mlr_800_pfronts = analyze_and_plot(mlr_rtx_runs_800, mlr_run_inds_800)
    
# novelty
novelty_500_pfronts = analyze_and_plot(novelty_rtx_runs_500, novelty_run_inds_500)
novelty_700_pfronts = analyze_and_plot(novelty_rtx_runs_700, novelty_run_inds_700)
novelty_800_pfronts = analyze_and_plot(novelty_rtx_runs_800, novelty_run_inds_800)

# nsga2
nsga2_500_pfronts = analyze_and_plot(nsga2_rtx_runs_500, nsga2_run_inds_500)
nsga2_700_pfronts = analyze_and_plot(nsga2_rtx_runs_700, nsga2_run_inds_700)
nsga2_800_pfronts = analyze_and_plot(nsga2_rtx_runs_800, nsga2_run_inds_800)

## Analysis of the hypervolume and objectives over multiple runs

In [None]:
from scipy import stats

# Reference point handed to HyperVolume: one coordinate per objective, in the
# order [avg_overhead, avg_routing]. Both worst_* names are defined outside
# this cell (presumably the worst values observed for each objective - confirm).
reference_point = [worst_avg_overhead, worst_avg_routing]
# Shared instance; the functions below call it through this module-level name.
hyperVolume = HyperVolume(reference_point)

def compute_hvs(pfronts):
    '''
    Computes the hypervolume of every pareto front in pfronts.

    pfronts is a list of pareto fronts; each front is a list of dicts that
    provide the keys "avg_overhead" and "avg_routing". The result is a list
    holding one hypervolume per front, in the same order, as computed by the
    module-level hyperVolume object.
    '''
    return [
        hyperVolume.compute(
            [[individual["avg_overhead"], individual["avg_routing"]]
             for individual in front])
        for front in pfronts
    ]


def compute_avg_hv(pfronts):
    '''
    Computes the mean of the hypervolumes of all pareto fronts in pfronts
    (see compute_hvs for the expected structure of pfronts).
    '''
    return np.mean(compute_hvs(pfronts))


def compute_median_hv(pfronts, plotting=False):
    '''
    Computes the median of the hypervolumes of all pareto fronts in pfronts.

    If plotting is True, two figures are shown before the median is returned:
    a 30-bin histogram of the hypervolumes and a box plot of them on which the
    mean is marked with a dot.
    '''
    hypervolumes = compute_hvs(pfronts)

    if plotting:
        # figure 1: distribution of the hypervolumes
        plt.subplots()
        plt.hist(hypervolumes, bins=30)
        plt.ylabel('frequency')
        plt.xlabel('hypervolume')

        # figure 2: box plot (no notch, outlier symbols suppressed) plus mean marker
        _, box_axes = plt.subplots()
        box_axes.boxplot(hypervolumes, 0, '', positions=range(1))
        box_axes.plot(["1"], np.mean(hypervolumes), ".", label='mean', color='black', linestyle=':')
        plt.ylabel('hypervolume')

        plt.show()

    return np.median(hypervolumes)


def compute_avg_objectives(pfronts):
    '''
    Computes the mean over all runs of the per-run objective averages.

    Returns a pair (mean avg overhead, mean avg routing); the per-run values
    come from get_objectives_of_runs.
    '''
    overheads_per_run, routings_per_run = get_objectives_of_runs(pfronts)
    mean_overhead = np.mean(overheads_per_run)
    mean_routing = np.mean(routings_per_run)
    return mean_overhead, mean_routing

def compute_median_objectives(pfronts):
    '''
    Computes the median over all runs of the per-run objective averages.

    Returns a pair (median avg overhead, median avg routing); the per-run
    values come from get_objectives_of_runs.
    '''
    overheads_per_run, routings_per_run = get_objectives_of_runs(pfronts)
    median_overhead = np.median(overheads_per_run)
    median_routing = np.median(routings_per_run)
    return median_overhead, median_routing


def get_objectives_of_runs(pfronts, plotting=False):
    '''
    Computes, for each run's pareto front, the mean "avg_overhead" and the
    mean "avg_routing" of the individuals on that front.

    Returns two lists (overhead means, routing means) with one entry per
    front, in input order. If plotting is True, a box plot and a 30-bin
    histogram are shown for each of the two lists before returning.
    '''
    # one (overhead mean, routing mean) pair for each run/seed
    means_per_run = [
        (np.mean([individual["avg_overhead"] for individual in front]),
         np.mean([individual["avg_routing"] for individual in front]))
        for front in pfronts
    ]
    avg_overheads_of_runs = [pair[0] for pair in means_per_run]
    avg_routings_of_runs = [pair[1] for pair in means_per_run]

    if plotting:
        # box plots first (overheads, then routings), with the mean marked by a dot
        for values, y_label in ((avg_overheads_of_runs, 'overheads of trips'),
                                (avg_routings_of_runs, 'routings of trips')):
            _, box_axes = plt.subplots()
            box_axes.boxplot(values, 0, '', positions=range(1))
            box_axes.plot(["1"], np.mean(values), ".", label='mean', color='black', linestyle=':')
            plt.ylabel(y_label)

        # then the histograms, in the same order
        for values, y_label, x_label in (
                (avg_overheads_of_runs, 'number of overheads', 'trip overhead'),
                (avg_routings_of_runs, 'number of routing costs', 'routing costs')):
            plt.subplots()
            plt.hist(values, bins=30)
            plt.ylabel(y_label)
            plt.xlabel(x_label)

        plt.show()

    return avg_overheads_of_runs, avg_routings_of_runs


def compute_utility(pfronts, normalize=True, plotting=False):
    '''
    Computes one utility value per pareto front.

    The utility of a single solution is avg_overhead + avg_routing; with
    normalize=True both objectives are first passed through
    normalize_avg_overhead / normalize_avg_routing. The utility of a front is
    the mean over its solutions. Returns the list of front utilities, in
    input order. If plotting is True, a box plot of these utilities (with the
    mean marked by a dot) is shown before returning.
    '''
    all_utilities = []
    for front in pfronts:
        objective_pairs = [(individual["avg_overhead"], individual["avg_routing"])
                           for individual in front]
        if normalize:
            objective_pairs = [(normalize_avg_overhead(overhead), normalize_avg_routing(routing))
                               for overhead, routing in objective_pairs]
        front_utilities = [overhead + routing for overhead, routing in objective_pairs]
        all_utilities.append(np.mean(front_utilities))

    if plotting:
        _, box_axes = plt.subplots()
        box_axes.boxplot(all_utilities, 0, '', positions=range(1))
        box_axes.plot(["1"], np.mean(all_utilities), ".", label='mean', color='black', linestyle=':')
        plt.ylabel('utility')
        plt.show()

    return all_utilities


def compute_avg_utility(pfronts, normalize=True, plotting=False):
    '''
    Computes the mean of the per-front utilities returned by compute_utility
    (normalize and plotting are passed through unchanged).
    '''
    return np.mean(compute_utility(pfronts, normalize, plotting))


def compute_median_utility(pfronts, normalize=True, plotting=False):
    '''
    Computes the median of the per-front utilities returned by compute_utility
    (normalize and plotting are passed through unchanged).
    '''
    return np.median(compute_utility(pfronts, normalize, plotting))


def normalize_avg_overhead(avg_overhead):
    '''
    Min-max scales avg_overhead using the module-level best_avg_overhead and
    worst_avg_overhead: the best value maps to 0, the worst value to 1.

    Prints "ERROR" when the scaled value is below 0 or above 1; the value is
    returned in either case.
    '''
    distance_from_best = float(avg_overhead - best_avg_overhead)
    full_range = float(worst_avg_overhead - best_avg_overhead)
    result = distance_from_best / full_range
    out_of_range = result < 0 or result > 1
    if out_of_range:
        print("ERROR")
    return result

def normalize_avg_routing(avg_routing):
    '''
    Min-max scales avg_routing using the module-level best_avg_routing and
    worst_avg_routing: the best value maps to 0, the worst value to 1.

    Prints "ERROR" when the scaled value is below 0 or above 1; the value is
    returned in either case.
    '''
    distance_from_best = float(avg_routing - best_avg_routing)
    full_range = float(worst_avg_routing - best_avg_routing)
    result = distance_from_best / full_range
    out_of_range = result < 0 or result > 1
    if out_of_range:
        print("ERROR")
    return result


def compute_hv_over_evals(rtx_runs, run_to_all_individuals, opt_method):
    '''
    Returns a list of lists, one for each run, containing how the hypervolume
    evolves with each fitness evaluation.

    rtx_runs is a sequence of dicts with an "id" key; run_to_all_individuals
    maps such an id to the individuals of that run (dicts with "iteration",
    "individual", "avg_overhead" and "avg_routing" keys). opt_method is handed
    to set_iterations, after which the module-level `iterations` is read.

    Individuals are visited iteration by iteration and, within an iteration,
    by ascending "individual" number. After each one the running pareto front
    is updated and its hypervolume is appended to the run's series.
    '''
    # set iterations
    set_iterations(opt_method)

    hv_series_of_all_runs = []
    for rtx_run in rtx_runs:
        run_id = rtx_run["id"]
        # get all individuals of the run
        individuals_of_run = run_to_all_individuals.get(run_id)
        if individuals_of_run is None:
            print("No documents for run " + str(run_id))
            # NOTE(review): this ends the loop over ALL remaining runs, not
            # just the current one - confirm that `continue` was not intended.
            break

        hv_series = []
        running_front = []
        for iteration in range(iterations):
            individuals_of_iteration = [ind for ind in individuals_of_run
                                        if ind["iteration"] == iteration]
            for number in range(len(individuals_of_iteration)):
                # expected to be a list with exactly one element
                matches = [ind for ind in individuals_of_iteration
                           if ind["individual"] == number]
                if len(matches) > 1:
                    print("ERROR: More than one individual with the same number within the same iteration.")
                running_front.append(matches[0])
                running_front = get_pareto_front(running_front)
                front_values = [[ind["avg_overhead"], ind["avg_routing"]]
                                for ind in running_front]
                hv_series.append(hyperVolume.compute(front_values))
        hv_series_of_all_runs.append(hv_series)
    return hv_series_of_all_runs
    

def plot_hypervolume_evolution(random_rtx_runs, random_run_to_all_individuals,
                               nsga2_rtx_runs, nsga2_run_to_all_individuals,
                               novelty_rtx_runs, novelty_run_to_all_individuals,
                               mlr_rtx_runs, mlr_run_to_all_individuals, cars_number, mean_or_median = "Mean"):
    '''
    Plots the evolution of the hypervolume of the methods over fitness evaluations.
    The hypervolume is the "Mean" or the "Median" over the runs for each method.

    For every method (Random, NSGA2, Novelty, MLR) the per-run hypervolume
    series are obtained from compute_hv_over_evals and aggregated column-wise,
    so index k of the plotted line summarizes evaluation k over all runs.
    cars_number is only used in the figure title. With "Mean", the last value
    of each aggregated series is also printed.

    Raises ValueError if mean_or_median is neither "Mean" nor "Median". The
    check happens before any data is processed, so a typo no longer costs a
    full computation followed by a failure on an undefined plot_data.
    '''
    aggregators = {"Mean": np.mean, "Median": np.median}
    if mean_or_median not in aggregators:
        raise ValueError('mean_or_median must be "Mean" or "Median", got %r' % (mean_or_median,))
    aggregate = aggregators[mean_or_median]

    methods = ["Random", "NSGA2", "Novelty", "MLR"]
    linestyles = ['--', '-', '-.', ':']
    method_inputs = [(random_rtx_runs, random_run_to_all_individuals),
                     (nsga2_rtx_runs, nsga2_run_to_all_individuals),
                     (novelty_rtx_runs, novelty_run_to_all_individuals),
                     (mlr_rtx_runs, mlr_run_to_all_individuals)]

    # All series are computed before plotting, in the order of `methods`.
    all_hv_series = [compute_hv_over_evals(rtx_runs, run_to_all_individuals, method)
                     for method, (rtx_runs, run_to_all_individuals) in zip(methods, method_inputs)]

    fig, axes = plt.subplots()
    fig.suptitle("Evolution of the " + mean_or_median
                 + " Hypervolume (" + str(cars_number) + "cars)", fontsize=16)
    plt.ylabel('Hypervolume')
    plt.xlabel('Fitness Evaluations')

    for method, linestyle, hv_series in zip(methods, linestyles, all_hv_series):
        series_matrix = np.array(hv_series)
        if series_matrix.size == 0:
            # Nothing to aggregate (no runs, or runs without evaluations).
            print(method + " - no hypervolume data to plot")
            continue

        plot_data = aggregate(series_matrix, axis=0)
        if mean_or_median == "Mean":
            # [-1] is the final evaluation whatever the length of the series.
            print(method + " - final mean hv: " + str(plot_data[-1]))

        plt.plot(range(len(plot_data)), plot_data, color='black', linestyle=linestyle, label=method)

    pylab.legend(loc='best')
    plt.show()

    
def plot_hypervolume_boxplots(random_pfronts, nsga2_pfronts, novelty_pfronts, mlr_pfronts, cars_number):
    '''
    Shows one figure with a box plot of the per-run hypervolumes of each
    method (Random, NSGA2, Novelty, MLR), marks each method's mean with a dot
    and then runs the pairwise t-tests of run_ttest on the hypervolumes.

    cars_number is only used in the figure title.
    '''
    hvs_names = ["Random", "NSGA2", "Novelty", "MLR"]
    hvs = [compute_hvs(pfronts)
           for pfronts in (random_pfronts, nsga2_pfronts, novelty_pfronts, mlr_pfronts)]
    hvs_labels = range(1,5)

    _, box_axes = plt.subplots()
    plt.title(str(cars_number) + " cars")
    box_axes.boxplot(hvs, 0, '', positions=hvs_labels)
    for position, method_hvs in zip(hvs_labels, hvs):
        box_axes.plot(position, np.mean(method_hvs), ".", label='mean', color='black', linestyle=':')
    plt.xticks(hvs_labels, hvs_names)
    plt.ylabel('hypervolume')
    plt.show()

    # statistical tests
    run_ttest(hvs, hvs_names)

    
def plot_objectives_boxplots(random_pfronts, nsga2_pfronts, novelty_pfronts, mlr_pfronts, cars_number):
    '''
    Shows, first for the trip overhead and then for the routing costs, a box
    plot of the per-run objective averages of each method (Random, NSGA2,
    Novelty, MLR) with the mean marked by a dot. After each figure a heading
    is printed and the pairwise t-tests of run_ttest are run on that objective.

    The per-run values come from get_objectives_of_runs; cars_number is only
    used in the figure titles.
    '''
    names = ["Random", "NSGA2", "Novelty", "MLR"]
    labels = range(1,5)

    objectives_per_method = [get_objectives_of_runs(pfronts)
                             for pfronts in (random_pfronts, nsga2_pfronts, novelty_pfronts, mlr_pfronts)]
    overheads = [objectives[0] for objectives in objectives_per_method]
    routings  = [objectives[1] for objectives in objectives_per_method]

    # (values per method, y-axis label, heading printed before the t-tests)
    sections = ((overheads, 'Trip Overhead', "Trip Overhead:"),
                (routings, 'Routing Costs', "Routing Costs:"))
    for values, y_label, heading in sections:
        _, box_axes = plt.subplots()
        plt.title(str(cars_number) + " cars")
        box_axes.boxplot(values, 0, '', positions=labels)
        for position, method_values in zip(labels, values):
            box_axes.plot(position, np.mean(method_values), ".", label='mean', color='black', linestyle=':')
        plt.xticks(labels, names)
        plt.ylabel(y_label)
        plt.show()

        # statistical test
        print(heading)
        run_ttest(values, names)
            

def run_ttest(data, names):
    '''
    Runs a two-sample t-test without the equal-variance assumption
    (stats.ttest_ind with equal_var=False) for every pair of samples in data
    and prints, per pair, whether the difference is significant at
    alpha = 0.05.

    data[i] is the sample named names[i]. Each sample is compared with all
    samples that precede it in data, so every unordered pair is reported
    exactly once. Nothing is returned.
    '''
    alpha = 0.05
    for i, data_first in enumerate(data):
        name_first = names[i]
        # only the earlier samples: pair (i, j) with j < i
        for j in range(i):
            _, pvalue = stats.ttest_ind(data_first, data[j], equal_var = False)
            if pvalue <= alpha:
                is_is_not = "\tis\t"
            else:
                is_is_not = "\tis not\t"
            print(name_first + is_is_not
                  + " statistically significantly different than "+ names[j])

            
def check_avg(pfronts, name):
    '''
    Prints one summary line for the pareto fronts of a method.

    The line starts with name and the number of fronts (runs), followed by:
    average hypervolume, median hypervolume, average utility, median utility,
    average objectives and median objectives.
    '''
    line_start = name + " ("+str(len(pfronts))+" runs):\t"
    avg_hv = str(compute_avg_hv(pfronts))
    # remaining columns, computed in the order in which they are printed
    other_columns = [
        str(compute_median_hv(pfronts)),
        str(compute_avg_utility(pfronts)),
        str(compute_median_utility(pfronts)),
        str(compute_avg_objectives(pfronts)),
        str(compute_median_objectives(pfronts)),
    ]
    # the first separator has no trailing blank, all following ones do
    print(line_start + avg_hv + "\t|" + "\t| ".join(other_columns))
    
    

# Summary table: a header naming the value columns, then one check_avg line
# per method for each car count. `iterations` is a module-level value that is
# set outside this section.
print("Iterations considered: " + str(iterations))
print("Average Hypervolume (the higher, the better) | Median Hypervolume | " 
      + "Average utility (the lower, the better) | Median utility | Average objectives | Median objectives")


print("500 cars")
check_avg(random_500_pfronts, "Random")
check_avg(mlr_500_pfronts, "MLR")
check_avg(novelty_500_pfronts, "Novel")
check_avg(nsga2_500_pfronts, "NSGAII")

print("700 cars")
check_avg(random_700_pfronts, "Random")
check_avg(mlr_700_pfronts, "MLR")
check_avg(novelty_700_pfronts, "Novel")
check_avg(nsga2_700_pfronts, "NSGAII")

print("800 cars")
check_avg(random_800_pfronts, "Random")
check_avg(mlr_800_pfronts, "MLR")
check_avg(novelty_800_pfronts, "Novel")
check_avg(nsga2_800_pfronts, "NSGAII")

# Hypervolume Boxplots ==========
# One figure plus pairwise t-tests per car count; argument order is
# random, nsga2, novelty, mlr.
plot_hypervolume_boxplots(random_500_pfronts, nsga2_500_pfronts, novelty_500_pfronts, mlr_500_pfronts, 500)
plot_hypervolume_boxplots(random_700_pfronts, nsga2_700_pfronts, novelty_700_pfronts, mlr_700_pfronts, 700)
plot_hypervolume_boxplots(random_800_pfronts, nsga2_800_pfronts, novelty_800_pfronts, mlr_800_pfronts, 800)

# Objectives Boxplots ==========
# Two figures (trip overhead, routing costs) plus pairwise t-tests per car count.
plot_objectives_boxplots(random_500_pfronts, nsga2_500_pfronts, novelty_500_pfronts, mlr_500_pfronts, 500)
plot_objectives_boxplots(random_700_pfronts, nsga2_700_pfronts, novelty_700_pfronts, mlr_700_pfronts, 700)
plot_objectives_boxplots(random_800_pfronts, nsga2_800_pfronts, novelty_800_pfronts, mlr_800_pfronts, 800)

# Hypervolume Evolution ==========
# mean_or_median is left at its default ("Mean") in all three calls, so the
# mean hypervolume over the runs is plotted for each car count.
plot_hypervolume_evolution(random_rtx_runs_500, random_run_inds_500,
                           nsga2_rtx_runs_500, nsga2_run_inds_500,
                          novelty_rtx_runs_500, novelty_run_inds_500,
                           mlr_rtx_runs_500, mlr_run_inds_500, 500)

plot_hypervolume_evolution(random_rtx_runs_700, random_run_inds_700,
                           nsga2_rtx_runs_700, nsga2_run_inds_700,
                           novelty_rtx_runs_700, novelty_run_inds_700,
                           mlr_rtx_runs_700, mlr_run_inds_700, 700)

plot_hypervolume_evolution(random_rtx_runs_800, random_run_inds_800,
                           nsga2_rtx_runs_800, nsga2_run_inds_800,
                           novelty_rtx_runs_800, novelty_run_inds_800,
                           mlr_rtx_runs_800, mlr_run_inds_800, 800)

## (DEPRECATED) Analysis over multiple runs

In [None]:
def plot_pfronts_over_runs(pfronts, combined_pront, method_name):
    '''
    Draws, in one new figure, every pareto front in pfronts as a black line
    with "+" markers, and on top of them the combined front combined_pront.

    The combined front is drawn as a red line when it has more than one
    point and as a scatter plot otherwise. Fronts are lists of dicts with the
    keys "avg_overhead" (x axis) and "avg_routing" (y axis). method_name is
    only used in the figure title. The figure is not shown here (no plt.show()).
    '''
    def split_objectives(front):
        # x values (overheads) first, then y values (routings)
        overheads = [individual["avg_overhead"] for individual in front]
        routings = [individual["avg_routing"] for individual in front]
        return overheads, routings

    figure, _ = plt.subplots()
    plt.ylabel('Average Routing Performance')
    plt.xlabel('Average Overhead')
    figure.suptitle("Pareto fronts over runs for " + method_name, fontsize=16)

    for front in pfronts:
        run_overheads, run_routings = split_objectives(front)
        plt.plot(run_overheads, run_routings, marker="+", color='black', label='')

    combined_overheads, combined_routings = split_objectives(combined_pront)
    if len(combined_pront) <= 1:
        # a line needs at least two points, so fall back to a scatter plot
        plt.scatter(combined_overheads, combined_routings, label="Pareto Front over all runs")
    else:
        plt.plot(combined_overheads, combined_routings, label="Pareto Front over all runs", color='red')
    

def plot_combined_pfronts_over_methods(combined_pfronts, method_names, line_styles, colors):
    '''
    Draws the combined pareto front of every method into one new figure and
    adds a legend.

    combined_pfronts[i] is drawn with line_styles[i] and colors[i] and is
    labelled method_names[i]. A front with more than one point is drawn as a
    line, otherwise as a scatter plot. Fronts are lists of dicts with the keys
    "avg_overhead" (x axis) and "avg_routing" (y axis). The figure is not
    shown here (no plt.show()).
    '''
    figure, _ = plt.subplots()
    plt.ylabel('Average Routing Performance')
    plt.xlabel('Average Overhead')
    figure.suptitle("Combined pareto fronts for each method", fontsize=16)

    for index, front in enumerate(combined_pfronts):
        overheads = [individual["avg_overhead"] for individual in front]
        routings = [individual["avg_routing"] for individual in front]
        draw = plt.plot if len(front) > 1 else plt.scatter
        draw(overheads, routings,
             linestyle=line_styles[index], color=colors[index], label=method_names[index])

    pylab.legend(loc='best')

# Analyze pareto fronts over runs    
# For each method (500-cars data only): flatten the per-run pareto fronts into
# one list of individuals, take the pareto front of that list as the method's
# "combined" front, and plot it together with the per-run fronts.

# Random Search
random_500_pfronts_flat = [ind for p_front in random_500_pfronts for ind in p_front]
random_500_combined_pfront = get_pareto_front(random_500_pfronts_flat)
plot_pfronts_over_runs(random_500_pfronts, random_500_combined_pfront, "RandomSearch")

# MLR
mlr_500_pfronts_flat = [ind for p_front in mlr_500_pfronts for ind in p_front]
mlr_500_combined_pfront = get_pareto_front(mlr_500_pfronts_flat)
plot_pfronts_over_runs(mlr_500_pfronts, mlr_500_combined_pfront, "MLR")

# Novelty
novelty_500_pfronts_flat = [ind for p_front in novelty_500_pfronts for ind in p_front]
novelty_500_combined_pfront = get_pareto_front(novelty_500_pfronts_flat)
plot_pfronts_over_runs(novelty_500_pfronts, novelty_500_combined_pfront, "Novelty")

# NSGA2
nsga2_500_pfronts_flat = [ind for p_front in nsga2_500_pfronts for ind in p_front]
nsga2_500_combined_pfront = get_pareto_front(nsga2_500_pfronts_flat)
plot_pfronts_over_runs(nsga2_500_pfronts, nsga2_500_combined_pfront, "NSGAII")


###
# Order must match method_names, line_styles and colors below.
combined_pfronts_500 = [random_500_combined_pfront, mlr_500_combined_pfront, novelty_500_combined_pfront, nsga2_500_combined_pfront]

# Disabled: would append the pareto front over all methods as a reference front.
#combined_500_pfronts_flat = [ind for p_front in combined_pfronts_500 for ind in p_front]
#reference_front_500 = get_pareto_front(combined_500_pfronts_flat)
#combined_pfronts_500.append(reference_front_500)

method_names = ["RandomSearch", "MLR", "Novelty", "NSGAII"]
line_styles = ['-', '--', '-.', ':']
colors = ['b', 'g', 'r', 'c']
plot_combined_pfronts_over_methods(combined_pfronts_500, method_names, line_styles, colors)


## Testing

In [None]:

def pareto_frontier(Xs, Ys, maxX = False, maxY = False):
    '''
    Method to take two equally-sized lists and return just the elements which
    lie on the Pareto frontier, sorted into order.

    Default behaviour is to find the minimum for both X and Y (maxX = False,
    maxY = False); specify maxX = True or maxY = True to find the maximum for
    either or both of the parameters.

    The points (Xs[i], Ys[i]) are sorted by X (ascending, or descending when
    maxX is True) and scanned once: a point is kept when its Y is at least as
    good as the Y of the last kept point, so points that tie on Y are kept too.

    Returns a pair of lists (frontier Xs, frontier Ys) in sorted order. For
    empty input the result is ([], []).
    '''
    # Sort the points in either ascending or descending order of X
    sorted_pairs = sorted(([x, Ys[i]] for i, x in enumerate(Xs)), reverse=maxX)
    if not sorted_pairs:
        # No points, hence an empty frontier (indexing [0] below would fail)
        return [], []

    # Start the Pareto frontier with the first value in the sorted list
    p_front = [sorted_pairs[0]]
    for pair in sorted_pairs[1:]:
        last_y = p_front[-1][1]
        # Look for higher (maxY) or lower (not maxY) values of Y ...
        improves_y = pair[1] >= last_y if maxY else pair[1] <= last_y
        if improves_y:
            # ... and add them to the Pareto frontier
            p_front.append(pair)

    # Turn resulting pairs back into a list of Xs and Ys
    p_frontX = [pair[0] for pair in p_front]
    p_frontY = [pair[1] for pair in p_front]
    return p_frontX, p_frontY



# Scratch check of pareto_frontier: each entry is [name, x, y].
l = [["ind1", 3.00, 12], ["ind2", 2.23, 13], ["ind3", 4.5, 15]]
print(l)
# Split the entries into parallel lists (Is holds the names and is not used below).
Is = [el[0] for el in l]
Xs = [el[1] for el in l]
Ys = [el[2] for el in l]
print(Xs)
print(Ys)

# Default arguments, i.e. maxX = False and maxY = False.
pfront = pareto_frontier(Xs, Ys)
print(pfront)
# pfront is (frontier Xs, frontier Ys); print the frontier as "x y" pairs.
s = zip(pfront[0], pfront[1])
for e in s:
    print(str(e[0]) + " " + str(e[1]))
    
# Scratch check of list.sort with a key: orders the inner lists in place by
# their element at index 3 (strings here, so the comparison is lexicographic).
unsorted_list = [['a','b','c','5','d'],['e','f','g','3','h'],['i','j','k','4','m']]
print(unsorted_list)
unsorted_list.sort(key=lambda x: x[3], reverse=False)
print(unsorted_list)

In [None]:
import numpy as np
# Scratch check of the `axis` argument of np.mean on a 2x3 array.
a = np.array([[40, 50, 55], [50, 60, 60]])
m1 = np.mean(a, axis=1)     # to take the mean of each row
print(m1)
m2 = np.mean(a, axis=0)     # to take the mean of each col
print(m2)

### 