# Case Study: Chamanculo C

In [1]:
# OS functionalities (to deal with files)
import os

# Enhanced Iteration capabilities (to use cross-products)
import itertools
from itertools import chain

# Data processing packages
import pandas as pd
import numpy as np

import math

# Global Setup for Processing Data

In this section, we define all the global constants that will be used throughout the script. By defining them up here, we make this notebook purely parametric and reusable for other projects (as long as the folder structure remains the same).

In [2]:
# Folders Structure
# `base_folder` is the directory the notebook is started from; the result files
# are looked up in its "algorithms" sub-folder (the default `base_folder` of
# `load_results`).
base_folder = os.getcwd()
results_folder= os.path.join(base_folder, "algorithms")
# NOTE: relative to the current working directory, not built from `base_folder`.
output_folder = "./output/"

# CSV File Configuration
# NOTE: `load_results` declares its own `has_header=True` default and does not
# read this global; keep both in sync when changing it.
has_header = True
files_sep = ","
file_extension = 'csv'

# Optimization Settings
# `runs` lists the run numbers used to build the result file names;
# `max_evals` is the default maximum number of rows kept per file by `load_results`.
runs = [1]
nruns = len(runs)
max_evals = 1200

## Problem Definition (in the files)
### Variables
n_houses = 20

# One decision variable per house: "h1" .. "h<n_houses>"
vars_cols = [f"h{i+1}" for i in range(n_houses)]

### Objectives
Performance = "Performance"
Variation = "Variation"
Cost = "Cost"
objs_cols = [Performance, Cost, Variation]

# Every column that identifies a solution (variables followed by objectives)
relevant_cols = vars_cols + objs_cols

# Integer column identifiers used by `load_results` as `usecols`: the decision
# variables start at column 4 and the objectives follow right after them, in
# the order Performance, Variation, Cost.
first_var_col = 4
first_obj_col = first_var_col + len(vars_cols)

d_vars = {first_var_col + i: name for (i, name) in enumerate(vars_cols)}

# Reuse the objective constants so the mapping cannot drift from `objs_cols`
d_objs = {first_obj_col: Performance,
          first_obj_col + 1: Variation,
          first_obj_col + 2: Cost}

# Column identifier -> readable column name (variables first, then objectives)
names_mapping = {**d_vars, **d_objs}


## Multi-Objective Optimization Algorithms
### Metaheuristics
pop_size = 50
metaheuristics = ["NSGAII"]

### Model Based (or metamodel)
metamodels_base = ['RF']
metamodels_strategies = ["NSGAII"]
# Every <base>_<strategy> combination, e.g. "RF_NSGAII"
metamodels_algorithms = [f"{b}_{s}" for (b, s) in itertools.product(metamodels_base, metamodels_strategies)]

# To analyse the metaheuristics only, use: all_algorithms = metaheuristics
all_algorithms = metaheuristics + metamodels_algorithms
n_algorithms = len(all_algorithms)

### Filenames with the results
# Run-major order (all algorithms of run 1, then run 2, ...), which is the order
# `get_run_indices` relies on. The run number is zero-padded to two digits, so
# run 1 -> "01" and run 10 -> "10" (the previous "0{r}" produced "010").
# `runs` must therefore contain integers.
filenames = [f"{a}_results_{r:02d}.{file_extension}" for (r, a) in itertools.product(runs, all_algorithms)]
filenames

['NSGAII_results_01.csv', 'RF_NSGAII_results_01.csv']

In [3]:
# Sanity check: verify the file names are what we expected.
# `filenames` is ordered run-major, so stepping by `n_algorithms` shows the
# first algorithm's file of every run (instead of a hard-coded stride).
filenames[::n_algorithms]

['NSGAII_results_01.csv']

## Input/Output (IO) Methods 

In this subsection we create the methods that are responsible for loading the data from the files. To manipulate the data, we use the pandas.DataFrame data structure, which can be easily manipulated and on top of which different statistics can be computed.

In [4]:
def load_results(filenames, base_folder=results_folder, 
                 has_header=True, keep_header=False, 
                 sep=files_sep, usecols=names_mapping.keys(),  
                 max_lines=max_evals):
    """Loads one DataFrame per file name in `filenames` from `base_folder`.

    Parameters
    ----------
    filenames : iterable of str
        File names, relative to `base_folder`.
    base_folder : str
        Folder that contains the files.
    has_header : bool
        Whether the first line of each file is a header line.
    keep_header : bool
        If True, the header is inferred from the file and kept as column
        names. If False, the header line (when `has_header`) is skipped and
        the columns are labelled by their integer identifiers.
    sep : str
        Field separator.
    usecols : list-like or callable, optional
        Columns to read, forwarded to `pandas.read_csv`. A falsy value reads
        every column.
    max_lines : int, optional
        Maximum number of data rows to read per file. A falsy value (None
        or 0) reads the whole file.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file, in the same order as `filenames`.
    """
    if not usecols:
        cols = None
    elif callable(usecols):
        cols = usecols
    else:
        # Materialize views such as dict.keys() into a plain list
        cols = list(usecols)

    read_args = { 
        "header": 'infer' if keep_header else None, 
        "sep": sep,
        "usecols": cols,
        "skiprows": 1 if has_header and not keep_header else 0,
        # Let the parser stop after `max_lines` rows instead of reading the
        # whole file and slicing afterwards.
        "nrows": max_lines if max_lines else None,
    }
    return [pd.read_csv(os.path.join(base_folder, f), **read_args) for f in filenames]

In [5]:
# Always confirm whether the results are according to what you expected:
# load every result file with the default settings and preview the first frame.
examples = load_results(filenames)
examples[0].head()

Unnamed: 0,4,5,6,7,8,9,10,11,12,13,...,17,18,19,20,21,22,23,24,25,26
0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,2.0,1.0,1.0,-69.164223,18.405801,44663.654162
1,1.0,2.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,...,1.0,0.0,1.0,0.0,1.0,1.0,2.0,-69.164223,18.405801,44663.654162
2,0.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,2.0,2.0,...,0.0,2.0,1.0,2.0,0.0,1.0,2.0,-68.680352,17.869348,42949.635941
3,1.0,2.0,0.0,0.0,1.0,2.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,-66.744868,15.819305,39572.370455
4,2.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,1.0,0.0,...,0.0,2.0,1.0,1.0,2.0,1.0,1.0,-67.609971,15.845628,40228.95365


In [6]:
# Row counts that exceed `max_evals` (expected to be an empty list)
[n_rows for n_rows in map(len, examples) if n_rows > max_evals]

[]

In [7]:
def get_run_indices(dfs, run, n_algorithms=n_algorithms):
    """Selects the dataframes that belong to the (1-based) `run`.

    The dataframes are expected to be ordered per run. For instance, with
    two algorithms (SMPSO and SPEA2) and 3 runs each, `dfs` is assumed to be:

    > SMPSO_run1
    > SPEA2_run1
    > SMPSO_run2
    > SPEA2_run2
    > SMPSO_run3
    > SPEA2_run3

    so that `get_run_indices(dfs, 1, n_algorithms=2)` returns `dfs[0:2]`.
    """
    first = (run - 1) * n_algorithms
    last = first + n_algorithms
    return dfs[first:last]

## Pareto Dominance Methods
This section contains methods related to the Pareto optimality (or [Pareto Efficiency](https://en.wikipedia.org/wiki/Pareto_efficiency)).

In [8]:
# **IMPORTANT NOTE**: This function assumes that your problem is a minimization problem for every objective dimension.
def weakly_dominates(v0, v1):
    """Returns whether `v0` Pareto-dominates `v1` for a minimization problem.

    `v0` dominates `v1` when it is no worse (<=) in every objective and
    strictly better (<) in at least one of them. Identical vectors do not
    dominate each other.

    Parameters
    ----------
    v0, v1 : array-like
        Objective vectors of the same length. Lists and tuples are accepted;
        they are converted to arrays so the comparison is element-wise
        (plain Python sequences would be compared lexicographically).

    Returns
    -------
    bool
    """
    v0 = np.asarray(v0)
    v1 = np.asarray(v1)
    return bool(np.all(v0 <= v1) and np.any(v0 < v1))
    
# Sanity Check (:
# Each line prints the expected answer next to the one actually obtained.
# Cases covered: better in every objective, better in one and equal in the
# other, worse in every objective, mutually non-dominated vectors, and
# identical vectors (which must not dominate each other).
print("(Expected) True   (Obtained) ", weakly_dominates(np.array([1, 1]), np.array([2, 2])))
print("(Expected) True   (Obtained) ", weakly_dominates(np.array([2, 1]), np.array([2, 2])))
print("(Expected) False  (Obtained) ", weakly_dominates(np.array([2, 2]), np.array([1, 1]))) 
print("(Expected) True   (Obtained) ", weakly_dominates(np.array([1, 2]), np.array([2, 2]))) 
print("(Expected) False  (Obtained) ", weakly_dominates(np.array([1, 3]), np.array([3, 1])))
print("(Expected) False  (Obtained) ", weakly_dominates(np.array([3, 1]), np.array([1, 3]))) 
print("(Expected) False  (Obtained) ", weakly_dominates(np.array([1, 1]), np.array([1, 1])))  

(Expected) True   (Obtained)  True
(Expected) True   (Obtained)  True
(Expected) False  (Obtained)  False
(Expected) True   (Obtained)  True
(Expected) False  (Obtained)  False
(Expected) False  (Obtained)  False
(Expected) False  (Obtained)  False


In [9]:
def get_non_dominated(V, dominance=weakly_dominates):
    """Flags, for every row of `V`, whether some other row dominates it.

    Non-dominated rows are the optimal solutions; dominated rows are the
    non-optimal ones.

    Returns a pair of `(nsols, 1)` float arrays:
      * `dominated`    - 1 where the row is dominated, 0 otherwise;
      * `dominated_by` - index of the first row found to dominate it. It stays
        0 for non-dominated rows, so a 0 here is only meaningful together
        with `dominated == 1`.
    """
    nsols, nobjs = V.shape

    dominated = np.zeros((nsols, 1))
    dominated_by = np.zeros((nsols, 1))

    for i in range(nsols):
        # Lazily scan the other rows and stop at the first one dominating row i
        others = (j for j in range(nsols) if j != i)
        first_dominator = next((j for j in others if dominance(V[j], V[i])), None)
        if first_dominator is not None:
            dominated[i] = 1
            dominated_by[i] = first_dominator

    return dominated, dominated_by

In [10]:
def add_isdominated_cols(d, cols=objs_cols):
    """Returns a copy of `d` with the Pareto-dominance columns added.

    Parameters
    ----------
    d : pandas.DataFrame
        Solutions, one per row. It is not modified.
    cols : list of str
        Objective columns used for the dominance test (all minimized).

    Returns
    -------
    pandas.DataFrame
        Copy of `d` with two extra columns: `isDominated` (1.0 / 0.0) and
        `dominatedBy` (position of the first dominating row, as computed by
        `get_non_dominated`).

    Side effect: prints the value counts of `isDominated`.
    """
    df_copy = d.copy()
    objectives = np.array(df_copy[cols])
    is_dominated, dominated_by = get_non_dominated(objectives)
    # Assign the flat arrays positionally. Wrapping them in a new DataFrame
    # would align on the index and yield NaN / shifted flags whenever `d`
    # does not have a 0..n-1 index (e.g. after filtering or sorting).
    df_copy["isDominated"] = is_dominated.ravel()
    df_copy["dominatedBy"] = dominated_by.ravel()
    print(df_copy["isDominated"].value_counts())
    return df_copy

In [11]:
def all_data(dfs, drop_cols=relevant_cols, objs_cols=objs_cols):
    """Stacks all the dataframes into a single one with a fresh 0..n-1 index.

    When `drop_cols` is truthy, rows that repeat the same values in those
    columns are dropped (the first occurrence is kept). `objs_cols` is
    accepted but not used by this function.
    """
    merged = pd.concat(dfs)
    if drop_cols:
        merged = merged.drop_duplicates(subset=drop_cols)
    return merged.reset_index(drop=True)

In [12]:
def get_combined_PF(dfs, drop_cols=relevant_cols, objs_cols=objs_cols):
    """Computes the combined Pareto front of a set of dataframes.

    The frames are concatenated, duplicated solutions (according to
    `drop_cols`) are removed, and the dominance columns are computed over
    `objs_cols`.

    Parameters
    ----------
    dfs : list of pandas.DataFrame
    drop_cols : list of str, optional
        Columns that identify a duplicated solution; falsy keeps duplicates.
    objs_cols : list of str
        Objective columns used for the dominance test.

    Returns
    -------
    pandas.DataFrame
        The merged frame with the `isDominated` / `dominatedBy` columns.
    """
    # Forward the caller's `drop_cols` (the global `relevant_cols` used to be
    # passed here unconditionally, silently ignoring the argument).
    all_dfs = all_data(dfs, drop_cols=drop_cols, objs_cols=objs_cols)

    return add_isdominated_cols(all_dfs, cols=objs_cols)

## General Methods

This section contains general purpose methods that can be used in your scripts.

In [13]:
def broadcasting_multi(df, cols, value):
    """Returns a copy of `df` where each column in `cols` is multiplied by `value`.

    `cols` may be a single column label or a list/tuple of labels. The input
    dataframe is left untouched.
    """
    if not isinstance(cols, (list, tuple)):
        cols = [cols]

    scaled = df.copy()
    for label in cols:
        scaled[label] = df[label] * value
    return scaled

In [14]:
def get_symmetric(df, cols):
    """Negates the values of `cols`.

    Returns a copy of `df` in which every column listed in `cols` has its
    sign flipped; the original dataframe is not modified.
    """
    return broadcasting_multi(df, cols=cols, value=-1)

In [15]:
def drop_by_value(dfs, col, value):
    """Removes, in place, the rows whose `col` equals `value` from every dataframe.

    Each dataframe is re-indexed from 0 afterwards. The same list that was
    passed in is returned.
    """
    for frame in dfs:
        matching = frame[frame[col] == value].index
        frame.drop(matching, inplace=True)
        frame.reset_index(drop=True, inplace=True)
    return dfs

# Data Visualization

In this section, we define the tools used to explore the data visually.

In [16]:
# Visualization Framework
import plotly
import plotly.graph_objs as go

import matplotlib.pyplot as plt

import plotly_express as px 
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

%matplotlib inline
init_notebook_mode(connected=True)

# Chart Studio credentials: read them from the environment instead of
# hard-coding real values in the notebook. The placeholders are kept as
# fallback so the cell behaves as before when the variables are not set.
_plotly_username = os.environ.get("PLOTLY_USERNAME", "username")
_plotly_api_key = os.environ.get("PLOTLY_API_KEY", "api_key")

try:
    # Older plotly releases bundle the online plotting module themselves
    import plotly.plotly as py
    plotly.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)
except ImportError:
    # Newer releases moved it to the separate `chart_studio` package. Only an
    # import failure triggers the fallback; other errors are no longer hidden.
    import chart_studio
    import chart_studio.plotly as py
    chart_studio.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)

    
# Print plotly's version
plotly.__version__

'5.1.0'

Let us create the functions that create the graphs

In [17]:
def get_colors(n, colorscale="viridis"): 
    """Samples `n` evenly spaced colors from a matplotlib colormap.

    Parameters
    ----------
    n : int
        Number of colors to return.
    colorscale : str
        Name of a matplotlib colormap.

    Returns
    -------
    list of str
        Colors formatted as "rgb(r, g, b)" with integer channels in 0-255,
        which is the format plotly expects (the colormap's own 0-1 floats
        would be rendered as almost black).
    """
    cmap = plt.get_cmap(colorscale)
    # Integer inputs index the colormap's lookup table directly; this works for
    # every colormap type, not only those exposing a `.colors` list.
    lut_idx = np.linspace(0, cmap.N - 1, n, dtype=int)
    rgba = cmap(lut_idx)

    return [
        f"rgb({int(round(r * 255))}, {int(round(g * 255))}, {int(round(b * 255))})"
        for (r, g, b, _alpha) in rgba
    ]

In [18]:
def scatter(data, x=None, y=None, names=all_algorithms, 
            colorscale="viridis", colors=None, 
            mode="markers", marker_size=5.5, ln_width=1.5, 
            layout=None):
    """Plots one scatter trace per dataset and returns the `py.iplot` result.

    Parameters
    ----------
    data : dataset or list/tuple of datasets
        Each dataset may be a pandas.Series (plotted against its index + 1),
        a pandas.DataFrame (columns `x` and `y` are plotted) or any other
        sequence (plotted against 0..len-1). A list or tuple is always
        interpreted as a collection of datasets.
    x, y : column labels
        Only used for DataFrame datasets.
    names : sequence of str
        Trace names. Datasets beyond the end of `names` get a generic
        "trace <i>" name.
    colorscale : str, optional
        Matplotlib colormap used to generate the colors. When falsy,
        `colors` is used instead.
    colors : sequence of str, optional
        Explicit colors, cycled when there are more datasets than colors.
        When neither `colorscale` nor `colors` is given, plotly's default
        colors are used.
    mode, marker_size, ln_width
        Forwarded to every `go.Scatter` trace.
    layout : go.Layout, optional
        Figure layout.
    """
    def get_x_y(d): 
        if isinstance(d, pd.Series):
            return np.array(d.index) + 1, d.values
        elif isinstance(d, pd.DataFrame):
            return d[x], d[y]
        else: 
            return np.arange(len(d)), d      

    # ----------------------------------------------
    # Normalize input data
    # ----------------------------------------------
    data = data if isinstance(data, (list, tuple)) else [data]
    colors = get_colors(len(data), colorscale) if colorscale else colors
    has_colors = colors is not None and len(colors) > 0

    # ----------------------------------------------
    # Build one trace per dataset
    # ----------------------------------------------
    traces = []
    for (i, d) in enumerate(data):
        # Distinct local names: do not shadow the `x` / `y` column parameters
        xs, ys = get_x_y(d)
        color = colors[i % len(colors)] if has_colors else None
        name = names[i] if i < len(names) else f"trace {i}"
        traces.append(
            go.Scatter(
                x=xs,
                y=ys,
                mode=mode,
                name=name,
                marker=dict(size=marker_size, color=color),
                line=dict(width=ln_width, color=color),
            )
        )

    kwargs = {} if not layout else {"layout": layout}
    fig = go.Figure(data=traces, **kwargs)
    return py.iplot(fig, filename='simple_scatter')

## Default Layout

In [19]:
# Default layout for the pareto fronts graphs.
# Both axes share exactly the same settings, so they are described once.
_default_axis = {
    "autorange": True,
    "showgrid": True,
    "zeroline": False,
    "showline": True,
    "ticks": '',
    "showticklabels": True,
    "tickformat": '.',
}

layout = go.Layout(
    template="plotly_white",
    autosize=True,
    legend={"orientation": 'h'},
    # Each axis gets its own copy of the shared settings
    xaxis=dict(_default_axis),
    yaxis=dict(_default_axis),
)

## 2D Pareto Front

This section contains different functions that explore dataframes having information about the non-dominated and the dominated solutions.

In [20]:
def create_pf(pf, name, x, y, nd_color='rgb(0,0,255)', ln_width=1.5, marker_size=5.5, d_color=None):
    """Builds the 2D traces of one Pareto front.

    Parameters
    ----------
    pf : pandas.DataFrame
        Solutions with an `isDominated` column (0 = non-dominated) plus the
        `vars_cols` and `objs_cols` columns, which are attached to each
        point as `customdata`.
    name : str
        Name of the non-dominated trace; the dominated one is named
        `name + " Dominated"`.
    x, y : column labels
        Columns plotted on each axis.
    nd_color : str
        Color of both traces (the dominated trace is drawn in the same
        color with a lower opacity).
    ln_width : float
        Currently not applied: the front line uses a fixed width of 2.
    marker_size : float
        Only scales the dominated markers; the front markers use a fixed
        size of 7.
    d_color : optional
        When truthy, the dominated solutions are drawn as well. Its value is
        not used as a color.

    Returns
    -------
    list of go.Scatter
    """
    # Information about the objectives and variables - relevant for an interactive PF
    info = list(vars_cols) + list(objs_cols)

    # Sort whole rows (not only the coordinates) so that `customdata` stays
    # aligned with the plotted points; an empty front simply gives an empty trace.
    sort_keys = list(dict.fromkeys([x, y]))
    front = pf[pf['isDominated'] == 0].sort_values(by=sort_keys)
    dominated = pf[pf['isDominated'] == 1]

    # Non-dominated trace (in the color specified in *nd_color*)
    traces = [
        go.Scatter(
            x=front[x],
            y=front[y],
            mode='lines+markers',
            name=name,
            customdata=np.array(front[info]),
            opacity=1,
            marker=dict(color=nd_color, size=7),
            line=dict(color=nd_color, width=2),
        )
    ]

    if d_color:
        # Dominated trace: same color, smaller and more transparent markers
        traces.append(
            go.Scatter(
                x=dominated[x],
                y=dominated[y],
                mode='markers',
                name=name + " Dominated",
                customdata=np.array(dominated[info]),
                opacity=0.4,
                marker=dict(color=nd_color, size=marker_size*0.7),
            )
        )

    return traces

In [21]:
def get_traces(pfs, x, y, draw_dominated=True,
               names=all_algorithms, colorscale='viridis', colors=None,
               tpf=None, tpf_name="Combined_PF", tpf_color='rgb(0,0,0)', 
               layout=layout):
    """Builds the 2D traces of several Pareto fronts.

    Parameters
    ----------
    pfs : pandas.DataFrame or list/tuple of them
        One dataframe per front (see `create_pf`).
    x, y : column labels
        Columns plotted on each axis.
    draw_dominated : bool
        Whether the dominated solutions are drawn too.
    names : str or sequence of str
        One name per front.
    colorscale : str, optional
        Matplotlib colormap used to generate one color per front. When
        falsy, `colors` is used instead.
    colors : sequence of str, optional
        Explicit colors, cycled when there are fewer colors than fronts.
    tpf : pandas.DataFrame, optional
        Extra (e.g. combined) front drawn first, named `tpf_name` and
        colored with `tpf_color`.
    layout : go.Layout
        Accepted for signature compatibility; not used to build the traces.

    Returns
    -------
    list of go.Scatter

    Raises
    ------
    ValueError
        If there are fronts to draw but neither `colorscale` nor a
        non-empty `colors` was provided.
    """
    pfs = pfs if isinstance(pfs, (list, tuple)) else [pfs]
    names = names if isinstance(names, (list, tuple)) else [names]
    n_pfs = len(pfs)
    colors = get_colors(n_pfs, colorscale) if colorscale else colors
    if n_pfs > 0 and (colors is None or len(colors) == 0):
        raise ValueError("provide either `colorscale` or a non-empty `colors` sequence")

    traces = []

    if tpf is not None:
        traces += create_pf(pf=tpf, name=tpf_name, x=x, y=y, ln_width=4, marker_size=10, nd_color=tpf_color)

    for (i, pf) in enumerate(pfs):
        color = colors[i % len(colors)]
        d_color = color if draw_dominated else None
        traces += create_pf(pf=pf, name=names[i], x=x, y=y, nd_color=color, d_color=d_color)

    return traces

In [22]:
def create_pfs(pfs, x, y, draw_dominated=True,
               names=all_algorithms, colorscale='viridis', colors=None,
               tpf=None, tpf_name="Combined_PF", tpf_color='rgb(0,0,0)', 
               layout=layout):
    """Plots the 2D Pareto fronts in `pfs` and returns the `py.iplot` result.

    All the arguments are forwarded to `get_traces`; `layout` is also used
    as the layout of the resulting figure.
    """
    figure = go.Figure(
        data=get_traces(
            pfs, x, y,
            draw_dominated=draw_dominated,
            names=names,
            colorscale=colorscale,
            colors=colors,
            tpf=tpf,
            tpf_name=tpf_name,
            tpf_color=tpf_color,
            layout=layout,
        ),
        layout=layout,
    )
    return py.iplot(figure, filename='algorithms_pfs_per_run')

## 3D Pareto Front

In [23]:
def create_pf_3d(pf, name, x, y, z, 
                 nd_color='rgb(0,0,255)', d_color=None,
                 ln_width=1.5, marker_size=2):
    """Builds the 3D traces of one Pareto front.

    Parameters
    ----------
    pf : pandas.DataFrame
        Solutions with an `isDominated` column (0 = non-dominated) plus the
        `vars_cols` and `objs_cols` columns, which are attached to each
        point as `customdata`.
    name : str
        Prefix of the trace names (" NonDominated", " Dominated Surface"
        and " Dominated" are appended).
    x, y, z : column labels
        Columns plotted on each axis.
    nd_color : str
        Color of the non-dominated markers and of the surface.
    d_color : str, optional
        Color of the dominated markers; when falsy they are not drawn.
    ln_width : float
        Accepted but not used by this function.
    marker_size : float
        Size of the non-dominated markers (dominated ones use 80% of it).

    Returns
    -------
    list
        A `go.Scatter3d` with the non-dominated points, a `go.Mesh3d`
        through them and, optionally, a `go.Scatter3d` with the dominated
        points.
    """
    # Information about the objectives and variables - relevant for an interactive PF
    info = list(vars_cols) + list(objs_cols)

    # Sort whole rows (not only the coordinates) so that `customdata` stays
    # aligned with the plotted points; an empty front simply gives empty traces.
    sort_keys = list(dict.fromkeys([x, y, z]))
    front = pf[pf['isDominated'] == 0].sort_values(by=sort_keys)
    dominated = pf[pf['isDominated'] == 1]

    traces = [
        # Non-dominated points (in the color specified in *nd_color*)
        go.Scatter3d(
            x=front[x],
            y=front[y],
            z=front[z],
            mode='markers',
            name=name + " NonDominated",
            customdata=np.array(front[info]),
            opacity=1,
            marker=dict(color=nd_color, size=marker_size),
        ),
        # Surface through the non-dominated points
        go.Mesh3d(
            x=front[x],
            y=front[y],
            z=front[z],
            color=nd_color,
            name=name + " Dominated Surface",
            opacity=0.8,
        ),
    ]

    if d_color:
        # Dominated points (in the color specified in *d_color*)
        traces.append(
            go.Scatter3d(
                x=dominated[x],
                y=dominated[y],
                z=dominated[z],
                mode='markers',
                name=name + " Dominated",
                customdata=np.array(dominated[info]),
                opacity=0.5,
                marker=dict(color=d_color, size=marker_size * 0.8),
            )
        )

    return traces

In [24]:
def get_traces_3d(pfs, x, y, z, draw_dominated=True,
               names=all_algorithms, colorscale='viridis', colors=None,
               tpf=None, tpf_name="Combined_PF", tpf_color='rgb(0,0,0)', 
               layout=layout):
    """Builds the 3D traces of several Pareto fronts.

    Parameters
    ----------
    pfs : pandas.DataFrame or list/tuple of them
        One dataframe per front (see `create_pf_3d`).
    x, y, z : column labels
        Columns plotted on each axis.
    draw_dominated : bool
        Whether the dominated solutions are drawn too.
    names : str or sequence of str
        One name per front.
    colorscale : str, optional
        Matplotlib colormap used to generate one color per front. When
        falsy, `colors` is used instead.
    colors : sequence of str, optional
        Explicit colors, cycled when there are fewer colors than fronts.
    tpf : pandas.DataFrame, optional
        Extra (e.g. combined) front drawn first, named `tpf_name` and
        colored with `tpf_color`.
    layout : go.Layout
        Accepted for signature compatibility; not used to build the traces.

    Returns
    -------
    list of plotly traces

    Raises
    ------
    ValueError
        If there are fronts to draw but neither `colorscale` nor a
        non-empty `colors` was provided.
    """
    pfs = pfs if isinstance(pfs, (list, tuple)) else [pfs]
    names = names if isinstance(names, (list, tuple)) else [names]
    n_pfs = len(pfs)
    colors = get_colors(n_pfs, colorscale) if colorscale else colors
    if n_pfs > 0 and (colors is None or len(colors) == 0):
        raise ValueError("provide either `colorscale` or a non-empty `colors` sequence")

    traces = []

    if tpf is not None:
        traces += create_pf_3d(pf=tpf, name=tpf_name, x=x, y=y, z=z, ln_width=4, marker_size=10, nd_color=tpf_color)

    for (i, pf) in enumerate(pfs):
        color = colors[i % len(colors)]
        d_color = color if draw_dominated else None
        traces += create_pf_3d(pf=pf, name=names[i], x=x, y=y, z=z, nd_color=color, d_color=d_color)

    return traces

In [25]:
def create_pfs_3d(pfs, x, y, z, draw_dominated=True,
               names=all_algorithms, colorscale='viridis', colors=None,
               tpf=None, tpf_name="Combined_PF", tpf_color='rgb(0,0,0)', 
               layout=layout):
    """Creates the figure with the 3D Pareto fronts in `pfs`.

    All the arguments are forwarded to `get_traces_3d`. The returned
    `go.Figure` is built from the traces only: `layout` is not applied to
    it, so callers style the figure afterwards (e.g. `fig.update_layout`).
    """
    return go.Figure(
        data=get_traces_3d(
            pfs, x, y, z,
            draw_dominated=draw_dominated,
            names=names,
            colorscale=colorscale,
            colors=colors,
            tpf=tpf,
            tpf_name=tpf_name,
            tpf_color=tpf_color,
            layout=layout,
        )
    )

---

# Analysis

## Pareto Front

### Layout

In [26]:
# Layout for the 2D pareto fronts plots.
# Fixed-size figure whose axis titles are written for a Performance (x) versus
# Cost (y) plot; the axis ranges are left to plotly (`autorange`), with the
# manual ranges kept below as commented-out alternatives.
layout_2d = go.Layout(
    template="plotly_white",
    autosize=False,
    
    # Define plot size
    width=900, 
    height=600,
    
    # Legend Position (horizontal, below the plot area)
    legend=dict(
        orientation='h',
        x=0.05,
        y=-0.2
    ),

    # Define axis
    xaxis=dict(
        title="Thermal Autonomy [%]",
#         range=[40, 61],
        autorange = True,
        showgrid=True,
        zeroline=False,
        showline=True,
        # One tick every 5 units, counted from 0
        tickmode = 'linear',
        tick0 = 0,
        dtick = 5,
    ),
    yaxis=dict(
        title="Cost [€]",
#         range=[3500, 13000],
        autorange = True,
        showgrid=True,
        zeroline=False,
        showline=True,
        ticks='',
        showticklabels=True,
        tickformat='.',
        # One tick every 1500 units, counted from 0
        tickmode = 'linear',
        tick0 = 0,
        dtick = 1500,
    )
)

### Read Algorithms

In [27]:
# Read algorithms: one DataFrame per entry of `filenames`, in the same order
dfs1 = load_results(filenames)

In [28]:
# Replace the integer column labels with the readable variable/objective names
dfs1 = [frame.rename(columns=names_mapping) for frame in dfs1]

In [29]:
# Remove the rows whose Performance is exactly 0 (handled as unfeasible
# solutions by `drop_by_value`). The dataframes are modified in place.
dfs1 = drop_by_value(dfs1, Performance, 0)

In [30]:
# Compute non_dominated_solutions (per run)
# pfs1 uses all the objectives (`objs_cols`); pfs2-pfs4 use one pair of
# objectives each. Every call prints the counts of dominated (1.0) and
# non-dominated (0.0) solutions of one dataframe.
pfs1 = [add_isdominated_cols(df) for df in dfs1]
pfs2 = [add_isdominated_cols(df, cols=[Performance, Cost]) for df in dfs1]
pfs3 = [add_isdominated_cols(df, cols=[Performance, Variation]) for df in dfs1]
pfs4 = [add_isdominated_cols(df, cols=[Cost, Variation]) for df in dfs1]

1.0    1127
0.0      45
Name: isDominated, dtype: int64
1.0    703
0.0     50
Name: isDominated, dtype: int64
1.0    1145
0.0      27
Name: isDominated, dtype: int64
1.0    721
0.0     32
Name: isDominated, dtype: int64
1.0    1168
0.0       4
Name: isDominated, dtype: int64
1.0    748
0.0      5
Name: isDominated, dtype: int64
1.0    1159
0.0      13
Name: isDominated, dtype: int64
1.0    747
0.0      6
Name: isDominated, dtype: int64


In [31]:
# Sanity check!!
pfs1[0].head()

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h16,h17,h18,h19,h20,Performance,Variation,Cost,isDominated,dominatedBy
0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,...,0.0,1.0,2.0,1.0,1.0,-69.164223,18.405801,44663.654162,1.0,7.0
1,1.0,2.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,...,1.0,0.0,1.0,1.0,2.0,-69.164223,18.405801,44663.654162,1.0,7.0
2,0.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,2.0,2.0,...,1.0,2.0,0.0,1.0,2.0,-68.680352,17.869348,42949.635941,1.0,7.0
3,1.0,2.0,0.0,0.0,1.0,2.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,0.0,1.0,-66.744868,15.819305,39572.370455,1.0,13.0
4,2.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,1.0,0.0,...,1.0,1.0,2.0,1.0,1.0,-67.609971,15.845628,40228.95365,1.0,13.0


In [32]:
# Computes combined Pareto Front (optimal solutions found from all the algorithms, all the runs)
# One combined front per pair of objectives; duplicated solutions (same values
# in `relevant_cols`) are counted once.
combined_pf2 = get_combined_PF(dfs1, drop_cols=relevant_cols, objs_cols=[Performance, Cost])
combined_pf3 = get_combined_PF(dfs1, drop_cols=relevant_cols, objs_cols=[Performance, Variation])
combined_pf4 = get_combined_PF(dfs1, drop_cols=relevant_cols, objs_cols=[Cost, Variation])

1.0    1888
0.0      37
Name: isDominated, dtype: int64
1.0    1921
0.0       4
Name: isDominated, dtype: int64
1.0    1914
0.0      11
Name: isDominated, dtype: int64


In [33]:
# Since sDA is actually a maximization, let us use the symmetric operation:
# Performance is stored negated in the result files (the dominance test assumes
# minimization), so its sign is flipped back before plotting.
# WARNING: this cell reassigns its own inputs; running it a second time flips
# the sign again. Re-run the notebook from the loading cells instead.
pfs1 = [get_symmetric(pf, Performance) for pf in pfs1]
pfs2 = [get_symmetric(pf, Performance) for pf in pfs2]
pfs3 = [get_symmetric(pf, Performance) for pf in pfs3]
pfs4 = [get_symmetric(pf, Performance) for pf in pfs4]

combined_pf2 = get_symmetric(combined_pf2, Performance)
combined_pf3 = get_symmetric(combined_pf3, Performance)
combined_pf4 = get_symmetric(combined_pf4, Performance)

In [34]:
# Sort sDA values in ascending order (i.e. sort every dataframe by Performance)

## This fixes the error we were getting in the create_pfs_interactive function,
## which would return a different point than the one we selected

# Note: the original row labels are kept, so the index is no longer 0..n-1.
pfs2 = [pf.sort_values(by=Performance, ascending=True) for pf in pfs2]
pfs3 = [pf.sort_values(by=Performance, ascending=True) for pf in pfs3]
pfs4 = [pf.sort_values(by=Performance, ascending=True) for pf in pfs4]

In [35]:
# Sanity check!!
pfs2[0].head()

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,...,h16,h17,h18,h19,h20,Performance,Variation,Cost,isDominated,dominatedBy
792,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,...,2.0,2.0,0.0,1.0,1.0,58.753666,18.423744,28774.752892,1.0,49.0
761,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,...,2.0,0.0,1.0,1.0,0.0,59.677419,16.525457,29741.403917,1.0,6.0
161,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,...,1.0,0.0,1.0,0.0,1.0,59.956012,17.290532,27784.23034,1.0,621.0
621,1.0,0.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,...,1.0,0.0,1.0,2.0,0.0,60.117302,14.375013,27005.987084,0.0,0.0
6,2.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,0.0,2.0,...,2.0,0.0,1.0,2.0,2.0,60.58651,15.99798,29739.423432,1.0,49.0


In [36]:
len(pfs1[0])

1172

 - __Optimal Solutions__

In [37]:
# Run only if want to see the entire dataframe
# NOTE: this removes the row/column display limits globally, i.e. for every
# dataframe displayed from this point on (including on a "Run All").
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [38]:
# Non-dominated solutions of the first dataframe in `pfs1`, without fully
# duplicated rows. `reset_index()` (without drop) keeps the previous row labels
# as a leading "index" column.
optimal = pfs1[0][pfs1[0]['isDominated']==0].drop_duplicates().reset_index()

In [39]:
# 1-based number of the optimal solution to inspect
solution = 5

# Columns 1:6 skip the leading "index" column and show the variables h1..h5
optimal.iloc[(solution-1), 1:6]

h1    1.0
h2    1.0
h3    1.0
h4    2.0
h5    2.0
Name: 4, dtype: float64

### 2D Plot

* __Performance x Cost__

In [40]:
# Performance x Cost fronts of every algorithm, with explicit colors
# (`colorscale=None` makes `colors` be used) and the dedicated 2D layout.
# `fig` holds whatever `py.iplot` returned, which is displayed below.
fig=create_pfs(pfs2, x=Performance, y=Cost, tpf=None, names=all_algorithms,
                           colorscale=None, 
                           colors=['#43a0b5', '#B5557A', '#eb911c', '#6c943e'],
                           draw_dominated=True, layout=layout_2d)
 
fig

* __Performance x Variation__

In [41]:
# Performance x Variation fronts of every algorithm. No `layout` is passed, so
# the default `layout` (without axis titles) is used.
fig=create_pfs(pfs3, x=Performance, y=Variation, tpf=None, names=all_algorithms,
                colorscale=None, 
                colors=['#43a0b5', '#B5557A', '#eb911c', '#6c943e'],
                draw_dominated=True)
 
fig

* __Variation x Cost__

In [42]:
# Variation x Cost fronts of every algorithm. No `layout` is passed, so the
# default `layout` (without axis titles) is used.
fig=create_pfs(pfs4, x=Variation, y=Cost, tpf=None, names=all_algorithms,
                colorscale=None, 
                colors=['#43a0b5', '#B5557A', '#eb911c', '#6c943e'],
                draw_dominated=True)
 
fig

* __Performance x Cost: Multiple fronts for different variations__

In [43]:
# Subsets of the first Performance x Cost dataframe with an increasingly strict
# upper bound on Variation.
# NOTE(review): the subsets keep the `isDominated` flags computed on the
# unfiltered dataframe; dominance is not recomputed after filtering, so a
# solution dominated only by filtered-out solutions is still flagged as
# dominated - confirm this is intended.
pf1 = pfs2[0]
pf2 = pf1[pf1[Variation] < 13]
pf3 = pf1[pf1[Variation] < 12]
pf4 = pf1[pf1[Variation] < 11]


pfs5 = [pf1, pf2, pf3, pf4]

# One legend name per entry of `pfs5`
names_fronts = ["Original", "σ < 13", "σ < 12", "σ < 11"]

In [44]:
# NOTE: `cs` is not used by the call below (explicit `colors` are passed).
cs = px.colors.diverging.Temps

# One front per Variation bound; only the first four colors are needed for the
# four dataframes in `pfs5`.
fig=create_pfs(pfs5, x=Performance, y=Cost, tpf=None, names=names_fronts,
               colorscale=None, 
               colors=['#43a0b5', '#B5557A', '#eb911c', '#6c943e', "#a834eb", '#B5557A', '#6c943e'],
               draw_dominated=True)
 
fig

### 3D Plot

__Camera Controls:__ https://plotly.com/python/3d-camera-controls/

__3D Axes:__ https://plotly.com/python/3d-axes/

In [45]:
# 3D fronts (dominance computed over all the objectives), one color per algorithm.
# `create_pfs_3d` returns a plain figure, which is styled below.
fig=create_pfs_3d(pfs1, x=Variation, y=Performance, z=Cost, tpf=None, names=all_algorithms,
                           colorscale=None, colors=['#43a0b5', '#B5557A'],
                           draw_dominated=True)

# Initial point of view of the scene
camera = dict(
    eye=dict(x=-2, y=-2, z=1)
)

# Axis titles and number of ticks; the manual ranges are kept as commented-out
# alternatives.
scene = dict(
    zaxis=dict(
        title="Cost [€]",
        nticks=10, 
#         range=[13500,36000],

    ),
    
    yaxis=dict(
        title="Thermal Autonomy [%]",
        nticks=10, 
#         range=[60,100],

    ),
               
    xaxis=dict(
        title="σ [%]",
        nticks=5, 
#         range=[0,20],

    ),
#     aspectratio=dict(
#                 x=1,
#                 y=1,
#                 z=1
#         )
)

fig.update_layout(
    showlegend=True,
    legend=dict(
        orientation='h',
    ),
    width=850, height=700,
    scene=scene,
    scene_camera=camera,
)

fig.show()

## Parallel Coordinates

### Read Files

In [46]:
# Concatenate all DFs in a single one, and drop duplicates (rows repeating the
# same values in `relevant_cols`); the `isDominated` flags are the per-dataframe
# ones computed for `pfs1`.
all_alg = all_data(pfs1)

In [47]:
# Sanity check!!
all_alg.head()

Unnamed: 0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11,h12,h13,h14,h15,h16,h17,h18,h19,h20,Performance,Variation,Cost,isDominated,dominatedBy
0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,69.164223,18.405801,44663.654162,1.0,7.0
1,1.0,2.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,2.0,69.164223,18.405801,44663.654162,1.0,7.0
2,0.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,2.0,2.0,0.0,1.0,1.0,0.0,2.0,1.0,2.0,0.0,1.0,2.0,68.680352,17.869348,42949.635941,1.0,7.0
3,1.0,2.0,0.0,0.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,2.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,66.744868,15.819305,39572.370455,1.0,13.0
4,2.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,2.0,1.0,1.0,2.0,1.0,1.0,67.609971,15.845628,40228.95365,1.0,13.0


### Unconstrained Plot

__Colors:__ https://plotly.com/python/builtin-colorscales/

In [48]:
# Parallel coordinates over every variable and objective, colored by Performance.
# The color scale is centered on a hard-coded Performance value of 70.
fig = px.parallel_coordinates(all_alg,
                              dimensions=relevant_cols,
                              color="Performance",
                              labels={"Performance": "TA [%]",
                                      "Variation": "σ [%]",
                                      "Cost": "Cost [€]"},
                              color_continuous_scale=px.colors.diverging.Temps,
                              color_continuous_midpoint=70,
                              width=1200, height=400)

fig.show()

In [49]:
# Same plot restricted to the objectives only (`objs_cols`).
# The color scale is centered on a hard-coded Performance value of 70.
fig = px.parallel_coordinates(all_alg,
                              dimensions=objs_cols,
                              color="Performance",
                              labels={"Performance": "TA [%]",
                                      "Variation": "σ [%]",
                                      "Cost": "Cost [€]"},
                              color_continuous_scale=px.colors.diverging.Temps,
                              color_continuous_midpoint=70,
                              width=400, height=400)

fig.show()

In [50]:
# Objectives plus the `isDominated` flag as an extra axis.
# The color scale is centered on a hard-coded Performance value of 70.
fig = px.parallel_coordinates(all_alg,
                              dimensions=objs_cols + ["isDominated"],
                              color="Performance",
                              labels={"Performance": "TA [%]",
                                      "Variation": "σ [%]",
                                      "Cost": "Cost [€]"},
                              color_continuous_scale=px.colors.diverging.Temps,
                              color_continuous_midpoint=70,
                              width=400, height=400)

fig.show()

In [51]:
# Parallel coordinates with an initial selection on every axis: `range` is the
# extent of the axis and `constraintrange` the interval selected on it.
# The last axis selects the non-dominated solutions only (isDominated == 0).
# NOTE: all the ranges are hard-coded for this data set.
fig = go.Figure(data=
    go.Parcoords(
        line = dict(color = all_alg["Performance"],
                   colorscale = "rdylgn",
                   showscale = True),
        dimensions = list([
            dict(range = [55, 80],
                label = "TA [%]", values = all_alg["Performance"],
                constraintrange = [65, 100]),
            dict(range = [20000, 57000],
                label = "Cost [€]", values = all_alg["Cost"],
                constraintrange = [0, 40000]),
            dict(range = [9,22],
                 label = "σ [%]", values = all_alg["Variation"],
                 constraintrange = [0, 15]),
            dict(range = [0,1],
                 label = "Is dominated?", values = all_alg["isDominated"],
                 constraintrange = [0,0.01]),
        ])
    )
)

fig.update_layout(
    width=600, 
    height=400
)

fig.show()

## Encode Fairness with Color

In [52]:
# Performance x Cost of every solution, with Variation encoded as color.
# NOTE: the axis and color ranges are hard-coded for this data set.
fig=px.scatter(all_alg, x=Performance, y=Cost, color=Variation,
               color_continuous_scale=px.colors.sequential.YlOrRd,
               template='plotly_white',
               width=925, height=530,
               labels={"Performance": "Thermal Autonomy [%]",
                       "Variation": "σ [%]",
                       "Cost": "Cost [€]"},
               range_x=[52, 80],
               range_y=[21000, 59000],
               range_color=[10, 20])


fig.update_traces(marker_size=3)

fig

In [53]:
# Same plot restricted to the non-dominated solutions, drawn as larger
# outlined triangles. The ranges match the previous plot, so both figures are
# directly comparable.
fig=px.scatter(all_alg[all_alg["isDominated"]==0], x=Performance, y=Cost, color=Variation,
               color_continuous_scale=px.colors.sequential.YlOrRd,
               template='plotly_white',
               width=925, height=530,
               labels={"Performance": "Thermal Autonomy [%]",
                       "Variation": "σ [%]",
                       "Cost": "Cost [€]"},
              range_x=[52, 80],
              range_y=[21000, 59000],
              range_color=[10, 20])

fig.update_traces(marker_size=8, opacity=1, marker_symbol="triangle-up", marker_line=dict(color='black', width=0.9))


fig