## Preprocess

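This notebook runs the preprocessing steps of the visualization pipeline: it normalizes the raw data points, computes their trade-off values, peels the point cloud into layers, and computes the palette coordinates.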

In [None]:
%matplotlib notebook
%reload_ext autoreload
%autoreload 2

import sys
import os

sys.path.append('../src')

### List of data files to be preprocessed

In [None]:
# Choose the input file: leave exactly one `rawfpath` assignment uncommented.

# Data sets to be plotted with centroid based coloring

# rawfpath = "../data/spherical/spherical-3d.out"
# rawfpath = "../data/spherical/spherical-4d.out"
# rawfpath = "../data/spherical/spherical-8d.out"

# rawfpath = "../data/knee/3d/knee.out"
rawfpath = "../data/knee/4d/knee.out"
# rawfpath = "../data/knee/8d/knee.out"

# rawfpath = "../data/line-surface/line-3d.out"
# rawfpath = "../data/line-surface/line-4d.out"
# rawfpath = "../data/line-surface/line-6d.out" # fails
# rawfpath = "../data/line-surface/line-8d.out"

# Data sets to be plotted with constraint based coloring

# rawfpath = "../data/knee-const/knee-const-3d.out"
# rawfpath = "../data/knee-const/knee-const-4d.out"
# rawfpath = "../data/knee-const/knee-const-8d.out"

# rawfpath = "../data/knee-const-mod/knee-const-mod-3d.out"
# rawfpath = "../data/knee-const-mod/knee-const-mod-4d.out"

# rawfpath = "../data/isolated/isolated-3d.out"
# rawfpath = "../data/isolated/isolated-4d.out"
# rawfpath = "../data/isolated/isolated-5d.out"
# rawfpath = "../data/isolated/isolated-6d.out"
# rawfpath = "../data/isolated/isolated-7d.out"
# rawfpath = "../data/isolated/isolated-8d.out"

# rawfpath = "../data/c2dtlz2/c2dtlz2-3d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-4d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-5d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-8d.out"

# rawfpath = "../data/c2dtlz2/c2dtlz2-c1/c2dtlz2-c1-3d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-c2/c2dtlz2-c2-3d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-c3/c2dtlz2-c3-3d.out"
# rawfpath = "../data/c2dtlz2/c2dtlz2-c4/c2dtlz2-c4-3d.out"

# rawfpath = "../data/osy/osy-3d.out"
# rawfpath = "../data/osy/osy-4d.out"

# Engineering design problems

# rawfpath = "../data/carcrash/carcrash-3d.out"
# rawfpath = "../data/carcrash/carcrash-c1/carcrash-c1-3d.out"
# rawfpath = "../data/carcrash/carcrash-c2/carcrash-c2-3d.out"

# rawfpath = "../data/gaa-das/gaa-das-10d.out"
# rawfpath = "../data/gaa-lhs/gaa-lhs-10d.out"

# ML data sets

# rawfpath = "../data/airofoil/airofoil-5d.out"
# rawfpath = "../data/banknote/banknote-4d.out"
# rawfpath = "../data/blood/blood-4d.out" # fails
# rawfpath = "../data/cccp/cccp-4d.out"
# rawfpath = "../data/concrete/concrete-8d.out" # fails
# rawfpath = "../data/iris/iris-4d.out"
# rawfpath = "../data/mammogram/mammogram-5d.out" # fails
# rawfpath = "../data/wil/wil-7d.out" # fails
# rawfpath = "../data/yeast/yeast-8d.out" #  fails

# Derive the directory and the output file names from the chosen input file.
path, rawfname = os.path.split(rawfpath)
# Everything before the first dot of the file name is the common name stem.
frontname = rawfname.partition('.')[0]
normfname = frontname + "-norm.csv"
mufname = normfname.partition('.')[0] + "-mu.csv"

### Normalize the data points within [0.0, 1.0]

In [None]:
# Load the raw data, normalize it within [0.0, 1.0] and save the result as CSV.

import numpy as np
from utils import transform as tf

# The raw file is read as tab-separated values.
F = np.loadtxt(rawfpath, delimiter='\t')
print(F.shape)

# One lower bound (0.0) and one upper bound (1.0) per column of F.
n_cols = F.shape[1]
F_ = tf.normalize(F, lb=np.zeros(n_cols), ub=np.ones(n_cols))
print(F_[:10, :])  # preview of the first rows

# The normalized file is written next to the raw input file.
normfile = os.path.join(path, normfname)
print("Saving normalized data into {0:s} ...".format(normfile))
np.savetxt(normfile, F_, delimiter=',', fmt="%1.4e")

### Find the trade-off values

In [None]:
# Compute the trade-off values from the normalized data file and save them.

import numpy as np
from visualization import knee

# Rebuild the path here (as the peeling and palette cells do) instead of
# relying on the `normfile` variable left behind by the normalization cell,
# so this cell also works in a fresh kernel when the file is already on disk.
normfile = os.path.join(path, normfname)

if os.path.exists(normfile):
    F_ = np.loadtxt(normfile, delimiter = ',')
    # knee.params is indexed by "<frontname>-<number of columns>d".
    epskey = frontname + '-' + str(F_.shape[1]) + 'd'
    mu = knee.tradeoff(F_, eps = knee.params[epskey])
    print(mu.shape)
    print(mu[0:10])

    mufile = os.path.join(path, mufname)
    print("Saving tradeoff values to {0:s} ...".format(mufile))
    np.savetxt(mufile, mu, delimiter = ',', fmt = "%1.4e")
else:
    print("Error: " + normfile + " does not exist.")

In [None]:
# This part does the peeler.py operation: peel the normalized point cloud
# into layers and save them.
# NOTE(review): `fmt` and `plr` are not imported in any cell shown in this
# notebook -- confirm they are brought into scope before this cell runs.

mode = "default"
# mode = "no-project" # use it for line-surface

normfpath = os.path.join(path, normfname)
points = fmt.load(normfpath)

# Length of the first point; used as the target dimension (m - 1) below.
m = len(points[0])
print("Peeling data point cloud in {0:s} mode ...".format(mode))
if mode == "default":
    # the default mode will project the points
    # on a simplex on the first quadrant
    ppoints = plr.project(points)
    cpoints = plr.collapse(ppoints, dim = m - 1)
    boundaries = plr.peel(cpoints)
elif mode == "no-project":
    boundaries = plr.peel(points)
else:
    # Without this branch an unrecognized mode would leave `boundaries`
    # undefined, or silently reuse a stale value from an earlier run of
    # this cell. Stop here, the same way the palettizer cell does.
    print("Error: unknown mode \'{0:s}\'\n".format(mode))
    sys.exit(1)
fmt.cat(boundaries, dtype = 'int')

layerfpath = os.path.join(path, normfname.split('.')[0] + "-layers.out")
print("Saving {0:d} layers into {1:s} ...".format(len(boundaries), layerfpath))
fmt.save(boundaries, layerfpath, dtype = 'int')

In [None]:
# This part does the palettizer.py operation: compute palette coordinates
# from the normalized points and their layers, then save them.

n_layers = 3
mode = "invsc"
# mode = "invrv"
# mode = "rv"
# mode = "sc"

# Per mode: palettizer function name, extra keyword arguments, output suffix.
variants = {
    "invsc": ("palettize_sc", {"inverted": True}, "-palette-invsc.out"),
    "invrv": ("palettize_rv", {"inverted": True}, "-palette-invrv.out"),
    "rv": ("palettize_rv", {}, "-palette-rv.out"),
    "sc": ("palettize_sc", {}, "-palette-sc.out"),
}

normfpath = os.path.join(path, normfname)
points = fmt.load(normfpath)

layerfile = normfname.split('.')[0] + "-layers.out"
layers = fmt.load(os.path.join(path, layerfile), dtype = 'int')

print("Computing palette coordinates with {0:d} layers in {1:s} mode ...".format(n_layers, mode))

# Guard clause: stop before any computation when the mode is not recognized.
if mode not in variants:
    print("Error: unknown mode \'{0:s}\'\n".format(mode))
    sys.exit(1)

funcname, extra, suffix = variants[mode]
# Only the selected function is looked up on pltz, as in an if/elif chain.
palette_coords = getattr(pltz, funcname)(points, layers, n_layers = n_layers, **extra)
palettefpath = os.path.join(path, normfname.split('.')[0] + suffix)

print("Saving palette coordinates into {0:s} ...".format(palettefpath))
pltz.save_palette(palette_coords, palettefpath)