In [10]:
import numpy as np
import pandas as pd
import sys
sys.path.append('../')
from model.erroneousChoice import  erroneousChoice
from sklearn.utils import shuffle
from kernel import jaxrbf
from utility import  paramz
import matplotlib.pyplot as plt
import matplotlib 
matplotlib.rc('xtick', labelsize=12) 
matplotlib.rc('ytick', labelsize=12)  

import torch
from botorch.utils.multi_objective import is_non_dominated
def is_pareto(X):
    """Return the non-dominated (Pareto) mask of the rows of ``X``.

    ``X`` must be a NumPy array because it is wrapped with
    ``torch.from_numpy``. The result is whatever ``is_non_dominated``
    returns for that tensor; ``deduplicate=False`` is passed so duplicated
    rows are not filtered out by BoTorch.
    """
    points = torch.from_numpy(X)
    mask = is_non_dominated(points, deduplicate=False)
    return mask

#generate CA RA sets
def make_CA_RA(x, y, rows=None):
    """Split one choice set into accepted (Pareto) and rejected labels.

    Parameters
    ----------
    x : array-like
        Features of the objects in the choice set. Only ``x.shape[0]`` is
        read, and only when ``rows`` is not supplied.
    y : numpy.ndarray
        Utilities of the objects, one row per object; passed to
        ``is_pareto``.
    rows : array-like of int, optional
        Label reported for each object. ``None`` or an empty sequence
        (the historical default) means ``0 .. x.shape[0] - 1``.

    Returns
    -------
    acc, rej : numpy.ndarray
        Labels of the non-dominated objects and of the dominated objects.
    """
    # ``None`` replaces the former mutable default ``[]``; an explicitly
    # passed empty sequence is still treated as "not supplied".
    if rows is None or len(rows) == 0:
        rows = np.arange(x.shape[0])
    else:
        # Accept plain lists as well: boolean-mask indexing needs an ndarray.
        rows = np.asarray(rows)
    # Compute the Pareto mask once and reuse it for both halves of the split.
    pareto = is_pareto(y)
    return rows[pareto], rows[~pareto]

def make_observations(X, Y, nA, dimA, rng=None):
    """Generate random choice sets and split each into chosen / rejected.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one row per object.
    Y : numpy.ndarray
        Utility matrix, one row per object (same row order as ``X``).
    nA : int
        Number of choice sets to generate.
    dimA : int
        Number of objects per choice set (at most ``X.shape[0]`` distinct
        rows are available).
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state
        is used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    CA, RA : list
        For each choice set, the row indices (into ``X``) of the accepted
        and of the rejected objects. An empty side is stored as ``[]``.
    """
    permute = np.random.permutation if rng is None else rng.permutation
    n_objects = X.shape[0]
    CA = []
    RA = []
    for _ in range(nA):
        # Distinct rows: take the head of a random permutation of all rows.
        rows = permute(np.arange(n_objects))[0:dimA]
        acc, rej = make_CA_RA(X[rows, :], Y[rows, :], rows)
        CA.append(acc if len(acc) > 0 else [])
        # Nothing rejected is always recorded as ``[]``.
        RA.append(rej if len(rej) > 0 else [])
    return CA, RA

# Additive Manufacturing dataset

We consider 6 features (layer height, nozzle temperature, bed temperature, print speed, material, fan speed) and we use the three outputs (roughness, tension strength, elongation) to generate choice data.

We then use ChoiceGP to recover the utility function from the choice data.

In [16]:
# Load the additive-manufacturing measurements.
df = pd.read_csv("data_AM.csv")

# Encode the two categorical columns as integers.
# The result is assigned back to the column on purpose:
# `df[col].replace(..., inplace=True)` operates on a temporary Series and
# does not update `df` when pandas Copy-on-Write is active.
# `.astype(int)` fails loudly if an unexpected category is left unencoded.
df['infill_pattern'] = df['infill_pattern'].replace({'grid': 0, 'honeycomb': 1}).astype(int)
df['material'] = df['material'].replace({'abs': 0, 'pla': 1}).astype(int)
df.head()

Unnamed: 0,layer_height,wall_thickness,infill_density,infill_pattern,nozzle_temperature,bed_temperature,print_speed,material,fan_speed,roughness,tension_strenght,elongation
0,0.02,8,90,0,220,60,40,0,0,25,18,1.2
1,0.02,7,90,1,225,65,40,0,25,32,16,1.4
2,0.02,1,80,0,230,70,40,0,50,40,8,0.8
3,0.02,4,70,1,240,75,40,0,75,68,10,0.5
4,0.02,6,90,0,250,80,40,0,100,92,5,0.7


In [17]:
# Generate choice data for training and testing.
SEED = 0  # fixes the row shuffle, the random choice sets and the train/test split
np.random.seed(SEED)
latent_dim = 3

# Shuffle into a new frame so that `df` itself is left untouched and this
# cell gives the same result every time it is re-run.
df_shuffled = shuffle(df, random_state=SEED)

X = df_shuffled.iloc[:, [0, 4, 5, 6, 7, 8]].values  # select features
# True utilities are the last three columns.
Y = df_shuffled.iloc[:, [-3, -2, -1]].to_numpy(dtype=float)
# We change the sign of the first and last utility to maximise. The flip is
# applied to the copy `Y`, not to the frame, so re-running does not undo it.
Y[:, [0, 2]] *= -1

# we randomly generate choice data
nA = 300
dimA = 3
CA, RA = make_observations(X, Y, nA, dimA)

# train-test split
n_tr = 200
assert 0 < n_tr < nA, "n_tr must leave at least one choice set for testing"
indp = np.random.permutation(nA)
# training
CA_tr = [CA[i] for i in indp[0:n_tr]]
RA_tr = [RA[i] for i in indp[0:n_tr]]
# testing
CA_te = [CA[i] for i in indp[n_tr:]]
RA_te = [RA[i] for i in indp[n_tr:]]

In [None]:
# Training data handed to the choice model.
data = dict(X=X, CA=CA_tr, RA=RA_tr, dimA=dimA)

# Kernel: RBF.
Kernel = jaxrbf.RBF

# Initial value, admissible range and transform of the kernel
# hyperparameters: one lengthscale vector (one entry per input column) and
# one variance for each of the `latent_dim` latent utilities.
n_features = data["X"].shape[1]
params = {}
for k in range(latent_dim):
    lengthscale = {
        'value': np.ones(n_features, float),
        'range': np.vstack([[0.1, 3.0] for _ in range(n_features)]),
        'transform': paramz.logexp(),
    }
    variance = {
        'value': np.array([3]),
        'range': np.array([[1.0, 200.0]]),
        'transform': paramz.logexp(),
    }
    params[f"lengthscale_{k}"] = lengthscale
    params[f"variance_{k}"] = variance

# Choice model.
model = erroneousChoice(data, Kernel, params, latent_dim)

# Run variational inference and estimate the kernel hyperparameters.
model.optimize_hyperparams(niterations=4000, kernel_hypers_fixed=False)
print(model.params)


In [None]:
# Predict the latent utilities at every input in X.
# predict_VI returns the joint mean (predictions[0]) and the joint
# covariance matrix (predictions[1]) of the latent utilities.
predictions = model.predict_VI(X)

# Expected utilities as a matrix: num_X rows times latent_dim columns.
F = predictions[0].reshape(latent_dim, X.shape[0]).T

In [None]:
# Compute accuracy on the test set.
# For each test choice set, the objects are ordered as (chosen, rejected).
# The predicted label of an object is whether its predicted utility vector
# is non-dominated within the set; the true label is 1 for chosen objects
# and 0 for rejected ones. Accuracy is the fraction of matching labels.
Pred = []
YTrue = []
for chosen, rejected in zip(CA_te, RA_te):
    # An empty `rejected` contributes no rows, so one code path covers both
    # the "some rejected" and the "nothing rejected" case.
    members = np.concatenate([chosen, rejected]).astype(int)
    # Use the same Pareto test (deduplicate=False) that generated the labels;
    # the BoTorch default would mark only one of several identical rows.
    Pred.append(is_pareto(np.array(F[members])))
    YTrue.append(np.concatenate([np.ones(len(chosen)), np.zeros(len(rejected))]))

y_true = np.vstack(YTrue).astype(bool)
y_pred = np.vstack([np.asarray(p) for p in Pred]).astype(bool)
acc = float(np.mean(y_true == y_pred))
print("accuracy=",acc)