In [None]:
import numpy as np
import gurobipy as gp
from gurobipy import GRB
from Helper_Functions import z_expectation_variance,moment_matching_update,product_diff_list,question_extractor

import itertools
import random

import pandas as pd

import pylogit as pl
from collections import OrderedDict

import seaborn as sns
import matplotlib.pyplot as plt

#from choicedesign.design import EffDesign
#from biogeme.expressions import Beta, Variable
#import biogeme.database as db
#import biogeme.models as models

#import pyDOE2

import statsmodels.api as sm

In [None]:
#Seeded generator shared by simulated_decision_maker_selection (Gumbel noise) and by the
#prior-study simulation (partworth draws), so that those simulated choices are repeatable.
rng = np.random.default_rng(100)

In [None]:
#This function simulates a decision maker picking between objects x and y.
def simulated_decision_maker_selection(true_partworth,x,y):
    """Simulate one noisy pairwise choice between products x and y.

    The utility of each product is the dot product of true_partworth with the
    product plus an independent Gumbel(0, 1) draw taken from the module-level
    generator ``rng``.

    true_partworth: numpy array of partworths of the decision maker.
    x, y: the two products to compare, given as numpy arrays.

    Returns the tuple (preferred_option, not_preferred). y is preferred only
    when its noisy utility is strictly larger than that of x.
    """
    #The noise for y is drawn before the noise for x; keep this order so the
    #random stream is consumed exactly as before.
    noise_y = rng.gumbel(loc = 0.0, scale = 1.0)
    noise_x = rng.gumbel(loc = 0.0, scale = 1.0)

    utility_y = np.dot(true_partworth,y) + noise_y
    utility_x = np.dot(true_partworth,x) + noise_x

    if utility_y > utility_x:
        return y,x
    return x,y

In [None]:
#Load the choice design that the simulated decision makers will answer. The simulated
#responses to this design are used further down to construct a prior.
prior_design_path = 'Toyota_Corolla_Prior_Design.csv'
prior_design_data = pd.read_csv(prior_design_path, header = 0)

#Display the first 5 rows of the loaded data.
prior_design_data.head()

In [None]:
#Simulate a group of individuals answering a static questionnaire. The model parameter estimates
#obtained from this simulated study are used later as prior information for the decision maker of interest.

#Group-level mean and covariance of the partworths.
group_pref_mean = np.array([1.0,-0.5,-0.25,0.0,0.0,0.5,-0.5,-0.5,0.0,0.0,0.0])
#Alternative, correlated covariance matrix (currently not used):
#group_pref_var = np.array([[0.25, 0.125, -0.0625, 0,0,0,0,0,0,0,0],
                           #[0.125, 0.25, -0.125, 0,0,0,0,0,0,0,0],
                           #[-0.0625, -0.125, 0.25, 0,0,0,0,0,0,0,0],
                           #[0,0,0, 0.25, -0.125, 0,0,0,0,0,0],
                           #[0,0,0, -0.125, 0.25, 0,0,0,0,0,0],
                           #[0,0,0,0,0, 0.25, -0.125,0,0,0,0],
                           #[0,0,0,0,0, -0.125, 0.25,0,0,0,0],
                           #[0,0,0,0,0,0,0,0.25,0,0,0],
                           #[0,0,0,0,0,0,0,0,0.25,0,0],
                           #[0,0,0,0,0,0,0,0,0,0.25,0],
                           #[0,0,0,0,0,0,0,0,0,0,0.25]])
group_pref_var = 1.0*np.identity(11)

#Layout of the design table: every question occupies two consecutive rows (one per alternative).
num_prior_subjects = 85
questions_per_subject = 12
rows_per_subject = 2*questions_per_subject
attribute_col_positions = slice(5,16)   #positional columns holding the 11 attribute values
choice_col_position = 2                 #positional column that receives the simulated 0/1 choice

for i in range(num_prior_subjects):
    #Each simulated subject gets an individual partworth drawn from the group distribution.
    subject_i_partworth = rng.multivariate_normal(group_pref_mean,group_pref_var)
    for j in range(questions_per_subject):
        row_x = 2*j + rows_per_subject*i
        row_y = row_x + 1
        x_j = prior_design_data.iloc[row_x,attribute_col_positions].to_numpy()
        y_j = prior_design_data.iloc[row_y,attribute_col_positions].to_numpy()

        pref_i = simulated_decision_maker_selection(subject_i_partworth,x_j,y_j)[0]

        #Mark the chosen alternative with 1 and the other alternative with 0.
        x_was_chosen = np.array_equal(x_j,pref_i)
        prior_design_data.iloc[row_x,choice_col_position] = int(x_was_chosen)
        prior_design_data.iloc[row_y,choice_col_position] = int(not x_was_chosen)


print(prior_design_data.tail())

In [None]:
# Index (attribute) variables of the screening study. This single list drives the utility
# specification, the mixing variables and the size of the parameter vector, so the three
# can no longer drift apart.
index_var_names = ["Color_Metallic", "Color_White", "Color_Gray", "Wheels_Factory2", "Wheels_Factory3", "FB_Factory2",
                  "FB_Factory3","RB_Custom","HL_Factory2","TL_Factory2","SM_Custom"]
num_index_vars = len(index_var_names)

# Create the model specification for the screening study. Every variable uses the
# specification [[1,2]] and is displayed under its own column name.
# NOTE(review): in pylogit a nested list is understood to give the listed alternatives one
# shared coefficient - confirm against the pylogit documentation.
v_spec = OrderedDict()
v_names = OrderedDict()

for col in index_var_names:
    v_spec[col] = [[1,2]]
    v_names[col] = [col]

# Transform all of the index variable columns to have float dtypes
for col in index_var_names:
    prior_design_data[col] = prior_design_data[col].astype(float)

# An earlier version (disabled 8/14/2024) fitted a plain MNL model and used 85 times the inverse
# Hessian as covariance (85 participants). It was replaced by the mixed logit model below, which
# mixes over every index variable with "id" as the mixing id column.
v_model_mixed = pl.create_choice_model(data = prior_design_data,
                                      alt_id_col = "alt_id",
                                      obs_id_col = "choice_situation",
                                      choice_col = "choice",
                                      specification = v_spec,
                                      model_type = "Mixed Logit",
                                      names = v_names,
                                      mixing_id_col = "id",
                                      mixing_vars = index_var_names)

# Two parameters are estimated per index variable. just_point = True makes fit_mle return the
# optimizer result, from which only the point estimates ("x") are used here.
v_model_mixed_fit = v_model_mixed.fit_mle(init_vals=np.zeros(2 * num_index_vars),
                      num_draws=600,
                      seed=123, just_point = True)

v_model_mixed_estimates = np.array(v_model_mixed_fit["x"])

print(v_model_mixed_estimates)

# The first half of the estimates is used as the prior mean of the partworths. The second half is
# treated as standard deviations: it is squared and placed on the diagonal of the prior covariance.
v_model_partworths = v_model_mixed_estimates[:num_index_vars]
v_model_covariance = np.diag(np.square(v_model_mixed_estimates[num_index_vars:2 * num_index_vars]))

print(v_model_partworths)
print(v_model_covariance)

In [None]:
#Show the model summary as a sanity check. The model is refitted with the same settings as
#before but without just_point (presumably so the full results needed by the summary are stored).
mixed_fit_settings = dict(init_vals=np.zeros(2 * len(index_var_names)),
                          num_draws=600,
                          seed=123)
v_model_mixed.fit_mle(**mixed_fit_settings)
v_model_mixed.get_statsmodels_summary()

In [None]:
#This function creates a list of differences in products which satisfy dummy coding constraints for a specified list of
#levels
def product_diff_list_casestudy(levels):
    """Return the questions (product differences) that are possible under dummy coding.

    levels: array of numerical values, where the i-th entry denotes the number of levels of the
    i-th attribute. An attribute with k levels occupies k-1 consecutive binary variables.

    product_diff_list is called with the total number of binary variables to obtain a larger
    candidate set, and every candidate violating the dummy coding is filtered out. Within the
    entries belonging to one attribute a question is rejected when
      * the absolute values sum to more than 2, or
      * the values sum to exactly 2 or -2 (e.g. [1 1 -1] for a 3x2 design, which would need two
        levels of the same factor active in one product).

    Returns the remaining questions as a list, in the order produced by product_diff_list. An
    empty levels array imposes no constraint, so every candidate is returned.
    """
    #Number of binary variables in the attribute vector.
    num_binary_var = sum(lev - 1 for lev in levels)

    question_list = product_diff_list(num_binary_var)

    #The [start, stop) index range of each attribute is the same for every question, so it is
    #computed once instead of being re-derived for each question.
    attribute_ranges = []
    start = 0
    for lev in levels:
        attribute_ranges.append((start, start + lev - 1))
        start = start + (lev - 1)

    def satisfies_dummy_coding(question):
        for lower_bound, upper_bound in attribute_ranges:
            attribute_values = [question[j] for j in range(lower_bound, upper_bound)]
            if sum(abs(value) for value in attribute_values) > 2:
                return False
            if sum(attribute_values) in (2, -2):
                return False
        return True

    return [question for question in question_list if satisfies_dummy_coding(question)]

In [None]:
#Test the product_diff_list_casestudy function above.
#print(product_diff_list_casestudy([3,3,2,2,2,2,4]))

In [None]:
#This function is used to generate data to estimate the parameters in the normalized AO model. The normalized AO model
#is given by log(D-err/Det^(1/2)(Sig)) ~ AM/||L*mu|| + AV/||S*Sig|| + AO/||S*Sig|| + ||L*mu|| + ||S*Sig||. AM, AV, and AO denote
#the average question mean, average question variance, and average question orthogonality of a given design under prior
#N(mu, Sig). L and S denote varying signal and noise levels, respectively. The normalized AO model is used in our
#optimization procedure so that we will not have to refit the parameters in the optimization model every time the user
#answers a batch. The idea is that varying L and S enough should encompass a wide enough range so that mu and Sig will
#be within this range after updating.
#In this function, we also include MO so that we may fit a maximum orthogonality model.

#!!! rng will need to be set before calling this function !!!

def norm_AO_MO_data_generation_casestudy(init_mu, init_Sig, batch_size, L, S, num_random_batches, num_true_partworths,rng,levels):
    """Generate the regression data used to fit the normalized AO (and MO) model.

    init_mu: initial expectation of the partworths. Should be a numpy array.

    init_Sig: initial covariance matrix of the partworths. Should be a square two-dimensional
    numpy array whose rows and columns correspond to the entries of init_mu.

    batch_size: number of questions in each batch. Should be an integer greater than or equal to one.

    L: vector of signal levels (multiplied with init_mu), for example L = [0.25,1.0,4.0].

    S: vector of noise levels (multiplied with init_Sig), for example S = [0.25,1.0,4.0].

    num_random_batches: number of random batches for which log(D-err), AM, AV, AO and MO are
    collected. The same set of random batches is used for every combination of L and S.

    num_true_partworths: number of true/baseline partworths used to evaluate the D-error of a design.

    rng: random number generator of the form rng = np.random.default_rng(seed). It is the only
    source of randomness in this function (batch selection, baseline partworths and Gumbel
    errors), so calling the function again with an identically seeded generator reproduces the data.

    levels: array of numerical values, where the i-th entry denotes the number of levels of the
    i-th attribute.

    Returns the tuple (average_orthogonality, maximum_orthogonality, average_question_mean,
    average_question_variance, L_mu, S_Sig, init_sqrt_determinant, average_d_error). Every list
    holds one entry per (l, s, batch) combination, ordered with l as the outermost and the batch
    as the innermost index. When batch_size is 1 there are no orthogonality terms and the two
    orthogonality lists stay empty.
    """
    average_orthogonality = []
    maximum_orthogonality = []
    average_question_mean = []
    average_question_variance = []
    average_d_error = []
    L_mu = []
    S_Sig = []
    init_sqrt_determinant = []

    #Candidate questions. product_diff_list_casestudy is used instead of product_diff_list so that
    #only questions satisfying the dummy coding are considered.
    prod_list = product_diff_list_casestudy(levels)

    #Construct the set of batch designs. Questions within one batch are sampled without
    #replacement using rng (and not the global random module) to keep the batches reproducible.
    batch_set = []
    for _ in range(num_random_batches):
        chosen_indices = rng.choice(len(prod_list), size=batch_size, replace=False)
        batch = []
        for index in chosen_indices:
            [x,y] = question_extractor(prod_list[int(index)])
            batch.append([x,y])
        batch_set.append(batch)

    #The norms do not depend on l or s, so they are computed once.
    mu_norm = np.linalg.norm(init_mu,2)
    Sig_norm = np.linalg.norm(init_Sig,2)

    for l in L:
        print('L: '+str(l))
        for s in S:
            print('S: '+str(s))
            scaled_mu = l*init_mu
            scaled_Sig = s*init_Sig
            scaled_sqrt_determinant = np.sqrt(np.linalg.det(scaled_Sig))

            #Baseline partworths and Gumbel errors are shared by all batches of this (l, s)
            #combination. gumbel_errors[t][k] holds the pair (error for x, error for y) of
            #question k answered by baseline partworth t.
            true_partworths = [rng.multivariate_normal(scaled_mu,scaled_Sig) for _ in range(num_true_partworths)]
            gumbel_errors = rng.gumbel(0.0, 1.0, size=(num_true_partworths, batch_size, 2))

            for batch in batch_set:
                #Record the scaled norms of mu and Sig and the initial square-root determinant.
                L_mu.append(l*mu_norm)
                S_Sig.append(s*Sig_norm)
                init_sqrt_determinant.append(scaled_sqrt_determinant)

                #Calculate AM, AV, AO and MO of the batch.
                question_diffs = [np.array(x) - np.array(y) for [x,y] in batch]
                batch_question_mean = []
                batch_question_variance = []
                batch_orthogonality = []
                for p in range(batch_size):
                    diff_p = question_diffs[p]
                    batch_question_mean.append(np.abs(np.dot(scaled_mu,diff_p)))
                    batch_question_variance.append(np.dot(diff_p, np.dot(scaled_Sig,diff_p)))
                    for q in range(p+1, batch_size):
                        batch_orthogonality.append(np.abs(np.dot(diff_p, np.dot(scaled_Sig,question_diffs[q]))))

                #If the batch size is 1 there are no orthogonality terms to summarize.
                if len(batch_orthogonality) > 0:
                    average_orthogonality.append(np.mean(np.array(batch_orthogonality)))
                    maximum_orthogonality.append(np.max(np.array(batch_orthogonality)))

                average_question_mean.append(np.mean(np.array(batch_question_mean)))
                average_question_variance.append(np.mean(np.array(batch_question_variance)))

                #Simulate the D-error of the batch over the baseline partworths.
                batch_simulate_d_values = []
                for t in range(len(true_partworths)):
                    #Each baseline partworth starts again from the initial prior parameters.
                    mu = l*init_mu
                    Sig = s*init_Sig

                    for k in range(batch_size):
                        x = batch[k][0]
                        y = batch[k][1]
                        gum_x = gumbel_errors[t][k][0]
                        gum_y = gumbel_errors[t][k][1]

                        #If y is preferred, swap so that the preferred product is passed first.
                        if (np.dot(true_partworths[t],np.array(y)) + gum_y) >= (np.dot(true_partworths[t],np.array(x)) + gum_x):
                            x, y = y, x

                        #Perform moment matching after the choice is made.
                        [mu, Sig] = moment_matching_update(x,y,mu,Sig)

                    #Square root of the determinant of the final covariance matrix.
                    batch_simulate_d_values.append(np.sqrt(np.linalg.det(Sig)))

                #The average over the baseline partworths is the D-error of this batch under
                #the distribution N(l*mu, s*Sig).
                average_d_error.append(np.mean(batch_simulate_d_values))

    return average_orthogonality, maximum_orthogonality, average_question_mean, average_question_variance, L_mu, S_Sig, init_sqrt_determinant, average_d_error

In [None]:
#This function constructs a batch design based off of average question mean, average question variance, and average
#question orthogonality. For the average question orthogonality, we take the absolute value of the summands rather than
#the square. We also normalize mu and Sig in the objective so that we do not need to keep on refitting the parameters
#that go with question mean, question variance, and question orthogonality.

#For the case study, we add functionality which allows constraints to enforce dummy coding for attributes which have more
#than two levels.

def batch_design_AO_casestudy(mu,Sig,batch_size,quest_mean_log_coeff,quest_var_log_coeff,quest_orth_log_coeff,t_lim = 100,logfile=False, levels = []):
    """Design a batch of pairwise questions by solving the MIP-AC model with Gurobi.

    mu: expectation of the prior on the DM's partworth. Should be a numpy array.

    Sig: covariance matrix of the prior on the DM's partworth. Should be a square two-dimensional
    numpy array whose rows and columns correspond to the entries of mu.

    batch_size: number of questions to return. Must be at least 1 and should be less than or
    equal to the number of attributes (length of mu).

    quest_mean_log_coeff, quest_var_log_coeff, quest_orth_log_coeff: fitted coefficients of
    AM/||l*mu||, AV/||s*Sig|| and AO/||s*Sig||, respectively, in the linear model
    log(D-err/Init_det) ~ AM/||l*mu|| + AV/||s*Sig|| + AO/||s*Sig|| + ||l*mu|| + ||s*Sig||,
    where (l,s) are scaling parameters for mu and Sig that divide the space into different
    signal-to-noise ratio regions.

    t_lim: maximum amount of time (Gurobi 'Timelimit' parameter) for constructing the batch.

    logfile: whether to write a logfile of the optimization procedure.

    levels: array of numerical values, where the i-th entry denotes the number of levels of the
    i-th attribute. Used to enforce the dummy coding constraints when attributes have more than
    two levels. With the default [] no such constraint is added, i.e. all attributes are assumed
    to have two levels. Otherwise the sum of (each entry - 1) should equal the length of mu.

    Returns [Q, D]. Q[i] = [x_i, y_i] holds the two products of question i as arrays of 0/1
    values. D[i] lists the optimal Delta[i,j] values for j > i (D is empty when batch_size is 1).

    Raises ValueError when batch_size is smaller than 1 and RuntimeError when Gurobi finishes
    without any feasible solution (for example because of the time limit).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got " + str(batch_size))

    #quest_orth_log_coeff must be greater or equal to zero, otherwise the optimization problem is
    #unbounded. The fitting procedure usually gives a positive value, but very rarely it gives a
    #statistically non-significant negative one. In that case it is set to 0: the problem stays
    #bounded, but the quality of the solution in terms of D-error may not be sufficient because
    #orthogonality is no longer controlled in the objective function.
    if quest_orth_log_coeff<0.0:
        quest_orth_log_coeff = 0.0

    #Number of attributes for the products.
    n = len(Sig[0])

    m = gp.Model("mip1")
    m.setParam('Timelimit',t_lim)
    if logfile:
        m.setParam('LogFile',"Batch_AO_batchsize"+str(batch_size)+"_meancoeff_"+str(quest_mean_log_coeff)+"_varcoeff_"+
               str(quest_var_log_coeff)+"_orthcoeff_"+str(quest_orth_log_coeff)+"_v5.txt")

    #2-norms of mu and Sigma used to normalize the objective.
    mu_2norm = np.linalg.norm(mu,2)
    Sig_2norm = np.linalg.norm(Sig,2)

    #All pairs (i,j), i < j, of questions in the batch. Empty when batch_size is 1.
    question_pairs = [(i,j) for i in range(batch_size) for j in range(i+1,batch_size)]

    #Set up the x_i and y_i, i = 1,...,batchsize
    X = m.addMVar((batch_size,n),vtype = GRB.BINARY)
    Y = m.addMVar((batch_size,n),vtype = GRB.BINARY)
    if question_pairs:
        Delta = m.addVars(question_pairs, lb=0.0, vtype = GRB.CONTINUOUS)

    #Objective: normalized average question mean and variance, plus (for more than one question)
    #the normalized average absolute Sigma-orthogonality of all question pairs.
    objective = ((quest_mean_log_coeff/(batch_size*mu_2norm))*sum([mu@X[i] - mu@Y[i] for i in range(batch_size)]) +
                 (quest_var_log_coeff/(batch_size*Sig_2norm))*sum([X[i]@Sig@X[i] - X[i]@(2.0*Sig)@Y[i] +
                 Y[i]@Sig@Y[i] for i in range(batch_size)]))
    if question_pairs:
        objective = objective + (quest_orth_log_coeff/(batch_size*(batch_size-1)*Sig_2norm/2))*sum([Delta[i,j] for (i,j) in question_pairs])
    m.setObjective(objective,GRB.MINIMIZE)

    #Force the two products of question i to be different, and force the symmetry exploitation
    #condition mu'(x_i - y_i) >= 0.
    for i in range(batch_size):
        m.addConstr(X[i]@X[i] - X[i]@Y[i] - Y[i]@X[i] + Y[i]@Y[i] >= 1)
        m.addConstr(mu@X[i] - mu@Y[i] >= 0)

    #For all questions i and j, i not equal to j: Delta[i,j] bounds the absolute value of the
    #Sigma-orthogonality term, and the last two constraints make sure that the questions are
    #different, also with respect to switching the order of the products in a question.
    for (i,j) in question_pairs:
        m.addConstr(X[i]@Sig@X[j] - X[i]@Sig@Y[j] - Y[i]@Sig@X[j] + Y[i]@Sig@Y[j] - Delta[i,j] <= 0)
        m.addConstr(X[i]@Sig@X[j] - X[i]@Sig@Y[j] - Y[i]@Sig@X[j] + Y[i]@Sig@Y[j] + Delta[i,j] >= 0)
        m.addConstr(X[i]@X[i] - X[i]@Y[i] - X[i]@X[j] + X[i]@Y[j] -
                   Y[i]@X[i] + Y[i]@Y[i] + Y[i]@X[j] - Y[i]@Y[j] -
                   X[j]@X[i] + X[j]@Y[i] + X[j]@X[j] - X[j]@Y[j] +
                   Y[j]@X[i] - Y[j]@Y[i] - Y[j]@X[j] + Y[j]@Y[j] >= 1)
        m.addConstr(X[i]@X[i] - X[i]@Y[i] - X[i]@Y[j] + X[i]@X[j] -
                   Y[i]@X[i] + Y[i]@Y[i] + Y[i]@Y[j] - Y[i]@X[j] -
                   Y[j]@X[i] + Y[j]@Y[i] + Y[j]@Y[j] - Y[j]@X[j] +
                   X[j]@X[i] - X[j]@Y[i] - X[j]@Y[j] + X[j]@X[j] >= 1)

    #Dummy coding constraints (7/27/2024): at most one of the binary variables belonging to the
    #same attribute may be active in a product.
    num_attributes = len(levels)
    if num_attributes > 0:
        for i in range(batch_size):
            index_level_tracker = 0
            for j in range(num_attributes):
                m.addConstr(sum([X[i,k] for k in range(index_level_tracker,index_level_tracker + levels[j]-1)]) <= 1)
                m.addConstr(sum([Y[i,k] for k in range(index_level_tracker,index_level_tracker + levels[j]-1)]) <= 1)
                index_level_tracker = index_level_tracker + (levels[j]-1)

    m.optimize()

    #Without a feasible solution the .X attributes below are not available, so fail with a clear message.
    if m.SolCount == 0:
        raise RuntimeError("batch_design_AO_casestudy: Gurobi found no feasible batch (status " + str(m.Status) +
                           "); consider increasing t_lim or checking the constraints.")

    #Binary variables are only integral up to the solver's tolerance; round them so that the
    #returned products contain exact 0/1 values (np.abs turns a possible -0.0 into 0.0).
    Q = [[np.abs(np.rint(X[i].X)), np.abs(np.rint(Y[i].X))] for i in range(batch_size)]
    D = [[Delta[i,j].X for j in range(i+1,batch_size)] for i in range(batch_size-1)]

    return[Q,D]


In [None]:
#Test the batch_design_AO_casestudy function above.
#mu_test = np.ones(7)
#Sig_test = np.identity(7)
#batch_test = 4
#mean_coeff_test = 0.03
#var_coeff_test = -0.01
#orth_coeff_test = 0.005
#levels_test = [4,3,2,2]
#print(batch_design_AO_casestudy(mu_test,Sig_test,batch_test,mean_coeff_test,var_coeff_test,orth_coeff_test,levels = levels_test))

In [None]:
#Create our targeted decision maker: one partworth vector drawn from the group distribution
#with a dedicated seeded generator.
rng2 = np.random.default_rng(1000)
target_dm_partworth = rng2.multivariate_normal(mean = group_pref_mean,cov = group_pref_var)
print(f'target_dm_partworth: {target_dm_partworth!s}')

In [None]:
#Learn the objective parameters of the MIP-AC (batch_design_AO_casestudy) model.
rng3 = np.random.default_rng(101)

batch_size_fit = 5

#The vectors L and S discussed in "Offline Learning Framework for Specifying MIP Objective Parameters".
L_fit = [0.5,1.0,2.0]
S_fit = [0.5,1.0,2.0]

#num_random_batches_fit: number of batches for which D-error, average question mean, average question
#variance and average & maximum question covariance (orthogonality) are evaluated.
#num_true_partworths_fit: number of partworths used in simulating/evaluating the D-error of each design.
num_random_batches_fit = 1000
num_true_partworths_fit = 50

#Number of levels of each attribute.
v_levels = [4,3,3,2,2,2,2]

#Generate the data used to estimate the parameters of the AO and MO models.
(average_orthogonality_fit,
 maximum_orthogonality_fit,
 average_question_mean_fit,
 average_question_variance_fit,
 L_mu_fit,
 S_Sig_fit,
 init_sqrt_determinant_fit,
 average_d_error_fit) = norm_AO_MO_data_generation_casestudy(v_model_partworths, v_model_covariance, batch_size_fit,
                                                             L_fit, S_fit, num_random_batches_fit,
                                                             num_true_partworths_fit,rng3,v_levels)

#Collect the generated data in a dataframe for fitting the AO (MIP-AC) and MO (MIP-MC) models.
fit_column_names = ['Avg_Orth', 'Max_Orth', 'Avg_Quest_Mean', 'Avg_Quest_Var', 'L_mu_norm', 'S_Sig_norm', 'Init_Sqrt_Det', 'D_err']
fit_records = list(zip(average_orthogonality_fit, maximum_orthogonality_fit, average_question_mean_fit,
                       average_question_variance_fit, L_mu_fit, S_Sig_fit, init_sqrt_determinant_fit,
                       average_d_error_fit))
df_fit = pd.DataFrame(fit_records, columns = fit_column_names)

#Response: log of the D-error relative to the initial square-root determinant.
df_fit['log_norm_derr'] = np.log(np.divide(np.array(df_fit['D_err']),np.array(df_fit['Init_Sqrt_Det'])))

#Normalized and mean-centered independent variables. Centering is an attempt to reduce VIF; it only
#changes the intercept, which is constant and therefore not used in the optimization problem.
for centered_name, numerator_name, denominator_name in [('cent_norm_AM', 'Avg_Quest_Mean', 'L_mu_norm'),
                                                        ('cent_norm_AV', 'Avg_Quest_Var', 'S_Sig_norm'),
                                                        ('cent_norm_AO', 'Avg_Orth', 'S_Sig_norm'),
                                                        ('cent_norm_MO', 'Max_Orth', 'S_Sig_norm')]:
    normalized_values = np.divide(np.array(df_fit[numerator_name]),np.array(df_fit[denominator_name]))
    df_fit[centered_name] = normalized_values - np.mean(normalized_values)

df_fit['cent_L_mu_norm'] = df_fit['L_mu_norm'] - np.mean(np.array(df_fit['L_mu_norm']))
df_fit['cent_S_Sig_norm'] = df_fit['S_Sig_norm'] - np.mean(np.array(df_fit['S_Sig_norm']))

#Fit the linear model for the AO (MIP-AC) model. Batch size is not used as a regressor
#because it is constant throughout the questionnaire.
ao_formula = "log_norm_derr ~  cent_norm_AM + cent_norm_AV + cent_norm_AO + cent_L_mu_norm + cent_S_Sig_norm"
model_AO = sm.formula.ols(formula = ao_formula, data = df_fit).fit()
parameter_est_AO = model_AO.params

print('parameter_est_AO: ' + str(parameter_est_AO))

In [None]:
#Setup the questionnaire for the targeted decision maker.

number_of_questions_target = 20
batch_size_target = 5

#Set the optimization model parameter estimates for MIP-AC. The coefficients are looked up by the
#name of their regressor instead of by integer position: parameter_est_AO is indexed by the names
#of the model terms, and positional integer access on such an index is deprecated in pandas.
AO_alpha_exp = parameter_est_AO['cent_norm_AM']   #coefficient of the normalized average question mean
AO_kappa_exp = parameter_est_AO['cent_norm_AV']   #coefficient of the normalized average question variance
AO_gamma_exp = parameter_est_AO['cent_norm_AO']   #coefficient of the normalized average question orthogonality

In [None]:
#Start the questionnaire, using the estimates of the screening study as the prior.
mu_target = v_model_partworths
Sig_target = v_model_covariance

for j in range(number_of_questions_target):
    position_in_batch = j % batch_size_target

    #Design a new batch from the current mean and covariance whenever a batch begins.
    if position_in_batch == 0:
        batch_AO = batch_design_AO_casestudy(mu_target,Sig_target,batch_size_target,AO_alpha_exp,AO_kappa_exp,AO_gamma_exp,t_lim = 50,levels = v_levels)[0]
        print(batch_AO)

    x_target,y_target = batch_AO[position_in_batch]

    #Let the simulated decision maker answer the question.
    pref,not_pref = simulated_decision_maker_selection(target_dm_partworth,x_target,y_target)
    print([pref,not_pref])

    #Perform moment matching after the choice is made.
    mu_target,Sig_target = moment_matching_update(pref,not_pref,mu_target,Sig_target)

In [None]:
#Inspect the partworth estimates and their covariance after the questionnaire is finished.
#The covariance is used to obtain the standard errors of the partworths.
print(f'mu_target: {mu_target!s}')
print(f'Sig_target: {Sig_target!s}')

In [None]:
#Compare the ranking of the top 5 vehicles under the true partworth and under the estimated partworth.

#Load all vehicle configurations from the dataset Toyota_Corolla_Concept_Designs.
vehicle_concept_designs = pd.read_csv("Toyota_Corolla_Concept_Designs.csv",header = 0)

#Attribute vector of every vehicle configuration.
#NOTE(review): the slice 1:13 spans 12 column positions while the partworths defined in this
#notebook have 11 entries - confirm against the columns of the CSV file.
vehicle_attribute_rows = [vehicle_concept_designs.iloc[i,1:13].to_numpy() for i in range(len(vehicle_concept_designs))]

#Utility of each vehicle configuration under the true partworth and under the estimated partworth.
true_mean_utility = [np.dot(target_dm_partworth,vehicle_row) for vehicle_row in vehicle_attribute_rows]
est_mean_utility = [np.dot(mu_target,vehicle_row) for vehicle_row in vehicle_attribute_rows]

print('true_mean_utility: ' + str(true_mean_utility))
print('est_mean_utility: ' + str(est_mean_utility))

#Add both utilities as columns and rank the configurations by each of them.
vehicle_concept_designs["true_mean_util"] = true_mean_utility
vehicle_concept_designs["est_mean_util"] = est_mean_utility

vehicle_concept_designs_true_util = vehicle_concept_designs.sort_values(by = 'true_mean_util',ascending = False)
print(vehicle_concept_designs_true_util.head())

vehicle_concept_designs_est_util = vehicle_concept_designs.sort_values(by = 'est_mean_util', ascending = False)
print(vehicle_concept_designs_est_util.head())

In [None]:
#Calculate standard deviations and CI's (mean +/- 1.96 standard deviations) of the estimated partworths.
standard_deviations_final = np.sqrt(np.diag(Sig_target))
print("sd: " + str(standard_deviations_final))

lower_CI = mu_target - 1.96*standard_deviations_final
upper_CI = mu_target + 1.96*standard_deviations_final

print("Low CI: " + str(lower_CI))
print("Up CI: " + str(upper_CI))

#Check for every partworth whether the interval covers the true value. The number of partworths
#is taken from the estimate vector instead of being hard-coded.
cover = []
for i in range(len(mu_target)):
    if lower_CI[i] <= target_dm_partworth[i] <= upper_CI[i]:
        cover.append("Cover")
    else:
        cover.append("Not Cover")

print("Cover: " + str(cover))

In [None]:
#Scatter plot of the true utility against the estimated utility of the vehicle configurations.
fig, ax = plt.subplots()

sns.scatterplot(data=vehicle_concept_designs_est_util, x="est_mean_util", y="true_mean_util", ax=ax)

ax.set_xlabel("Estimated Utility")
ax.set_ylabel("True Utility")

#Save the figure to disk.
fig.savefig('Correlation_True_Est_Utility.png',bbox_inches='tight')

#plt.show()

In [None]:
#Calculate the mean squared error of the final estimate and of the prior mean with respect to the true partworth.
final_estimate_error = target_dm_partworth - mu_target
prior_mean_error = target_dm_partworth - v_model_partworths

mu_target_mse = np.mean(np.square(final_estimate_error))
prior_mean_mse = np.mean(np.square(prior_mean_error))

print(f"mu_target_mse: {mu_target_mse!s}")
print(f"prior_mean_mse: {prior_mean_mse!s}")

In [None]:
#Calculate the correlation matrix between the estimated and the true utilities.
estimated_utilities = vehicle_concept_designs_est_util["est_mean_util"]
true_utilities = vehicle_concept_designs_est_util["true_mean_util"]
np.corrcoef(estimated_utilities,true_utilities)