In [None]:
import sys
import csv
import gurobipy as gp
from gurobipy import GRB
import numpy as np
import scipy.integrate
import scipy.stats
import math
import pandas as pd
import itertools
import random
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns

############
#Project-local helpers. The list is wrapped in parentheses so that it can legally span several lines.
from Helper_Functions import (
    z_expectation_variance,
    moment_matching_update,
    product_diff_list,
    question_extractor,
    norm_AO_MO_data_generation,
    batch_design_AO,
    batch_design_MO,
)

In [None]:
def mean_CI_95(data):
    """Return the sample mean of `data` together with a two-sided 95% confidence interval.

    The interval is based on the t-distribution with len(data) - 1 degrees of freedom and on the
    sample standard deviation (ddof=1).

    data: The set of data for which we will calculate the mean and 95% confidence interval.

    Returns the tuple (mean, lower bound of the interval, upper bound of the interval).
    """
    n_obs = len(data)

    #Two-sided interval: 2.5% of the probability mass is left in each tail.
    critical_value = scipy.stats.t.ppf(q=1-.05/2,df=n_obs-1)
    center = np.mean(data)
    spread = np.std(data,ddof=1)

    #Half-width of the interval: critical value times the standard error of the mean.
    half_width = critical_value*spread/np.sqrt(n_obs)

    return center, center - half_width, center + half_width

In [None]:
#This function is for conducting an experiment where we compare the batch design using AO, the batch design using MO,
#JMP, and the one-step design in a sequential manner.

def sequential_AO_MO_experiment(init_mu,init_Sig,true_partworths,gumbel_error_terms,rep_per_partworth,num_questions,AO_MO_batch_size,
                               AO_alpha, AO_kappa, AO_gamma, MO_alpha, MO_kappa, MO_gamma, OS_alpha, OS_kappa, noise_par, hitrate_question_list, JMP_questionnaire, Method = 0, t=100):
    """Simulate sequential questionnaires for one question-selection method and record the estimation quality.

    For every true partworth and every repetition the estimate starts at (init_mu, init_Sig). At each question
    a pair of products (x, y) is obtained from the selected method, the simulated respondent picks one of them,
    and the estimate is updated with moment_matching_update. After each update the normalized MSE, the square
    root of the determinant of the covariance matrix, the hitrate and the mean vector are stored.

    init_mu: This is the initial estimate on the partworths
    init_Sig: This is the initial covariance matrix on the partworths
    true_partworths: These are used to make selection in the product selection stage (a list/set of partworths)
    gumbel_error_terms: Fixed gumbel error terms, indexed as [partworth][question][0 or 1]. They are only used
        when rep_per_partworth == 1.
    rep_per_partworth: This is the number of times we want to conduct a questionnaire on each partworth
    num_questions: Length of the questionnaire.
    AO_MO_batch_size: The size of the batch we will use for the orthogonal batches using AO and MO. A new batch is
        designed from the current estimate every AO_MO_batch_size questions.
    AO_alpha,AO_kappa,AO_gamma: These are the parameters that go with average question mean, average question
        variance, and average question orthogonality in the AO model, respectively.
    MO_alpha,MO_kappa,MO_gamma: These are the parameters that go with average question mean, average question
        variance, and maximum question orthogonality in the MO model, respectively.
    OS_alpha and OS_kappa: These are the parameters that go with question mean and question variance in the OS model.
    noise_par: This is a parameter which is used to increase the weight of the individuals' true partworth when
        making decision
    hitrate_question_list: This is a list of questions which will be used to calculate the hitrate, which is the
        proportion of times that an estimated partworth matches the product selection of a true underlying partworth.
    JMP_questionnaire: This is a dataframe containing the questions coming from a choice design created in JMP.
        Rows 2*j and 2*j + 1 (columns 'X1' onwards) hold the two products of question j.
    Method: 0 - Batch Design with AO
            1 - Batch Design with MO
            2 - JMP
            3 - Onestep
    t: time for constructing batch design (forwarded as t_lim).

    Returns the list [MSE_normalized, SQRTDET, HITRATE, true_partworths, MU]. MSE_normalized, SQRTDET, HITRATE and
    MU are indexed as [partworth][question] and hold one entry per repetition.

    Raises ValueError if Method is not one of 0, 1, 2, 3.

    NOTE: when rep_per_partworth != 1 the gumbel noise is drawn from a module-level generator named `rng`,
    which has to exist before this function is called.
    """
    #Fail early on an unsupported method instead of hitting an unbound x/y inside the question loop.
    if Method not in (0, 1, 2, 3):
        raise ValueError("Method must be 0 (AO), 1 (MO), 2 (JMP) or 3 (Onestep); got " + repr(Method))

    num_true_partworth = len(true_partworths)

    hitrate_total_num_of_questions = len(hitrate_question_list)

    #Set up lists to hold normalized MSE, sqrt determinant, and hitrate. Also save the mu vectors.
    def _empty_results():
        #One independent list per (partworth, question) pair.
        return [[[] for j in range(num_questions)] for u in range(num_true_partworth)]

    MSE_normalized = _empty_results()
    SQRTDET = _empty_results()
    HITRATE = _empty_results()
    MU = _empty_results()

    #Initiate the initial batches for the experiment. Note that we do not need the initial batch in the for loop because
    #all true partworths start with the same prior ( N(init_mu, init_Sig) ).
    if Method == 0:
        batch_AO_init = batch_design_AO(init_mu,init_Sig, AO_MO_batch_size,AO_alpha,AO_kappa,AO_gamma,t_lim = t)[0]
    elif Method == 1:
        batch_MO_init = batch_design_MO(init_mu,init_Sig, AO_MO_batch_size,MO_alpha,MO_kappa,MO_gamma,t_lim = t)[0]

    for u in range(num_true_partworth):
        partworth = true_partworths[u]
        #The normalized true partworth does not change across questions, so compute it once per partworth.
        partworth_unit = partworth/np.linalg.norm(partworth,ord = 2)

        for i in range(rep_per_partworth):
            mu = init_mu
            Sig = init_Sig

            for j in range(num_questions):

                if Method == 0:
                    #AO batch: reuse the shared initial batch first, then redesign after every full batch.
                    if j == 0:
                        batch_AO = batch_AO_init
                    elif j % AO_MO_batch_size == 0:
                        batch_AO = batch_design_AO(mu,Sig,AO_MO_batch_size,AO_alpha,AO_kappa,AO_gamma,t_lim = t)[0]
                    [x,y] = batch_AO[j % AO_MO_batch_size]

                elif Method == 1:
                    #MO batch: same schedule as the AO batch.
                    if j == 0:
                        batch_MO = batch_MO_init
                    elif j % AO_MO_batch_size == 0:
                        batch_MO = batch_design_MO(mu,Sig,AO_MO_batch_size,MO_alpha,MO_kappa,MO_gamma,t_lim = t)[0]
                    [x,y] = batch_MO[j % AO_MO_batch_size]

                elif Method == 2:
                    #JMP: question j is stored in two consecutive rows of the questionnaire.
                    x = np.array(JMP_questionnaire.loc[2*j,'X1':])
                    y = np.array(JMP_questionnaire.loc[2*j + 1,'X1':])

                else:
                    #Onestep: a batch of size one designed with the AO routine and zero orthogonality weight.
                    onestep_question = batch_design_AO(mu,Sig,1,OS_alpha,OS_kappa,0,t_lim = t)[0]
                    [x,y] = onestep_question[0]

                #Instantiate gumbel random variables which are used in the product choice selection process. If there is
                #only one replication per partworth, we will use a fixed gumbel noise matrix (gumbel_error_terms).
                if rep_per_partworth == 1:
                    gum_x = gumbel_error_terms[u][j][0]
                    gum_y = gumbel_error_terms[u][j][1]
                else:
                    gum_x = rng.gumbel(0,1)
                    gum_y = rng.gumbel(0,1)

                #See preference between two products. The chosen product must come first in the update below,
                #so swap the pair when the user prefers y over x.
                utility_x = noise_par*np.dot(partworth,np.array(x)) + gum_x
                utility_y = noise_par*np.dot(partworth,np.array(y)) + gum_y
                if utility_y >= utility_x:
                    x, y = y, x

                #Perform moment matching after choice is made.
                [mu, Sig] = moment_matching_update(x,y,mu,Sig)

                #add mu to the list of mu vectors
                MU[u][j].append(mu)

                #Add the normalized MSE between the true partworth and estimator at question j to a list, and add the
                #square root of the determinant of the covariance matrix at question j into a list.
                MSE_normalized[u][j].append(np.square(np.subtract(partworth_unit,
                                                            mu/np.linalg.norm(mu, ord = 2))).mean())
                SQRTDET[u][j].append(np.sqrt(np.linalg.det(Sig)))

                #Calculate hitrate for this particular partworth after updating: a hit is a question on which the
                #true partworth and the estimate have utility differences of the same sign.
                hits = sum(1 for q in hitrate_question_list if np.dot(partworth,q)*np.dot(mu,q)>=0)
                HITRATE[u][j].append(hits/hitrate_total_num_of_questions)

    return[MSE_normalized,SQRTDET,HITRATE,true_partworths,MU]

In [None]:
#Fitting the models batch_AO and batch_MO, and one-step.
#Seed the three random number sources seeded in this notebook (NumPy generator, NumPy legacy global state, random).
rng = np.random.default_rng(100)
np.random.seed(100)
random.seed(100)

#signal to noise ratio, read from the first command-line argument.
#1 - LOW: multiply expectation by 0.25 and covariance by 4.0
#2 - REG: multiply expectation by 1.0 and covariance by 1.0
#3 - HIGH: multiply expectation by 4.0 and covariance by 0.25
#NOTE(review): inside an interactive Jupyter kernel sys.argv[1] is usually not the intended setting - confirm
#how this notebook is launched.
snr = int(sys.argv[1])

#Each setting maps to (multiplier of the expectation, multiplier of the covariance).
snr_prior_multipliers = {1: (0.25, 4.0), 2: (1.0, 1.0), 3: (4.0, 0.25)}

#Reject unknown settings here; otherwise the prior would stay undefined and later cells would fail with a NameError.
if snr not in snr_prior_multipliers:
    raise ValueError("snr must be 1 (low), 2 (regular) or 3 (high); got " + str(snr))

mu_multiplier_fit, Sig_multiplier_fit = snr_prior_multipliers[snr]
init_mu_fit = mu_multiplier_fit*np.array(6*[1.0])
init_Sig_fit = Sig_multiplier_fit*np.identity(6)

batch_size_fit = 4

L_fit = [0.5,1.0,2.0]
S_fit = [0.5,1.0,2.0]

num_random_batches_fit = 1000
num_true_partworths_fit = 50

#Onestep settings
batch_size_OS = 1
num_random_batches_OS = 500

In [None]:
#Generate the data in order to estimate the parameters of the AO and MO models.
#The call returns eight series, which are unpacked in the order in which they are returned.
(
    average_orthogonality_fit,
    maximum_orthogonality_fit,
    average_question_mean_fit,
    average_question_variance_fit,
    L_mu_fit,
    S_Sig_fit,
    init_sqrt_determinant_fit,
    average_d_error_fit,
) = norm_AO_MO_data_generation(
    init_mu_fit,
    init_Sig_fit,
    batch_size_fit,
    L_fit,
    S_fit,
    num_random_batches_fit,
    num_true_partworths_fit,
)

In [None]:
#Generate the data in order to estimate the parameters of OS model.
#Same generator as for the AO and MO models, called with the onestep batch size and number of random batches.
(
    average_orthogonality_OS,
    maximum_orthogonality_OS,
    average_question_mean_OS,
    average_question_variance_OS,
    L_mu_OS,
    S_Sig_OS,
    init_sqrt_determinant_OS,
    average_d_error_OS,
) = norm_AO_MO_data_generation(
    init_mu_fit,
    init_Sig_fit,
    batch_size_OS,
    L_fit,
    S_fit,
    num_random_batches_OS,
    num_true_partworths_fit,
)

In [None]:
#Create a dataframe of the generated data for fitting the parameters of the AO and MO models.
#Every column name is paired with the generated series that fills it; rows are formed by zipping the series.
fit_series_by_column = {
    'Avg_Orth': average_orthogonality_fit,
    'Max_Orth': maximum_orthogonality_fit,
    'Avg_Quest_Mean': average_question_mean_fit,
    'Avg_Quest_Var': average_question_variance_fit,
    'L_mu_norm': L_mu_fit,
    'S_Sig_norm': S_Sig_fit,
    'Init_Sqrt_Det': init_sqrt_determinant_fit,
    'D_err': average_d_error_fit,
}
df_fit = pd.DataFrame(list(zip(*fit_series_by_column.values())),
                  columns = list(fit_series_by_column.keys()))

In [None]:
#Create a dataframe of the generated data for fitting the parameters of the OS model.
#Every column name is paired with the generated series that fills it; rows are formed by zipping the series.
fit_series_by_column_OS = {
    'Quest_Mean_OS': average_question_mean_OS,
    'Quest_Var_OS': average_question_variance_OS,
    'L_mu_norm_OS': L_mu_OS,
    'S_Sig_norm_OS': S_Sig_OS,
    'Init_Sqrt_Det_OS': init_sqrt_determinant_OS,
    'D_err_OS': average_d_error_OS,
}
df_fit_OS = pd.DataFrame(list(zip(*fit_series_by_column_OS.values())),
                  columns = list(fit_series_by_column_OS.keys()))

In [None]:
#Add some new columns to the dataset. We mean-center the independent variables to attempt to reduce VIF. This will
#not affect the value of the coefficients, except for the intercept.

#Response: logarithm of the D-error divided by the initial square root of the determinant.
df_fit['log_norm_derr'] = np.log(np.divide(np.array(df_fit['D_err']),np.array(df_fit['Init_Sqrt_Det'])))

#Normalized and mean-centered regressors: (new column, numerator column, denominator column).
for centered_name, numerator_name, denominator_name in [
        ('cent_norm_AM', 'Avg_Quest_Mean', 'L_mu_norm'),
        ('cent_norm_AV', 'Avg_Quest_Var', 'S_Sig_norm'),
        ('cent_norm_AO', 'Avg_Orth', 'S_Sig_norm'),
        ('cent_norm_MO', 'Max_Orth', 'S_Sig_norm')]:
    normalized_values = np.divide(np.array(df_fit[numerator_name]),np.array(df_fit[denominator_name]))
    df_fit[centered_name] = normalized_values - np.mean(normalized_values)

#Mean-centered scale columns.
L_mu_norm_mean = np.mean(np.array(df_fit['L_mu_norm']))
df_fit['cent_L_mu_norm'] = df_fit['L_mu_norm'] - L_mu_norm_mean
S_Sig_norm_mean = np.mean(np.array(df_fit['S_Sig_norm']))
df_fit['cent_S_Sig_norm'] = df_fit['S_Sig_norm'] - S_Sig_norm_mean

In [None]:
#Add some new columns to the OS fit dataset. We mean-center the independent variables to attempt to reduce VIF. This
#will not affect the value of the coefficients, except for the intercept.

#Response: logarithm of the D-error divided by the initial square root of the determinant.
df_fit_OS['log_norm_derr_OS'] = np.log(np.divide(np.array(df_fit_OS['D_err_OS']),np.array(df_fit_OS['Init_Sqrt_Det_OS'])))

#Normalized and mean-centered regressors: (new column, numerator column, denominator column).
for centered_name_OS, numerator_name_OS, denominator_name_OS in [
        ('cent_quest_mean_norm_OS', 'Quest_Mean_OS', 'L_mu_norm_OS'),
        ('cent_quest_var_norm_OS', 'Quest_Var_OS', 'S_Sig_norm_OS')]:
    normalized_values_OS = np.divide(np.array(df_fit_OS[numerator_name_OS]),np.array(df_fit_OS[denominator_name_OS]))
    df_fit_OS[centered_name_OS] = normalized_values_OS - np.mean(normalized_values_OS)

#Mean-centered scale columns.
L_mu_norm_OS_mean = np.mean(np.array(df_fit_OS['L_mu_norm_OS']))
df_fit_OS['cent_L_mu_norm_OS'] = df_fit_OS['L_mu_norm_OS'] - L_mu_norm_OS_mean
S_Sig_norm_OS_mean = np.mean(np.array(df_fit_OS['S_Sig_norm_OS']))
df_fit_OS['cent_S_Sig_norm_OS'] = df_fit_OS['S_Sig_norm_OS'] - S_Sig_norm_OS_mean

In [None]:
#Save resulting file as a CSV.
#For each signal-to-noise setting: (file for the AO/MO fit data, file for the OS fit data).
fit_data_csv_names = {
    1: ('ThirdseqExpv2_Normalized_AO_MO_Model_Data_mu025_Sig4Ident_batchsize4_L_05_1_2_S_05_1_2_nrb_1000_ntp_50.csv',
        'Normalized_OS_Model_Data_mu025_Sig4Ident_batchsize1_L_05_1_2_S_05_1_2_nrb_500_ntp_50_expIIIv2.csv'),
    2: ('ThirdseqExpv2_Normalized_AO_MO_Model_Data_mu1_Sig1Ident_batchsize4_L_05_1_2_S_05_1_2_nrb_1000_ntp_50.csv',
        'Normalized_OS_Model_Data_mu1_Sig1Ident_batchsize1_L_05_1_2_S_05_1_2_nrb_500_ntp_50_expIIIv2.csv'),
    3: ('ThirdseqExpv2_Normalized_AO_MO_Model_Data_mu4_Sig025Ident_batchsize4_L_05_1_2_S_05_1_2_nrb_1000_ntp_50.csv',
        'Normalized_OS_Model_Data_mu4_Sig025Ident_batchsize1_L_05_1_2_S_05_1_2_nrb_500_ntp_50_expIIIv2.csv'),
}

#Nothing is written for a setting that is not listed above.
if snr in fit_data_csv_names:
    AO_MO_fit_csv_name, OS_fit_csv_name = fit_data_csv_names[snr]
    df_fit.to_csv(AO_MO_fit_csv_name)
    df_fit_OS.to_csv(OS_fit_csv_name)

In [None]:
#Model with AO: ordinary least squares fit of the log normalized D-error on the centered regressors,
#using the average orthogonality column.
formula_AO = "log_norm_derr ~  cent_norm_AM + cent_norm_AV + cent_norm_AO + cent_L_mu_norm + cent_S_Sig_norm"
model_AO = sm.formula.ols(formula = formula_AO, data = df_fit).fit()

#Estimated coefficients of the fitted model.
parameter_est_AO = model_AO.params

In [None]:
#Model with MO: ordinary least squares fit of the log normalized D-error on the centered regressors,
#using the maximum orthogonality column.
formula_MO = "log_norm_derr ~  cent_norm_AM + cent_norm_AV + cent_norm_MO + cent_L_mu_norm + cent_S_Sig_norm"
model_MO = sm.formula.ols(formula = formula_MO, data = df_fit).fit()

#Estimated coefficients of the fitted model.
parameter_est_MO = model_MO.params

In [None]:
#Model with OS: ordinary least squares fit of the OS log normalized D-error on the centered OS regressors.
formula_OS = "log_norm_derr_OS ~  cent_quest_mean_norm_OS + cent_quest_var_norm_OS + cent_L_mu_norm_OS + cent_S_Sig_norm_OS"
model_OS = sm.formula.ols(formula = formula_OS, data = df_fit_OS).fit()

#Estimated coefficients of the fitted model.
parameter_est_OS = model_OS.params

In [None]:
#Save model parameters in txt file
#One output file per signal-to-noise setting; nothing is written for a setting that is not listed.
modelparams_txt_names = {
    1: 'ThirdseqExpv2_modelparams_lowsnr.txt',
    2: 'ThirdseqExpv2_modelparams_medsnr.txt',
    3: 'ThirdseqExpv2_modelparams_highsnr.txt',
}

if snr in modelparams_txt_names:
    #Saved order: AO positions 1-3, MO positions 1-3, OS positions 1-2 of the fitted parameter vectors.
    #The parameter vectors are indexed by position with .iloc: they are pandas Series labelled by term name, and
    #looking up integer positions with plain [] on such a Series is deprecated in recent pandas versions.
    params_est = np.array([parameter_est_AO.iloc[1],parameter_est_AO.iloc[2],parameter_est_AO.iloc[3],
                           parameter_est_MO.iloc[1],parameter_est_MO.iloc[2],parameter_est_MO.iloc[3],
                           parameter_est_OS.iloc[1],parameter_est_OS.iloc[2]])
    np.savetxt(modelparams_txt_names[snr],params_est)

In [None]:
#Settings for experiment
rng = np.random.default_rng(100)

#For each signal-to-noise setting: (multiplier of the prior mean, multiplier of the prior covariance,
#file holding the JMP choice design).
snr_settings_exp = {
    1: (0.25, 4.0, 'JMP_attr_6_exp_1_cov_1_loc_025_scale_4_quest_16.csv'),
    2: (1.0, 1.0, 'JMP_attr_6_exp_1_cov_1_loc_1_scale_1_quest_16.csv'),
    3: (4.0, 0.25, 'JMP_attr_6_exp_1_cov_1_loc_4_scale_025_quest_16.csv'),
}

#Reject unknown settings here; otherwise the prior and the JMP design would stay undefined.
if snr not in snr_settings_exp:
    raise ValueError("snr must be 1 (low), 2 (regular) or 3 (high); got " + str(snr))

mu_multiplier_exp, Sig_multiplier_exp, JMP_csv_name = snr_settings_exp[snr]
mu_exp = mu_multiplier_exp*np.array(6*[1.0])
Sig_exp = Sig_multiplier_exp*np.identity(6)

#There will be 100 true partworths, each drawn from N(mu_exp, Sig_exp) with the generator seeded above.
num_true_partworths_exp = 100
true_partworth_exp = [rng.multivariate_normal(mu_exp,Sig_exp) for _ in range(num_true_partworths_exp)]

#only one repetition per each partworth
rep_per_partworth_exp = 1
num_questions_exp = 16
batch_size_exp = 4

#Fixed gumbel noise, indexed as [partworth][question][0 or 1].
#NOTE: these values come from NumPy's legacy global generator, which is not re-seeded in this cell, so they depend
#on the global random state left behind by the cells executed before this one.
gumbel_error_terms_exp = [[[np.random.gumbel(0,1) for k in range(2)] for j in range(num_questions_exp)] for i in range(num_true_partworths_exp)]

noise_par_exp = 1.0

#Model parameters are taken by position from the fitted parameter vectors. .iloc is used because the vectors are
#pandas Series labelled by term name, and looking up integer positions with plain [] on such a Series is deprecated
#in recent pandas versions.
AO_alpha_exp = parameter_est_AO.iloc[1]
AO_kappa_exp = parameter_est_AO.iloc[2]
AO_gamma_exp = parameter_est_AO.iloc[3]

MO_alpha_exp = parameter_est_MO.iloc[1]
MO_kappa_exp = parameter_est_MO.iloc[2]
MO_gamma_exp = parameter_est_MO.iloc[3]

OS_alpha_exp = parameter_est_OS.iloc[1]
OS_kappa_exp = parameter_est_OS.iloc[2]

#Create a list of questions for hitrate
hitrate_question_list_exp = product_diff_list(6)

#Load the JMP choice design and keep the columns from 'X1' onwards.
JMP_df = pd.read_csv(JMP_csv_name)
JMP_df_exp = JMP_df.loc[:,'X1':]


In [None]:
#Batch_AO
#Re-seed the random number sources with the fixed seed 100 before running this method.
rng = np.random.default_rng(100)
np.random.seed(100)
random.seed(100)

#Method = 0 selects the batch design with AO.
(Norm_MSE_AO, SQRTDET_AO, HITRATE_AO, true_partworths_AO, MU_AO) = sequential_AO_MO_experiment(
    init_mu = mu_exp,
    init_Sig = Sig_exp,
    true_partworths = true_partworth_exp,
    gumbel_error_terms = gumbel_error_terms_exp,
    rep_per_partworth = rep_per_partworth_exp,
    num_questions = num_questions_exp,
    AO_MO_batch_size = batch_size_exp,
    AO_alpha = AO_alpha_exp,
    AO_kappa = AO_kappa_exp,
    AO_gamma = AO_gamma_exp,
    MO_alpha = MO_alpha_exp,
    MO_kappa = MO_kappa_exp,
    MO_gamma = MO_gamma_exp,
    OS_alpha = OS_alpha_exp,
    OS_kappa = OS_kappa_exp,
    noise_par = noise_par_exp,
    hitrate_question_list = hitrate_question_list_exp,
    JMP_questionnaire = JMP_df_exp,
    Method = 0,
)

In [None]:
#Batch_MO
#Re-seed the random number sources with the fixed seed 100 before running this method.
rng = np.random.default_rng(100)
np.random.seed(100)
random.seed(100)

#Method = 1 selects the batch design with MO.
(Norm_MSE_MO, SQRTDET_MO, HITRATE_MO, true_partworths_MO, MU_MO) = sequential_AO_MO_experiment(
    init_mu = mu_exp,
    init_Sig = Sig_exp,
    true_partworths = true_partworth_exp,
    gumbel_error_terms = gumbel_error_terms_exp,
    rep_per_partworth = rep_per_partworth_exp,
    num_questions = num_questions_exp,
    AO_MO_batch_size = batch_size_exp,
    AO_alpha = AO_alpha_exp,
    AO_kappa = AO_kappa_exp,
    AO_gamma = AO_gamma_exp,
    MO_alpha = MO_alpha_exp,
    MO_kappa = MO_kappa_exp,
    MO_gamma = MO_gamma_exp,
    OS_alpha = OS_alpha_exp,
    OS_kappa = OS_kappa_exp,
    noise_par = noise_par_exp,
    hitrate_question_list = hitrate_question_list_exp,
    JMP_questionnaire = JMP_df_exp,
    Method = 1,
)

In [None]:
#JMP
#Re-seed the random number sources with the fixed seed 100 before running this method.
rng = np.random.default_rng(100)
np.random.seed(100)
random.seed(100)

#Method = 2 selects the questions of the JMP choice design.
(Norm_MSE_JMP, SQRTDET_JMP, HITRATE_JMP, true_partworths_JMP, MU_JMP) = sequential_AO_MO_experiment(
    init_mu = mu_exp,
    init_Sig = Sig_exp,
    true_partworths = true_partworth_exp,
    gumbel_error_terms = gumbel_error_terms_exp,
    rep_per_partworth = rep_per_partworth_exp,
    num_questions = num_questions_exp,
    AO_MO_batch_size = batch_size_exp,
    AO_alpha = AO_alpha_exp,
    AO_kappa = AO_kappa_exp,
    AO_gamma = AO_gamma_exp,
    MO_alpha = MO_alpha_exp,
    MO_kappa = MO_kappa_exp,
    MO_gamma = MO_gamma_exp,
    OS_alpha = OS_alpha_exp,
    OS_kappa = OS_kappa_exp,
    noise_par = noise_par_exp,
    hitrate_question_list = hitrate_question_list_exp,
    JMP_questionnaire = JMP_df_exp,
    Method = 2,
)

In [None]:
#OS
#Re-seed the random number sources with the fixed seed 100 before running this method.
rng = np.random.default_rng(100)
np.random.seed(100)
random.seed(100)

#Method = 3 selects the onestep design.
(Norm_MSE_OS, SQRTDET_OS, HITRATE_OS, true_partworths_OS, MU_OS) = sequential_AO_MO_experiment(
    init_mu = mu_exp,
    init_Sig = Sig_exp,
    true_partworths = true_partworth_exp,
    gumbel_error_terms = gumbel_error_terms_exp,
    rep_per_partworth = rep_per_partworth_exp,
    num_questions = num_questions_exp,
    AO_MO_batch_size = batch_size_exp,
    AO_alpha = AO_alpha_exp,
    AO_kappa = AO_kappa_exp,
    AO_gamma = AO_gamma_exp,
    MO_alpha = MO_alpha_exp,
    MO_kappa = MO_kappa_exp,
    MO_gamma = MO_gamma_exp,
    OS_alpha = OS_alpha_exp,
    OS_kappa = OS_kappa_exp,
    noise_par = noise_par_exp,
    hitrate_question_list = hitrate_question_list_exp,
    JMP_questionnaire = JMP_df_exp,
    Method = 3,
)

In [None]:
#Construct the columns of the dataframe: one row per (method, true partworth, repetition, question).

#Results of each method in the order [Norm_MSE, SQRTDET, HITRATE, true partworths, MU].
data_collection = [[Norm_MSE_AO, SQRTDET_AO, HITRATE_AO, true_partworths_AO, MU_AO],[Norm_MSE_MO, SQRTDET_MO, HITRATE_MO, true_partworths_MO, MU_MO],
                  [Norm_MSE_JMP, SQRTDET_JMP, HITRATE_JMP, true_partworths_JMP, MU_JMP],[Norm_MSE_OS, SQRTDET_OS, HITRATE_OS, true_partworths_OS, MU_OS]]

method_range = len(data_collection)

#The sizes are taken from the experiment settings instead of being repeated as literals (100 partworths,
#6 attributes), so that this cell keeps working when the settings change.
num_partworths_data = num_true_partworths_exp
num_attributes_data = len(mu_exp)

Norm_MSE_col = []
SQRTDET_col = []
HITRATE_col = []
partworthID_col = []
Method_col = []

rep_col = []
quest_col = []

method_types = ['AO','MO','JMP','OS']

#Columns here are the entries in the ith component of the true partworth and estimator mu.
true_partworths_col = [[] for i in range(num_attributes_data)]
MU_col = [[] for i in range(num_attributes_data)]

#Construct the Norm_MSE, SQRTDET, and HITRATE columns
#Methods are 0=Batch_AO, 1=Batch_MO, 2=JMP, 3=OS
for Meth in range(method_range):
    norm_mse_meth, sqrtdet_meth, hitrate_meth, partworths_meth, mu_meth = data_collection[Meth]
    for part in range(num_partworths_data):
        for r in range(rep_per_partworth_exp):
            for q in range(num_questions_exp):
                Norm_MSE_col.append(norm_mse_meth[part][q][r])
                SQRTDET_col.append(sqrtdet_meth[part][q][r])

                Method_col.append(Meth)
                partworthID_col.append(part)

                #Repetitions and questions are numbered from 1 in the dataframe.
                rep_col.append(r + 1)
                quest_col.append(q + 1)

                HITRATE_col.append(hitrate_meth[part][q][r])

                for i in range(num_attributes_data):
                    true_partworths_col[i].append(partworths_meth[part][i])
                    MU_col[i].append(mu_meth[part][q][r][i])

#Data for the mu vector and true partworths
df_columns_data = [Method_col,partworthID_col,rep_col,quest_col,Norm_MSE_col,SQRTDET_col,HITRATE_col]
df_columns_data.extend(MU_col)
df_columns_data.extend(true_partworths_col)

#The columns are stacked into one numeric array and transposed, so that every row is one observation.
df_columns_data = np.array(df_columns_data).T

#Make column names
df_columns_names = ['Method', 'PartworthID','Rep','Question','Norm_MSE','SQRTDet','HITRATE']
df_columns_names.extend('Mu_' + str(i+1) for i in range(num_attributes_data))
df_columns_names.extend('True_' + str(i+1) for i in range(num_attributes_data))

#Make dataframe. The number of columns is 7 + 2*attributes. The number of rows is
#method_range*(number of true partworths)*(repetitions per partworth)*num_questions. Rows are numbered from 1.
df_data = pd.DataFrame(data = df_columns_data,
                       index = range(1,method_range*num_partworths_data*rep_per_partworth_exp*num_questions_exp + 1),
                       columns = df_columns_names)

#Here we add a column for normalized D-error, and its log transformation.
df_data["Normalized_SQRTDet"] = df_data["SQRTDet"]/(np.sqrt(np.linalg.det(Sig_exp)))
df_data["Log_Normalized_SQRTDet"] = np.log(df_data["Normalized_SQRTDet"])

#For each signal-to-noise setting: (loc tag, scale tag) used in the output file name.
#NOTE(review): the tag 25 appears to stand for the 0.25 multiplier - confirm the naming convention.
sequential_csv_tags = {1: (25, 4), 2: (1, 1), 3: (4, 25)}

#Nothing is written for a setting that is not listed above.
if snr in sequential_csv_tags:
    loc_tag_data, scale_tag_data = sequential_csv_tags[snr]
    df_data.to_csv('ThirdseqExpv2_sequential_data_AO_MO_JMP_OS_attr_'+str(num_attributes_data)+ '_exp_'+str(1)+'_cov_'+str(1) +
               '_loc_' + str(loc_tag_data) +'_scale_'+ str(scale_tag_data) + '_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.csv',
               index=True,header=True)

In [None]:
#Determinant, 100 partworths (disabled: every line of this cell is commented out, so no figure is produced)
#fig = plt.figure()
#sns.lineplot(data = df_data, x = 'Question',y = 'SQRTDet', hue = 'Method', palette = 'deep').set(
    #title='Square Root of Determinant')
#plt.legend(labels=["AO","MO","JMP","OS"])
#if snr == 1:
    #fig.savefig('ThirdseqExpv2_Det_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(25)+'_scale_'+str(4)+'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')
#if snr == 2:
    #fig.savefig('ThirdseqExpv2_Det_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(1)+'_scale_'+str(1)+'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')
#if snr == 3:
    #fig.savefig('ThirdseqExpv2_Det_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(4)+'_scale_'+str(25)+'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')

In [None]:
#Normalized MSE, 100 partworths (disabled: every line of this cell is commented out, so no figure is produced)
#fig = plt.figure()
#sns.lineplot(data = df_data, x = 'Question',y = 'Norm_MSE', hue = 'Method', palette = 'deep').set(
    #title='Normalized MSE')
#plt.legend(labels=["AO","MO","JMP","OS"])
#if snr == 1:
    #fig.savefig('ThirdseqExpv2_Normmse_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(25)+'_scale_'+ str(4) +'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')
#if snr == 2:
    #fig.savefig('ThirdseqExpv2_Normmse_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(1)+'_scale_'+ str(1) +'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')
#if snr == 3:
    #fig.savefig('ThirdseqExpv2_Normmse_AO_MO_JMP_OS_attr_'+str(6)+'_exp_'+str(1)+'_cov_'+str(1)+'_loc_'+str(4)+'_scale_'+ str(25) +'_quest_'+ str(num_questions_exp)+'_batch_'+str(batch_size_exp)+'.jpg',bbox_inches='tight')

In [None]:
#Log normalized D-error, 100 partworths
#For each signal-to-noise setting: (figure title, loc tag of the file name, scale tag of the file name).
lognorm_plot_settings = {
    1: ('Logarithm of Normalized D-error: Low SNR', 25, 4),
    2: ('Logarithm of Normalized D-error: Medium SNR', 1, 1),
    3: ('Logarithm of Normalized D-Error: High SNR', 4, 25),
}

#Nothing is plotted for a setting that is not listed above.
if snr in lognorm_plot_settings:
    lognorm_title, lognorm_loc_tag, lognorm_scale_tag = lognorm_plot_settings[snr]

    fig = plt.figure()
    #One line per method, distinguished by both colour and line style.
    lognorm_axes = sns.lineplot(data = df_data, x = 'Question',y = 'Log_Normalized_SQRTDet', hue = 'Method', style = 'Method', palette = 'deep')
    lognorm_axes.set(title=lognorm_title)
    #The legend entries are relabelled by position.
    plt.legend(labels=["MIP-AC","MIP-MC","JMP","OS"])
    plt.xlabel("Question")
    plt.ylabel("Log Norm D-Err")
    fig.savefig('ThirdseqExpv2_Det_AO_MO_JMP_OS_attr_6_exp_1_cov_1_loc_'+str(lognorm_loc_tag)+'_scale_'+str(lognorm_scale_tag)+'_quest_16_batch_4_LogNorm.png',bbox_inches='tight')

In [None]:
#Normalized MSE, 100 partworths
#For each signal-to-noise setting: (figure title, loc tag of the file name, scale tag of the file name).
normmse_plot_settings = {
    1: ('Normalized MSE: Low SNR', 25, 4),
    2: ('Normalized MSE: Medium SNR', 1, 1),
    3: ('Normalized MSE: High SNR', 4, 25),
}

#Nothing is plotted for a setting that is not listed above.
if snr in normmse_plot_settings:
    normmse_title, normmse_loc_tag, normmse_scale_tag = normmse_plot_settings[snr]

    fig = plt.figure()
    #One line per method, distinguished by both colour and line style.
    normmse_axes = sns.lineplot(data = df_data, x = 'Question',y = 'Norm_MSE', hue = 'Method', style = 'Method', palette = 'deep')
    normmse_axes.set(title=normmse_title)
    #The legend entries are relabelled by position.
    plt.legend(labels=["MIP-AC","MIP-MC","JMP","OS"])
    plt.xlabel("Question")
    plt.ylabel("Norm MSE")
    fig.savefig('ThirdseqExpv2_Normmse_MIPAC_MIPMC_JMP_OS_attr_6_exp_1_cov_1_loc_'+str(normmse_loc_tag)+'_scale_'+str(normmse_scale_tag)+'_quest_16_batch_4.png',bbox_inches='tight')

In [None]:
#Collect data from the last question, partition with respect to the
#method type (0 = AO, 1 = MO, 2 = JMP, 3 = OS).
#The last question is identified through num_questions_exp instead of a repeated literal, so that this cell follows
#the questionnaire length used in the experiment.
is_last_question = df_data['Question'] == num_questions_exp

df_data_AO_lastquest = df_data[is_last_question & (df_data['Method'] == 0)]
df_data_MO_lastquest = df_data[is_last_question & (df_data['Method'] == 1)]
df_data_JMP_lastquest = df_data[is_last_question & (df_data['Method'] == 2)]
df_data_OS_lastquest = df_data[is_last_question & (df_data['Method'] == 3)]

In [None]:
#Compute means and 95% confidence intervals of the Log Normalized D-error for the last question for each method
lastquest_metric = 'Log_Normalized_SQRTDet'

AO_lastquest_mean_CI = mean_CI_95(df_data_AO_lastquest[lastquest_metric])
MO_lastquest_mean_CI = mean_CI_95(df_data_MO_lastquest[lastquest_metric])
JMP_lastquest_mean_CI = mean_CI_95(df_data_JMP_lastquest[lastquest_metric])
OS_lastquest_mean_CI = mean_CI_95(df_data_OS_lastquest[lastquest_metric])

#Method name -> (mean, lower bound, upper bound).
mean_CI_info = dict(AO = AO_lastquest_mean_CI,
                    MO = MO_lastquest_mean_CI,
                    JMP = JMP_lastquest_mean_CI,
                    OS = OS_lastquest_mean_CI)

In [None]:
#Save mean and CI information to a CSV
#One output file per signal-to-noise setting; nothing is written for a setting that is not listed.
mean_CI_csv_names = {
    1: "low_snr_mean_CI_lastquest.csv",
    2: "med_snr_mean_CI_lastquest.csv",
    3: "high_snr_mean_CI_lastquest.csv",
}

if snr in mean_CI_csv_names:
    #The file is opened in a with-block so that it is flushed and closed once all rows are written.
    #newline="" is what the csv module asks for, so that it controls the line endings itself.
    with open(mean_CI_csv_names[snr], "w", newline="") as mean_CI_file:
        w = csv.writer(mean_CI_file)
        # loop over dictionary keys and values
        for key, val in mean_CI_info.items():
            # write one row per method: name, mean, lower bound, upper bound
            w.writerow([key,val[0],val[1],val[2]])