In [27]:
from scipy.stats import norm
from sklearn.linear_model import LogisticRegression
from statistics import median
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Defining True Parameters
mu_1_true = 10
mu_0_true = 7
classifier_true = 0.5*(mu_0_true + mu_1_true)
sd = 1 

# Defining Initial Assumed Parameters
mu1_init = 9
mu0_init = 6

true_pop_prob = 0.5
exploration_prob = 0.5

reference_quantile = 0.5

# Pre-defined parameters
L1 = 10
L2 = 110
L3 = 11
L4 = 5

re_prob = 0
action = ['large','small','reject']
sample_size = 1000
n1 = 500
n0 = 500


# Define the reward value
def loss (x, y, theta, LB, action, re):
    
    if (x >= theta) & (y == 0) & (action == 'large'): 
        l  = L2
    elif ((x < LB) & (y == 1) & (action == 'reject')) or ((x >= LB) & (x < theta) & (y == 1) & (action == 'reject')): 
        l  = L1
    elif ((x >= LB) & (x < theta) & (y == 0) & (action == 'small') & (re == False)):
        l = L3
    else:
        l = 0
        
    return l

In [28]:
# Find V0

# Generate data for the 1st state
label1 = np.random.normal(mu_1_true,sd,size =int(sample_size*(1-true_pop_prob)))
label0 = np.random.normal(mu_0_true,sd,size =int(sample_size*true_pop_prob))

info0 = pd.DataFrame(columns = ['True Label','Values','Decision','Reveal','RP_0',
                                      'Classifier','Loss','Explore_Prob', 'Return'])

# Assign feature values to the dataframe
info0['Values'] = np.append(label1, label0)
# Assign true labels to the dataframe
info0.loc[0:len(label1),'True Label'] = 1
info0.loc[len(label1):,'True Label'] = 0
# Shuffle the dataframe
info0 = shuffle(info0)
# Resign the index 
info0.reset_index(inplace=True, drop=True)


# Find classifier (\theta_2) 
classifier = 0.5*(mu0_init + mu1_init)
classifier0 = classifier
# Find the LB (Using alpha = 50 for label 0)
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu0_init,sd),mu0_init,sd) -\
            norm.cdf(classifier,mu0_init,sd)
LB = float(norm.ppf(temp,mu0_init,sd))
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu1_init,sd),mu1_init,sd) -\
            norm.cdf(classifier,mu1_init,sd)
UB = float(norm.ppf(temp,mu1_init,sd))
exploration_prob = classifier_true - classifier

print ("mu1 is {} and mu0 is {}".format(mu1_init, mu0_init))
print ("Classifier is {}, LB is {}, UB is {}, explore prob is {}".format(classifier, LB, UB, exploration_prob))
print ("n1 is {}, n0 is {}".format(n1, n0))

mu1 is 9 and mu0 is 6
Classifier is 7.5, LB is 4.5, UB is 10.5, explore prob is 1.0
n1 is 500, n0 is 500


In [29]:
mis_error0 = 0.5*norm.cdf(classifier,mu_1_true,sd)*L1*sample_size*(1-true_pop_prob) + 0.5*norm.sf(classifier,mu_0_true,sd)*L2*sample_size*true_pop_prob

V0 = 0  
for i in range(len(info0)):
    if (info0.loc[i,'Values'] >= classifier) & (info0.loc[i,'Values'] <= UB):
        a = 'large'
        info0.loc[i,'Decision'] = a
        info0.loc[i,'Reveal'] = 'Yes'
        
    elif info0.loc[i,'Values'] < LB:
        a = 'reject'
        info0.loc[i,'Decision'] = a
        
    else:
        if (np.random.rand() < exploration_prob):
            a = 'small'
            info0.loc[i,'Decision'] = a
            
            if info0.loc[i,'True Label'] == 1:
                info0.loc[i,'Reveal'] = 'Yes'
                V0 = V0 - L1 + L4
            else:

                if (np.random.rand() > re_prob):
                    info0.loc[i,'Reveal'] = 'Yes'
                    re = False
                    V0 += L3
                else:
                    re = True

        else: 
            a = 'reject'
            info0.loc[i,'Decision'] = a
            

print("V0(theta_0) is {}, mis-error is {}".format(V0, mis_error0))


V0(theta_0) is 3106, mis-error is 8500.306478279079


In [30]:
# update the classifier

info0_sampled = info0[(info0["Values"] >= classifier) & (info0["Reveal"] == "Yes") & (info0["Values"] <= UB)]
info0_sampled_smaller = info0[(info0["Values"] < classifier) & (info0["Reveal"] == "Yes")]

# Filter rows where True Label is equal to 1
sample1 = info0_sampled[info0_sampled["True Label"] == 1]['Values']
sample0 = info0_sampled_smaller[info0_sampled_smaller["True Label"] == 0]['Values']


for i in range(len(sample1)):
    mu1_init = n1*mu1_init/(n1+1) + sample1.iloc[i]/(n1+1)
    
for i in range(len(sample0)):
    mu0_init = n0*mu0_init/(n0+1) + sample0.iloc[i]/(n0+1)

n1 = n1 + len(sample1)
n0 = n0 + len(sample0)

# Find classifier (\theta_2) 
classifier = 0.5*(mu0_init + mu1_init)
classifier1 = classifier
print ("Classifier is ", classifier)
# Find the LB (Using alpha = 50 for label 0)
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu0_init,sd),mu0_init,sd) -\
            norm.cdf(classifier,mu0_init,sd)
LB = float(norm.ppf(temp,mu0_init,sd))
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu1_init,sd),mu1_init,sd) -\
            norm.cdf(classifier,mu1_init,sd)
UB = float(norm.ppf(temp,mu1_init,sd))
exploration_prob = classifier_true - classifier

print ("n1 is {}, n0 is {}".format(n1, n0))
print ("mu1 is {} and mu0 is {}".format(mu1_init, mu0_init))
print ("Classifier is {}, LB is {}, UB is {}, explore prob is {}".format(classifier, LB, UB, exploration_prob))


Classifier is  7.766970443367396
n1 is 849, n0 is 850
mu1 is 9.278533061875667 and mu0 is 6.255407824859125
Classifier is 7.766970443367396, LB is 4.743845206350854, UB is 10.790095680383939, explore prob is 0.7330295566326042


In [31]:
# Find V1

# Generate data for the 2nd state
label1 = np.random.normal(mu_1_true,sd,size =int(sample_size*(1-true_pop_prob)))
label0 = np.random.normal(mu_0_true,sd,size =int(sample_size*true_pop_prob))

info1 = pd.DataFrame(columns = ['True Label','Values','Decision','RP_1','RP_0',
                                      'Classifier','Loss','Explore_Prob', 'Return'])

# Assign feature values to the dataframe
info1['Values'] = np.append(label1, label0)
# Assign true labels to the dataframe
info1.loc[0:len(label1),'True Label'] = 1
info1.loc[len(label1):,'True Label'] = 0
# Shuffle the dataframe
info1 = shuffle(info1)
# Resign the index 
info1.reset_index(inplace=True, drop=True)


In [32]:
mis_error1 = 0.5*norm.cdf(classifier,mu_1_true,sd)*L1*sample_size*(1-true_pop_prob) + 0.5*norm.sf(classifier,mu_0_true,sd)*L2*sample_size*true_pop_prob


V1 = 0  
for i in range(len(info1)):
    if (info1.loc[i,'Values'] >= classifier) & (info1.loc[i,'Values'] <= UB):
        a = 'large'
        info1.loc[i,'Decision'] = a
        info1.loc[i,'Reveal'] = 'Yes'
        
    elif info1.loc[i,'Values'] < LB:
        a = 'reject'
        info1.loc[i,'Decision'] = a
        
    else:
        if (np.random.rand() < exploration_prob):
            a = 'small'
            info1.loc[i,'Decision'] = a
            
            if info1.loc[i,'True Label'] == 1:
                info1.loc[i,'Reveal'] = 'Yes'
                V1 = V1 - L1 + L4
            else:

                if (np.random.rand() > re_prob):
                    info1.loc[i,'Reveal'] = 'Yes'
                    re = False
                    V1 += L3
                else:
                    re = True

        else: 
            a = 'reject'
            info1.loc[i,'Decision'] = a
            
print("V_1(theta_1) is {} and mis-error is {}".format(V1, mis_error1))

V_1(theta_1) is 2869 and mis-error is 6124.546216233328


In [33]:
# update the classifier

info1_sampled = info1[(info1["Values"] >= classifier) & (info1["Reveal"] == "Yes") & (info1["Values"] <= UB)]
info1_sampled_smaller = info1[(info1["Values"] < classifier) & (info1["Reveal"] == "Yes")]

# Filter rows where True Label is equal to 1
sample1 = info1_sampled[info1_sampled["True Label"] == 1]['Values']
sample0 = info1_sampled_smaller[info1_sampled_smaller["True Label"] == 0]['Values']

for i in range(len(sample1)):
    mu1_init = n1*mu1_init/(n1+1) + sample1.iloc[i]/(n1+1)
    
for i in range(len(sample0)):
    mu0_init = n0*mu0_init/(n0+1) + sample0.iloc[i]/(n0+1)

n1 = n1 + len(sample1)
n0 = n0 + len(sample0)

# Find classifier (\theta_2) 
classifier = 0.5*(mu0_init + mu1_init)
classifier2 = classifier
print ("Classifier is ", classifier)
# Find the LB (Using alpha = 50 for label 0)
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu0_init,sd),mu0_init,sd) -\
            norm.cdf(classifier,mu0_init,sd)
LB = float(norm.ppf(temp,mu0_init,sd))
temp = 2*norm.cdf(norm.ppf(reference_quantile,mu1_init,sd),mu1_init,sd) -\
            norm.cdf(classifier,mu1_init,sd)
UB = float(norm.ppf(temp,mu1_init,sd))
exploration_prob = classifier_true - classifier


print ("n1 is {}, n0 is {}".format(n1, n0))
print ("mu1 is {} and mu0 is {}".format(mu1_init, mu0_init))
print ("Classifier is {}, LB is {}, UB is {}, explore prob is {}".format(classifier, LB, UB, exploration_prob))


Classifier is  7.886551293235515
n1 is 1232, n0 is 1149
mu1 is 9.413752018045532 and mu0 is 6.359350568425497
Classifier is 7.886551293235515, LB is 4.832149843615479, UB is 10.940952742855549, explore prob is 0.6134487067644852


In [34]:
# Find V2

# Generate data for the 1st state
label1 = np.random.normal(mu_1_true,sd,size =int(sample_size*(1-true_pop_prob)))
label0 = np.random.normal(mu_0_true,sd,size =int(sample_size*true_pop_prob))

info2 = pd.DataFrame(columns = ['True Label','Values','Decision','RP_1','RP_0',
                                      'Classifier','Loss','Explore_Prob', 'Return'])

# Assign feature values to the dataframe
info2['Values'] = np.append(label1, label0)
# Assign true labels to the dataframe
info2.loc[0:len(label1),'True Label'] = 1
info2.loc[len(label1):,'True Label'] = 0
# Shuffle the dataframe
info2 = shuffle(info2)
# Resign the index 
info2.reset_index(inplace=True, drop=True)


In [35]:
mis_error2 = 0.5*norm.cdf(classifier,mu_1_true,sd)*L1*sample_size*(1-true_pop_prob) + 0.5*norm.sf(classifier,mu_0_true,sd)*L2*sample_size*true_pop_prob

V2 = 0  
for i in range(len(info1)):
    if (info2.loc[i,'Values'] >= classifier) & (info2.loc[i,'Values'] <= UB):
        a = 'large'
        info2.loc[i,'Decision'] = a
        info2.loc[i,'Reveal'] = 'Yes'
        
    elif info2.loc[i,'Values'] < LB:
        a = 'reject'
        info2.loc[i,'Decision'] = a
        
    else:
        if (np.random.rand() < exploration_prob):
            a = 'small'
            info2.loc[i,'Decision'] = a
            
            if info2.loc[i,'True Label'] == 1:
                info2.loc[i,'Reveal'] = 'Yes'
                #V1 = V1 - L1 + L4
            else:

                if (np.random.rand() > re_prob):
                    info2.loc[i,'Reveal'] = 'Yes'
                    re = False
                    #V1 += L3
                else:
                    re = True

        else: 
            a = 'reject'
            info2.loc[i,'Decision'] = a

print("V_2(theta_2) is {} and mis-error is {}".format(V2, mis_error2))

V_2(theta_2) is 0 and mis-error is 5203.860220635752
