In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
def subsets(s):
    """Return every subset of the sequence ``s`` as a list of lists.

    Subsets are enumerated by bitmask ``0 .. 2**len(s) - 1``: element
    ``s[pos]`` is included in subset number ``mask`` when bit ``pos`` of
    ``mask`` is set. The empty list therefore comes first and the full
    sequence last, and each subset keeps the element order of ``s``.
    """
    size = len(s)
    return [
        [s[pos] for pos in range(size) if is_bit_set(mask, pos)]
        for mask in range(1 << size)
    ]

def halfsubsets(s):
    """Return the subsets of ``s`` that contain at most half of its elements.

    The subsets come in the same bitmask order that ``subsets`` produces;
    those with more than ``len(s) / 2`` elements are dropped.
    """
    total = len(s)
    # 2 * len(candidate) <= total is the integer form of len(candidate) <= total / 2.
    return [candidate for candidate in subsets(s) if 2 * len(candidate) <= total]

def is_bit_set(num, bit):
    """Return True when bit number ``bit`` (0 = least significant) of ``num`` is set."""
    mask = 1 << bit
    return (num & mask) > 0

# I. Simple Test Data

In [3]:
# Load the small hand-checkable example table and display it in full.
df = pd.read_csv('SimpleTestData.csv')
df

Unnamed: 0,Y,A,X,P(A|X),D_A,D_B,D_A = A,D_B = B
0,1,1,1,0.5,2,2,0,0
1,2,1,2,0.5,1,2,1,0
2,3,1,3,0.5,1,1,1,1
3,4,1,4,0.5,1,1,1,1
4,5,1,5,0.5,1,1,1,1
5,3,2,1,0.5,2,2,1,1
6,3,2,2,0.5,1,2,0,1
7,3,2,3,0.5,1,1,0,0
8,3,2,4,0.5,1,1,0,0
9,3,2,5,0.5,1,1,0,0


### Part 1. Preprocess

In [4]:
# Doc. Adam's rule on the simple test data: D = 2 when X >= 2, otherwise D = 1.
print("Decision of Doc. Adam")
D_Adam = df[['Y', 'A', 'X']].copy()
D_Adam['D'] = (df['X'] >= 2).map({True: 2, False: 1})
# E is 1 where the rule's D differs from the recorded A, and 0 where they agree.
D_Adam['E'] = np.where(D_Adam['D'] != D_Adam['A'], 1, 0)
D_Adam['E'].values

Decision of Doc. Adam


array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0])

In [5]:
# Doc. Barry's rule on the simple test data: D = 2 when X >= 3, otherwise D = 1.
print("Decision of Doc. Barry")
D_Barry = df[['Y', 'A', 'X']].copy()
D_Barry['D'] = (df['X'] >= 3).map({True: 2, False: 1})
# E is 1 where the rule's D differs from the recorded A, and 0 where they agree.
D_Barry['E'] = np.where(D_Barry['D'] != D_Barry['A'], 1, 0)
D_Barry['E'].values

Decision of Doc. Barry


array([0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

### Part 2. Expectation

In [6]:
# Score Doc. Adam's rule: Y summed over the rows with E == 1, times 2, divided by the row count.
# NOTE(review): the factor 2 presumably stands for 1/P(A|X) with P(A|X) = 0.5 — confirm.
E_Adam = 2 * sum(D_Adam['Y'].mul(D_Adam['E'])) / len(df)
print("Choice for Doc. Adam : ", E_Adam, sep="")

Choice for Doc. Adam : 3.4


In [7]:
# Score Doc. Barry's rule: Y summed over the rows with E == 1, times 2, divided by the row count.
# NOTE(review): the factor 2 presumably stands for 1/P(A|X) with P(A|X) = 0.5 — confirm.
E_Barry = sum(D_Barry['Y']*D_Barry['E'])*2/df.shape[0]
# The label previously said "Doc. Adam" (copy-paste slip); this value belongs to Barry.
print("Choice for Doc. Barry : " + str(E_Barry))

Choice for Doc. Adam : 3.6


# Load CSV Data

In [8]:
# Load the 100-row sample table and discard the unnamed index column written by the CSV export.
# NOTE: this rebinds `df`, replacing the simple test table used in the cells above.
df = pd.read_csv('TestSample_100_XTable.csv').drop(columns=['Unnamed: 0'])
df.head(20)

Unnamed: 0,ID,Cont1,Cont2,Cont3,Ord1,Ord2,Ord3,Nom1,Nom2,Nom3,A,Y
0,1,0,1,7,4,2,0,4,3,3,0,31.2112
1,2,9,0,3,0,3,3,2,3,3,0,20.8458
2,3,2,7,0,1,3,3,4,1,3,0,94.7321
3,4,2,3,0,0,0,2,3,2,0,1,22.4179
4,5,4,4,2,2,3,4,1,4,1,1,15.0723
5,6,9,4,1,4,0,2,2,3,4,1,21.5173
6,7,1,2,9,3,1,2,2,0,4,0,29.9679
7,8,1,8,2,3,1,0,4,3,3,0,0.4875
8,9,0,2,8,0,2,0,1,4,1,1,63.6878
9,10,8,4,3,4,1,4,1,4,2,1,19.114


In [9]:
# Sanity check: (rows, columns) of the loaded sample table.
df.shape

(100, 12)

### Part 1. Search  X0 (continuous variable 1)

In [10]:
# Candidate cut points for X0: every observed Cont1 value shifted up by one, in ascending order.
unique_X0 = sorted(df['Cont1'].add(1).unique())
print("Cut Range for X0: ", unique_X0, sep="")
print("Cut size for X0: ", len(unique_X0), sep="")

Cut Range for X0: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Cut size for X0: 10


In [11]:
# Score every candidate cut point of Cont1 (X0).
# For a cut i, D is 1 when Cont1 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Cont1', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X0:
    X_Search['D'] = (X_Search['Cont1'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X0 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X0 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X0 <1: 35.63502
Choice for X0 >=1: 55.3229314
Choice for X0 <2: 38.276296
Choice for X0 >=2: 52.6816554
Choice for X0 <3: 42.032956
Choice for X0 >=3: 48.9249954
Choice for X0 <4: 44.094414
Choice for X0 >=4: 46.8635374
Choice for X0 <5: 40.363062
Choice for X0 >=5: 50.5948894
Choice for X0 <6: 41.285908
Choice for X0 >=6: 49.6720434
Choice for X0 <7: 50.0887154
Choice for X0 >=7: 40.869236
Choice for X0 <8: 52.8049614
Choice for X0 >=8: 38.15299
Choice for X0 <9: 58.3572554
Choice for X0 >=9: 32.600696
Choice for X0 <10: 56.8355874
Choice for X0 >=10: 34.122364


### Part 2. Search  X1 (continuous variable 2)

In [12]:
# Candidate cut points for X1: every observed Cont2 value shifted up by one, in ascending order.
unique_X1 = sorted(df['Cont2'].add(1).unique())
print("Cut Range for X1: ", unique_X1, sep="")
print("Cut size for X1: ", len(unique_X1), sep="")

Cut Range for X1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Cut size for X1: 10


In [13]:
# Score every candidate cut point of Cont2 (X1).
# For a cut i, D is 1 when Cont2 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Cont2', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X1:
    X_Search['D'] = (X_Search['Cont2'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X1 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X1 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X1 <1: 32.206456
Choice for X1 >=1: 58.7514954
Choice for X1 <2: 31.895356
Choice for X1 >=2: 59.0625954
Choice for X1 <3: 33.25742
Choice for X1 >=3: 57.7005314
Choice for X1 <4: 30.3150994
Choice for X1 >=4: 60.642852
Choice for X1 <5: 29.6733734
Choice for X1 >=5: 61.284578
Choice for X1 <6: 35.9514454
Choice for X1 >=6: 55.006506
Choice for X1 <7: 43.8065354
Choice for X1 >=7: 47.151416
Choice for X1 <8: 50.0044154
Choice for X1 >=8: 40.953536
Choice for X1 <9: 54.4178954
Choice for X1 >=9: 36.540056
Choice for X1 <10: 56.8355874
Choice for X1 >=10: 34.122364


### Part 3. Search  X2 (continuous variable 3)

In [14]:
# Candidate cut points for X2: every observed Cont3 value shifted up by one, in ascending order.
unique_X2 = sorted(df['Cont3'].add(1).unique())
print("Cut Range for X2: ", unique_X2, sep="")
print("Cut size for X2: ", len(unique_X2), sep="")

Cut Range for X2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Cut size for X2: 10


In [15]:
# Score every candidate cut point of Cont3 (X2).
# For a cut i, D is 1 when Cont3 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Cont3', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X2:
    X_Search['D'] = (X_Search['Cont3'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X2 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X2 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X2 <1: 38.930102
Choice for X2 >=1: 52.0278494
Choice for X2 <2: 45.110042
Choice for X2 >=2: 45.8479094
Choice for X2 <3: 43.9285394
Choice for X2 >=3: 47.029412
Choice for X2 <4: 45.5578814
Choice for X2 >=4: 45.40007
Choice for X2 <5: 41.4163094
Choice for X2 >=5: 49.541642
Choice for X2 <6: 41.7051834
Choice for X2 >=6: 49.252768
Choice for X2 <7: 48.2001334
Choice for X2 >=7: 42.757818
Choice for X2 <8: 52.7313874
Choice for X2 >=8: 38.226564
Choice for X2 <9: 54.2355754
Choice for X2 >=9: 36.722376
Choice for X2 <10: 56.8355874
Choice for X2 >=10: 34.122364


### Part 4. Search  X3 (ordinal variable 1)

In [16]:
# Candidate cut points for X3: every observed Ord1 value shifted up by one, in ascending order.
unique_X3 = sorted(df['Ord1'].add(1).unique())
print("Cut Range for X3: ", unique_X3, sep="")
print("Cut size for X3: ", len(unique_X3), sep="")

Cut Range for X3: [1, 2, 3, 4, 5]
Cut size for X3: 5


In [17]:
# Score every candidate cut point of Ord1 (X3).
# For a cut i, D is 1 when Ord1 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Ord1', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X3:
    X_Search['D'] = (X_Search['Ord1'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X3 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X3 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X3 <1: 37.333322
Choice for X3 >=1: 53.6246294
Choice for X3 <2: 38.399154
Choice for X3 >=2: 52.5587974
Choice for X3 <3: 42.342138
Choice for X3 >=3: 48.6158134
Choice for X3 <4: 50.0971654
Choice for X3 >=4: 40.860786
Choice for X3 <5: 56.8355874
Choice for X3 >=5: 34.122364


### Part 5. Search  X4 (ordinal variable 2)

In [18]:
# Candidate cut points for X4: every observed Ord2 value shifted up by one, in ascending order.
unique_X4 = sorted(df['Ord2'].add(1).unique())
print("Cut Range for X4: ", unique_X4, sep="")
print("Cut size for X4: ", len(unique_X4), sep="")

Cut Range for X4: [1, 2, 3, 4, 5]
Cut size for X4: 5


In [19]:
# Score every candidate cut point of Ord2 (X4).
# For a cut i, D is 1 when Ord2 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Ord2', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X4:
    X_Search['D'] = (X_Search['Ord2'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X4 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X4 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X4 <1: 37.345148
Choice for X4 >=1: 53.6128034
Choice for X4 <2: 41.801006
Choice for X4 >=2: 49.1569454
Choice for X4 <3: 48.1087234
Choice for X4 >=3: 42.849228
Choice for X4 <4: 55.9730514
Choice for X4 >=4: 34.9849
Choice for X4 <5: 56.8355874
Choice for X4 >=5: 34.122364


### Part 6. Search  X5 (ordinal variable 3)

In [20]:
# Candidate cut points for X5: every observed Ord3 value shifted up by one, in ascending order.
unique_X5 = sorted(df['Ord3'].add(1).unique())
print("Cut Range for X5: ", unique_X5, sep="")
print("Cut size for X5: ", len(unique_X5), sep="")

Cut Range for X5: [1, 2, 3, 4, 5]
Cut size for X5: 5


In [21]:
# Score every candidate cut point of Ord3 (X5).
# For a cut i, D is 1 when Ord3 < i and 0 otherwise; E is 0 where D equals the
# recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Ord3', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per cut
for i in unique_X5:
    X_Search['D'] = (X_Search['Ord3'] < i).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X5 <" + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X5 >=" + str(i) + ": " + str(E_X_Search_R))

Choice for X5 <1: 32.921272
Choice for X5 >=1: 58.0366794
Choice for X5 <2: 44.071096
Choice for X5 >=2: 46.8868554
Choice for X5 <3: 43.673372
Choice for X5 >=3: 47.2845794
Choice for X5 <4: 49.4834134
Choice for X5 >=4: 41.474538
Choice for X5 <5: 56.8355874
Choice for X5 >=5: 34.122364


### Part 7. Search  X6 (nominal variable 1)

In [22]:
# Candidate splits for X6: the subsets of the observed Nom1 levels returned by halfsubsets.
unique_X6 = sorted(df['Nom1'].unique())
sets_X6 = halfsubsets(unique_X6)
print("Cut Range for X6: ", list(range(len(sets_X6))), sep="")
print("Cut Size for X6: ", len(sets_X6), sep="")
sets_X6

Cut Range for X6: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Cut Size for X6: 16


[[],
 [0],
 [1],
 [0, 1],
 [2],
 [0, 2],
 [1, 2],
 [3],
 [0, 3],
 [1, 3],
 [2, 3],
 [4],
 [0, 4],
 [1, 4],
 [2, 4],
 [3, 4]]

In [23]:
# Score every candidate level subset of Nom1 (X6).
# For subset number i, D is 1 when Nom1 is in sets_X6[i] and 0 otherwise; E is 0
# where D equals the recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Nom1', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per subset
for i, level_subset in enumerate(sets_X6):
    X_Search['D'] = X_Search['Nom1'].isin(level_subset).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X6 in set " + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X6 not in set " + str(i) + ": " + str(E_X_Search_R))

Choice for X6 in set 0: 34.122364
Choice for X6 not in set 0: 56.8355874
Choice for X6 in set 1: 41.138344
Choice for X6 not in set 1: 49.8196074
Choice for X6 in set 2: 30.694016
Choice for X6 not in set 2: 60.2639354
Choice for X6 in set 3: 37.709996
Choice for X6 not in set 3: 53.2479554
Choice for X6 in set 4: 44.1934794
Choice for X6 not in set 4: 46.764472
Choice for X6 in set 5: 51.2094594
Choice for X6 not in set 5: 39.748492
Choice for X6 in set 6: 40.7651314
Choice for X6 not in set 6: 50.19282
Choice for X6 in set 7: 39.606242
Choice for X6 not in set 7: 51.3517094
Choice for X6 in set 8: 46.622222
Choice for X6 not in set 8: 44.3357294
Choice for X6 in set 9: 36.177894
Choice for X6 not in set 9: 54.7800574
Choice for X6 in set 10: 49.6773574
Choice for X6 not in set 10: 41.280594
Choice for X6 in set 11: 37.692962
Choice for X6 not in set 11: 53.2649894
Choice for X6 in set 12: 44.708942
Choice for X6 not in set 12: 46.2490094
Choice for X6 in set 13: 34.264614
Choice for 

### Part 8. Search  X7 (nominal variable 2)

In [24]:
# Candidate splits for X7: the subsets of the observed Nom2 levels returned by halfsubsets.
unique_X7 = sorted(df['Nom2'].unique())
sets_X7 = halfsubsets(unique_X7)
print("Cut Range for X7: ", list(range(len(sets_X7))), sep="")
print("Cut Size for X7: ", len(sets_X7), sep="")
sets_X7

Cut Range for X7: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Cut Size for X7: 16


[[],
 [0],
 [1],
 [0, 1],
 [2],
 [0, 2],
 [1, 2],
 [3],
 [0, 3],
 [1, 3],
 [2, 3],
 [4],
 [0, 4],
 [1, 4],
 [2, 4],
 [3, 4]]

In [25]:
# Score every candidate level subset of Nom2 (X7).
# For subset number i, D is 1 when Nom2 is in sets_X7[i] and 0 otherwise; E is 0
# where D equals the recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Nom2', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per subset
for i, level_subset in enumerate(sets_X7):
    X_Search['D'] = X_Search['Nom2'].isin(level_subset).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X7 in set " + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X7 not in set " + str(i) + ": " + str(E_X_Search_R))

Choice for X7 in set 0: 34.122364
Choice for X7 not in set 0: 56.8355874
Choice for X7 in set 1: 40.317024
Choice for X7 not in set 1: 50.6409274
Choice for X7 in set 2: 42.438954
Choice for X7 not in set 2: 48.5189974
Choice for X7 in set 3: 48.633614
Choice for X7 not in set 3: 42.3243374
Choice for X7 in set 4: 39.744408
Choice for X7 not in set 4: 51.2135434
Choice for X7 in set 5: 45.939068
Choice for X7 not in set 5: 45.0188834
Choice for X7 in set 6: 48.060998
Choice for X7 not in set 6: 42.8969534
Choice for X7 in set 7: 38.6230474
Choice for X7 not in set 7: 52.334904
Choice for X7 in set 8: 44.8177074
Choice for X7 not in set 8: 46.140244
Choice for X7 in set 9: 46.9396374
Choice for X7 not in set 9: 44.018314
Choice for X7 in set 10: 44.2450914
Choice for X7 not in set 10: 46.71286
Choice for X7 in set 11: 32.20161
Choice for X7 not in set 11: 58.7563414
Choice for X7 in set 12: 38.39627
Choice for X7 not in set 12: 52.5616814
Choice for X7 in set 13: 40.5182
Choice for X7 n

### Part 9. Search  X8 (nominal variable 3)

In [26]:
# Candidate splits for X8: the subsets of the observed Nom3 levels returned by halfsubsets.
unique_X8 = sorted(df['Nom3'].unique())
sets_X8 = halfsubsets(unique_X8)
print("Cut Range for X8: ", list(range(len(sets_X8))), sep="")
print("Cut Size for X8: ", len(sets_X8), sep="")
sets_X8

Cut Range for X8: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Cut Size for X8: 16


[[],
 [0],
 [1],
 [0, 1],
 [2],
 [0, 2],
 [1, 2],
 [3],
 [0, 3],
 [1, 3],
 [2, 3],
 [4],
 [0, 4],
 [1, 4],
 [2, 4],
 [3, 4]]

In [27]:
# Score every candidate level subset of Nom3 (X8).
# For subset number i, D is 1 when Nom3 is in sets_X8[i] and 0 otherwise; E is 0
# where D equals the recorded A and 1 where they differ.
# NOTE(review): the factor 2 looks like 1/P(A|X) with P(A|X) = 0.5 — confirm.
X_Search = df[['Nom3', 'A', 'Y']].copy()  # loop-invariant, so built once instead of per subset
for i, level_subset in enumerate(sets_X8):
    X_Search['D'] = X_Search['Nom3'].isin(level_subset).astype('int64')
    X_Search['E'] = np.where(X_Search['D'] == X_Search['A'], 0, 1)
    # Built-in sum is kept (rather than Series.sum) so the float accumulation order is unchanged.
    E_X_Search_L = sum(X_Search['Y'] * X_Search['E']) * 2 / len(df)
    E_X_Search_R = sum(X_Search['Y'] * (1 - X_Search['E'])) * 2 / len(df)
    print("Choice for X8 in set " + str(i) + ": " + str(E_X_Search_L))
    print("Choice for X8 not in set " + str(i) + ": " + str(E_X_Search_R))

Choice for X8 in set 0: 34.122364
Choice for X8 not in set 0: 56.8355874
Choice for X8 in set 1: 40.000362
Choice for X8 not in set 1: 50.9575894
Choice for X8 in set 2: 32.27922
Choice for X8 not in set 2: 58.6787314
Choice for X8 in set 3: 38.157218
Choice for X8 not in set 3: 52.8007334
Choice for X8 in set 4: 43.8414114
Choice for X8 not in set 4: 47.11654
Choice for X8 in set 5: 49.7194094
Choice for X8 not in set 5: 41.238542
Choice for X8 in set 6: 41.9982674
Choice for X8 not in set 6: 48.959684
Choice for X8 in set 7: 38.414678
Choice for X8 not in set 7: 52.5432734
Choice for X8 in set 8: 44.292676
Choice for X8 not in set 8: 46.6652754
Choice for X8 in set 9: 36.571534
Choice for X8 not in set 9: 54.3864174
Choice for X8 in set 10: 48.1337254
Choice for X8 not in set 10: 42.824226
Choice for X8 in set 11: 38.789372
Choice for X8 not in set 11: 52.1685794
Choice for X8 in set 12: 44.66737
Choice for X8 not in set 12: 46.2905814
Choice for X8 in set 13: 36.946228
Choice for X8