In [1]:
import pandas as pd
import numpy as np
import math
import operator
from itertools import combinations

In [2]:
def subsets(s):
    """Return every subset of the sequence ``s`` as a list of lists.

    The integers ``0 .. 2**len(s) - 1`` are used as bit masks: element
    ``s[pos]`` is part of subset number ``mask`` when bit ``pos`` of ``mask``
    is set.  The first subset is therefore the empty list and the last one
    contains every element of ``s``; elements keep their order from ``s``.
    """
    size = len(s)
    return [
        [s[pos] for pos in range(size) if is_bit_set(mask, pos)]
        for mask in range(1 << size)
    ]

def halfsubsets(s):
    """Return the subsets of ``s`` holding no more than ``len(s)/2`` elements.

    Subsets are enumerated with the same bit-mask scheme as ``subsets`` (bit
    ``pos`` of the mask selects ``s[pos]``), and in the same order; any subset
    larger than half of ``s`` is skipped.  The empty subset is always kept.
    """
    size = len(s)
    limit = size / 2
    kept = []
    for mask in range(1 << size):
        members = [s[pos] for pos in range(size) if is_bit_set(mask, pos)]
        if len(members) > limit:
            continue
        kept.append(members)
    return kept

def is_bit_set(num, bit):
    """Return True when bit number ``bit`` of ``num`` is 1 (bit 0 is the lowest)."""
    mask = 1 << bit
    return (num & mask) != 0

# Load CSV Data

In [3]:
# Read the sample table and discard its 'Unnamed: 0' column, then preview
# the first 20 rows.
df = pd.read_csv('TestSample_100_XTable.csv')
df = df.drop('Unnamed: 0', axis=1)
df.head(20)

Unnamed: 0,ID,Cont1,Cont2,Cont3,Ord1,Ord2,Ord3,Nom1,Nom2,Nom3,A,Y
0,1,0,1,7,4,2,0,4,3,3,0,31.2112
1,2,9,0,3,0,3,3,2,3,3,0,20.8458
2,3,2,7,0,1,3,3,4,1,3,0,94.7321
3,4,2,3,0,0,0,2,3,2,0,1,22.4179
4,5,4,4,2,2,3,4,1,4,1,1,15.0723
5,6,9,4,1,4,0,2,2,3,4,1,21.5173
6,7,1,2,9,3,1,2,2,0,4,0,29.9679
7,8,1,8,2,3,1,0,4,3,3,0,0.4875
8,9,0,2,8,0,2,0,1,4,1,1,63.6878
9,10,8,4,3,4,1,4,1,4,2,1,19.114


## Step 1. Create cut info

In [4]:
# Candidate cuts for each covariate.
# Continuous and ordinal columns: every observed value v yields the cut v + 1;
# the search cells compare rows with `x < cut`.
unique_X0, unique_X1, unique_X2 = [
    sorted((df[name] + 1).unique()) for name in ('Cont1', 'Cont2', 'Cont3')
]
unique_X3, unique_X4, unique_X5 = [
    sorted((df[name] + 1).unique()) for name in ('Ord1', 'Ord2', 'Ord3')
]
# Nominal columns: the sorted observed levels ...
unique_X6, unique_X7, unique_X8 = [
    sorted(df[name].unique()) for name in ('Nom1', 'Nom2', 'Nom3')
]
# ... and, as candidate cuts, every subset holding at most half of the levels.
sets_X6, sets_X7, sets_X8 = [
    halfsubsets(levels) for levels in (unique_X6, unique_X7, unique_X8)
]

In [5]:
# Number of candidate cuts per variable index (hard-coded, not derived from
# the data): 10 for variables 0-2, 5 for variables 3-5, 16 for variables 6-8.
var_dict = dict(zip(range(9), [10, 10, 10, 5, 5, 5, 16, 16, 16]))
var = list(range(9))
# Every unordered choice of three distinct variables, one row per choice.
var_comb = list(combinations(var, 3))
var_info = pd.DataFrame(var_comb, columns=['var i', 'var j', 'var k'])
# N = product of the three per-variable cut counts, i.e. the number of cut
# triples to evaluate for that variable combination.
var_info['N'] = (
    var_info['var i'].map(var_dict)
    .mul(var_info['var j'].map(var_dict))
    .mul(var_info['var k'].map(var_dict))
)
# Total number of cut triples over all variable combinations.
var_info['N'].sum(axis=0)

86131

In [6]:
# Running total of N down the table.
var_info = var_info.assign(cum_N=var_info['N'].cumsum())
var_info

Unnamed: 0,var i,var j,var k,N,cum_N
0,0,1,2,1000,1000
1,0,1,3,500,1500
2,0,1,4,500,2000
3,0,1,5,500,2500
4,0,1,6,1600,4100
5,0,1,7,1600,5700
6,0,1,8,1600,7300
7,0,2,3,500,7800
8,0,2,4,500,8300
9,0,2,5,500,8800


## Step 2. Search

In [7]:
# Worked example for one cut triple: cut value 1 on each of Cont1, Cont2, Cont3.
# Only the needed columns are copied, and every indicator is built with
# vectorized comparisons instead of row-wise apply().
X_Search = df[['Cont1', 'Cont2', 'Cont3', 'A', 'Y']].copy()

# C1..C3: 1 where the covariate is below its cut, else 0.
X_Search['C1'] = (df['Cont1'] < 1).astype('int64')
X_Search['C2'] = (df['Cont2'] < 1).astype('int64')
X_Search['C3'] = (df['Cont3'] < 1).astype('int64')

# D1..D8: 1 where the row's (C1, C2, C3) equals the pattern for that region.
# Patterns are listed in region order, so D1 is (1, 1, 1) and D8 is (0, 0, 0);
# each row matches exactly one pattern.
region_patterns = [(1, 1, 1), (1, 1, 0), (1, 0, 1), (1, 0, 0),
                   (0, 1, 1), (0, 1, 0), (0, 0, 1), (0, 0, 0)]
for region, (c1, c2, c3) in enumerate(region_patterns, start=1):
    X_Search['D%d' % region] = (
        (X_Search['C1'] == c1) & (X_Search['C2'] == c2) & (X_Search['C3'] == c3)
    ).astype('int64')

In [8]:
# Number of rows falling in each of the eight regions (columns D1 through D8).
X_Search.loc[:, 'D1':'D8'].sum(axis=0)

D1     0
D2     1
D3     1
D4     8
D5     1
D6     8
D7     8
D8    73
dtype: int64

In [9]:
# Sanity check: the eight region counts added together.
X_Search.loc[:, 'D1':'D8'].sum(axis=0).sum()

100

In [10]:
# E1..E8: for each region n, flag (1/0) the rows whose A value equals the
# region indicator Dn.
for region in range(1, 9):
    X_Search['E%d' % region] = np.where(
        X_Search['D%d' % region] == X_Search['A'], 1, 0
    )

In [11]:
# Score of region n: twice the sum of Y over the rows flagged by En, divided
# by the number of rows in df.
(E_X_Search_1, E_X_Search_2, E_X_Search_3, E_X_Search_4,
 E_X_Search_5, E_X_Search_6, E_X_Search_7, E_X_Search_8) = [
    sum(X_Search['Y'] * X_Search['E%d' % region]) * 2 / df.shape[0]
    for region in range(1, 9)
]
# The list is stored in reverse region order: E[0] is region 8, E[7] is region 1.
E = [
    E_X_Search_8, E_X_Search_7, E_X_Search_6, E_X_Search_5,
    E_X_Search_4, E_X_Search_3, E_X_Search_2, E_X_Search_1,
]
E

[36.388511999999984,
 55.182959400000001,
 59.482863400000006,
 55.087447400000009,
 55.713129400000014,
 55.4286174,
 57.852359400000005,
 56.835587400000001]

In [12]:
# Position (first one on ties) and value of the largest entry of E.
index = max(range(len(E)), key=E.__getitem__)
value = E[index]
print("index: " + str(index))
print("max: " + str(value))

index: 2
max: 59.4828634


### Case 1. var0, var1 and var2 (Cont1, Cont2 and Cont3)

In [13]:
# Exhaustive search over every cut triple (i, j, k) for Cont1, Cont2, Cont3.
#
# For each triple the rows are split into eight regions by the three
# indicators C1 = [Cont1 < i], C2 = [Cont2 < j], C3 = [Cont3 < k]; region n is
# scored as 2 * sum(Y over rows where Dn == A) / number of rows, and the best
# score is recorded.  All indicators are built with vectorized comparisons
# (no row-wise apply), and the C1 / C2 indicators are computed once per outer
# loop value instead of once per innermost iteration.
#
# Each entry of Results is (i, j, k, value, index), where `index` is the
# position of the best score in E.  E is stored in reverse region order, so
# index 0 means region 8 and index 7 means region 1.
Results = []
n_rows = df.shape[0]
# (C1, C2, C3) pattern defining regions D1..D8, in that order.
region_patterns = [(1, 1, 1), (1, 1, 0), (1, 0, 1), (1, 0, 0),
                   (0, 1, 1), (0, 1, 0), (0, 0, 1), (0, 0, 0)]
for i in unique_X0:
    below_i = (df['Cont1'] < i).astype('int64')
    for j in unique_X1:
        below_j = (df['Cont2'] < j).astype('int64')
        for k in unique_X2:
            X_Search = df[['Cont1', 'Cont2', 'Cont3', 'A', 'Y']].copy()
            X_Search['C1'] = below_i
            X_Search['C2'] = below_j
            X_Search['C3'] = (df['Cont3'] < k).astype('int64')
            # D1..D8: one-hot region membership.
            for region, (c1, c2, c3) in enumerate(region_patterns, start=1):
                X_Search['D%d' % region] = (
                    (X_Search['C1'] == c1)
                    & (X_Search['C2'] == c2)
                    & (X_Search['C3'] == c3)
                ).astype('int64')
            # E1..E8: rows whose A value equals the region indicator.
            for region in range(1, 9):
                X_Search['E%d' % region] = np.where(
                    X_Search['D%d' % region] == X_Search['A'], 1, 0
                )
            # Builtin sum() is kept so the floating-point results match the
            # recorded outputs exactly.
            (E_X_Search_1, E_X_Search_2, E_X_Search_3, E_X_Search_4,
             E_X_Search_5, E_X_Search_6, E_X_Search_7, E_X_Search_8) = [
                sum(X_Search['Y'] * X_Search['E%d' % region]) * 2 / n_rows
                for region in range(1, 9)
            ]
            E = [E_X_Search_8, E_X_Search_7, E_X_Search_6, E_X_Search_5,
                 E_X_Search_4, E_X_Search_3, E_X_Search_2, E_X_Search_1]
            index, value = max(enumerate(E), key=operator.itemgetter(1))
            Results.append((i, j, k, value, index))

In [14]:
# One tuple is appended per (i, j, k) combination, so this equals
# len(unique_X0) * len(unique_X1) * len(unique_X2).
len(Results)

1000

In [15]:
# First entry, laid out as (i, j, k, value, index): the cut triple, the best
# score found for it, and the position of that score in E.
Results[0]

(1, 1, 1, 59.482863400000006, 2)

In [16]:
# Last entry (largest cut on all three variables).  A negative index is used
# so the cell does not depend on Results holding exactly 1000 tuples.
Results[-1]

(10, 10, 10, 56.835587400000001, 0)

### Case 2. var3, var4 and var5 (Ord1, Ord2 and Ord3)

In [17]:
# Exhaustive search over every cut triple (i, j, k) for Ord1, Ord2, Ord3.
#
# For each triple the rows are split into eight regions by the three
# indicators C1 = [Ord1 < i], C2 = [Ord2 < j], C3 = [Ord3 < k]; region n is
# scored as 2 * sum(Y over rows where Dn == A) / number of rows, and the best
# score is recorded.  All indicators are built with vectorized comparisons
# (no row-wise apply), and the C1 / C2 indicators are computed once per outer
# loop value instead of once per innermost iteration.
#
# Each entry of Results is (i, j, k, value, index), where `index` is the
# position of the best score in E.  E is stored in reverse region order, so
# index 0 means region 8 and index 7 means region 1.
Results = []
n_rows = df.shape[0]
# (C1, C2, C3) pattern defining regions D1..D8, in that order.
region_patterns = [(1, 1, 1), (1, 1, 0), (1, 0, 1), (1, 0, 0),
                   (0, 1, 1), (0, 1, 0), (0, 0, 1), (0, 0, 0)]
for i in unique_X3:
    below_i = (df['Ord1'] < i).astype('int64')
    for j in unique_X4:
        below_j = (df['Ord2'] < j).astype('int64')
        for k in unique_X5:
            X_Search = df[['Ord1', 'Ord2', 'Ord3', 'A', 'Y']].copy()
            X_Search['C1'] = below_i
            X_Search['C2'] = below_j
            X_Search['C3'] = (df['Ord3'] < k).astype('int64')
            # D1..D8: one-hot region membership.
            for region, (c1, c2, c3) in enumerate(region_patterns, start=1):
                X_Search['D%d' % region] = (
                    (X_Search['C1'] == c1)
                    & (X_Search['C2'] == c2)
                    & (X_Search['C3'] == c3)
                ).astype('int64')
            # E1..E8: rows whose A value equals the region indicator.
            for region in range(1, 9):
                X_Search['E%d' % region] = np.where(
                    X_Search['D%d' % region] == X_Search['A'], 1, 0
                )
            # Builtin sum() is kept so the floating-point results match the
            # recorded outputs exactly.
            (E_X_Search_1, E_X_Search_2, E_X_Search_3, E_X_Search_4,
             E_X_Search_5, E_X_Search_6, E_X_Search_7, E_X_Search_8) = [
                sum(X_Search['Y'] * X_Search['E%d' % region]) * 2 / n_rows
                for region in range(1, 9)
            ]
            E = [E_X_Search_8, E_X_Search_7, E_X_Search_6, E_X_Search_5,
                 E_X_Search_4, E_X_Search_3, E_X_Search_2, E_X_Search_1]
            index, value = max(enumerate(E), key=operator.itemgetter(1))
            Results.append((i, j, k, value, index))

In [18]:
# One tuple is appended per (i, j, k) combination, so this equals
# len(unique_X3) * len(unique_X4) * len(unique_X5).
len(Results)

125

In [19]:
# First entry, laid out as (i, j, k, value, index): the cut triple, the best
# score found for it, and the position of that score in E.
Results[0]

(1, 1, 1, 59.445173400000016, 5)

In [20]:
# Last entry (largest cut on all three variables).  A negative index is used
# so the cell does not depend on Results holding exactly 125 tuples.
Results[-1]

(5, 5, 5, 56.835587400000001, 0)

### Case 3. var6, var7 and var8 (Nom1, Nom2 and Nom3)

In [21]:
# Exhaustive search over every triple of level subsets for Nom1, Nom2, Nom3.
#
# i, j, k are positions into sets_X6, sets_X7, sets_X8.  For each triple the
# rows are split into eight regions by the three indicators
# C1 = [Nom1 in sets_X6[i]], C2 = [Nom2 in sets_X7[j]], C3 = [Nom3 in sets_X8[k]];
# region n is scored as 2 * sum(Y over rows where Dn == A) / number of rows,
# and the best score is recorded.  Membership is tested with the vectorized
# Series.isin (no per-row lambdas or row-wise apply), and the C1 / C2
# indicators are computed once per outer loop value.
#
# Each entry of Results is (i, j, k, value, index), where `index` is the
# position of the best score in E.  E is stored in reverse region order, so
# index 0 means region 8 and index 7 means region 1.
Results = []
n_rows = df.shape[0]
# (C1, C2, C3) pattern defining regions D1..D8, in that order.
region_patterns = [(1, 1, 1), (1, 1, 0), (1, 0, 1), (1, 0, 0),
                   (0, 1, 1), (0, 1, 0), (0, 0, 1), (0, 0, 0)]
for i in range(len(sets_X6)):
    member_i = df['Nom1'].isin(sets_X6[i]).astype('int64')
    for j in range(len(sets_X7)):
        member_j = df['Nom2'].isin(sets_X7[j]).astype('int64')
        for k in range(len(sets_X8)):
            X_Search = df[['Nom1', 'Nom2', 'Nom3', 'A', 'Y']].copy()
            X_Search['C1'] = member_i
            X_Search['C2'] = member_j
            X_Search['C3'] = df['Nom3'].isin(sets_X8[k]).astype('int64')
            # D1..D8: one-hot region membership.
            for region, (c1, c2, c3) in enumerate(region_patterns, start=1):
                X_Search['D%d' % region] = (
                    (X_Search['C1'] == c1)
                    & (X_Search['C2'] == c2)
                    & (X_Search['C3'] == c3)
                ).astype('int64')
            # E1..E8: rows whose A value equals the region indicator.
            for region in range(1, 9):
                X_Search['E%d' % region] = np.where(
                    X_Search['D%d' % region] == X_Search['A'], 1, 0
                )
            # Builtin sum() is kept so the floating-point results match the
            # recorded outputs exactly.
            (E_X_Search_1, E_X_Search_2, E_X_Search_3, E_X_Search_4,
             E_X_Search_5, E_X_Search_6, E_X_Search_7, E_X_Search_8) = [
                sum(X_Search['Y'] * X_Search['E%d' % region]) * 2 / n_rows
                for region in range(1, 9)
            ]
            E = [E_X_Search_8, E_X_Search_7, E_X_Search_6, E_X_Search_5,
                 E_X_Search_4, E_X_Search_3, E_X_Search_2, E_X_Search_1]
            index, value = max(enumerate(E), key=operator.itemgetter(1))
            Results.append((i, j, k, value, index))

In [22]:
# One tuple is appended per (i, j, k) combination, so this equals
# len(sets_X6) * len(sets_X7) * len(sets_X8).
len(Results)

4096

In [23]:
# First entry, laid out as (i, j, k, value, index).  Here i, j, k are
# positions into sets_X6, sets_X7, sets_X8 rather than cut values; value is
# the best score for that triple and index its position in E.
Results[0]

(0, 0, 0, 56.835587400000001, 1)

In [24]:
# Last entry (last subset of each of sets_X6, sets_X7, sets_X8).  A negative
# index is used so the cell does not depend on Results holding exactly 4096
# tuples.
Results[-1]

(15, 15, 15, 57.570071400000018, 6)