In [5]:
import numpy as np
from utils import *
from tqdm.notebook import tqdm
from multiprocessing import Pool

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize, StandardScaler
from itertools import combinations

def check_float(data):
    """Return ``data`` with a floating-point dtype suitable for in-place scaling.

    Parameters
    ----------
    data : numpy.ndarray
        Array to inspect.

    Returns
    -------
    numpy.ndarray
        A new ``np.float64`` array when ``data`` has any integer or boolean
        dtype; otherwise ``data`` itself (same object, no copy).
    """
    # Promote every integer/bool dtype, not just int64: writing fractional
    # values back into e.g. an int32 array would silently truncate them.
    if np.issubdtype(data.dtype, np.integer) or np.issubdtype(data.dtype, np.bool_):
        data = data.astype(np.float64)
    return data

def NormJa(data):
    """Min-max scale every row of ``data`` to the range [0, 1].

    Each row (element along the first axis) is rescaled independently as
    ``(row - row.min()) / (row.max() - row.min())``.

    Parameters
    ----------
    data : numpy.ndarray
        Array whose rows are scaled. It is first passed through
        ``check_float``; if that returns the same object, the rows are
        overwritten in place.

    Returns
    -------
    numpy.ndarray
        The scaled array. A row whose values are all equal (zero range) is
        set to 0 instead of being divided by zero.
    """
    data = check_float(data)
    for index, row in enumerate(data):
        # Avoid the names `min`/`max`, which would shadow the built-ins.
        row_min = row.min()
        value_range = float(row.max() - row_min)
        if value_range == 0.0:
            # Constant row: every value equals the minimum, so map it to 0
            # rather than producing NaN from 0/0.
            data[index] = 0.0
            continue
        data[index] = (row.astype(np.float64) - row_min) / value_range
    return data

def StandardJa(data):
    """Standardise every row of ``data`` to zero mean and unit standard deviation.

    Each row (element along the first axis) is transformed independently as
    ``(row - row.mean()) / row.std()``.

    Parameters
    ----------
    data : numpy.ndarray
        Array whose rows are standardised. It is first passed through
        ``check_float``; if that returns the same object, the rows are
        overwritten in place.

    Returns
    -------
    numpy.ndarray
        The standardised array. A row with zero standard deviation is set to
        0 instead of being divided by zero.
    """
    data = check_float(data)
    for index, row in enumerate(data):
        row_mean = row.mean()
        row_std = row.std()
        if row_std == 0:
            # Constant row: every value equals the mean, so the centred row
            # is all zeros; skip the division to avoid NaN from 0/0.
            data[index] = 0.0
            continue
        data[index] = (row - row_mean) / row_std
    return data

# SVM

In [9]:
def search(p_id, X,y,comb_list):
    """Grid-search an SVC kernel for each feature combination and keep the best.

    Parameters
    ----------
    p_id : any
        Identifier of this task; only used in the progress messages.
    X : numpy.ndarray
        2-D sample matrix; columns are selected with ``X[:, comb]``.
    y : array-like
        Target labels passed to ``GridSearchCV.fit``.
    comb_list : sequence of tuple of int
        Column-index combinations to evaluate.

    Returns
    -------
    tuple
        ``(best_grid, best_comb)``: the fitted ``GridSearchCV`` with the
        highest ``best_score_`` and the combination that produced it. On ties
        the earliest combination wins. Both are ``None`` when ``comb_list``
        is empty.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having imported SVC into the global namespace.
    from sklearn.svm import SVC

    # These do not depend on the combination, so build them once.
    # ('precomputed' is intentionally left out of the kernel list.)
    param_grid = dict(kernel=['linear','poly','rbf', 'sigmoid'])
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)

    best_score = -100
    best_comb = None
    best_grid = None
    total = len(comb_list)
    for count, comb in enumerate(comb_list):
        if count % 400 == 0:
            print(f"\tp_id:{p_id} running {count}/{total}")

        # Indexing with a tuple of column indices already returns a new
        # array, so no explicit copy is needed.
        X_selected = X[:, comb]
        grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
        grid.fit(X_selected, y)
        if best_score < grid.best_score_:
            best_score = grid.best_score_
            best_grid = grid
            best_comb = comb
    print(f"\t{p_id} Done!!")
    return best_grid, best_comb

_NORM_SK_NORM = 0
_NORM_SK_STD = 1
_NORM_MY_NORM = 2
_NORM_MY_STD = 3

In [10]:
from sklearn.svm import SVC

# Exhaustive feature-subset search on the data stored under the
# '*_baseline0_2' keys, repeated once per normalisation scheme.
N_FEATURES = 13  # feature columns the combinations are drawn from
N_WORKERS = 6    # number of search() tasks the combinations are split into

# Loading and the combination list do not depend on the normalisation scheme,
# so they are done once instead of on every loop iteration.
X_ori, y_ori = load('X_ori_baseline0_2'), load('y_ori_baseline0_2')
X_raw, y = np.array(X_ori), np.array(y_ori)

# Every subset of 2..N_FEATURES columns.
comb_list = []
for feature_num in range(2, N_FEATURES + 1):
    comb_list.extend(combinations(range(N_FEATURES), feature_num))

for norm_type in [_NORM_SK_NORM, _NORM_SK_STD, _NORM_MY_NORM, _NORM_MY_STD]:
    # Each branch works on a copy so X_raw stays untouched between schemes.
    if(norm_type == _NORM_SK_NORM):
        print("Perform: sklearn normalize")
        X = normalize(X_raw.copy(), axis=0)
    elif(norm_type == _NORM_SK_STD):
        print("Perform: sklearn standardize")
        X = StandardScaler().fit_transform(X_raw.copy())
    elif(norm_type == _NORM_MY_NORM):
        print("Perform: my normalize")
        # NormJa scales rows, so transpose to scale each feature column.
        X = NormJa(X_raw.copy().T).T
    elif(norm_type == _NORM_MY_STD):
        print("Perform: my standardize")
        # StandardJa standardises rows, so transpose to work per feature.
        X = StandardJa(X_raw.copy().T).T
    # Seeded so that repeated runs evaluate the same sample order.
    X_shuff,y_shuff = shuffle(X,y, random_state=42)

    t_out = 60000  # seconds to wait for each task's result
    # Create the pool before entering `try`: if Pool() itself fails, the
    # `finally` clause must not raise a NameError that hides the real error.
    pool = Pool()
    try:
        # Task p_id gets every N_WORKERS-th combination starting at p_id.
        p_list = [
            pool.apply_async(search, [p_id, X_shuff, y_shuff, comb_list[p_id::N_WORKERS]])
            for p_id in range(N_WORKERS)
        ]
        ans_list = [pending.get(timeout=t_out) for pending in p_list]
    finally:
        print("========= close ========")
        pool.close()
        pool.terminate()

    # Pick the (grid, comb) pair with the highest score; max() keeps the
    # first one on ties.
    best_grid, best_comb = max(ans_list, key=lambda ans: ans[0].best_score_)
    best_score = best_grid.best_score_
    print(f"\tBest Combo {best_comb} | The best parameters are {best_grid.best_params_} with a score of {best_grid.best_score_:.2f}")

Perform: sklearn normalize
	p_id:0 running 0/1363	p_id:2 running 0/1363
	p_id:4 running 0/1363
	p_id:3 running 0/1363	p_id:5 running 0/1363


	p_id:1 running 0/1363
	p_id:1 running 400/1363
	p_id:0 running 400/1363
	p_id:3 running 400/1363
	p_id:5 running 400/1363
	p_id:4 running 400/1363
	p_id:1 running 800/1363
	p_id:0 running 800/1363
	p_id:3 running 800/1363
	p_id:5 running 800/1363
	p_id:4 running 800/1363
	p_id:2 running 400/1363
	p_id:1 running 1200/1363
	p_id:0 running 1200/1363
	p_id:3 running 1200/1363
	p_id:5 running 1200/1363
	1 Done!!
	p_id:4 running 1200/1363
	0 Done!!
	3 Done!!
	5 Done!!
	4 Done!!
	p_id:2 running 800/1363
	p_id:2 running 1200/1363
	2 Done!!
	Best Combo (0, 1, 2, 3, 4, 5, 6) | The best parameters are {'kernel': 'rbf'} with a score of 0.85
Perform: sklearn standardize
	p_id:0 running 0/1363	p_id:1 running 0/1363	p_id:2 running 0/1363

	p_id:3 running 0/1363

	p_id:5 running 0/1363
	p_id:4 running 0/1363
	p_id:1 running 400/1363
	p_id:5 running 400/1363
	p_

In [12]:
# My standardized
# Best Combo (0, 1, 2, 3, 4, 6, 8, 11) | The best parameters are {'kernel': 'rbf'} with a score of 0.92
X_ori, y_ori = load('X_ori_baseline0_2'), load('y_ori_baseline0_2')
X,y = np.array(X_ori), np.array(y_ori)
# StandardJa standardises rows, so transpose to standardise each feature column.
X = StandardJa(X.copy().T).T
comb = (0, 1, 2, 3, 4, 6, 8, 11)
# Seeded so that the fold assignment below is the same on every run.
X_shuff,y_shuff = shuffle(X,y, random_state=42)
# Select the feature columns once instead of re-indexing for every call.
X_selected = X_shuff[:,comb]
model = SVC(kernel='rbf')
model.fit(X_selected, y_shuff)
ans = model.predict(X_selected)
# NOTE: this is accuracy on the same samples the model was fitted on
# (training accuracy); the cross-validated scores below are the held-out estimate.
acc = np.mean(ans == y_shuff)
cross = cross_val_score(model, X_selected, y_shuff, cv=50)
print(acc, cross.mean(), cross)
print(ans)

0.9106060606060606 0.8967032967032966 [0.92857143 0.78571429 0.92857143 0.92857143 0.92857143 0.85714286
 1.         1.         0.92857143 0.85714286 0.92307692 0.76923077
 0.84615385 0.92307692 1.         0.84615385 0.84615385 0.84615385
 0.84615385 0.69230769 0.84615385 0.92307692 0.92307692 0.92307692
 0.92307692 0.84615385 0.92307692 1.         0.92307692 0.84615385
 0.84615385 1.         0.76923077 1.         0.92307692 0.84615385
 0.92307692 0.84615385 0.84615385 0.92307692 0.76923077 0.92307692
 0.92307692 0.92307692 0.92307692 0.84615385 1.         1.
 0.92307692 0.92307692]
[0 0 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 1 0 0
 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0
 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1
 0 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0
 1 1 1 0 1 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1 1 0 0
 1 1 0 1 0 1 1 1 1 1 0 0 0 0 1 1 1 

# Baseline None

In [14]:
from sklearn.svm import SVC

# Exhaustive feature-subset search on the data stored under the
# '*_baseline_none' keys, repeated once per normalisation scheme.
N_FEATURES = 13  # feature columns the combinations are drawn from
N_WORKERS = 6    # number of search() tasks the combinations are split into

# Loading and the combination list do not depend on the normalisation scheme,
# so they are done once instead of on every loop iteration.
X_ori, y_ori = load('X_ori_baseline_none'), load('y_ori_baseline_none')
X_raw, y = np.array(X_ori), np.array(y_ori)

# Every subset of 2..N_FEATURES columns.
comb_list = []
for feature_num in range(2, N_FEATURES + 1):
    comb_list.extend(combinations(range(N_FEATURES), feature_num))

for norm_type in [_NORM_SK_NORM, _NORM_SK_STD, _NORM_MY_NORM, _NORM_MY_STD]:
    # Each branch works on a copy so X_raw stays untouched between schemes.
    if(norm_type == _NORM_SK_NORM):
        print("Perform: sklearn normalize")
        X = normalize(X_raw.copy(), axis=0)
    elif(norm_type == _NORM_SK_STD):
        print("Perform: sklearn standardize")
        X = StandardScaler().fit_transform(X_raw.copy())
    elif(norm_type == _NORM_MY_NORM):
        print("Perform: my normalize")
        # NormJa scales rows, so transpose to scale each feature column.
        X = NormJa(X_raw.copy().T).T
    elif(norm_type == _NORM_MY_STD):
        print("Perform: my standardize")
        # StandardJa standardises rows, so transpose to work per feature.
        X = StandardJa(X_raw.copy().T).T
    # Seeded so that repeated runs evaluate the same sample order.
    X_shuff,y_shuff = shuffle(X,y, random_state=42)

    t_out = 60000  # seconds to wait for each task's result
    # Create the pool before entering `try`: if Pool() itself fails, the
    # `finally` clause must not raise a NameError that hides the real error.
    pool = Pool()
    try:
        # Task p_id gets every N_WORKERS-th combination starting at p_id.
        p_list = [
            pool.apply_async(search, [p_id, X_shuff, y_shuff, comb_list[p_id::N_WORKERS]])
            for p_id in range(N_WORKERS)
        ]
        ans_list = [pending.get(timeout=t_out) for pending in p_list]
    finally:
        print("========= close ========")
        pool.close()
        pool.terminate()

    # Pick the (grid, comb) pair with the highest score; max() keeps the
    # first one on ties.
    best_grid, best_comb = max(ans_list, key=lambda ans: ans[0].best_score_)
    best_score = best_grid.best_score_
    print(f"\tBest Combo {best_comb} | The best parameters are {best_grid.best_params_} with a score of {best_grid.best_score_:.2f}")

Perform: sklearn normalize
	p_id:1 running 0/1363	p_id:2 running 0/1363	p_id:0 running 0/1363


	p_id:3 running 0/1363
	p_id:5 running 0/1363
	p_id:4 running 0/1363
	p_id:1 running 400/1363
	p_id:3 running 400/1363
	p_id:0 running 400/1363
	p_id:5 running 400/1363
	p_id:4 running 400/1363
	p_id:1 running 800/1363
	p_id:3 running 800/1363
	p_id:0 running 800/1363
	p_id:5 running 800/1363
	p_id:4 running 800/1363
	p_id:2 running 400/1363
	p_id:1 running 1200/1363
	p_id:3 running 1200/1363
	p_id:0 running 1200/1363
	p_id:5 running 1200/1363
	p_id:4 running 1200/1363
	1 Done!!
	3 Done!!
	0 Done!!
	5 Done!!
	4 Done!!
	p_id:2 running 800/1363
	p_id:2 running 1200/1363
	2 Done!!
	Best Combo (0, 1, 2, 3, 6) | The best parameters are {'kernel': 'rbf'} with a score of 0.83
Perform: sklearn standardize
	p_id:0 running 0/1363	p_id:1 running 0/1363

	p_id:4 running 0/1363	p_id:2 running 0/1363	p_id:5 running 0/1363
	p_id:3 running 0/1363


	p_id:1 running 400/1363
	p_id:3 running 400/1363
	p_id:4 r

# No high-pass

In [15]:
from sklearn.svm import SVC

# Exhaustive feature-subset search on the data stored under the
# '*_baseline_no_highpass' keys, repeated once per normalisation scheme.
N_FEATURES = 13  # feature columns the combinations are drawn from
N_WORKERS = 6    # number of search() tasks the combinations are split into

# Loading and the combination list do not depend on the normalisation scheme,
# so they are done once instead of on every loop iteration.
X_ori, y_ori = load('X_ori_baseline_no_highpass'), load('y_ori_baseline_no_highpass')
X_raw, y = np.array(X_ori), np.array(y_ori)

# Every subset of 2..N_FEATURES columns.
comb_list = []
for feature_num in range(2, N_FEATURES + 1):
    comb_list.extend(combinations(range(N_FEATURES), feature_num))

for norm_type in [_NORM_SK_NORM, _NORM_SK_STD, _NORM_MY_NORM, _NORM_MY_STD]:
    # Each branch works on a copy so X_raw stays untouched between schemes.
    if(norm_type == _NORM_SK_NORM):
        print("Perform: sklearn normalize")
        X = normalize(X_raw.copy(), axis=0)
    elif(norm_type == _NORM_SK_STD):
        print("Perform: sklearn standardize")
        X = StandardScaler().fit_transform(X_raw.copy())
    elif(norm_type == _NORM_MY_NORM):
        print("Perform: my normalize")
        # NormJa scales rows, so transpose to scale each feature column.
        X = NormJa(X_raw.copy().T).T
    elif(norm_type == _NORM_MY_STD):
        print("Perform: my standardize")
        # StandardJa standardises rows, so transpose to work per feature.
        X = StandardJa(X_raw.copy().T).T
    # Seeded so that repeated runs evaluate the same sample order.
    X_shuff,y_shuff = shuffle(X,y, random_state=42)

    t_out = 60000  # seconds to wait for each task's result
    # Create the pool before entering `try`: if Pool() itself fails, the
    # `finally` clause must not raise a NameError that hides the real error.
    pool = Pool()
    try:
        # Task p_id gets every N_WORKERS-th combination starting at p_id.
        p_list = [
            pool.apply_async(search, [p_id, X_shuff, y_shuff, comb_list[p_id::N_WORKERS]])
            for p_id in range(N_WORKERS)
        ]
        ans_list = [pending.get(timeout=t_out) for pending in p_list]
    finally:
        print("========= close ========")
        pool.close()
        pool.terminate()

    # Pick the (grid, comb) pair with the highest score; max() keeps the
    # first one on ties.
    best_grid, best_comb = max(ans_list, key=lambda ans: ans[0].best_score_)
    best_score = best_grid.best_score_
    print(f"\tBest Combo {best_comb} | The best parameters are {best_grid.best_params_} with a score of {best_grid.best_score_:.2f}")

Perform: sklearn normalize
	p_id:0 running 0/1363	p_id:1 running 0/1363	p_id:2 running 0/1363

	p_id:4 running 0/1363	p_id:3 running 0/1363	p_id:5 running 0/1363



	p_id:3 running 400/1363
	p_id:1 running 400/1363
	p_id:5 running 400/1363
	p_id:0 running 400/1363
	p_id:4 running 400/1363
	p_id:3 running 800/1363
	p_id:1 running 800/1363
	p_id:5 running 800/1363
	p_id:0 running 800/1363
	p_id:4 running 800/1363
	p_id:2 running 400/1363
	p_id:3 running 1200/1363
	p_id:1 running 1200/1363
	p_id:5 running 1200/1363
	p_id:0 running 1200/1363
	3 Done!!
	1 Done!!
	5 Done!!
	p_id:4 running 1200/1363
	0 Done!!
	4 Done!!
	p_id:2 running 800/1363
	p_id:2 running 1200/1363
	2 Done!!
	Best Combo (0, 1, 2, 3, 4) | The best parameters are {'kernel': 'rbf'} with a score of 0.84
Perform: sklearn standardize
	p_id:0 running 0/1363	p_id:1 running 0/1363

	p_id:2 running 0/1363	p_id:3 running 0/1363	p_id:4 running 0/1363	p_id:5 running 0/1363



	p_id:1 running 400/1363
	p_id:2 running 400/1363
	p_id:0 r

# Baseline Only

In [16]:
from sklearn.svm import SVC

# Exhaustive feature-subset search on the data stored under the
# '*_baseline_only' keys, repeated once per normalisation scheme.
N_FEATURES = 13  # feature columns the combinations are drawn from
N_WORKERS = 6    # number of search() tasks the combinations are split into

# Loading and the combination list do not depend on the normalisation scheme,
# so they are done once instead of on every loop iteration.
X_ori, y_ori = load('X_ori_baseline_only'), load('y_ori_baseline_only')
X_raw, y = np.array(X_ori), np.array(y_ori)

# Every subset of 2..N_FEATURES columns.
comb_list = []
for feature_num in range(2, N_FEATURES + 1):
    comb_list.extend(combinations(range(N_FEATURES), feature_num))

for norm_type in [_NORM_SK_NORM, _NORM_SK_STD, _NORM_MY_NORM, _NORM_MY_STD]:
    # Each branch works on a copy so X_raw stays untouched between schemes.
    if(norm_type == _NORM_SK_NORM):
        print("Perform: sklearn normalize")
        X = normalize(X_raw.copy(), axis=0)
    elif(norm_type == _NORM_SK_STD):
        print("Perform: sklearn standardize")
        X = StandardScaler().fit_transform(X_raw.copy())
    elif(norm_type == _NORM_MY_NORM):
        print("Perform: my normalize")
        # NormJa scales rows, so transpose to scale each feature column.
        X = NormJa(X_raw.copy().T).T
    elif(norm_type == _NORM_MY_STD):
        print("Perform: my standardize")
        # StandardJa standardises rows, so transpose to work per feature.
        X = StandardJa(X_raw.copy().T).T
    # Seeded so that repeated runs evaluate the same sample order.
    X_shuff,y_shuff = shuffle(X,y, random_state=42)

    t_out = 60000  # seconds to wait for each task's result
    # Create the pool before entering `try`: if Pool() itself fails, the
    # `finally` clause must not raise a NameError that hides the real error.
    pool = Pool()
    try:
        # Task p_id gets every N_WORKERS-th combination starting at p_id.
        p_list = [
            pool.apply_async(search, [p_id, X_shuff, y_shuff, comb_list[p_id::N_WORKERS]])
            for p_id in range(N_WORKERS)
        ]
        ans_list = [pending.get(timeout=t_out) for pending in p_list]
    finally:
        print("========= close ========")
        pool.close()
        pool.terminate()

    # Pick the (grid, comb) pair with the highest score; max() keeps the
    # first one on ties.
    best_grid, best_comb = max(ans_list, key=lambda ans: ans[0].best_score_)
    best_score = best_grid.best_score_
    print(f"\tBest Combo {best_comb} | The best parameters are {best_grid.best_params_} with a score of {best_grid.best_score_:.2f}")

Perform: sklearn normalize
	p_id:0 running 0/1363	p_id:1 running 0/1363
	p_id:3 running 0/1363	p_id:4 running 0/1363	p_id:2 running 0/1363


	p_id:5 running 0/1363

	p_id:3 running 400/1363
	p_id:1 running 400/1363
	p_id:0 running 400/1363
	p_id:5 running 400/1363
	p_id:4 running 400/1363
	p_id:3 running 800/1363
	p_id:1 running 800/1363
	p_id:0 running 800/1363
	p_id:5 running 800/1363
	p_id:4 running 800/1363
	p_id:2 running 400/1363
	p_id:3 running 1200/1363
	p_id:1 running 1200/1363
	p_id:0 running 1200/1363
	3 Done!!
	p_id:5 running 1200/1363
	1 Done!!
	p_id:4 running 1200/1363
	0 Done!!
	5 Done!!
	4 Done!!
	p_id:2 running 800/1363
	p_id:2 running 1200/1363
	2 Done!!
	Best Combo (0, 1, 2, 3, 4) | The best parameters are {'kernel': 'rbf'} with a score of 0.83
Perform: sklearn standardize
	p_id:1 running 0/1363	p_id:3 running 0/1363	p_id:2 running 0/1363	p_id:5 running 0/1363	p_id:0 running 0/1363	p_id:4 running 0/1363





	p_id:2 running 400/1363
	p_id:1 running 400/1363
	p_id:0 r