# Explore the parameters of DT, RF, KNN, and SVM. Find out which parameters have a significant effect on the accuracy. This is an open question; there is no standard answer.
DT as [DecisionTree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier)<br>
RF as [RandomForest](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html)<br>
KNN as [Kneighbors](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)<br>
SVC as [C-Support Vector Classification](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)

## Python Class for Function Management

In [30]:
from os import system, getcwd
from os.path import join
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import pandas as pd

# Module-level switches read by the code below.
# print_flag: when True, the data-loading steps print the raw DataFrame details.
print_flag = False
# print_result_flag: when True, result tables and summary lines are printed.
print_result_flag = False
# plot_flag: not referenced by any code visible in this notebook.
plot_flag = False


class algorithmOperation():
    """Train one classifier variant on ``wdbc.data`` and score it.

    Each experiment is addressed by a string id ``'<family>.<variant>'``:
    family ``1`` is DecisionTreeClassifier, ``2`` RandomForestClassifier,
    ``3`` KNeighborsClassifier and ``4`` SVC.  ``'<family>.0'`` is the
    estimator with default parameters; every other id overrides exactly one
    constructor argument (see ``_VARIANTS``).

    Attributes set while running:
        data: DataFrame read from ``wdbc.data``.
        X, y: feature matrix and 0/1 target.
        X_train, X_test, y_train, y_test: result of the train/test split.
        model: the estimator built by ``build_mldl_model``.
        y_pred: predictions for ``X_test``.
        y_diff: element-wise ``abs(y_pred - y_test)``.
        y_diff_n: percentage of test samples predicted correctly.
        Tdiff: accuracy of a variant minus accuracy of its family baseline.
    """

    # Keyword arguments passed to the estimator for every experiment id.
    # The estimator class itself is chosen from the id's family prefix.
    _VARIANTS = {
        # ---- 1.x DecisionTreeClassifier ----
        '1.0': {},
        # criterion (default 'gini')
        '1.a1': {'criterion': 'entropy'},
        '1.a2': {'criterion': 'log_loss'},
        # splitter (default 'best'); the variant must differ from the default
        '1.b1': {'splitter': 'random'},
        # max_depth (default None = unlimited)
        '1.c1': {'max_depth': 1},
        # min_samples_split (default 2)
        '1.d1': {'min_samples_split': 5},
        '1.d2': {'min_samples_split': 10},
        # min_weight_fraction_leaf (default 0.0, must be <= 0.5)
        '1.e1': {'min_weight_fraction_leaf': 0.1},
        '1.e2': {'min_weight_fraction_leaf': 0.5},
        # max_features (default None); int = count, float = fraction
        '1.f1': {'max_features': 1},
        '1.f2': {'max_features': 10},
        '1.f3': {'max_features': 0.1},
        '1.f4': {'max_features': 0.9},
        # NOTE(review): "auto" is rejected by newer scikit-learn releases
        # (believed deprecated in 1.1, removed in 1.3) - confirm against the
        # installed version.
        '1.f5': {'max_features': 'auto'},
        '1.f6': {'max_features': 'sqrt'},
        '1.f7': {'max_features': 'log2'},
        # random_state (default None)
        '1.g1': {'random_state': 1},
        '1.g2': {'random_state': 10},
        # max_leaf_nodes (default None, must be >= 2)
        '1.h1': {'max_leaf_nodes': 2},
        '1.h2': {'max_leaf_nodes': 10},
        # min_impurity_decrease (default 0.0)
        '1.i1': {'min_impurity_decrease': 0.1},
        '1.i2': {'min_impurity_decrease': 9.9},
        # class_weight (default None; dict/list forms are skipped)
        '1.j1': {'class_weight': 'balanced'},
        # ccp_alpha (default 0.0, non-negative float)
        '1.k1': {'ccp_alpha': 0.1},
        '1.k2': {'ccp_alpha': 9.9},

        # ---- 2.x RandomForestClassifier ----
        '2.0': {},
        # n_estimators (default 100)
        '2.a1': {'n_estimators': 1},
        '2.a2': {'n_estimators': 100},
        # criterion (default 'gini')
        '2.b1': {'criterion': 'entropy'},
        '2.b2': {'criterion': 'log_loss'},
        # max_depth (default None)
        '2.c1': {'max_depth': 1},
        # min_samples_split (default 2)
        '2.d1': {'min_samples_split': 3},
        '2.d2': {'min_samples_split': 10},
        # min_samples_leaf (default 1)
        '2.e1': {'min_samples_leaf': 2},
        '2.e2': {'min_samples_leaf': 10},
        # min_weight_fraction_leaf (default 0.0, must be <= 0.5)
        '2.f1': {'min_weight_fraction_leaf': 0.1},
        '2.f2': {'min_weight_fraction_leaf': 0.5},
        # max_features (default 'sqrt')
        '2.g1': {'max_features': 'log2'},
        '2.g2': {'max_features': None},
        '2.g3': {'max_features': 1},
        '2.g4': {'max_features': 0.1},
        # max_leaf_nodes (default None = unlimited)
        '2.h1': {'max_leaf_nodes': 10},
        # min_impurity_decrease (default 0.0)
        '2.i1': {'min_impurity_decrease': 0.1},
        '2.i2': {'min_impurity_decrease': 9.9},
        # bootstrap (default True)
        '2.j1': {'bootstrap': False},
        # oob_score (default False)
        '2.k1': {'oob_score': True},
        # Disabled: n_jobs (default None) - '2.l1' n_jobs=-1,
        # '2.l2' n_jobs=1, '2.l3' n_jobs=10.
        # random_state (default None)
        '2.m1': {'random_state': 1},
        '2.m2': {'random_state': 10},
        # verbose (default 0)
        '2.n1': {'verbose': 1},
        '2.n2': {'verbose': 10},
        # warm_start (default False)
        '2.o1': {'warm_start': True},
        # class_weight (default None)
        '2.p1': {'class_weight': 'balanced'},
        '2.p2': {'class_weight': 'balanced_subsample'},
        # ccp_alpha (default 0.0, non-negative float)
        '2.q1': {'ccp_alpha': 0.1},
        '2.q2': {'ccp_alpha': 9.9},
        # max_samples (default None)
        '2.r1': {'max_samples': 0.1},
        '2.r2': {'max_samples': 1.0},

        # ---- 3.x KNeighborsClassifier ----
        '3.0': {},
        # n_neighbors (default 5)
        '3.a1': {'n_neighbors': 1},
        '3.a2': {'n_neighbors': 10},
        # weights (default 'uniform')
        '3.b1': {'weights': 'distance'},
        # algorithm (default 'auto')
        '3.c1': {'algorithm': 'ball_tree'},
        '3.c2': {'algorithm': 'kd_tree'},
        '3.c3': {'algorithm': 'brute'},
        # leaf_size (default 30)
        '3.d1': {'leaf_size': 1},
        '3.d2': {'leaf_size': 100},
        # p (default 2)
        '3.e1': {'p': 1},
        # Disabled: metric="precomputed" (unusable here, needs a square
        # matrix), metric_params, and n_jobs=-1 (number of parallel jobs).

        # ---- 4.x SVC ----
        '4.0': {},
        # C (default 1.0)
        '4.a1': {'C': 0.1},
        '4.a2': {'C': 10.0},
        # Disabled: kernel = "linear" / "poly" / "sigmoid" / "precomputed"
        # ('4.b1'-'4.b4', too slow to compute), degree=4 (poly kernel only).
        # gamma (default 'scale'); the id '4.c1' is the one callers use
        '4.c1': {'gamma': 'auto'},
        # Disabled: '4.d1' coef0=0.1 (poly / sigmoid kernels only).
        # shrinking (default True)
        '4.e1': {'shrinking': False},
        # probability (default False)
        '4.f1': {'probability': True},
        # tol (default 1e-3)
        '4.g1': {'tol': 1e-1},
        # cache_size (default 200, in MB)
        '4.h1': {'cache_size': 400},
        # class_weight (default None)
        '4.i1': {'class_weight': 'balanced'},
        # verbose (default False)
        '4.j1': {'verbose': True},
        # max_iter (default -1 = no limit)
        '4.k1': {'max_iter': 1},
        # decision_function_shape (default 'ovr')
        '4.l1': {'decision_function_shape': 'ovo'},
        # break_ties (default False)
        '4.m1': {'break_ties': True},
        # Disabled: '4.n1' random_state=None.
    }

    def get_data(self):
        """Read ``wdbc.data`` (no header row) from the working directory."""
        path = join(getcwd(), 'wdbc.data').replace('\\', '/')
        self.data = pd.read_csv(path, header=None)
        if print_flag:
            print(self.data)
            print(self.data.shape)
            print(self.data.columns)
            print(self.data.head())

    def set_column_names(self):
        """Label the 32 columns: ``id``, ``malignant`` and 30 features."""
        # NOTE(review): only 'malignant' is looked up by name later on; confirm
        # the feature names match the column order of the data file.
        column_names = ['id', 'malignant',
                        'nucleus_mean', 'nucleus_se', 'nucleus_worst',
                        'texture_mean', 'texture_se', 'texture_worst',
                        'perimeter_mean', 'perimeter_se', 'perimeter_worst',
                        'area_mean', 'area_se', 'area_worst',
                        'smoothness_mean', 'smoothness_se', 'smoothness_worst',
                        'compactness_mean', 'compactness_se', 'compactness_worst',
                        'concavity_mean', 'concavity_se', 'concavity_worst',
                        'concave_pts_mean', 'concave_pts_se', 'concave_pts_worst',
                        'symmetry_mean', 'symmetry_se', 'symmetry_worst',
                        'fractal_dim_mean', 'fractal_dim_se', 'fractal_dim_worst'
                        ]

        self.data.columns = column_names
        if print_flag:
            print(self.data.shape)
            print(self.data.columns)
            print(self.data.head())
            print(self.data.tail(10))

    def make_data_all_numerical(self):
        """Encode the target column: ``'B'`` becomes 0, anything else 1."""
        self.data['malignant'] = self.data['malignant'].map(
            lambda x: 0 if x == 'B' else 1)
        if print_flag:
            print(self.data.tail(10))

    def split_data_into_train_test(self):
        """Build ``X``/``y`` and split them 75/25 with a fixed seed."""
        # NOTE(review): only the target is dropped, so the 'id' column stays in
        # the feature matrix - confirm that this is intended.
        self.X = self.data.drop(columns=['malignant']).values

        # Optional scaling step, currently disabled:
        #     self.X = StandardScaler().fit_transform(self.X)

        self.y = self.data['malignant'].values
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.25, random_state=2018)

    def build_mldl_model(self, n):
        """Instantiate the estimator for experiment id ``n`` into ``self.model``.

        Raises:
            ValueError: if ``n`` is not a key of ``_VARIANTS``.  Previously an
                unknown id silently left ``self.model`` unset or stale.
        """
        if n not in self._VARIANTS:
            raise ValueError('Unknown experiment id: {0!r}'.format(n))
        # Resolved at call time so the table above stays plain data.
        estimators = {
            '1': DecisionTreeClassifier,
            '2': RandomForestClassifier,
            '3': KNeighborsClassifier,
            '4': SVC,
        }
        family = n.split('.')[0]
        self.model = estimators[family](**self._VARIANTS[n])

    def training_mldl_model(self):
        """Fit ``self.model`` on the training split."""
        self.model.fit(self.X_train, self.y_train)

    def test_mldl_model(self):
        """Predict the test split into ``self.y_pred``."""
        self.y_pred = self.model.predict(self.X_test)

    def evaluate_the_result(self, n):
        """Store the mismatch vector and the accuracy in percent.

        ``n`` is accepted for interface compatibility and is not used.
        The accuracy is always computed, including for a perfect prediction,
        so ``compare`` can rely on ``self.y_diff_n`` being fresh.
        """
        self.y_diff = abs(self.y_pred - self.y_test)
        total = len(self.y_test)
        self.y_diff_n = (total - self.y_diff.sum()) / total * 100
        if not self.y_diff.any() and print_result_flag:
            print('Prediction successful, all values are same')

    def prepare_data(self):
        """Load, label, encode and split the data set."""
        self.get_data()
        self.set_column_names()
        self.make_data_all_numerical()
        self.split_data_into_train_test()

    def training_and_testing(self, n):
        """Build, fit, predict and score experiment ``n``."""
        self.build_mldl_model(n)
        self.training_mldl_model()
        self.test_mldl_model()
        self.evaluate_the_result(n)

    def single_run(self, n):
        """Prepare the data, then run experiment ``n`` once."""
        self.prepare_data()
        self.training_and_testing(n)

    def compare(self, n):
        """Return accuracy(variant ``n``) minus accuracy(family baseline).

        The baseline ``'<family>.0'`` is refit on every call, so the result is
        relative to that particular fit rather than to a fixed reference.

        Raises:
            ValueError: if ``n`` is not a key of ``_VARIANTS``.
        """
        # Validate before touching the disk so a bad id fails fast.
        if n not in self._VARIANTS:
            raise ValueError('Unknown experiment id: {0!r}'.format(n))
        baseline_id = n.split('.')[0] + '.0'
        self.prepare_data()
        self.training_and_testing(baseline_id)
        diff1 = self.y_diff_n
        self.training_and_testing(n)
        diff2 = self.y_diff_n
        self.Tdiff = diff2 - diff1
        return self.Tdiff

## Function to execute algorithm
1. Run each comparison on a fresh `algorithmOperation` instance so that no state is shared between runs.
2. Keep the code in the comparison sections tidy.

In [31]:
def c(n):
    """Run one comparison on a fresh instance and split it into sign and size.

    Returns ``[pm, magnitude]`` where ``pm`` is ``'+'`` when the variant beat
    its baseline, ``'x'`` when they tied and ``'-'`` otherwise, and
    ``magnitude`` is the absolute accuracy difference.
    """
    delta = algorithmOperation().compare(n)
    if delta > 0:
        sign = '+'
    else:
        sign = 'x' if delta == 0 else '-'
    return [sign, abs(delta)]

## 1 DT Comparison

In [32]:
# Labels for the DecisionTree variants, in the same order as dt_ids.
# Each label states the argument that the variant actually passes.
dt_types = ['criterion="entropy"', 'criterion="log_loss"', 'splitter="random"', 'max_depth=1', 'min_samples_split=5',
            'min_samples_split=10', 'min_weight_fraction_leaf=0.1', 'min_weight_fraction_leaf=0.5', 'max_features=1',
            'max_features=10', 'max_features=0.1', 'max_features=0.9', 'max_features="auto"', 'max_features="sqrt"',
            'max_features="log2"', 'random_state=1', 'random_state=10', 'max_leaf_nodes=2', 'max_leaf_nodes=10',
            'min_impurity_decrease=0.1', 'min_impurity_decrease=9.9', 'class_weight="balanced"', 'ccp_alpha=0.1',
            'ccp_alpha=9.9']
dt_ids = ['1.a1', '1.a2', '1.b1', '1.c1', '1.d1',
          '1.d2', '1.e1', '1.e2', '1.f1', '1.f2',
          '1.f3', '1.f4', '1.f5', '1.f6', '1.f7',
          '1.g1', '1.g2', '1.h1', '1.h2', '1.i1',
          '1.i2', '1.j1', '1.k1', '1.k2']
# Run every variant exactly once so that the sign and the magnitude of a row
# come from the same fit (the estimators are not seeded by default).
dt_runs = [c(variant_id) for variant_id in dt_ids]
dt_pm = [run[0] for run in dt_runs]
dt_ans = [run[1] for run in dt_runs]



## 2 RF Comparison

In [33]:
# Labels for the RandomForest variants, in the same order as rf_ids.
rf_types = ['n_estimators=1', 'n_estimators=100', 'criterion="entropy"', 'criterion="log_loss"', 'max_depth=1',
            'min_samples_split=3', 'min_samples_split=10', 'min_samples_leaf=2', 'min_samples_leaf=10',
            'min_weight_fraction_leaf=0.1', 'min_weight_fraction_leaf=0.5', 'max_features="log2"', 'max_features=None',
            'max_features=1', 'max_features=0.1', 'max_leaf_nodes=10', 'min_impurity_decrease=0.1', 'min_impurity_decrease=9.9',
            'bootstrap=False', 'oob_score=True', 'n_jobs=-1', 'n_jobs=1', 'n_jobs=10', 'random_state=1', 'random_state=10',
            'verbose=1', 'verbose=10', 'warm_start=True', 'class_weight="balanced"', 'class_weight="balanced_subsample"',
            'ccp_alpha=0.1', 'ccp_alpha=9.9', 'max_samples=0.1', 'max_samples=1.0']
# None marks a variant that is not run (the three n_jobs settings).
rf_ids = ['2.a1', '2.a2', '2.b1', '2.b2', '2.c1', '2.d1', '2.d2',
          '2.e1', '2.e2', '2.f1', '2.f2', '2.g1', '2.g2', '2.g3',
          '2.g4', '2.h1', '2.i1', '2.i2', '2.j1', '2.k1', None,
          None, None, '2.m1', '2.m2', '2.n1', '2.n2', '2.o1',
          '2.p1', '2.p2', '2.q1', '2.q2', '2.r1', '2.r2']
# Run every variant exactly once so that the sign and the magnitude of a row
# come from the same fit (the estimators are not seeded by default).
rf_runs = [c(variant_id) if variant_id is not None else [None, None]
           for variant_id in rf_ids]
rf_pm = [run[0] for run in rf_runs]
rf_ans = [run[1] for run in rf_runs]

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0

building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
b

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0

building tree 1 of 100
building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
b

## 3 KNN Comparison

In [34]:
# Labels for the KNeighbors variants, in the same order as knn_ids.
knn_types = ['n_neighbors=1', 'n_neighbors=10', 'weights="distance"', 'algorithm="ball_tree"', 'algorithm="kd_tree"',
             'algorithm="brute"', 'leaf_size=1', 'leaf_size=100', 'p=1', 'metric="precomputed"', 'metric_params=1',
             'n_jobs=-1']
# None marks a variant that is not run (metric, metric_params, n_jobs).
knn_ids = ['3.a1', '3.a2', '3.b1', '3.c1', '3.c2', '3.c3', '3.d1', '3.d2',
           '3.e1', None, None, None]
# Run every variant exactly once so that the sign and the magnitude of a row
# come from the same comparison.
knn_runs = [c(variant_id) if variant_id is not None else [None, None]
            for variant_id in knn_ids]
knn_pm = [run[0] for run in knn_runs]
knn_ans = [run[1] for run in knn_runs]

## 4 SVC Comparison

In [35]:
# Labels for the SVC variants, in the same order as svc_ids.
svc_types = ['C=0.1', 'C=10.0', 'kernel="linear"', 'kernel="poly"', 'kernel="sigmoid"', 'kernel="precomputed"',
             'gamma="auto"', 'coef0=0.1', 'shrinking=False', 'probability=True', 'tol=1e-1', 'cache_size=400',
             'class_weight="balanced"', 'verbose=True', 'max_iter=1', 'decision_function_shape="ovo"', 'break_ties=True',
             'random_state=None']
# None marks a variant that is not run (kernels, coef0, random_state).
svc_ids = ['4.a1', '4.a2', None, None, None, None, '4.c1', None, '4.e1', '4.f1', '4.g1',
           '4.h1', '4.i1', '4.j1', '4.k1', '4.l1', '4.m1', None]
# Run every variant exactly once so that the sign and the magnitude of a row
# come from the same comparison.
svc_runs = [c(variant_id) if variant_id is not None else [None, None]
            for variant_id in svc_ids]
svc_pm = [run[0] for run in svc_runs]
svc_ans = [run[1] for run in svc_runs]

[LibSVM]



[LibSVM]



# Summary of Results

### Function to display result

In [36]:
def display_result(types, pm, ans, printflag):
    """Rank the variants of one algorithm by the size of their effect.

    Args:
        types: labels of the form ``'param=value'``, one per variant.
        pm: sign markers for each variant, or None for skipped variants.
        ans: absolute accuracy differences, or None for skipped variants.
        printflag: the table is printed only when both this argument and the
            module-level ``print_result_flag`` are truthy.

    Returns:
        ``[summary, table]``.  ``table`` is a DataFrame with the columns
        ``type``, ``pm`` and ``ans`` sorted by ``ans`` in descending order,
        and ``summary`` names the parameter of the top row.  When the inputs
        cannot be ranked, ``summary`` is ``'Parameter error'`` and ``table``
        is empty, so callers can index ``[0]`` and ``[1]`` in both cases.
    """
    # printflag is evaluated first: a falsy value short-circuits the global.
    verbose = printflag and print_result_flag
    columns = ['type', 'pm', 'ans']
    try:
        result = pd.DataFrame([types, pm, ans]).transpose()
        result.columns = columns
        result = result.sort_values(by=['ans'], ascending=False)
        if verbose:
            print()
            print(result)
            print()
        # Select by label: integer keys on a label-indexed row are deprecated.
        top_label = result.iloc[0]['type']
        result_str = 'Most significant effect parameter: ' + str(top_label.split('=')[0])
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        print(e)
        return ['Parameter error', pd.DataFrame(columns=columns)]
    return [result_str, result]

### Print result

In [37]:
# Rank each algorithm once and reuse both parts of the answer; calling
# display_result a second time would print every table twice.
dt_report = display_result(dt_types, dt_pm, dt_ans, True)
rf_report = display_result(rf_types, rf_pm, rf_ans, True)
knn_report = display_result(knn_types, knn_pm, knn_ans, True)
svc_report = display_result(svc_types, svc_pm, svc_ans, True)

# One-line summaries.
DT_str = 'DT result:  ' + dt_report[0]
RF_str = 'RF result:  ' + rf_report[0]
KNN_str = 'KNN result: ' + knn_report[0]
SVC_str = 'SVC result: ' + svc_report[0]

# Ranked tables.
DT_detail = dt_report[1]
RF_detail = rf_report[1]
KNN_detail = knn_report[1]
SVC_detail = svc_report[1]

if print_result_flag:
    print(DT_str)
    print(RF_str)
    print(KNN_str)
    print(SVC_str)

### Export file

In [38]:
from os import makedirs

# Number of numbered copies to write; every copy has the same content
# because the results are computed once, before the loop.
file_num = 5

# Create the output folders up front so that open()/to_csv() cannot fail on a
# missing directory.
makedirs("q1data", exist_ok=True)
makedirs("q1record", exist_ok=True)

summary_text = ''.join(line + '\n' for line in (DT_str, RF_str, KNN_str, SVC_str))

# Two blank rows separate the per-algorithm tables in the detail file.
s = {'type': ['', ''], 'pm': ['', ''], 'ans': ['', '']}
s = pd.DataFrame(data=s)
detail = pd.concat([DT_detail, s, RF_detail, s, KNN_detail, s, SVC_detail], ignore_index=True)
if print_result_flag:
    print(detail)

for i in range(1, file_num + 1):
    with open("q1data/result_sum_{0}.txt".format(i), 'w') as f:
        f.write(summary_text)
    detail.to_csv("q1record/result_detail_{0}.txt".format(i), header=False, index=False, sep=' ')
print('All file exported.')

All file exported.
