### Import Package

In [1]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn import decomposition, ensemble, neighbors, tree, neural_network
from sklearn.ensemble import VotingClassifier
import xgboost

import string
import os
import pandas as pd 
import numpy as np
import pickle
import data_reader

### Load Data

In [2]:
data = data_reader.read()
data.shape

(600, 176)

In [3]:
print(data[:10,-2:])

[[-0.05805464 -0.52113092]
 [ 0.32652215 -0.35021785]
 [-0.46058144 -1.51423159]
 [-0.39751291 -2.0050897 ]
 [ 0.7812306  -0.10708989]
 [ 1.630479    1.49625811]
 [-0.05849049 -0.81698579]
 [ 0.60994349  0.18275919]
 [-0.72237744 -1.48216038]
 [ 0.86143762  1.5444956 ]]


### Preprocessing

In [4]:
data[:,-2:] = np.where(data[:,-2:] > 0.0, 1, 0)

In [5]:
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(data[:,:-2], data[:,-2:], test_size=0.2)

In [6]:
test_x = data[:,:-2]

### Model Training

In [7]:
def train_model(classifier, train_x, train_y, valid_x, valid_y):
    # fit the training dataset on the classifier
    classifier.fit(train_x, train_y)   
    # predict the labels on validation dataset
    predictions = classifier.predict(valid_x)
    
    return metrics.accuracy_score(predictions, valid_y), metrics.classification_report(valid_y, predictions), classifier

In [8]:
def test_models(classifier_ch, classifier_en, test_x):
    diff = []
    pred_ch = classifier_ch.predict(test_x)
    pred_en = classifier_en.predict(test_x)
    for idx in range(len(test_x)):
        if pred_ch[idx] != pred_en[idx]:
            diff.append(idx)
    return diff

### Logistic Regression

In [9]:
accuracy, report, classifier_ch = train_model(linear_model.LogisticRegression(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_glm: ',accuracy)
print(report)

ch_glm:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.67      0.64      0.66        59
        1.0       0.67      0.69      0.68        61

avg / total       0.67      0.67      0.67       120



In [10]:
accuracy, report, classifier_en = train_model(linear_model.LogisticRegression(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_glm: ',accuracy)
print(report)

en_glm:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.58      0.66      0.62        53
        1.0       0.70      0.63      0.66        67

avg / total       0.65      0.64      0.64       120



In [11]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
189
[11, 12, 20, 22, 29, 40, 41, 46, 48, 51, 52, 53, 54, 55, 56, 57, 60, 62, 64, 65, 70, 71, 72, 76, 88, 97, 98, 112, 114, 115, 122, 123, 124, 133, 135, 137, 139, 142, 144, 146, 147, 148, 151, 156, 160, 166, 167, 168, 169, 170, 175, 179, 184, 186, 188, 193, 195, 196, 199, 200, 202, 203, 210, 213, 222, 233, 234, 235, 244, 246, 248, 253, 254, 256, 262, 263, 271, 277, 278, 279, 282, 286, 287, 289, 292, 302, 305, 311, 312, 314, 318, 319, 321, 323, 325, 326, 327, 328, 329, 332, 333, 337, 351, 352, 357, 358, 365, 366, 371, 378, 382, 390, 398, 399, 400, 403, 404, 407, 408, 409, 410, 413, 414, 416, 425, 429, 430, 433, 439, 451, 457, 459, 460, 461, 463, 464, 467, 468, 470, 471, 472, 476, 477, 480, 484, 485, 486, 487, 488, 493, 497, 499, 500, 503, 508, 510, 516, 520, 523, 524, 526, 529, 532, 533, 536, 538, 542, 543, 544, 550, 554, 555, 556, 557, 558, 565, 567, 573, 577, 578, 581, 582, 583, 584, 585, 587, 590, 595, 598]


### SVM (C=0.2, kernel=linear)

In [12]:
accuracy, report, classifier_ch= train_model(svm.SVC(C=0.2, kernel='linear'), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_svm: ',accuracy)
print(report)

ch_svm:  0.691666666667
             precision    recall  f1-score   support

        0.0       0.68      0.71      0.69        59
        1.0       0.71      0.67      0.69        61

avg / total       0.69      0.69      0.69       120



In [13]:
accuracy, report, classifier_en = train_model(svm.SVC(C=0.2, kernel='linear'), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_svm: ',accuracy)
print(report)

en_svm:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.61      0.70      0.65        53
        1.0       0.73      0.64      0.68        67

avg / total       0.67      0.67      0.67       120



In [14]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
173
[11, 12, 20, 29, 37, 40, 46, 48, 51, 52, 53, 54, 55, 56, 57, 58, 62, 64, 71, 72, 76, 82, 88, 98, 103, 111, 112, 114, 122, 123, 124, 132, 133, 135, 136, 139, 142, 144, 146, 147, 148, 151, 156, 166, 167, 168, 169, 170, 173, 175, 184, 186, 189, 193, 195, 196, 200, 202, 210, 211, 218, 234, 235, 244, 248, 250, 253, 254, 256, 262, 263, 271, 272, 273, 277, 278, 279, 282, 283, 285, 286, 287, 289, 291, 299, 311, 312, 314, 318, 319, 321, 323, 325, 326, 327, 328, 332, 337, 351, 352, 357, 362, 365, 366, 371, 373, 383, 386, 393, 398, 399, 400, 402, 404, 408, 409, 414, 424, 430, 433, 436, 443, 451, 457, 459, 460, 463, 464, 468, 470, 471, 472, 476, 477, 486, 487, 488, 497, 499, 500, 503, 508, 510, 516, 520, 523, 524, 529, 532, 533, 538, 542, 543, 544, 555, 556, 557, 558, 565, 567, 573, 574, 575, 577, 578, 581, 582, 583, 584, 587, 590, 595, 598]


### SVM(C=100, kernel=linear)

In [15]:
accuracy, report, classifier_ch = train_model(svm.SVC(C=100, kernel='linear'), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_svm: ',accuracy)
print(report)

ch_svm:  0.65
             precision    recall  f1-score   support

        0.0       0.65      0.63      0.64        59
        1.0       0.65      0.67      0.66        61

avg / total       0.65      0.65      0.65       120



In [16]:
accuracy, report, classifier_en = train_model(svm.SVC(C=100, kernel='linear'), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_svm: ',accuracy)
print(report)

en_svm:  0.583333333333
             precision    recall  f1-score   support

        0.0       0.53      0.58      0.55        53
        1.0       0.64      0.58      0.61        67

avg / total       0.59      0.58      0.58       120



In [17]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
195
[0, 1, 4, 11, 17, 18, 20, 22, 29, 34, 37, 38, 40, 41, 46, 48, 51, 53, 54, 56, 59, 60, 62, 64, 65, 70, 71, 72, 73, 76, 77, 78, 82, 88, 92, 97, 98, 107, 112, 114, 123, 124, 128, 133, 135, 136, 139, 143, 144, 146, 147, 148, 151, 153, 155, 160, 166, 167, 168, 169, 170, 173, 175, 179, 184, 186, 188, 195, 196, 200, 202, 203, 213, 218, 222, 234, 235, 244, 246, 248, 250, 253, 254, 256, 262, 263, 265, 280, 282, 283, 284, 286, 292, 302, 305, 310, 311, 314, 319, 321, 323, 325, 326, 327, 328, 329, 332, 333, 337, 340, 346, 352, 356, 357, 358, 360, 365, 366, 371, 373, 376, 378, 382, 388, 390, 393, 394, 395, 398, 399, 400, 404, 408, 409, 411, 413, 416, 420, 424, 425, 428, 429, 438, 444, 455, 461, 463, 464, 466, 467, 472, 473, 476, 480, 485, 487, 488, 490, 492, 497, 503, 508, 509, 510, 512, 516, 523, 524, 526, 528, 532, 536, 538, 542, 544, 546, 547, 551, 554, 555, 558, 560, 565, 567, 571, 573, 574, 575, 578, 582, 583, 584, 587, 595, 597]


### Naive Bayes (GaussianNB)

In [18]:
accuracy, report, classifier_ch = train_model(naive_bayes.GaussianNB(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_NB: ',accuracy)
print(report)

ch_NB:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.64      0.61      0.63        59
        1.0       0.64      0.67      0.66        61

avg / total       0.64      0.64      0.64       120



In [19]:
accuracy, report, classifier_en = train_model(naive_bayes.GaussianNB(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_NB: ',accuracy)
print(report)

en_NB:  0.675
             precision    recall  f1-score   support

        0.0       0.67      0.53      0.59        53
        1.0       0.68      0.79      0.73        67

avg / total       0.67      0.68      0.67       120



In [20]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
200
[0, 6, 12, 15, 16, 20, 22, 27, 29, 32, 35, 37, 39, 40, 41, 48, 51, 54, 55, 57, 65, 67, 72, 76, 81, 87, 88, 98, 101, 103, 104, 107, 109, 110, 112, 114, 118, 120, 124, 129, 132, 133, 135, 139, 142, 143, 144, 146, 147, 149, 151, 158, 160, 163, 167, 168, 169, 170, 171, 179, 180, 181, 186, 188, 192, 195, 196, 197, 198, 200, 205, 206, 212, 213, 215, 219, 222, 228, 231, 238, 244, 245, 246, 251, 252, 253, 254, 256, 257, 262, 279, 281, 282, 283, 286, 287, 288, 289, 292, 298, 299, 305, 307, 311, 312, 314, 319, 321, 323, 326, 328, 333, 338, 340, 341, 359, 362, 364, 366, 367, 376, 382, 383, 385, 386, 388, 390, 393, 398, 401, 403, 404, 408, 409, 410, 414, 417, 421, 424, 428, 430, 431, 433, 436, 438, 439, 442, 443, 445, 463, 464, 466, 471, 476, 481, 487, 488, 497, 500, 503, 505, 508, 512, 516, 518, 519, 520, 522, 523, 524, 525, 532, 533, 534, 538, 539, 541, 542, 547, 550, 554, 558, 561, 562, 566, 567, 569, 571, 574, 577, 578, 581, 582, 583, 584, 587, 590, 594, 

### Naive Bayes (Multinomial)

In [21]:
accuracy, report, classifier_ch = train_model(naive_bayes.MultinomialNB(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_NB: ',accuracy)
print(report)

ch_NB:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.64      0.63      0.63        59
        1.0       0.65      0.66      0.65        61

avg / total       0.64      0.64      0.64       120



In [22]:
accuracy, report, classifier_en = train_model(naive_bayes.MultinomialNB(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_NB: ',accuracy)
print(report)

en_NB:  0.7
             precision    recall  f1-score   support

        0.0       0.64      0.74      0.68        53
        1.0       0.76      0.67      0.71        67

avg / total       0.71      0.70      0.70       120



In [23]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
168
[0, 4, 12, 14, 20, 27, 32, 37, 41, 46, 54, 65, 66, 68, 71, 72, 76, 77, 85, 88, 93, 98, 103, 107, 112, 115, 124, 129, 132, 133, 136, 139, 140, 142, 144, 145, 146, 147, 148, 149, 153, 156, 160, 161, 163, 167, 169, 170, 171, 175, 179, 181, 186, 188, 195, 196, 197, 200, 205, 212, 215, 216, 218, 228, 233, 234, 237, 244, 246, 250, 252, 254, 256, 262, 263, 278, 279, 280, 282, 283, 286, 292, 293, 299, 311, 312, 314, 318, 321, 323, 326, 327, 328, 329, 333, 347, 353, 365, 366, 367, 371, 378, 382, 383, 385, 386, 390, 393, 399, 400, 401, 403, 410, 414, 427, 432, 436, 438, 445, 457, 463, 464, 466, 467, 468, 471, 476, 480, 482, 483, 487, 492, 493, 508, 516, 519, 520, 522, 523, 524, 525, 529, 532, 538, 541, 542, 543, 545, 550, 551, 553, 555, 557, 558, 561, 562, 566, 574, 575, 578, 581, 582, 583, 584, 590, 595, 597, 598]


### RandomForest

In [24]:
accuracy, report, classifier_ch = train_model(ensemble.RandomForestClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_RF: ',accuracy)
print(report)

ch_RF:  0.6
             precision    recall  f1-score   support

        0.0       0.58      0.66      0.62        59
        1.0       0.62      0.54      0.58        61

avg / total       0.60      0.60      0.60       120



In [25]:
accuracy, report, classifier_en = train_model(ensemble.RandomForestClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_RF: ',accuracy)
print(report)

en_RF:  0.65
             precision    recall  f1-score   support

        0.0       0.59      0.70      0.64        53
        1.0       0.72      0.61      0.66        67

avg / total       0.66      0.65      0.65       120



In [26]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
191
[1, 11, 17, 18, 19, 20, 22, 37, 40, 45, 46, 48, 50, 51, 53, 54, 56, 57, 60, 61, 64, 70, 71, 72, 73, 76, 82, 84, 88, 89, 90, 93, 99, 101, 103, 107, 110, 111, 112, 114, 122, 123, 124, 133, 136, 139, 140, 144, 146, 147, 151, 152, 155, 167, 168, 169, 173, 175, 184, 187, 194, 196, 199, 205, 206, 207, 213, 218, 219, 220, 226, 234, 235, 240, 244, 246, 247, 248, 250, 253, 254, 256, 260, 263, 265, 273, 280, 282, 284, 291, 292, 311, 313, 314, 319, 321, 323, 326, 327, 328, 329, 332, 336, 337, 340, 346, 347, 351, 353, 354, 356, 357, 360, 364, 365, 366, 373, 392, 393, 395, 398, 399, 400, 404, 408, 409, 411, 413, 414, 416, 420, 429, 434, 436, 438, 439, 449, 453, 455, 456, 460, 463, 464, 467, 471, 474, 476, 477, 478, 479, 480, 488, 490, 491, 492, 497, 498, 499, 503, 507, 508, 509, 510, 512, 518, 519, 523, 524, 525, 526, 532, 536, 539, 544, 546, 547, 553, 554, 555, 558, 565, 567, 571, 575, 578, 582, 583, 584, 585, 595, 597]


### AdaBoost

In [27]:
accuracy, report, classfier_ch = train_model(ensemble.AdaBoostClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_AdaBoost: ',accuracy)
print(report)

ch_AdaBoost:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.65      0.58      0.61        59
        1.0       0.63      0.70      0.67        61

avg / total       0.64      0.64      0.64       120



In [28]:
accuracy, report, classifier_en = train_model(ensemble.AdaBoostClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_AdaBoost: ',accuracy)
print(report)

en_AdaBoost:  0.708333333333
             precision    recall  f1-score   support

        0.0       0.65      0.74      0.69        53
        1.0       0.77      0.69      0.72        67

avg / total       0.72      0.71      0.71       120



In [29]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
211
[0, 12, 17, 18, 20, 32, 37, 41, 45, 46, 50, 53, 54, 55, 56, 59, 60, 61, 62, 64, 65, 70, 71, 72, 73, 74, 76, 77, 82, 88, 89, 90, 93, 101, 107, 109, 110, 111, 123, 124, 132, 136, 139, 144, 147, 148, 151, 155, 156, 160, 166, 167, 168, 169, 173, 175, 177, 184, 186, 187, 188, 189, 193, 194, 195, 199, 206, 207, 216, 226, 228, 231, 235, 244, 246, 247, 248, 251, 254, 262, 263, 265, 271, 273, 280, 282, 283, 286, 287, 288, 289, 292, 297, 299, 300, 312, 314, 318, 321, 323, 324, 326, 327, 328, 329, 336, 337, 339, 340, 342, 344, 346, 347, 348, 351, 353, 356, 357, 358, 365, 366, 369, 372, 373, 376, 378, 382, 385, 386, 388, 390, 393, 394, 395, 398, 401, 403, 404, 406, 408, 409, 410, 411, 414, 416, 428, 429, 430, 434, 437, 438, 443, 449, 455, 460, 463, 464, 467, 468, 471, 473, 474, 476, 478, 479, 480, 482, 484, 486, 487, 488, 490, 491, 496, 497, 500, 503, 505, 508, 509, 510, 512, 516, 519, 524, 525, 526, 528, 532, 539, 541, 542, 545, 546, 550, 555, 558, 564, 566,

### KNN

In [30]:
accuracy, report, classifier_ch = train_model(neighbors.KNeighborsClassifier(n_neighbors=7), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_knn: ',accuracy)
print(report)

ch_knn:  0.541666666667
             precision    recall  f1-score   support

        0.0       0.53      0.64      0.58        59
        1.0       0.56      0.44      0.50        61

avg / total       0.55      0.54      0.54       120



In [31]:
accuracy, report, classifier_en = train_model(neighbors.KNeighborsClassifier(n_neighbors=7), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_knn: ',accuracy)
print(report)

en_knn:  0.65
             precision    recall  f1-score   support

        0.0       0.62      0.55      0.58        53
        1.0       0.67      0.73      0.70        67

avg / total       0.65      0.65      0.65       120



In [32]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
191
[0, 11, 17, 18, 22, 24, 33, 37, 38, 41, 43, 45, 46, 50, 52, 53, 54, 55, 60, 66, 68, 76, 85, 90, 100, 103, 107, 108, 110, 112, 113, 114, 126, 128, 135, 136, 139, 142, 145, 149, 153, 154, 159, 164, 166, 171, 174, 180, 181, 184, 185, 186, 195, 196, 198, 200, 202, 204, 205, 207, 208, 210, 211, 215, 222, 227, 228, 230, 232, 233, 237, 244, 250, 253, 255, 260, 262, 265, 270, 271, 272, 273, 279, 280, 283, 284, 286, 288, 291, 298, 304, 305, 307, 308, 311, 312, 314, 316, 319, 321, 328, 329, 330, 332, 333, 335, 343, 346, 348, 354, 357, 364, 365, 369, 374, 377, 378, 391, 393, 394, 396, 398, 402, 404, 406, 407, 408, 409, 410, 417, 424, 425, 428, 430, 432, 433, 435, 436, 440, 448, 449, 455, 457, 459, 460, 467, 472, 477, 478, 480, 484, 487, 490, 492, 493, 496, 503, 506, 507, 508, 509, 511, 515, 518, 524, 525, 526, 529, 530, 532, 537, 538, 539, 540, 541, 543, 550, 554, 555, 557, 558, 565, 571, 573, 578, 581, 582, 589, 593, 595, 599]


### XGBoost

In [33]:
accuracy, report, classifier_ch = train_model(xgboost.XGBClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_XGB: ',accuracy)
print(report)

ch_XGB:  0.658333333333
             precision    recall  f1-score   support

        0.0       0.64      0.71      0.67        59
        1.0       0.69      0.61      0.64        61

avg / total       0.66      0.66      0.66       120



In [34]:
accuracy, report, classifier_en = train_model(xgboost.XGBClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_XGB: ',accuracy)
print(report)

en_XGB:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.62      0.62      0.62        53
        1.0       0.70      0.70      0.70        67

avg / total       0.67      0.67      0.67       120



In [35]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
195
[0, 1, 5, 11, 18, 20, 22, 27, 32, 34, 37, 40, 41, 46, 48, 52, 54, 56, 59, 60, 64, 65, 70, 71, 72, 76, 81, 88, 90, 91, 93, 98, 99, 101, 107, 110, 111, 112, 114, 123, 124, 132, 136, 137, 139, 142, 144, 147, 148, 151, 152, 155, 156, 166, 167, 168, 169, 170, 173, 175, 184, 186, 188, 193, 195, 196, 200, 203, 216, 218, 228, 229, 233, 234, 235, 237, 246, 253, 256, 262, 263, 265, 271, 273, 279, 282, 286, 289, 291, 300, 308, 312, 313, 314, 321, 323, 326, 327, 328, 330, 333, 341, 346, 351, 352, 353, 354, 356, 357, 358, 361, 365, 366, 371, 373, 376, 378, 386, 388, 399, 400, 408, 409, 411, 413, 414, 416, 420, 421, 430, 433, 434, 436, 437, 438, 439, 440, 443, 451, 460, 463, 464, 467, 471, 476, 477, 480, 484, 487, 488, 490, 492, 493, 497, 498, 500, 507, 508, 509, 510, 512, 516, 518, 520, 524, 525, 529, 532, 533, 536, 538, 541, 542, 543, 547, 550, 554, 555, 558, 564, 565, 566, 567, 571, 574, 575, 577, 578, 582, 583, 584, 585, 587, 595, 597]


### Decision Tree

In [36]:
accuracy, report, classifier_ch = train_model(tree.DecisionTreeClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_DT: ',accuracy)
print(report)

ch_DT:  0.616666666667
             precision    recall  f1-score   support

        0.0       0.62      0.56      0.59        59
        1.0       0.61      0.67      0.64        61

avg / total       0.62      0.62      0.62       120



In [37]:
accuracy, report, classifier_en = train_model(tree.DecisionTreeClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_DT: ',accuracy)
print(report)

en_DT:  0.633333333333
             precision    recall  f1-score   support

        0.0       0.58      0.62      0.60        53
        1.0       0.68      0.64      0.66        67

avg / total       0.64      0.63      0.63       120



In [38]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
201
[1, 4, 5, 10, 11, 17, 18, 20, 22, 29, 30, 34, 37, 38, 40, 42, 45, 46, 48, 49, 50, 51, 52, 54, 56, 59, 60, 70, 71, 73, 76, 82, 83, 88, 90, 91, 92, 93, 99, 100, 103, 107, 110, 111, 114, 122, 123, 124, 130, 133, 135, 139, 140, 143, 144, 146, 147, 151, 152, 167, 168, 169, 173, 175, 184, 186, 194, 195, 196, 203, 205, 206, 207, 213, 216, 218, 219, 223, 234, 235, 240, 241, 244, 246, 248, 250, 253, 254, 260, 263, 265, 267, 272, 276, 280, 282, 284, 291, 297, 305, 310, 311, 314, 321, 323, 326, 327, 328, 330, 332, 333, 336, 337, 340, 342, 346, 351, 354, 356, 357, 360, 364, 365, 366, 371, 373, 376, 382, 386, 388, 392, 395, 396, 398, 399, 400, 404, 406, 407, 408, 409, 411, 413, 414, 415, 416, 420, 429, 430, 434, 435, 436, 438, 444, 455, 456, 460, 463, 464, 465, 467, 471, 474, 476, 480, 490, 491, 492, 497, 498, 499, 503, 507, 508, 509, 510, 512, 522, 524, 531, 532, 536, 539, 546, 547, 551, 554, 555, 558, 565, 567, 571, 575, 578, 582, 583, 584, 585, 587, 595, 59

### MLP

In [39]:
accuracy, report, classifier_ch = train_model(neural_network.MLPClassifier(max_iter=100), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_MLP: ',accuracy)
print(report)

ch_MLP:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.63      0.66      0.64        59
        1.0       0.66      0.62      0.64        61

avg / total       0.64      0.64      0.64       120





In [40]:
accuracy, report, classifier_en = train_model(neural_network.MLPClassifier(max_iter=100), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_MLP: ',accuracy)
print(report)

en_MLP:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.61      0.68      0.64        53
        1.0       0.72      0.66      0.69        67

avg / total       0.67      0.67      0.67       120



In [41]:
diff_list = test_models(classifier_ch, classifier_en, test_x)
print("Different idx between two models: ")
print(len(diff_list))
print(diff_list)

Different idx between two models: 
177
[1, 4, 11, 17, 19, 20, 22, 29, 34, 37, 40, 45, 46, 48, 51, 53, 54, 56, 60, 70, 71, 72, 73, 76, 82, 84, 88, 90, 92, 93, 103, 107, 110, 111, 114, 115, 122, 123, 124, 133, 135, 139, 144, 146, 147, 151, 152, 167, 168, 169, 170, 173, 175, 184, 186, 193, 194, 195, 196, 203, 205, 206, 207, 213, 218, 219, 234, 235, 240, 244, 246, 248, 253, 254, 256, 260, 263, 265, 272, 282, 284, 291, 292, 300, 305, 311, 314, 319, 321, 323, 326, 327, 328, 332, 333, 337, 340, 346, 354, 356, 357, 364, 365, 366, 371, 373, 376, 382, 392, 393, 395, 396, 398, 399, 400, 404, 407, 408, 409, 411, 413, 414, 416, 420, 429, 430, 434, 436, 438, 451, 460, 461, 463, 464, 467, 471, 472, 474, 476, 477, 480, 488, 490, 491, 492, 497, 498, 503, 507, 508, 509, 510, 512, 522, 524, 529, 532, 533, 536, 539, 546, 547, 551, 554, 555, 558, 565, 567, 571, 575, 578, 582, 583, 584, 587, 595, 597]
