### Import Package

In [163]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn import decomposition, ensemble, neighbors, tree, neural_network
from sklearn.ensemble import VotingClassifier
import xgboost

import string
import os
import pandas as pd 
import numpy as np
import pickle
import data_reader

### Load Data

In [164]:
data = data_reader.read()
data.shape

(600, 176)

In [165]:
print(data[:10,-2:])

[[-0.05805464 -0.52113092]
 [ 0.32652215 -0.35021785]
 [-0.46058144 -1.51423159]
 [-0.39751291 -2.0050897 ]
 [ 0.7812306  -0.10708989]
 [ 1.630479    1.49625811]
 [-0.05849049 -0.81698579]
 [ 0.60994349  0.18275919]
 [-0.72237744 -1.48216038]
 [ 0.86143762  1.5444956 ]]


### Preprocessing

In [166]:
data[:,-2:] = np.where(data[:,-2:] > 0.0, 1, 0)

In [167]:
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(data[:,:-2], data[:,-2:], test_size=0.2)

### Model Training

In [168]:
def train_model(classifier, train_x, train_y, valid_x, valid_y):
    # fit the training dataset on the classifier
    classifier.fit(train_x, train_y)   
    # predict the labels on validation dataset
    predictions = classifier.predict(valid_x)
    
    return metrics.accuracy_score(predictions, valid_y), metrics.classification_report(valid_y, predictions)

### Logistic Regression

In [169]:
accuracy, report = train_model(linear_model.LogisticRegression(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_glm: ',accuracy)
print(report)

ch_glm:  0.658333333333
             precision    recall  f1-score   support

        0.0       0.67      0.64      0.66        61
        1.0       0.65      0.68      0.66        59

avg / total       0.66      0.66      0.66       120



In [170]:
accuracy, report = train_model(linear_model.LogisticRegression(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_glm: ',accuracy)
print(report)

en_glm:  0.616666666667
             precision    recall  f1-score   support

        0.0       0.57      0.57      0.57        53
        1.0       0.66      0.66      0.66        67

avg / total       0.62      0.62      0.62       120



### SVM (C=0.2, kernel=linear)

In [171]:
accuracy, report = train_model(svm.SVC(C=0.2, kernel='linear'), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_svm: ',accuracy)
print(report)

ch_svm:  0.675
             precision    recall  f1-score   support

        0.0       0.70      0.64      0.67        61
        1.0       0.66      0.71      0.68        59

avg / total       0.68      0.68      0.67       120



In [172]:
accuracy, report = train_model(svm.SVC(C=0.2, kernel='linear'), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_svm: ',accuracy)
print(report)

en_svm:  0.608333333333
             precision    recall  f1-score   support

        0.0       0.56      0.55      0.55        53
        1.0       0.65      0.66      0.65        67

avg / total       0.61      0.61      0.61       120



### SVM(C=100, kernel=linear)

In [173]:
accuracy, report = train_model(svm.SVC(C=100, kernel='linear'), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_svm: ',accuracy)
print(report)

ch_svm:  0.658333333333
             precision    recall  f1-score   support

        0.0       0.67      0.66      0.66        61
        1.0       0.65      0.66      0.66        59

avg / total       0.66      0.66      0.66       120



In [174]:
accuracy, report = train_model(svm.SVC(C=100, kernel='linear'), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_svm: ',accuracy)
print(report)

en_svm:  0.625
             precision    recall  f1-score   support

        0.0       0.57      0.58      0.58        53
        1.0       0.67      0.66      0.66        67

avg / total       0.63      0.62      0.63       120



### Naive Bayes (GaussianNB)

In [175]:
accuracy, report = train_model(naive_bayes.GaussianNB(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_NB: ',accuracy)
print(report)

ch_NB:  0.675
             precision    recall  f1-score   support

        0.0       0.69      0.66      0.67        61
        1.0       0.66      0.69      0.68        59

avg / total       0.68      0.68      0.67       120



In [176]:
accuracy, report = train_model(naive_bayes.GaussianNB(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_NB: ',accuracy)
print(report)

en_NB:  0.683333333333
             precision    recall  f1-score   support

        0.0       0.67      0.55      0.60        53
        1.0       0.69      0.79      0.74        67

avg / total       0.68      0.68      0.68       120



### Naive Bayes (Multinomial)

In [177]:
accuracy, report = train_model(naive_bayes.MultinomialNB(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_NB: ',accuracy)
print(report)

ch_NB:  0.675
             precision    recall  f1-score   support

        0.0       0.68      0.69      0.68        61
        1.0       0.67      0.66      0.67        59

avg / total       0.67      0.68      0.67       120



In [178]:
accuracy, report = train_model(naive_bayes.MultinomialNB(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_NB: ',accuracy)
print(report)

en_NB:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.63      0.58      0.61        53
        1.0       0.69      0.73      0.71        67

avg / total       0.66      0.67      0.66       120



### RandomForest

In [179]:
accuracy, report = train_model(ensemble.RandomForestClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_RF: ',accuracy)
print(report)

ch_RF:  0.616666666667
             precision    recall  f1-score   support

        0.0       0.63      0.61      0.62        61
        1.0       0.61      0.63      0.62        59

avg / total       0.62      0.62      0.62       120



In [180]:
accuracy, report = train_model(ensemble.RandomForestClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_RF: ',accuracy)
print(report)

en_RF:  0.6
             precision    recall  f1-score   support

        0.0       0.54      0.66      0.59        53
        1.0       0.67      0.55      0.61        67

avg / total       0.61      0.60      0.60       120



### AdaBoost

In [181]:
accuracy, report = train_model(ensemble.AdaBoostClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_AdaBoost: ',accuracy)
print(report)

ch_AdaBoost:  0.65
             precision    recall  f1-score   support

        0.0       0.67      0.61      0.64        61
        1.0       0.63      0.69      0.66        59

avg / total       0.65      0.65      0.65       120



In [182]:
accuracy, report = train_model(ensemble.AdaBoostClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_AdaBoost: ',accuracy)
print(report)

en_AdaBoost:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.59      0.62      0.61        53
        1.0       0.69      0.66      0.67        67

avg / total       0.64      0.64      0.64       120



### KNN

In [183]:
accuracy, report = train_model(neighbors.KNeighborsClassifier(n_neighbors=7), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_knn: ',accuracy)
print(report)

ch_knn:  0.65
             precision    recall  f1-score   support

        0.0       0.63      0.74      0.68        61
        1.0       0.67      0.56      0.61        59

avg / total       0.65      0.65      0.65       120



In [184]:
accuracy, report = train_model(neighbors.KNeighborsClassifier(n_neighbors=7), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_knn: ',accuracy)
print(report)

en_knn:  0.533333333333
             precision    recall  f1-score   support

        0.0       0.46      0.36      0.40        53
        1.0       0.57      0.67      0.62        67

avg / total       0.52      0.53      0.52       120



### XGBoost

In [185]:
accuracy, report = train_model(xgboost.XGBClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_XGB: ',accuracy)
print(report)

ch_XGB:  0.633333333333
             precision    recall  f1-score   support

        0.0       0.65      0.59      0.62        61
        1.0       0.62      0.68      0.65        59

avg / total       0.64      0.63      0.63       120



In [186]:
accuracy, report = train_model(xgboost.XGBClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_XGB: ',accuracy)
print(report)

en_XGB:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.60      0.58      0.59        53
        1.0       0.68      0.69      0.68        67

avg / total       0.64      0.64      0.64       120



### Decision Tree

In [187]:
accuracy, report = train_model(tree.DecisionTreeClassifier(), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_DT: ',accuracy)
print(report)

ch_DT:  0.55
             precision    recall  f1-score   support

        0.0       0.56      0.51      0.53        61
        1.0       0.54      0.59      0.56        59

avg / total       0.55      0.55      0.55       120



In [188]:
accuracy, report = train_model(tree.DecisionTreeClassifier(), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_DT: ',accuracy)
print(report)

en_DT:  0.541666666667
             precision    recall  f1-score   support

        0.0       0.48      0.49      0.49        53
        1.0       0.59      0.58      0.59        67

avg / total       0.54      0.54      0.54       120



### MLP

In [191]:
accuracy, report = train_model(neural_network.MLPClassifier(max_iter=100), train_x, train_y[:,0], valid_x, valid_y[:,0])
print('ch_MLP: ',accuracy)
print(report)

ch_MLP:  0.641666666667
             precision    recall  f1-score   support

        0.0       0.66      0.61      0.63        61
        1.0       0.62      0.68      0.65        59

avg / total       0.64      0.64      0.64       120



In [190]:
accuracy, report = train_model(neural_network.MLPClassifier(max_iter=100), train_x, train_y[:,1], valid_x, valid_y[:,1])
print('en_MLP: ',accuracy)
print(report)

en_MLP:  0.666666666667
             precision    recall  f1-score   support

        0.0       0.62      0.64      0.63        53
        1.0       0.71      0.69      0.70        67

avg / total       0.67      0.67      0.67       120

