In [137]:
import os
import sys
import time
import datetime
import socket
import yaml
import shutil

import pandas as pd
import numpy as np

from sklearn.externals import joblib 

from sklearn.metrics import accuracy_score, log_loss, f1_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [138]:
folderFeature = 'feature'
feaPaths = ['../xprmt/'+folderFeature+'/extract-slbp-melanoma_binary_augmented-mlbox-20181117-151136/melanoma_binary_augmented',
           '../xprmt/'+folderFeature+'/extract-haralick-melanoma_binary_augmented-mlbox-20181117-151626/melanoma_binary_augmented',
           '../xprmt/'+folderFeature+'/extract-wtio-melanoma_binary_augmented-mlbox-20181117-145744/melanoma_binary_augmented',
          ]

featureList = []
labelList = []
for feaPath in feaPaths:
    print(feaPath)
    xListL = []
    yListL = []
    yLi = os.listdir(feaPath)
    sumdat = 0
    for i in yLi:
        file = os.listdir(feaPath+ "/" + i)
        cnt = 0
        for j in file:
            if j.endswith('.pkl'):
                #print(j)
                x = joblib.load(feaPath + "/" + i + "/" +j)
                xListL.append(x)
                yListL.append(i)
                cnt += 1
        print(str(i) + " " + str(cnt))
        sumdat += cnt
    
    print("semuanya "+ str(sumdat))
    featureList.append(xListL)
    labelList.append(yListL)

xList = np.concatenate((featureList[0], featureList[1]), axis=1)
for idx in range(len(featureList)-2):
    xList = np.concatenate((xList, featureList[idx+2]), axis=1)

xList1 = xList
yList1 = labelList[0]

../xprmt/feature/extract-slbp-melanoma_binary_augmented-mlbox-20181117-151136/melanoma_binary_augmented
NonMelanoma 160
Melanoma 160
semuanya 320
../xprmt/feature/extract-haralick-melanoma_binary_augmented-mlbox-20181117-151626/melanoma_binary_augmented
NonMelanoma 160
Melanoma 160
semuanya 320
../xprmt/feature/extract-wtio-melanoma_binary_augmented-mlbox-20181117-145744/melanoma_binary_augmented
NonMelanoma 160
Melanoma 160
semuanya 320


In [139]:
xList = np.array(xList1)
yList = np.array(yList1)
print(xList.shape)
print(yList.shape)

(320, 2079)
(320,)


In [140]:
from sklearn.preprocessing import normalize
xList = normalize(xList, norm='l2', axis=0)

In [141]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(xList, yList, random_state = 42, test_size = 0.2)

In [142]:
print(X_train.shape)

(256, 2079)


In [143]:
from sklearn.ensemble import RandomForestClassifier
from imblearn.metrics import sensitivity_specificity_support
clf = RandomForestClassifier(random_state=6)
clf.fit(X_train, y_train)
prediksi = clf.predict(X_test)
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, prediksi))
print(classification_report(y_test, prediksi))
print(sensitivity_specificity_support(y_test, prediksi, average='macro'))

0.921875
              precision    recall  f1-score   support

    Melanoma       0.97      0.88      0.92        32
 NonMelanoma       0.89      0.97      0.93        32

   micro avg       0.92      0.92      0.92        64
   macro avg       0.93      0.92      0.92        64
weighted avg       0.93      0.92      0.92        64

(0.921875, 0.921875, None)


In [144]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
from sklearn.model_selection import cross_val_score
score = cross_val_score(clf, xList, yList, cv=10)
print(score)
print(score.mean())

[0.9375  0.96875 0.90625 0.96875 0.90625 1.      0.78125 0.96875 0.90625
 0.96875]
0.93125


In [145]:
clf1 = RandomForestClassifier(random_state=1)
clf2 = GradientBoostingClassifier()
clf3 = LinearDiscriminantAnalysis()
eclf1 = VotingClassifier(estimators=[('rf', clf1), ('gb', clf2), ('lda', clf3)], voting='hard')
score = cross_val_score(clf, xList, yList, cv=10)
print(score)
print(score.mean())

[0.90625 0.96875 0.9375  0.9375  0.90625 1.      0.84375 0.96875 0.875
 0.9375 ]
0.928125


In [146]:

classifiers = [
    KNeighborsClassifier(3),
    #SVC(kernel="rbf", C=0.025, probability=True),
    #NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]

# Logging for Visual Comparison
log_cols=["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, train_predictions)
    print("Accuracy: {:.4%}".format(acc))
    
    train_predictions = clf.predict_proba(X_test)
    ll = log_loss(y_test, train_predictions)
    print("Log Loss: {}".format(ll))
    
    log_entry = pd.DataFrame([[name, acc*100, ll]], columns=log_cols)
    log = log.append(log_entry)
    
    ######################### perform 10 fold validation ######################
    kf = StratifiedKFold(n_splits=10)
    scorelist = []
    sensitivity = []
    specificity = []
    f1 = []
    for train_index, test_index in kf.split(xList, yList):
        clf.fit(xList[train_index], yList[train_index])
        p = clf.predict(xList[test_index])
        accval = accuracy_score(yList[test_index], p)
        scorelist.append(accval)
        sss = sensitivity_specificity_support(yList[test_index], p, average='macro')
        sensitivity.append(sss[0])
        specificity.append(sss[1])
        f1.append(f1_score(yList[test_index], p, average='macro') )

        
    print("MeanCVScore: {}".format(sum(scorelist)/len(scorelist)))
    print("10FoldCVScore: {}".format(scorelist))
    print("sensitivity: {}".format(sum(sensitivity)/len(sensitivity)))
    print("specificity: {}".format(sum(specificity)/len(specificity)))
    print("f1-score: {}".format(sum(f1)/len(f1)))

    #############################################################################
    
print("="*30)

KNeighborsClassifier
****Results****
Accuracy: 89.0625%
Log Loss: 1.1968428089641059
MeanCVScore: 0.90625
10FoldCVScore: [0.9375, 0.9375, 0.90625, 0.9375, 0.9375, 0.96875, 0.8125, 0.875, 0.8125, 0.9375]
sensitivity: 0.90625
specificity: 0.90625
f1-score: 0.9059432289449303
DecisionTreeClassifier
****Results****
Accuracy: 90.6250%
Log Loss: 3.2380102870228775
MeanCVScore: 0.9125
10FoldCVScore: [0.96875, 0.96875, 0.9375, 0.8125, 0.875, 0.96875, 0.8125, 0.90625, 0.875, 1.0]
sensitivity: 0.9125
specificity: 0.9125
f1-score: 0.9121105230948684
RandomForestClassifier
****Results****
Accuracy: 92.1875%
Log Loss: 0.18863115446562914
MeanCVScore: 0.934375
10FoldCVScore: [0.96875, 1.0, 0.90625, 0.96875, 0.90625, 0.96875, 0.78125, 1.0, 0.90625, 0.9375]
sensitivity: 0.934375
specificity: 0.934375
f1-score: 0.9337664108821604
AdaBoostClassifier
****Results****
Accuracy: 93.7500%
Log Loss: 0.30975729761556664
MeanCVScore: 0.959375
10FoldCVScore: [0.96875, 0.96875, 0.96875, 0.90625, 0.96875, 0.96875,

In [118]:
# clf1 = RandomForestClassifier(random_state=1)
# clf2 = GradientBoostingClassifier()
# clf3 = LinearDiscriminantAnalysis()
# eclf1 = VotingClassifier(estimators=[('rf', clf1), ('gb', clf2), ('lda', clf3)], voting='hard')
# score = cross_val_score(clf, xList, yList, cv=10)
# print(score)
# print(score.mean())

In [None]:
# cm1 = confusion_matrix(yList[test_index],p)
# print('Confusion Matrix : \n', cm1)

# total1=sum(sum(cm1))
# #####from confusion matrix calculate accuracy
# accuracy1=(cm1[0,0]+cm1[1,1])/total1
# print ('Accuracy : ', accuracy1)

# sensitivity1 = cm1[0,0]/(cm1[0,0]+cm1[0,1])
# print('Sensitivity : ', sensitivity1 )

# specificity1 = cm1[1,1]/(cm1[1,0]+cm1[1,1])
# print('Specificity : ', specificity1)

# print(classification_report(yList[test_index], p))
    
# score = cross_val_score(clf, xList, yList, cv=10)
# print("MeanCVScore: {}".format(score.mean()))
# print("10FoldCVScore: {}".format(score))

In [None]:
# feaPath = '../xprmt/feature/extract-slbp-melanoma_binary_augmented-mlbox-20181117-151136/melanoma_binary_augmented'

# xListL = []
# yListL = []
# yLi = os.listdir(feaPath)
# sumdat = 0
# for i in yLi:
#     file = os.listdir(feaPath+ "/" + i)
#     cnt = 0
#     for j in file:
#         if j.endswith('.pkl'):
#             #print(j)
#             x = joblib.load(feaPath + "/" + i + "/" +j)
#             xListL.append(x)
#             yListL.append(i)
#             cnt += 1
#     print(str(i) + " " + str(cnt))
#     sumdat += cnt
# print("semuanya "+ str(sumdat))
# feaPath = '../xprmt/feature/extract-haralick-melanoma_binary_augmented-mlbox-20181117-151626/melanoma_binary_augmented'

# xListH = []
# yListH = []
# yLi = os.listdir(feaPath)
# sumdat = 0
# for i in yLi:
#     file = os.listdir(feaPath+ "/" + i)
#     cnt = 0
#     for j in file:
#         if j.endswith('.pkl'):
#             #print(j)
#             x = joblib.load(feaPath + "/" + i + "/" +j)
#             xListH.append(x)
#             yListH.append(i)
#             cnt += 1
#     print(str(i) + " " + str(cnt))
#     sumdat += cnt
# print("semuanya "+ str(sumdat))
# feaPath = '../xprmt/feature/extract-wtio-melanoma_binary_augmented-mlbox-20181117-145744/melanoma_binary_augmented'

# xListW = []
# yListW = []
# yLi = os.listdir(feaPath)
# sumdat = 0
# for i in yLi:
#     file = os.listdir(feaPath+ "/" + i)
#     cnt = 0
#     for j in file:
#         if j.endswith('.pkl'):
#             #print(j)
#             x = joblib.load(feaPath + "/" + i + "/" +j)
#             xListW.append(x)
#             yListW.append(i)
#             cnt += 1
#     print(str(i) + " " + str(cnt))
#     sumdat += cnt
# print("semuanya "+ str(sumdat))
# xListL = np.array(xListL)
# xListH = np.array(xListH)
# xListW = np.array(xListW)
# xList1 = np.concatenate((xListL, xListH, xListW), axis=1)
# #xList = np.concatenate((xListL, xListW), axis=1)
# print(yListL == yListH)
# print(yListH == yListW)
# yList1 = yListL