In [1]:
import cv2
import random
import time
import numpy as np
from scipy import misc
from skimage import feature
from matplotlib import pyplot as plt
from scipy import misc
%matplotlib inline
plt.style.use('ggplot')
import os
import sys
sys.path.append('../common/')


In [2]:
import pickle


In [3]:
# Root folder of the training set; its directory entries are listed later on
# to obtain the class labels.
# NOTE(review): this is an absolute, machine-specific path, and the
# image-loading loop further down repeats the same literal instead of reusing
# train_path -- keep the two in sync when moving the data.
train_path = 'C:/Users/user/Desktop/Bitirme/VT/v1/Train'


In [4]:
def lbp_feature(img, num_points=20, radius=4, n_bins=34):
    """Return a normalised Local Binary Pattern (LBP) histogram of a BGR image.

    The image is converted to grayscale, encoded with the "uniform" LBP
    operator and summarised as a histogram that is scaled to sum to ~1.

    Relies on ``cv2``, ``np`` and ``local_binary_pattern`` being available in
    the notebook namespace at call time.

    Parameters
    ----------
    img : numpy.ndarray
        Colour image in BGR channel order, e.g. the result of ``cv2.imread``.
    num_points : int, optional
        Number of neighbour points handed to ``local_binary_pattern``.
        Defaults to 20, the value that was previously hard-coded.
    radius : int or float, optional
        Sampling radius handed to ``local_binary_pattern``. Defaults to 4.
    n_bins : int, optional
        Number of unit-width histogram bins, spanning ``[0, n_bins]``.
        Defaults to 34, so the returned vector keeps its original length.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n_bins``.

    Raises
    ------
    ValueError
        If ``img`` is ``None``, which is what ``cv2.imread`` yields when a
        file cannot be read.
    """
    if img is None:
        # Fail early with a clear message instead of an opaque OpenCV error.
        raise ValueError("lbp_feature received None instead of an image "
                         "(cv2.imread could not read the file?)")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    codes = local_binary_pattern(gray, num_points, radius, method="uniform")

    # NOTE(review): per the scikit-image docs, uniform LBP with P points
    # produces codes 0..P+1, so with the defaults (P=20, 34 bins) the last
    # bins presumably stay empty. The defaults are kept so that the feature
    # length does not change -- confirm whether P=32 or 22 bins was intended.
    bin_edges = np.arange(0, n_bins + 1)
    hist, _ = np.histogram(codes.ravel(), bins=bin_edges, range=(0, n_bins))

    hist = hist.astype("float")
    # The small epsilon guards against division by zero.
    hist /= (hist.sum() + 1e-7)
    return hist

In [5]:
# Every entry of the training directory is used as a class label;
# the labels are put in sorted order before being shown.
train_labels = sorted(os.listdir(train_path))
print(train_labels)


['Benign', 'Insitu', 'Invasive', 'Normal']


In [6]:
# Parallel containers: one class label and one feature vector per image.
labels, features = [], []


In [7]:
# filter all the warnings
import warnings
warnings.filterwarnings('ignore')

In [8]:
import glob
from skimage import color
from skimage.feature import local_binary_pattern
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score

# Upper bound on the number of images read from each class folder.
MAX_IMAGES_PER_CLASS = 5800

# Start from empty containers so that re-running this cell does not append a
# second copy of every sample.
labels = []
features = []

for training_name in train_labels:
    # Build the search pattern from train_path (instead of repeating the
    # literal) and sort the matches so the sample order is deterministic.
    image_paths = sorted(glob.glob(os.path.join(train_path, training_name, "*.tif")))

    # get the current training label
    current_label = training_name

    # loop over (at most MAX_IMAGES_PER_CLASS of) the images in the sub-folder
    for file in image_paths[:MAX_IMAGES_PER_CLASS]:
        image = cv2.imread(file)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # report it and move on rather than aborting the whole run.
            print("[WARNING] could not read image, skipped: {}".format(file))
            continue
        features.append(lbp_feature(image))
        labels.append(current_label)
    print ("[STATUS] processed folder: {}".format(current_label))


[STATUS] processed folder: Benign
[STATUS] processed folder: Insitu
[STATUS] processed folder: Invasive
[STATUS] processed folder: Normal


In [9]:
print("[STATUS] completed Global Feature Extraction...")

# Report the dimensions of the collected feature vectors ...
print("[STATUS] feature vector size {}".format(np.shape(features)))

# ... and of the matching list of labels.
print("[STATUS] training Labels {}".format(np.shape(labels)))

[STATUS] completed Global Feature Extraction...
[STATUS] feature vector size (5837, 34)
[STATUS] training Labels (5837,)


In [10]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Turn the string class labels into integer ids.
le = LabelEncoder()
le.fit(labels)
target = le.transform(labels)

# Distinct class names found in the label list.
targetNames = np.unique(labels)
print("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [11]:
# Rescale every feature column to the [0, 1] interval.
# NOTE(review): the scaler is fitted on all samples, i.e. before the
# train/test split performed later, so the held-out rows influence the
# min/max used for scaling -- consider fitting on the training split only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(features)
rescaled_features = scaler.transform(features)
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

[STATUS] feature vector normalized...
[STATUS] target labels: [0 0 0 ... 3 3 3]
[STATUS] target labels shape: (5837,)


In [12]:
# Replace any NaN in the scaled features with 0.0 (np.nan_to_num's default).
# The second positional parameter of np.nan_to_num is `copy`, not a fill
# value, so the former `0` was read as copy=False; it is dropped here so the
# call says what it does.
rescaled_features = np.nan_to_num(rescaled_features)


In [13]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [14]:
# Candidate classifiers as (short name, estimator) pairs.
models = [
    ('KNN', KNeighborsClassifier(n_neighbors=9)),
    ('DTC', DecisionTreeClassifier(random_state=9)),
    ('RF', RandomForestClassifier(n_estimators=300, random_state=9)),
    ('SVM', SVC(C=100, gamma=1)),
]

In [15]:
# Per-model score arrays and model names, plus the metric to evaluate.
results, names = [], []
scoring = "accuracy"

In [16]:
# Hold out 20% of the samples for testing; the fixed seed makes the split
# repeatable.
trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal = train_test_split(
    np.array(rescaled_features),
    np.array(target),
    test_size=0.2,
    random_state=9,
)

In [17]:
# 5-fold cross-validation of every candidate model on the training split.
# The splitter does not depend on the model, so it is created once.
# `random_state` is no longer passed: without shuffle=True it has no effect,
# and recent scikit-learn releases reject that combination with a ValueError.
# The folds are therefore the same contiguous, unshuffled ones as before.
kfold = KFold(n_splits=5)

for name, model in models:
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report mean accuracy and its standard deviation across the folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

KNN: 0.698440 (0.009032)
DTC: 0.728208 (0.009898)
RF: 0.810241 (0.010601)
SVM: 0.741916 (0.006335)


In [18]:
from sklearn import metrics
# Refit every candidate on the whole training split and report its accuracy
# on the held-out test split.
for name, model in models:
    # fit() hands back the fitted estimator, so clf is the same object as model.
    clf = model.fit(trainDataGlobal, trainLabelsGlobal)
    y_pred = clf.predict(testDataGlobal)
    test_accuracy = metrics.accuracy_score(y_pred, testLabelsGlobal)
    print("%s: %f " % (name, test_accuracy))

KNN: 0.723459 
DTC: 0.784247 
RF: 0.843322 
SVM: 0.766267 
