In [63]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py
import glob


In [64]:
fixed_size = tuple((512, 512))

In [65]:
train_path = 'C:/Users/user/Desktop/Bitirme/VT/v1/Train'

#num of trees for random forest
num_trees = 100


In [66]:
# bins for histogram
bins = 8

# train_test_split size
test_size = 0.10

In [67]:
# seed for reproducing same results
seed = 9

In [68]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

In [69]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    # return the result
    return haralick

In [70]:

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV color-space
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the histogram
    return hist.flatten()

In [71]:
# get the training labels
train_labels = os.listdir(train_path)
train_labels

['Benign', 'Insitu', 'Invasive', 'Normal']

In [72]:
# sort the training labels
train_labels.sort()
print(train_labels)


['Benign', 'Insitu', 'Invasive', 'Normal']


In [73]:
# empty lists to hold feature vectors and labels
global_features = []
labels = []

i, j = 0, 0
k = 0

In [74]:
# num of images per class
images_per_class = 80

In [75]:
# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each species training folder
    path = glob.glob("C:/Users/user/Desktop/Bitirme/VT/v1/Train/"+training_name+"/*.tif")

    
    # get the current training label
    current_label = training_name
    
    k = 1
    x=0
    # loop over the images in each sub-folder
    for file in path:
        if x<images_per_class+1:
             # read the image and resize it to a fixed-size
            image = cv2.imread(file)
            fv_hu_moments = fd_hu_moments(image)
            fv_haralick   = fd_haralick(image)
            fv_histogram  = fd_histogram(image)
            global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
            labels.append(current_label)
            global_features.append(global_feature)

            i += 1
            k += 1 
            x=x+1
    print(x) 
    print ("[STATUS] processed folder: {}".format(current_label))
    j += 1

81
[STATUS] processed folder: Benign
81
[STATUS] processed folder: Insitu
81
[STATUS] processed folder: Invasive
81
[STATUS] processed folder: Normal


In [76]:

print ("[STATUS] completed Global Feature Extraction...")

# get the overall feature vector size
print ("[STATUS] feature vector size {}".format(np.array(global_features).shape))

# get the overall training label size
print ("[STATUS] training Labels {}".format(np.array(labels).shape))


[STATUS] completed Global Feature Extraction...
[STATUS] feature vector size (324, 532)
[STATUS] training Labels (324,)


In [77]:
# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
print ("[STATUS] training labels encoded...")

[STATUS] training labels encoded...


In [78]:
# normalize the feature vector in the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print ("[STATUS] feature vector normalized...")

print ("[STATUS] target labels: {}".format(target))
print( "[STATUS] target labels shape: {}".format(target.shape))

[STATUS] feature vector normalized...
[STATUS] target labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
[STATUS] target labels shape: (324,)


In [81]:
# save the feature vector using HDF5
h5f_data = h5py.File('data.h5', 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

h5f_label = h5py.File('labels.h5', 'w')
h5f_label.create_dataset('dataset_1', data=np.array(target))

h5f_data.close()
h5f_label.close()

