In [1]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import os
import seaborn as sns
import pandas as pd
from skimage.filters import roberts, sobel, scharr, prewitt
from skimage.morphology import disk
from skimage.filters.rank import entropy
from scipy import ndimage as nd

In [2]:
print(os.listdir(r'D:\mini-project\data\Ready Data'))

['test', 'train']


In [3]:
#Resize images to
SIZE = (128,128)

#Capture images and labels into arrays.
train_images = []
train_labels = []
for directory_path in glob.glob(r"D:\mini-project\data\Ready Data\train\*"):
    label = directory_path.split("/")[-1]
    print(label)
    for img_path in glob.glob(os.path.join(directory_path,"*.*")):
        #print(img_path)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR) #Reading color images
        img = cv2.resize(img, (SIZE)) #Resize images
        #img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) #Optional step. Change BGR to RGB
        train_images.append(img)
        train_labels.append(label)
train_images = np.array(train_images)
train_labels = np.array(train_labels)

D:\mini-project\data\Ready Data\train\AK
D:\mini-project\data\Ready Data\train\BCC
D:\mini-project\data\Ready Data\train\MEL
D:\mini-project\data\Ready Data\train\VASC


In [4]:
train_images.shape

(80, 128, 128, 3)

In [5]:
#Do exactly the same for test/validation images
test_images = []
test_labels = []
for directory_path in glob.glob(r"D:\mini-project\data\Ready Data\test\*"):
    true_label = directory_path.split("/")[-1]
    print(true_label)
    for img_path in glob.glob(os.path.join(directory_path,"*.*")):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (SIZE))
        #img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) #Optional
        test_images.append(img)
        test_labels.append(true_label)

test_images = np.array(test_images)
test_labels = np.array(test_labels)

D:\mini-project\data\Ready Data\test\AK
D:\mini-project\data\Ready Data\test\BCC
D:\mini-project\data\Ready Data\test\MEL
D:\mini-project\data\Ready Data\test\VASC


In [6]:
test_images.shape

(20, 128, 128, 3)

In [7]:
#Encode labels from text (folder names) to integers.

from sklearn import preprocessing
le = preprocessing.LabelEncoder()

le.fit(test_labels)
test_labels_encoded = le.transform(test_labels)
le.fit(train_labels)
train_labels_encoded = le.transform(train_labels)

print(np.unique(train_labels_encoded))
print(np.unique(test_labels_encoded))

[0 1 2 3]
[0 1 2 3]


In [8]:
#Split data into test and train datasets (already split but assigning to meaningful convention)
#If you only have one dataset then split here
x_train, y_train, x_test, y_test = train_images, train_labels_encoded, test_images, test_labels_encoded

In [9]:
def feature_extractor(x_train):
  count = 1
  image_dataset = pd.DataFrame()
  for image in range(x_train.shape[0]):
    df = pd.DataFrame()
    img = x_train[image, :,:,:]
    pixel_values = img.reshape(-1)/2255.0
    df['Pixel_Value'] = pixel_values
    num = 1  #To count numbers up in order to give Gabor features a lable in the data frame
    #kernels = []
    for theta in range(2):   #Define number of thetas
      theta = theta / 4. * np.pi
      for sigma in ([1,2]):  #Sigma with Diff no.
        for lamda in np.arange(0, np.pi, np.pi / 4):   #Range of wavelengths
          for gamma in ([0.5, 1,1.5]):   #Gamma values of 0.05 and 0.5

            gabor_label = 'Gabor' + str(num)  #Label Gabor columns as Gabor1, Gabor2, etc.
                        #print(gabor_label)
            ksize=5 # try with 9 also
            kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                        #kernels.append(kernel)
                        #Now filter the image and add values to a new column
            fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
            filtered_img = fimg.reshape(-1)/255.0
            df[gabor_label] = filtered_img  #Labels columns as Gabor1, Gabor2, etc.
            #print("added gabor ",num)            #print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
            num += 1  #Increment for gabor column label
    #GAUSSIAN with sigma=3
    gaussian_img = nd.gaussian_filter(img, sigma=3)
    gaussian_img1 = gaussian_img.reshape(-1)/255.0
    df['Gaussian s3'] = gaussian_img1

    #GAUSSIAN with sigma=5
    gaussian_img = nd.gaussian_filter(img, sigma=5)
    gaussian_img1 = gaussian_img.reshape(-1)/255.0
    df['Gaussian s5'] = gaussian_img1

    #GAUSSIAN with sigma=7
    gaussian_img2 = nd.gaussian_filter(img, sigma=7)
    gaussian_img3 = gaussian_img2.reshape(-1)/255.0
    df['Gaussian s7'] = gaussian_img3

    #MEDIAN with sigma=5
    median_img = nd.median_filter(img, size=5)
    median_img1 = median_img.reshape(-1)/255.0
    df['Median s5'] = median_img1

    #MEDIAN with sigma=7
    median_img = nd.median_filter(img, size=7)
    median_img1 = median_img.reshape(-1)/255.0
    df['Median s7'] = median_img1

    #VARIANCE with size=3
    variance_img = nd.generic_filter(img, np.var, size=3)
    variance_img1 = variance_img.reshape(-1)/255.0
    df['Variance s3'] = variance_img1

    #VARIANCE with size=5
    variance_img = nd.generic_filter(img, np.var, size=5)
    variance_img1 = variance_img.reshape(-1)/255.0
    df['Variance s5'] = variance_img1
    
    #VARIANCE with size=7
    variance_img = nd.generic_filter(img, np.var, size=7)
    variance_img1 = variance_img.reshape(-1)/255.0
    df['Variance s7'] = variance_img1
    
    ''' # Clustering
    z = img.reshape((-1,3))
    z = np.float32(z)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    K = 5
    ret,label,center = cv2.kmeans(z,K,None, criteria, 20, cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)
    res = center[label.flatten()]
    res2 = res.reshape((img.shape))
    res2 = np.uint8(res2)
    df['clustering'] = res2.reshape(-1)/255.0
    '''

    #Append features from current image to the dataset
    image_dataset = pd.concat([image_dataset, df])
    print("Done Image : ",count)
    count += 1

  return image_dataset

In [10]:
#Extract features from training images
image_features = feature_extractor(x_train)

Done Image :  1
Done Image :  2
Done Image :  3
Done Image :  4
Done Image :  5
Done Image :  6
Done Image :  7
Done Image :  8
Done Image :  9
Done Image :  10
Done Image :  11
Done Image :  12
Done Image :  13
Done Image :  14
Done Image :  15
Done Image :  16
Done Image :  17
Done Image :  18
Done Image :  19
Done Image :  20
Done Image :  21
Done Image :  22
Done Image :  23
Done Image :  24
Done Image :  25
Done Image :  26
Done Image :  27
Done Image :  28
Done Image :  29
Done Image :  30
Done Image :  31
Done Image :  32
Done Image :  33
Done Image :  34
Done Image :  35
Done Image :  36
Done Image :  37
Done Image :  38
Done Image :  39
Done Image :  40
Done Image :  41
Done Image :  42
Done Image :  43
Done Image :  44
Done Image :  45
Done Image :  46
Done Image :  47
Done Image :  48
Done Image :  49
Done Image :  50
Done Image :  51
Done Image :  52
Done Image :  53
Done Image :  54
Done Image :  55
Done Image :  56
Done Image :  57
Done Image :  58
Done Image :  59
Done I

In [11]:
#Reshape to a vector for Random Forest / SVM training
n_features = image_features.shape[1]
n_features

57

In [12]:
image_features = np.expand_dims(image_features, axis=0)
image_features

array([[[0.07095344, 0.        , 0.        , ..., 0.32941176,
         0.85098039, 0.38431373],
        [0.06208426, 0.        , 0.        , ..., 0.56470588,
         0.53333333, 0.50588235],
        [0.10554324, 0.        , 0.        , ..., 0.79607843,
         0.51372549, 0.99607843],
        ...,
        [0.06607539, 0.        , 0.        , ..., 0.03137255,
         0.65490196, 0.61568627],
        [0.06829268, 0.        , 0.        , ..., 0.11372549,
         0.49803922, 0.17254902],
        [0.08780488, 0.        , 0.        , ..., 1.        ,
         0.45098039, 0.19215686]]])

In [13]:
image_features.shape

(1, 3932160, 57)

In [14]:
X_for_RF = np.reshape(image_features, (x_train.shape[0], -1))  #Reshape to #images, features
X_for_RF.shape

(80, 2801664)

In [15]:
#Extract features from test data and reshape, just like training data
test_features = feature_extractor(x_test)

Done Image :  1
Done Image :  2
Done Image :  3
Done Image :  4
Done Image :  5
Done Image :  6
Done Image :  7
Done Image :  8
Done Image :  9
Done Image :  10
Done Image :  11
Done Image :  12
Done Image :  13
Done Image :  14
Done Image :  15
Done Image :  16
Done Image :  17
Done Image :  18
Done Image :  19
Done Image :  20


In [18]:
test_features = np.expand_dims(test_features, axis=0)
test_for_RF = np.reshape(test_features, (x_test.shape[0], -1))

In [19]:
test_for_RF.shape

(20, 2801664)

### Random Forest

In [25]:
#Define the classifier
from sklearn.ensemble import RandomForestClassifier
RF_model = RandomForestClassifier(n_estimators = 105, random_state = 101)

# Fit the model on training data
RF_model.fit(X_for_RF, y_train) #For sklearn no one hot encoding

In [26]:
#Predict on test
rf_test_prediction = RF_model.predict(test_for_RF)
#Inverse le transform to get original label back.
#rf_test_prediction = le.inverse_transform(test_prediction)

In [27]:
rf_test_prediction.shape

(20,)

In [28]:
#Print overall accuracy
from sklearn import metrics
print ("Accuracy = ", metrics.accuracy_score(y_test, rf_test_prediction))

Accuracy =  0.75


In [None]:
rf_test_prediction

In [None]:
y_test

### KNN CLASSIFIER

In [54]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 4, metric = 'minkowski', p = 3)
classifier.fit(X_for_RF, y_train)



# Predicting the Test set results
y_pred = classifier.predict(test_for_RF)


In [55]:
y_pred.shape

(20,)

In [56]:
#Print overall accuracy
from sklearn import metrics
print ("Accuracy = ", metrics.accuracy_score(y_test, y_pred))

Accuracy =  0.45


### SVM CLASSIFIER

In [66]:
# Training the SVM model on the Training set
from sklearn.svm import SVC
classifierS = SVC(kernel = 'linear', random_state = 72)
classifierS.fit(X_for_RF, y_train)

In [67]:
y_predS = classifierS.predict(test_for_RF)

In [68]:
from sklearn import metrics
print ("Accuracy = ", metrics.accuracy_score(y_test, y_predS))

Accuracy =  0.6
