## Feature Extraction

Color Statistics(Mean, Standard Deviation, Skewness, Entropy, Kurtosis) for RGB, LAB, HSV and YCrCb. Experimented with both segmented and non segmented image.
The pipeline is given: Extract stat features + label encosing + minmax scaling + stratified k fold + classifier (linear regression for now)

In [91]:
# Load Libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
import skimage
from scipy.stats import skew, kurtosis
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import metrics
from dataset.dataset import SkinLesion_Dataset, SegExamples
from pipeline.preprocessing import SkinLesionPreprocessing

preproc = SkinLesionPreprocessing()
data = SegExamples()

In [93]:
# Color histogram statistics -- Without segmentation
def statistics(img):
    mean1 = np.mean(img[:,:,0])
    mean2 = np.mean(img[:,:,1])
    mean3 = np.mean(img[:,:,2])
    std1 = np.std(img[:,:,0])
    std2 = np.std(img[:,:,1])
    std3 = np.std(img[:,:,2])
    # Skewness
    val1 = skew((img[:,:,0]).reshape(-1))
    val2 = skew((img[:,:,1]).reshape(-1))
    val3 = skew((img[:,:,2]).reshape(-1))
    # Kurtosis
    kval1 = kurtosis((img[:,:,0]).reshape(-1))
    kval2 = kurtosis((img[:,:,1]).reshape(-1))
    kval3 = kurtosis((img[:,:,2]).reshape(-1))
    # Entropy
    entropy1 = skimage.measure.shannon_entropy(img[:,:,0])
    entropy2 = skimage.measure.shannon_entropy(img[:,:,1])
    entropy3 = skimage.measure.shannon_entropy(img[:,:,2])

    stats = np.hstack([mean1,mean2,mean3, std1,std2,std3,val1,val2,val3,kval1,kval2,kval3, entropy1, entropy2, entropy3 ])

    return stats


# Different Color Channels - RGB, LAB, YCrCb, HSV
def color_stats(paths):
    d = []
    for i in paths:
        img = cv2.imread(i)
        preproc_image = preproc.preprocess(img)
        img_rgb = cv2.cvtColor(preproc_image, cv2.COLOR_BGR2RGB)
        img_lab = cv2.cvtColor(preproc_image, cv2.COLOR_BGR2LAB)
        img_ycrcb = cv2.cvtColor(preproc_image, cv2.COLOR_BGR2YCrCb)
        img_hsv = cv2.cvtColor(preproc_image, cv2.COLOR_BGR2HSV)

        stats_rgb = statistics(img_rgb)
        stats_lab = statistics(img_lab)
        stats_ycrcb = statistics(img_ycrcb)
        stats_hsv = statistics(img_hsv)

        s = np.hstack([stats_rgb, stats_lab, stats_ycrcb, stats_hsv])
        d.append(s)
    df = pd.DataFrame(d)
    return df

In [94]:
# Create labels for binary sample test dataset
binary_data = data.seg_examples_df.iloc[:60] # only nevus vs others
paths = "../data/"+binary_data.path

df = color_stats(paths) # get the color stats

binary_labels = []
for p in binary_data.path:
     binary_labels.append(p.split('/')[2])
binary_labels = pd.DataFrame(binary_labels)
label_encoder = preprocessing.LabelEncoder()
df['label'] = label_encoder.fit_transform(binary_labels[0])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,label
0,152.99217,123.149559,142.798076,40.043135,38.584645,41.395877,-2.392022,-2.102251,-2.117684,4.311303,...,-1.095011,2.158256,-2.390702,50.168866,3.527213,4.306696,4.140778,5.661916,5.600688,0
1,196.997388,192.149971,189.517542,12.282235,22.959519,32.72952,-0.435427,-1.613595,-1.301156,-0.099481,...,0.339233,1.338251,-1.156194,-1.317115,1.266268,1.573237,5.098499,6.184277,5.537019,0
2,147.91254,141.134105,144.849574,13.936721,19.382833,20.149398,-2.919124,-2.693448,-2.370691,9.474356,...,-0.985864,2.77471,-2.640416,-0.706546,7.988473,8.228084,5.553149,4.791856,5.245827,0
3,172.810968,84.565775,75.969115,17.384055,24.66025,27.145198,-0.474364,-0.417743,-0.281655,-0.591204,...,4.544777,0.372488,-0.474364,18.68896,-1.305041,-0.591204,2.64893,6.581112,6.039771,0
4,159.207648,129.967554,102.280468,18.386921,23.508534,28.48976,0.051739,0.61545,0.945788,-0.142266,...,13.276639,-0.376584,0.058816,196.738186,-0.090857,-0.130004,3.186059,6.955028,6.227234,0
5,180.45505,147.827451,117.526904,27.012661,39.592515,48.109754,-0.861734,-0.285325,0.323493,-0.151614,...,4.874553,-0.276446,-0.826508,24.480373,-0.445871,-0.14664,3.673759,7.303363,6.555991,0
6,184.142638,136.548531,141.283449,36.451842,33.353137,31.960302,-1.001598,0.131318,-0.284648,0.274565,...,-0.756025,0.145727,-0.911451,-1.373457,-0.316703,-0.00821,5.042504,6.520363,6.915477,0
7,162.065971,158.013385,158.579882,6.590863,12.440354,18.99166,-2.437978,-3.285842,-2.821589,9.791838,...,-0.107828,4.194377,-1.264441,-1.671674,19.900179,4.418346,5.37389,4.906681,4.878328,0
8,191.899666,165.094118,152.671013,24.48974,42.057475,49.460764,-1.658332,-0.927884,-0.576656,2.799589,...,2.183889,0.669549,-1.586134,3.329545,-0.485591,2.616544,5.076641,7.106171,6.313592,0
9,182.547982,166.800138,159.406311,23.383354,29.277555,41.07261,-1.313949,-0.34129,-0.247329,2.603041,...,0.520923,0.983264,-1.099064,-1.505145,-0.255983,2.071997,5.946802,6.552526,6.521761,0


In [98]:
from sklearn.model_selection import StratifiedKFold
from statistics import stdev
from sklearn import linear_model


def classifier(df):
    x = df.loc[:, df.columns != 'label']
    y = df['label']

    scaler = preprocessing.MinMaxScaler()
    x_scaled = scaler.fit_transform(x)

    # Create  classifier object.
    lr = linear_model.LogisticRegression()
    # lr = svm.SVC()

    # Create StratifiedKFold object.
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=10)
    lst_accu_stratified = []

    for train_index, test_index in skf.split(x, y):
        x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
        y_train_fold, y_test_fold = y[train_index], y[test_index]
        lr.fit(x_train_fold, y_train_fold)
        lst_accu_stratified.append(lr.score(x_test_fold, y_test_fold))

    # Print the output.
    print('List of possible accuracy:', lst_accu_stratified)
    print('\nMaximum Accuracy That can be obtained from this model is:',
          max(lst_accu_stratified)*100, '%')
    print('\nMinimum Accuracy:',
          min(lst_accu_stratified)*100, '%')
    print('\nOverall Accuracy:',
          np.mean(lst_accu_stratified)*100, '%')
    print('\nStandard Deviation is:', stdev(lst_accu_stratified))

In [99]:
classifier(df)

List of possible accuracy: [0.6666666666666666, 0.8333333333333334, 0.8333333333333334, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.5, 0.6666666666666666, 0.8333333333333334, 0.5]

Maximum Accuracy That can be obtained from this model is: 83.33333333333334 %

Minimum Accuracy: 50.0 %

Overall Accuracy: 68.33333333333333 %

Standard Deviation is: 0.12297746456210366


Color histogram statistics -- With Segmentation--- This part is not Included --- Just Experimental

In [90]:
# Color histogram statistics -- With Segmentation
def statistics2(img, mask):
    mean1 = np.mean(mask*img[:,:,0])
    mean2 = np.mean(mask*img[:,:,1])
    mean3 = np.mean(mask*img[:,:,2])
    std1 = np.std(mask*img[:,:,0])
    std2 = np.std(mask*img[:,:,1])
    std3 = np.std(mask*img[:,:,2])
    # Skewness
    val1 = skew((mask*img[:,:,0]).reshape(-1))
    val2 = skew((mask*img[:,:,1]).reshape(-1))
    val3 = skew((mask*img[:,:,2]).reshape(-1))
    # Kurtosis
    kval1 = kurtosis((mask*img[:,:,0]).reshape(-1))
    kval2 = kurtosis((mask*img[:,:,1]).reshape(-1))
    kval3 = kurtosis((mask*img[:,:,2]).reshape(-1))

    # Entropy
    entropy1 = skimage.measure.shannon_entropy(mask*img[:,:,0])
    entropy2 = skimage.measure.shannon_entropy(mask*img[:,:,1])
    entropy3 = skimage.measure.shannon_entropy(mask*img[:,:,2])

    stats = np.hstack([mean1,mean2,mean3, std1,std2,std3,val1,val2,val3,kval1,kval2,kval3,entropy1,entropy2,entropy3])

    return stats

data = SegExamples()
binary_data = data.seg_examples_df.iloc[:60] # only nevus vs others
paths = "../data/"+binary_data.path
# Different Color Channels - RGB, LAB, YCrCb, HSV
d2 = []
for i in paths:
    img = cv2.imread(i)
    gray_seg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Segmentation
    ret1, mask = cv2.threshold(gray_seg, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    mask_inv =  255 - mask;
    # result_img = mask_inv * gray_seg

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    img_ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    stats_rgb = statistics2(img_rgb, mask_inv )
    stats_lab = statistics2(img_lab, mask_inv)
    stats_ycrcb = statistics2(img_ycrcb, mask_inv)
    stats_hsv = statistics2(img_hsv, mask_inv)

    s = np.hstack([stats_rgb, stats_lab, stats_ycrcb, stats_hsv])
    d2.append(s)
df2 = pd.DataFrame(d2)
df2

ValueError: operands could not be broadcast together with shapes (444,596) (450,600) 

In [60]:
# Create labels for binary sample test dataset
binary_labels = []
for p in binary_data.path:
     binary_labels.append(p.split('/')[2])
binary_labels = pd.DataFrame(binary_labels)
label_encoder = preprocessing.LabelEncoder()
df2['label'] = label_encoder.fit_transform(binary_labels[0])

classifier(df2)

List of possible accuracy: [0.5, 0.6666666666666666, 0.6666666666666666, 0.5, 0.5, 0.6666666666666666, 0.6666666666666666, 0.6666666666666666, 0.5, 0.8333333333333334]

Maximum Accuracy That can be obtained from this model is: 83.33333333333334 %

Minimum Accuracy: 50.0 %

Overall Accuracy: 61.66666666666666 %

Standard Deviation is: 0.11249142628509215


In [None]:
# plt.hist(result_img.ravel(),256,[0,256])

In [None]:
# Base k means code
# paths = "../data/"+data.seg_examples_df.path
# def kmeans(data):
#     count = 0
#     for i in paths:
#         type = data[count]['type']
#         img = cv2.imread(i)
#         image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#         pixel_values = image.reshape(-1,3)
#         pixel_values = np.float32(pixel_values)
#         criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
#         k = 2
#         _, labels, (centers) = cv2.kmeans(pixel_values, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
#         centers = np.uint8(centers)
#         labels = labels.flatten()
#         segmented_image = centers[labels.flatten()]
#         # reshape back to the original image dimension
#         segmented_image = segmented_image.reshape(image.shape)
#         # Save the image
#         count=count+1
#         cv2.imwrite(f'../examples/kmeans/{type}_{count}.png', segmented_image)
#
#         # show the image
#         plt.imshow(segmented_image, cmap="gray")
#         plt.show()