In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Path to the images: root directory holding one sub-folder per data split
base_path = './melanoma_nevus'

# One folder per split (train / validation / test)
train_folder, validation_folder, test_folder = (
    os.path.join(base_path, split) for split in ('train', 'validation', 'test')
)

# Inside every split: one folder per class (melanoma / nevus)
train_melanoma_folder, train_nevus_folder = (
    os.path.join(train_folder, label) for label in ('melanoma', 'nevus')
)
validation_melanoma_folder, validation_nevus_folder = (
    os.path.join(validation_folder, label) for label in ('melanoma', 'nevus')
)
test_melanoma_folder, test_nevus_folder = (
    os.path.join(test_folder, label) for label in ('melanoma', 'nevus')
)


In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Size handed to every generator as `target_size`.
# NOTE(review): Keras documents target_size as (height, width) — confirm that
# (600, 450) is the intended orientation for these images.
SIZE = (600,450)

# Training images: rescale pixel values from 0..255 to 0..1 and apply random
# horizontal / vertical flips.
# Augmentations tried earlier and currently disabled: rotation_range=180,
# width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1, zoom_range=0.1
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Validation and test images are only rescaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Each generator reads its split folder and yields batches of 20 images with
# binary labels.
train_generator = train_datagen.flow_from_directory(
    train_folder, target_size=SIZE, batch_size=20, class_mode='binary')

valid_generator = valid_datagen.flow_from_directory(
    validation_folder, target_size=SIZE, batch_size=20, class_mode='binary')

test_generator = test_datagen.flow_from_directory(
    test_folder, target_size=SIZE, batch_size=20, class_mode='binary')

Found 1600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Found 400 images belonging to 2 classes.


In [None]:
def norm_dft2(dft2):
    """Log-scale the magnitude of a DFT so that its largest value maps to 255.

    Computes ``c * log(1 + |dft2|)`` with ``c = 255 / log(1 + max|dft2|)``.

    Parameters
    ----------
    dft2 : array-like of real or complex numbers
        Spectrum to normalise (any shape).

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `dft2`, values in 0..255.
        An all-zero spectrum yields all zeros and an empty spectrum yields an
        empty array (previously these produced NaNs / raised ValueError).
    """
    magnitude = np.abs(dft2)
    if magnitude.size == 0:
        return magnitude.astype(float)

    # Vectorised maximum instead of the Python builtin over a flattened copy.
    peak = magnitude.max()
    if peak == 0:
        # log(1 + 0) == 0 would make the scale factor infinite (inf * 0 -> NaN).
        return np.zeros_like(magnitude, dtype=float)

    c = 255 / np.log1p(peak)
    return c * np.log1p(magnitude)

In [None]:
def fd_hu_moments(image):
    """Return the Hu moments of `image` as a flat vector.

    The image is handed to ``cv2.moments`` unchanged; the grayscale conversion
    that used to precede this step is disabled.
    """
    raw_moments = cv2.moments(image)
    hu = cv2.HuMoments(raw_moments)
    return hu.flatten()

In [None]:
def fd_haralick(image):
    """Return the Haralick texture vector of `image`.

    ``mahotas.features.haralick`` is called on the image as given (no
    grayscale conversion is applied here) and its output is averaged over
    axis 0.
    """
    texture = mahotas.features.haralick(image)
    return texture.mean(axis=0)

In [None]:
def fd_histogram(image, mask=None):
    """Return the normalised 256-bin histogram of channel 0 of `image`.

    Parameters
    ----------
    image : array accepted by ``cv2.calcHist``
        Used as given; the HSV conversion that used to precede this step is
        disabled.
    mask : array or None, optional
        Forwarded to ``cv2.calcHist``. It was previously accepted but ignored;
        the default ``None`` keeps the old behaviour.

    Returns
    -------
    numpy.ndarray
        Flat vector of 256 values. The original computed ``hist.flatten()``
        but discarded the result and returned the unflattened histogram.
    """
    # compute the histogram over the value range [0, 256)
    hist = cv2.calcHist([image], [0], mask, [256], [0, 256])
    # normalize the histogram in place
    cv2.normalize(hist, hist)
    return hist.flatten()

In [None]:
def get_feature_vector(image):
    """Build one flat feature vector for `image`.

    The vector is the log-normalised 2-D FFT magnitude (see ``norm_dft2``)
    followed by the histogram from ``fd_histogram``. Hu-moment and Haralick
    features are currently disabled.

    NOTE(review): ``np.fft.fft2`` transforms the last two axes by default —
    presumably `image` is a single-channel 2-D array; confirm.
    """
    # Spectrum part: FFT, then log-normalisation
    spectrum = norm_dft2(np.fft.fft2(image))
    # Histogram part: computed on the flattened pixel values
    histogram = fd_histogram(image.flatten())
    return np.hstack([spectrum.flatten(), histogram.flatten()])

In [None]:
# loading images
import cv2  # cv2.resize is used below, but the notebook first imports cv2 in a later cell

nevus_filepath = 'data/0/'
melanoma_filepath = 'data/1/'

# Keep regular files only; sort the names so the selection is reproducible.
nevus_files = sorted(
    f for f in os.listdir(nevus_filepath)
    if os.path.isfile(os.path.join(nevus_filepath, f)))
melanoma_files = sorted(
    f for f in os.listdir(melanoma_filepath)
    if os.path.isfile(os.path.join(melanoma_filepath, f)))

# Load at most N images per class. Slicing (instead of indexing range(N))
# avoids an IndexError when a folder holds fewer than N files.
N = 500
nevus_imgs = [plt.imread(os.path.join(nevus_filepath, f)) for f in nevus_files[:N]]
melanoma_imgs = [plt.imread(os.path.join(melanoma_filepath, f)) for f in melanoma_files[:N]]

# resize
nevus_imgs = [cv2.resize(img,(256,256)) for img in nevus_imgs]
melanoma_imgs = [cv2.resize(img,(256,256)) for img in melanoma_imgs]

In [4]:
import cv2
import pandas as pd
from tqdm import tqdm
import mahotas
from sklearn.preprocessing import MinMaxScaler

def _load_class_features(folder):
    """Return ``{column key: feature vector}`` for every file in `folder`.

    The key is ``path[-11:-4]`` of the joined file path, the same slice the
    notebook has always used as the column name.
    """
    features_by_key = {}
    for fname in tqdm([os.path.join(folder, name) for name in os.listdir(folder)]):
        features_by_key[fname[-11:-4]] = get_feature_vector(plt.imread(fname))
    return features_by_key


print("Train loading...")
# Melanoma images are loaded first, nevus images second (same order as before).
columns = _load_class_features(train_melanoma_folder)
n_melanoma = len(columns)
columns.update(_load_class_features(train_nevus_folder))

# Create array with images features: one column per image. The frame is built
# in a single step instead of inserting one column at a time.
X = pd.DataFrame(columns)
del columns  # the feature vectors now live in X

# Labels follow the loading order: melanoma -> 0, nevus -> 1. Their count is
# derived from what was actually loaded; the previous hard-coded 1400 / 700
# did not match the number of files in the folders.
train_len = X.shape[1]
y_train = np.ones(train_len)
y_train[:n_melanoma] = 0


# # TEST
# print("Test loading...")
# X_test = pd.DataFrame()
# y_test = np.zeros(200) + 1
# y_test[:100] = 0

# fnames = [os.path.join(test_melanoma_folder, fname) for fname in os.listdir(test_melanoma_folder)]
# for fname in tqdm(fnames): 
#     img = plt.imread(fname)
#     features = get_feature_vector(img)
#     X_test[fname[-11:-4]] = features

# fnames = [os.path.join(test_nevus_folder, fname) for fname in os.listdir(test_nevus_folder)]
# for fname in tqdm(fnames): 
#     img = plt.imread(fname) 
#     features = get_feature_vector(img)
#     X_test[fname[-11:-4]] = features   
    
    
# X_train = X.to_numpy().T
# scaler = MinMaxScaler(feature_range=(0, 1))
# X_train = scaler.fit_transform(new_X)

# X_test = X_test.to_numpy().T
# scaler = MinMaxScaler(feature_range=(0, 1))
# X_test = scaler.fit_transform(new_X_test)

  0%|          | 0/800 [00:00<?, ?it/s]

Train loading...





NameError: name 'get_feature_vector' is not defined

In [None]:
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier

# Random forest whose leaves hold at least 20 samples, trained with 10 parallel jobs.
# NOTE(review): `new_X` and `y` are not defined in any visible cell — confirm
# where they come from.
clf = RandomForestClassifier(
    min_samples_leaf=20,
    verbose=0,
    n_jobs=10,
).fit(new_X, y)
clf

In [None]:
# Misclassification rate of `clf` on the test set.
# Predictions are compared against the test labels `y_test`; the original
# compared them against the training labels `y`. All rows are predicted in a
# single call instead of one call per sample.
error = int(np.sum(clf.predict(new_X_test) != np.asarray(y_test)))
print(error/new_X_test.shape[0])

In [None]:
# ADAboost
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 100 estimators and a fixed seed, fitted on the same data as
# the other classifiers.
clf = AdaBoostClassifier(
    n_estimators=100,
    random_state=0,
).fit(new_X, y)
clf

In [None]:
# XGBoost
import xgboost as xgb

# Wrap the train / test features and labels in XGBoost DMatrix objects
dtrain = xgb.DMatrix(new_X, label=y)
dtest = xgb.DMatrix(new_X_test, label=y_test)

# Booster settings: depth-2 trees, eta = 1, logistic objective for binary labels
param = dict(max_depth=2, eta=1, objective='binary:logistic')
num_round = 10

# Train for `num_round` boosting rounds, then score the test matrix
bst = xgb.train(param, dtrain, num_boost_round=num_round)
preds = bst.predict(dtest)


In [None]:
# Accuracy: share of rounded predictions that equal the test label
np.mean(np.round(preds) == y_test)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with 100 depth-1 trees and learning rate 1.0.
# Trained on `new_X`, like every other classifier cell; the original fitted on
# `X_`, a name that is not defined anywhere in the visible notebook.
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                 max_depth=1, random_state=0).fit(new_X, y)
# Mean accuracy on the test set
clf.score(new_X_test, y_test)

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

# Three base models for the voting ensemble
clf1 = LogisticRegression(random_state=1, max_iter=300)
clf2 = RandomForestClassifier(n_estimators=150, random_state=1)
clf3 = GaussianNB()

# Subsample: the first N rows of each half of the training data
# (rows 0..699 and rows 700 onward).
N = 400
X = np.concatenate([new_X[:700][:N], new_X[700:][:N]], axis=0)
Y = np.concatenate([y[:700][:N], y[700:][:N]], axis=0)

# Hard (majority) vote over the three base models
eclf = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='hard',
)

# 5-fold cross-validated accuracy of every base model and of the ensemble
for label, clf in [('Logistic Regression', clf1),
                   ('Random Forest', clf2),
                   ('naive Bayes', clf3),
                   ('Ensemble', eclf)]:
    scores = cross_val_score(clf, X, Y, scoring='accuracy', cv=5)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

In [None]:
# a = new_X[:700][:100]
# b = new_X[700:][:100]

# np.append(a,b, axis=0).shape
# Show the shape of the training matrix.
# NOTE(review): at this point X_train is only assigned in commented-out code
# or in a later cell — confirm the intended execution order.
X_train.shape

In [None]:
from sklearn.manifold import TSNE

# 3-component t-SNE embedding of the first 400 rows of X_train, using all cores.
# NOTE(review): presumably rows 0..399 are melanoma samples — confirm against
# how X_train is built.
melanoma_embedded = TSNE(n_components=3, n_jobs=-1).fit_transform(X_train[:400])

In [None]:
# 3-component t-SNE embedding of rows 700..1199 of X_train, using all cores.
# n_components was 4, which TSNE's default barnes_hut method rejects (it
# requires fewer than 4 components); 3 also matches the melanoma embedding and
# the three coordinates the plots read.
nevus_embedded = TSNE(n_components=3, n_jobs=-1).fit_transform(X_train[700:1200])

In [None]:
# 2-D view of the first two t-SNE components of both embeddings.
# The first series used `X_embedded`, which is not defined anywhere in the
# notebook; the melanoma embedding is the counterpart of `nevus_embedded`.
# NOTE(review): the two embeddings are fitted separately, so their coordinates
# may not be directly comparable — confirm this overlay is meaningful.
plt.figure()
plt.plot(melanoma_embedded.T[0], melanoma_embedded.T[1], 'o', label='melanoma')
plt.plot(nevus_embedded.T[0], nevus_embedded.T[1], 'o', label='nevus')
plt.legend()

In [None]:
import plotly.graph_objects as go
import numpy as np
# First three embedding coordinates of each class
x1, y1, z1 = melanoma_embedded.T[:3]
x2, y2, z2 = nevus_embedded.T[:3]

# Interactive 3-D scatter with one marker trace per class
fig = go.Figure(data=[
    go.Scatter3d(x=x1, y=y1, z=z1, mode='markers'),
    go.Scatter3d(x=x2, y=y2, z=z2, mode='markers'),
])
fig.update_layout(width=900, height=1000)
fig.show()

In [None]:
# Distinguish 2 kinds of melanomas: load every training melanoma image into `imgs`
print("Train loading...")
fnames = [os.path.join(train_melanoma_folder, fname) for fname in os.listdir(train_melanoma_folder)]

imgs = []
for fname in tqdm(fnames):
    img = plt.imread(fname)
    imgs.append(img)

In [None]:
# Distinguish 2 kinds of nevus: load every training nevus image into `n_imgs`
print("Train loading...")
fnames = [os.path.join(train_nevus_folder, fname) for fname in os.listdir(train_nevus_folder)]

n_imgs = []
for fname in tqdm(fnames):
    img = plt.imread(fname)
    n_imgs.append(img)

In [None]:
# Display one loaded nevus image (index 28) with the gray colormap
plt.imshow(n_imgs[28], cmap='gray')

In [None]:
# fig, ax = plt.subplots(1,2, figsize=(8,16))
# ax[0].imshow(imgs[1], cmap='gray')
# ax[1].imshow(imgs[3], cmap='gray')
# Display one loaded melanoma image (index 29) with the gray colormap
plt.imshow(imgs[29], cmap='gray')

In [None]:
from sklearn.decomposition import PCA

# Stack the loaded melanoma images into one array.
# NOTE(review): X is not used by the live code below — only by the
# commented-out variants.
X = np.array(imgs)

# X_train = X.reshape(700, X.shape[1]*X.shape[2])

# Use the hand-crafted feature matrix as PCA input.
# NOTE(review): features_train is built in a later cell of this notebook, so
# this cell fails on a fresh top-to-bottom run — confirm the intended order.
X_train = features_train
# FFT
# X_train = []
# for x in X:
#     X_train += [np.fft.fft2(x).real]

# X_train = np.array(X_train)
# X_train = X_train.reshape(700, X_train.shape[1]*X_train.shape[2])

# Fit a 2-component PCA; `pca` holds the value returned by ipca.fit
ipca = PCA(n_components=2)
pca = ipca.fit(X_train)


In [None]:
import seaborn as sns
# Project the training features onto the two fitted principal components
pca_data = ipca.transform(X_train)

# One DataFrame column per principal component, for seaborn
df = pd.DataFrame(pca_data)


sns.set(style="white")

# Pair grid: scatter plots above the diagonal, KDE plots below it and on it
g = sns.PairGrid(df, diag_sharey=False)
g.map_upper(sns.scatterplot)
g.map_lower(sns.kdeplot, colors="C0")
g.map_diag(sns.kdeplot, lw=2)




In [None]:
from sklearn.cluster import KMeans

# 2-cluster k-means started once (n_init=1) from the two PCA component vectors
# as initial centres.
# NOTE(review): `n_jobs` and algorithm='full' are only accepted by older
# scikit-learn releases — confirm the installed version before running.
km = KMeans(init=pca.components_, n_clusters=2, n_init=1, n_jobs=-1, algorithm='full')

# Cluster the same feature matrix that was fed to the PCA
km.fit(X_train)


In [None]:
# Cluster index (0 or 1) assigned by k-means to every training sample
Z = km.predict(X_train)

# PCA coordinates of the samples in cluster 1
x1 = pca_data.T[0][Z==1]
y1 = pca_data.T[1][Z==1]

# PCA coordinates of the samples in cluster 0. The original assigned these to
# x1 / y1 again, so cluster 1 was lost and the second plot call drew stale
# x2 / y2 values left over from an earlier cell.
x2 = pca_data.T[0][Z==0]
y2 = pca_data.T[1][Z==0]

# NOTE(review): both axes are logarithmic, which cannot show non-positive PCA
# coordinates — confirm this is intended.
plt.figure(figsize=(16,8))
plt.plot(x1,y1, '.', label='cluster 1')
plt.plot(x2,y2, '.', label='cluster 0')
plt.xscale('log')
plt.yscale('log')
plt.legend()

In [None]:
# Split the loaded images by the k-means cluster they were assigned to
one_class = [img for img, cluster in zip(imgs, Z) if cluster == 0]
two_class = [img for img, cluster in zip(imgs, Z) if cluster == 1]

# 10x10 grid: the top five rows show cluster 0 (blue), the bottom five rows
# show cluster 1 (red).
fig, ax = plt.subplots(10,10, figsize=(16,16))

n_cols = ax.shape[1]
for i, row in enumerate(ax):
    in_top_half = i < 5
    images, cmap = (one_class, 'Blues') if in_top_half else (two_class, 'Reds')
    first = (i if in_top_half else i - 5) * n_cols
    for j, cell in enumerate(row):
        cell.axis('off')
        idx = first + j
        # A cluster may hold fewer than 50 images; leave the remaining cells
        # empty instead of raising IndexError.
        if idx < len(images):
            cell.imshow(images[idx], cmap=cmap)

In [None]:
import cv2 

def haralick_for(img):
    """Return the Haralick features of `img`, averaged over axis 0 of mahotas' output."""
    texture = mahotas.features.haralick(img)
    return texture.mean(axis=0)

def lbp_for(img):
    """Return mahotas' local-binary-pattern features of `img` (radius 3, 10 points)."""
    radius, points = 3, 10
    return mahotas.features.lbp(img, radius, points)

def kaze_for(img):
    """Return a fixed-length KAZE descriptor vector for `img`.

    Keeps the 32 keypoints with the highest response, flattens their
    descriptors and pads with zeros up to 32 * 64 values. If no descriptor
    array is produced, the result is all zeros.
    """
    vector_size = 32
    # Descriptor vector size is 64
    needed_size = vector_size * 64

    detector = cv2.KAZE_create()
    # Strongest keypoints first, capped at `vector_size`
    strongest = sorted(detector.detect(img),
                       key=lambda kp: kp.response,
                       reverse=True)[:vector_size]
    _, dsc = detector.compute(img, strongest)

    if type(dsc) is not np.ndarray:
        return np.zeros(needed_size)

    flat = dsc.flatten()
    padding = np.zeros(needed_size - flat.size)
    return np.concatenate([flat, padding])

features_train = []

for img in tqdm(imgs):
    # Filtering image: blur, zero out pixels at or below 127, blur again,
    # then binarise with Otsu's threshold
    blur = cv2.GaussianBlur(img, (5, 5), 0)
    t1 = cv2.threshold(blur, 127, 255, cv2.THRESH_TOZERO)[1]

    blur = cv2.GaussianBlur(t1, (5, 5), 0)
    t4 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Keep only the pixels where the inverted binary image is non-zero
    m_img = cv2.bitwise_and(img, img, mask=255 - t4)

    # Feature from image: Haralick + LBP + KAZE + real part of the 2-D FFT
    feature_parts = [
        haralick_for(m_img),
        lbp_for(m_img),
        kaze_for(m_img),
        np.fft.fft2(m_img).real.flatten(),
    ]
    features_train.append(np.concatenate(feature_parts))

# One row per image
features_train = np.array(features_train)
features_train.shape


In [None]:


# Preview the masking pipeline on images 160..169: one row per image with the
# columns original / first threshold / Otsu binary image / masked result.
# NOTE(review): this preview uses THRESH_TRUNC for the first threshold, while
# the feature-extraction cell uses THRESH_TOZERO — confirm which is intended.
fig, ax = plt.subplots(10,4, figsize=(16,8*10))
for i in range(10):
    img = imgs[160+i]

    blur = cv2.GaussianBlur(img, (5, 5), 0)
    t1 = cv2.threshold(blur, 127, 255, cv2.THRESH_TRUNC)[1]

    blur = cv2.GaussianBlur(t1, (5, 5), 0)
    t4 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    m_img = cv2.bitwise_and(img, img, mask=255 - t4)

    for panel, picture in zip(ax[i], (img, t1, t4, m_img)):
        panel.imshow(picture)

plt.show()