In [1]:
import os

In [2]:
from skimage import io
from skimage import filters
from skimage import morphology
from skimage.filters import sobel
from skimage.color import rgb2gray
from skimage.feature import CENSURE
from skimage.feature import ORB
from skimage.feature import match_descriptors
import matplotlib.pyplot as plt

In [3]:
%matplotlib inline

In [4]:
path_dataset = 'mednode_dataset/'

In [5]:
# os.listdir returns entries in arbitrary order and also lists plain files, so
# keep only sub-directories and sort them. This keeps etiquetas[0] and
# etiquetas[1] bound to the same class folders on every machine and run.
etiquetas = sorted(
    entry for entry in os.listdir(path_dataset)
    if os.path.isdir(os.path.join(path_dataset, entry))
)

In [6]:
etiquetas

['melanoma', 'naevus']

In [7]:
etiqueta_0 = os.path.join(path_dataset, etiquetas[0])

In [8]:
etiqueta_1 = os.path.join(path_dataset, etiquetas[1])

In [9]:
# File names of the first class folder, sorted because os.listdir gives no
# ordering guarantee and later cells depend on iteration order (first image,
# row order of the extracted features).
imgs_etiqueta_0 = sorted(os.listdir(etiqueta_0))

In [10]:
# File names of the second class folder, sorted because os.listdir gives no
# ordering guarantee and later cells depend on iteration order (first image,
# row order of the extracted features).
imgs_etiqueta_1 = sorted(os.listdir(etiqueta_1))

In [11]:
def show_imgs(etiqueta, imgs_etiquetas):
    """Display every listed image of one class folder, one figure per file.

    Parameters
    ----------
    etiqueta : str
        Path of the folder that holds the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    The full path of each file is printed before its figure is shown.
    Nothing is returned.
    """
    named_paths = ((img, os.path.join(etiqueta, img)) for img in imgs_etiquetas)
    for img, path_img in named_paths:
        print(f'{path_img}')
        pixels = io.imread(path_img)
        plt.title(f'{img}')
        plt.imshow(pixels)
        plt.tight_layout()
        plt.axis('off')
        plt.show()

In [12]:
# show_imgs(etiqueta_0, imgs_etiqueta_0)

In [13]:
# show_imgs(etiqueta_1, imgs_etiqueta_1)

In [14]:
def show_imgs_grays(etiqueta, imgs_etiquetas):
    """Display the grayscale conversion of every listed image.

    Parameters
    ----------
    etiqueta : str
        Path of the folder that holds the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    Each file is read, converted with ``rgb2gray`` and drawn with the gray
    colormap. The file path is printed first. Nothing is returned.
    """
    for img in imgs_etiquetas:
        path_img = os.path.join(etiqueta, img)
        print(f'{path_img}')
        gray = rgb2gray(io.imread(path_img))
        plt.title(f'{img}')
        plt.imshow(gray, cmap='gray')
        plt.tight_layout()
        plt.axis('off')
        plt.show()

In [15]:
# show_imgs_grays(etiqueta_0, imgs_etiqueta_0)

In [16]:
def show_imgs_grays_sobel(etiqueta, imgs_etiquetas):
    """Display the Sobel edge response of every listed image.

    Parameters
    ----------
    etiqueta : str
        Path of the folder that holds the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    Each file is read, converted to grayscale, passed through ``sobel`` and
    drawn with the gray colormap. The file path is printed first. Nothing is
    returned.
    """
    for img in imgs_etiquetas:
        path_img = os.path.join(etiqueta, img)
        print(f'{path_img}')
        gray = rgb2gray(io.imread(path_img))
        edges = sobel(gray)
        plt.title(f'{img}')
        plt.imshow(edges, cmap='gray')
        plt.tight_layout()
        plt.axis('off')
        plt.show()

In [17]:
# show_imgs_grays_sobel(etiqueta_0, imgs_etiqueta_0)

In [18]:
def show_imgs_keypoints(etiqueta, imgs_etiquetas):
    """Overlay CENSURE keypoints on the grayscale version of the first image.

    Parameters
    ----------
    etiqueta : str
        Path of the folder that holds the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    Only the first file is processed: the loop exits after one figure.
    Keypoints are drawn as red unfilled circles whose marker size is
    ``2 ** scale``. Nothing is returned.
    """
    for img in imgs_etiquetas:
        path_img = os.path.join(etiqueta, img)
        print(f'{path_img}')
        gray = rgb2gray(io.imread(path_img))

        detector = CENSURE(min_scale=1, max_scale=10)
        detector.detect(gray)

        plt.title(f'{img}')
        plt.imshow(gray, cmap='gray')
        # Keypoints are stored as (row, col); scatter expects x=col, y=row.
        kp_rows = detector.keypoints[:, 0]
        kp_cols = detector.keypoints[:, 1]
        plt.scatter(kp_cols, kp_rows,
                    2 ** detector.scales, facecolors='none', edgecolors='r')
        plt.tight_layout()
        plt.axis('off')
        plt.show()
        # Deliberately stop after the first image.
        break

In [19]:
# show_imgs_keypoints(etiqueta_0, imgs_etiqueta_0)

In [20]:
from scipy import stats

In [21]:
[d for d in dir(stats) if not d.startswith('_')]

 'absolute_import',
 'alpha',
 'anderson',
 'anderson_ksamp',
 'anglit',
 'ansari',
 'arcsine',
 'argus',
 'bartlett',
 'bayes_mvs',
 'bernoulli',
 'beta',
 'betaprime',
 'binned_statistic',
 'binned_statistic_2d',
 'binned_statistic_dd',
 'binom',
 'binom_test',
 'boltzmann',
 'boxcox',
 'boxcox_llf',
 'boxcox_normmax',
 'boxcox_normplot',
 'bradford',
 'brunnermunzel',
 'burr',
 'burr12',
 'cauchy',
 'chi',
 'chi2',
 'chi2_contingency',
 'chisquare',
 'circmean',
 'circstd',
 'circvar',
 'combine_pvalues',
 'contingency',
 'cosine',
 'crystalball',
 'cumfreq',
 'describe',
 'dgamma',
 'dirichlet',
 'distributions',
 'division',
 'dlaplace',
 'dweibull',
 'energy_distance',
 'entropy',
 'epps_singleton_2samp',
 'erlang',
 'expon',
 'exponnorm',
 'exponpow',
 'exponweib',
 'f',
 'f_oneway',
 'fatiguelife',
 'find_repeats',
 'fisher_exact',
 'fisk',
 'fligner',
 'foldcauchy',
 'foldnorm',
 'frechet_l',
 'frechet_r',
 'friedmanchisquare',
 'gamma',
 'gausshyper',
 'gaussian_kde',
 'genex

In [22]:
from scipy import stats
import pandas as pd

In [81]:
def show_imgs_keypoints_features(etiqueta, imgs_etiquetas):
    """Compute patch statistics around ORB keypoints for every listed image.

    Each image is split into its red, green and blue channels plus a grayscale
    conversion. ORB is run on each of the four planes and the detected
    keypoints are pooled. A patch reaching 3 pixels either side of every
    pooled keypoint (7x7 when it lies fully inside the image) is then cut out
    of each plane and summarised.

    Parameters
    ----------
    etiqueta : str
        Directory that contains the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    Returns
    -------
    dict
        ``{file name: {'<plane>-<keypoint index>': record}}``. Every record
        holds ``color``, ``kp``, ``img``, ``sum``, ``min``, ``max``, ``mean``,
        ``std``, ``var``, ``kurtosis``, ``moment_3``, ``moment_4``, ``skew``,
        ``entropy`` and ``energy``. An empty input yields an empty dict.
    """
    # Local import: the import cells visible in this notebook do not load numpy.
    import numpy as np

    half = 3  # patch reaches `half` pixels on each side of the keypoint

    features = {}
    for img in imgs_etiquetas:
        path_img = os.path.join(etiqueta, img)
        rgb = io.imread(path_img)
        # Assumes a 3-D array with at least three channels (indexing below).
        # Channel slices are views; nothing in this function writes to them,
        # so copying the whole image per channel is unnecessary.
        planes = [
            ('red', rgb[:, :, 0]),
            ('green', rgb[:, :, 1]),
            ('blue', rgb[:, :, 2]),
            ('gray', rgb2gray(rgb)),
        ]
        features[img] = {}

        # Pool the keypoints found in every plane; the same pooled list is
        # then used to sample all four planes.
        keypoints = []
        for _, plane in planes:
            orb = ORB(n_keypoints=10,
                      downscale=1.2,
                      n_scales=9,
                      fast_n=4,
                      fast_threshold=0.06,
                      harris_k=0.04)
            orb.detect_and_extract(plane)
            keypoints.extend(orb.keypoints.copy())

        for label, plane in planes:
            for ix, kp in enumerate(keypoints):
                row, col = int(kp[0]), int(kp[1])
                # Clamp the lower bounds: a negative slice start would wrap
                # around to the far edge and give an empty patch, on which
                # min()/max() raise.
                crop = plane[max(row - half, 0):row + half + 1,
                             max(col - half, 0):col + half + 1].copy()
                if label != 'gray':
                    # Colour channels are divided by 255; presumably to match
                    # the scale of the grayscale plane (verify for non-uint8
                    # inputs).
                    crop = crop / 255
                values = crop.ravel()

                # Relative frequency of each distinct value, in one pass
                # (replaces a list.count call per distinct value).
                _, counts = np.unique(values, return_counts=True)
                proba = counts / values.size

                features[img][f'{label}-{ix}'] = {
                    'color': label,
                    'kp': ix,
                    'img': img,
                    'sum': crop.sum(),
                    'min': crop.min(),
                    'max': crop.max(),
                    'mean': crop.mean(),
                    'std': crop.std(),
                    'var': crop.var(),
                    'kurtosis': stats.kurtosis(values),
                    'moment_3': stats.moment(values, moment=3),
                    'moment_4': stats.moment(values, moment=4),
                    'skew': stats.skew(values),
                    'entropy': stats.entropy(proba),
                    'energy': float(np.sum(proba ** 2)),
                }
    return features

In [82]:
features = show_imgs_keypoints_features(etiqueta_0, imgs_etiqueta_0)

In [83]:
# Flatten the nested {image: {patch key: record}} mapping into a single list
# of per-patch records, keeping the mapping's iteration order.
datas = []
for img, patches in features.items():
    datas.extend(patches.values())

In [84]:
df = pd.DataFrame(datas)

In [85]:
df.head()

Unnamed: 0,color,energy,entropy,img,kp,kurtosis,max,mean,min,moment_3,moment_4,skew,std,sum,var
0,red,0.029571,3.598225,154372.jpg,0,-1.067119,1.0,0.721809,0.360784,-0.001406,0.001999,-0.243859,0.17932,35.368627,0.032156
1,red,0.027072,3.665487,154372.jpg,1,-1.29436,1.0,0.713005,0.388235,-0.000755,0.001952,-0.121422,0.183917,34.937255,0.033825
2,red,0.027072,3.665487,154372.jpg,2,-1.320039,1.0,0.696759,0.34902,-0.001312,0.00271,-0.163025,0.200411,34.141176,0.040164
3,red,0.0404,3.528958,154372.jpg,3,-1.11732,1.0,0.767027,0.411765,-0.001388,0.00183,-0.252058,0.176576,37.584314,0.031179
4,red,0.037068,3.462288,154372.jpg,4,-1.30884,1.0,0.767187,0.454902,-0.001172,0.001645,-0.212761,0.17659,37.592157,0.031184


In [86]:
df.to_csv('datasets_melanoma.csv', index=False)

In [None]:
df = pd.read_csv('datasets_melanoma.csv')

In [None]:
# One describe() table per distinct value of the ``img`` column of ``df``,
# keyed by that value.
describe_features = {}
for img in set(df['img'].values):
    df_img = df.loc[df['img'] == img]
    describe_features[img] = df_img.describe()

In [None]:
set(df.img.values)

In [87]:
features1 = show_imgs_keypoints_features(etiqueta_1, imgs_etiqueta_1)

In [88]:
# Flatten the nested {image: {patch key: record}} mapping into a single list
# of per-patch records, keeping the mapping's iteration order.
datas1 = []
for img, patches in features1.items():
    datas1.extend(patches.values())

In [89]:
df1 = pd.DataFrame(datas1)

In [90]:
df1.head()

Unnamed: 0,color,energy,entropy,img,kp,kurtosis,max,mean,min,moment_3,moment_4,skew,std,sum,var
0,red,0.037901,3.451609,132357.jpg,0,0.129172,0.87451,0.584874,0.443137,0.001216,0.00044,0.941908,0.108883,28.658824,0.011855
1,red,0.028738,3.644129,132357.jpg,1,-1.107086,0.894118,0.634174,0.419608,0.000601,0.000719,0.220771,0.139612,31.07451,0.019491
2,red,0.031237,3.541641,132357.jpg,2,-0.685121,0.921569,0.670188,0.541176,0.000988,0.000317,0.781287,0.10814,32.839216,0.011694
3,red,0.028738,3.626516,132357.jpg,3,-1.033803,0.866667,0.60104,0.427451,0.001167,0.000643,0.479856,0.134477,29.45098,0.018084
4,red,0.030404,3.569933,132357.jpg,4,0.231241,0.882353,0.607683,0.470588,0.001382,0.000452,1.073548,0.108773,29.776471,0.011832


In [91]:
df1.to_csv('datasets_naevus.csv', index=False)

In [None]:
df1 = pd.read_csv('datasets_naevus.csv')

In [None]:
# One describe() table per distinct value of the ``img`` column of ``df1``,
# keyed by that value.
describe_features1 = {}
for img in set(df1['img'].values):
    df_img1 = df1.loc[df1['img'] == img]
    describe_features1[img] = df_img1.describe()

In [None]:
set(df1.img.values)

In [None]:
# Build the design matrix: one row per image, made of the 'mean' and 'std'
# rows of that image's describe() table flattened into a single vector.
# - `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in
#   pandas 1.0.
# - Image names are sorted so the sample order (and therefore the seeded
#   train/test split) does not depend on set iteration order, which can
#   change between kernel sessions for strings.
imgs_df = sorted(set(df.img.values))
imgs_df1 = sorted(set(df1.img.values))

X = [describe_features[img].loc[['mean', 'std']].values.ravel() for img in imgs_df]
leny = len(X)  # number of samples that came from `df`
X.extend(describe_features1[img].loc[['mean', 'std']].values.ravel() for img in imgs_df1)

# Label 1.0 for the samples taken from `df`, 0.0 for those taken from `df1`.
y = [1. if i < leny else 0. for i, _ in enumerate(X)]

In [None]:
len(X), len(y)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Perceptron
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle seed.
# NOTE(review): the split is not stratified — consider stratify=y if the two
# classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [None]:
# Candidate model: Perceptron with default settings.
# NOTE(review): a later cell rebinds `clf` to a GridSearchCV, so this
# assignment only matters if that cell is skipped.
clf = Perceptron()

In [None]:
# Candidate model: LinearRegression with default settings.
# NOTE(review): this is a regressor, so its predictions are presumably
# continuous values rather than the 0/1 labels the classification metrics
# expect — confirm before using it with classification_report. A later cell
# rebinds `clf` to a GridSearchCV.
clf = LinearRegression()

In [None]:
# Candidate model: SVC with gamma='scale' and a one-vs-one decision function.
# NOTE(review): a later cell rebinds `clf` to a GridSearchCV, so this
# assignment only matters if that cell is skipped.
clf = svm.SVC(gamma='scale', decision_function_shape='ovo')

In [None]:
# Candidate model: LinearSVC using the Crammer-Singer multi-class formulation.
# NOTE(review): a later cell rebinds `clf` to a GridSearchCV, so this
# assignment only matters if that cell is skipped.
clf = svm.LinearSVC(max_iter=10000, tol=0.001, multi_class='crammer_singer')

In [None]:
# Base SGD classifier that is handed to GridSearchCV in a later cell.
# NOTE(review): scikit-learn documents `validation_fraction` as used only when
# early_stopping=True, which is not set here — confirm whether early stopping
# was intended.
SGDC = SGDClassifier(tol=0.00001, loss="modified_huber", shuffle=False, penalty="elasticnet", max_iter=10000, validation_fraction=0.50)

In [None]:
# Hyper-parameter grid for the SGD classifier.
# `validation_fraction` is intentionally not searched: scikit-learn only reads
# it when early_stopping=True, and the base estimator leaves early stopping
# off, so sweeping six values multiplied the number of fits by six without
# changing any fitted model.
# NOTE: scikit-learn >= 1.1 spells the logistic loss 'log_loss'; the 'log'
# alias was removed in 1.3. Switch the name when upgrading.
parameters = {
    'loss': ('log', 'modified_huber'),
    'tol': [0.00001, 0.0001, 0.001, 0.01, 0.1],
    'penalty': ('elasticnet', 'l2', 'l1'),
}

In [None]:
clf = GridSearchCV(SGDC, parameters, cv=5)

In [None]:
clf = clf.fit(X_train, y_train)

In [None]:
y_pred = clf.predict(X_test)

In [None]:
# Coefficient of determination between the true and predicted labels.
# NOTE(review): r2_score is a regression metric; for a 0/1 classification
# target the classification_report cell is the more informative summary.
r2_score(y_test, y_pred)

In [None]:
# Mean squared error between the true and predicted labels.
# With 0./1. targets (as `y` is built) and predictions that are also 0/1 class
# labels, this equals the fraction of misclassified test samples.
mean_squared_error(y_test, y_pred)

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
from scipy.ndimage import binary_fill_holes

In [None]:
def show_imgs_filtered(etiqueta, imgs_etiquetas):
    """Show each image followed by a filled mask for every colour plane.

    Parameters
    ----------
    etiqueta : str
        Path of the folder that holds the images.
    imgs_etiquetas : iterable of str
        File names inside ``etiqueta``.

    For every file the original image is displayed first. Then, for the red,
    green and blue channels and the grayscale conversion, the plane goes
    through: Gaussian blur (sigma 1), three median filters with disk radii
    3, 5 and 10, a threshold at half of the maximum (values set to 0 / 1),
    another Gaussian blur, the Roberts edge filter and ``binary_fill_holes``.
    Each result is shown in its own figure titled with the plane name.

    Processing stops after the file at index 6, i.e. at most seven files.
    Nothing is returned.
    """
    for n, img in enumerate(imgs_etiquetas):
        path_img = os.path.join(etiqueta, img)
        print(f'{path_img}')
        rgb = io.imread(path_img)
        plt.title(f'{img}')
        plt.imshow(rgb)
        plt.tight_layout()
        plt.axis('off')
        plt.show()

        red = rgb.copy()[:, :, 0]
        green = rgb.copy()[:, :, 1]
        blue = rgb.copy()[:, :, 2]
        gray = rgb2gray(rgb)

        planes = [('red', red), ('green', green), ('blue', blue), ('gray', gray)]
        for label, plane in planes:
            feature = filters.gaussian(plane, sigma=1)
            # Successively larger median windows.
            for radius in (3, 5, 10):
                feature = filters.median(feature, morphology.disk(radius))

            # Binarise in place around half of the maximum value.
            below_half = feature < feature.max() / 2
            feature[below_half] = 0
            feature[~below_half] = 1

            feature = filters.gaussian(feature, sigma=1)
            feature = filters.roberts(feature)
            feature = binary_fill_holes(feature)

            plt.title(f'{label}')
            plt.imshow(feature, cmap='gray')
            plt.tight_layout()
            plt.axis('off')
            plt.show()

        # Same cut-off as `n > 5`: indices 0..6 are processed.
        if n >= 6:
            break

In [None]:
show_imgs_filtered(etiqueta_0, imgs_etiqueta_0)