In [1]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
import math
import os

In [2]:
# Directory of melanoma dataset
dir = "D:\\Mega\\Doutorado-UFG\\Disciplinas\\Processamento Digital de Imagem\\Artigo\\complete_mednode_dataset\\dataset\\"

In [3]:
# Function to convert float number in float32 number
def np_hist_to_cv(np_histogram_output):
    counts, bin_edges = np_histogram_output
    return counts.ravel().astype('float32')

In [4]:
def extractFeatures(image_gray, radius):
    pi = 3.14

    G_x = cv2.reduce(image_gray/255, 0, cv2.REDUCE_SUM)
    G_y = cv2.reduce(image_gray/255, 1, cv2.REDUCE_SUM)
    h1 = np.histogram(G_x)
    h2 = np.histogram(G_y.T)

    # A feature - Assymetry (less is more assymetric)
    a = cv2.compareHist(np_hist_to_cv(h1), np_hist_to_cv(h2), cv2.HISTCMP_CORREL)
    # print(assymetry)

    # B feature - Border
    perimeter = 2 * pi * (radius/100)
    roundness = 4 * pi * (radius/100)/(perimeter**2)
    # print(roundness)

    # C feature - Color
    r = image[:, :, 0].mean()
    g = image[:, :, 1].mean()
    b = image[:, :, 2].mean()

    # D feature - Diameter
    d = math.sqrt((4 * (pi * ((radius/100)**2)))/pi)
    return a, d, r, g, b, roundness

In [5]:
dataset = pd.DataFrame(columns= ["assymetry", "border", "diameter", "color_r", "color_g", "color_b", "label"])

for path,d,f in os.walk(dir):
    if len(f) > 0:  # verifica se f é um arquivo válido
        for i in f:
            if i.endswith('.jpg') or i.endswith('.png'):
                img = path + '\\' +i
                image = cv2.imread(img)
                # Convert RGB image to grayscale
                image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                # Generate histogram of projection in X and Y axes to calculate assymetry
                contours, hierarchy = cv2.findContours(image_gray, 2, 1)
                # print(len(contours))
                cnt = contours
                for j in range(1):
                    (x, y), radius = cv2.minEnclosingCircle(cnt[j])
                    center = (int(x), int(y))
                    radius = int(radius)
                    # plt.text(x-21, y+15, '+', fontsize=25, color='red')
                    # plt.text(10, -10, 'Centro: ' +str(center), fontsize=11, color='red')
                    # plt.text(840, -10, 'Diâmetro: ' + str((radius*2)/100)+'mm', fontsize=11, color='red')
                    # plt.Circle((10, -10), 7.2, color='blue')
                    # plt.imshow(image_gray, cmap='gray')
                    # plt.show()
                    a, d, r, g, b, roundness = extractFeatures(image_gray, radius)
                    if img.__contains__('melanoma'):
                        dataset.loc[len(dataset)] = [a, roundness, d, r, g, b, 1]
                    else:
                        dataset.loc[len(dataset)] = [a, roundness, d, r, g, b, 0]

dataset.to_csv('dataset_melanoma.csv')

In [28]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
# Classification of dataset
from sklearn.feature_selection import RFECV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

labels = dataset.iloc[:, 6]
datasets = dataset.drop(columns=['label'], axis=1)

# selector = RFECV(LinearSVC(), cv=10, scoring='f1_weighted')
# sm = SMOTE(random_state=42) # Oversampling
#sm = NearMiss(random_state=42) # Undersampling

# datasets, labels = sm.fit_resample(datasets,labels)

train, test, train_tlabels, test_labels = train_test_split(datasets,
                                                           labels,
                                                           test_size=0.33)


# model = RandomForestClassifier()
# model = DecisionTreeClassifier()
# model = SVC(probability=True)
model = LinearSVC(max_iter=120000, dual=False)

model.fit(train, train_tlabels)
y_pred = model.predict(test)

# Evaluation
acc = accuracy_score(test_labels, y_pred)
print("Accuracy: {:.2f}".format(acc*100))

print(confusion_matrix(test_labels, y_pred))

Accuracy: 84.21
[[28  4]
 [ 5 20]]
