In [None]:
import keras
print(keras.__version__)
import tensorflow
print(tensorflow.__version__)
import numpy as np
print(np.__version__)

from keras_tqdm import TQDMNotebookCallback

from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras import backend as K

from keras.optimizers import Adam

In [None]:
# Define the balanced-accuracy metric.
# `balancedAccuracy` is imported from a project-local module; presumably it
# builds a metric object for `num_classes` classes — TODO confirm.
# NOTE(review): `bacc_metric` is not referenced again in the cells shown here.
from balancedAccuracy import balancedAccuracy
num_classes = 3  # same count as the columns kept by the `[:, :3]` target slices
bacc_metric = balancedAccuracy(num_classes)

In [None]:
# Load the training and validation arrays from the .npz archive.
# NOTE(review): the leading "..." in the path looks like a redacted
# placeholder — point it at the real data directory before running.
data = np.load(".../HAMNOAUG.npz")

imageList, targetList, imageValList, targetValList = (
    data[key]
    for key in ("imageList", "targetList", "imageValList", "targetValList")
)

In [None]:
# Load the held-out test images and their targets.
# NOTE(review): the leading "..." in the path looks like a redacted
# placeholder — point it at the real data directory before running.
testdata = np.load(".../TESTHAM.npz")
testList, targetTestList = testdata["testList"], testdata["targetTestList"]

In [None]:
# Keep only the first three target columns for the training and validation sets.
# NOTE(review): `targetTestList` is not sliced here, yet it is later passed
# through `argmax(1)` like the others — confirm it already has three columns.
# NOTE(review): any row whose label lies outside the first three columns
# becomes all-zero and would argmax to class 0 — confirm such rows were
# filtered out upstream.
targetList, targetValList = targetList[:, :3], targetValList[:, :3]

In [None]:
# plot_confusion_matrix function
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Tick labels for the confusion-matrix axes; assumed to follow the same order
# as the one-hot target columns — TODO confirm against the dataset.
class_names = ["MEL", "NV", "BKL"]

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix and return the balanced accuracy.

    Parameters
    ----------
    cm : array-like, shape (n_classes, n_classes)
        Confusion matrix with true labels on the rows and predicted labels
        on the columns.
    classes : sequence of str
        Tick labels, one per class, in row/column order.
    normalize : bool
        When True, every row is divided by its total before plotting, so the
        cells show per-class rates instead of raw counts.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    float
        Balanced accuracy: the mean of the per-class recalls (diagonal divided
        by row total). Classes without any true sample are left out of the
        mean instead of turning the result into NaN. The value is the same
        whether or not `normalize` is set.
    """
    cm = np.asarray(cm)

    # Per-class recall is always computed from the counts, so the returned
    # value is a real balanced accuracy even when raw counts are plotted.
    row_totals = cm.sum(axis=1)
    has_samples = row_totals > 0
    recall = np.diag(cm)[has_samples] / row_totals[has_samples]
    balanced_accuracy = np.mean(recall) if recall.size else float('nan')

    if normalize:
        # Rows without samples are divided by 1 and therefore stay all-zero
        # instead of producing a divide-by-zero warning and NaN cells.
        safe_totals = np.where(has_samples, row_totals, 1)
        cm = cm.astype('float') / safe_totals[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(balanced_accuracy)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate every cell; switch to white text on dark cells for contrast.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass after the axis labels exist so they are not clipped.
    plt.tight_layout()

    return balanced_accuracy


In [None]:
def valMatrix():
    """
    Plot the row-normalized confusion matrix of the global `model` on the
    validation set and print the balanced accuracy.

    Reads the notebook globals `model`, `imageValList`, `targetValList` and
    `class_names`.
    NOTE(review): `model` is not defined in the cells shown here.
    """
    true_labels = targetValList.copy().argmax(1)
    predicted_labels = model.predict(imageValList).argmax(1)

    matrix = confusion_matrix(true_labels, predicted_labels)
    np.set_printoptions(precision=2)
    plt.figure()
    score = plot_confusion_matrix(matrix, classes=class_names, normalize=True,
                                  title='Normalized Validation confusion matrix')
    print("Balanced Accuracy: "+ str(score))
    plt.show()

def trainMatrix():
    """
    Plot the row-normalized confusion matrix of the global `model` on the
    training set and print the balanced accuracy.

    Reads the notebook globals `model`, `imageList`, `targetList` and
    `class_names`.
    NOTE(review): `model` is not defined in the cells shown here.
    """
    true_labels = targetList.copy().argmax(1)
    predicted_labels = model.predict(imageList).argmax(1)

    matrix = confusion_matrix(true_labels, predicted_labels)
    np.set_printoptions(precision=2)
    plt.figure()
    score = plot_confusion_matrix(matrix, classes=class_names, normalize=True,
                                  title='Normalized Training confusion matrix')
    print("Balanced Accuracy: "+ str(score))
    plt.show()
    
def testMatrix():
    """
    Plot the row-normalized confusion matrix of the global `model` on the
    test set and print the balanced accuracy.

    Reads the notebook globals `model`, `testList`, `targetTestList` and
    `class_names`.
    NOTE(review): `model` is not defined in the cells shown here.
    """
    y_test = targetTestList.copy()
    y_pred = model.predict(testList)
    # Collapse probabilities / one-hot rows into class indices.
    y_pred = y_pred.argmax(1)
    y_test = y_test.argmax(1)

    cnf_matrix = confusion_matrix(y_test, y_pred)
    np.set_printoptions(precision=2)
    plt.figure()
    # The title previously said "Training", copied over from trainMatrix.
    print("Balanced Accuracy: "+ str(plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                          title='Normalized Test confusion matrix')))
    plt.show()

In [None]:
# Per-sample weights stand in for class weights here (the original note says
# class_weight doesn't work with one-hot encoded targets).
# Each sample gets the weight of its argmax class.
weight_dict = {0:8.15, 1:1.33, 2:8.08}

# training set
temp = targetList.argmax(1)
sample_weights = np.array([weight_dict.get(label) for label in temp])

# validation set
temp = targetValList.argmax(1)
sample_weights_val = np.array([weight_dict.get(label) for label in temp])

## SIFT Tutorial 

In [None]:
import cv2
import matplotlib.pyplot as plt

In [None]:
# Record the OpenCV version in use; whether and where SIFT is available
# depends on the installed build.
print(cv2.__version__)

In [None]:
def show_rgb_img(img):
    """Display a colour image with matplotlib and return the image artist."""
    rendered = plt.imshow(img)
    return rendered

def to_gray(color_img):
    """Return a single-channel grayscale version of an RGB-ordered image."""
    return cv2.cvtColor(color_img, cv2.COLOR_RGB2GRAY)

In [None]:
# Use one training image (index 16) as the running example for the SIFT walkthrough.
lesion_img = imageList[16]

# Display the example image.
show_rgb_img(lesion_img)

In [None]:
# Grayscale copy of the example image; this is what gets passed to the SIFT detector below.
lesion_img_gray = to_gray(lesion_img)
# Trailing semicolon suppresses the artist repr in the cell output.
plt.imshow(lesion_img_gray, cmap='gray');

In [None]:
def gen_sift_features(gray_img, eps=1e-7):
    """
    Detect SIFT keypoints in a grayscale image and return Hellinger-mapped
    descriptors.

    Parameters
    ----------
    gray_img : array
        Single-channel image handed to the SIFT detector.
    eps : float
        Small constant added to each descriptor's sum so the L1
        normalization never divides by zero.

    Returns
    -------
    tuple
        `(keypoints, descriptors)`. `descriptors` has one row per keypoint
        (128-dimensional SIFT vectors). When nothing is detected the result
        is `([], None)`.
    """
    # Newer OpenCV builds expose SIFT in the main module, older ones only in
    # the contrib `xfeatures2d` module; use whichever is present.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    kp, desc = sift.detectAndCompute(gray_img, None)

    # No keypoints means no descriptor array either.
    if desc is None or len(kp) == 0:
        return ([], None)

    # Apply the Hellinger kernel: L1-normalize each descriptor, then take the
    # element-wise square root.
    desc /= (desc.sum(axis=1, keepdims=True) + eps)
    desc = np.sqrt(desc)

    return (kp, desc)

def show_sift_features(gray_img, color_img, kp):
    """Draw the keypoints `kp` via cv2.drawKeypoints and display the result.

    A copy of `color_img` is passed as the output image, so the caller's
    array is not the one handed to OpenCV.
    """
    canvas = color_img.copy()
    annotated = cv2.drawKeypoints(gray_img, kp, canvas)
    return plt.imshow(annotated)

In [None]:
# Generate SIFT keypoints and descriptors for the example image.
lesion_img_kp, lesion_img_desc = gen_sift_features(lesion_img_gray)

# Overlay the detected keypoints on the example image.
print('Here are what our SIFT features look like for the lesion image:')
show_sift_features(lesion_img_gray, lesion_img, lesion_img_kp);

# SIFT on all training images

In [None]:
from tqdm import tqdm_notebook

# One entry per training image: that image's descriptor array, or None when
# gen_sift_features found no keypoints.
masterKeypoints = []

for img in tqdm_notebook(imageList):
    
    # Convert image to grayscale
    img_gray = to_gray(img)
    
    # Keep only the descriptors (one 128-length vector per keypoint); the
    # keypoint objects themselves are discarded.
    _, img_desc = gen_sift_features(img_gray)
    
    masterKeypoints.append(img_desc)

In [None]:
# Note: an image with no SIFT keypoints is stored as None rather than as an
# empty descriptor array, so downstream code must check for None.
masterKeypoints[16] is None

In [None]:
# Average number of descriptors per training image. Images without any
# descriptors (None entries) add nothing to the sum but still count in the
# denominator.
total = sum(desc.shape[0] for desc in masterKeypoints if desc is not None)
print(total/len(masterKeypoints))

# Vector Quantization with KMeans (Bag of Visual Words)

In [None]:
# Size of the visual vocabulary: the number of KMeans clusters and therefore
# the length of each image's histogram.
numClusters = 100 # can use other values for this, including 200

In [None]:
from sklearn.cluster import KMeans

# Flatten the per-image descriptor arrays into one list of descriptor vectors,
# skipping images that produced no descriptors. The image order is preserved,
# which developVocabulary relies on when it maps labels back to images.
allKeypoints = [keypoint
                for desc in masterKeypoints if desc is not None
                for keypoint in desc]

# KMeans initialization is random; a fixed seed keeps the visual vocabulary
# (and every histogram and score derived from it) reproducible across runs.
clusters = KMeans(n_clusters=numClusters, random_state=0).fit(allKeypoints)

In [None]:
def developVocabulary(n_images, descriptor_list, kmeans_ret, n_clusters=None):
    """
    Build one visual-word histogram per image.

    Each cluster denotes a particular visual word, and every image can be
    represented as a combination of visual words. The histogram row for an
    image counts how often each visual word occurs among that image's
    descriptors.

    Parameters
    ----------
    n_images : int
        Number of images; only the first `n_images` entries of
        `descriptor_list` are read.
    descriptor_list : sequence
        Per-image descriptor arrays, or None for an image without
        descriptors.
    kmeans_ret : array-like of int
        Cluster label of every descriptor, flattened in image order (all
        labels of image 0, then image 1, ...).
    n_clusters : int, optional
        Vocabulary size. Defaults to the notebook-level `numClusters`.

    Returns
    -------
    numpy.ndarray, shape (n_images, n_clusters)
        Float histogram of visual-word counts per image.

    Raises
    ------
    IndexError
        If `kmeans_ret` holds fewer labels than there are descriptors.
    """
    if n_clusters is None:
        n_clusters = numClusters

    labels = np.asarray(kmeans_ret)
    mega_histogram = np.zeros((n_images, n_clusters))

    # `offset` is where the current image's labels start in the flat array.
    offset = 0
    for i in range(n_images):
        desc = descriptor_list[i]
        count = 0 if desc is None else len(desc)
        if count:
            chunk = labels[offset:offset + count]
            if len(chunk) < count:
                raise IndexError("kmeans_ret has fewer labels than descriptors")
            # Count every visual word of this image in one vectorized call.
            mega_histogram[i] = np.bincount(chunk, minlength=n_clusters)
        offset += count
    print("Vocabulary Histogram Generated")

    return mega_histogram

In [None]:
# Label every training descriptor with its cluster and count the labels per image.
# `allKeypoints` was built by walking `masterKeypoints` in order, which is the
# order developVocabulary assumes when it steps through the flat label array.
megaHistogram = developVocabulary(len(imageList), masterKeypoints, clusters.predict(allKeypoints))

# SVM

In [None]:
from sklearn.svm import SVC

# Train an SVM on the bag-of-visual-words histograms. Class indices come from
# the argmax of the one-hot targets; `class_weight` reuses the per-class
# weights from `weight_dict`.
# NOTE(review): the histograms are raw counts, with no scaling or
# normalization applied before fitting — confirm that is intended.
clf = SVC(C=.5, class_weight=weight_dict) # make classifier object
clf.fit(megaHistogram, targetList.argmax(1)) # train the model


In [None]:
# Confusion matrix of the SVM on the training histograms it was fitted on.
predictions = clf.predict(megaHistogram)
y_pred = predictions
y_test = targetList.copy().argmax(1)

np.set_printoptions(precision=2)
cnf_matrix = confusion_matrix(y_test, y_pred)
plt.figure()
print("Balanced Accuracy: "+ str(plot_confusion_matrix(
    cnf_matrix, classes=class_names, normalize=True,
    title='Normalized Training confusion matrix')))
plt.show()

# SIFT+SVM on Validation Set

In [None]:
from tqdm import tqdm_notebook

# One entry per validation image: that image's descriptor array, or None when
# gen_sift_features found no keypoints.
masterKeypoints2 = []

for img in tqdm_notebook(imageValList):
    
    # Convert image to grayscale
    img_gray = to_gray(img)
    
    # Keep only the descriptors (one 128-length vector per keypoint); the
    # keypoint objects themselves are discarded.
    _, img_desc = gen_sift_features(img_gray)
    
    masterKeypoints2.append(img_desc)

In [None]:
from sklearn.cluster import KMeans

# Flatten the validation descriptors into one list, in image order, skipping
# images without descriptors. No new clustering is fitted in this cell.
allKeypoints2 = [keypoint
                 for desc in masterKeypoints2 if desc is not None
                 for keypoint in desc]

In [None]:
# Quantize the validation descriptors with the vocabulary fitted on the
# training descriptors (`clusters`), then build the per-image histograms.
megaValHistogram = developVocabulary(len(imageValList), masterKeypoints2, clusters.predict(allKeypoints2))

In [None]:
# Sanity check: expected shape is (number of validation images, numClusters).
megaValHistogram.shape

In [None]:
# Confusion matrix of the SVM on the validation histograms.
predictions = clf.predict(megaValHistogram)
y_pred = predictions
y_test = targetValList.copy().argmax(1)

np.set_printoptions(precision=2)
cnf_matrix = confusion_matrix(y_test, y_pred)
plt.figure()
print("Balanced Accuracy: "+ str(plot_confusion_matrix(
    cnf_matrix, classes=class_names, normalize=True,
    title='Normalized Validation confusion matrix')))
plt.show()


# SIFT+SVM on Test Set

In [None]:
from tqdm import tqdm_notebook

# One entry per test image: that image's descriptor array, or None when
# gen_sift_features found no keypoints.
masterKeypoints3 = []

for img in tqdm_notebook(testList):
    
    # Convert image to grayscale
    img_gray = to_gray(img)
    
    # Keep only the descriptors (one 128-length vector per keypoint); the
    # keypoint objects themselves are discarded.
    _, img_desc = gen_sift_features(img_gray)
    
    masterKeypoints3.append(img_desc)

In [None]:
# Flatten the test descriptors into one list, in image order, skipping images
# without descriptors.
allKeypoints3 = [keypoint
                 for desc in masterKeypoints3 if desc is not None
                 for keypoint in desc]

In [None]:
# Quantize the test descriptors with the vocabulary fitted on the training
# descriptors (`clusters`), then build the per-image histograms.
megaTestHistogram = developVocabulary(len(testList), masterKeypoints3, clusters.predict(allKeypoints3))

In [None]:
# Sanity check: expected shape is (number of test images, numClusters).
megaTestHistogram.shape

In [None]:
predictions = clf.predict(megaTestHistogram)

# Get Test Confusion Matrix
# NOTE(review): unlike the training and validation targets, `targetTestList`
# is never sliced to the first three columns in the cells shown — confirm it
# only has the three class columns, otherwise argmax can yield other labels.
y_test = targetTestList.copy()
y_pred = predictions
y_test = y_test.argmax(1)

cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
plt.figure()
# The title previously said "Validation", copied over from the validation cell.
print("Balanced Accuracy: "+ str(plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized Test confusion matrix')))
plt.show()