# Skin Cancer detection

In [2]:
import os
from glob import glob

import keras
import numpy as np
import pandas as pd
from keras import backend as k
from keras import optimizers
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from scipy import stats
from sklearn.datasets import load_files
# import cv2



Using TensorFlow backend.


In [3]:
# Remote zip archives of the three dataset splits (train / test / validation).
(train_path,
 test_path,
 validation_path) = (
    "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/train.zip",
    "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/test.zip",
    "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/valid.zip",
)

In [4]:
def download_zip_and_extract(remote_path, local_path):
    """Download a zip archive over HTTP and unpack it into a local directory.

    The whole archive is held in memory before it is extracted.

    Args:
        remote_path: URL of the zip archive to download.
        local_path: Directory the archive contents are extracted into.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: If the downloaded payload is not a valid zip archive.
    """
    import io
    import zipfile

    import requests

    response = requests.get(remote_path)
    # Fail loudly on an HTTP error instead of handing an error page to ZipFile.
    response.raise_for_status()
    # The context manager closes the archive once extraction is done.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(local_path)

In [5]:
#download_zip_and_extract(validation_path, "data")

In [6]:
#download_zip_and_extract(train_path, "data")

In [7]:
#download_zip_and_extract(test_path, "data")

In [8]:
# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=3):
    """Collect the image file paths and one-hot targets of one dataset split.

    Args:
        path: Directory containing one sub-directory per class.
        num_classes: Width of the one-hot encoding (defaults to the 3 classes
            used in this notebook).

    Returns:
        A ``(files, targets)`` pair: a NumPy array of file paths and the
        matching one-hot encoded class targets.
    """
    # Only file names and class indices are needed here; load_content=False
    # stops load_files from reading the bytes of every image into memory.
    data = load_files(path, load_content=False)
    files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return files, targets

In [9]:
# Training split: image file paths and their one-hot encoded class targets.
train_files, train_targets = load_dataset('data/train')

In [10]:
# Test split: image file paths and their one-hot encoded class targets.
test_files, test_targets = load_dataset('data/test')

In [11]:
# Validation split: image file paths and their one-hot encoded class targets.
valid_files, valid_targets = load_dataset('data/valid')

In [12]:
# Number of file paths found for the validation split.
valid_files.size

150

In [13]:
# extracting the names from the folder names in sorted order
# Taking the basename of each matched directory avoids relying on the exact
# character length of the 'data/train/' prefix.
labels = sorted(os.path.basename(os.path.normpath(label)) for label in glob('data/train/*/'))

In [14]:
# Report the number of classes and the number of images, overall and per split.
split_sizes = (len(train_files), len(valid_files), len(test_files))
print('There are %d total lesion categories.' % len(labels))
print('There are %s total lesion images.\n' % str(sum(split_sizes)))
print('There are %d training lesion images.' % split_sizes[0])
print('There are %d validation lesion images.' % split_sizes[1])
print('There are %d test lesion images.'% split_sizes[2])

There are 3 total lesion categories.
There are 2750 total lesion images.

There are 2000 training lesion images.
There are 150 validation lesion images.
There are 600 test lesion images.


## Visualising sample images

In [15]:
# def visualize_img(img_path, ax):
#     import matplotlib.pyplot as plt
#     %matplotlib inline

#     img = cv2.imread(img_path)
#     ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [16]:
# fig = plt.figure(figsize=(20, 10))
# for i in range(12):
#     ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[])
#     visualize_img(train_files[i], ax)

## Creating a pretrained model

In [20]:
# sample = cv2.imread(train_files[0])

# sample.shape


In [23]:
# VGG19 with ImageNet weights, used as a convolutional base: the original
# fully-connected top is left out and inputs are 254x254 RGB images.
model = keras.applications.VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=(254, 254, 3),
    input_tensor=None,
    pooling=None,
)


In [24]:
# Print the layer-by-layer architecture of the VGG19 base.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 254, 254, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 254, 254, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 254, 254, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 127, 127, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 127, 127, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 127, 127, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 63, 63, 128)       0         
__________

## Freeze layers

## Adding custom layers

In [19]:
# Classification head on top of the VGG19 base:
# flatten -> 2 x (Dense 1024 ReLU + Dropout 0.5) -> 3-way softmax.
x = Flatten()(model.output)
for block_index in range(2):
    x = Dense(1024, activation="relu")(x)
    x = Dropout(0.5)(x)
predictions = Dense(3, activation="softmax")(x)

In [20]:
# creating the final model: the VGG19 base followed by the custom classification head.
# Keras 2 names these keyword arguments `inputs`/`outputs`; the singular
# `input`/`output` spellings are deprecated and trigger a UserWarning.
model_final = Model(inputs=model.input, outputs=predictions)

  from ipykernel import kernelapp as app


In [21]:
# Configure training: categorical cross-entropy loss, SGD with momentum and a
# small learning rate, and accuracy reported as the metric.
model_final.compile(
    optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)


In [22]:
train_data_dir = 'data/train'
validation_data_dir = 'data/valid'
batch_size = 100

# Training images: pixel values are multiplied by 1/255 and each image is
# randomly augmented (flip, zoom, shift, small rotation).
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    rotation_range=10)

# Validation images are only rescaled. Random augmentation here would make the
# validation metrics noisy and non-reproducible from one evaluation to the next.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Labelled batches read from the class sub-directories, resized to 254x254.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(254, 254),
    batch_size=batch_size,
    class_mode="categorical")

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(254, 254),
    batch_size=batch_size,
    class_mode="categorical")

Found 2000 images belonging to 3 classes.
Found 150 images belonging to 3 classes.


In [23]:
# Label-free, unshuffled stream of training images, used to compute bottleneck features.
generator = train_datagen.flow_from_directory(
    train_data_dir,
    class_mode=None,  # yield batches of images only, no labels
    shuffle=False,  # do not shuffle the files between batches
    batch_size=batch_size,
    target_size=(254, 254),
)

Found 2000 images belonging to 3 classes.


In [24]:
# Run every training image once through the convolutional base.
# The second argument of predict_generator is a number of *batches*, not of
# samples, so it must be ceil(samples / batch_size); passing 2000 asked for
# 2000 batches, i.e. many passes over the data.
bottleneck_steps_train = int(np.ceil(generator.samples / float(batch_size)))
bottleneck_features_train = model.predict_generator(generator, bottleneck_steps_train)
# save the output as a Numpy array; given a path, np.save opens the file in
# binary mode and closes it itself (a text-mode handle fails on Python 3).
np.save('bottleneck_features_train.npy', bottleneck_features_train)

KeyboardInterrupt: 

In [None]:
# Label-free, unshuffled stream of validation images, used to compute bottleneck features.
valgenerator = test_datagen.flow_from_directory(
    validation_data_dir,
    class_mode=None,  # yield batches of images only, no labels
    shuffle=False,  # do not shuffle the files between batches
    batch_size=batch_size,
    target_size=(254, 254),
)

In [None]:
# Run every validation image once through the convolutional base.
# predict_generator takes a number of *batches*, so the step count is
# ceil(samples / batch_size) rather than the number of images.
bottleneck_steps_validation = int(np.ceil(valgenerator.samples / float(batch_size)))
bottleneck_features_validation = model.predict_generator(valgenerator, bottleneck_steps_validation)
# Given a path, np.save opens the file in binary mode and closes it itself.
np.save('bottleneck_features_validation.npy', bottleneck_features_validation)

In [25]:
# Checkpointing: after each epoch, write the full model (not just its weights)
# to disk whenever the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(
    "vgg16_1-best-classifier.h5",
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)
# Early stopping: halt training once validation accuracy has gone 10 epochs
# without improving.
early = EarlyStopping(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

In [None]:
# Reload the cached bottleneck features; given a path, np.load opens the file
# in binary mode itself (a text-mode handle fails on Python 3).
train_data = np.load('bottleneck_features_train.npy')
# One-hot labels for those features, taken from the unshuffled `generator`.
# NOTE(review): assumes the features were produced by exactly one pass over
# `generator`, so that rows line up with `generator.classes` — confirm.
train_labels = np_utils.to_categorical(generator.classes, 3)

In [26]:
# Training schedule: batches drawn per epoch, and number of epochs.
steps_per_epoch, epochs = 10, 10

In [27]:
# Train the model
# validation_steps is a batch count and must be a whole number: round up so
# the last, partial batch of validation images is evaluated as well
# (150 images / 100 per batch -> 2 steps rather than 1.5 or 1).
model_final.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=int(np.ceil(valid_files.size / float(batch_size))),
    callbacks=[checkpoint, early])

Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.52000, saving model to vgg16_1-best-classifier.h5
Epoch 2/10

Epoch 00002: val_acc did not improve from 0.52000
Epoch 3/10

Epoch 00003: val_acc did not improve from 0.52000
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.52000
Epoch 5/10

Epoch 00005: val_acc did not improve from 0.52000
Epoch 6/10


KeyboardInterrupt: 

In [None]:
# Print the local devices reported by TensorFlow's device_lib.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)