<a href="https://colab.research.google.com/github/benjamin-carter/image_class_melanoma/blob/master/ass8_image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import pathlib
from pathlib import *

import IPython.display as display
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [0]:
def show_batch(image_batch, label_batch):
  plt.figure(figsize=(10,10))
  for n in range(25):
      ax = plt.subplot(5,5,n+1)
      plt.imshow(image_batch[n])
      plt.title(CLASS_NAMES[label_batch[n]==1][0].title())
      plt.axis('off')

# This function will plot images in the form of a grid with 1 row and 5 columns where images are placed in each column.
def plotImages(images_arr):
    fig, axes = plt.subplots(1, 5, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

def get_label(file_path):
  # convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last is the class-directory
  return parts[-2] == CLASS_NAMES

def decode_img(img):
  # convert the compressed string to a 3D uint8 tensor
  img = tf.image.decode_jpeg(img, channels=3)
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  img = tf.image.convert_image_dtype(img, tf.float32)
  # resize the image to the desired size.
  return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])

def process_path(file_path):
  label = get_label(file_path)
  # load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
  # This is a small dataset, only load it once, and keep it in memory.
  # use `.cache(filename)` to cache preprocessing work for datasets that don't
  # fit in memory.
  if cache:
    if isinstance(cache, str):
      ds = ds.cache(cache)
    else:
      ds = ds.cache()

  ds = ds.shuffle(buffer_size=shuffle_buffer_size)

  # Repeat forever
  ds = ds.repeat()

  ds = ds.batch(BATCH_SIZE)

  # `prefetch` lets the dataset fetch batches in the background while the model
  # is training.
  ds = ds.prefetch(buffer_size=AUTOTUNE)

  return ds

def confusion(y, y_hat):
  confuse = np.zeros((2,2))
  confuse[0,0] = np.sum(y_hat[y == 1])
  confuse[0,1] = np.sum(y_hat[y == 0])
  confuse[1,0] = np.count_nonzero(y_hat[y == 1] == 0)
  confuse[1,1] = np.count_nonzero(y_hat[y == 0] == 0)
  return confuse


In [0]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
from google.colab import drive

drive.mount("/content/gdrive", force_remount=True)

In [0]:
data_dir  = Path(r"/content/gdrive/My Drive/Current/SC_images/datadir/data_dir2_os/")

train_dir = data_dir / 'train'
test_dir = data_dir / 'test'

# data_dir  = Path(r"/content/gdrive/My Drive/Current/images")

In [0]:
CLASS_NAMES = np.array([item.name for item in train_dir.glob('*') if item.name != "LICENSE.txt"])
image_count = len(list(train_dir.glob('*/*.jpg')))
print(CLASS_NAMES, image_count)

['benign' 'cancer'] 9424


In [0]:
train_image_generator1 = ImageDataGenerator(rescale=1/255, rotation_range = 45)
train_image_generator2 = ImageDataGenerator(rescale=1/255, rotation_range = 90)
train_image_generator3 = ImageDataGenerator(rescale=1/255, vertical_flip=True)
train_image_generator4 = ImageDataGenerator(rescale=1/255, horizontal_flip=True)
# train_image_generator5 = ImageDataGenerator(rescale=1/255, width_shift_range = .5, height_shift_range = .5)
train_image_generator6 = ImageDataGenerator(rescale=1/255)
test_image_generator = ImageDataGenerator(rescale=1/255)

BATCH_SIZE = 128
SHUFFLE_BUFFER_SIZE = 1000
IMG_HEIGHT = 150
IMG_WIDTH = 150
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)
epochs = 15

In [0]:
train_data_gen1 = train_image_generator1.flow_from_directory(directory=str(train_dir), batch_size=BATCH_SIZE, shuffle=True, target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')
train_data_gen2 = train_image_generator2.flow_from_directory(directory=str(train_dir), batch_size=BATCH_SIZE, shuffle=True, target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')
train_data_gen3 = train_image_generator3.flow_from_directory(directory=str(train_dir), batch_size=BATCH_SIZE, shuffle=True, target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')
train_data_gen4 = train_image_generator4.flow_from_directory(directory=str(train_dir), batch_size=BATCH_SIZE, shuffle=True, target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')
# train_data_gen5 = train_image_generator5.flow_from_directory(directory=str(train_dir),batch_size=BATCH_SIZE, shuffle=True,  target_size=(IMG_HEIGHT, IMG_WIDTH),
#                                                      classes = list(CLASS_NAMES), class_mode = 'categorical')
train_data_gen6 = train_image_generator6.flow_from_directory(directory=str(train_dir),batch_size=BATCH_SIZE, shuffle=True,  target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')

test_data_gen = test_image_generator.flow_from_directory(directory=str(test_dir), batch_size=BATCH_SIZE, shuffle=False, target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes = list(CLASS_NAMES), class_mode = 'categorical')

Found 9424 images belonging to 2 classes.
Found 9424 images belonging to 2 classes.
Found 9424 images belonging to 2 classes.
Found 9424 images belonging to 2 classes.
Found 9424 images belonging to 2 classes.
Found 1490 images belonging to 2 classes.


In [0]:
image_batch, label_batch = next(train_data_gen6)
# show_batch(image_batch, label_batch)
image_batch_test, label_batch_test = next(test_data_gen)
# show_batch(image_batch, label_batch)


In [0]:
IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

base_model = tf.keras.applications.VGG16(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

# base_model = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

# base_model = tf.keras.applications.InceptionV3(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [0]:
base_model.trainable = True
base_model.summary()

feature_batch = base_model(image_batch)
print(feature_batch.shape)

global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

prediction_layer1 = tf.keras.layers.Dense(1024)
pred_layer_batch1 = prediction_layer1(feature_batch_average)
print(pred_layer_batch1.shape)

drop = tf.keras.layers.Dropout(.5)

prediction_layer2 = tf.keras.layers.Dense(2)
pred_layer_batch2 = prediction_layer2(pred_layer_batch1)
print(pred_layer_batch2.shape)

In [0]:
# model = tf.keras.Sequential([
#   base_model,
#   global_average_layer,
#   prediction_layer2
# ])

model = tf.keras.Sequential([
  base_model,
  global_average_layer,
  prediction_layer1,
  drop,
  prediction_layer2
])

In [0]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [0]:
initial_epochs = 10
validation_steps = 12

loss0,accuracy0 = model.evaluate(test_data_gen, steps = validation_steps)

In [0]:
# train_ds = train_data_gen.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# test_ds = test_data_gen.batch(BATCH_SIZE)
initial_epochs = 1
validation_steps=12
history = model.fit(train_data_gen1,
                    epochs=initial_epochs,
                    steps_per_epoch = STEPS_PER_EPOCH,
                    validation_data=test_data_gen,
                    validation_steps=20 ) 

In [0]:
initial_epochs = 1
validation_steps=12
file_path = '/content/gdrive/My Drive/Current/SC_images/datadir/weights/'
model.load_weights(file_path)
history = model.fit_generator(train_data_gen1, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
model.save_weights(file_path)
model.load_weights(file_path)
history = model.fit_generator(train_data_gen2, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
model.save_weights(file_path)
model.load_weights(file_path)
history = model.fit_generator(train_data_gen3, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
model.save_weights(file_path)
model.load_weights(file_path)
history = model.fit_generator(train_data_gen4, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
model.save_weights(file_path)
model.load_weights(file_path)
# history = model.fit_generator(train_data_gen5, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
# model.save_weights(file_path)
# model.load_weights(file_path)
history = model.fit_generator(train_data_gen6, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=validation_steps ) 
model.save_weights(file_path)
model.load_weights(file_path)


In [0]:
results = model.evaluate(test_data_gen, batch_size=BATCH_SIZE, steps = 20)
for name, value in zip(model.metrics_names, results):
  print("%s: %.3f" % (name,value))

loss: 0.965
accuracy: 0.726


In [0]:
prediction = model.predict_generator(test_data_gen,steps = 12)

In [0]:
yhat = np.argmax(prediction,axis=1)
yhat[-122:]
y_orig = np.zeros((1490,2))
y_orig[:1368,0] = np.ones(1368)
y_orig[1368:,1] = np.ones(122)

In [0]:
confusion(y_orig[:,1],yhat)

array([[ 24., 395.],
       [ 98., 973.]])

In [0]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

In [0]:
model2 = Sequential()
model2.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(150,150,3)))
model2.add(Conv2D(64, (3, 3), activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.25))
model2.add(Flatten())
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(2, activation='softmax'))

base_learning_rate = 0.0001
model2.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [0]:
model3 = Sequential()
model3.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(150,150,3)))
model3.add(Conv2D(64, (3, 3), activation='relu'))
model3.add(Conv2D(128, (3, 3), activation='relu'))
model3.add(Conv2D(64, (3, 3), activation='relu'))
model3.add(MaxPooling2D(pool_size=(2, 2)))
model3.add(Dropout(0.25))
model3.add(Flatten())
model3.add(Dense(128, activation='relu'))
model3.add(Dropout(0.5))
model3.add(Dense(2, activation='softmax'))

base_learning_rate = 0.0001
model3.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [0]:
initial_epochs = 2
file_path = '/content/gdrive/My Drive/Current/SC_images/datadir/weights2/'
history = model2.fit(train_data_gen1, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=12 ) 
model2.save_weights(file_path)
model2.load_weights(file_path)
history = model2.fit(train_data_gen2, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=12 ) 
model2.save_weights(file_path)
model2.load_weights(file_path)
history = model2.fit(train_data_gen3, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=12 ) 
model2.save_weights(file_path)
model2.load_weights(file_path)
history = model2.fit(train_data_gen4, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=1 ) 
model2.save_weights(file_path)
model2.load_weights(file_path)
# history = model2.fit(train_data_gen5, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=12 ) 
# model2.save_weights(file_path)
# model2.load_weights(file_path)
history = model2.fit(train_data_gen6, epochs=initial_epochs, steps_per_epoch = STEPS_PER_EPOCH, validation_data=test_data_gen, validation_steps=12 ) 
model2.save_weights(file_path)
model2.load_weights(file_path)

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2

In [0]:
file_path = '/content/gdrive/My Drive/Current/SC_images/datadir/weights2/'
model2.load_weights(file_path)
prediction2 = model2.predict_generator(test_data_gen,steps = 12)

In [0]:
yhat = np.argmax(prediction2,axis=1)
yhat[-122:]
y_orig = np.zeros((1490,2))
y_orig[:1368,0] = np.ones(1368)
y_orig[1368:,1] = np.ones(122)

In [0]:
confusion(y_orig[:,1], yhat)

array([[ 71., 643.],
       [ 51., 725.]])

In [0]:
train_mel_dir = train_dir / 'mel'
train_df_dir = train_dir / 'df'
train_bkl_dir = train_dir / 'bkl'
train_bcc_dir = train_dir / 'bcc'
train_akiec_dir = train_dir / 'akiec'
train_vasc_dir = train_dir / 'vasc'
train_nv_dir = train_dir / 'nv'

test_mel_dir = test_dir / 'mel' 
test_df_dir = test_dir / 'df' 
test_bkl_dir = test_dir / 'bkl' 
test_bcc_dir = test_dir / 'bcc'
test_akiec_dir = test_dir /  'akiec'
test_vasc_dir = test_dir / 'vasc'
test_nv_dir = test_dir /  'nv'

num_mel_train = len(os.listdir(train_mel_dir))
num_df_train = len(os.listdir(train_df_dir))
num_bkl_train = len(os.listdir(train_bkl_dir))
num_bcc_train = len(os.listdir(train_bcc_dir))
num_akiec_train = len(os.listdir(train_akiec_dir))
num_vasc_train = len(os.listdir(train_vasc_dir))
num_nv_train = len(os.listdir(train_nv_dir))

num_mel_test = len(os.listdir(test_mel_dir))
num_df_test = len(os.listdir(test_df_dir))
num_bkl_test = len(os.listdir(test_bkl_dir))
num_bcc_test = len(os.listdir(test_bcc_dir))
num_akiec_test = len(os.listdir(test_akiec_dir))
num_vasc_test = len(os.listdir(test_vasc_dir))
num_nv_test = len(os.listdir(test_nv_dir))

total_train = num_mel_train + num_df_train + num_bkl_train + num_bcc_train + num_akiec_train + num_vasc_train + num_nv_train
total_test = num_mel_test + num_df_test + num_bkl_test + num_bcc_test + num_akiec_test + num_vasc_test + num_nv_test

print('total training mel images:', num_mel_train)
print('total training df images:', num_df_train)
print('total training bkl images:', num_bkl_train)
print('total training bcc images:', num_bcc_train)
print('total training akiec images:', num_akiec_train)
print('total training vasc images:', num_vasc_train)
print('total training nv images:', num_nv_train)

print('total testing mel images:', num_mel_test)
print('total testing df images:', num_df_test)
print('total testing bkl images:', num_bkl_test)
print('total testing bcc images:', num_bcc_test)
print('total testing akiec images:', num_akiec_test)
print('total testing vasc images:', num_vasc_test)
print('total testing nv images:', num_nv_test)
print("--")
print("Total training images:", total_train)
print("Total testing images:", total_test)

In [0]:
list_ds = tf.data.Dataset.list_files(str(train_dir/'*/*'))
list_ds_test = tf.data.Dataset.list_files(str(test_dir/'*/*'))  

# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
labeled_ds
labeled_test_ds = list_ds_test.map(process_path, num_parallel_calls=AUTOTUNE)
labeled_test_ds

for image, label in labeled_ds.take(2):
  print("Image shape: ", image.numpy().shape)
  print("Label: ", label.numpy())

In [0]:
train_ds = prepare_for_training(labeled_ds)
image_batch, label_batch = next(iter(train_ds))

test_ds = prepare_for_training(labeled_test_ds)
image_batch_test, label_batch_test = next(iter(test_ds))

# train_ds = labeled_ds.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# test_ds = labeled_test_ds.batch(BATCH_SIZE)

In [0]:
sample_training_images, _ = next(train_data_gen)

In [0]:
get_label_name = metadata.features['label'].int2str

for image, label in raw_train.take(2):
  plt.figure()
  plt.imshow(image)
  plt.title(get_label_name(label))
  print(image.shape)

In [0]:
IMG_SIZE = 160 # All images will be resized to 160x160

def format_example(image, label):
  image = tf.cast(image, tf.float32)
  image = (image/127.5) - 1
  image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
  return image, label

In [0]:
train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
test = raw_test.map(format_example)

In [0]:
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

In [0]:
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)

In [0]:
initial_epochs = 10
validation_steps=20

loss0,accuracy0 = model.evaluate(validation_batches, steps = validation_steps)

In [0]:
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

In [0]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()