In [None]:
# MYCOSES PROJECT: THE CLASSIFIER FOR SKIN FUNGAL DISEASES
#-------------------------------------------------------------------------------

# IMPORT NECESSARY PACKAGES
import tensorflow as tf
import os
import zipfile
import random
import math #mark

import datetime #timeline

from shutil import copyfile
from sklearn.model_selection import train_test_split

#from keras.optimizers import  SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile, rmtree
from tensorflow.keras import optimizers

# 0. HYPERPARAMETERS
# Every tunable value of the notebook is gathered here; the "part N" tags say
# where each one is consumed.

# share of each class's files held out for testing (test_size in part 5-b)
Split_Rate = 0.1

# image augmentation / normalisation arguments -- part 6-a
Rescale = 1. / 255
Rotation_Range = 40
Width_Shift_Range = 0.2
Height_Shift_Range = 0.2
Shear_Range = 0.2
Zoom_Range = 0.2
Fill_Mode = 'nearest'

# how the generators read the image folders -- part 6-b,c
Batch_Size = 100
Class_Mode = 'categorical'
Target_Size = (150, 150)

# pretrained model -- part 1-a,b (Inception for now; change together with part 1)
Weight_File = 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
Last_Layer = 'mixed7'

# dropout rate of the added top layers -- part 7-a
Dropout_Final = 0.2

# training run -- part 8
Max_Acc = 0.999
Epochs = 1
# the per-stage step counts are not set here: they are computed in part 5

# compiler settings -- part 7-c
Lr = 0.01
Loss = 'categorical_crossentropy'
Metrics = 'acc'
Decay = 0.0
Momentum = 0.0
Optimizer = optimizers.SGD(lr=Lr,
                           momentum=Momentum,
                           decay=Decay,
                           nesterov=False,
                           clipnorm=1,
                           clipvalue=0.5)

# data structure
# To_Group: merged class name -> dataset folders folded into it (part 4-a)
To_Group = {
    'tinea': ['Tinea_capitis', 'Tinea_pedis', 'Tinea_unguim', 'Body_Tinea'],
    'candidasis': ['Balanitis', 'Candida_onychomycosis', 'Oral_candidiasis',
                   'Skin_candidiasis', 'Vulvovaginal_candidiasis'],
}
# To_Delete: class folders removed from the dataset (part 4-b)
To_Delete = {}
If_Retain = False
To_Retain = {}

# To reference for optimizer choices:

# SGD: support momentum, learning rate decay, Nesterov momentum.
# keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False, clipnorm=1, clipvalue=0.5)

# RMSprop: usually a good choice for recurrent neural networks
# keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)

# Adagrad: how frequently a parameter gets updated, more updates, smaller lr
# keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)

# Adadelta: more robust Adagrad, adapts lr based on a moving window of gr updates,
# not accumulating all past gradients, continues learning even had many updates
# keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)

# Adam:amsgrad: if apply the AMSGrad variant from the paper 'On the Convergence'
# keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# Adamax: a variant of Adam based on the infinity norm
# keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)

# Nadam: Nesterov Adam optimizer, like Adam is essentially RMSprop with momentum,
# Nadam is Adam RMSprop with Nesterov momentum.
# keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)



In [None]:
# 1. CHOOSE PRETRAINED MODEL

# a. load weight
LOCAL_PATH='/tmp/'
weight_file=Weight_File
LOCAL_WEIGHT_PATH= os.path.join (LOCAL_PATH, weight_file)
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# b. import model
# - import model (Inception3, Resnet50, ..)
# - freeze some layers
# - get output of the last layer (input of layers built in 7)
from tensorflow.keras.applications.inception_v3 import InceptionV3
# The input shape is built from Target_Size (plus 3 colour channels) so the
# network always matches the image size the generators are asked to produce;
# it used to be hard-coded to (150,150,3) independently of the hyperparameter.
pretrained_model = InceptionV3(input_shape=tuple(Target_Size) + (3,),
                               include_top=False,
                               weights=None)
pretrained_model.load_weights(LOCAL_WEIGHT_PATH)
# freeze every pretrained layer: only the layers added in part 7 are trained
for layer in pretrained_model.layers:
  layer.trainable=False
# for testing only: pretrained_model.summary()
last_layer=pretrained_model.get_layer(Last_Layer)
last_output=last_layer.output

--2019-09-12 15:35:29--  https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.141.128, 2607:f8b0:400c:c06::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.141.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87910968 (84M) [application/x-hdf]
Saving to: ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’


2019-09-12 15:35:30 (126 MB/s) - ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’ saved [87910968/87910968]

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [None]:
# 2. UTILITY FUNCTIONS

# a. functions to encapsulate try, except, pass:
#    safer choices than os.mkdir(), os.remove(), ..

def exist (path):
  """Return True when `path` is an existing regular file or an existing directory."""
  if os.path.isfile(path):
    return True
  return os.path.isdir(path)

def checkmk(path):
  """Create the directory `path`, ignoring filesystem errors.

  Best-effort wrapper around os.mkdir: an OSError (for example the directory
  already exists) is swallowed. Only OSError is caught, so programming errors
  such as a non-path argument, and KeyboardInterrupt, are no longer hidden.
  """
  try:
    os.mkdir(path)
  except OSError:
    pass
def checkremove(path):
  """Delete the file `path`, ignoring filesystem errors.

  Best-effort wrapper around os.remove: an OSError (for example the file does
  not exist) is swallowed. Only OSError is caught, so programming errors and
  KeyboardInterrupt are no longer hidden.
  """
  try:
    os.remove(path)
  except OSError:
    pass
def checkunlink(path):
  """Unlink the file `path`, ignoring filesystem errors.

  Best-effort wrapper around os.unlink: an OSError (for example the file does
  not exist) is swallowed. Only OSError is caught, so programming errors and
  KeyboardInterrupt are no longer hidden.
  """
  try:
    os.unlink(path)
  except OSError:
    pass
def checkrm(path):
  """Remove the directory `path` with os.rmdir, ignoring filesystem errors.

  Best-effort: an OSError (for example the directory is missing or not empty)
  is swallowed and the directory is left as it is. Only OSError is caught, so
  programming errors and KeyboardInterrupt are no longer hidden.
  """
  try:
    os.rmdir(path)
  except OSError:
    pass
def checkrmtree(path):
  """Delete the directory tree rooted at `path`, ignoring filesystem errors.

  Best-effort wrapper around shutil.rmtree: an OSError (for example the
  directory does not exist) is swallowed. Only OSError is caught, so
  programming errors and KeyboardInterrupt are no longer hidden.
  """
  try:
    rmtree(path)
  except OSError:
    pass

# b. functions to encapsulate erase, create:
#   safer choices to have folders with completely empty content

def reinit(path):
  """Start the folder `path` from scratch.

  Removes the whole tree at `path` (best effort) and then creates the
  directory again (best effort).
  """
  for step in (checkrmtree, checkmk):
    step(path)

def clearcontent(path):
  """Remove everything inside the folder `path` while keeping the folder itself.

  Regular files are unlinked and sub-directories are removed recursively;
  entries that are neither are left alone. Raises whatever os.listdir raises
  when `path` cannot be listed.
  """
  for entry in os.listdir(path):
    target = os.path.join(path, entry)
    try:
      if os.path.isfile(target):
        checkunlink(target)
      elif os.path.isdir(target):
        checkrmtree(target)
    except Exception as e:
      print(e)

In [None]:
# c. functions moving files : to add or to replace? to copy or to move?
#    move a set of files, a folder's content, or group folders?

# keep_src, clear_des=True, False-> copy some files of src to des
# True, True-> copy some files of src to replace all files of des
# False, False-> move some files of src to des
# False, True-> move some files of src to replace all files of des
def move_files(files, source, destination, keep_src=True, clear_des=True):
  """Copy or move the named files from `source` into `destination`.

  files       -- file names (relative to `source`) to transfer
  source      -- folder the files are read from
  destination -- folder the files are written to (must already exist)
  keep_src    -- True: copy; False: move (each source file is deleted after
                 being handled, including zero-length files that were skipped)
  clear_des   -- True: empty `destination` first; False: add to its content

  Zero-length files are reported and not copied.
  """
  if clear_des:
    clearcontent(destination)
  for filename in files:
    from_path=os.path.join(source,filename)
    to_path=os.path.join(destination,filename)
    if os.path.getsize(from_path) > 0:
      copyfile(from_path, to_path)
    else:
      # the message used the undefined name `image`, which raised NameError
      # on the first empty file instead of skipping it
      print(filename + " is zero length, so ignoring.")
    if not keep_src:
      checkremove(from_path)

# keep_src, clear_des=True, False-> copy content(all files) of src to des
# True, True-> copy content of src to replace content of des.
# False, False-> move content of src to des
# False, True-> move content of src to replace content of des
def move_dir_content(source, destination, keep_src=True, clear_des=False):
  """Transfer every entry listed in `source` to `destination` via move_files.

  keep_src and clear_des are forwarded unchanged to move_files.
  """
  move_files(os.listdir(source), source, destination, keep_src, clear_des)

# move content of src to des then delete src.
def group (source, destination):
  """Merge the folder `source` into `destination`.

  The content of `source` is moved (not copied) into `destination`, then the
  `source` directory itself is removed on a best-effort basis.
  """
  move_dir_content(source=source, destination=destination, keep_src=False)
  checkrm(source)

In [None]:
# 3. LOAD DATA
# b. determine base, local, source
# - base - original data space: /content
# - local - our working space : /tmp
# - source - data in our working space: temp/mycoses_dataset

BASE_PATH = './TLU-AI/MYCOSES_DATASET/'
zip_file_name='mycoses_dataset.zip'
BASE_ZIP_PATH = os.path.join(BASE_PATH, zip_file_name)
LOCAL_ZIP_PATH= os.path.join(LOCAL_PATH, zip_file_name)
source='mycoses_dataset'
SOURCE_PATH=os.path.join(LOCAL_PATH, source)

# c. copy from base to local
# - remove only what a previous run of this notebook left behind (the
#   extracted dataset and a stale archive). Emptying LOCAL_PATH wholesale
#   would also delete the downloaded weight file and any unrelated files
#   that live in the shared temp folder.
# - copy
checkrmtree(SOURCE_PATH)
checkremove(LOCAL_ZIP_PATH)
copyfile (BASE_ZIP_PATH, LOCAL_ZIP_PATH)

# d. unzip file in local (auto-create source folder) then delete .zip
# the `with` block closes the archive even when extraction fails
with zipfile.ZipFile(LOCAL_ZIP_PATH,'r') as zip_ref:
  zip_ref.extractall(LOCAL_PATH)
print (os.listdir(LOCAL_PATH))
checkremove(LOCAL_ZIP_PATH)
print (os.listdir(LOCAL_PATH))

# 4.RESTRUCTURE DATA

# a. group folders
#   to group some diseases into one large class of diseases.
#   this grouping could be freely modified later.
#   recall that:
#   To_Group={'tinea': ['Tinea_capitis','Tinea_pedis','Tinea_unguim','Body_Tinea'],
#             'candidasis': ['Balanitis', 'Candida_onychomycosis','Oral_candidiasis',
#                            'Skin_candidiasis', 'Vulvovaginal_candidiasis']}
for each_class in To_Group:
  to_path=os.path.join(SOURCE_PATH, each_class)
  reinit (to_path)
  for member in To_Group[each_class]:
    from_path=os.path.join(SOURCE_PATH,member)
    if exist(from_path):
      group(from_path, to_path)
    else:
      # name the missing folder so the message can be acted upon
      print ('Hey, there exists no such file or directory!', from_path)

# b. delete folders
#    to delete some diseases (too rare, create unbalance, ..)
#    this deletion could be freely modified later.
#    recall that:
#    To_Delete={}
for each_class in To_Delete:
  checkrmtree (os.path.join(SOURCE_PATH, each_class))

# The classes are the non-empty sub-folders of the dataset. They are sorted
# because Keras' flow_from_directory assigns class indices in alphanumeric
# order of the folder names, and `classes` is used later to label per-class
# results; os.listdir alone returns the names in arbitrary order.
classes = sorted(folder for folder in os.listdir(SOURCE_PATH)
                 if os.path.isdir(os.path.join(SOURCE_PATH, folder))
                 and len(os.listdir(os.path.join(SOURCE_PATH, folder)))!=0)
num_class=len(classes) # the number of classes to be classified
classes

['drivefs_ipc.0_shell', 'drivefs_ipc.0', 'mycoses_dataset.zip', 'mycoses_dataset']
['drivefs_ipc.0_shell', 'drivefs_ipc.0', 'mycoses_dataset']
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!
Hey, there exists no such file or directory!


[]

In [None]:

# 5. PREPROCESS

# Fail fast: without any class folder every later step (generators, the
# softmax layer sized by num_class, training) would run on empty data.
if not classes:
  raise ValueError('No non-empty class folders found under ' + SOURCE_PATH +
                   '; check the archive layout and To_Group before splitting.')

# a. make training folder, testing folder
stages ={'training': None,'testing':None}
stage_folders = {s: os.path.join(LOCAL_PATH, s) for s in stages}
for sf in stage_folders:
  reinit (stage_folders[sf])

# b. split training set, testing set
# A fixed seed makes the split identical on every run, so results of different
# runs are comparable. It could be moved to the hyperparameter section.
Split_Seed=42
sums={s:0 for s in stages}   # number of files placed in each stage
for c in classes:
  SOURCE_CLASS_PATH = os.path.join(SOURCE_PATH, c)
  data_set=os.listdir(SOURCE_CLASS_PATH)
  # the split is done per class, so each class keeps the Split_Rate proportion
  stages['training'], stages['testing'] = train_test_split(
      data_set, test_size=Split_Rate, random_state=Split_Seed)
  class_folders={ s: os.path.join(stage_folders[s], c) for s in stages}
  for s in stages:
    reinit (class_folders[s])
    move_files(stages[s],SOURCE_CLASS_PATH,class_folders[s])
    sums[s]+=len(os.listdir(class_folders[s])) #mark
# batches needed to see every file of a stage once
steps={s: math.ceil(sums[s]/Batch_Size) for s in stages}#mark
print(sums, steps)

{'training': 0, 'testing': 0} {'training': 0, 'testing': 0}


In [None]:
#6. GENERATE DATA
# a. augment and normalize
# b. determine access method (batch_size, target_size, ..)

# Random augmentation is applied to the training stage only. The testing stage
# is just rescaled, so validation metrics are measured on the real images and
# do not change from one evaluation to the next.
augmentation_args = dict(rotation_range=Rotation_Range,
                         width_shift_range=Width_Shift_Range,
                         height_shift_range=Height_Shift_Range,
                         shear_range=Shear_Range,
                         zoom_range=Zoom_Range,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode=Fill_Mode)

stage_gen={s: ImageDataGenerator(rescale=Rescale,
                                 **(augmentation_args if s == 'training' else {}))
              for s in stages }

# Only the training stage is shuffled. Keeping the testing stage in directory
# order means the generator's `.classes` attribute lines up with the order of
# the predictions made from it.
stage_generators={s: stage_gen[s].flow_from_directory(stage_folders[s],
                                              batch_size =Batch_Size,
                                              class_mode=Class_Mode,
                                              target_size=Target_Size,
                                              shuffle=(s == 'training'))
                      for s in stages }

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


In [None]:
# 7. COMPLETE MODEL

# a. add our own top layers

from tensorflow.keras import layers
from tensorflow.keras import Model
# - Flatten the output of the chosen pretrained layer to 1 dimension
flattened = layers.Flatten()(last_output)
# - Fully connected layer with 1,024 hidden units and ReLU activation
hidden = layers.Dense(1024, activation='relu')(flattened)
# - Dropout with rate Dropout_Final
regularized = layers.Dropout(Dropout_Final)(hidden)
# - Final softmax layer with one output per class
x = layers.Dense(num_class, activation='softmax')(regularized)
print (num_class)

# b. combine the pretrained-model with our layers
model = Model(pretrained_model.input, x)


# c. choose optimization method, loss function, and criteria to observe
model.compile(optimizer=Optimizer,
              loss=Loss,
              metrics=[Metrics])

#for testing only: model.summary()

# d. build callback
class myCallback(tf.keras.callbacks.Callback):
  """Stop training as soon as the training accuracy exceeds a threshold.

  threshold -- accuracy above which training is cancelled; defaults to the
               Max_Acc hyperparameter.
  """
  def __init__(self, threshold=Max_Acc):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    # the metric is reported under the name it was compiled with ('acc' here);
    # 'accuracy' is accepted as a fallback
    acc = logs.get('acc', logs.get('accuracy'))
    # a missing metric must not crash training with a None comparison
    if acc is not None and acc > self.threshold:
      print("\nReached {:.1%} accuracy so cancelling training!".format(self.threshold))
      # the flag Keras checks lives on the model, not on the callback
      self.model.stop_training = True

0


In [None]:
# 8. TRAINING
# wall-clock time just before training starts
time_run1 = datetime.datetime.now() #note
callbacks = myCallback()
# all fit settings except the training data, gathered in one place
fit_options = dict(
    validation_data=stage_generators['testing'],
    steps_per_epoch=steps['training'],
    epochs=Epochs,
    validation_steps=steps['testing'],
    verbose=2,
    callbacks=[callbacks],
)
history = model.fit_generator(stage_generators['training'], **fit_options) #mark
# wall-clock time right after training; the difference is the training duration
time_run2 = datetime.datetime.now() #note
print(time_run2-time_run1) #note

In [None]:
# 9. PLOT LOSS AND ACCURACY
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch results on the training and testing
# sets recorded during training
#-----------------------------------------------------------
acc=history.history['acc']
val_acc=history.history['val_acc']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # Get number of epochs

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The legend text must be passed as `label=`: a fourth positional argument is
# parsed by plt.plot as more data, which left the legend empty.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend(loc='right')
plt.figure()   # start a new figure for the loss curves

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.legend(loc='right')
plt.show()
# Desired output. Charts with training and validation metrics. No crash :)

In [None]:
# 10. CONFUSION MATRIX, CLASSIFICATION REPORT
# add confusion matrix and classification report

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
# Confusion Matrix and Classification Report
# Predictions are made from a dedicated generator over the testing folder that
# is NOT shuffled and NOT augmented: the true labels in `.classes` are in
# directory order, so the predictions must come out in that same order to be
# comparable with them.
eval_generator = ImageDataGenerator(rescale=Rescale).flow_from_directory(
    stage_folders['testing'],
    batch_size=Batch_Size,
    class_mode=Class_Mode,
    target_size=Target_Size,
    shuffle=False)
Y_pred = model.predict_generator(eval_generator)
y_pred = np.argmax(Y_pred, axis=1)
y_true = eval_generator.classes
print('Confusion Matrix')
print(confusion_matrix(y_true, y_pred))
print('Classification Report')
# class names ordered by the index the generator assigned to each of them
target_names = sorted(eval_generator.class_indices,
                      key=eval_generator.class_indices.get)
print(classification_report(y_true, y_pred, target_names=target_names))