In [2]:

# Imports
import os
#from predicting_nails.params import *
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50, EfficientNetB0
#from keras.preprocessing.image import ImageDataGenerator
#from predicting_nails.prediction.get_data import *
from PIL import Image


In [8]:
LOCAL_DATA_PATH = os.path.join(os.path.expanduser('~'), "code", "ariannamng",
                               "predicting_nail_diseases", 'raw_data')

In [9]:
import os
import numpy as np
from PIL import Image

def download_bucket_objects(bucket_name, blob_path, local_path):
    # blob path is bucket folder name
    command = "gsutil cp -r gs://{bucketname}/{blobpath} {localpath}".format(bucketname = bucket_name, blobpath = blob_path, localpath = local_path)
    os.system(command)
    return command

# Define the bucket name and the blob_path and load the data from GBP to a local directory
def get_simple_data(nb_healthy = 50, nb_disease = 50):
    local_dir = os.path.join(LOCAL_DATA_PATH, "dataset_for_model_1")
    if os.path.exists(local_dir) == False:
        bucket_name = 'predicting-nail-diseases' # do not use gs://
        for i in range(0,nb_healthy):
            try:
                blob_path_h = f'dataset_for_model_1/healthy_data/healthy_{i}.JPG' # blob path in bucket where healthy data is stored
                local_dir = os.path.join(LOCAL_DATA_PATH, "dataset_for_model_1", f"healthy_{i}.JPG") # path to the RAW data foldr from .env
                download_bucket_objects(bucket_name, blob_path_h, local_dir)
            except:
                pass
        for i in range(0,nb_disease):
            try:
                blob_path_d = f'dataset_for_model_1/diseased_data/diseased_{i}.JPG' # blob path in bucket where diseased data is stored
                local_dir = os.path.join(LOCAL_DATA_PATH, "dataset_for_model_1", f"diseased_{i}.JPG") # path to the RAW data foldr from .env
                download_bucket_objects(bucket_name, blob_path_d, local_dir)
            except:
                pass


def load_simple_data(path, nb_healthy = 50, nb_disease = 50):

    X, y = [], []

    for i in range(nb_healthy):
        h_path = os.path.join(path, 'dataset_for_model_1', f'healthy_{i}.JPG')
        img = Image.open(h_path)
        foo = img.resize((256,256))
        X.append(np.array(foo))
        y.append(0)

    for i in range(nb_disease):
        d_path = os.path.join(path, 'dataset_for_model_1', f'diseased_{i}.JPG')
        img = Image.open(h_path)
        foo = img.resize((256,256))
        X.append(np.array(foo))
        y.append(1)

    c = list(zip(X, y))
    np.random.shuffle(c)
    X, y = zip(*c)

    return np.array(X), np.array(y)


In [10]:
# Plots Loss and accuracy of the train veresus the validation data
def plot_history(history, title='', axs=None, exp_name=""):
    if axs is not None:
        ax1, ax2 = axs
    else:
        f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    if len(exp_name) > 0 and exp_name[0] != '_':
        exp_name = '_' + exp_name
    ax1.plot(history.history['loss'], label = 'train' + exp_name)
    ax1.plot(history.history['val_loss'], label = 'val' + exp_name)
    ax1.set_ylim(0., 2.2)
    ax1.set_title('loss')
    ax1.legend()

    ax2.plot(history.history['accuracy'], label='train accuracy'  + exp_name)
    ax2.plot(history.history['val_accuracy'], label='val accuracy'  + exp_name)
    ax2.set_ylim(0.25, 1.)
    ax2.set_title('Accuracy')
    ax2.legend()
    return (ax1, ax2)

In [11]:
# Compile the model as a binary classification model

def compile_model(model):
    '''return a compiled model for the baseline'''
    model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
    return model

In [12]:
# Set the first layers to be untrainable

def set_nontrainable_layers(model):
    model.trainable = False
    return model

In [13]:
# Initialize and compile a model for a transfer learning

def build_model(base_model):
      '''instanciate and return the CNN architecture'''
      base_model = set_nontrainable_layers(base_model)
      flattening_layer = layers.Flatten()
      dense_layer = layers.Dense(500, activation='relu')
      prediction_layer = layers.Dense(1, activation='sigmoid')

      model = Sequential([
      base_model,
      flattening_layer,
      dense_layer,
      prediction_layer
      ])
      return compile_model(model)

# Load the data

In [14]:
# Get resized (224,224,3) and labeled (0,1) data from your local directory for the simple model

nb_healthy = 50     #defines the number of healthy images to be downloaded and loaded
nb_disease = 50     #defines the number of diseased images to be downloaded and loaded

if os.path.exists(os.path.join(LOCAL_DATA_PATH, "dataset_for_model_1")) == False:
    get_simple_data(nb_healthy = nb_healthy, nb_disease = nb_disease)
    X, y = load_simple_data(LOCAL_DATA_PATH, nb_healthy = nb_healthy, nb_disease = nb_disease)
else:
    X, y = load_simple_data(LOCAL_DATA_PATH, nb_healthy = nb_healthy, nb_disease = nb_disease)

In [15]:
# split data in test and train and preprocess data
ratio = int(X.shape[0]*0.8)

X_train_processed = X[:ratio]/ 255.
X_test_processed = X[ratio]/ 255.

y_train = X[:ratio]
y_test = X[ratio:]

In [16]:
X_train_processed.shape

(80, 256, 256, 3)

# CNN baseline model

In [17]:
# Initialize model Baseline for a CNN model

def initialize_baseline_model():
      '''instanciate and return the CNN architecture'''
      model = models.Sequential()
      model.add(layers.Rescaling(1./255, input_shape=(256,256,3)))

      model.add(layers.Conv2D(16, kernel_size=(6, 6), activation='relu'))
      model.add(layers.MaxPool2D(pool_size=(3,3)))
      model.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
      model.add(layers.MaxPool2D(pool_size=(2,2)))
      model.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
      model.add(layers.MaxPool2D(pool_size=(2,2)))
      model.add(layers.Flatten())
      model.add(layers.Dense(32, activation='relu'))
      model.add(layers.Dense(1, activation='sigmoid'))

      return model

In [18]:
# Fit the baseline model on the train data
es = EarlyStopping(patience = 10)
baseline_model = compile_model(initialize_baseline_model())

history_baseline = baseline_model.fit(
  X_train_processed,
  y_train,
  validation_split = 0.2,
  epochs = 500,
  batch_size = 32,
  verbose = 1,
  callbacks = [es]
)

Epoch 1/500


2024-06-05 12:18:12.524678: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


ValueError: in user code:

    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/training.py", line 994, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/training.py", line 1052, in compute_loss
        return self.compiled_loss(
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/losses.py", line 272, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/losses.py", line 2162, in binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    File "/Users/ariannamenghini/.pyenv/versions/3.10.6/envs/lewagon/lib/python3.10/site-packages/keras/backend.py", line 5677, in binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(

    ValueError: `logits` and `labels` must have the same shape, received ((32, 1) vs (32, 256, 256, 3)).


In [None]:
# Plot Loss and accuracy of the basline model
plot_history(history_baseline, title='Baseline', axs=None, exp_name="")

# VGG16 model

In [None]:
# Loads the VGG16 model
def load_VGG16_model():
    model = VGG16(weights="imagenet", include_top=False, input_shape=X_train_processed.shape[1:4].shape)
    return model

In [None]:
# Fit the VGG16 model on the train data
es = EarlyStopping(patience = 10)
VGG16_model = build_model(load_VGG16_model())

history_VGG16 = VGG16_model.fit(
  X_train_processed,
  y_train,
  validation_split = 0.2,
  epochs = 500,
  batch_size = 32,
  verbose = 1,
  callbacks = [es]
)

In [None]:
# Plot Loss and accuracy of the VGG16 model
plot_history(history_VGG16, title='VGG16', axs=None, exp_name="")

# ResNet50

In [None]:
# Loads the ResNet50 model
def load_ResNet50_model():
    model = ResNet50(weights="imagenet", include_top=False, input_shape=X_train_processed.shape[1:4].shape)
    return model

In [None]:
# Fit the ResNet50 model on the train data
es = EarlyStopping(patience = 10)
ResNet50_model = build_model(load_ResNet50_model())

history_ResNet50 = ResNet50_model.fit(
  X_train_processed,
  y_train,
  validation_split = 0.2,
  epochs = 500,
  batch_size = 32,
  verbose = 1,
  callbacks = [es]
)

In [None]:
# Plot Loss and accuracy of the ResNet50 model
plot_history(history_ResNet50, title='ResNet50', axs=None, exp_name="")

# EfficientNetB0

In [None]:
# Loads the EfficientNetB0 model
def load_EfficientNetB0_model():
    model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=X_train_processed.shape[1:4].shape)
    return model

In [None]:
# Fit the EfficientNetB0 model on the train data
es = EarlyStopping(patience = 10)
EfficientNetB0_model = build_model(load_EfficientNetB0_model())

history_EfficientNetB0 = EfficientNetB0_model.fit(
  X_train_processed,
  y_train,
  validation_split = 0.2,
  epochs = 500,
  batch_size = 32,
  verbose = 1,
  callbacks = [es]
)

In [None]:
# Plot Loss and accuracy of the EfficientNetB0 model
plot_history(history_EfficientNetB0, title='EfficientNetB0', axs=None, exp_name="")

# Data augmentation

In [None]:
# Add Data Augmentation if your model is overfitting
datagen = ImageDataGenerator(
    featurewise_center = False,
    featurewise_std_normalization = False,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
    brightness_range = (0.5, 1.),
    zoom_range = (0.3, 1.5))


# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(X_train_processed)

model_data_aug = build_model()

train_flow = datagen.flow(X_train_processed, y_train, batch_size=16)
val_flow = datagen.flow(X_val_preprocessed, y_val, batch_size=16)

es = EarlyStopping(monitor = 'val_accuracy',
                   mode = 'max',
                   patience = 5,
                   verbose = 1,
                   restore_best_weights = True)

history_data_aug = model_data_aug.fit(train_flow,
                                      validation_data = val_flow,
                                      epochs = 50,
                                      callbacks = [es])