In [None]:
import os
import random
import shutil
from collections import Counter

import pandas as pd
from google.colab import drive
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, Flatten
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from pydrive.drive import GoogleDrive

#mount drive and unzip folder containing images
drive.mount('/content/gdrive')
!unzip gdrive/My\ Drive/archive.zip > /dev/null
print("Total Images: {}".format(len([name for name in os.listdir('ISIC_2019_Training_Input/ISIC_2019_Training_Input')])))

Mounted at /content/gdrive


In [None]:
def OrganizeImages(ground_truth_csv='ISIC_2019_Training_GroundTruth.csv'):
  """Load the ground-truth table and select the rows of the MEL, BCC and BKL classes.

  Args:
    ground_truth_csv: Path of the ground-truth CSV. It must contain the columns
      'MEL', 'BCC' and 'BKL'; a row belongs to a class when that column equals 1.

  Returns:
    A tuple (df_mel, df_bcc, df_bkl) of DataFrames with the rows flagged as
    MEL, BCC and BKL, in that order.

  Raises:
    FileNotFoundError: If the CSV cannot be found.
    KeyError: If one of the three class columns is missing.
  """
  # Only the labels are needed here, so the metadata CSV is intentionally not read.
  df_gt = pd.read_csv(ground_truth_csv)

  return tuple(df_gt.loc[df_gt[label] == 1] for label in ('MEL', 'BCC', 'BKL'))

In [None]:
def ExtractImages(df, name, source_dir='ISIC_2019_Training_Input/ISIC_2019_Training_Input'):
  """Move the images listed in `df` into the folder `<name>/<name>`.

  The destination is created if needed, and images that are already there are
  skipped, so the cell can be re-run without failing.

  Args:
    df: DataFrame with an 'image' column of image ids (file names without '.jpg').
    name: Class label; used for both levels of the destination folder.
    source_dir: Folder that currently holds the '<id>.jpg' files.
  """
  target_dir = name + "/" + name
  # exist_ok avoids the FileExistsError that plain mkdir raises on a re-run.
  os.makedirs(target_dir, exist_ok=True)

  for image_id in df['image']:
    file_name = "{}.jpg".format(image_id)
    if os.path.exists(os.path.join(target_dir, file_name)):
      # Already moved by an earlier run.
      continue
    # An image missing from source_dir still surfaces as an error from shutil.move.
    shutil.move(os.path.join(source_dir, file_name), target_dir)

  print(name + ": {}".format(len(os.listdir(target_dir))))

In [None]:
def SplitData(name, training, validation, test, seed=42):
  """Split the images in `<name>/<name>` into train, validation and test folders.

  Files are moved into `<name>/train`, `<name>/validation` and `<name>/test`.
  The file list is sorted and then shuffled with a fixed seed, so the split is
  reproducible and does not depend on the directory listing order.

  Args:
    name: Class label; images are read from `<name>/<name>`.
    training: Fraction of the images assigned to the training subset.
    validation: Fraction assigned to the validation subset.
    test: Fraction assigned to the test subset.
    seed: Seed of the shuffle that decides which image lands in which subset.
  """
  source_dir = name + "/" + name
  subset_dirs = {subset: name + '/' + subset for subset in ('train', 'validation', 'test')}
  # Create each folder independently so one pre-existing folder cannot prevent
  # the others from being created.
  for subset_dir in subset_dirs.values():
    os.makedirs(subset_dir, exist_ok=True)

  names = sorted(os.listdir(source_dir))
  random.Random(seed).shuffle(names)
  total = len(names)

  # Round the cumulative boundaries rather than each size on its own: the slices
  # then tile the list without gaps and can never run past its end.
  train_end = min(total, round(total * training))
  validation_end = min(total, round(total * (training + validation)))
  test_end = min(total, round(total * (training + validation + test)))

  slices = (
    ('train', names[:train_end]),
    ('validation', names[train_end:validation_end]),
    ('test', names[validation_end:test_end]),
  )
  for subset, subset_names in slices:
    for file_name in subset_names:
      shutil.move(source_dir + "/{}".format(file_name), subset_dirs[subset])

  print_str = name + ":\n\tTrain Size: {}\n\tValidation Size: {}\n\tTest Size: {}\n"
  print(print_str.format(
    len(os.listdir(subset_dirs['train'])),
    len(os.listdir(subset_dirs['validation'])),
    len(os.listdir(subset_dirs['test'])),
  ))

In [None]:
def ClassBalance(train):
  """Compute per-class weights that compensate for class imbalance.

  Each class gets the weight `largest_class_count / class_count`, so the most
  frequent class has weight 1.0 and rarer classes get proportionally more.

  Args:
    train: Object whose `classes` attribute is an iterable with one class id
      per sample (presumably a Keras directory iterator; confirm with caller).

  Returns:
    Dict mapping class id to its float weight; empty when there are no samples.
  """
  counter = Counter(train.classes)
  if not counter:
    # max() on an empty sequence would raise; no samples means no weights.
    return {}
  max_val = float(max(counter.values()))
  class_weights = {class_id: max_val / num_images for class_id, num_images in counter.items()}
  return class_weights

In [None]:
# Select the ground-truth rows of each class, then move every class's images
# into its own folder.
df_mel, df_bcc, df_bkl = OrganizeImages()
for class_frame, class_name in ((df_mel, 'MEL'), (df_bcc, 'BCC'), (df_bkl, 'BKL')):
  ExtractImages(class_frame, class_name)

# Fractions of each class assigned to the training, validation and test subsets.
training, validation, test = 0.7, 0.15, 0.15

for class_name in ('MEL', 'BCC', 'BKL'):
  SplitData(class_name, training, validation, test)

In [None]:
# NOTE(review): this repeats the SplitData('MEL', ...) call already made in the
# previous cell, so it re-splits whatever files are still left in MEL/MEL.
# Confirm the re-run is intentional; otherwise this cell can be removed.
SplitData('MEL', training, validation, test)

In [None]:
# Build the classifier: a ResNet50 backbone with ImageNet weights, its top
# removed and global average pooling, followed by a softmax layer 'fc2' with
# one unit per class.
# NOTE(review): `train` is not defined in the visible cells; presumably it is a
# Keras directory iterator exposing `num_classes`. Confirm it is created before
# this cell runs.
resNet = ResNet50(include_top=False, pooling='avg', weights='imagenet', input_shape=(224, 224, 3))
x = resNet.output
fc2 = Dense(train.num_classes, activation='softmax', name='fc2')(x)
model = Model(inputs=resNet.input, outputs=fc2)
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()