In [25]:
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import cv2
import torch
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn import svm
from sklearn.metrics import  f1_score, precision_score, recall_score, accuracy_score
import os
import shutil
import random
import numpy as np
from skimage.io import imread, imshow
from keras.utils import img_to_array, load_img
from sklearn.model_selection import train_test_split
from keras.layers import concatenate, Activation, BatchNormalization, Dropout, Conv2D, Conv2DTranspose, MaxPooling2D, Input
from keras.optimizers import Adam,SGD
from keras.models import Model
import tensorflow as tf
from skimage.transform import resize
from skimage import io, color, feature, img_as_ubyte, measure
from PIL import Image
from torchvision.ops import masks_to_boxes
from torchvision import transforms

In [3]:
#Download ham10000 dataset from kaggle
!mkdir ~/.kaggle/
!cp kaggle.json ~/.kaggle/
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

mkdir: cannot create directory ‘/root/.kaggle/’: File exists
Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.19G/5.20G [02:51<00:00, 38.9MB/s]
100% 5.20G/5.20G [02:51<00:00, 32.5MB/s]


In [4]:
  def get_conv2d_layers(input_tensor, n_filters, kernel_size = 3, batchnorm = True):
    """Function to create convolution layers with the given input parameters"""
    # Layer 1
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Layer 2
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return x

def unet(input_img, n_filters = 16, dropout = 0.1, batchnorm = True):
    """Function to define the UNET Model"""
    # UNET Contracting path
    conv1 = get_conv2d_layers(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    pool1 = MaxPooling2D((2, 2))(conv1)
    pool1 = Dropout(dropout)(pool1)

    conv2 = get_conv2d_layers(pool1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    pool2 = MaxPooling2D((2, 2))(conv2)
    pool2 = Dropout(dropout)(pool2)

    conv3 = get_conv2d_layers(pool2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    pool3 = MaxPooling2D((2, 2))(conv3)
    pool3 = Dropout(dropout)(pool3)

    conv4 = get_conv2d_layers(pool3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    pool4 = MaxPooling2D((2, 2))(conv4)
    pool4 = Dropout(dropout)(pool4)

    conv5 = get_conv2d_layers(pool4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)

    # UNET Expansive path
    up6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(conv5)
    up6 = concatenate([up6, conv4])
    up6 = Dropout(dropout)(up6)
    conv6 = get_conv2d_layers(up6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)

    up7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(conv6)
    up7 = concatenate([up7, conv3])
    up7 = Dropout(dropout)(up7)
    conv7 = get_conv2d_layers(up7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)

    up8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(conv7)
    up8 = concatenate([up8, conv2])
    up8 = Dropout(dropout)(up8)
    conv8 = get_conv2d_layers(up8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)

    up9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(conv8)
    up9 = concatenate([up9, conv1])
    up9 = Dropout(dropout)(up9)
    conv9 = get_conv2d_layers(up9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
    model = Model(inputs=[input_img], outputs=[outputs])
    return model

In [5]:
image_dims = Input((256,384, 3), name='img')
model = unet(image_dims, n_filters=16, dropout=0.05, batchnorm=True)

In [6]:
model.load_weights('model-skin-lesion-segmentation-org2000.h5')

In [7]:
import zipfile

# Unzip the dataset
local_zip = 'skin-cancer-mnist-ham10000.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('archive_file')
zip_ref.close()

In [8]:
import os
from glob import glob
from PIL import Image
# Directory with our training pictures
HAM10000_images_part_1 = os.path.join('./archive_file/HAM10000_images_part_1')
# Directory with our training pictures
HAM10000_images_part_2= os.path.join('./archive_file/HAM10000_images_part_2')
HAM10000_metadata = os.path.join('./archive_file/HAM10000_metadata.csv')

In [9]:
HAM10000_images_part_1_name = os.listdir(HAM10000_images_part_1)
print(HAM10000_images_part_1_name[:10])
HAM10000_images_part_2_name = os.listdir(HAM10000_images_part_2)

['ISIC_0025189.jpg', 'ISIC_0027061.jpg', 'ISIC_0028731.jpg', 'ISIC_0024659.jpg', 'ISIC_0024709.jpg', 'ISIC_0029236.jpg', 'ISIC_0028961.jpg', 'ISIC_0028519.jpg', 'ISIC_0027627.jpg', 'ISIC_0025757.jpg']


In [10]:
for i in range(0,len(HAM10000_images_part_1_name)):
    HAM10000_images_part_1_name[i] = r'./archive_file/HAM10000_images_part_1/' + HAM10000_images_part_1_name[i]

In [11]:
for i in range(0,len(HAM10000_images_part_2_name)):
    HAM10000_images_part_2_name[i] = r'./archive_file/HAM10000_images_part_2/' + HAM10000_images_part_2_name[i]

In [12]:
HAM10000_images_part_merged = HAM10000_images_part_1_name+HAM10000_images_part_2_name

In [13]:
len(HAM10000_images_part_1_name)+len(HAM10000_images_part_2_name),len(HAM10000_images_part_merged)


(10015, 10015)

In [14]:
df = pd.read_csv(HAM10000_metadata)

In [15]:
df.head(5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [16]:
image_mapping = dict(zip(df['image_id'], HAM10000_images_part_merged))

# Add a new column to the DataFrame with image addresses using the mapping
df['path'] = df['image_id'].map(image_mapping)

In [17]:
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'dermatofibroma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

In [18]:
df['cell_type'] = df['dx'].map(lesion_type_dict.get)


In [19]:
df['cell_type_idx'] = pd.Categorical(df['cell_type']).codes


In [20]:
df['cell_type_idx'].unique()


array([2, 4, 3, 6, 5, 1, 0], dtype=int8)

In [21]:
df.isna().sum()


lesion_id         0
image_id          0
dx                0
dx_type           0
age              57
sex               0
localization      0
path              0
cell_type         0
cell_type_idx     0
dtype: int64

In [22]:
df['age'].fillna((df['age'].median()), inplace = True)

In [23]:
from tensorflow.keras.utils import to_categorical
X = df.drop(columns = ['cell_type_idx'], axis = 1)
y = df['cell_type_idx']
y = to_categorical(y, num_classes = 7)

In [27]:
boxes=[]
segmented = []
for i in range(len(df)) :
  image = Image.open(df.iloc[i]["path"]).resize((384,256))
  image_arr = np.array(image).astype(np.uint8)
  prediction = model.predict(image_arr.reshape(1,*image_arr.shape))
  prediction = prediction.squeeze()
  prediction = (prediction > 0.5).astype(bool)
  torch_tensor = torch.from_numpy(prediction)
  torch_tensor = torch_tensor.reshape(1,*torch_tensor.shape)
  try :
    box = masks_to_boxes(torch_tensor)
  except :
    box = []
  boxes.append(box)



In [None]:
df_copy = df.copy()
df_copy['bbox'] = boxes

In [None]:
#manully determine two missed bounding boxes
#image_3058
x_min_3058 = 120
y_min_3058 = 60
x_max_3058 = 280
y_max_3058 = 180
df_copy.iloc[3058]['bbox'] = torch.tensor([x_min_3058,y_min_3058,x_max_3058,y_max_3058])
#image_4526
x_min_4526 = 95
y_min_4526 = 45
x_max_4526 = 260
y_max_4526 = 190
df_copy.iloc[4526]['bbox'] = torch.tensor([x_min_4526,y_min_4526,x_max_4526,y_max_4526,])

In [31]:
df_copy["bbox"] = df_copy["bbox"].apply(lambda x : x.numpy().ravel())

In [32]:
df_copy[['xmin', 'ymin', 'xmax', 'ymax']] = df_copy['bbox'].apply(pd.Series)

In [33]:
df_copy = df_copy.drop("bbox",axis=1)

In [35]:
df_copy['xmin','ymin','xmax','ymax']] = df_copy[['xmin','ymin','xmax','ymax']].astype(int)

In [None]:
df_copy.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,xmin,ymin,xmax,ymax
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,./archive_file/HAM10000_images_part_1/ISIC_002...,Benign keratosis-like lesions,2,3,4,269,254
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,./archive_file/HAM10000_images_part_1/ISIC_002...,Benign keratosis-like lesions,2,49,19,369,223
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,./archive_file/HAM10000_images_part_1/ISIC_002...,Benign keratosis-like lesions,2,4,5,361,235
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,./archive_file/HAM10000_images_part_1/ISIC_002...,Benign keratosis-like lesions,2,11,29,365,216
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,./archive_file/HAM10000_images_part_1/ISIC_002...,Benign keratosis-like lesions,2,73,43,305,205


In [None]:
df_copy[['path','xmin','ymin','xmax','ymax','cell_type']].to_csv("annotation.csv",index=False)