In [1]:
### Run this cell to import the packages you will need to unpack the dataset
# File manipulation and IO (input/output)
import os
import pickle
import zipfile

# Import numerical and dataframe handling
import numpy as np
import scipy
import pandas as pd

# Data preprocessing
from PIL import Image
from sklearn.utils import shuffle

# Model scoring
from sklearn.metrics import confusion_matrix
from sklearn import metrics

# Import standard machine learning machinery
import tensorflow as tf

# Garbage collection (for saving RAM during training)
import gc

# Import plotting functionality
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.ticker import MultipleLocator
import matplotlib

2024-07-31 10:59:30.430213: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-31 10:59:30.529399: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-31 10:59:31.046575: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2024-07-31 10:59:31.046625: W tensorflow/

# Load Data

In [2]:
# Report how many GPUs TensorFlow can see before any heavy work starts.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0
2024-07-31 10:59:31.724238: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2024-07-31 10:59:31.724260: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2024-07-31 10:59:31.724273: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (p-d80955c4-ad7b-4eff-b971-b372a13f61b2): /proc/driver/nvidia/version does not exist


In [3]:
# Directory holding the ISIC images together with their metadata CSV.
path = "../isicdataset/ISIC-images/"

# One metadata row per image; `isic_id` is later used to build image file names.
metadata_csv = f'{path}metadata.csv'
df = pd.read_csv(metadata_csv)
df.head()

Unnamed: 0,isic_id,attribution,copyright_license,acquisition_day,age_approx,anatom_site_general,benign_malignant,clin_size_long_diam_mm,concomitant_biopsy,dermoscopic_type,...,mel_class,mel_mitotic_index,mel_thick_mm,mel_type,mel_ulcer,melanocytic,nevus_type,patient_id,personal_hx_mm,sex
0,ISIC_0000000,Anonymous,CC-0,,55,anterior torso,benign,,False,,...,,,,,,True,,,,female
1,ISIC_0000001,Anonymous,CC-0,,30,anterior torso,benign,,False,,...,,,,,,True,,,,female
2,ISIC_0000002,Anonymous,CC-0,,60,upper extremity,malignant,,True,,...,,,,,,True,,,,female
3,ISIC_0000003,Anonymous,CC-0,,30,upper extremity,benign,,False,,...,,,,,,True,,,,male
4,ISIC_0000004,Anonymous,CC-0,,80,posterior torso,malignant,,True,,...,,,,,,True,,,,male


In [4]:
# One-hot encode the diagnosis: the `diagnosis` column is replaced by one
# integer indicator column per class.
df = pd.get_dummies(df, columns=['diagnosis'], dtype="int")

# The indicator columns are listed explicitly (not matched by prefix) because
# `diagnosis_confirm_type` also starts with "diagnosis_".
diagnosis_columns = [
    'diagnosis_actinic keratosis',
    'diagnosis_basal cell carcinoma',
    'diagnosis_melanoma',
    'diagnosis_nevus',
    'diagnosis_seborrheic keratosis',
    'diagnosis_solar lentigo',
    'diagnosis_squamous cell carcinoma',
]

# Pack each row's indicators into a single list stored under `diagnosis`.
one_hot_rows = df[diagnosis_columns].to_numpy().tolist()
df["diagnosis"] = one_hot_rows
df.head()

Unnamed: 0,isic_id,attribution,copyright_license,acquisition_day,age_approx,anatom_site_general,benign_malignant,clin_size_long_diam_mm,concomitant_biopsy,dermoscopic_type,...,personal_hx_mm,sex,diagnosis_actinic keratosis,diagnosis_basal cell carcinoma,diagnosis_melanoma,diagnosis_nevus,diagnosis_seborrheic keratosis,diagnosis_solar lentigo,diagnosis_squamous cell carcinoma,diagnosis
0,ISIC_0000000,Anonymous,CC-0,,55,anterior torso,benign,,False,,...,,female,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]"
1,ISIC_0000001,Anonymous,CC-0,,30,anterior torso,benign,,False,,...,,female,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]"
2,ISIC_0000002,Anonymous,CC-0,,60,upper extremity,malignant,,True,,...,,female,0,0,1,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0]"
3,ISIC_0000003,Anonymous,CC-0,,30,upper extremity,benign,,False,,...,,male,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]"
4,ISIC_0000004,Anonymous,CC-0,,80,posterior torso,malignant,,True,,...,,male,0,0,1,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0]"


In [5]:
# Metadata columns packed into the per-image `features` vector.
columns = ["age_approx", "sex", "anatom_site_general"]

# Integer codes for the categorical columns. Every category gets its own code:
# "palms/soles" and "lateral torso" used to share code 8, which made the two
# sites indistinguishable. "lateral torso" now takes the next free code so the
# codes of all other categories are unchanged.
sex_codes = {"male": 0, "female": 1}
anatom_site_codes = {
    "scalp": 1,
    "torso": 2,
    "upper extremity": 3,
    "lower extremity": 4,
    "head/neck": 5,
    "anterior torso": 6,
    "posterior torso": 7,
    "palms/soles": 8,
    "oral/genital": 9,
    "lateral torso": 10,
}

# Assign the result back instead of calling `replace(..., inplace=True)` on a
# column selection: pandas treats that as chained assignment (it warns in 2.x
# and leaves `df` unmodified once Copy-on-Write is enabled).
df["sex"] = df["sex"].replace(sex_codes)
df["anatom_site_general"] = df["anatom_site_general"].replace(anatom_site_codes)

# One list per row: [age_approx, sex, anatom_site_general].
df["features"] = df.loc[:, columns].values.tolist()

df.head()

Unnamed: 0,isic_id,attribution,copyright_license,acquisition_day,age_approx,anatom_site_general,benign_malignant,clin_size_long_diam_mm,concomitant_biopsy,dermoscopic_type,...,sex,diagnosis_actinic keratosis,diagnosis_basal cell carcinoma,diagnosis_melanoma,diagnosis_nevus,diagnosis_seborrheic keratosis,diagnosis_solar lentigo,diagnosis_squamous cell carcinoma,diagnosis,features
0,ISIC_0000000,Anonymous,CC-0,,55,6,benign,,False,,...,1,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]","[55, 1, 6]"
1,ISIC_0000001,Anonymous,CC-0,,30,6,benign,,False,,...,1,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]","[30, 1, 6]"
2,ISIC_0000002,Anonymous,CC-0,,60,3,malignant,,True,,...,1,0,0,1,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0]","[60, 1, 3]"
3,ISIC_0000003,Anonymous,CC-0,,30,3,benign,,False,,...,0,0,0,0,1,0,0,0,"[0, 0, 0, 1, 0, 0, 0]","[30, 0, 3]"
4,ISIC_0000004,Anonymous,CC-0,,80,7,malignant,,True,,...,0,0,0,1,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0]","[80, 0, 7]"


In [6]:
# Build a string -> string lookup table from ISIC id to that image's feature
# vector, serialised as space-separated values (e.g. b'55 1 6').
feature_keys = tf.constant(df['isic_id'].tolist())
feature_strings = tf.constant(
    [" ".join(str(value) for value in row) for row in df['features'].tolist()]
)

feature_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        feature_keys, feature_strings, key_dtype=tf.string, value_dtype=tf.string
    ),
    default_value="NA",  # returned for ids missing from the table
)

# Spot-check the first image id.
feature_table.lookup(tf.constant(['ISIC_0000000'])).numpy()

2024-07-31 10:59:32.115017: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


array([b'55 1 6'], dtype=object)

In [7]:
# Show every column currently on `df`, including the one-hot `diagnosis_*`
# indicators and the packed `diagnosis` / `features` list columns.
df.columns

Index(['isic_id', 'attribution', 'copyright_license', 'acquisition_day',
       'age_approx', 'anatom_site_general', 'benign_malignant',
       'clin_size_long_diam_mm', 'concomitant_biopsy', 'dermoscopic_type',
       'diagnosis_confirm_type', 'family_hx_mm', 'fitzpatrick_skin_type',
       'image_type', 'lesion_id', 'mel_class', 'mel_mitotic_index',
       'mel_thick_mm', 'mel_type', 'mel_ulcer', 'melanocytic', 'nevus_type',
       'patient_id', 'personal_hx_mm', 'sex', 'diagnosis_actinic keratosis',
       'diagnosis_basal cell carcinoma', 'diagnosis_melanoma',
       'diagnosis_nevus', 'diagnosis_seborrheic keratosis',
       'diagnosis_solar lentigo', 'diagnosis_squamous cell carcinoma',
       'diagnosis', 'features'],
      dtype='object')

# Process Data

In [8]:
# Build image paths from the metadata ids (rather than globbing the directory)
# so the path list stays in the same order as the rows of `df`.
image_ids = df['isic_id'].tolist()
list_ds = [path + image_id + ".jpg" for image_id in image_ids]

In [9]:
# Build a string -> string lookup table from ISIC id to the one-hot diagnosis,
# serialised as space-separated values (e.g. b'0 0 0 1 0 0 0').
label_keys = tf.constant(df['isic_id'].tolist())
label_strings = tf.constant(
    [" ".join(str(flag) for flag in row) for row in df['diagnosis'].tolist()]
)

table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        label_keys, label_strings, key_dtype=tf.string, value_dtype=tf.string
    ),
    default_value="NA",  # returned for ids missing from the table
)

# Spot-check the first image id.
table.lookup(tf.constant(['ISIC_0000000'])).numpy()

array([b'0 0 0 1 0 0 0'], dtype=object)

In [10]:
# Hold out the first 20% of the image paths for validation; the remainder is
# used for training. The split follows the existing order of `list_ds`.
val_size = round(0.2 * len(df))
val_ds, train_ds = list_ds[:val_size], list_ds[val_size:]

In [11]:
def process_path(file_path):
    """Turn an image path into one ``((image, features), label)`` example.

    The JPEG at ``file_path`` is decoded to 3 channels, resized to 256x256 and
    divided by 255.0. The file name without its extension is used as the key
    into the module-level ``table`` (labels) and ``feature_table`` (metadata
    features); each looked-up string is split on whitespace and parsed as
    int32 values.

    Args:
        file_path: Path of the JPEG image (Python string or string tensor).

    Returns:
        A pair ``((image, features), label)``.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.io.decode_jpeg(raw_bytes, channels=3)
    image = tf.image.resize(decoded, [256, 256]) / 255.0

    # ".../ISIC_0000000.jpg" -> "ISIC_0000000"
    basename = tf.strings.split(file_path, sep='/')[-1]
    image_id = tf.strings.split(basename, sep='.')[0]

    def _lookup_ints(lookup_table):
        # The tables store vectors as space-separated strings; parse to int32.
        encoded = lookup_table.lookup(image_id)
        return tf.strings.to_number(tf.strings.split(encoded), out_type=tf.dtypes.int32)

    label = _lookup_ints(table)
    features = _lookup_ints(feature_table)
    return (image, features), label

In [12]:
# Eagerly run the first two paths through `process_path` and show the parsed
# metadata features and one-hot label for each.
for sample_path in list_ds[:2]:
    (image, features), label = process_path(sample_path)
    print(features)
    print(label)


tf.Tensor([55  1  6], shape=(3,), dtype=int32)
tf.Tensor([0 0 0 1 0 0 0], shape=(7,), dtype=int32)
tf.Tensor([30  1  6], shape=(3,), dtype=int32)
tf.Tensor([0 0 0 1 0 0 0], shape=(7,), dtype=int32)


In [13]:
# Wrap each list of paths in a tf.data pipeline that yields
# ((image, features), label) examples via `process_path`.
train_ds = tf.data.Dataset.from_tensor_slices(train_ds).map(process_path)
val_ds = tf.data.Dataset.from_tensor_slices(val_ds).map(process_path)

In [14]:
def configure_for_performance(ds, max_samples=500, batch_size=32, shuffle_buffer=500):
    """Subsample, cache, shuffle, batch and prefetch a dataset.

    The sample cap, batch size and shuffle buffer were hard-coded; they are
    now keyword arguments whose defaults reproduce the previous behaviour, so
    existing ``configure_for_performance(ds)`` calls are unaffected.

    Args:
        ds: Dataset to configure.
        max_samples: Keep only the first ``max_samples`` elements before
            caching. Pass ``None`` to keep the whole dataset.
        batch_size: Number of elements per batch.
        shuffle_buffer: Buffer size passed to ``ds.shuffle``.

    Returns:
        The configured dataset.
    """
    if max_samples is not None:
        ds = ds.take(max_samples)
    # Cache before shuffling so the cached elements are reused while the
    # shuffle is re-applied on top of them.
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=shuffle_buffer)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

In [15]:
# Apply the same subsample / cache / shuffle / batch / prefetch pipeline to
# both splits (training first, then validation).
train_ds, val_ds = map(configure_for_performance, (train_ds, val_ds))

# The Model

In [32]:
# Frozen VGG16 convolutional base with ImageNet weights and no classifier head.
# NOTE(review): `process_path` feeds images scaled to [0, 1]; confirm that this
# matches the input preprocessing the pretrained VGG16 weights expect.
base_model = tf.keras.applications.VGG16(include_top=False, weights="imagenet")
base_model.trainable = False

# Metadata branch: three dense layers ending in a 256-wide embedding.
txt_layers = [
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
]
model_txt = tf.keras.Sequential(txt_layers)

# Image branch: random flip -> frozen VGG16 -> two conv layers -> global
# average pooling -> 256-wide embedding.
img_layers = [
    tf.keras.layers.Input(shape=(None, None, 3)),
    tf.keras.layers.RandomFlip(),
    base_model,
    tf.keras.layers.Conv2D(128, (3, 3), activation='gelu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(256, (3, 3), activation='gelu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='gelu'),
]
model_img = tf.keras.Sequential(img_layers)

# Two named inputs: the 3-value metadata vector and the 256x256 RGB image.
txt_input = tf.keras.layers.Input(shape=(3,), name="txt")
img_input = tf.keras.layers.Input(shape=(256, 256, 3), name="img")
txt_side = model_txt(txt_input)
img_side = model_img(img_input)

# Fuse both embeddings and classify into the 7 diagnosis classes.
fused = tf.keras.layers.Concatenate()([img_side, txt_side])
hidden = tf.keras.layers.Dense(128, activation='gelu')(fused)
merged = tf.keras.layers.Dense(7, activation='softmax')(hidden)

# Input order is (image, metadata), matching the feature tuple from `process_path`.
model = tf.keras.Model(inputs=[img_input, txt_input], outputs=merged)
# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(None, None, 3)),
#     tf.keras.layers.Rescaling(1./255),
#     tf.keras.layers.RandomFlip(),
#     tf.keras.layers.RandomZoom((0.2, 0.5)),
#     # tf.keras.layers.RandomCrop(256, 256),
#     tf.keras.layers.CenterCrop(256, 256),
#     # tf.keras.layers.Input(shape=(256, 256, 3)),
#     # base_model,
#     tf.keras.layers.Flatten(),
#     # tf.keras.layers.Dense(256, activation='gelu'),
#     tf.keras.layers.Dense(128, activation='gelu'),
#     tf.keras.layers.Dense(7, activation='softmax'),
# ])

In [34]:
# Labels are one-hot vectors, hence categorical cross-entropy; optimise with
# the legacy Adam implementation at a learning rate of 1e-3.
adam = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=['accuracy'])

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 img (InputLayer)               [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 txt (InputLayer)               [(None, 3)]          0           []                               
                                                                                                  
 sequential_3 (Sequential)      (None, 256)          15665600    ['img[0][0]']                    
                                                                                                  
 sequential_2 (Sequential)      (None, 256)          99328       ['txt[0][0]']              

# Model Training

In [18]:
import tensorflow as tf
import gc
class garbageCollectCallback(tf.keras.callbacks.Callback):
    """Keras callback that triggers a garbage-collection pass after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Call ``gc.collect()``; the ``epoch`` and ``logs`` arguments are not used."""
        gc.collect()

# Early stopping on validation loss: min_delta 0.001, patience 3, monitoring
# starts from epoch 5, and the best weights are restored when training stops.
earlyStopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=3,
    verbose=1,
    mode="min",
    baseline=None,
    restore_best_weights=True,
    start_from_epoch=5,
)

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    # `earlyStopping` used to be constructed but never passed to `fit`, so it
    # had no effect on training. Register it alongside the GC callback.
    callbacks=[garbageCollectCallback(), earlyStopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

In [31]:
# Sanity check: run one image through the model and compare the predicted class
# probabilities with its ground-truth one-hot label.
feature, label = process_path(list_ds[0])
print(feature[1].numpy())

# `process_path` returns unbatched tensors. Add the leading batch axis
# explicitly instead of wrapping each tensor in a Python list, so both model
# inputs arrive as proper batches of size 1.
resized_img = tf.expand_dims(feature[0], axis=0)       # (1, 256, 256, 3)
expanded_feature = tf.expand_dims(feature[1], axis=0)  # (1, 3)

# Input order matches `tf.keras.Model(inputs=[img_input, txt_input], ...)`.
print(model.predict([resized_img, expanded_feature]))
print(label.numpy())

[55  1  6]
[[6.0664242e-06 2.6782800e-06 9.9960512e-01 1.8022240e-04 2.0833109e-05
  1.7473934e-04 1.0476165e-05]]
[0 0 0 1 0 0 0]


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d80955c4-ad7b-4eff-b971-b372a13f61b2' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>