In [1]:
import pandas as pd
from numpy import *
import tensorflow as tf

In [2]:
# All competition inputs live under one dataset root; derive every path from it.
DATA_ROOT = '../input/siim-isic-melanoma-classification/'

# Directories holding the JPEG images for each split.
train_dir = DATA_ROOT + 'jpeg/train/'
test_dir = DATA_ROOT + 'jpeg/test/'

# Per-image metadata tables for each split.
train = pd.read_csv(DATA_ROOT + 'train.csv')
test = pd.read_csv(DATA_ROOT + 'test.csv')

In [3]:
# Preview the first rows of the training metadata table.
train.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [4]:
# List the distinct diagnosis labels present in the training metadata.
train['diagnosis'].unique()

array(['unknown', 'nevus', 'melanoma', 'seborrheic keratosis',
       'lentigo NOS', 'lichenoid keratosis', 'solar lentigo',
       'cafe-au-lait macule', 'atypical melanocytic proliferation'],
      dtype=object)

In [5]:
# Keep only rows whose diagnosis is something other than 'unknown',
# then show how many images remain per diagnosis.
train = train.loc[train['diagnosis'].ne('unknown')]
train['diagnosis'].value_counts()

nevus                                 5193
melanoma                               584
seborrheic keratosis                   135
lentigo NOS                             44
lichenoid keratosis                     37
solar lentigo                            7
cafe-au-lait macule                      1
atypical melanocytic proliferation       1
Name: diagnosis, dtype: int64

In [6]:
# Remove the 'atypical melanocytic proliferation' rows and re-count per diagnosis.
train = train.loc[train['diagnosis'].ne('atypical melanocytic proliferation')]
train['diagnosis'].value_counts()

nevus                   5193
melanoma                 584
seborrheic keratosis     135
lentigo NOS               44
lichenoid keratosis       37
solar lentigo              7
cafe-au-lait macule        1
Name: diagnosis, dtype: int64

In [7]:
# Remove the 'cafe-au-lait macule' rows and re-count per diagnosis.
train = train.loc[train['diagnosis'].ne('cafe-au-lait macule')]
train['diagnosis'].value_counts()

nevus                   5193
melanoma                 584
seborrheic keratosis     135
lentigo NOS               44
lichenoid keratosis       37
solar lentigo              7
Name: diagnosis, dtype: int64

**Preprocess Image**

In [8]:
# Build the full image path for every training row with vectorised string
# concatenation instead of a per-row `.iloc` loop. The results are kept as
# plain lists (same names as before) so the frames below get a fresh
# 0..n-1 index rather than inheriting the filtered index of `train`.
data = (train_dir + train['image_name'] + '.jpg').tolist()
labels = train['diagnosis'].tolist()

# Training frame: one row per image path with its diagnosis label.
df = pd.DataFrame({'images': data, 'diagnosis': labels})

# Test frame: image paths only (the test table is not given a label column here).
test_data = (test_dir + test['image_name'] + '.jpg').tolist()
df_test = pd.DataFrame({'images': test_data})

In [9]:
# Number of image paths available for each diagnosis label.
images_by_diagnosis = df.groupby('diagnosis')['images']
images_by_diagnosis.count()

diagnosis
lentigo NOS               44
lichenoid keratosis       37
melanoma                 584
nevus                   5193
seborrheic keratosis     135
solar lentigo              7
Name: images, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
# The diagnosis classes are heavily imbalanced, so the split is stratified on
# the label: every class then appears in both partitions in roughly the same
# proportion, and the two image generators built from them always agree on
# the set of classes. `random_state` keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['images'],
    df['diagnosis'],
    test_size=0.2,
    random_state=20,
    stratify=df['diagnosis'],
)

In [11]:
# Recombine the training paths and labels into a single frame.
# NOTE: the label column is spelled 'daignosis' (sic); the generators'
# `y_col` argument refers to it by exactly this name.
train_data = X_train.to_frame(name='images').assign(daignosis=y_train)

In [12]:
# Recombine the held-out paths and labels into a single frame.
# NOTE: the label column is spelled 'daignosis' (sic); the generators'
# `y_col` argument refers to it by exactly this name.
test_data = X_test.to_frame(name='images').assign(daignosis=y_test)

In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to training images only.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
)

# Both generators scale pixel values by 1/255; only the training one augments.
train_datage = ImageDataGenerator(rescale=1.0 / 255, **augmentation)
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [14]:
# Settings shared by the training and validation iterators: which columns hold
# the file path and the label, the image size, the batch size, and one-hot
# ('categorical') label encoding.
flow_kwargs = dict(
    x_col='images',
    y_col='daignosis',
    target_size=(224, 224),
    batch_size=256,
    class_mode='categorical',
)

# Training batches are shuffled; validation batches keep the frame's row order.
train_generator = train_datage.flow_from_dataframe(train_data, shuffle=True, **flow_kwargs)
val_generator = val_datagen.flow_from_dataframe(test_data, shuffle=False, **flow_kwargs)

Found 4800 validated image filenames belonging to 6 classes.
Found 1200 validated image filenames belonging to 6 classes.


In [15]:
from tensorflow.keras.applications.vgg16 import VGG16

# ImageNet-pretrained VGG16 convolutional stack without its classifier head,
# taking 224x224 RGB inputs.
# NOTE(review): the image generators rescale pixels by 1/255, whereas the Keras
# docs pair these weights with `vgg16.preprocess_input` — confirm which input
# scaling is intended.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze the pretrained weights so they are not updated during training.
base_model.trainable = False
base_model.summary()

2022-08-10 08:29:18.104525: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:18.245515: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:18.246369: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:18.247842: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
   16384/58889256 [..............................] - ETA: 0s

node zero
2022-08-10 08:29:18.248987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:18.249690: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:20.530890: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:20.531814: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-10 08:29:20.532508: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node r

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [16]:
# Classification head on top of the frozen VGG16 features.
# The task is single-label over 6 mutually exclusive diagnosis classes and the
# model is trained with categorical cross-entropy, so the output layer uses
# softmax (a proper probability distribution over the classes) rather than
# independent per-class sigmoids.
model = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(6, activation='softmax')
])

In [17]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 50)                1254450   
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 306       
Total params: 15,969,444
Trainable params: 1,254,756
Non-trainable params: 14,714,688
_________________________________________________________________


In [18]:
from tensorflow.keras.callbacks import EarlyStopping

# Register the metric under its full name: Keras derives the log keys from the
# string given here, so 'accuracy' produces 'accuracy' / 'val_accuracy'.
# With the 'acc' alias the validation key is 'val_acc', and the EarlyStopping
# callback below would never find the 'val_accuracy' value it monitors.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop when validation accuracy has not improved for `patience` epochs and
# roll back to the best weights seen.
# NOTE(review): patience equals the number of epochs below, so the callback
# cannot trigger within this run — lower patience or raise epochs if early
# stopping is actually wanted.
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, restore_best_weights=True)

model.fit(train_generator, epochs=5, validation_data=val_generator, callbacks=[es])

2022-08-10 08:29:40.623908: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/5


2022-08-10 08:30:00.909154: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f9e24bff490>

In [19]:
# Predicted class index for each validation image: position of the largest
# score along the class axis of the model output.
val_scores = model.predict(val_generator)
val_scores.argmax(axis=1)

array([3, 3, 3, ..., 2, 3, 3])