In [34]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Conv2D, BatchNormalization, GlobalAveragePooling2D, Flatten, Dropout, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

In [35]:
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
IMG_HEIGHT = 500
IMG_WIDTH = 500
BATCH_SIZE=64
AUTO = tf.data.experimental.AUTOTUNE
REG = 0.0005
EPOCHS=20

In [57]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
train_df["image_name"] = train_df["image_name"].apply(lambda x: TRAIN_IMAGE_PATH + x + ".jpg")
test_df["image_name"] = test_df["image_name"].apply(lambda x: TEST_IMAGE_PATH + x + ".jpg")

In [5]:
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=45, shuffle=True)

In [6]:
train_df.shape, val_df.shape

((29813, 8), (3313, 8))

In [7]:
def decode_image(filename, label=None, image_size=(IMG_WIDTH, IMG_HEIGHT)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32)/255.0
    image = tf.image.resize(image, size=image_size)
    
    if label is None:
        return image
    else:
        return image, label

In [8]:
def data_augment(image, label=None):
    image = tf.image.random_flip_left_right(image)
    if label is None:
        return image
    else:
        return image, label

In [9]:
def compute_class_weights(labels):
    total_labels = labels.shape[0]
    
    positive_labels = K.sum(labels, axis=0)/total_labels
    negative_labels = 1 - positive_labels
    return positive_labels, negative_labels

In [10]:
train_dataset = (tf.data.Dataset
                 .from_tensor_slices((train_df.image_name, train_df.target))
                 .map(decode_image, num_parallel_calls=AUTO)
                 .map(data_augment, num_parallel_calls=AUTO)
                 .repeat()
                 .shuffle(512)
                 .batch(BATCH_SIZE)
                 .prefetch(AUTO)
                )

In [11]:
val_dataset = (tf.data.Dataset
               .from_tensor_slices((val_df.image_name, val_df.target))
               .map(decode_image, num_parallel_calls=AUTO)
               .map(data_augment, num_parallel_calls=AUTO)
               .repeat()
               .shuffle(512)
               .batch(BATCH_SIZE)
               .prefetch(AUTO))

In [12]:
test_dataset = (tf.data.Dataset.from_tensor_slices((test_df.image_name))
                .map(decode_image, num_parallel_calls=AUTO)
                .batch(BATCH_SIZE))

In [13]:
lr = ReduceLROnPlateau(
    monitor="val_loss",
    patience=10,
    min_lr=0.000001,
    factor=0.5,
    verbose=1
)

In [14]:
es = EarlyStopping(monitor="val_loss", patience=10)

In [15]:
model_r50 = ResNet50(include_top=False, weights="imagenet", input_shape=(IMG_WIDTH, IMG_HEIGHT,3))
model_r50.trainable=False

model = Conv2D(filters=32, kernel_size=(3,3), data_format="channels_last", activation="relu", kernel_regularizer=l2(REG))(model_r50.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=32, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = GlobalAveragePooling2D()(model)
model = Dropout(0.25)(model)


model = Flatten()(model)
model = Dense(256, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Dropout(0.5)(model)
model = Dense(64, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)

output = Dense(1, activation="sigmoid")(model)
model_r50 = Model(inputs=model_r50.input, outputs=output)
model_r50.summary()
model_r50.compile(optimizer="adam", loss="binary_crossentropy", metrics=[tf.keras.metrics.binary_crossentropy])

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 500, 500, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 506, 506, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 250, 250, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 250, 250, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

In [16]:
compute_class_weights(train_df.target.values)

(<tf.Tensor: shape=(), dtype=float64, numpy=0.01801227652366417>,
 <tf.Tensor: shape=(), dtype=float64, numpy=0.9819877234763358>)

In [20]:
history = model_r50.fit(train_dataset, epochs=EPOCHS, callbacks=[lr, es],
                        steps_per_epoch=train_df.shape[0]//BATCH_SIZE, validation_data=val_dataset,
                        validation_steps=val_df.shape[0]//BATCH_SIZE,
                        class_weight=compute_class_weights(train_df.target.values),)

Train for 465 steps, validate for 51 steps
Epoch 1/20


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\ashuk\.conda\envs\tensorflow_env\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-20-35ab94e0674b>", line 4, in <module>
    class_weight=compute_class_weights(train_df.target.values),)
  File "c:\users\ashuk\.conda\envs\tensorflow_env\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
    use_multiprocessing=use_multiprocessing)
  File "c:\users\ashuk\.conda\envs\tensorflow_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 342, in fit
    total_epochs=epochs)
  File "c:\users\ashuk\.conda\envs\tensorflow_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 128, in run_one_epoch
    batch_outs = execution_function(iterator)
  File "c:\users\ashuk\.conda\envs\tensorflow_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils

KeyboardInterrupt: 

In [18]:
probs = model_r50.predict(test_dataset, verbose = 1)



In [22]:
probs

array([[0.00507114],
       [0.00543646],
       [0.00563376],
       ...,
       [0.00560183],
       [0.00500762],
       [0.00506516]], dtype=float32)

In [23]:
sam = pd.read_csv("sample_submission.csv")

In [24]:
sam.head()


Unnamed: 0,image_name,target
0,ISIC_0052060,0
1,ISIC_0052349,0
2,ISIC_0058510,0
3,ISIC_0073313,0
4,ISIC_0073502,0


In [25]:
sam["target"] = probs

In [26]:
sam

Unnamed: 0,image_name,target
0,ISIC_0052060,0.005071
1,ISIC_0052349,0.005436
2,ISIC_0058510,0.005634
3,ISIC_0073313,0.005632
4,ISIC_0073502,0.005650
...,...,...
10977,ISIC_9992485,0.005692
10978,ISIC_9996992,0.005656
10979,ISIC_9997917,0.005602
10980,ISIC_9998234,0.005008


In [27]:
sam.to_csv("res_50.csv", index=False)

In [58]:
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder

In [59]:
lb = LabelEncoder()

In [60]:
image_names = train_df["image_name"].values

In [61]:
train_df["image_name"] = lb.fit_transform(train_df["image_name"].values)

In [63]:
train_df["target"] = train_df["target"].astype("int")

In [64]:
train_df.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,8449,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,0,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,1,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,2,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,3,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [65]:
map_name_no = dict(zip(train_df["image_name"], image_names))

In [66]:
y_train = train_df["target"]

In [67]:
x_train = train_df[["image_name"]]

In [69]:
sm = SMOTE(random_state=45)

In [71]:
x_train, y_train = sm.fit_resample(x_train, y_train)

In [75]:
sum(y_train), len(y_train), len(y_train) - sum(y_train)

(32542, 65084, 32542)