In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import DenseNet121
from keras.models import Model, Sequential, load_model, Input
from keras.layers import (Conv2D, Dense, MaxPooling2D, LeakyReLU, Reshape,
                          Flatten, Dropout, BatchNormalization)
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split
import numpy as np
import efficientnet.keras as efn
from sklearn.utils import class_weight

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
test_df

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,
1,ISIC_0052349,IP_7782715,male,40.0,lower extremity
2,ISIC_0058510,IP_7960270,female,55.0,torso
3,ISIC_0073313,IP_6375035,female,50.0,torso
4,ISIC_0073502,IP_0589375,female,45.0,lower extremity
...,...,...,...,...,...
10977,ISIC_9992485,IP_4152479,male,40.0,torso
10978,ISIC_9996992,IP_4890115,male,35.0,torso
10979,ISIC_9997917,IP_2852390,male,25.0,upper extremity
10980,ISIC_9998234,IP_8861963,male,65.0,lower extremity


In [5]:
train_df.groupby(["benign_malignant"]).count()

Unnamed: 0_level_0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,target
benign_malignant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
benign,32542,32542,32477,32474,32024,32542,32542
malignant,584,584,584,584,575,584,584


In [6]:
# sns.countplot(train_df["benign_malignant"])

In [7]:
# sns.countplot(train_df["sex"])

In [8]:
# sns.distplot(train_df["age_approx"])

In [9]:
# plt.figure(figsize=(20,20))
# sns.countplot(train_df["anatom_site_general_challenge"])

In [10]:
# plt.figure(figsize=(20,20))
# sns.countplot(train_df["diagnosis"])

In [11]:
train_df_copy = train_df[["image_name", "target"]]
test_df_copy = test_df[["image_name"]]

In [12]:
del train_df
del test_df

In [13]:
train_df_copy["image_name"] = train_df_copy["image_name"].apply(lambda x: x+".jpg")
test_df_copy["image_name"] = test_df_copy["image_name"].apply(lambda x: x+".jpg")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [14]:
#train_df_copy, val_df = train_test_split(train_df_copy, test_size=0.1, random_state=45)
target_0 = len(train_df_copy[train_df_copy["target"]==0])
target_1 = len(train_df_copy[train_df_copy["target"]==1])

In [15]:
no_0 = np.random.randint(low=0, high=target_0, size=(int(0.9*(target_0)),))
no_1 = np.random.randint(low=0, high=target_1, size=(int(0.9*(target_1))))

In [16]:
x_train = train_df_copy.iloc[np.concatenate([no_0, no_1])]

In [17]:
val_df = train_df_copy.drop(train_df_copy.index[np.concatenate([no_0, no_1])])
train_df_copy = x_train

In [18]:
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
REG=0.0005
IMAGE_SIZE=300
BATCH_SIZE=16

In [19]:
train_image_generator = ImageDataGenerator(
    rotation_range=50,
    width_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
    featurewise_center=True,
    featurewise_std_normalization=True
)

In [20]:
train_generator = train_image_generator.flow_from_dataframe(
    dataframe=train_df_copy,
    directory=TRAIN_IMAGE_PATH,
    x_col = "image_name",
    y_col = "target",
    class_mode="raw",
    shuffle=True,
    batch_size=BATCH_SIZE,
    seed=45,
    target_size = (IMAGE_SIZE,IMAGE_SIZE)
)

Found 29812 validated image filenames.


In [21]:
val_image_generator = ImageDataGenerator(
    rotation_range=50,
    width_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
    featurewise_center=True,
    featurewise_std_normalization=True
)

val_generator = val_image_generator.flow_from_dataframe(
    dataframe=val_df,
    directory=TRAIN_IMAGE_PATH,
    x_col = "image_name",
    y_col = "target",
    class_mode="raw",
    shuffle=True,
    batch_size=BATCH_SIZE,
    seed=45,
    target_size = (IMAGE_SIZE,IMAGE_SIZE)
)

Found 13802 validated image filenames.


In [22]:
test_image_enerator = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True
)

In [23]:
test_generator = test_image_enerator.flow_from_dataframe(
    dataframe=test_df_copy,
    directory=TEST_IMAGE_PATH,
    x_col="image_name",
    y_col=None,
    class_mode=None,
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=45,
    target_size=(IMAGE_SIZE,IMAGE_SIZE)
)

Found 10982 validated image filenames.


In [24]:
lr = ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,
    patience=5,
    min_lr=0.00001,
    verbose=1
)
es = EarlyStopping(
    monitor="val_loss",
    patience=7
)


In [25]:
# model_d121 = DenseNet121(include_top=False, weights="imagenet", input_shape=(IMAGE_SIZE,IMAGE_SIZE,3))
# model_d121.trainable=False
# model = Conv2D(filters=32, kernel_size=(3,3), data_format="channels_last" ,activation="relu", kernel_regularizer=l2(REG))(model_d121.output)
# model = BatchNormalization(axis=-1, center=True, scale=False)(model)
# model = Conv2D(filters=32, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
# model = BatchNormalization(axis=-1, center=True, scale=False)(model)
# model = MaxPooling2D(pool_size=(2,2), padding="SAME")(model)
# model = Dropout(0.25)(model)
# model = Flatten()(model)
# model = Dense(256, activation="relu")(model)
# model = BatchNormalization(axis=-1, center=True, scale=False)(model)
# model = Dropout(0.5)(model)
# model = Dense(64, activation="relu")(model)
# model = BatchNormalization(axis=-1, center=True, scale=False)(model)
# output = Dense(1, activation="sigmoid")(model)
# model_d121 = Model(inputs=model_d121.input, outputs=output)
# model_d121.summary()
# model_d121.compile(optimizer="adam",
#                   loss="binary_crossentropy",
#                   metrics=["accuracy"])

In [26]:
model_ef7_ns = efn.EfficientNetB7(include_top=False, weights="noisy-student", input_shape=(IMAGE_SIZE,IMAGE_SIZE,3))
model_ef7_ns.trainable=False
model = Conv2D(filters=32, kernel_size=(3,3), data_format="channels_last" ,activation="relu", kernel_regularizer=l2(REG))(model_ef7_ns.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=32, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = MaxPooling2D(pool_size=(2,2), padding="SAME")(model)
model = Dropout(0.25)(model)
model = Flatten()(model)
model = Dense(256, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Dropout(0.5)(model)
model = Dense(64, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
output = Dense(1, activation="sigmoid")(model)
model_d121 = Model(inputs=model_ef7_ns.input, outputs=output)
model_d121.summary()
model_d121.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 300, 300, 3)  0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, 150, 150, 64) 1728        input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, 150, 150, 64) 256         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activation (Activation)    (None, 150, 150, 64) 0           stem_bn[0][0]                    
____________________________________________________________________________________________

Total params: 64,935,889
Trainable params: 64,624,401
Non-trainable params: 311,488
__________________________________________________________________________________________________


In [27]:
w_0, w_1 = class_weight.compute_class_weight("balanced", np.unique(train_df_copy["target"]), train_df_copy["target"])

In [28]:
class_weight = {
    0: w_0,
    1: w_1
}

In [29]:
history = model_d121.fit_generator(train_generator,
                                   epochs=30,
                                   steps_per_epoch=200,
                                   callbacks = [lr, es],
                                   validation_data = val_generator,
                                   validation_steps=25,
                                   class_weight=class_weight
                                  )

Epoch 1/30




ResourceExhaustedError:  OOM when allocating tensor with shape[16,150,150,192] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node SquaredDifference_9-0-0-TransposeNCHWToNHWC-LayoutOptimizer}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_keras_scratch_graph_158715]

Function call stack:
keras_scratch_graph


In [None]:
test_generator.reset()
pred = model_d121.predict(test_generator, steps = len(test_generator), verbose=1)

In [None]:
pred

In [None]:
sub = pd.read_csv("sample_submission.csv")

In [None]:
sub

In [None]:
sub["target"] = pred

In [None]:
#sub.to_csv("d121_1conv2d.csv", index=False)

In [None]:
sub.to_csv("ef7_ns_conv2d.csv", index=False)