In [1]:
# mount google drive to colab
# comment this line if using local machine
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# importing required libraries
import os

import numpy as np
import pandas as pd

import pickle

import cv2

from skimage.io import imread

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [0]:
dataset_path = 'drive/My Drive/Colab Notebooks/Kaggle/Dataset/'
trainset_path = dataset_path+'train/'

In [0]:
label_df = pd.read_csv(dataset_path+'train.csv')

In [0]:
label_df['id_code'] = label_df['id_code'].apply(lambda x : ''.join([x,'.png']))

In [0]:
label_df['diagnosis'] = label_df['diagnosis'].apply(lambda x : str(x))

In [7]:
label_df.head()


Unnamed: 0,id_code,diagnosis
0,000c1434d8d7.png,2
1,001639a390f0.png,4
2,0024cdab0c1e.png,1
3,002c21358ce6.png,0
4,005b95c28852.png,0


In [8]:
# using keras generator
img_height = 224
img_width = 224
batch_size = 256

train_data_dir = trainset_path

datagen = ImageDataGenerator(rescale=1./255,
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    shear_range=0.1,
    zoom_range=0.2,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=False,
    vertical_flip=False,
    validation_split=0.2) # set validation split

train_generator = datagen.flow_from_dataframe(dataframe=label_df,
                                              directory=train_data_dir,
                                              x_col="id_code",
                                              y_col="diagnosis",
                                              subset="training",
                                              batch_size=batch_size,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=(img_height, img_width))

valid_generator = datagen.flow_from_dataframe(dataframe=label_df,
                                              directory=train_data_dir,
                                              x_col="id_code",
                                              y_col="diagnosis",
                                              subset="validation",
                                              batch_size=batch_size,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=(img_height, img_width))

Found 2930 validated image filenames belonging to 5 classes.
Found 732 validated image filenames belonging to 5 classes.


# Model

In [0]:
# import model libraries
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, MaxPooling2D
from tensorflow.keras.activations import relu
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.applications.vgg19 import VGG19

In [10]:
model = VGG19(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))
model.summary()

W0831 14:02:28.303445 140488070797184 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [11]:
len(model.layers)

22

In [0]:
for layer in model.layers[:17]:
  layer.trainable = False

for layer in model.layers[17:]:
  layer.trainable = True

In [13]:
x = model.output
x = Flatten()(x)
x = Dense(512, activation="relu")(x)
#x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
predictions = Dense(5, activation="softmax")(x)
model_final = Model(inputs = model.input, outputs = predictions)

model_final.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [0]:
# Early Stopping and compile model
callback = [EarlyStopping(monitor='val_loss', patience=5, verbose=2, restore_best_weights=True)]

model_final.compile(optimizer=Adam(lr=0.005, beta_1=0.9, beta_2=0.999,), loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
# train model

train_step_size = train_generator.n//train_generator.batch_size
valid_step_size = valid_generator.n//valid_generator.batch_size

model_final.fit_generator(generator=train_generator,
                    steps_per_epoch=train_step_size,
                    validation_data=valid_generator,
                    validation_steps=valid_step_size ,
                    epochs=100,
                    callbacks=callback)

Epoch 1/100


In [0]:
#Epoch 8/100
#22/22 [==============================] - 374s 17s/step - loss: 0.7624 - acc: 0.7320 - val_loss: 0.9169 - val_acc: 0.6719
#Epoch 9/100

In [0]:
model_final.save("drive/My Drive/Colab Notebooks/Kaggle/model val_loss: 0.9487 - val_acc: 0.6534.h5")
pickle.dump(model_final.history.history, open('drive/My Drive/Colab Notebooks/Kaggle/history val_loss: 0.9487 - val_acc: 0.6534.pkl', 'wb'))

In [0]:
from tensorflow.keras.models import load_model

In [0]:
model = load_model("drive/My Drive/Colab Notebooks/Kaggle/apple.h5")

In [0]:
img = cv2.resize(img, (256,256))

In [0]:
img.shape

(256, 256, 3)

In [0]:
label_df.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7.png,2
1,001639a390f0.png,4
2,0024cdab0c1e.png,1
3,002c21358ce6.png,0
4,005b95c28852.png,0


In [0]:
test_label_df = pd.read_csv(dataset_path+'test.csv')
test_label_df['id_code'] = test_label_df['id_code'].apply(lambda x : ''.join([x,'.png']))

In [0]:
test_label_df.head()

Unnamed: 0,id_code
0,0005cfc8afb6.png
1,003f0afdcd15.png
2,006efc72b638.png
3,00836aaacf06.png
4,009245722fa4.png


In [0]:
# test generator

# using keras generator
img_height = 224
img_width = 224
batch_size = 1

test_data_dir = "drive/My Drive/Colab Notebooks/Kaggle/Dataset/test"

test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_dataframe(dataframe=test_label_df,
                                              directory=test_data_dir,
                                              x_col="id_code",
                                              y_col=None,
                                              batch_size=batch_size,
                                              seed=42,
                                              shuffle=False,
                                              class_mode=None,
                                              target_size=(img_height, img_width))

Found 1928 validated image filenames.


In [0]:
test_step_size = test_generator.n//test_generator.batch_size
test_generator.reset()
pred = model.predict_generator(test_generator, 
                               steps=test_step_size,
                               verbose=1)



In [0]:
pred

array([[3.32532786e-02, 6.44928157e-01, 2.83924878e-01, 2.33698473e-03,
        3.55566256e-02],
       [1.27277599e-04, 8.61748494e-03, 2.33941749e-01, 3.80432010e-01,
        3.76881421e-01],
       [5.19566098e-03, 1.26107201e-01, 5.83317697e-01, 1.19450435e-01,
        1.65928960e-01],
       ...,
       [1.04810169e-03, 2.96588130e-02, 4.50409770e-01, 1.25161543e-01,
        3.93721700e-01],
       [5.36239932e-05, 2.15313444e-03, 1.87180638e-01, 6.02738976e-01,
        2.07873583e-01],
       [8.09906960e-01, 1.25000507e-01, 5.80828562e-02, 6.64004474e-04,
        6.34571118e-03]], dtype=float32)

In [0]:
predicted_class_indices=np.argmax(pred,axis=1)

In [0]:
print(sum(predicted_class_indices==0))
print(sum(predicted_class_indices==1))
print(sum(predicted_class_indices==2))
print(sum(predicted_class_indices==3))
print(sum(predicted_class_indices==4))

329
344
878
157
220


In [0]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [0]:
len(predictions)

1928

In [0]:
filenames=test_generator.filenames
results=pd.DataFrame({"id_code":filenames,
                      "diagnosis":predictions})


In [0]:
results['id_code'] = results['id_code'].apply(lambda x : x.split('.')[0])

In [0]:
results.to_csv("drive/My Drive/Colab Notebooks/Kaggle/sample_submission79.csv",index=False)

In [0]:
results.head()

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,1
1,003f0afdcd15,3
2,006efc72b638,2
3,00836aaacf06,2
4,009245722fa4,4
