In [22]:
import os, shutil
from keras import models
from keras import layers
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from numpy import newaxis
import random
from keras.applications import Xception
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import metrics
import keras

In [2]:
os.mkdir('train')
os.mkdir('val')

In [3]:
os.chdir('train')
os.mkdir('benign')
os.mkdir('malignant')
os.chdir('../val')
os.mkdir('benign')
os.mkdir('malignant')

In [4]:
os.chdir('..')

In [5]:
os.chdir('../input/siim-isic-melanoma-classification/')

In [6]:
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
df_ss = pd.read_csv('sample_submission.csv')

In [7]:
df_train.sample(10)

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
6171,ISIC_1945381,IP_1999627,male,80.0,torso,unknown,benign,0
18177,ISIC_5539180,IP_2724119,female,45.0,lower extremity,unknown,benign,0
21537,ISIC_6535381,IP_2147407,female,35.0,torso,unknown,benign,0
13212,ISIC_4062586,IP_0070552,female,55.0,torso,nevus,benign,0
15693,ISIC_4795570,IP_3237448,male,35.0,lower extremity,nevus,benign,0
28956,ISIC_8760029,IP_4557152,female,60.0,upper extremity,unknown,benign,0
3968,ISIC_1298092,IP_4479736,female,25.0,lower extremity,unknown,benign,0
19513,ISIC_5923462,IP_7842431,male,40.0,lower extremity,unknown,benign,0
9521,ISIC_2953711,IP_0718832,female,50.0,torso,unknown,benign,0
22254,ISIC_6742341,IP_1139701,female,45.0,upper extremity,unknown,benign,0


In [8]:
'''
    Setting up directories
'''
#Original directory of images in the input dir
original_dir = '/kaggle/input/siim-isic-melanoma-classification/jpeg'

#test and train dirs...
train_dir = '/kaggle/input/siim-isic-melanoma-classification/jpeg/train'
test_dir = '/kaggle/input/siim-isic-melanoma-classification/jpeg/test'

#destination dir
des_train = '/kaggle/working/train'
des_val = '/kaggle/working/val'

In [9]:
'''
    Allocating train and val images...
'''
train_split = df_train.iloc[0:25000]
val_split = df_train.iloc[25000:]

In [10]:
'''
    Data is imbalanced. ~500 malignant to 24500 beningn. Might have to do balancing of data.
'''
train_benign = train_split.loc[train_split['benign_malignant'] == 'benign']
train_malignant = train_split.loc[train_split['benign_malignant'] == 'malignant']
val_benign = val_split.loc[val_split['benign_malignant'] == 'benign']
val_malignant = val_split.loc[val_split['benign_malignant'] == 'malignant']

In [11]:
'''
    Copting balanced datasets of images into working directory...
'''
random.seed(121)
fnames_t_b = list(train_benign['image_name'])
fnames_t_b = random.sample(fnames_t_b, 500)
for fname in fnames_t_b:
    src = os.path.join(train_dir,fname + '.jpg')
    des = os.path.join(os.path.join(des_train, 'benign'), fname + '.jpg')
    shutil.copyfile(src, des)
    
fnames_t_m = list(train_malignant['image_name'])
fnames_t_m = random.sample(fnames_t_m, len(fnames_t_m))
for fname in fnames_t_m:
    src = os.path.join(train_dir,fname + '.jpg')
    des = os.path.join(os.path.join(des_train, 'malignant'),fname + '.jpg')
    shutil.copyfile(src, des)
    
fnames_v_b = list(val_benign['image_name'])
fnames_v_b = random.sample(fnames_v_b, 150)
for fname in fnames_v_b:
    src = os.path.join(train_dir,fname + '.jpg')
    des = os.path.join(os.path.join(des_val, 'benign'),fname + '.jpg')
    shutil.copyfile(src, des)
    
fnames_v_m = list(val_malignant['image_name'])
fnames_v_m = random.sample(fnames_v_m, len(fnames_v_m))
for fname in fnames_v_m:
    src = os.path.join(train_dir,fname + '.jpg')
    des = os.path.join(os.path.join(des_val, 'malignant'),fname + '.jpg')
    shutil.copyfile(src, des)

In [23]:
'''
    Using DenseNet169 for starters. 
    We have evidence from literature that this might be good
    https://arxiv.org/ftp/arxiv/papers/2003/2003.02597.pdf
'''
conv_base = Xception(weights = 'imagenet', include_top = False, input_shape = (256, 256, 3))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [26]:
'''
    Basic model architecture. We aim to use data augmentation...
'''
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dense(1, activation = 'sigmoid'))
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Model)             (None, 8, 8, 2048)        20861480  
_________________________________________________________________
flatten_5 (Flatten)          (None, 131072)            0         
_________________________________________________________________
dense_9 (Dense)              (None, 256)               33554688  
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 257       
Total params: 54,416,425
Trainable params: 33,554,945
Non-trainable params: 20,861,480
_________________________________________________________________


In [25]:
'''
    It is very important to freeze the weights of the convolutional base model. This is to ensure we have some benefit from
    the pretrianed model. 
'''
print('Number of trainable weights before freezing = ', len(model.trainable_weights))
conv_base.trainable = False
print('Number of trainable weights before freezing = ', len(model.trainable_weights))

Number of trainable weights before freezing =  158
Number of trainable weights before freezing =  4


In [18]:
'''
    Creating generator objects
'''
train_datagen = ImageDataGenerator(rescale=1./255,rotation_range=40,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.2,zoom_range=0.2,horizontal_flip=True,fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(des_train,target_size=(256, 256),batch_size=5,class_mode='binary')

validation_generator = test_datagen.flow_from_directory(des_val,target_size=(256, 256),batch_size=5,class_mode='binary')

model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['acc', keras.metrics.Precision(), keras.metrics.Recall(), keras.metrics.AUC()])

history = model.fit_generator(train_generator,steps_per_epoch=200 ,epochs=10, validation_data=validation_generator,validation_steps=60)

Found 939 images belonging to 2 classes.
Found 295 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

KeyboardInterrupt: 

In [6]:
model = keras.models.load_model('VVG16_baseline.h5')

OSError: Unable to open file (unable to open file: name = 'VVG16_baseline.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [64]:
test_datagen = ImageDataGenerator(rescale=1/255.)
test_generator = test_datagen.flow_from_directory(os.path.join(original_dir, 'test'),target_size=(256, 256),batch_size=32,class_mode= None)

Found 0 images belonging to 0 classes.


In [1]:
proba = model.predict_generator(test_generator, steps=320)

NameError: name 'model' is not defined

In [116]:
for files in os.listdir():
    os.remove(files)

In [120]:
len(os.listdir())

145

In [7]:
os.getcwd()

'/kaggle/working'