# Melanoma CNN

## Setup

In [25]:
import pandas as pd

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import ImageDataGenerator

import PIL

from tempfile import TemporaryFile
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import tensorflow as tf
import scipy

import numpy as np
import pandas as pd

from skimage.io import imread
from skimage.transform import resize

import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical

import cv2
import os
os.sys.path
import argparse
import random

In [26]:
# Load the training metadata CSV into a DataFrame.
train_csv_path = "/home/leslie/code/melanoma_files/train.csv"
df = pd.read_csv(train_csv_path)

In [27]:
# Discard the metadata columns that the rest of the notebook never reads.
unused_columns = [
    'sex',
    'anatom_site_general_challenge',
    'benign_malignant',
    'diagnosis',
    'age_approx',
]
df = df.drop(columns=unused_columns)

## Extract the numeric patient IDs and create a list of unique patients

In [28]:
# Strip the first three characters of every patient_id and convert what is
# left to a numeric dtype.
patient_suffix = df["patient_id"].str.slice(start=3)
df["patient_id"] = pd.to_numeric(patient_suffix)

In [29]:
# Collect every distinct patient_id exactly once.  Series.unique() does this
# in a single vectorised pass (in order of first appearance), replacing a
# row-by-row .iloc loop; .tolist() yields a plain list that can be shuffled
# and sliced.
unique_patients = df["patient_id"].unique().tolist()

## Split the unique patient_id list 80/20 into train and validation sets so that images from the same patient never end up in both dataframes

In [30]:
# Shuffle with a fixed seed so the same patients fall on the same side of the
# split on every run; an unseeded shuffle makes results impossible to
# reproduce or compare between runs.  A dedicated Random instance leaves the
# global random state untouched.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(unique_patients)

# 80% of the patients go to training; round() already returns an int.
train_size = round(len(unique_patients) * 0.8)

In [31]:
# Patient ids on either side of the train_size cut-off.
train = unique_patients[:train_size]
test = unique_patients[train_size:]

# Select rows by patient membership, so all images of one patient stay in
# the same dataframe.
in_train = df["patient_id"].isin(train)
in_test = df["patient_id"].isin(test)
x_train = df[in_train]
x_test = df[in_test]

# Set aside the labels column for the train and test sections.
y_train = x_train["target"]
y_test = x_test["target"]

In [32]:
# Convert image names and labels to plain Python lists for the batch
# generators; each pair comes from the same dataframe, so they stay aligned.
X_train_filenames = x_train["image_name"].to_list()
X_val_filenames = x_test["image_name"].to_list()

y_train = y_train.to_list()
y_val = y_test.to_list()

## Create class for a custom generator

In [33]:
# Side length, in pixels, that every image is resized to (IMG_SIZE x IMG_SIZE)
# before it is passed to the network.
IMG_SIZE = 400

In [34]:
class My_Custom_Generator(keras.utils.Sequence):
    """Keras Sequence that loads labelled images from disk one batch at a time.

    Each batch is read from ``image_dir``, resized to a square and scaled to
    the [0, 1] range, so the full image set never has to sit in memory.

    Args:
        image_filenames: image names without the ``.jpg`` extension.
        labels: labels in the same order as ``image_filenames``.
        batch_size: number of images per batch.
        image_dir: directory containing the ``<name>.jpg`` files.
        image_size: side length of the resized square image; when ``None``
            the module-level ``IMG_SIZE`` is looked up at batch time.
    """

    def __init__(self, image_filenames, labels, batch_size,
                 image_dir='/home/leslie/code/melanoma_files_small/jpeg/train/',
                 image_size=None):
        # Newer Keras releases expect Sequence subclasses to initialise the
        # base class; on older releases this is a harmless no-op.
        super().__init__()
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Plain int(): the np.int alias was removed in NumPy 1.24.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return ``(images, labels)`` for batch number ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]

        side = IMG_SIZE if self.image_size is None else self.image_size
        images = [
            cv2.resize(
                imread(os.path.join(self.image_dir, str(file_name) + '.jpg')),
                (side, side),
            )
            for file_name in batch_x
        ]
        # Divide by 255 to scale 8-bit pixel values into [0, 1].
        return np.array(images) / 255.0, np.array(batch_y)

## Create the training and validation generators

In [35]:
# Images per batch, shared by the training and validation generators.
batch_size = 32

# One generator per split; both read from the same image directory.
my_training_batch_generator = My_Custom_Generator(
    image_filenames=X_train_filenames,
    labels=y_train,
    batch_size=batch_size,
)
my_validation_batch_generator = My_Custom_Generator(
    image_filenames=X_val_filenames,
    labels=y_val,
    batch_size=batch_size,
)

## Create model layers and run model

In [38]:
# Two convolution / ReLU / max-pool stages followed by a small dense head
# with a single sigmoid output for the binary target.
model = Sequential([
    # Only the first layer declares the input shape; later layers infer
    # theirs from the layer before, so repeating input_shape on the second
    # convolution was redundant and misleading.
    Conv2D(64, (3, 3), input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    # NOTE(review): this layer has no activation, so it is purely linear and
    # composes with the Dense(1) below into a single linear map.  Left as-is
    # to keep the architecture unchanged; confirm whether a ReLU was intended.
    Dense(64),

    Dense(1),
    Activation("sigmoid"),
])

model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)

In [39]:
# Model.fit accepts a keras.utils.Sequence directly; fit_generator is
# deprecated.  The step counts are left unset so Keras uses len(generator):
# the previous floor-divided counts skipped the final partial batch of both
# the training and the validation data on every epoch.
model.fit(my_training_batch_generator,
          epochs=10,
          verbose=1,  # 1 = progress bar per epoch
          validation_data=my_validation_batch_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fd0f82d2fa0>

## Save/load model

In [40]:
# Persist the trained model to disk.
model_dir = '/home/leslie/code/melanoma_models/cnn_400'
model.save(model_dir)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /home/leslie/code/melanoma_models/cnn_400/assets


In [41]:
# Restore the saved model from disk.
model_dir = '/home/leslie/code/melanoma_models/cnn_400'
model = keras.models.load_model(model_dir)

## Create generator class for the test images

In [80]:
class My_Custom_Test_Generator(keras.utils.Sequence):
    """Keras Sequence that loads unlabelled images from disk one batch at a time.

    Each batch is read from ``image_dir``, resized to a square and scaled to
    the [0, 1] range.  No labels are returned.

    Args:
        image_filenames: image names without the ``.jpg`` extension.
        batch_size: number of images per batch.
        image_dir: directory containing the ``<name>.jpg`` files.
        image_size: side length of the resized square image; when ``None``
            the module-level ``IMG_SIZE`` is looked up at batch time.
    """

    def __init__(self, image_filenames, batch_size,
                 image_dir='/home/leslie/code/melanoma_files_small/jpeg/test/',
                 image_size=None):
        # Newer Keras releases expect Sequence subclasses to initialise the
        # base class; on older releases this is a harmless no-op.
        super().__init__()
        self.image_filenames = image_filenames
        self.batch_size = batch_size
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Plain int(): the np.int alias was removed in NumPy 1.24.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return the image array for batch number ``idx``."""
        start = idx * self.batch_size
        batch_x = self.image_filenames[start:start + self.batch_size]

        side = IMG_SIZE if self.image_size is None else self.image_size
        images = [
            cv2.resize(
                imread(os.path.join(self.image_dir, str(file_name) + '.jpg')),
                (side, side),
            )
            for file_name in batch_x
        ]
        # Divide by 255 to scale 8-bit pixel values into [0, 1].
        return np.array(images) / 255.0

## Use model to create predictions for test images and export the results as a .csv

In [81]:
# Load the test metadata and discard the columns that are not used for
# prediction.
test_csv_path = "/home/leslie/code/melanoma_files/test.csv"
test_unused_columns = ['sex', 'anatom_site_general_challenge', 'age_approx']
df_test = pd.read_csv(test_csv_path).drop(columns=test_unused_columns)

# Re-declared here so the prediction cells can run without the training cells.
IMG_SIZE, batch_size = 400, 32

In [82]:
# Image names in CSV row order; predictions are produced in this same order.
test_filenames = df_test["image_name"].to_list()
my_test_batch_generator = My_Custom_Test_Generator(
    image_filenames=test_filenames,
    batch_size=batch_size,
)

In [83]:
# Run the model over every test batch and collect its sigmoid outputs.
prediction = model.predict(x=my_test_batch_generator)

In [86]:
# Pair each predicted score with its image name, with image_name as the
# first column.
columns_titles = ["image_name", "target"]
scores = pd.DataFrame(prediction, columns=['target'])
df_results = scores.assign(image_name=test_filenames)[columns_titles]

In [87]:
# Export the predictions with a header row and without the index column.
results_csv_path = '/home/leslie/code/melanoma_predictions/results.csv'
df_results.to_csv(results_csv_path, index=False, header=True)