## Skin Low Resolution Color Image Modeling 

### Importing Libraries

In [31]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(42)
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils

%matplotlib inline

### Reading in the MetaData and Image Files

In [32]:
skin = pd.read_csv('./HAM10000_metadata.csv')

Low Resolution Images provide the features

In [33]:
skin_8by8 = pd.read_csv('./hmnist_8_8_RGB.csv')#low res images

dropping label column as only pixel data is needed

In [34]:
skin_8by8.drop('label',axis=1, inplace=True)

In [35]:
skin =pd.concat([skin,skin_8by8],axis = 1)

In [36]:
skin.shape

(10015, 199)

In [37]:
#y = np.where(skin['dx']=='mel',1,0)

Target Distribution

In [38]:
skin['dx'].value_counts(normalize=True)

nv       0.669496
mel      0.111133
bkl      0.109735
bcc      0.051323
akiec    0.032651
vasc     0.014179
df       0.011483
Name: dx, dtype: float64

The two diagnoses I want to focus on are melanoma, 11.1% of the total and basal cell carcinoma, 5.1% of the total. To avoid unbalanced classes, take a random sample of the remaining , noncancer diagnosis purposefully not undersampling the 'nv' which is the most challenging to discern from melanoma as well as the largest portion of diagnoses 66.9%.

In [39]:
malignant=skin[(skin['dx']=='mel')| (skin['dx']=='bcc')]
malignant.shape

(1627, 199)

In [40]:
benign= skin[(skin['dx']=='df')|(skin['dx']=='nv')|(skin['dx']=='bkl')|(skin['dx']=='vasc')|(skin['dx']=='akiec')]
benign.shape

(8388, 199)

In [41]:
benign = benign.sample(benign.shape[0], replace = True)
benign.shape

(8388, 199)

In [42]:
malignant['dx'] =1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [43]:
malignant['dx'].value_counts()

1    1627
Name: dx, dtype: int64

In [44]:
benign['dx']=0

In [45]:
benign['dx'].value_counts()

0    8388
Name: dx, dtype: int64

In [46]:
skin_equal = pd.concat((malignant,benign.head(1627)))

In [47]:
skin_equal.shape

(3254, 199)

In [52]:
skin_equal.set_index('image_id', inplace = True)

In [53]:
skin_equal.head()

Unnamed: 0_level_0,lesion_id,dx,dx_type,age,sex,localization,pixel0000,pixel0001,pixel0002,pixel0003,...,pixel0182,pixel0183,pixel0184,pixel0185,pixel0186,pixel0187,pixel0188,pixel0189,pixel0190,pixel0191
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ISIC_0025964,HAM_0000871,1,histo,40.0,female,chest,169,143,150,185,...,201,207,183,195,199,171,183,166,138,143
ISIC_0030623,HAM_0000871,1,histo,40.0,female,chest,203,176,192,206,...,206,211,181,205,210,185,207,211,189,206
ISIC_0027190,HAM_0000040,1,histo,80.0,male,upper extremity,148,127,148,161,...,190,168,151,176,161,148,173,149,134,158
ISIC_0031023,HAM_0005678,1,histo,60.0,male,chest,116,92,107,202,...,219,229,212,224,240,227,241,147,131,149
ISIC_0028086,HAM_0005678,1,histo,60.0,male,chest,227,207,229,225,...,248,241,236,241,247,240,247,242,232,243


In [57]:
X = skin_equal.drop(columns=['lesion_id','dx','dx_type',
                    'age', 'sex','localization'])

In [58]:
X.head()

Unnamed: 0_level_0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel0182,pixel0183,pixel0184,pixel0185,pixel0186,pixel0187,pixel0188,pixel0189,pixel0190,pixel0191
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ISIC_0025964,169,143,150,185,155,169,199,171,185,204,...,201,207,183,195,199,171,183,166,138,143
ISIC_0030623,203,176,192,206,177,197,209,183,199,209,...,206,211,181,205,210,185,207,211,189,206
ISIC_0027190,148,127,148,161,143,165,167,148,169,171,...,190,168,151,176,161,148,173,149,134,158
ISIC_0031023,116,92,107,202,176,199,217,197,220,233,...,219,229,212,224,240,227,241,147,131,149
ISIC_0028086,227,207,229,225,206,225,248,241,255,198,...,248,241,236,241,247,240,247,242,232,243


Train Test Split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)


Making sure to have correct type

In [None]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

Changing to numpy array

In [None]:
X_train=X_train.values
X_test=X_test.values

Looking at X_train shape

In [None]:
X_train[0].shape

Looking at X_train first entry

In [None]:
X_train[0]

In [None]:
# This scales each value to be between 0 and 1. 
X_train/=255
X_test/=255

In [None]:
# Reshape each image to be 8 x 8 x 3.
X_train = X_train.reshape(X_train.shape[0],8,8,3) 
X_test = X_test.reshape(X_test.shape[0],8,8,3) 
# Reshaping your images is often one of the most difficult
# aspects of machine learning with image data.

Checking shape of X_train and y_train now

In [None]:
X_train.shape

In [None]:
y_train.shape

### Model Construction

Starting with Feed Forward Neural Network

In [None]:
model = Sequential()

In [None]:
model.add(Flatten())

In [None]:
model.add(Dense(128, input_shape=(8, 8, 3), activation='relu'))

In [None]:
model.add(Dense(32, activation='relu'))

In [None]:
model.add(Dense(1, activation='sigmoid'))

In [None]:
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
history = model.fit(X_train,
                    y_train, 
                    batch_size=256,
                    validation_data=(X_test, y_test),
                    epochs=5,
                    verbose=1)

In [None]:
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing loss over epochs.
plt.plot(train_loss, label='Training Loss', color='#185fad')
plt.plot(test_loss, label='Testing Loss', color='orange')

# Set title
plt.title('Training and Testing Loss by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Binomial Crossentropy', fontsize = 18)
plt.xticks([0, 1, 2, 3, 4])

plt.legend(fontsize = 18);

In [None]:
# Evaluate model on test data.
score = model.evaluate(X_test, y_test, verbose=0)
labels = model.metrics_names

In [None]:
# Show model performance.
print(f'{labels[0]}: {score[0]}')
print(f'{labels[1]}: {score[1]}')

In [None]:
model.summary()

Constructing Convolutional Neural Network

In [None]:
# Instantiate a CNN.
cnn_model = Sequential()

In [None]:
# Add a convolutional layer.

cnn_model.add(Conv2D(filters = 6,     # number of filters
                     kernel_size = 3, # height/width of filter
                     activation = 'relu', # activation function 
                     input_shape = (8, 8, 3))) # shape of input (image)

In [None]:
# dimensions of region of pooling
cnn_model.add(MaxPooling2D(pool_size=(2,2))) 

In [None]:
cnn_model.add(Conv2D(16,
                     kernel_size = 3,
                     activation='relu'))

In [None]:
cnn_model.add(Flatten())

In [None]:
# Add a densely-connected layer with 128 neurons.
cnn_model.add(Dense(128, activation='relu'))

In [None]:
# Add a final layer with 10 neurons.
cnn_model.add(Dense(1, activation='sigmoid'))

In [None]:
cnn_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc']
)

In [None]:
# Fit model on training data
cnn_model.fit(X_train,
              y_train,
              batch_size=256,
              epochs=10,
              verbose=1)

In [None]:
cnn_model.summary()

In [None]:
# Evaluate model on test data
cnn_score = cnn_model.evaluate(X_test, y_test, verbose=0)
cnn_labels = cnn_model.metrics_names

In [None]:
# Compare CNN and FFNN models.
print(f'CNN {cnn_labels[0]}  : {cnn_score[0]}')
print(f'CNN {cnn_labels[1]}   : {cnn_score[1]}')
print()
print(f'FFNN {labels[0]} : {score[0]}')
print(f'FFNN {labels[1]}  : {score[1]}')

#### Building a second convolutional model

In [None]:
# Instantiate a CNN.
cnn_model_2 = Sequential()

# Add a convolutional layer.
cnn_model_2.add(Conv2D(filters = 16,         # number of filters
                     kernel_size = 3,        # height/width of filter
                     activation='relu',      # activation function 
                     input_shape=(8,8,3))) # shape of input (image)

# Add a pooling layer.
cnn_model_2.add(MaxPooling2D(pool_size=(2,2))) # dimensions of region of pooling

# Add another convolutional layer.
#cnn_model_2.add(Conv2D(32,
                     kernel_size = 3,
                     activation='relu'))

# Add another pooling layer.
#cnn_model_2.add(MaxPooling2D(pool_size=(2,2)))

# We have to remember to flatten to go from the "box" to the vertical line of nodes!
cnn_model_2.add(Flatten())

# Add a densely-connected layer with 64 neurons.
cnn_model_2.add(Dense(64, activation='relu'))

# Let's try to avoid overfitting!
cnn_model_2.add(Dropout(0.5))

# Add a densely-connected layer with 16 neurons.
cnn_model_2.add(Dense(16, activation='relu'))

# Let's try to avoid overfitting!
cnn_model_2.add(Dropout(0.5))

# Add a final layer with 1 neuron.
cnn_model_2.add(Dense(1, activation='sigmoid'))

# Compile model
cnn_model_2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc']
)
# Fit model on training data
cnn_model_2.fit(X_train,
                y_train,
                batch_size=128,
                epochs=5,
                verbose=1)

In [None]:
# Evaluate model on test data
cnn_2_score = cnn_model_2.evaluate(X_test, y_test, verbose=0)
cnn_2_labels = cnn_model_2.metrics_names

# Compare models.
print(f'CNN 2 {cnn_2_labels[0]}  : {cnn_2_score[0]}')
print(f'CNN 2 {cnn_2_labels[1]}   : {cnn_2_score[1]}')
print()
print(f'CNN 1 {cnn_labels[0]}  : {cnn_score[0]}')
print(f'CNN 1 {cnn_labels[1]}   : {cnn_score[1]}')
print()
print(f'FFNN {labels[0]} : {score[0]}')
print(f'FFNN {labels[1]}  : {score[1]}')

cnn_model_2.summary()

#### Building a third convolutional model

In [None]:
# Instantiate a CNN.
cnn_model_3 = Sequential()

# Add a convolutional layer.
cnn_model_3.add(Conv2D(filters = 16,         # number of filters
                     kernel_size = 3,        # height/width of filter
                     activation='relu',      # activation function 
                     input_shape=(8,8,3))) # shape of input (image)

# Add a pooling layer.
cnn_model_3.add(MaxPooling2D(pool_size=(2,2))) # dimensions of region of pooling

# Add another convolutional layer.
#cnn_model_3.add(Conv2D(64,
                     kernel_size = 3,
                     activation='relu'))

# Add another pooling layer.
#cnn_model_3.add(MaxPooling2D(pool_size=(2,2)))

# We have to remember to flatten to go from the "box" to the vertical line of nodes!
cnn_model_3.add(Flatten())

# Add a densely-connected layer with 64 neurons.
cnn_model_3.add(Dense(64, activation='relu'))

# Let's try to avoid overfitting!
cnn_model_3.add(Dropout(0.5))

# Add a densely-connected layer with 32 neurons.
cnn_model_3.add(Dense(32, activation='relu'))

# Let's try to avoid overfitting!
cnn_model_3.add(Dropout(0.5))

# Add a final layer with 1 neuron.
cnn_model_3.add(Dense(1, activation='sigmoid'))

# Compile model
cnn_model_3.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc']
)

# Fit model on training data
history = cnn_model_3.fit(X_train,
                          y_train,
                          batch_size=128,
                          validation_data=(X_test, y_test),
                          epochs=10,
                          verbose=1)

In [None]:
# Check out our train loss and test loss over epochs.
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing loss over epochs.
plt.plot(train_loss, label='Training Loss', color='#185fad')
plt.plot(test_loss, label='Testing Loss', color='orange')

# Set title
plt.title('Training and Testing Loss by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Binomial Crossentropy', fontsize = 18)
plt.xticks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

plt.legend(fontsize = 18);

In [None]:
# Evaluate model on test data.
cnn_3_score = cnn_model_3.evaluate(X_test,
                                   y_test,
                                   verbose=1)

cnn_3_labels = cnn_model_3.metrics_names

# Compare models.
print(f'CNN 3 {cnn_3_labels[0]}  : {cnn_3_score[0]}')
print(f'CNN 3 {cnn_3_labels[1]}   : {cnn_3_score[1]}')
print()
print(f'CNN 2 {cnn_2_labels[0]}  : {cnn_2_score[0]}')
print(f'CNN 2 {cnn_2_labels[1]}   : {cnn_2_score[1]}')
print()
print(f'CNN 1 {cnn_labels[0]}  : {cnn_score[0]}')
print(f'CNN 1 {cnn_labels[1]}   : {cnn_score[1]}')
print()
print(f'FFNN {labels[0]} : {score[0]}')
print(f'FFNN {labels[1]}  : {score[1]}')