In [1]:
import numpy as np
# Seed NumPy's global random generator.
# NOTE(review): no TensorFlow seed is set in the visible cells, so weight
# initialisation and dropout may still differ between runs — confirm.
np.random.seed(1337) # for reproducibility

In [2]:
from tensorflow import keras
import pandas as pd

from src.dataframe import importDfPickle

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D, MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.python.keras.optimizers import Adadelta, Adam
from keras.utils import to_categorical

# NOTE(review): `classifier` is not referenced again in the visible cells;
# the network that gets trained is built later under the name `model`.
classifier = Sequential()

Using TensorFlow backend.


In [3]:
# Load the image DataFrame through the project helper.
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
df=importDfPickle("./output/df_30mil.pkl")

In [4]:
# Number of rows and columns in the loaded frame.
df.shape

(30000, 3)

In [5]:
# Stack the per-row image arrays of the 'image' column into a single NumPy
# array of shape (rows, 50, 50, 3), and pull the labels out as a 1-D array.
X = np.asarray(df['image'].tolist())
y = df['label'].to_numpy()

In [6]:
# Check the shape of the stacked image array.
X.shape

(30000, 50, 50, 3)

In [7]:
def data_summary(X_train, y_train, X_test, y_test):
    """Print the shape of each train/test image and label array.

    Each argument only needs a ``.shape`` attribute; nothing is returned.
    """
    report = (
        ('Train images shape:', X_train),
        ('Train labels shape:', y_train),
        ('Test images shape:', X_test),
        ('Test labels shape:', y_test),
    )
    for caption, array in report:
        print(caption, array.shape)

In [8]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as the test split; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Print the shapes of the four arrays produced by the split.
data_summary(X_train, y_train, X_test, y_test)

Train images shape: (24000, 50, 50, 3)
Train labels shape: (24000,)
Test images shape: (6000, 50, 50, 3)
Test labels shape: (6000,)


In [10]:
import matplotlib.pyplot as plt
# Preview the first 25 training images in a 5x5 grid, each captioned with its label.
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)  # subplot positions are 1-based
    # Hide ticks and grid so only the image itself is drawn.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # NOTE(review): imshow ignores cmap for RGB(A) input, so it has no effect on 3-channel images.
    plt.imshow(X_train[i], cmap=plt.cm.binary)
    plt.xlabel(y_train[i])
plt.show()

<Figure size 1000x1000 with 25 Axes>

In [12]:
# NOTE(review): this whole cell is wrapped in a string literal, so none of it runs:
# X_train / X_test are NOT rescaled to [0, 1] and the labels are NOT one-hot encoded.
# The cell only evaluates to (and displays) the string itself.
"""# Incoming data is in uint8. Cast the input data images to be floats in range [0.0-1.0]  
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)"""

"# Incoming data is in uint8. Cast the input data images to be floats in range [0.0-1.0]  \nX_train = X_train.astype('float32')\nX_test = X_test.astype('float32')\nX_train /= 255\nX_test /= 255\n\nprint('x_train shape:', X_train.shape)\nprint(X_train.shape[0], 'train samples')\nprint(X_test.shape[0], 'test samples')\n# convert class vectors to binary class matrices\ny_train = keras.utils.to_categorical(y_train, 2)\ny_test = keras.utils.to_categorical(y_test, 2)"

In [12]:
# Per-image input shape and number of output classes used by the model definition.
input_shape=(50,50,3)
num_classes = 2

In [13]:
## Proposed CNN: two convolutions, pooling, dropout, then a dense classifier head.
# NOTE(review): two identical 2x2 max-pooling layers are stacked back to back;
# kept exactly as originally defined — confirm the repetition is intentional.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
# The sparse loss consumes integer class labels directly (no one-hot encoding).
model.compile(
    optimizer='adadelta',
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

In [14]:
# Fit the NN
batch_size = 220
epochs = 14

# NOTE(review): the test split is passed as validation data and is also the
# split used for the final evaluation, so there is no separate validation set.
# NOTE(review): the stored output reports 28000/7000 samples, which does not
# match the 24000/6000 split printed earlier — the output looks stale.
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test))

Train on 28000 samples, validate on 7000 samples
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<tensorflow.python.keras.callbacks.History at 0x7fb5767ba5c0>

In [15]:
# Evaluate the model with test data
# score[0] is the loss; score[1] is the 'accuracy' metric requested at compile time.
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.6126549941471645
Test accuracy: 0.71257144


In [25]:
import json

# serialize model to JSON
# NOTE(review): the file names hard-code what look like batch size, epochs and an
# accuracy tag ("acu73"); the stored evaluation output shows ~0.71 — confirm the tag.
model_json = model.to_json()
with open("model-Seq220,14,acu73.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model-Seq220,14,acu73.h5")
print("Saved model to disk")

Saved model to disk


In [19]:
import cv2
def resizeImages(img_path,size=(50,50)):
    """Read an image from disk and resize it to ``size`` (50x50 by default).

    Args:
        img_path: Path of the image file, handed to ``cv2.imread``.
        size: Target size passed to ``cv2.resize`` as ``(width, height)``.

    Returns:
        The resized image array produced with bicubic interpolation.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``img_path``.

    NOTE(review): ``cv2.imread`` returns channels in BGR order; how the training
    images were loaded is not visible here — confirm the channel order matches.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread does not raise on a missing/unreadable file; it returns None,
        # which would otherwise surface as an opaque error inside cv2.resize.
        raise OSError("cv2.imread could not read image: {!r}".format(img_path))
    return cv2.resize(image, size, interpolation=cv2.INTER_CUBIC)

In [15]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image

# Sanity-check the model on one held-out image; expand_dims adds the leading batch axis.
# NOTE(review): the Benign/Malignant names assume output index 0 = benign and
# index 1 = malignant — confirm against how the labels were encoded.
img = X_test[31].astype('float32')
pred = model.predict(np.expand_dims(img,axis=0))[0]
print("Probs -> Benign:{0:.5f} Malignant:{1:.5f}".format(pred[0],pred[1]))


Probs -> Benign:0.50565 Malignant:0.49435


In [23]:
# Path of an image file used for a manual prediction check.
file = "./images/16896_idx5_x51_y151_class0.png"

In [24]:
# Load the file, resize it to the default 50x50 and cast to float32 for the model.
im = resizeImages(file).astype('float32')

In [25]:
# Predict on the single resized image (batch axis added) and print both class probabilities.
# NOTE(review): this duplicates the body of predictNewImage defined in a later cell.
pred_nueva=model.predict(np.expand_dims(im,axis=0))[0]
print("Probs -> Benign:{0:.5f} Malignant:{1:.5f}".format(pred_nueva[0],pred_nueva[1]))

Probs -> Benign:0.62560 Malignant:0.37440


In [25]:
# NOTE(review): the stored output of this cell does not match the probabilities
# printed by the cell above, which suggests the cells were run out of order or
# against different models — re-run the notebook top to bottom.
pred_nueva

array([0.45712334, 0.54287666], dtype=float32)

In [16]:
def predictNewImage(path):
    """Return a formatted string with the model's two class probabilities for an image file.

    The image at ``path`` is loaded and resized by ``resizeImages``, cast to
    float32, wrapped in a batch of one and passed to the global ``model``.

    NOTE(review): the Benign/Malignant names assume output index 0 = benign and
    index 1 = malignant — confirm against how the labels were encoded.
    """
    pixels = resizeImages(path).astype('float32')
    batch = np.expand_dims(pixels, axis=0)
    probs = model.predict(batch)[0]
    return f"Probs -> Benign:{probs[0]:.5f} Malignant:{probs[1]:.5f}"
    
    

In [17]:
# Path of a second image file used for a manual prediction check.
file2='./images/16896_idx5_x201_y1101_class1.png'

In [20]:
# Formatted probability string for the second sample image.
pred2=predictNewImage(file2)

In [34]:
# Display the formatted prediction for file2.
pred2

'Probs -> Benign:0.49333 Malignant:0.50667'

In [21]:
# Formatted probability string for another image whose file name ends in class1.
pred4=predictNewImage('./images/16896_idx5_x201_y1051_class1.png')

In [22]:
# Display the formatted prediction stored in pred4.
pred4

'Probs -> Benign:0.58506 Malignant:0.41494'

In [38]:
# NOTE(review): in the stored output the first rows carry label 1 while their
# paths end in `class0` — confirm how label values map to the classes, since the
# prediction cells print index 0 as Benign and index 1 as Malignant.
df.head()

Unnamed: 0,label,image,path
0,1,"[[[241, 240, 243], [241, 240, 243], [241, 240,...",images/0/10269_idx5_x1101_y901_class0.png
1,1,"[[[200, 182, 232], [210, 193, 235], [220, 211,...",images/0/9254_idx5_x1551_y1851_class0.png
2,1,"[[[206, 181, 227], [235, 230, 233], [238, 235,...",images/0/9259_idx5_x2401_y951_class0.png
3,1,"[[[226, 210, 241], [235, 232, 241], [238, 232,...",images/0/12930_idx5_x951_y1201_class0.png
4,1,"[[[242, 241, 241], [236, 236, 243], [150, 108,...",images/0/9227_idx5_x901_y1251_class0.png
