# Notebook with a basic CNN to play around with

### Importing the libraries and setting the global variables

In [8]:
import numpy as np
import keras
import os

In [11]:
# Training hyper-parameters
n_epochs = 5
batch_size = 8
num_classes = 3

# Project root: two levels above the current working directory.
# Built from os.pardir so that it is portable across operating systems and
# free of the invalid '\.' escape sequence the literal '..\..' contained.
root_folder = os.path.join(os.pardir, os.pardir)
# Where the data resides (relative to root_folder)
data_folder_name = 'Datasets/speech_data/'
# Limit on the number of images to work with
images_to_process = 4500

# Absolute path of the data folder.
# NOTE: os.path.abspath normalises the path and drops the trailing separator,
# so file names must be appended with os.path.join, not string concatenation.
DATA_PATH = os.path.abspath(os.path.join(root_folder, data_folder_name))

# Names of the .npz archives holding the input arrays and the labels
x_file = 'x_speech_arrays_zip_100.npz'
y_file = 'y_speech_labels_100.npz'

## Loading the training data and labels

In [12]:
# Load data
# File paths are built with os.path.join: DATA_PATH is produced by
# os.path.abspath, which strips the trailing separator, so plain string
# concatenation would glue the folder name and the file name together.
# The `with` blocks close each .npz archive once its array has been read.

# Load the npz file containing the input arrays (stored under the key 'arr_0')
with np.load(os.path.join(DATA_PATH, x_file)) as x_npz:
    x = x_npz['arr_0']
# Load the npz file containing the class labels, one label per sample
with np.load(os.path.join(DATA_PATH, y_file)) as y_npz:
    y = y_npz['arr_0']

In [14]:
# Report the shapes of the input array and of the label array
for caption, array in (('Dim X: ', x), ('Dim y: ', y)):
    print(caption, array.shape)

Dim X:  (100, 128, 173)
Dim y:  (100,)


## Preparing the data for the model

Now we can split the dataset into training, validation and test sets.

In [16]:
# Build the training, validation and test datasets
from sklearn.model_selection import train_test_split

# Stage 1: keep 80% of the samples for training and hold out 20% for
# validation/test, preserving the class proportions (stratify)
X_train, X_valtest, y_train, y_valtest = train_test_split(
    x, y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# Stage 2: divide the held-out 20% evenly into a test set and a validation set
X_test, X_val, y_test, y_val = train_test_split(
    X_valtest, y_valtest,
    test_size=0.5,
    random_state=1,
    stratify=y_valtest,
)

# Number of samples available for training
training_set_size = len(X_train)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(80, 128, 173)
(80,)
(10, 128, 173)
(10,)


We need to reshape the arrays to the dimensions expected by the Keras functions and model. Then we can convert the labels to categorical (one-hot) values.

In [17]:
# Append a trailing channel axis of size 1 to every input array and cast to
# float32, giving the 4-D layout (samples, dim1, dim2, channels) fed to Conv2D
X_train, X_val, X_test = (
    split.reshape(split.shape[0], split.shape[1], split.shape[2], 1).astype('float32')
    for split in (X_train, X_val, X_test)
)

# One-hot encode the class labels into vectors of length num_classes
y_train, y_test, y_val = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test, y_val)
)

In [18]:
# Report the shapes of the training and test arrays after reshaping/encoding
for caption, array in (
    ('Dim X_train: ', X_train),
    ('Dim y_train: ', y_train),
    ('Dim X_test: ', X_test),
    ('Dim y_test: ', y_test),
):
    print(caption, array.shape)

Dim X_train:  (80, 128, 173, 1)
Dim y_train:  (80, 3)
Dim X_test:  (10, 128, 173, 1)
Dim y_test:  (10, 3)


## Creating a model and training

In [19]:
# Importing the keras modules
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D


We will build a simple CNN model: three convolutional + max-pooling layers, followed by fully connected (FC) layers, with some dropout.

In [20]:
# Build a very simple CNN model
model = Sequential()
# Convolutional block 1: 16 filters of 3x3 + 2x2 max pooling.
# The input shape is read from the (already reshaped) training data, i.e. every
# axis except the sample axis, instead of being hard-coded, so the model
# follows the dimensions of whatever dataset was loaded.
model.add(Conv2D(16, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=X_train.shape[1:]))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Convolutional block 2: 32 filters of 3x3 + 2x2 max pooling
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Convolutional block 3: 64 filters of 3x3 + 2x2 max pooling, then dropout
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))
# Flatten the feature maps to feed the dense layers
model.add(Flatten())
# Fully connected hidden layer with dropout
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
# Output layer: one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 126, 171, 16)      160       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 63, 85, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 61, 83, 32)        4640      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 30, 41, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 39, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxP

The next step defines the loss function and the optimizer used to update the parameters: categorical cross-entropy and the Adadelta optimizer (an adaptive learning-rate method).

In [21]:
# Configure training: Adadelta optimizer, categorical cross-entropy loss,
# and accuracy reported as an additional metric
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)


It is time to train our model for a few epochs

In [22]:
# Train for n_epochs passes over the training set in mini-batches of
# batch_size, scoring the validation set after every epoch
# (verbose=2 prints one summary line per epoch)
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=n_epochs,
    batch_size=batch_size,
    verbose=2,
)

Instructions for updating:
Use tf.cast instead.
Train on 80 samples, validate on 10 samples
Epoch 1/5
 - 2s - loss: 1.1590 - acc: 0.2750 - val_loss: 1.0974 - val_acc: 0.4000
Epoch 2/5
 - 2s - loss: 1.0953 - acc: 0.4000 - val_loss: 1.0916 - val_acc: 0.4000
Epoch 3/5
 - 2s - loss: 1.1005 - acc: 0.4625 - val_loss: 1.0950 - val_acc: 0.5000
Epoch 4/5
 - 2s - loss: 1.0808 - acc: 0.5375 - val_loss: 1.0895 - val_acc: 0.4000
Epoch 5/5
 - 2s - loss: 1.0454 - acc: 0.5000 - val_loss: 1.0906 - val_acc: 0.4000


<keras.callbacks.History at 0x2390ab8ca20>

In [23]:
# Show the training results: the values recorded for each epoch (loss and
# accuracy on the training and validation data), keyed by metric name
model.history.history

{'val_loss': [1.0973830699920655,
  1.0915602207183839,
  1.0949917316436768,
  1.089468789100647,
  1.0905755758285522],
 'val_acc': [0.4, 0.4, 0.5, 0.4, 0.4],
 'loss': [1.1589742302894592,
  1.0953173875808715,
  1.1004945039749146,
  1.080777680873871,
  1.0453876852989197],
 'acc': [0.275, 0.4, 0.4625, 0.5375, 0.5]}

Now we can evaluate the model and report the final loss and accuracy on the test set.

In [24]:
# Score the trained model on the held-out test data; the result holds the
# loss first and the accuracy metric second
score = model.evaluate(X_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

Test loss: 1.1220920085906982
Test accuracy: 0.30000001192092896
