# Notebook with a basic CNN to play around with

## Importing the libraries and setting the global variables

In [39]:
import numpy as np
import keras

In [40]:
# --- Training hyperparameters ---
n_epochs = 5        # number of epochs passed to model.fit
batch_size = 8      # samples per gradient update
num_classes = 3     # number of target classes (width of the one-hot labels)

# --- Input archives (.npz) read with np.load ---
x_file = 'x_speech_arrays_zip_100.npz'    # input arrays
y_file = 'y_speech_labels_100.npz'        # labels, one per input array

## Loading the training data and labels

In [41]:
# Load the input arrays and their labels from the two .npz archives.
#
# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open, so each archive is opened in a `with` block and closed as soon as
# its array has been read into memory.
# 'arr_0' is the key np.savez assigns to the first positionally-saved array.
with np.load(x_file) as x_npz:
    x = x_npz['arr_0']

# Class labels, one per sample (they are one-hot encoded with `num_classes`
# columns before training).
with np.load(y_file) as y_npz:
    y = y_npz['arr_0']

In [42]:
# Report the shapes of the loaded inputs and labels
for caption, loaded in (('Dim X: ', x), ('Dim y: ', y)):
    print(caption, loaded.shape)

Dimensiones X:  (100, 128, 173)
Dimensiones y:  (100,)


## Preparing the data for the model

In [43]:
# Build stratified train / validation / test partitions (80% / 10% / 10%)
from sklearn.model_selection import train_test_split

# Stage 1: keep 80% for training; the remaining 20% is held out for val + test.
# `stratify` keeps the class proportions of `y` in both parts.
X_train, X_valtest, y_train, y_valtest = train_test_split(
    x, y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# Stage 2: halve the held-out 20% into a test set and a validation set
X_test, X_val, y_test, y_val = train_test_split(
    X_valtest, y_valtest,
    test_size=0.5,
    random_state=1,
    stratify=y_valtest,
)

# Number of samples available for training
training_set_size = len(X_train)

# One shape per line: train inputs, train labels, test inputs, test labels
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), sep='\n')

(80, 128, 173)
(80,)
(10, 128, 173)
(10,)


In [44]:
# The Conv2D input used below is 4-D, channels last: (batch, rows, cols, channels).
# The arrays are single-channel, so a trailing axis of size 1 is appended and
# the values are cast to float32.
# NOTE: this cell overwrites its own inputs (X_* and y_*), so it is meant to be
# run once after the split cell.
def _add_channel_axis(batch):
    """Return `batch` reshaped to (N, rows, cols, 1) and cast to float32."""
    n_samples, n_rows, n_cols = batch.shape[0], batch.shape[1], batch.shape[2]
    return batch.reshape(n_samples, n_rows, n_cols, 1).astype('float32')

X_train = _add_channel_axis(X_train)
X_val = _add_channel_axis(X_val)
X_test = _add_channel_axis(X_test)

# One-hot encode the label vectors into (N, num_classes) matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)

In [45]:
# Report the shapes of the prepared training and test tensors
for caption, tensor in (
    ('Dim X_train: ', X_train),
    ('Dim y_train: ', y_train),
    ('Dim X_test: ', X_test),
    ('Dim y_test: ', y_test),
):
    print(caption, tensor.shape)

Dimensiones X_train:  (80, 128, 173, 1)
Dimensiones y_train:  (80, 3)
Dimensiones X_test:  (10, 128, 173, 1)
Dimensiones y_test:  (10, 3)


## Creating a model and training

In [47]:
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D


In [48]:
# Build a very simple CNN: three Conv2D + MaxPooling2D stages with a growing
# number of filters (16 -> 32 -> 64), followed by a small dense classifier.
#
# The input shape is taken from the prepared training tensor instead of being
# hard-coded, so the model follows the data if the array dimensions change.
# X_train must already be 4-D (batch, rows, cols, channels) at this point.
model = Sequential()

# Feature extractor
model.add(Conv2D(16, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=X_train.shape[1:]))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head, with dropout before and after the hidden dense layer
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# One softmax output per class
model.add(Dense(num_classes, activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 126, 171, 16)      160       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 63, 85, 16)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 61, 83, 32)        4640      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 30, 41, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 39, 64)        18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 14, 19, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 19, 64)        0         
__________

In [49]:
# Configure training: Adadelta optimizer with its default settings,
# categorical cross-entropy loss (labels are one-hot), accuracy as the metric.
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)


In [50]:
# Train the model, evaluating on the validation split after every epoch.
# The returned History object is kept in `history` so the per-epoch metrics
# can be inspected later and the cell does not end with a bare object repr.
# verbose=2 prints one summary line per epoch.
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=n_epochs,
                    verbose=2,
                    validation_data=(X_val, y_val))

Train on 80 samples, validate on 10 samples
Epoch 1/5
 - 4s - loss: 1.1202 - acc: 0.3000 - val_loss: 1.0834 - val_acc: 0.4000
Epoch 2/5
 - 2s - loss: 1.0918 - acc: 0.4375 - val_loss: 1.0988 - val_acc: 0.3000
Epoch 3/5
 - 2s - loss: 1.0724 - acc: 0.4250 - val_loss: 1.0859 - val_acc: 0.4000
Epoch 4/5
 - 2s - loss: 1.0833 - acc: 0.5125 - val_loss: 1.1027 - val_acc: 0.4000
Epoch 5/5
 - 3s - loss: 1.0011 - acc: 0.5125 - val_loss: 1.1870 - val_acc: 0.3000


<keras.callbacks.History at 0x7f0b37f1c9e8>

In [51]:
# Display the per-epoch loss/accuracy values (training and validation)
# recorded on the model by the fit() call
fit_log = model.history
fit_log.history

{'val_loss': [1.083378839492798,
  1.0987654209136963,
  1.0859281063079833,
  1.1026532649993896,
  1.1870320320129395],
 'val_acc': [0.4, 0.3, 0.4, 0.4, 0.3],
 'loss': [1.1201524496078492,
  1.0917672514915466,
  1.0723776340484619,
  1.0832620859146118,
  1.0010685622692108],
 'acc': [0.3, 0.4375, 0.425, 0.5125, 0.5125]}

In [52]:
# Score the trained model on the held-out test split.
# The first two entries of the result are the loss and the accuracy metric.
score = model.evaluate(X_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

Test loss: 1.0920337438583374
Test accuracy: 0.4000000059604645
