In [1]:
# model look up table
# folder convention:
# modelid/
    # modelid_epoch%d.h5
    # modelid_history_log.csv
    
# FOR ALL: 20 epochs

# models of interest:
# logistic
    # best model saved
    
# fcc: 2 layer
    # best model saved
    
# cnn_lay%d_drop%d_learn%d_aug%d -- 
    # layers: 2, 4, 6
    # dropout: 0, 0.3, 0.6
    # learning rate: 1e-x: 0, 3, 6
    # 27 models: 
    # 0.3 hour per model
    # 27 best models saved
    # augmentation ONLY for the best model

# vgg_pretrained: 36 minutes for 20 epochs
    # 900 ms/step
    # 0.6 hour per model

# 15 gb of models for the imaging notebook

# base model function 
# fcc
# cnn(number_layers, dropout, learning_rate, uses_augmentation )
# 2, 4, 6
# 0, 0.3, 0.6
# learning_rate: -6, -4, -2

# for the best one, check against augmentation 
    # uses augmentation, doesn't use augmentation 

# 2 models to save for each : last version and the best epoch
# for the best model...

# what i'll do for each of these... is... 
# (1) prepare_model_database on a single instance
# (2) port model_database onto all other instances
# (3) run 8 rows per instance
# (4) once finished, aggregate all data into the google drive
    # all model folders will have 'model_' as their prefix
# (5) prepare notebook GUI for CNN to display...
    # parameters
    # training curve, validation curve (losses + accuracy)

In [26]:
def run_model(model, model_name, augment, results_dir, run_now = True,
              epochs = 1, steps_per_epoch = 1, validation_steps = 1):
    '''
    Fit `model` on the train/validation generators and save the result.

    Parameters
    ----------
    model : object exposing `fit_generator` and `save` (a compiled Keras model).
    model_name : str
        Used as the sub-folder name under `results_dir` and as the prefix
        of the saved file.
    augment : forwarded to `get_train_data_generator(augment=...)`.
    results_dir : str
        Parent directory for all model folders; a trailing path separator
        is optional.
    run_now : bool
        When False the directory and generators are still created, but
        nothing is fitted or saved (dry run).
    epochs, steps_per_epoch, validation_steps : int
        Forwarded to `fit_generator`. The defaults (1, 1, 1) reproduce the
        previously hard-coded values.
        NOTE(review): those values look like smoke-test settings; the
        planning notes mention 20 epochs -- confirm before a real sweep.

    Returns
    -------
    str : 'Ran!' if the model was fitted and saved, otherwise 'Dry!'.
    '''
    import os

    # The trailing '' keeps a path separator at the end, so the directory
    # string has the same shape as before whether or not `results_dir`
    # itself ends with a separator.
    save_dir = os.path.join(results_dir, model_name, '')
    print('Results directory: %s'%save_dir)
    ensure_directory(save_dir)

    print('Running model: %s'%model_name)
    print('\tDefining generators...')

    # load our generators
    train_generator = get_train_data_generator(augment = augment)
    val_generator   = get_val_data_generator()

    print('\tFitting model...')
    if not run_now:
        return 'Dry!'

    # define our callbacks
    callbacks = get_callbacks(model_name, save_dir)

    model.fit_generator(train_generator,
                        validation_data = val_generator,
                        validation_steps = validation_steps,
                        steps_per_epoch  = steps_per_epoch,
                        epochs = epochs,
                        callbacks = callbacks)
    print('\tSaving model...')
    # Final-state snapshot, saved in addition to whatever the callbacks write.
    model.save(os.path.join(save_dir, model_name + '_end.h5'))
    return 'Ran!'

def prepare_model_name(model_name, augment):
    '''
    Tag a model name with '_aug' when augmentation is switched on.

    model_name : base name of the model
    augment    : truthy -> the suffix is appended; falsy -> name returned as-is
    '''
    return model_name + '_aug' if augment else model_name

def choose_model(model_type, params):
    '''
    Look up the builder registered for `model_type` and call it.

    model_type : one of 'log', 'fcc', 'cnn', 'vgg' (any other key raises KeyError)
    params     : dict of keyword arguments forwarded to the builder

    Returns whatever the builder returns; the caller unpacks it as
    (model, model_name).
    '''
    builders = dict(log = get_log_model,
                    fcc = get_fcc_model,
                    cnn = get_cnn_model,
                    vgg = get_vgg_model)
    build = builders[model_type]
    return build(**params)

def prepare_models_list():
    '''
    Build the list of configurations to sweep over.

    Each entry is [model_type, model_params, run_params]:
      * 'log' and 'fcc' baselines, no model params, augment = 0
      * 27 'cnn' entries: num_layers in (1, 2, 4) x dropout code in (0, 3, 6)
        x learning_rate code in (0, 3, 6), augment = 0
      * 2 'vgg' entries: trainable = 1 then 0, augment = 1

    The dropout / learning_rate values are integer codes; how the cnn
    builder turns them into actual rates is not visible from here.
    '''
    baselines = [[kind, {}, {'augment': 0}] for kind in ('log', 'fcc')]

    # cnn grid: layers vary slowest, learning-rate code fastest
    cnn_grid = []
    for n_layers in (1, 2, 4):
        for drop_code in (0, 3, 6):
            for lr_code in (0, 3, 6):
                cnn_settings = {'num_layers': n_layers,
                                'dropout': drop_code,
                                'learning_rate': lr_code}
                cnn_grid.append(['cnn', cnn_settings, {'augment': 0}])

    # vgg: trainable first, then frozen
    vgg_variants = [['vgg', {'trainable': flag}, {'augment': 1}] for flag in (1, 0)]

    return baselines + cnn_grid + vgg_variants

In [27]:
# Full sweep list; each row is [model_type, model_params, run_params].
models = prepare_models_list()

In [28]:
def single_iter(run_row, run_now = False, results_dir = '/home/jupyter/models/'):
    '''
    Build, name and run the model described by one row of the sweep list.

    Parameters
    ----------
    run_row : sequence of (model_type, model_params, run_params)
        `model_params` is passed to `choose_model`; `run_params` is
        unpacked as keyword arguments into both `prepare_model_name` and
        `run_model`, so its keys must be accepted by both.
    run_now : bool
        Forwarded to `run_model`; False performs a dry run.
    results_dir : str
        Parent directory for the model folders. The default is the path
        that was previously hard-coded here.
    '''
    model_type, model_params, run_params = run_row
    model, model_name = choose_model(model_type, model_params)
    model_name = prepare_model_name(model_name, **run_params)
    run_model(model = model,
              model_name = model_name,
              results_dir = results_dir,
              run_now = run_now,
              **run_params)
    # reset_keras is defined elsewhere; presumably it clears backend state
    # between models -- TODO confirm.
    reset_keras()

In [30]:
# Run every configuration in the sweep list; run_now = True means each
# model is actually fitted and saved rather than dry-run.
for irow in models:
    single_iter(irow, run_now = True)

Results directory: /home/jupyter/models/logistic/
Running model: logistic
	Defining generators...
Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
	Fitting model...
Epoch 1/1
	Saving model...
Results directory: /home/jupyter/models/fcc/
Running model: fcc
	Defining generators...
Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
	Fitting model...
Epoch 1/1
	Saving model...
Results directory: /home/jupyter/models/cnn_lay1_drop0_lr0/
Running model: cnn_lay1_drop0_lr0
	Defining generators...
Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
	Fitting model...
Epoch 1/1
	Saving model...
Results directory: /home/jupyter/models/cnn_lay1_drop0_lr3/
Running model: cnn_lay1_drop0_lr3
	Defining generators...
Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
	Fitting model...
Epoch 1/1
	Saving model...
Results directory: /home/jupyter/models/cnn_lay1_drop0_lr6/
Runnin

In [7]:
def get_images_from_class(class_name, num_samples):
    '''
    Return a list of images from a desired class of the test split.

    Parameters
    ----------
    class_name : str
        Sub-folder of <data_folder>/test to read from.
    num_samples : int
        Number of images to return; values <= 0 give an empty list.

    Returns
    -------
    list of arrays as returned by cv2.imread, divided by 255.0.
    NOTE(review): cv2.imread yields BGR channel order, while the Keras
    generators in this notebook are configured with color_mode 'rgb' --
    confirm this does not matter for the images used.

    Raises
    ------
    IndexError : fewer than `num_samples` images are available.
    IOError    : a file could not be decoded by cv2.imread.
    '''
    class_path = os.path.join(data_folder, 'test', class_name)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # selected subset reproducible across runs and machines.
    all_image_paths = sorted(os.path.join(class_path, ifile)
                             for ifile in os.listdir(class_path)
                             if ifile != '.DS_Store')

    if num_samples > len(all_image_paths):
        raise IndexError('Requested %d images from %s but only %d are available'
                         % (num_samples, class_path, len(all_image_paths)))

    images = []
    # max(..., 0) keeps a negative request from turning into a
    # "drop the last n" slice.
    for ipath in all_image_paths[:max(num_samples, 0)]:
        image = cv2.imread(ipath)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError('Could not read image: %s' % ipath)
        images.append(image / 255.0)
    return images


In [8]:
def get_meta_dataframe():
    '''
    Build a table with one row per *.jpeg file found exactly two folder
    levels below `data_folder`.

    Columns:
      class      -- name of the folder containing the image
      image_path -- path as returned by glob
      split      -- name of the folder one level above `class`
    Path components are obtained by splitting on '/'.
    '''
    import glob
    import pandas as pd

    pattern = os.path.join(data_folder, '*', '*', '*.jpeg')
    columns = {'class': [], 'image_path': [], 'split': []}
    for image_path in glob.glob(pattern):
        pieces = image_path.split('/')
        columns['class'].append(pieces[-2])
        columns['image_path'].append(image_path)
        columns['split'].append(pieces[-3])
    return pd.DataFrame(columns)

In [9]:
# One row per image file: class, image_path, split.
meta_df = get_meta_dataframe()

In [10]:
# Number of images per (split, class) pair.
meta_df.groupby(['split','class']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,image_path
split,class,Unnamed: 2_level_1
test,NORMAL,200
test,PNEUMONIA,200
train,NORMAL,1000
train,PNEUMONIA,1000
val,NORMAL,8
val,PNEUMONIA,8


# Load the test images of each class.
normal = get_images_from_class('NORMAL',200)
sick = get_images_from_class('PNEUMONIA',200)

# Labels must follow the same encoding the training generators use.
# flow_from_directory (called without `classes`) numbers the class
# sub-folders in alphanumeric order, i.e. NORMAL -> 0, PNEUMONIA -> 1.
# The previous values (NORMAL = 1, PNEUMONIA = 0) were the inverse, so any
# metric computed against them would have been flipped.
normal_labels = [0 for _ in normal]
sick_labels = [1 for _ in sick]

all_images = normal + sick
all_labels = normal_labels + sick_labels

In [11]:
import numpy as np

all_images[0].shape

# Stack along a new LEADING axis so that x_test[i] is image i.
# Stacking on axis 3 and then reshaping to (-1, 224, 224, 3) does not move
# the sample axis to the front -- it reinterprets the buffer and mixes
# pixels from different images together.
x_test = np.stack(all_images, axis = 0)
if x_test.shape[1:] != (224, 224, 3):
    raise ValueError('expected images of shape (224, 224, 3), got %s' % (x_test.shape[1:],))

y_test = np.stack(all_labels)

# Generator with no rescaling or augmentation options set.
datagen = ImageDataGenerator()

# NOTE(review): x_train / y_train are not defined in the visible part of the
# notebook (only x_test / y_test are built above) -- confirm where they come
# from before running this cell.
vgg_model.fit_generator(datagen.flow(x_train, y_train, batch_size = 32), steps_per_epoch=121, epochs=20)

In [12]:

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import cv2

#@title Execute this code block! This will prepare a handful of useful functions for you to use, described below
# Shared settings used by the data-generator helpers defined below.
set_labels   = ['test','val','train']  # dataset split names (not referenced by the helpers below)
class_labels = ['PNEUMONIA','NORMAL']  # class names; NOTE(review): not passed to flow_from_directory below
class_mode   = 'binary'                # forwarded as class_mode to flow_from_directory
image_size   = (224, 224)              # forwarded as target_size to flow_from_directory

def get_train_data_generator(augment = False, color = 'rgb'):
    '''
    Build the directory iterator that feeds training batches.

    augment : when truthy, the generator is also configured with
              shear_range 0.2, zoom_range 0.2 and horizontal flips;
              pixel values are rescaled by 1/255 either way.
    color   : forwarded as color_mode (depends on the dataset).

    Returns the iterator created by flow_from_directory over
    <data_folder>/train, with batches of 32 images resized to `image_size`
    and labels produced according to `class_mode`.
    '''
    # Rescaling is always on; augmentation options are added on request.
    datagen_options = dict(rescale=1./255)
    if augment:
        datagen_options.update(shear_range=0.2,
                               zoom_range=0.2,
                               horizontal_flip=True)
    train_datagen = ImageDataGenerator(**datagen_options)

    return train_datagen.flow_from_directory(
        os.path.join(data_folder, 'train'),
        target_size=image_size,
        batch_size=32,
        color_mode=color,
        class_mode=class_mode)

def get_test_data_generator(augment = False, color = 'rgb', batch_size = 200):
    '''
    Returns a Python generator that yields batches of images (and labels)
    read from the TEST split, i.e. the <data_folder>/test folder.

    Parameters
    ----------
    augment : bool
        When truthy, shear / zoom / horizontal-flip augmentation is applied
        in addition to the 1/255 rescaling. Left off by default.
    color : str
        Forwarded as color_mode (depends on the dataset).
    batch_size : int
        Images per batch. Defaults to 200, the value that was previously
        hard-coded.

    Returns
    -------
    The iterator created by flow_from_directory, with images resized to
    `image_size` and labels produced according to `class_mode`.
    '''

    # Pixel rescaling is always applied; augmentation is opt-in.
    if not augment:
        test_datagen = ImageDataGenerator(rescale=1./255)
    else:
        test_datagen = ImageDataGenerator(
              rescale=1./255,
              shear_range=0.2,
              zoom_range=0.2,
              horizontal_flip=True)

    # Create a Python 'generator' that reads pictures from the
    # 'test' folder under data_folder and yields them batch by batch.
    image_directory = os.path.join(data_folder, 'test')
    test_generator = test_datagen.flow_from_directory(
          image_directory,
          target_size=image_size,
          batch_size=batch_size,
          color_mode=color, # depends on the dataset
          class_mode=class_mode)

    return test_generator

Using TensorFlow backend.


# Here, we build a logistic regression: flatten the image and feed it to a
# single output unit.
model = Sequential()

model.add(Flatten(input_shape=(224, 224, 3)))
# The output activation must be sigmoid. Softmax normalises across the units
# of the layer, so with a single unit it always outputs exactly 1.0 and the
# model cannot learn anything.
model.add(Dense(1, activation = 'sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# and we hand our model the 'training book to learn from'
# (validation here uses one batch from the test-split generator per epoch)
model.fit_generator(get_train_data_generator(color = 'rgb'),
                    validation_data = get_test_data_generator(color = 'rgb'),
                    validation_steps = 1,
                    steps_per_epoch = 121,
                    epochs = 1)

In [43]:
# Small CNN: one conv block, then a dense head with dropout and a single
# sigmoid output unit.
cnn_model = Sequential([
    Conv2D(64, (3, 3), input_shape=(224, 224, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units = 128, activation = 'relu'),
    Dropout(0.5),
    Dense(units = 1, activation = 'sigmoid'),
])

cnn_model.compile(
    optimizer=optimizers.SGD(lr=1e-4, momentum=0.95),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [44]:
from keras.callbacks import CSVLogger, ModelCheckpoint
# Use the imported ModelCheckpoint directly: the bare name `keras` is only
# bound by an `import keras` in a later cell, so `keras.callbacks...` raises
# NameError when the notebook is run top to bottom on a fresh kernel.
# One checkpoint file is written per epoch (period=1), named by epoch number.
checkpoint = ModelCheckpoint('cnn_model{epoch:08d}.h5', period=1)
# append=True adds to an existing log file instead of overwriting it.
csv_logger = CSVLogger("cnn_model_history_log.csv", append=True)

# and we hand our model the 'training book to learn from'
# (validation here uses one batch from the test-split generator per epoch)
cnn_model.fit_generator(get_train_data_generator(color = 'rgb'),
                    validation_data = get_test_data_generator(color = 'rgb'),
                    validation_steps = 1,
                    steps_per_epoch = 121,
                    epochs = 10,
                    callbacks=[checkpoint, csv_logger])

Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Epoch 1/10


KeyboardInterrupt: 

In [None]:
# model look up table

# base model function 
# fcc
# cnn(number_layers, dropout, learning_rate, uses_augmentation )
# 2, 4, 6
# 0, 0.3, 0.6
# learning_rate: -6, -4, -2


# for the best one, check against augmentation 
    # uses augmentation, doesn't use augmentation 

# 2*3*4 = 24
# vgg()


# 

In [107]:
# Same layer stack and compile settings as `cnn_model`, built as a separate
# model object.
cnn_model_drop = Sequential([
    Conv2D(64, (3, 3), input_shape=(224, 224, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units = 128, activation = 'relu'),
    Dropout(0.5),
    Dense(units = 1, activation = 'sigmoid'),
])

cnn_model_drop.compile(
    optimizer=optimizers.SGD(lr=1e-4, momentum=0.95),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [108]:
import keras

In [111]:


# and we hand our model the 'training book to learn from'
# Training batches come from the train split (augmentation off by default);
# validation uses one batch per epoch from the TEST-split generator.
# NOTE(review): the recorded output of this cell is an InvalidArgumentError
# ("Tensor ... was not found in the Graph"); the model may need to be rebuilt
# in the current graph/session before fitting -- confirm.
cnn_model_drop.fit_generator(get_train_data_generator(color = 'rgb'),
                    validation_data = get_test_data_generator(color = 'rgb'),
                    validation_steps = 1,
                    steps_per_epoch = 121,
                    epochs = 10) 

Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Epoch 1/10


InvalidArgumentError: Tensor conv2d_3_input:0, specified in either feed_devices or fetch_devices was not found in the Graph

In [89]:
# and we hand our model the 'training book to learn from'
# Training batches come from the train split (augmentation off by default);
# validation uses one batch per epoch from the TEST-split generator.
cnn_model_drop.fit_generator(get_train_data_generator(color = 'rgb'),
                    validation_data = get_test_data_generator(color = 'rgb'),
                    validation_steps = 1,
                    steps_per_epoch = 121,
                    epochs = 10) 

Found 2000 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fef31384ba8>

In [90]:
# show the training curve - overfitting to the training data
# NOTE(review): this saves `cnn_model` under the bare name 'test' (no file
# extension), while the cell executed just before it (In [89]) trained
# `cnn_model_drop` -- confirm which model is meant.
cnn_model.save('test')

In [91]:
import tensorflow as tf

In [94]:
test_model = tf.keras.models.load_model('test')

In [None]:
test_model.

In [96]:
cnn_model.history.history

{'acc': [0.7946058091286307,
  0.9114583333333334,
  0.9317708333333333,
  0.9471354166666667,
  0.9505208333333334,
  0.9565104166666667,
  0.9544270833333334,
  0.9565104166666667,
  0.9447916666666667,
  0.9546875],
 'loss': [0.4549224703143741,
  0.2428989332790176,
  0.1750654386356473,
  0.14736155569553375,
  0.1354918708248685,
  0.12667551067036886,
  0.11829052766164144,
  0.11442604120820761,
  0.1384646851569414,
  0.12114966735243797],
 'val_acc': [0.8500000238418579,
  0.8100000023841858,
  0.7549999952316284,
  0.7300000190734863,
  0.7200000286102295,
  0.7200000286102295,
  0.7200000286102295,
  0.7049999833106995,
  0.7450000047683716,
  0.800000011920929],
 'val_loss': [0.38410472869873047,
  0.3845076858997345,
  0.4405522644519806,
  0.6263845562934875,
  0.5592684149742126,
  0.6501896381378174,
  0.7666110992431641,
  0.8493646383285522,
  0.5506953001022339,
  0.4264497458934784]}

In [None]:
# Save the model to an .h5 file and write its training history to a binary file.
# NOTE(review): `dump` and `history` are not defined anywhere in the visible
# notebook -- presumably `pickle.dump` and the History object returned by a
# fit call; confirm and import/assign them before running this cell.
model.save("trainedmodel_50Epoch.h5") # saving the model
with open('trainHistoryOld', 'wb') as handle: # saving the history of the model
    dump(history.history, handle)

In [33]:


import numpy as np
# load vgg 16: ImageNet weights, convolutional base only (no classifier head)
vgg_conv = VGG16(weights = 'imagenet',
     include_top = False,
     input_shape = (224, 224, 3))

# every layer of the convolutional base is left trainable (full fine-tuning)
for layer in vgg_conv.layers:
    layer.trainable = True

# classifier head stacked on top of the VGG base
vgg_model = Sequential()
out_vgg   = vgg_conv # GlobalAveragePooling2D()(vgg_conv.output)
vgg_model.add(out_vgg)
vgg_model.add(GlobalAveragePooling2D())
vgg_model.add(Dense(1024, activation = 'relu'))
vgg_model.add(Dropout(0.5))
vgg_model.add(Dense(512, activation = 'relu'))
vgg_model.add(Dropout(0.5))

# single sigmoid unit for the binary decision
vgg_model.add(Dense(1, activation = 'sigmoid'))

from keras.callbacks import CSVLogger, ModelCheckpoint
# Use the imported ModelCheckpoint directly: `keras.callbacks...` depends on an
# `import keras` that lives in a different cell and raises NameError on a
# fresh kernel. One checkpoint file is written per epoch (period=1).
checkpoint = ModelCheckpoint('vgg_model{epoch:08d}.h5', period=1)
# append=True adds to an existing log file instead of overwriting it.
csv_logger = CSVLogger("vgg_model_history_log.csv", append=True)

vgg_model.compile(loss = 'binary_crossentropy',
              optimizer = optimizers.SGD(lr=1e-4, momentum=0.95),
              metrics=['accuracy'])

# https://forums.fast.ai/t/how-to-use-pre-trained-features-from-vgg-16-as-input-to-globalaveragepooling2d-layer-in-keras/3196/3

# validation uses one batch per epoch from the test-split generator
vgg_model.fit_generator(get_train_data_generator(color = 'rgb'),
                        validation_data=get_test_data_generator(color = 'rgb'),
                        validation_steps =1,
                        steps_per_epoch = 121,
                        epochs = 1,
                        callbacks=[checkpoint, csv_logger])

In [29]:
vgg_model.history.history # loss is saved after each epoch 

{'acc': [0.5, 0.4895833333333333, 0.46875, 0.5625, 0.40625],
 'loss': [7.971192836761475,
  8.137259324391684,
  8.469392458597818,
  6.974793593088786,
  9.465791384379068]}

In [None]:
# Checkpoint file name template: filled in with the epoch number and the
# validation accuracy of that epoch.
filepath="weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
# save_best_only=True with monitor='val_acc', mode='max': a file is written
# only when validation accuracy improves on the best value seen so far.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [None]:
# Stand-alone example: a small dense network trained on a CSV dataset while
# checkpointing the weights whenever validation accuracy improves.
# NOTE: this cell rebinds the global names `model`, `checkpoint`, `filepath`
# and `callbacks_list` that other cells in this notebook also use.
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint

import matplotlib.pyplot as plt
import numpy
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
# columns 0-7 are the inputs, column 8 is the target
X = dataset[:,0:8]
Y = dataset[:,8]
# create model: 8 inputs -> 12 -> 8 -> 1 sigmoid output
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# checkpoint: write a file only when val_acc improves (save_best_only, mode='max')
filepath="weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
# Fit the model, holding out 33% of the rows for validation
model.fit(X, Y, validation_split=0.33, epochs=150, batch_size=10, callbacks=callbacks_list, verbose=0)

In [None]:
# https://www.kaggle.com/kosovanolexandr/keras-nn-x-ray-predict-pneumonia-86-54

In [None]:
Binary classification with Keras neural network
English is not my native language, so sorry for any mistakes.

If you like my kernel, please give me some feedback and upvote it.

Import
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

import os
import numpy as np
# pandas gets its own conventional alias; `import pandas as np` rebound `np`
# to pandas and silently replaced numpy for the rest of the notebook.
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# load