In [None]:
%matplotlib inline
import numpy as np
np.random.seed(123)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, GlobalAveragePooling2D
from keras.utils import np_utils
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
import cv2
import pandas as pd
import os
import matplotlib.pyplot as plt
#import seaborn as sns
#from IPython.display import display 
from PIL import Image
from keras.applications.resnet50 import ResNet50 
import time
from keras.utils.training_utils import multi_gpu_model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

In [None]:
# ---- Notebook configuration -------------------------------------------------
# Root folders handed to ImageDataGenerator.flow_from_directory for the
# training and validation image patches.
TRAIN_DIR = "/mnt/disks/patches/calcifications/train/"
TEST_DIR = "/mnt/disks/patches/calcifications/test/"

# Size (in pixels) every patch is resized to before being fed to the network.
IM_WIDTH = 256
IM_HEIGHT = 256

# Number of output classes of the final softmax layer.
NUM_CLASSES = 4

# Training schedule: samples per batch and number of passes over the data.
batch_size = 100
NUM_EPOCHS = 50

# NOTE(review): FC_SIZE is not referenced by any of the visible cells -- confirm
# whether it is still needed.
FC_SIZE = 256

In [None]:
# Plain generators: no augmentation and no rescaling is configured, so images
# reach the network with their original pixel values.
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# Keras expects target_size as (height, width) -- not (width, height). The two
# values are equal today, but the order matters as soon as they diverge.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IM_HEIGHT, IM_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',  # one-hot labels, as required by categorical_crossentropy
)

# Held-out images, used as validation data during training.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(IM_HEIGHT, IM_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
)

In [1]:
import os

import tensorflow as tf
from keras import backend as K

# Register the session with Keras. Without set_session() Keras lazily creates
# its own session and the log_device_placement option below never takes effect.
# allow_soft_placement lets ops that have no GPU kernel fall back to the CPU
# when the model is later replicated inside explicit GPU device scopes.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True,
                                        allow_soft_placement=True))
K.set_session(sess)

## Template model: randomly initialised ResNet50 convolutional base.
## `classes` is omitted on purpose: it only applies when include_top=True.
model = Sequential()
model.add(ResNet50(include_top=False,
                   weights=None,
                   input_shape=(IM_HEIGHT, IM_WIDTH, 3)))

## Classification head: two fully connected layers with dropout, then softmax
model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))


class TemplateModelCheckpoint(ModelCheckpoint):
    """ModelCheckpoint that always saves a fixed template model.

    A callback normally saves whichever model ``fit``/``fit_generator`` is
    called on. When that model is the wrapper returned by ``multi_gpu_model``,
    Keras' documentation says to save through the template model instead.
    This subclass pins the saved model to ``template_model`` no matter which
    model is being trained.

    Args:
        template_model: the model whose architecture/weights are written to disk.
        *args, **kwargs: forwarded unchanged to ``ModelCheckpoint``.
    """

    def __init__(self, template_model, *args, **kwargs):
        super(TemplateModelCheckpoint, self).__init__(*args, **kwargs)
        self._template_model = template_model

    def set_model(self, model):
        # Keras passes the model being trained; deliberately ignore it.
        super(TemplateModelCheckpoint, self).set_model(self._template_model)


filepath="/home/jlandesman/model_history/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"

# Create the checkpoint folder up front so the first save cannot fail after a
# full epoch of training.
checkpoint_dir = os.path.dirname(filepath)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Keep only checkpoints that improve validation accuracy.
checkpoint = TemplateModelCheckpoint(model, filepath, monitor='val_acc', verbose=1,
                                     save_best_only=True, mode='max')
callbacks_list = [checkpoint]

## Wrap the template model for training on 2 GPUs and compile the wrapper.
## Single-GPU alternative: compile `model` directly with the same arguments
## (the earlier single-GPU experiment used lr=0.002).
parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.0002, beta_1=0.9, beta_2=0.999),
    metrics=['accuracy'],
)


# Number of batches needed to see every sample once per epoch. Keras expects
# integer step counts, so use ceiling division rather than true division
# (which yields a fractional value whenever samples % batch_size != 0).
train_steps = (train_generator.samples + batch_size - 1) // batch_size
val_steps = (test_generator.samples + batch_size - 1) // batch_size

# Train on the multi-GPU wrapper and time the whole run (wall clock).
start = time.time()
modelFit = parallel_model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=NUM_EPOCHS,
    verbose=1,
    validation_data=test_generator,
    validation_steps=val_steps,
    callbacks=callbacks_list)
end = time.time()

total_time = int(end - start)
time_per_epoch = total_time / NUM_EPOCHS

# Linear extrapolation of the per-epoch time to a data set of 100,000 images
# (training and validation images are both processed in every epoch).
forecasted_time = 100000 * time_per_epoch / (train_generator.samples + test_generator.samples)

print()
print()
print("Model took " + str(total_time) + " seconds to run")
# This figure is the per-epoch time, so the message now says so.
print("Model takes " + str(time_per_epoch) + " seconds per epoch")
print("Approximate time taken per epoch for 100,000 images is " + str(forecasted_time) + " seconds")

NameError: name 'Sequential' is not defined

In [None]:
from sklearn.model_selection import train_test_split
import os
import shutil

import numpy as np
import pandas as pd

TRAIN_PATH = '/mnt/disks/patches/calcifications/train/no_tumor/'
TEST_PATH =  '/mnt/disks/patches/calcifications/test/no_tumor/'

TRAIN_DUMP_PATH = '/mnt/disks/patches/overflow_files/train/no_tumor'
TEST_DUMP_PATH =  '/mnt/disks/patches/overflow_files/test/no_tumor'

# How many randomly chosen 'no_tumor' files to move out of each split.
N_TRAIN_TO_MOVE = 200000
N_TEST_TO_MOVE = 50000


def _shuffled_listing(directory):
    """Return the file names in ``directory`` as a shuffled numpy array.

    os.listdir returns names in arbitrary order, so the listing is sorted
    before shuffling; with a fixed numpy seed the same files are then selected
    on every run. The shuffle uses numpy's global random state.
    """
    names = np.asarray(sorted(os.listdir(directory)))
    np.random.shuffle(names)
    return names


def _move_files(names, src_dir, dst_dir):
    """Move every file in ``names`` from ``src_dir`` to ``dst_dir``.

    The destination directory is created if it does not exist. shutil.move is
    used so the move still works when source and destination are on different
    file systems (a plain rename fails in that case).
    """
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)
    for name in names:
        shutil.move(os.path.join(src_dir, name), os.path.join(dst_dir, name))


## Read in files (train first, then test, so the random draws keep that order)
no_tumor_train = _shuffled_listing(TRAIN_PATH)
no_tumor_test = _shuffled_listing(TEST_PATH)

print(len(no_tumor_train))
print(len(no_tumor_test))

# WARNING: this cell is not idempotent. Every execution re-lists the folders
# and moves up to another N_*_TO_MOVE files (all of them if fewer remain).
_move_files(no_tumor_train[:N_TRAIN_TO_MOVE], TRAIN_PATH, TRAIN_DUMP_PATH)
_move_files(no_tumor_test[:N_TEST_TO_MOVE], TEST_PATH, TEST_DUMP_PATH)




#benign = os.listdir(TRAIN_PATH + 'benign')
#benign_no_callback = os.listdir(TRAIN_PATH+'benign_no_callback')
#malignant = os.listdir(TRAIN_PATH + 'malignant')

## Build DF
#file_paths = no_tumor# + benign + benign_no_callback + malignant
#labels = ['no_tumor'] * len(no_tumor)# + ['benign'] * len(benign) + ['benign_no_callback'] * len(benign_no_callback) + ['malignant'] * len(malignant)
#assert len(file_paths) == len(labels)

# df = pd.DataFrame({'file_paths': file_paths, 'labels': labels})

# ## Split into train/test
# X_train, X_test, Y_train, Y_test = train_test_split(df['file_paths'], df['labels'], test_size = 0.2, random_state = 142)

# ## Run
# counter = 0
# for label, file_name in zip(Y_test, X_test):
#     current_dir = os.path.join(TRAIN_PATH + label +'/'+ file_name)
#     test_dir = os.path.join(TEST_PATH + label +'/'+ file_name)
#     os.rename(current_dir, test_dir)
#     counter += 1
#     if counter%1000 == 0:
#         print ('Files moved; ', counter)