## 小さなデータセットでCNN学習

#### データをtrain(各1000), validation(各500), test(各500)に分割

In [None]:
import os
import shutil

# Dataset locations: images are copied from `original_dataset_dir` into a
# smaller train/validation/test layout rooted at `base_dir`.

# Linux
# original_dataset_dir = '/home/vagrant/data/cats_and_dogs/original/train'
# base_dir = '/home/vagrant/data/cats_and_dogs/small'

# Windows
original_dataset_dir = '/Users/Kenji/documents/keras/Keras-training/data/cats_and_dogs/original/train'
base_dir = '/Users/Kenji/documents/keras/Keras-training/data/cats_and_dogs/small'

# makedirs(..., exist_ok=True) creates any missing parent directories and does
# not raise when this cell is re-run after the directory already exists.
os.makedirs(base_dir, exist_ok=True)

In [None]:
# One directory per split under the small-dataset root.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# One sub-directory per class inside each split.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# Create everything in one pass. exist_ok=True keeps the cell re-runnable:
# directories left over from an earlier run are reused instead of raising
# FileExistsError.
for directory in (
    train_dir, validation_dir, test_dir,
    train_cats_dir, train_dogs_dir,
    validation_cats_dir, validation_dogs_dir,
    test_cats_dir, test_dogs_dir,
):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Copy a fixed slice of the original images into each split directory.
# Cats are handled first, then dogs; within each class the index ranges
# 0-999, 1000-1499 and 1500-1999 go to train, validation and test.
copy_plan = (
    ('cat.{}.jpg', (
        (train_cats_dir, range(1000)),
        (validation_cats_dir, range(1000, 1500)),
        (test_cats_dir, range(1500, 2000)),
    )),
    ('dog.{}.jpg', (
        (train_dogs_dir, range(1000)),
        (validation_dogs_dir, range(1000, 1500)),
        (test_dogs_dir, range(1500, 2000)),
    )),
)

for name_pattern, destinations in copy_plan:
    for target_dir, indices in destinations:
        fnames = [name_pattern.format(i) for i in indices]
        for fname in fnames:
            src = os.path.join(original_dataset_dir, fname)
            dst = os.path.join(target_dir, fname)
            shutil.copyfile(src, dst)


#### CNNを作成

In [None]:
from keras import layers, models

# Baseline convnet: four Conv2D + MaxPooling2D stages on 150x150 RGB input,
# followed by a 512-unit dense layer and a single sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
from keras import optimizers

# Binary cross-entropy loss with RMSprop at a learning rate of 1e-4.
# The metric is registered as 'acc', which is the key the plotting cells read
# back from the training history.
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# confirm against the installed version before renaming.
rmsprop = optimizers.RMSprop(lr=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['acc'],
)

#### ImageDataGeneratorで画像を読み込む

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Both generators only rescale pixel values by 1/255; no augmentation yet.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the training and validation directory iterators:
# 150x150 target size, batches of 20, binary labels.
flow_options = {
    'target_size': (150, 150),
    'batch_size': 20,
    'class_mode': 'binary',
}

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

#### バッチジェネレータを使ってモデルをFit

In [None]:
# Train the baseline model for 30 epochs from the directory generators.
# With the generators' batch size of 20, 100 steps request 2000 training
# images and 50 steps request 1000 validation images per epoch.
fit_options = {
    'steps_per_epoch': 100,
    'epochs': 30,
    'validation_data': validation_generator,
    'validation_steps': 50,
}
history = model.fit_generator(train_generator, **fit_options)

#### Accuracy と Lossのplot

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']

plt.figure()
plt.plot(range(1, len(acc)+1), acc, 'bo', label='Training accuracy')
plt.plot(range(1, len(acc)+1), val_acc, 'b', label='Validation accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.title('Training and validation accuracy')


In [None]:
# Per-epoch loss recorded during training.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x axis

plt.figure()
# Training loss as dots, validation loss as a solid line.
for values, fmt, label in (
    (loss, 'bo', 'Training loss'),
    (val_loss, 'b', 'Validation loss'),
):
    plt.plot(epochs, values, fmt, label=label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title('Training and validation loss')


#### モデルをsave

In [None]:
# Save the trained baseline model to an .h5 file (relative path, so it lands
# in the current working directory).
model_path = 'cats_and_dogs_small_1.h5'
model.save(model_path)

#### 過学習しているのでdropoutを適用する

In [None]:
# Same convolutional stack as the baseline, with a Dropout(0.5) layer inserted
# between Flatten and the dense classifier.
dropout_stack = (
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
)

model = models.Sequential()
for layer in dropout_stack:
    model.add(layer)

model.summary()

In [None]:
# Compile the dropout model with the same loss, optimizer settings and metric
# name ('acc') as the baseline.
rmsprop = optimizers.RMSprop(lr=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['acc'],
)

In [None]:
# Train the dropout model for 30 epochs using the same generators and step
# counts as the baseline run.
fit_options = {
    'steps_per_epoch': 100,
    'epochs': 30,
    'validation_data': validation_generator,
    'validation_steps': 50,
}
history = model.fit_generator(train_generator, **fit_options)

In [None]:
# Per-epoch accuracy of the dropout run.
acc = history.history['acc']
val_acc = history.history['val_acc']
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x axis

plt.figure()
# Training accuracy as dots, validation accuracy as a solid line.
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.title('Training and validation accuracy')


In [None]:
# Per-epoch loss of the dropout run.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x axis

plt.figure()
# Training loss as dots, validation loss as a solid line, matching the other
# loss plots in this notebook.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title('Training and validation loss')


#### さらにdata augmentationを行う

In [None]:
# Random transformations applied to training images only.
augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators; the batch size here is 40.
flow_options = {
    'target_size': (150, 150),
    'batch_size': 40,
    'class_mode': 'binary',
}

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

validation_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

In [None]:
# Fresh copy of the dropout architecture, trained on the augmented generator.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# model.summary() is left disabled here; the layer stack matches the dropout
# model summarized earlier in the notebook.

rmsprop = optimizers.RMSprop(lr=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['acc'],
)

# 100 epochs this time.
# NOTE(review): with batch_size=40 on the generators, 100 / 50 steps request
# 4000 training and 2000 validation samples per epoch, i.e. more than the
# 2000 / 1000 images copied into the split directories -- confirm the
# generators are expected to wrap around.
fit_options = {
    'steps_per_epoch': 100,
    'epochs': 100,
    'validation_data': validation_generator,
    'validation_steps': 50,
}
history = model.fit_generator(train_generator, **fit_options)

In [None]:
# Per-epoch accuracy of the augmented run.
acc = history.history['acc']
val_acc = history.history['val_acc']
epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x axis

plt.figure()
# Training accuracy as dots, validation accuracy as a solid line.
for values, fmt, label in (
    (acc, 'bo', 'Training accuracy'),
    (val_acc, 'b', 'Validation accuracy'),
):
    plt.plot(epochs, values, fmt, label=label)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.title('Training and validation accuracy')


In [None]:
# Per-epoch loss of the augmented run.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x axis

plt.figure()
# Training loss as dots, validation loss as a solid line.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title('Training and validation loss')


In [None]:
# Import from the public IPython.display module; pulling `display` out of
# IPython.core.display is deprecated in newer IPython releases.
from IPython.display import HTML, display

# Inject a CSS rule that widens the notebook's .container element to 95%.
display(HTML("<style>.container { width:95% !important; }</style>"))