In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Conv2D, MaxPooling2D

In [10]:
!unzip /content/drive/MyDrive/set.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: training_set/cat.37.jpg  
  inflating: training_set/cat.370.jpg  
  inflating: training_set/cat.3700.jpg  
  inflating: training_set/cat.3701.jpg  
  inflating: training_set/cat.3702.jpg  
  inflating: training_set/cat.3703.jpg  
  inflating: training_set/cat.3704.jpg  
  inflating: training_set/cat.3705.jpg  
  inflating: training_set/cat.3706.jpg  
  inflating: training_set/cat.3707.jpg  
  inflating: training_set/cat.3708.jpg  
  inflating: training_set/cat.3709.jpg  
  inflating: training_set/cat.371.jpg  
  inflating: training_set/cat.3710.jpg  
  inflating: training_set/cat.3711.jpg  
  inflating: training_set/cat.3712.jpg  
  inflating: training_set/cat.3713.jpg  
  inflating: training_set/cat.3714.jpg  
  inflating: training_set/cat.3715.jpg  
  inflating: training_set/cat.3716.jpg  
  inflating: training_set/cat.3717.jpg  
  inflating: training_set/cat.3718.jpg  
  inflating: training_set/cat.3719.jp

In [11]:
# Dataset locations and file counts

TRAIN_DIR = "/content/training_set"
TEST_DIR = "/content/test_set"

# Every entry of each directory is counted, whatever its type.
TRAIN_SIZE = len(os.listdir(TRAIN_DIR))
TEST_SIZE = len(os.listdir(TEST_DIR))

print("Number of training images:", TRAIN_SIZE)
print("Number of test images:", TEST_SIZE)

Number of training images: 8000
Number of test images: 2000


In [12]:
# Training configuration
VALID_FRACTION = 0.2  # share of the labelled files held out for validation
BATCH_SIZE = 100      # images per generator batch
EPOCHS = 1            # number of training epochs

# Side length, in pixels, that the images are resized to (square images)
IMAGE_HEIGHT = 150
IMAGE_WIDTH = IMAGE_HEIGHT

In [13]:
# Build a DataFrame that pairs every training file name with its label.
# The label is the part of the file name before the first dot.
train_filenames = os.listdir(TRAIN_DIR)
train_labels = [filename.split('.')[0] for filename in train_filenames]

train_df = pd.DataFrame({'id': train_filenames, 'label': train_labels})

In [14]:
# Hold out VALID_FRACTION of the labelled files for validation.
# random_state pins the shuffle so the same split is produced on every run.
# NOTE: train_df is rebound to the training subset here, so re-running this cell
# on its own splits the already-reduced frame again; rebuild train_df first.
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(
    train_df,
    test_size=VALID_FRACTION,
    random_state=42,
)

In [15]:
# Training-time preprocessing: multiply pixel values by 1/255 and apply random
# augmentation (rotation, width/height shifts, shear, zoom, horizontal flip).
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [16]:
# Validation images are only multiplied by 1/255; no augmentation is applied to them.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255.0,
)

In [17]:
# Training generator: reads the files listed in train_df from TRAIN_DIR and applies
# the augmenting train_datagen. The validation split was made by hand (instead of
# ImageDataGenerator's validation_split) so that augmentation never touches validation data.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=TRAIN_DIR,
    x_col='id',
    y_col='label',
    # Keras expects target_size as (height, width), not (width, height)
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE,
)

Found 6400 validated image filenames belonging to 2 classes.


In [18]:
# Validation generator: reads the files listed in valid_df from TRAIN_DIR
# through valid_datagen.
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory=TRAIN_DIR,
    x_col='id',
    y_col='label',
    # Keras expects target_size as (height, width), not (width, height)
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='binary',
    batch_size=BATCH_SIZE,
)

Found 1600 validated image filenames belonging to 2 classes.


In [19]:
# Convolutional binary classifier: four Conv2D + MaxPooling2D stages, dropout,
# then a dense head that ends in a single sigmoid unit.
model = tf.keras.models.Sequential([
    # Input is one image as produced by the generators: (height, width, channels)
    # with 3 colour channels. Keras' channels-last layout puts rows (height) first.
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Dropout is applied to the last feature maps, before they are flattened
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # since we have only 2 classes to predict we can use 1 neuron and sigmoid
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)       1

In [20]:
# Compile for binary classification.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'])

# Stop once val_loss has not improved for `patience` consecutive epochs and
# restore the weights of the best epoch. This can only trigger when training
# runs for more epochs than `patience`.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
    mode='min',
    restore_best_weights=True,
    verbose=1,
    patience=5)

In [21]:

%%time

# training
history = model.fit_generator(train_generator,
    validation_data=valid_generator,
    steps_per_epoch=round(TRAIN_SIZE*(1.-VALID_FRACTION)/BATCH_SIZE),
    validation_steps=round(TRAIN_SIZE*VALID_FRACTION/BATCH_SIZE),
    epochs=EPOCHS,
    callbacks=[es],
    verbose=1)



CPU times: user 55.7 s, sys: 1.78 s, total: 57.5 s
Wall time: 58.2 s
