<a href="https://colab.research.google.com/github/celinezhang2019/cats-vs-dogs-image-classification/blob/master/kaggle_cats_vs_dogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

V01

A first attempt at cats-vs-dogs binary image classification.

In [0]:
# Mount Google Drive at /content/drive; later cells read the dataset from under this mount.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Provide the Kaggle credentials: upload kaggle.json through the Colab file picker

from google.colab import files
files.upload()

In [0]:
# Create the ~/.kaggle folder and copy the uploaded credentials file into it

!mkdir -p ~/.kaggle

!cp kaggle.json ~/.kaggle/

# Restrict the credentials file to owner read/write only
!chmod 600 ~/.kaggle/kaggle.json

In [0]:
# Create the cats-vs-dogs project folder on Drive (one-time step, left commented out)

import os
# os.mkdir('/content/drive/My Drive/Colab Notebooks/cats-vs-dogs')

In [0]:
# 1) Download the dogs-vs-cats dataset directly from Kaggle (one-time step, left commented out)

# !kaggle competitions download -c dogs-vs-cats -p '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs'



In [0]:
# 2) The downloaded files are zip archives, so extract them (one-time step, left commented out):

# !unzip -n '/content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train.zip' -d '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs'

# !unzip -n '/content/drive/My Drive/Colab Notebooks/cats-vs-dogs/test1.zip' -d '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs'

In [0]:
# PART-2: Building model

import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

In [0]:
# Print how many entries each dataset folder contains (train first, then test1).
for split_dir in (
    '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/',
    '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/test1/',
):
    n_files = len(os.listdir(split_dir))
    print(n_files)

In [0]:
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [0]:
# Label every training image from its filename prefix:
# 1 when the part before the first '.' is 'dog', 0 for anything else.

filenames = os.listdir('../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/') # 25000 training images
categories = [
    1 if image_name.split('.')[0] == 'dog' else 0
    for image_name in filenames
]

# One row per image: its filename and its numeric label.
df = pd.DataFrame({'filename': filenames, 'category': categories})

In [0]:
# Preview the first rows of the filename/label table
df.head()

In [0]:
# Preview the last rows of the filename/label table
df.tail()

In [0]:
# Bar chart of how many images carry each label (0 and 1)

df['category'].value_counts().plot.bar()

In [0]:
# Display one randomly chosen training image

sample = random.choice(filenames)
sample_path = '../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/' + sample
image = load_img(sample_path)
plt.imshow(image)

In [0]:
# Model definition: a small 3-stage CNN that classifies an image as cat or dog.

FAST_RUN = False  # when True, the training cell uses fewer epochs

# The network consumes 100x75 (height x width) RGB images, the same size every
# data generator in this notebook passes as target_size. These constants were
# previously 128x128 and unused by the model, which led to shape errors such as
# "expected ... shape (100, 75, 3) but got array with shape (128, 128, 3)".
IMAGE_HEIGHT=100
IMAGE_WIDTH=75
IMAGE_SIZE=(IMAGE_HEIGHT, IMAGE_WIDTH)  # Keras target_size order is (height, width)
IMAGE_CHANNELS=3

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

model = Sequential()

# Stage 1: 16 filters; the input shape is derived from the constants above so
# the model and the image-size constants cannot drift apart again.
model.add(Conv2D(16, (3, 3), activation='relu', input_shape=IMAGE_SIZE + (IMAGE_CHANNELS,)))
model.add(MaxPooling2D(pool_size=(2, 2)))


# Stage 2: 32 filters, followed by light dropout.
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3: 64 filters with batch normalization.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax')) # 2 because we have cat and dog classes

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

In [0]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [0]:
# Stop training early after 10 epochs without improvement (monitored quantity left at the Keras default)
earlystop = EarlyStopping(patience=10)

In [0]:
# Halve the learning rate (never below 1e-5) when 'val_acc' has not improved
# for 2 epochs; verbose=1 prints a message on each reduction.
# NOTE(review): the validation-accuracy metric is named 'val_acc' only in older
# Keras releases; newer ones log 'val_accuracy' — confirm against the installed version.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=2, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)


In [0]:
# Callbacks handed to the training call: early stopping plus learning-rate reduction
callbacks = [earlystop, learning_rate_reduction]

In [0]:
# Turn the numeric labels into class-name strings (0 -> 'cat', 1 -> 'dog').
# Presumably needed because the class_mode='categorical' generators below
# expect string labels in y_col — verify against the Keras version in use.
df["category"] = df["category"].replace({0: 'cat', 1: 'dog'}) 

In [0]:
# Hold out 20% of the labelled images for validation (fixed seed for a repeatable split)
# and give both parts a fresh 0..n-1 index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.20, random_state=42)
)

In [0]:
# Bar chart of the class counts in the training split
train_df['category'].value_counts().plot.bar()


In [0]:
# Bar chart of the class counts in the validation split
validate_df['category'].value_counts().plot.bar()

In [0]:
# Row counts of each split and the batch size shared by all generators.
batch_size = 10
total_train = len(train_df)
total_validate = len(validate_df)

In [0]:
# Training-time augmentation: pixel values scaled to [0, 1] plus random
# geometric perturbations.
train_datagen = ImageDataGenerator(
    rescale=1./255,            # map 0..255 pixel values to 0..1
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)

In [0]:
# Stream augmented training batches; filenames in train_df are resolved
# against the train folder and resized to the model's 100x75 input.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=(100,75),
    batch_size=batch_size,
)

In [0]:
# Validation images are only rescaled (no augmentation); they live in the same
# train folder and are selected through validate_df.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=(100,75),
    batch_size=batch_size,
)

In [0]:
# Pick a single random training row and wrap it in an augmenting generator,
# so repeated draws show different augmentations of the same image.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=(100,75),
)

In [0]:
# Draw 15 batches from the example generator and show the first image of each
# in a 5x3 grid.
plt.figure(figsize=(12, 12))
for i in range(15):
    plt.subplot(5, 3, i + 1)
    # Take exactly one batch from the generator per grid slot.
    X_batch, Y_batch = next(iter(example_generator))
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()


In [0]:
# Train the model: 3 epochs when FAST_RUN is set, otherwise 15.
if FAST_RUN:
    epochs = 3
else:
    epochs = 15

# Each epoch walks once through the training split and once through the
# validation split, in whole batches.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=total_train//batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validate//batch_size,
    callbacks=callbacks,
)

In [0]:
# Save the trained weights (weights only, not the full model) to the project folder on Drive
model.save_weights("../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/model_catdog02_15epoch.h5")

In [0]:
# Plot the training curves: loss on the top axes, accuracy on the bottom axes.
train_history = history.history

# Keras logs accuracy as 'acc' in older releases and as 'accuracy' in newer
# ones; accept either so this cell does not raise KeyError after an upgrade.
acc_key = 'acc' if 'acc' in train_history else 'accuracy'

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
ax1.plot(train_history['loss'], color='b', label="Training loss")
ax1.plot(train_history['val_loss'], color='r', label="validation loss")
ax1.set_xticks(np.arange(1, epochs, 1))
ax1.set_yticks(np.arange(0, 1, 0.1))
# plt.legend() only decorates the current (last) axes, so the loss panel needs
# its own explicit legend.
ax1.legend(loc='best', shadow=True)

ax2.plot(train_history[acc_key], color='b', label="Training accuracy")
ax2.plot(train_history['val_' + acc_key], color='r',label="Validation accuracy")
ax2.set_xticks(np.arange(1, epochs, 1))
legend = ax2.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

### Prepare Testing Data

In [0]:
# List the unlabelled test images and put their filenames in a one-column table.
test_filenames = os.listdir("../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/test1")
test_df = pd.DataFrame({'filename': test_filenames})
nb_samples = len(test_df)

### Create Testing Generator

In [0]:
# Unlabelled test batches: rescaling only, no labels (y_col/class_mode are None),
# and shuffle=False so predictions stay in the same order as test_df.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/test1/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    # Must be (100, 75) to match the model input; feeding 128x128 fails with
    # "expected conv2d_4_input to have shape (100, 75, 3) but got array with shape (128, 128, 3)".
    target_size=(100,75),
    batch_size=batch_size,
    shuffle=False,
)

### Predict

In [0]:
# Predict class probabilities for every test image.
# np.ceil returns a float, which Keras versions that require an integer `steps` reject,
# so cast it to int; rounding up keeps the final partial batch.
predict = model.predict_generator(test_generator, steps=int(np.ceil(nb_samples/batch_size)))

In [0]:
# For each test image keep the index of the highest-scoring output unit.
predicted_class_indices = np.argmax(predict, axis=-1)
test_df['category'] = predicted_class_indices

In [0]:
# Invert the generator's label -> index mapping into index -> label,
# then translate the predicted indices into class names.
label_map = {
    class_idx: class_name
    for class_name, class_idx in train_generator.class_indices.items()
}
test_df['category'] = test_df['category'].replace(label_map)

In [0]:
# Encode the predicted class names as integers: 'dog' -> 1, 'cat' -> 0
test_df['category'] = test_df['category'].replace({'dog':1, 'cat':0})

### Visualize Result

In [0]:
# Bar chart of how many test images were predicted as each class
test_df['category'].value_counts().plot.bar()

### See predicted results with images

In [0]:
# Show the first 18 test images in a 6x3 grid, each captioned with its
# filename and predicted label.
sample_test = test_df.head(18)
plt.figure(figsize=(12, 24))
# Number the grid slots by row position rather than by DataFrame index label,
# so the layout stays valid even if test_df is ever re-indexed or filtered.
for position, (index, row) in enumerate(sample_test.iterrows(), start=1):
    filename = row['filename']
    category = row['category']
    img = load_img("../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/test1/"+filename, target_size=IMAGE_SIZE)
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel(filename + '(' + str(category) + ')' )
plt.tight_layout()
plt.show()

In [0]:
# Build the submission table: 'id' is the filename up to its first '.',
# 'label' is the predicted class; then write it to Drive without the index column.
submission_df = pd.DataFrame({
    'id': test_df['filename'].str.split('.').str[0],
    'label': test_df['category'],
})
submission_df.to_csv('../content/drive/My Drive/Colab Notebooks/cats-vs-dogs/submission923.csv', index=False)

In [0]:
# Final check: (rows, columns) of the test prediction table
test_df.shape