## 下载数据

图片按照如下目录结构组织：

![](images/2022-01-13-16-08-29.png)

方便 `ImageDataGenerator` 提取数据。


In [1]:
import urllib.request
import zipfile

url = (
    "https://storage.googleapis.com/laurencemoroney-blog.appspot.com/horse-or-human.zip"
)

file_name = "horse-or-human.zip"
training_dir = "horse-or-human/training/"
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, "r")
zip_ref.extractall(training_dir)
zip_ref.close()



In [2]:
from tensorflow.keras.preprocessing import image

# 所有图片缩放到 1./255
train_datagen = image.ImageDataGenerator(rescale=1 / 255.0)
train_generator = train_datagen.flow_from_directory(
    training_dir, 
    target_size=(300, 300), 
    class_mode="binary"
)


Found 1027 images belonging to 2 classes.


和 Fashion MNIST 不同的是，这里的图片是 300x300 的彩色图片，所以可能需要更多层，通道数也从 1 到 3.

另外，只有两种类型，因此输出神经元只需要一个。

In [3]:
import tensorflow.keras as keras

model = keras.models.Sequential(
    [
        keras.layers.Conv2D(
            16, (3, 3), activation="relu", input_shape=(300, 300, 3)
        ),  # 底层特征较少
        keras.layers.MaxPooling2D(2, 2),
        keras.layers.Conv2D(32, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(2, 2),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(2, 2),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(2, 2),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(2, 2),
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)


In [4]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 298, 298, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 149, 149, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 147, 147, 32)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 73, 73, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 71, 71, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 35, 35, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 33, 33, 64)        3

In [5]:
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
    metrics=["accuracy"],
)


In [6]:
history = model.fit_generator(train_generator, epochs=15)




Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [7]:
validation_url = "https://storage.googleapis.com/laurencemoroney-blog.appspot.com/validation-horse-or-human.zip"

validation_file_name = "validation-horse-or-human.zip"
validation_dir = "horse-or-human/validation/"

urllib.request.urlretrieve(validation_url, validation_file_name)

zip_ref = zipfile.ZipFile(validation_file_name, "r")
zip_ref.extractall(validation_dir)
zip_ref.close()


In [8]:
validation_datagen = image.ImageDataGenerator(rescale=1 / 255.0)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir, target_size=(300, 300), class_mode="binary"
)


Found 256 images belonging to 2 classes.


In [9]:
history = model.fit(
    train_generator,
    epochs=15,
    validation_data=validation_generator
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [11]:
from tensorflow.keras.preprocessing import image
import numpy as np

# 载入图片，并 reshape 为 (300,300)
img = image.load_img("images/white-horse-gb72298424_640.jpg", target_size=(300, 300))
# 转换为 2D 数组
x = image.img_to_array(img)
# 扩展维度，转换为 3D 数组
x = np.expand_dims(x, axis=0)

image_tensor = np.vstack([x])
classes = model.predict(image_tensor)
print(classes)
print(classes[0])

if classes[0] > 0.5:
    print("is a human")
else:
    print("is a horse")


[[0.]]
[0.]
is a horse


In [12]:
# 载入图片，并 reshape 为 (300,300)
img = image.load_img("images/white-horse-gb72298424_640.jpg", target_size=(300, 300))
# 转换为 2D 数组
x = image.img_to_array(img)
# 扩展维度，转换为 3D 数组
x = np.expand_dims(x, axis=0)

classes = model.predict(x)
print(classes)
print(classes[0])

if classes[0] > 0.5:
    print("is a human")
else:
    print("is a horse")


[[0.]]
[0.]
is a horse
