In [21]:
import os
from glob import glob

import tensorflow as tf
import numpy as np
import pandas as pd

from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Single seed for the whole notebook.
seed = 13
# Seeds Python's `random`, NumPy and TensorFlow in one call. Previously only
# NumPy and TensorFlow were seeded, leaving Python's RNG unseeded.
tf.keras.utils.set_random_seed(seed)

# Dataset locations, relative to the notebook's working directory.
# Later cells glob <train_dir>/<label>/*.jpg and <test_dir>/*.jpg.
data_root = os.path.join("..", "data", "dogs-vs-cats")
train_dir = os.path.join(data_root, "train")
test_dir = os.path.join(data_root, "test1")

# Helper to flatten cat/dog folders into a labeled dataframe
def build_labeled_df(directory):
    """Collect ``<directory>/<label>/*.jpg`` files into a labeled DataFrame.

    Args:
        directory: Folder whose immediate sub-folders are named after the
            class of the images they contain.

    Returns:
        A DataFrame with a ``file`` column (paths, sorted lexicographically)
        and a ``label`` column (name of each file's parent folder). The frame
        is empty when nothing matches.
    """
    image_paths = glob(os.path.join(directory, "*", "*.jpg"))
    image_paths.sort()
    # The label is the name of the folder that directly contains the image.
    class_names = [os.path.basename(os.path.dirname(path)) for path in image_paths]
    return pd.DataFrame({"file": image_paths, "label": class_names})

train_df = build_labeled_df(train_dir)
# Fail fast with a readable message when the data path is wrong or laid out
# differently; otherwise the empty frame only surfaces as an obscure error in
# a later cell.
assert not train_df.empty, f"No training images found under {train_dir!r}"
train_df.head()

Unnamed: 0,file,label
0,../data/dogs-vs-cats/train/cat/cat.0.jpg,cat
1,../data/dogs-vs-cats/train/cat/cat.1.jpg,cat
2,../data/dogs-vs-cats/train/cat/cat.10.jpg,cat
3,../data/dogs-vs-cats/train/cat/cat.100.jpg,cat
4,../data/dogs-vs-cats/train/cat/cat.1000.jpg,cat


In [22]:
# Count images per label to check the class balance before splitting.
train_df["label"].value_counts()

label
cat    12500
dog    12500
Name: count, dtype: int64

In [23]:
# Test images sit directly inside test_dir and carry no label, so the frame
# has only a "file" column. Paths are sorted lexicographically.
test_df = pd.DataFrame({"file": sorted(glob(os.path.join(test_dir, "*.jpg")))})
test_files = test_df["file"].tolist()
test_df.head()

Unnamed: 0,file
0,../data/dogs-vs-cats/test1/1.jpg
1,../data/dogs-vs-cats/test1/10.jpg
2,../data/dogs-vs-cats/test1/100.jpg
3,../data/dogs-vs-cats/test1/1000.jpg
4,../data/dogs-vs-cats/test1/10000.jpg


In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labeled images for validation. Stratifying on the label
# keeps the class proportions the same in both splits. The notebook-wide
# `seed` replaces the repeated literal so the split follows the same seed as
# the rest of the notebook.
train_data, val_data = train_test_split(
    train_df,
    test_size=0.2,
    stratify=train_df["label"],
    random_state=seed,
)

In [25]:
# Training images get light augmentation (rotation up to 15 degrees, random
# horizontal flips) and are then passed through VGG16's `preprocess_input`.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=15,
    horizontal_flip=True,
)

# Validation images are only preprocessed, never augmented.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [26]:
batch_size = 16

# Arguments common to both generators: read paths from "file" and class names
# from "label", emit one-hot targets, and resize every image to 224x224 (the
# input size the VGG16 base is built with).
flow_kwargs = dict(
    x_col="file",
    y_col="label",
    class_mode="categorical",
    target_size=(224, 224),
    batch_size=batch_size,
)

# Training batches are reshuffled every epoch, reproducibly via the seed.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    shuffle=True,
    seed=seed,
    **flow_kwargs,
)

# shuffle=False is essential here: `flow_from_dataframe` shuffles by default,
# and later cells write `model.predict(val_generator)` back onto `val_data`
# row by row. With shuffling on, predictions no longer line up with their
# images and the confusion matrix is meaningless.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    shuffle=False,
    **flow_kwargs,
)

Found 20000 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.


In [27]:
# VGG16 with ImageNet weights as the feature extractor. include_top=False
# leaves out the original classification head so a new one can be attached;
# inputs are 224x224 images with 3 channels.
base_model = VGG16(weights="imagenet", input_shape=(224, 224, 3), include_top=False)

In [28]:
# Inspect the backbone's layers and parameter counts before freezing it.
base_model.summary()

### 가중치가 학습되지 않도록 잠가 버리자

In [29]:
# Freeze every backbone layer so training only updates the new head.
for layer in base_model.layers:
    layer.trainable = False

# Print the summary again to confirm the parameters are now non-trainable.
base_model.summary()

In [30]:
def vgg16_pretrained():
    """Stack a new dense classification head on the notebook-level ``base_model``.

    The head is global average pooling, then Dense(100, relu), Dropout(0.4),
    two Dense(64, relu) layers and a 2-unit softmax output. ``base_model`` is
    read from the enclosing notebook scope, so it must be defined before this
    function is called.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    head = [
        GlobalAveragePooling2D(),
        Dense(100, activation="relu"),
        Dropout(0.4),
        Dense(64, activation="relu"),
        Dense(64, activation="relu"),
        Dense(2, activation="softmax"),
    ]
    return Sequential([base_model, *head])

In [31]:
# Reset Keras' global state before building the classifier.
# NOTE(review): base_model was created before this call and is reused
# afterwards. Confirm that clearing the session at this point is intended.
tf.keras.backend.clear_session()

In [32]:
# Build the classifier and compile it for two-class, one-hot targets.
model = vgg16_pretrained()
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [33]:
# Halve the learning rate when val_accuracy has not improved for 2 epochs,
# never going below 1e-9. mode="max" is stated explicitly because the
# monitored metric should increase, and to match the EarlyStopping callback
# instead of relying on the "auto" name-based guess.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    mode="max",
    factor=0.5,
    patience=2,
    min_lr=1e-9,
    verbose=1,
)

In [34]:
# Stop training once val_accuracy has failed to improve for 5 epochs in a row.
# NOTE(review): the fit call in this notebook runs only 5 epochs, so this
# patience cannot be reached. Confirm whether a smaller patience or more
# epochs was intended.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=5,
    verbose=1,
)

In [35]:
# Save weights only, and only when val_accuracy beats the best value so far.
# The epoch number and the score are embedded in the file name, so each
# improvement produces a new file. mode="max" is explicit to match the
# EarlyStopping callback instead of relying on the "auto" name-based guess.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="catdog_vgg16_.{epoch:02d}-{val_accuracy:.6f}.weights.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# No explicit step counts: each generator knows how many batches it holds, so
# Keras runs one full pass per epoch. The previous
# `validation_steps=val_data.shape[0] // batch_size` rounded down and skipped
# the final partial validation batch every epoch (5000 // 16 leaves out 8
# images).
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[reduce_lr, early_stopping, checkpoint],
)

Epoch 1/5
[1m 808/1250[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m11:48[0m 2s/step - accuracy: 0.9186 - loss: 0.2647

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One panel per metric: training and validation curves share an axis, with the
# epoch index (starting at 0) on the x axis.
curves = history.history
panels = [("loss", "Loss"), ("accuracy", "Accuracy")]

for ax, (metric, title) in zip(axes, panels):
    epoch_index = range(len(curves[metric]))
    sns.lineplot(
        x=epoch_index,
        y=curves[metric],
        ax=ax,
        label=f"Training {title}",
    )
    sns.lineplot(
        x=epoch_index,
        y=curves[f"val_{metric}"],
        ax=ax,
        label=f"Validation {title}",
    )
    ax.set_title(title)

sns.despine()
plt.show()

In [None]:
# Loss and accuracy over the validation generator, in the order Keras returns
# them for the compiled loss and single metric.
val_loss, val_accuracy = model.evaluate(val_generator)

for metric_name, metric_value in (("Accuracy", val_accuracy), ("Loss", val_loss)):
    print(f"Validation {metric_name} : {metric_value: .4f}")

In [None]:
# Class scores for the validation images, in the order the generator yields
# them.
# NOTE(review): the next cell assigns these row by row onto val_data, which is
# only correct if val_generator does not shuffle. Verify its `shuffle` setting.
val_pred = model.predict(val_generator)

In [None]:
# Invert the generator's {class name: index} mapping into {index: class name}.
labels = {index: name for name, index in val_generator.class_indices.items()}

# Row i of val_pred is attached to row i of val_data. This assumes the
# predictions come back in val_data's row order (the validation generator must
# not shuffle); the length check catches dropped or missing rows.
assert len(val_pred) == len(val_data), "prediction count does not match val_data"
predicted_index = np.argmax(val_pred, axis=1)

# Build the final string column in one step on a new frame. The earlier code
# wrote integers and then strings into the same column through `.loc`, which
# changes the column's dtype in place and mutates a frame derived from
# train_df. `assign` avoids both and keeps the cell safe to re-run.
val_data = val_data.assign(val_pred=[labels[i] for i in predicted_index])

val_data.head(10)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# One list fixes both the row/column order of the matrix and the tick labels,
# so the two cannot drift apart. It also keeps the matrix 2x2 even if one
# class never appears among the true or predicted labels.
class_names = ["cat", "dog"]

fig, ax = plt.subplots(figsize=(9, 6))

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(val_data["label"], val_data["val_pred"], labels=class_names)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues, ax=ax)

ax.set_title("Validation Set")
plt.show()