In [2]:
!unzip -q "/content/drive/MyDrive/vision_project/resized_img_with_aug.zip" -d "/content/drive/MyDrive/vision_project"

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [4]:
CSV_PATH  = "/content/drive/MyDrive/vision_project/ALL_labels_with_augmented.csv"
IMAGE_DIR = "/content/drive/MyDrive/vision_project/resized_images_final"

df=pd.read_csv(CSV_PATH)

train_df, test_df=train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
train_df, val_df=train_test_split(train_df, test_size=0.2, random_state=42, shuffle=True)

print("Train:", len(train_df), ", Val:", len(val_df), ", Test:", len(test_df))

Train: 84781 , Val: 21196 , Test: 26495


In [5]:
BATCH_SIZE=16
IMAGE_SIZE=(224, 224)
LABEL_COLS=["male","female","0s","10s","20s","30s","40s","50s","60+"]

train_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_test_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = train_datagen.flow_from_dataframe(
    dataframe    = train_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = True,
    seed         = 42
)

val_generator = val_test_datagen.flow_from_dataframe(
    dataframe    = val_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = False,
    seed         = 42
)

test_generator = val_test_datagen.flow_from_dataframe(
    dataframe    = test_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = False,
    seed         = 42
)

Found 84779 validated image filenames.




Found 21196 validated image filenames.
Found 26495 validated image filenames.


In [6]:
esc = EarlyStopping(monitor="val_loss", mode="min", patience=5, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint(filepath="/content/drive/MyDrive/vision_project/resnet_model_best.h5", monitor="val_loss", mode="min", save_best_only=True, verbose=1)

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

base_r = ResNet50(include_top=False, weights="imagenet", input_shape=(224,224,3))

inputs = Input(shape=(224,224,3))
x = base_r(inputs)

x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
outputs = Dense(9, activation="sigmoid")(x)

resnet_model = Model(inputs, outputs)
resnet_model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])
resnet_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# 학습
history = resnet_model.fit(train_generator, validation_data=val_generator, epochs=15, callbacks=[esc, model_checkpoint])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - accuracy: 0.4687 - loss: 0.5146
Epoch 1: val_loss improved from inf to 0.42951, saving model to /content/drive/MyDrive/vision_project/resnet_model_best.h5




[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1233s[0m 220ms/step - accuracy: 0.4687 - loss: 0.5146 - val_accuracy: 0.6646 - val_loss: 0.4295
Epoch 2/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.8020 - loss: 0.3563
Epoch 2: val_loss improved from 0.42951 to 0.34368, saving model to /content/drive/MyDrive/vision_project/resnet_model_best.h5




[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1547s[0m 287ms/step - accuracy: 0.8020 - loss: 0.3563 - val_accuracy: 0.6584 - val_loss: 0.3437
Epoch 3/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 0.8602 - loss: 0.2414
Epoch 3: val_loss did not improve from 0.34368
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1421s[0m 268ms/step - accuracy: 0.8602 - loss: 0.2414 - val_accuracy: 0.7648 - val_loss: 0.5568
Epoch 4/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.8934 - loss: 0.2021
Epoch 4: val_loss did not improve from 0.34368
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1028s[0m 194ms/step - accuracy: 0.8934 - loss: 0.2021 - val_accuracy: 0.7566 - val_loss: 0.3783
Epoch 5/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step - accuracy: 0.9



[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1777s[0m 335ms/step - accuracy: 0.9132 - loss: 0.1767 - val_accuracy: 0.9286 - val_loss: 0.2594
Epoch 6/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.9260 - loss: 0.1565
Epoch 6: val_loss did not improve from 0.25941
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1079s[0m 204ms/step - accuracy: 0.9260 - loss: 0.1565 - val_accuracy: 0.8062 - val_loss: 0.5297
Epoch 7/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.9268 - loss: 0.1448
Epoch 7: val_loss did not improve from 0.25941
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1028s[0m 194ms/step - accuracy: 0.9268 - loss: 0.1448 - val_accuracy: 0.9197 - val_loss: 0.2647
Epoch 8/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.9



[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1084s[0m 205ms/step - accuracy: 0.9338 - loss: 0.1149 - val_accuracy: 0.9572 - val_loss: 0.1535
Epoch 10/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.9360 - loss: 0.1022
Epoch 10: val_loss improved from 0.15350 to 0.10190, saving model to /content/drive/MyDrive/vision_project/resnet_model_best.h5




[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1043s[0m 197ms/step - accuracy: 0.9360 - loss: 0.1022 - val_accuracy: 0.9577 - val_loss: 0.1019
Epoch 11/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 0.9347 - loss: 0.0897
Epoch 11: val_loss did not improve from 0.10190
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1087s[0m 205ms/step - accuracy: 0.9347 - loss: 0.0897 - val_accuracy: 0.9084 - val_loss: 0.1260
Epoch 12/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.9325 - loss: 0.0798

In [None]:
resnet_model.load_weights("/content/drive/MyDrive/vision_project/resnet_model_best.h5")

# val
val_loss, val_acc = resnet_model.evaluate(val_generator, verbose=1)
print(f"[ResNet50] Val Loss: {val_loss:.4f} / Val Acc: {val_acc:.4f}")

# test
test_loss, test_acc = resnet_model.evaluate(test_generator, verbose=1)
print(f"[ResNet50] Test Loss: {test_loss:.4f} / Test Acc: {test_acc:.4f}")

  self._warn_if_super_not_called()


[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 76ms/step - accuracy: 0.9587 - loss: 0.1022
[ResNet50] Val Loss: 0.1019 / Val Acc: 0.9577
[1m1656/1656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 75ms/step - accuracy: 0.9605 - loss: 0.1020
[ResNet50] Test Loss: 0.1028 / Test Acc: 0.9583


In [9]:
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import json

label_names = ["male","female","0s","10s","20s","30s","40s","50s","60+"]
age_labels = label_names[2:]

In [None]:
# 예측값, 확률
test_generator.reset()
resnet_preds=resnet_model.predict(test_generator, steps=len(test_generator), verbose=1)
np.save("/content/drive/MyDrive/vision_project/resnet_preds.npy", resnet_preds)

# 이진 예측값
resnet_pred_binary=(resnet_preds > 0.5).astype(int)
np.save("/content/drive/MyDrive/vision_project/resnet_pred_binary.npy", resnet_pred_binary)

# y_true
y_true=test_df[label_names].values
np.save("/content/drive/MyDrive/vision_project/y_true.npy", y_true)

[1m1656/1656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 133ms/step


In [None]:
# classification_report
resnet_report=classification_report(y_true, resnet_pred_binary, target_names=label_names)
with open("/content/drive/MyDrive/vision_project/resnet_classification_report.txt", "w") as f:
    f.write("=== ResNet50 Classification Report ===\n")
    f.write(resnet_report)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# AUC
resnet_auc_dict={name: float(roc_auc_score(y_true[:, i], resnet_preds[:, i])) for i, name in enumerate(label_names)}
resnet_auc_dict["mean_auc"] = float(np.mean(list(resnet_auc_dict.values())))

with open("/content/drive/MyDrive/vision_project/resnet_auc.json", "w") as f:
    json.dump(resnet_auc_dict, f, indent=4)

In [None]:
# 혼동 행렬 age
y_true_age=np.argmax(y_true[:, 2:], axis=1)
y_pred_age=np.argmax(resnet_pred_binary[:, 2:], axis=1)
cm_resnet_age=confusion_matrix(y_true_age, y_pred_age)
np.save("/content/drive/MyDrive/vision_project/cm_resnet_age.npy", cm_resnet_age)

plt.figure(figsize=(6,5))
sns.heatmap(cm_resnet_age, annot=True, fmt="d", cmap="Greens", xticklabels=age_labels, yticklabels=age_labels)
plt.title("ResNet50 Age Confusion Matrix")
plt.savefig("/content/drive/MyDrive/vision_project/cm_resnet_age.png")
plt.close()

In [None]:
# 혼동 행렬 gender
y_true_gender=y_true[:, 0]
y_pred_gender=resnet_pred_binary[:, 0]
cm_resnet_gender=confusion_matrix(y_true_gender, y_pred_gender)
np.save("/content/drive/MyDrive/vision_project/cm_resnet_gender.npy", cm_resnet_gender)

plt.figure(figsize=(4,3))
sns.heatmap(cm_resnet_gender, annot=True, fmt="d", cmap="Greens", xticklabels=["Male", "Female"], yticklabels=["Male", "Female"])
plt.title("ResNet50 Gender Confusion Matrix")
plt.savefig("/content/drive/MyDrive/vision_project/cm_resnet_gender.png")
plt.close()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array

IMAGE_DIR = "/content/drive/MyDrive/vision_project/resized_images_final"
label_names = ["male", "female", "0s", "10s", "20s", "30s", "40s", "50s", "60+"]

# 예측 및 정답 로드
resnet_preds = np.load("/content/drive/MyDrive/vision_project/resnet_preds.npy")
y_true = np.load("/content/drive/MyDrive/vision_project/y_true.npy")

# test_df에서 샘플 10개 랜덤 추출
sample_df = test_df.sample(n=10, random_state=42).reset_index(drop=True)
sample_indices = sample_df.index.tolist()

# 시각화
fig, axes = plt.subplots(2, 5, figsize=(20, 8))
axes = axes.ravel()

for i in range(10):
    fname = sample_df.loc[i, "filename"]
    img_path = os.path.join(IMAGE_DIR, fname)

    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img) / 255.0

    pred = resnet_preds[sample_indices[i]]
    pred_label = [label for idx, label in enumerate(label_names) if pred[idx] > 0.5]
    true = y_true[sample_indices[i]]
    true_label = [label for idx, label in enumerate(label_names) if true[idx] == 1]

    axes[i].imshow(img_array)
    axes[i].axis('off')
    axes[i].set_title(f"[Pred] {pred_label}\n[Real] {true_label}")

# layout 조정
plt.tight_layout()
plt.show()