In [2]:
!unzip -q "/content/drive/MyDrive/vision_project/resized_img_with_aug.zip" -d "/content/drive/MyDrive/vision_project"

In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
CSV_PATH  = "/content/drive/MyDrive/vision_project/ALL_labels_with_augmented.csv"
IMAGE_DIR = "/content/drive/MyDrive/vision_project/resized_images_final"

df = pd.read_csv(CSV_PATH)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42, shuffle=True)

print("Train:", len(train_df), ", Val:", len(val_df), ", Test:", len(test_df))

Train: 84781 , Val: 21196 , Test: 26495


In [None]:
BATCH_SIZE=16
IMAGE_SIZE=(224, 224)
LABEL_COLS=["male","female","0s","10s","20s","30s","40s","50s","60+"]

train_datagen = ImageDataGenerator(rescale=1.0/255.0)
val_test_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = train_datagen.flow_from_dataframe(
    dataframe    = train_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = True,
    seed         = 42
)

val_generator = val_test_datagen.flow_from_dataframe(
    dataframe    = val_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = False,
    seed         = 42
)

test_generator = val_test_datagen.flow_from_dataframe(
    dataframe    = test_df,
    directory    = IMAGE_DIR,
    x_col        = "filename",
    y_col        = LABEL_COLS,
    target_size  = IMAGE_SIZE,
    batch_size   = BATCH_SIZE,
    class_mode   = "raw",
    shuffle      = False,
    seed         = 42
)

Found 84779 validated image filenames.




Found 21196 validated image filenames.
Found 26495 validated image filenames.


In [6]:
esc = EarlyStopping(monitor="val_loss", mode="min", patience=5, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint(filepath="/content/drive/MyDrive/vision_project/densenet_model_best.h5", monitor="val_loss", mode="min", save_best_only=True, verbose=1)

In [None]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

base_d = DenseNet121(include_top=False, weights="imagenet", input_shape=(224,224,3))

inputs = Input(shape=(224,224,3))
x = base_d(inputs)

x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(256, activation="relu")(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
outputs = Dense(9, activation="sigmoid")(x)

dense_model = Model(inputs, outputs)
dense_model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])
dense_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# 학습
history = dense_model.fit(train_generator, validation_data=val_generator, epochs=15, callbacks=[esc, model_checkpoint])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step - accuracy: 0.5612 - loss: 0.4832
Epoch 1: val_loss improved from inf to 0.29863, saving model to /content/drive/MyDrive/vision_project/densenet_model_best.h5




[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1248s[0m 203ms/step - accuracy: 0.5613 - loss: 0.4832 - val_accuracy: 0.7715 - val_loss: 0.2986
Epoch 2/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - accuracy: 0.8614 - loss: 0.2447
Epoch 2: val_loss improved from 0.29863 to 0.20702, saving model to /content/drive/MyDrive/vision_project/densenet_model_best.h5




[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m967s[0m 182ms/step - accuracy: 0.8614 - loss: 0.2447 - val_accuracy: 0.8959 - val_loss: 0.2070
Epoch 3/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.8929 - loss: 0.1967
Epoch 3: val_loss did not improve from 0.20702
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m957s[0m 181ms/step - accuracy: 0.8929 - loss: 0.1967 - val_accuracy: 0.9525 - val_loss: 0.2171
Epoch 4/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9167 - loss: 0.1685
Epoch 4: val_loss did not improve from 0.20702
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1030s[0m 190ms/step - accuracy: 0.9167 - loss: 0.1685 - val_accuracy: 0.9170 - val_loss: 0.2303
Epoch 5/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.927



[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1040s[0m 196ms/step - accuracy: 0.9290 - loss: 0.1368 - val_accuracy: 0.9540 - val_loss: 0.1170
Epoch 7/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9291 - loss: 0.1234
Epoch 7: val_loss did not improve from 0.11696
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m957s[0m 181ms/step - accuracy: 0.9291 - loss: 0.1234 - val_accuracy: 0.9050 - val_loss: 0.3816
Epoch 8/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.9241 - loss: 0.1304
Epoch 8: val_loss did not improve from 0.11696
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m955s[0m 180ms/step - accuracy: 0.9241 - loss: 0.1304 - val_accuracy: 0.9247 - val_loss: 0.1576
Epoch 9/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.926



[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m950s[0m 179ms/step - accuracy: 0.9231 - loss: 0.0961 - val_accuracy: 0.9166 - val_loss: 0.1094
Epoch 11/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.9156 - loss: 0.0871
Epoch 11: val_loss did not improve from 0.10941
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1000s[0m 189ms/step - accuracy: 0.9156 - loss: 0.0871 - val_accuracy: 0.9505 - val_loss: 0.2502
Epoch 12/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.9140 - loss: 0.0792
Epoch 12: val_loss did not improve from 0.10941
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m944s[0m 178ms/step - accuracy: 0.9140 - loss: 0.0792 - val_accuracy: 0.9317 - val_loss: 0.1186
Epoch 13/15
[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 



[1m5299/5299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m944s[0m 178ms/step - accuracy: 0.9154 - loss: 0.0632 - val_accuracy: 0.9576 - val_loss: 0.0996
Epoch 15/15
[1m2274/5299[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m8:05[0m 161ms/step - accuracy: 0.9015 - loss: 0.0582

In [None]:
dense_model.load_weights("/content/drive/MyDrive/vision_project/densenet_model_best.h5")

# val
val_loss, val_acc = dense_model.evaluate(val_generator, verbose=1)
print(f"[DenseNet121] Val Loss: {val_loss:.4f} / Val Acc: {val_acc:.4f}")

# test
test_loss, test_acc = dense_model.evaluate(test_generator, verbose=1)
print(f"[DenseNet121] Test Loss: {test_loss:.4f} / Test Acc: {test_acc:.4f}")

  self._warn_if_super_not_called()


[1m1325/1325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 84ms/step - accuracy: 0.9587 - loss: 0.1015
[DenseNet121] Val Loss: 0.0996 / Val Acc: 0.9576
[1m1656/1656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 82ms/step - accuracy: 0.9607 - loss: 0.0993
[DenseNet121] Test Loss: 0.1004 / Test Acc: 0.9582


In [9]:
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import json

label_names=["male","female","0s","10s","20s","30s","40s","50s","60+"]
age_labels=label_names[2:]

In [None]:
# 예측값
test_generator.reset()
dense_preds=dense_model.predict(test_generator, steps=len(test_generator), verbose=1)
np.save("/content/drive/MyDrive/vision_project/dense_preds.npy", dense_preds)

# 이진 예측값
dense_pred_binary=(dense_preds > 0.5).astype(int)
np.save("/content/drive/MyDrive/vision_project/dense_pred_binary.npy", dense_pred_binary)

# y_true
y_true=test_df[label_names].values
np.save("/content/drive/MyDrive/vision_project/y_true.npy", y_true)

[1m1656/1656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 79ms/step


In [None]:
dense_report=classification_report(y_true, dense_pred_binary, target_names=label_names)
with open("/content/drive/MyDrive/vision_project/dense_classification_report.txt", "w") as f:
    f.write("=== DenseNet121 Classification Report ===\n")
    f.write(dense_report)

In [20]:
print(dense_report)

              precision    recall  f1-score   support

        male       1.00      0.99      0.99     11600
      female       0.99      1.00      0.99     14895
          0s       0.91      0.56      0.69       384
         10s       0.98      0.83      0.90      9735
         20s       0.65      0.83      0.73      3367
         30s       0.64      0.49      0.55      1170
         40s       0.79      0.89      0.84      5614
         50s       0.85      0.53      0.66      2817
         60+       0.94      0.94      0.94      3408

   micro avg       0.92      0.90      0.91     52990
   macro avg       0.86      0.78      0.81     52990
weighted avg       0.93      0.90      0.91     52990
 samples avg       0.92      0.90      0.91     52990



In [None]:
dense_auc_dict={name: float(roc_auc_score(y_true[:, i], dense_preds[:, i])) for i, name in enumerate(label_names)}
dense_auc_dict["mean_auc"]=float(np.mean(list(dense_auc_dict.values())))

with open("/content/drive/MyDrive/vision_project/dense_auc.json", "w") as f:
    json.dump(dense_auc_dict, f, indent=4)

In [None]:
y_true_age=np.argmax(y_true[:, 2:], axis=1)
y_pred_age=np.argmax(dense_pred_binary[:, 2:], axis=1)
cm_dense_age=confusion_matrix(y_true_age, y_pred_age)
np.save("/content/drive/MyDrive/vision_project/cm_dense_age.npy", cm_dense_age)

plt.figure(figsize=(6,5))
sns.heatmap(cm_dense_age, annot=True, fmt="d", cmap="Blues", xticklabels=age_labels, yticklabels=age_labels)
plt.title("DenseNet121 Age Confusion Matrix")
plt.savefig("/content/drive/MyDrive/vision_project/cm_dense_age.png")
plt.close()

In [23]:
print(cm_dense_age)

[[ 252  113    2   12    4    1    0]
 [ 305 8083 1242   11   93    0    1]
 [ 218   71 2792  177  106    2    1]
 [ 151    2  202  569  238    7    1]
 [ 264    5   58  104 5013  149   21]
 [ 216    1    5   17  896 1505  177]
 [  57    0    0    0   24  111 3216]]


In [None]:
y_true_gender=y_true[:, 0]
y_pred_gender=dense_pred_binary[:, 0]
cm_dense_gender=confusion_matrix(y_true_gender, y_pred_gender)
np.save("/content/drive/MyDrive/vision_project/cm_dense_gender.npy", cm_dense_gender)

plt.figure(figsize=(4,3))
sns.heatmap(cm_dense_gender, annot=True, fmt="d", cmap="Blues", xticklabels=["Male", "Female"], yticklabels=["Male", "Female"])
plt.title("DenseNet121 Gender Confusion Matrix")
plt.savefig("/content/drive/MyDrive/vision_project/cm_dense_gender.png")
plt.close()

In [25]:
print(cm_dense_gender)

[[14839    56]
 [  110 11490]]


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array

IMAGE_DIR = "/content/drive/MyDrive/vision_project/resized_images_final"
label_names = ["male", "female", "0s", "10s", "20s", "30s", "40s", "50s", "60+"]

dense_preds = np.load("/content/drive/MyDrive/vision_project/dense_preds.npy")
y_true = np.load("/content/drive/MyDrive/vision_project/y_true.npy")

sample_df = test_df.sample(n=10).reset_index(drop=True)
sample_indices = sample_df.index.tolist()

fig, axes = plt.subplots(2, 5, figsize=(20, 8))
axes = axes.ravel()

for i in range(10):
    fname = sample_df.loc[i, "filename"]
    img_path = os.path.join(IMAGE_DIR, fname)

    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img) / 255.0

    pred = dense_preds[sample_indices[i]]
    pred_label = [label for idx, label in enumerate(label_names) if pred[idx] > 0.5]
    true = y_true[sample_indices[i]]
    true_label = [label for idx, label in enumerate(label_names) if true[idx] == 1]

    axes[i].imshow(img_array)
    axes[i].axis('off')
    axes[i].set_title(f"[Pred] {pred_label}\n[Real] {true_label}")

plt.tight_layout()
plt.show()