In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
import os, random, shutil

# GPU setup
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU Active:", gpus)
    except RuntimeError as e:
        print(e)

2025-11-08 15:15:42.172410: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762614942.393935      39 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762614942.467361      39 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


GPU Active: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth for all GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✅ Enabled memory growth for {len(gpus)} GPU(s).")
    except RuntimeError as e:
        print("⚠️ Memory growth must be set before GPUs are initialized:", e)

✅ Enabled memory growth for 2 GPU(s).


In [3]:
import tensorflow as tf
import keras
print("Keras:", keras.__version__)
print("TensorFlow:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU devices:", tf.config.list_physical_devices('GPU'))

Keras: 3.8.0
TensorFlow: 2.18.0
Built with CUDA: True
GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [4]:
def create_subset(src, dst, limit_per_class=100):
    os.makedirs(dst, exist_ok=True)
    for cls in os.listdir(src):
        src_cls = os.path.join(src, cls)
        dst_cls = os.path.join(dst, cls)
        os.makedirs(dst_cls, exist_ok=True)
        imgs = os.listdir(src_cls)
        random.shuffle(imgs)
        for img in imgs[:limit_per_class]:
            shutil.copy(os.path.join(src_cls, img), os.path.join(dst_cls, img))

# Create reduced dataset to speed up training
create_subset("../input/11-785-fall-20-homework-2-part-2/classification_data/train_data", "./train_subset", limit_per_class=100)
create_subset("../input/11-785-fall-20-homework-2-part-2/classification_data/val_data", "./val_subset", limit_per_class=30)

train_dir = "./train_subset"
val_dir = "./val_subset"
img_size = (160,160)
batch_size = 32

In [5]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)
val_gen = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

num_classes = len(train_gen.class_indices)
print("Detected classes:", num_classes)

Found 333638 images belonging to 4000 classes.
Found 8000 images belonging to 4000 classes.
Detected classes: 4000


In [6]:
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(*img_size, 3))
base_model.trainable = False  # freeze backbone

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.4)(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

I0000 00:00:1762616858.455952      39 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1762616858.456718      39 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

history1 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=15,
    callbacks=[early_stop, reduce_lr]
)

  self._warn_if_super_not_called()


Epoch 1/15


I0000 00:00:1762616877.287575      99 service.cc:148] XLA service 0x7d43780026d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1762616877.288464      99 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1762616877.288499      99 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1762616879.067623      99 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m    1/10427[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m76:01:46[0m 26s/step - accuracy: 0.0000e+00 - loss: 8.3406

I0000 00:00:1762616888.331662      99 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 9051/10427[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m3:55[0m 171ms/step - accuracy: 2.6979e-04 - loss: 8.9352

KeyboardInterrupt: 

In [None]:
for layer in base_model.layers[-40:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history2 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=5,
    callbacks=[early_stop]
)

In [None]:
model.save("/kaggle/working/classification_face_recognition_efficientnet.keras")

embedding_model = Model(inputs=model.input, outputs=model.layers[-2].output)
embedding_model.save("/kaggle/working/face_embedding_efficientnet.keras")

print("Saved both models successfully.")

In [None]:
import matplotlib.pyplot as plt

def plot_history(history, title):
    plt.figure(figsize=(6,4))
    plt.plot(history.history['accuracy'], label='Train Acc')
    plt.plot(history.history['val_accuracy'], label='Val Acc')
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_history(history1, "Phase 1 Accuracy (Frozen Base)")
plot_history(history2, "Phase 2 Accuracy (Fine-tuned)")

In [None]:
from tensorflow.keras.preprocessing import image
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import roc_auc_score

def get_embedding(img_path):
    img = image.load_img(img_path, target_size=img_size)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return embedding_model.predict(img_array)

pairs_file = "../input/11-785-fall-20-homework-2-part-2/verification_pairs_val.txt"
scores, labels = [], []

with open(pairs_file) as f:
    for line in f:
        img1, img2, label = line.strip().split()
        img1 = "../input/11-785-fall-20-homework-2-part-2/" + img1
        img2 = "../input/11-785-fall-20-homework-2-part-2/" + img2
        emb1, emb2 = get_embedding(img1), get_embedding(img2)
        sim = cosine_similarity(emb1, emb2)[0][0]
        scores.append(sim)
        labels.append(int(label))

auc = roc_auc_score(labels, scores)
print(f"Verification AUC: {auc:.4f}")