In [3]:
!pip install git+https://github.com/openai/CLIP.git
!pip install scikit-learn torchvision
!pip install faiss-cpu


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-h9p9mj1v
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-h9p9mj1v
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->clip==1.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->clip==1.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->clip==1.0)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (

In [30]:
import os
import clip
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from tqdm.auto import tqdm
import faiss
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm.auto import tqdm

# Load model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Load ảnh
data_dir = "/kaggle/input/face-recognition-dataset/Face Data/Face Dataset"
dataset = ImageFolder(data_dir, transform=preprocess)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
class_names = dataset.classes


In [31]:
# === 1. Trích xuất embedding ===
all_embeddings = []
all_labels = []

with torch.no_grad():
    for images, labels in tqdm(dataloader, desc="Extracting embeddings"):
        images = images.to(device)
        embeddings = model.encode_image(images)  # CHƯA normalize ở đây
        all_embeddings.append(embeddings.cpu())
        all_labels.extend(labels)

all_embeddings = torch.cat(all_embeddings).numpy().astype("float32")
all_labels = np.array(all_labels)

# === 2. Chuẩn hóa L2 (cần cho cosine similarity) ===
faiss.normalize_L2(all_embeddings)

# === 3. Tạo FAISS index và tìm hàng xóm gần nhất ===
index = faiss.IndexFlatIP(all_embeddings.shape[1])
index.add(all_embeddings)
D, I = index.search(all_embeddings, 2)  # Lấy 2 vì index gần nhất đầu tiên là chính nó

# === 4. Lấy nhãn của hàng xóm gần nhất (bỏ self) ===
pred_labels = [all_labels[i[1]] for i in I]  # i[1] là hàng xóm gần nhất khác chính nó

# === 5. Tính metric ===
acc = accuracy_score(all_labels, pred_labels)
prec = precision_score(all_labels, pred_labels, average='weighted', zero_division=0)
rec = recall_score(all_labels, pred_labels, average='weighted')
f1 = f1_score(all_labels, pred_labels, average='weighted')

print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1-score:  {f1:.4f}")


Extracting embeddings:   0%|          | 0/257 [00:00<?, ?it/s]

Accuracy:  0.7311
Precision: 0.6982
Recall:    0.7311
F1-score:  0.7022


In [19]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torch import nn, optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
from tqdm.auto import tqdm

# ------------------ Device ------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------ Transform ------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# ------------------ Load Dataset & Split ------------------
data_dir = "/kaggle/input/face-recognition-dataset/Face Data/Face Dataset"
dataset = ImageFolder(data_dir, transform=transform)
idx_to_class = dataset.class_to_idx
class_names = list(idx_to_class.keys())
num_classes = len(class_names)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

# ------------------ Model Factory ------------------
def get_model(name):
    if name == 'resnet18':
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, num_classes)
    elif name == 'resnet50':
        model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        in_features = model.fc.in_features
        model.fc = nn.Linear(in_features, num_classes)
    elif name == 'mobilenet_v2':
        model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
        in_features = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(in_features, num_classes)
    elif name == 'efficientnet_b0':
        model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
        in_features = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(in_features, num_classes)
    else:
        raise ValueError("Unsupported model name.")
    return model.to(device)

# ------------------ Training + Evaluation ------------------
results = []

def train_and_evaluate(model_name):
    print(f"\n========== {model_name.upper()} ==========")
    model = get_model(model_name)

    # Freeze all except final layer
    for param in model.parameters():
        param.requires_grad = False

    if model_name.startswith("resnet"):
        for param in model.fc.parameters():
            param.requires_grad = True
        optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)
    else:
        for param in model.classifier.parameters():
            param.requires_grad = True
        optimizer = optim.Adam(model.classifier.parameters(), lr=1e-3)

    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(5):
        model.train()
        total_loss = 0
        loop = tqdm(train_loader, desc=f"{model_name} Epoch {epoch+1}/5")
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())
        print(f"[{model_name}] Epoch {epoch+1}, Loss: {total_loss:.4f}")

    # Evaluation
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"{model_name} Evaluating"):
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Convert to np.array for fast metric computation
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')

    results.append({
        'Model': model_name,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-score': f1
    })

    print(f"[{model_name}] Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | F1-score: {f1:.4f}")


In [21]:
model_name = 'resnet18' 
train_and_evaluate(model_name)




resnet18 Epoch 1/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet18] Epoch 1, Loss: 1573.2020


resnet18 Epoch 2/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet18] Epoch 2, Loss: 1021.7655


resnet18 Epoch 3/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet18] Epoch 3, Loss: 699.3007


resnet18 Epoch 4/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet18] Epoch 4, Loss: 459.8185


resnet18 Epoch 5/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet18] Epoch 5, Loss: 310.5945


resnet18 Evaluating:   0%|          | 0/52 [00:00<?, ?it/s]

[resnet18] Accuracy: 0.2151 | Precision: 0.1743 | Recall: 0.2151 | F1-score: 0.1672


  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
model_name = 'resnet50' 
train_and_evaluate(model_name)





Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 171MB/s] 


resnet50 Epoch 1/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet50] Epoch 1, Loss: 1438.9347


resnet50 Epoch 2/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet50] Epoch 2, Loss: 702.5067


resnet50 Epoch 3/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet50] Epoch 3, Loss: 341.9952


resnet50 Epoch 4/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet50] Epoch 4, Loss: 179.1930


resnet50 Epoch 5/5:   0%|          | 0/206 [00:00<?, ?it/s]

[resnet50] Epoch 5, Loss: 107.8450


resnet50 Evaluating:   0%|          | 0/52 [00:00<?, ?it/s]

[resnet50] Accuracy: 0.2169 | Precision: 0.1493 | Recall: 0.2169 | F1-score: 0.1483


  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
model_name = 'mobilenet_v2'
train_and_evaluate(model_name)




Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 53.5MB/s]


mobilenet_v2 Epoch 1/5:   0%|          | 0/206 [00:00<?, ?it/s]

[mobilenet_v2] Epoch 1, Loss: 1447.7534


mobilenet_v2 Epoch 2/5:   0%|          | 0/206 [00:00<?, ?it/s]

[mobilenet_v2] Epoch 2, Loss: 792.5970


mobilenet_v2 Epoch 3/5:   0%|          | 0/206 [00:00<?, ?it/s]

[mobilenet_v2] Epoch 3, Loss: 411.1293


mobilenet_v2 Epoch 4/5:   0%|          | 0/206 [00:00<?, ?it/s]

[mobilenet_v2] Epoch 4, Loss: 221.8519


mobilenet_v2 Epoch 5/5:   0%|          | 0/206 [00:00<?, ?it/s]

[mobilenet_v2] Epoch 5, Loss: 142.6523


mobilenet_v2 Evaluating:   0%|          | 0/52 [00:00<?, ?it/s]

[mobilenet_v2] Accuracy: 0.2218 | Precision: 0.1658 | Recall: 0.2218 | F1-score: 0.1621


  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
model_name = 'efficientnet_b0'
train_and_evaluate(model_name)




Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 141MB/s] 


efficientnet_b0 Epoch 1/5:   0%|          | 0/206 [00:00<?, ?it/s]

[efficientnet_b0] Epoch 1, Loss: 1480.5729


efficientnet_b0 Epoch 2/5:   0%|          | 0/206 [00:00<?, ?it/s]

[efficientnet_b0] Epoch 2, Loss: 929.9190


efficientnet_b0 Epoch 3/5:   0%|          | 0/206 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7b24ac4a7ba0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7b24ac4a7ba0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

[efficientnet_b0] Epoch 3, Loss: 577.1096


efficientnet_b0 Epoch 4/5:   0%|          | 0/206 [00:00<?, ?it/s]

[efficientnet_b0] Epoch 4, Loss: 335.5375


efficientnet_b0 Epoch 5/5:   0%|          | 0/206 [00:00<?, ?it/s]

[efficientnet_b0] Epoch 5, Loss: 205.8051


efficientnet_b0 Evaluating:   0%|          | 0/52 [00:00<?, ?it/s]

[efficientnet_b0] Accuracy: 0.2602 | Precision: 0.1916 | Recall: 0.2602 | F1-score: 0.1923


  _warn_prf(average, modifier, msg_start, len(result))
