In [2]:
# Install with the %pip magic so the package lands in the environment of the
# running kernel, and pin the version this notebook was last executed against.
%pip install pillow-heif==1.1.0

Collecting pillow-heif
  Downloading pillow_heif-1.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.6 kB)
Downloading pillow_heif-1.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (5.5 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/5.5 MB[0m [31m18.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m5.5/5.5 MB[0m [31m86.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pillow-heif
Successfully installed pillow-heif-1.1.0


In [102]:
import os
import cv2
import torch
import random
import numpy as np
import pickle
import uuid
from uuid import uuid4
from pathlib import Path
from typing import List
from PIL import Image, UnidentifiedImageError
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
import shutil
from pillow_heif import register_heif_opener, open_heif
import time

In [103]:
# Environment sanity check. The device is derived here rather than read from
# the DEVICE constant, because DEVICE is only assigned in the CONFIG cell that
# comes later in the notebook; reading it here fails on a fresh kernel.
print("CUDA:", torch.cuda.is_available())
print("Device:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))

CUDA: True
Device: cuda


In [104]:
# Register pillow-heif's opener with Pillow so that Image.open() can read
# HEIF/HEIC files (needed by the HEIC -> JPG conversion step).
register_heif_opener()

In [126]:
# --------- CONFIG ---------
# Compute device: the GPU when torch can see one, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Every input folder and generated artifact lives under one project directory.
BASE_DIR = Path("pet_verification")
USER_DIR = BASE_DIR.joinpath("user_pet")              # images used as positives and for the gallery
NEG_DIR = BASE_DIR.joinpath("test_dataset")           # images used as negatives
GALLERY_PATH = BASE_DIR.joinpath("my_pet_gallery.pkl")  # pickled gallery embeddings
PAIRS_CACHE = BASE_DIR.joinpath("pair_cache.pkl")     # not referenced by the cells visible here

In [112]:
# --------- UTILITY: Convert HEIC to JPG ---------
def convert_heic_to_jpg(directory):
    """Write a ``.jpg`` copy next to every ``.heic`` file found under ``directory``.

    The search is recursive and the suffix match is case-insensitive. The
    source file is left in place, and an existing ``.jpg`` with the same stem
    is overwritten. A file that cannot be converted is reported and skipped so
    one bad file does not abort the whole batch.

    Args:
        directory: Root folder to scan (``str`` or ``Path``).
    """
    for path in Path(directory).rglob("*"):
        if path.suffix.lower() != ".heic":
            continue
        try:
            jpg_path = path.with_suffix(".jpg")
            # The context manager closes the source file handle even when
            # decoding or saving fails part-way through.
            with Image.open(path) as heic_image:
                heic_image.convert("RGB").save(jpg_path)
            print(f"Converted {path.name} → {jpg_path.name}")
        except Exception as e:
            # Deliberately best-effort: report the failure and keep going.
            print(f"Failed to convert {path.name}: {e}")

In [73]:
# --------- TRANSFORMS ---------
# Deterministic preprocessing: resize to 224x224, convert to a tensor, then
# normalize each channel with the given mean / std.
base_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training preprocessing: the same pipeline plus a random horizontal flip
# (applied with probability 0.5) between the resize and the tensor conversion.
train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [26]:
def extract_frames_from_videos(base_dir, exts=('.mov', '.mp4', '.avi')):
    """Dump still frames from every video under ``base_dir`` as JPEG files.

    For each video, roughly one frame per half second of footage is written
    next to the video as ``<stem>_frameNNN.jpg``. Once at least one frame has
    been saved, the video is renamed to ``<name>.ignore`` so a later run does
    not process it again. A video that cannot be opened, or that yields no
    frames, is left untouched.

    Args:
        base_dir: Root folder searched recursively for videos.
        exts: Lower-case file suffixes treated as videos.
    """
    # Take a snapshot of the listing first: frames are created and videos are
    # renamed inside the same tree while the loop runs.
    videos = sorted(
        p for p in Path(base_dir).glob("**/*")
        if p.is_file() and p.suffix.lower() in exts
    )

    for file in videos:
        save_id = 0
        cap = cv2.VideoCapture(str(file))
        try:
            if not cap.isOpened():
                print(f"Could not open {file.name}; leaving it in place")
                continue
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Sample every half second; fall back to every frame when the
            # container reports no usable frame rate.
            interval = max(1, int(fps * 0.5)) if fps > 0 else 1
            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_id % interval == 0:
                    out_path = file.parent / f"{file.stem}_frame{save_id:03d}.jpg"
                    # imwrite reports failure through its return value.
                    if cv2.imwrite(str(out_path), frame):
                        save_id += 1
                frame_id += 1
        finally:
            # Always free the decoder, including on errors and early exits.
            cap.release()

        print(f"Extracted {save_id} frames from {file.name}")
        if save_id:
            # Ignore video file after extraction by renaming it
            file.rename(file.with_suffix(file.suffix + ".ignore"))


In [118]:
# --------- DATASET ---------
class SiamesePairDataset(Dataset):
    """Labelled image pairs for contrastive training.

    Pairs are built once, in ``__init__``:

    * label 1 - every unordered pair of two distinct images from ``user_dir``;
    * label 0 - the i-th image from ``user_dir`` with the i-th image from
      ``neg_dir``, for as many indices as both lists have.

    NOTE(review): the positive pairs grow quadratically with the number of
    user images while the negative pairs grow linearly (with 50 + 50 images:
    1225 positive pairs versus 50 negative). Confirm this imbalance is intended.

    Args:
        user_dir: Folder (searched recursively) with images of the target.
        neg_dir: Folder (searched recursively) with images of other subjects.
        transform: Callable applied to each loaded RGB image.
        max_images: Upper bound on how many images (in sorted path order) are
            taken from each folder; ``None`` disables the cap.
    """

    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self, user_dir, neg_dir, transform, max_images=50):
        self.transform = transform
        self.user_imgs = self._list_images(user_dir, max_images)
        self.neg_imgs = self._list_images(neg_dir, max_images)

        positives = [
            (first, second, 1)
            for i, first in enumerate(self.user_imgs)
            for second in self.user_imgs[i + 1:]
        ]
        # zip stops at the shorter list, i.e. min(len(user), len(neg)) pairs.
        negatives = [(user, neg, 0) for user, neg in zip(self.user_imgs, self.neg_imgs)]
        self.all_pairs = positives + negatives

    @classmethod
    def _list_images(cls, directory, limit):
        """Return up to ``limit`` image paths under ``directory``, sorted."""
        # Files renamed to "*.ignore" have the suffix ".ignore", so the suffix
        # filter already excludes them.
        found = sorted(
            p for p in Path(directory).glob("**/*")
            if p.suffix.lower() in cls._IMAGE_SUFFIXES
        )
        return found[:limit]

    @staticmethod
    def _load_rgb(path):
        """Open ``path``, return an RGB copy, and close the file handle."""
        with Image.open(path) as img:
            return img.convert("RGB")

    def __len__(self):
        return len(self.all_pairs)

    def __getitem__(self, idx):
        """Return ``(transformed_img1, transformed_img2, float32 label)``.

        Raises:
            RuntimeError: If either image cannot be opened or decoded; the
                underlying error is attached as ``__cause__``.
        """
        p1, p2, label = self.all_pairs[idx]
        try:
            img1 = self._load_rgb(p1)
            img2 = self._load_rgb(p2)
        except (UnidentifiedImageError, OSError) as err:
            raise RuntimeError(f"Failed to load: {p1} or {p2}") from err
        return self.transform(img1), self.transform(img2), torch.tensor(label, dtype=torch.float32)




In [80]:
# --------- MODEL ---------
class EmbeddingNet(nn.Module):
    """MobileNetV2 feature extractor with a 128-dimensional embedding head.

    The pretrained MobileNetV2 convolutional features are followed by global
    average pooling to 1x1, flattening, a 1280 -> 128 linear layer and a ReLU.

    NOTE(review): because of the final ReLU every embedding component is
    non-negative, so the cosine similarity between any two embeddings can
    never be below 0. Confirm that is intended for the similarity threshold
    used downstream.
    """

    def __init__(self):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the weights enum is
        # the supported spelling. IMAGENET1K_V1 is the checkpoint that
        # `pretrained=True` resolved to, so the loaded weights are unchanged.
        mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
        self.features = mobilenet.features
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1280, 128),
            nn.ReLU()
        )

    def forward(self, x):
        """Map an image batch to embeddings whose last dimension is 128.

        ``x`` is assumed to be a float batch shaped (N, 3, H, W) -- TODO
        confirm against callers.
        """
        x = self.features(x)
        x = self.pool(x)
        x = self.fc(x)
        return x

In [81]:
# --------- CONTRASTIVE LOSS ---------
class ContrastiveLoss(nn.Module):
    """Contrastive loss over pairs of embeddings.

    For a pair at Euclidean distance ``d`` with label ``y`` the per-pair loss
    is ``y * d^2 + (1 - y) * max(margin - d, 0)^2``; the batch mean is
    returned. Label 1 pulls a pair together, label 0 pushes it apart until it
    is at least ``margin`` away.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, o1, o2, label):
        """Return the mean contrastive loss for embeddings ``o1`` / ``o2``."""
        distance = nn.functional.pairwise_distance(o1, o2)
        # Pairs labelled 1 are penalised by their squared distance.
        pull_term = label * distance.pow(2)
        # Pairs labelled 0 are penalised only while closer than the margin.
        shortfall = (self.margin - distance).clamp(min=0)
        push_term = (1 - label) * shortfall.pow(2)
        per_pair = pull_term + push_term
        return per_pair.mean()

In [88]:
# --------- TRAINING ---------
def train_siamese(model, dataloader, epochs=10):
    """Train ``model`` on image pairs with the contrastive loss.

    The model is moved to ``DEVICE`` and optimised with Adam (lr 1e-4). Both
    images of a pair go through the same network. After every epoch the mean
    batch loss and the wall-clock time are printed, and the model weights are
    saved to ``BASE_DIR / "model_epoch_<n>.pt"``.

    Args:
        model: Network mapping an image batch to embeddings.
        dataloader: Yields ``(image1, image2, label)`` batches.
        epochs: Number of passes over ``dataloader``.
    """
    model.to(DEVICE)
    criterion = ContrastiveLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    for epoch_num in range(1, epochs + 1):
        model.train()
        running_loss = 0
        started = time.time()

        progress = tqdm(dataloader, desc=f"Epoch {epoch_num}")
        for left, right, target in progress:
            left = left.to(DEVICE)
            right = right.to(DEVICE)
            target = target.to(DEVICE)

            # Shared weights: the same network embeds both sides of the pair.
            emb_left = model(left)
            emb_right = model(right)
            loss = criterion(emb_left, emb_right, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            progress.set_postfix(loss=batch_loss)

        elapsed = time.time() - started
        mean_loss = running_loss / len(dataloader)
        print(f"Epoch {epoch_num} Summary: Avg Loss = {mean_loss:.4f} | Time: {elapsed:.2f}s")
        # One checkpoint per epoch, so any epoch can be restored later.
        torch.save(model.state_dict(), BASE_DIR / f"model_epoch_{epoch_num}.pt")


In [94]:
# --------- EMBEDDING GENERATION ---------
def save_gallery_embeddings(model):
    """Embed every image under ``USER_DIR`` and pickle the result.

    Each ``.jpg`` / ``.jpeg`` / ``.png`` file found recursively under
    ``USER_DIR`` is preprocessed with ``base_transform`` and passed through
    ``model`` in eval mode without gradients. The flattened embeddings are
    stacked into one NumPy array and written to ``GALLERY_PATH``.

    An image that cannot be processed is reported and skipped; a warning is
    printed when no embedding could be produced at all.

    Args:
        model: Network mapping a single-image batch to an embedding.
    """
    model.eval()
    model.to(DEVICE)
    embeddings = []
    for img_path in USER_DIR.glob("**/*"):
        if img_path.suffix.lower() not in ['.jpg', '.jpeg', '.png']:
            continue
        try:
            # The context manager closes the file handle once decoded.
            with Image.open(img_path) as img:
                img_tensor = base_transform(img.convert("RGB")).unsqueeze(0).to(DEVICE)
            with torch.no_grad():
                emb = model(img_tensor).cpu().numpy().flatten()
        except Exception as e:
            # Best-effort, but visible; unlike a bare `except:` this does not
            # swallow KeyboardInterrupt or SystemExit.
            print(f"Skipping {img_path.name}: {e}")
            continue
        embeddings.append(emb)

    if not embeddings:
        print(f"Warning: no usable images found under {USER_DIR}; the saved gallery is empty.")

    with open(GALLERY_PATH, "wb") as f:
        pickle.dump(np.array(embeddings), f)
    print("Gallery embeddings saved.")

In [136]:
# --------- VERIFICATION ---------
def verify_folder(model, folder_path, gallery_path=GALLERY_PATH, threshold=0.75):
    """Score every image in a folder against the pickled gallery embeddings.

    For each ``.jpg`` / ``.jpeg`` / ``.png`` file found recursively under
    ``folder_path``, the image is embedded with ``model`` and its score is
    the mean cosine similarity to all gallery embeddings. A score at or above
    ``threshold`` is reported as a match. One line is printed per image,
    followed by a summary; an image that fails is reported and excluded from
    the totals.

    Args:
        model: Network mapping a single-image batch to an embedding.
        folder_path: Folder with the images to check.
        gallery_path: Pickle file holding the gallery embedding array.
        threshold: Minimum mean cosine similarity that counts as a match.
    """
    # pickle can execute code while loading: only point this at gallery files
    # produced by this notebook.
    with open(gallery_path, "rb") as handle:
        gallery = pickle.load(handle)

    model.eval()
    model.to(DEVICE)

    image_suffixes = [".jpg", ".jpeg", ".png"]
    files = [
        candidate
        for candidate in Path(folder_path).glob("**/*")
        if candidate.suffix.lower() in image_suffixes
    ]

    match_count = 0
    total_count = 0

    for file in files:
        try:
            rgb_image = Image.open(file).convert("RGB")
            batch = base_transform(rgb_image).unsqueeze(0).to(DEVICE)
            with torch.no_grad():
                emb = model(batch).cpu().numpy().flatten()
            # Average similarity to the whole gallery, not the best single hit.
            score = np.mean(cosine_similarity([emb], gallery))
            is_match = score >= threshold
            if is_match:
                result = "MATCH"
            else:
                result = "NOT YOUR PET"
            print(f"{file.name}: Similarity = {score:.4f} → {result}")
            total_count += 1
            if is_match:
                match_count += 1
        except Exception as e:
            print(f"Failed to verify {file.name}: {e}")

    rejected_count = total_count - match_count
    if total_count:
        not_pet_rate = rejected_count / total_count
    else:
        not_pet_rate = 0
    print(f"\n Total files: {total_count}")
    print(f"Matches: {match_count}")
    print(f"Not Your Pet: {rejected_count}")
    print(f"Not Your Pet Rate: {not_pet_rate:.2%}")

In [113]:
# Convert every .heic file anywhere under the project directory to .jpg,
# because the later image filters only accept .jpg / .jpeg / .png.
convert_heic_to_jpg(BASE_DIR)

Converted IMG_9721.HEIC → IMG_9721.jpg
Converted IMG_9718.HEIC → IMG_9718.jpg
Converted IMG_9688.HEIC → IMG_9688.jpg
Converted IMG_9681.HEIC → IMG_9681.jpg
Converted IMG_9808.HEIC → IMG_9808.jpg
Converted IMG_9743.HEIC → IMG_9743.jpg
Converted IMG_9675.HEIC → IMG_9675.jpg
Converted IMG_9673.HEIC → IMG_9673.jpg
Converted IMG_9687.HEIC → IMG_9687.jpg
Converted IMG_9706.HEIC → IMG_9706.jpg
Converted IMG_9661.HEIC → IMG_9661.jpg
Converted IMG_9800.HEIC → IMG_9800.jpg
Converted IMG_9709.HEIC → IMG_9709.jpg
Converted IMG_9660.HEIC → IMG_9660.jpg
Converted IMG_9802.HEIC → IMG_9802.jpg
Converted IMG_9754.HEIC → IMG_9754.jpg
Converted IMG_9680.HEIC → IMG_9680.jpg
Converted IMG_9702.HEIC → IMG_9702.jpg
Converted IMG_9809.HEIC → IMG_9809.jpg
Converted IMG_9667.HEIC → IMG_9667.jpg
Converted IMG_9670.HEIC → IMG_9670.jpg
Converted IMG_9810.HEIC → IMG_9810.jpg
Converted IMG_9677.HEIC → IMG_9677.jpg
Converted IMG_9657.HEIC → IMG_9657.jpg
Converted IMG_9745.HEIC → IMG_9745.jpg
Converted IMG_9749.HEIC →

In [121]:
# Turn videos into still frames (saved as .jpg next to each video) so they
# can be picked up as ordinary images; processed videos are renamed to *.ignore.
extract_frames_from_videos(BASE_DIR)

In [122]:
# Build the labelled training pairs from the user-pet images and the negative
# images, using the training transform (which includes the random flip).
dataset = SiamesePairDataset(USER_DIR, NEG_DIR, train_transform)

In [123]:
# Serve the pairs in batches of 16, reshuffled every epoch.
loader = DataLoader(dataset, batch_size=16, shuffle=True)

In [124]:
# Instantiate the embedding network (pretrained MobileNetV2 features plus a
# 128-dimensional head).
model = EmbeddingNet()

In [125]:
# Train for 10 epochs; a checkpoint is written under BASE_DIR after each one.
train_siamese(model, loader, epochs=10)

Epoch 1: 100%|██████████| 80/80 [02:03<00:00,  1.55s/it, loss=0.0794]


Epoch 1 Summary: Avg Loss = 0.3364 | Time: 123.78s


Epoch 2: 100%|██████████| 80/80 [02:03<00:00,  1.55s/it, loss=0.043]


Epoch 2 Summary: Avg Loss = 0.0529 | Time: 123.92s


Epoch 3: 100%|██████████| 80/80 [02:05<00:00,  1.57s/it, loss=0.0207]


Epoch 3 Summary: Avg Loss = 0.0344 | Time: 125.62s


Epoch 4: 100%|██████████| 80/80 [02:03<00:00,  1.55s/it, loss=0.0232]


Epoch 4 Summary: Avg Loss = 0.0258 | Time: 123.66s


Epoch 5: 100%|██████████| 80/80 [02:02<00:00,  1.53s/it, loss=0.0193]


Epoch 5 Summary: Avg Loss = 0.0196 | Time: 122.80s


Epoch 6: 100%|██████████| 80/80 [02:02<00:00,  1.53s/it, loss=0.0115]


Epoch 6 Summary: Avg Loss = 0.0157 | Time: 122.15s


Epoch 7: 100%|██████████| 80/80 [02:02<00:00,  1.53s/it, loss=0.00931]


Epoch 7 Summary: Avg Loss = 0.0116 | Time: 122.10s


Epoch 8: 100%|██████████| 80/80 [02:00<00:00,  1.51s/it, loss=0.0072]


Epoch 8 Summary: Avg Loss = 0.0097 | Time: 120.58s


Epoch 9: 100%|██████████| 80/80 [02:01<00:00,  1.52s/it, loss=0.00735]


Epoch 9 Summary: Avg Loss = 0.0076 | Time: 121.84s


Epoch 10: 100%|██████████| 80/80 [02:01<00:00,  1.52s/it, loss=0.00691]

Epoch 10 Summary: Avg Loss = 0.0066 | Time: 121.71s





In [127]:
# Embed all images under USER_DIR with the trained model and pickle them to
# GALLERY_PATH as the reference gallery used for verification.
save_gallery_embeddings(model)

Gallery embeddings saved.


In [137]:
# Score the images in NEG_DIR against the gallery with the default threshold
# (0.75). NOTE(review): these images are presumably all "not the user's pet",
# so MATCH lines here would be false positives -- confirm the folder contents.
verify_folder(model, NEG_DIR)

68_108794d10552b6854b9790aaad0a23bc1f964326caac56a6da24902952bc12ca.jpg: Similarity = 0.5265 → NOT YOUR PET
44_6e0bf70e22ce2b1f6bb08805349df9f03c3b8b44c4c9de9ef834aa1378e2b815.jpg: Similarity = 0.5343 → NOT YOUR PET
3_cce435f923dbe37307a78f74ee651927536e6ef1c5e2e043c8e0f79638b05e61.jpg: Similarity = 0.8317 → MATCH
4_8d8a09ef1b33265b74ac9b36dc8e785e81fb2d1a06999d3b0c7a4e7b84304972.jpg: Similarity = 0.5328 → NOT YOUR PET
IMG_9753.jpg: Similarity = 0.8982 → MATCH
19_20591e15b9b9b39c5877f9a2346fb7e043e53b5b1959152c64e83d09744a3840.jpg: Similarity = 0.6877 → NOT YOUR PET
48_ae7b52fadc8a83dbfb835e0b394bb5c99730364ef4a392d0fd7bb68e0b3280a3.jpg: Similarity = 0.6002 → NOT YOUR PET
56_5b12db802f2c4380deb0fdfd9b32d13a224b0857fde1395f4bcbff15ed9ddd3b.jpg: Similarity = 0.5523 → NOT YOUR PET
20_134b8786e822e426ddac507d1ea88ef717c18a9f631feea6978edebd47197865.jpg: Similarity = 0.6667 → NOT YOUR PET
37_a9f5f104be992e23708f1fa82312a8606062ad8b9611fc2373ab61ad42e2c96b.jpg: Similarity = 0.7284 → NOT YOUR