In [35]:
import requests
import json

from dotenv import load_dotenv
import os
from shutil import copy
from pathlib import Path
from shutil import copy
import asyncio
import io
import aiohttp
from typing import List
from PIL import Image, ImageOps
from utils import download_images
import nest_asyncio
nest_asyncio.apply()

In [36]:
import os
import json

os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch

from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from collections import defaultdict
from tqdm import tqdm
from embedder import Embedder
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path
from torchvision import transforms

In [37]:
# Root of the local data checkout. Set the CCBF_ROOT_DIR environment variable to
# run this notebook on another machine; the default is the original location.
ROOT_DIR = os.environ.get("CCBF_ROOT_DIR", "/Users/iman/345-data/")
DATA_DIR = Path(ROOT_DIR, "ml-datasets", "ccbf")
VECTORS_DIR = Path(DATA_DIR, "vectors")

# Name of the dataset processed by this notebook.
DATASET = "ccbf-det-fronts-20250709"
output_dir = VECTORS_DIR / DATASET              # vectors / supervectors are written here
crops_dir = DATA_DIR / "recognition" / DATASET  # *.jpg crops are read from here

# Create both directories up front so later reads/writes do not fail on a missing folder.
output_dir.mkdir(parents=True, exist_ok=True)
crops_dir.mkdir(parents=True, exist_ok=True)

In [38]:
# Prefer Apple's MPS backend, but fall back to the CPU when it is unavailable so
# the notebook still runs on machines without it.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [39]:
# Build the embedding model from the two weight files stored under ./model.
model_root = Path("./model")
trunk_weights_path = model_root / "trunk_weights.pth"
embedder_weights_path = model_root / "embedder_weights.pth"

embedder = Embedder(trunk_weights_path, embedder_weights_path, device=device)

In [None]:

# One-shot variant: decode every crop, stack them into a single tensor and embed
# them in one call. Everything is held in memory at once.
to_tensor = transforms.ToTensor()
resize = transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BILINEAR)

images = []
for img_path in sorted(crops_dir.rglob("*.jpg")):
    # Image.open is lazy and keeps the file handle open; convert() forces the
    # decode and returns an independent image, so the handle can be closed here.
    with Image.open(img_path) as img:
        images.append(img.convert("RGB"))  # ensure 3 channels so torch.stack gets equal shapes

images_tensors = torch.stack([resize(to_tensor(img)) for img in images]).to(device)

vectors = embedder.embed(images_tensors)

In [41]:
batch_size = 1500  # images per embed() call; ← tweak to taste
to_tensor = transforms.ToTensor()
resize = transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BILINEAR)

# Load paths only (delay decoding until we're inside the batch loop)
image_paths = sorted(Path(crops_dir).rglob("*.jpg"))
if not image_paths:
    # torch.cat on an empty list fails with an opaque error; fail early instead.
    raise FileNotFoundError(f"No *.jpg crops found under {crops_dir}")

all_vecs = []  # will hold the per-batch outputs
classes = []   # will hold the file stem of every embedded image, in order
for start in tqdm(range(0, len(image_paths), batch_size)):
    batch_paths = image_paths[start:start + batch_size]

    batch_imgs = []
    for p in batch_paths:
        # Close each file as soon as it has been decoded (Image.open is lazy
        # and would otherwise keep one open handle per image).
        with Image.open(p) as img:
            batch_imgs.append(resize(to_tensor(img.convert("RGB"))))  # ensure 3-channel
    batch_tensor = torch.stack(batch_imgs).to(device)

    batch_vecs = embedder.embed(batch_tensor)  # (B, …)
    all_vecs.append(batch_vecs.cpu())          # keep on CPU to save GPU mem
    classes.extend(path.stem for path in batch_paths)

# (N, …) stacked result
vectors = torch.cat(all_vecs, dim=0)

100%|██████████| 8/8 [01:17<00:00,  9.71s/it]


In [42]:
# Persist the embeddings next to the other artifacts of this dataset.
vectors_path = output_dir / "vectors.pt"
torch.save(vectors, vectors_path)

In [12]:
# Reload the previously saved embeddings for this dataset.
vectors_path = output_dir / "vectors.pt"
vectors = torch.load(vectors_path)

In [None]:
# File stems of all crops, in the same sorted order used when embedding.
image_names = [img.stem for img in sorted(crops_dir.rglob("*.jpg"))]

# Use a context manager so the file is flushed and closed deterministically.
with open(output_dir / "classes.json", "w") as f:
    json.dump(classes, f)

In [45]:
# Use the freshly embedded dataset as the reference set (vectors + their labels).
ref_vectors, ref_classes = vectors, image_names

In [46]:

# Build one L2-normalised "supervector" per class: the mean of all reference
# vectors that share a label once the trailing "_"-separated field is dropped.
class_names = ["_".join(cls.split("_")[:-1]) for cls in ref_classes]
unique_classes, class_indices = np.unique(class_names, return_inverse=True)

# Same grouping with the last two "_"-separated fields dropped.
uuid_names = ["_".join(cls.split("_")[:-2]) for cls in ref_classes]
unique_uuids, uuid_indices = np.unique(uuid_names, return_inverse=True)

# Count groups from the unique arrays; the last element's index is only the
# maximum when the labels happen to be sorted.
num_of_classes = len(unique_classes)
num_uuids = len(unique_uuids)

# Take the embedding width from the data instead of hard-coding it.
embedding_dim = ref_vectors.shape[1]
supervectors = torch.empty((num_of_classes, embedding_dim))
for class_i in tqdm(range(num_of_classes)):
    # Loop-local name on purpose: rebinding `vectors` here would clobber the
    # notebook-level embeddings that later cells read.
    class_vectors = ref_vectors[class_indices == class_i]
    supervectors[class_i, :] = torch.mean(class_vectors, dim=0)

# Normalise every supervector to unit length.
supervectors /= torch.linalg.norm(supervectors, dim=1, keepdim=True)
superclasses = unique_classes

100%|██████████| 3656/3656 [00:00<00:00, 47406.01it/s]


In [47]:
# Write the per-class supervectors and their matching class names to disk.
supervectors_path = output_dir / "supervectors.pt"
superclasses_path = output_dir / "superclasses.json"

torch.save(supervectors, supervectors_path)
superclasses_path.write_text(json.dumps(superclasses.tolist()))

In [48]:
def train_sample_split(vectors, classes, face=None):
    """Split vectors/labels into a training part and a held-out sample part.

    For every position where the label differs from the one before it, that
    element goes into the sample set; everything else (including the very
    first element) goes into the training set. This only yields "one sample
    per class" when equal labels are stored contiguously -- NOTE(review):
    confirm callers pass sorted/grouped labels.

    Args:
        vectors: ``torch.Tensor`` or array-like with one row per label.
        classes: sequence of labels (list or ``np.ndarray``), same length as
            ``vectors``.
        face: optional face name; when given, the data is first restricted
            with ``filter_by_face``.

    Returns:
        Tuple ``(train_vectors, train_classes, sample_vectors, sample_classes)``
        of NumPy arrays.
    """
    # Normalise the inputs first so the shape prints below also work for plain
    # lists, and labels are converted regardless of the vector type.
    if isinstance(vectors, torch.Tensor):
        vectors = vectors.detach().cpu().numpy()
    else:
        vectors = np.asarray(vectors)
    classes = np.asarray(classes)
    print(vectors.shape, classes.shape)

    # if face is not None, keep only the entries belonging to that face
    if face is not None:
        vectors, classes = filter_by_face(vectors, classes, face)
    print(vectors.shape, classes.shape)

    # True wherever the label changes relative to the previous element.
    # Built by slice assignment so empty and single-element inputs are handled.
    mask = np.zeros(len(classes), dtype=bool)
    mask[1:] = classes[1:] != classes[:-1]

    # split the vectors and classes into train and sample sets
    print(vectors.shape, classes.shape, mask.shape)
    train_vectors = vectors[~mask, :]
    train_classes = classes[~mask]

    sample_vectors = vectors[mask, :]
    sample_classes = classes[mask]

    return train_vectors, train_classes, sample_vectors, sample_classes

def create_face_mask(classes, face):
    """Return a boolean mask marking labels that contain the given face name(s).

    Args:
        classes: iterable of label strings.
        face: a single substring, or a tuple/list of substrings; a label
            matches when it contains ``face`` (or any of them).

    Returns:
        ``np.ndarray`` of dtype ``bool`` with one entry per label.
    """
    if isinstance(face, (tuple, list)):
        matches = [any(f in label for f in face) for label in classes]
    else:
        matches = [face in label for label in classes]
    # Explicit dtype: an empty list would otherwise become a float64 array,
    # which cannot be used as a boolean index.
    return np.array(matches, dtype=bool)

def filter_by_face(vectors, classes, face):
    """Keep only the entries whose label ends with ``_<face>``.

    Args:
        vectors: array indexable by a boolean mask, one row per label.
        classes: sequence of label strings; the face is the text after the
            last underscore.
        face: face name that must match that last field exactly.

    Returns:
        Tuple ``(filtered_vectors, filtered_classes)``; the classes are
        returned as an ``np.ndarray``.
    """
    # Explicit dtype: with no labels the default would be a float64 array and
    # the indexing below would raise IndexError.
    face_mask = np.array([cls.split("_")[-1] == face for cls in classes], dtype=bool)
    return vectors[face_mask], np.array(classes)[face_mask]



In [184]:
## train knn for specific faces
# Evaluate 1-NN retrieval of the stored test set against the vectors embedded in
# this notebook, once per face and once with all faces (face=None).
TEST_SET_DIR = VECTORS_DIR / "ccbf-20241127-20250326" / "test set"
sample_vectors = torch.load(TEST_SET_DIR / "testvectors.pt")
with open(TEST_SET_DIR / "classes.json", "r") as f:  # close the file deterministically
    sample_classes = json.load(f)

sample_vectors = sample_vectors.cpu().numpy()
sample_classes = np.array(sample_classes)
# The UPC is the second "_"-separated field of a label; compute it once.
sample_upcs = [cls.split("_")[1] for cls in sample_classes]

ref_vectors = vectors
ref_classes = image_names
# Guard against hidden state: `vectors` must still be the full embedding matrix.
assert len(ref_vectors) == len(ref_classes), (
    f"{len(ref_vectors)} vectors vs {len(ref_classes)} labels - reload vectors.pt"
)
ref_vectors_np = ref_vectors.cpu().numpy()  # hoisted: identical for every face

for face in ("front", "left", "right", "back", None):
    if face:
        train_vectors, train_classes = filter_by_face(ref_vectors_np, ref_classes, face)
    else:
        train_vectors = ref_vectors_np
        train_classes = np.array(ref_classes)
    if len(train_classes) == 0:
        # KNeighborsClassifier cannot be fitted on an empty set.
        print(f"No reference vectors for {face=}, skipping")
        continue

    # fit knn to ref set
    knn = KNeighborsClassifier(n_neighbors=1, metric="euclidean")
    knn.fit(train_vectors, train_classes)

    predicted_labels = knn.predict(sample_vectors)
    print([(gt, pred) for gt, pred in zip(sample_classes[:3], predicted_labels[:3])])

    # A set makes each membership test O(1) instead of a scan over all labels.
    train_upcs = {cls.split("_")[1] for cls in train_classes}
    predicted_upcs = [pred.split("_")[1] for pred in predicted_labels]
    correct_predictions = sum(
        1
        for gt_upc, pred_upc in zip(sample_upcs, predicted_upcs)
        if gt_upc == pred_upc and gt_upc in train_upcs
    )
    # Only test samples whose UPC exists in the reference subset are scored.
    total_test_classes = [upc for upc in sample_upcs if upc in train_upcs]
    print(len(total_test_classes))
    accuracy = (
        correct_predictions / len(total_test_classes) if total_test_classes else float("nan")
    )

    print(f"Accuracy for {face=}: {accuracy * 100:.2f}%")


# which ones fail? (refers to the last iteration of the loop above, i.e. face=None)
num_failures = len(sample_classes) - correct_predictions

incorrect_predictions = [
    (gt, pred)
    for gt, pred in zip(sample_classes, predicted_labels)
    if gt.split("_")[1] != pred.split("_")[1]
]

[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_front'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_left', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_front'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_right', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_front')]
1308
Accuracy for face='front': 77.60%
[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_left'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_left', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_left'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_right', 'bf906b27-9d58-4a9b-8602-e395ebf49bef_00021136180596_left')]
1254
Accuracy for face='left': 61.24%
[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', '91c5e7e6-7b6b-4a54-b797-3f47bd512519_00074806001615_right'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_lef

In [185]:
## train knn for specific faces
# Evaluate 1-NN retrieval against the stored reference set, once per face and
# once with all faces (face=None). Reuses `sample_vectors` / `sample_classes`
# (NumPy arrays) that an earlier cell loaded from the test set.
REFERENCE_DIR = VECTORS_DIR / "ccbf-20241127-20250326" / "reference"
train_vectors = torch.load(REFERENCE_DIR / "refvectors.pt")
with open(REFERENCE_DIR / "classes.json", "r") as f:  # close the file deterministically
    train_classes = json.load(f)

ref_vectors = train_vectors
ref_classes = train_classes
ref_vectors_np = ref_vectors.cpu().numpy()  # hoisted: identical for every face
# The UPC is the second "_"-separated field of a label; compute it once.
sample_upcs = [cls.split("_")[1] for cls in sample_classes]

for face in ("front", "left", "right", "back", None):
    if face:
        train_vectors, train_classes = filter_by_face(ref_vectors_np, ref_classes, face)
    else:
        train_vectors = ref_vectors_np
        train_classes = np.array(ref_classes)
    if len(train_classes) == 0:
        # KNeighborsClassifier cannot be fitted on an empty set.
        print(f"No reference vectors for {face=}, skipping")
        continue

    # fit knn to ref set
    knn = KNeighborsClassifier(n_neighbors=1, metric="euclidean")
    knn.fit(train_vectors, train_classes)

    predicted_labels = knn.predict(sample_vectors)
    print([(gt, pred) for gt, pred in zip(sample_classes[:3], predicted_labels[:3])])

    # A set makes each membership test O(1) instead of a scan over all labels.
    train_upcs = {cls.split("_")[1] for cls in train_classes}
    predicted_upcs = [pred.split("_")[1] for pred in predicted_labels]
    correct_predictions = sum(
        1
        for gt_upc, pred_upc in zip(sample_upcs, predicted_upcs)
        if gt_upc == pred_upc and gt_upc in train_upcs
    )

    # Only test samples whose UPC exists in the reference subset are scored.
    total_test_classes = [upc for upc in sample_upcs if upc in train_upcs]
    print(len(total_test_classes))
    accuracy = (
        correct_predictions / len(total_test_classes) if total_test_classes else float("nan")
    )

    print(f"Accuracy for {face=}: {accuracy * 100:.2f}%")


# which ones fail? (refers to the last iteration of the loop above, i.e. face=None)
num_failures = len(sample_classes) - correct_predictions

incorrect_predictions = [
    (gt, pred)
    for gt, pred in zip(sample_classes, predicted_labels)
    if gt.split("_")[1] != pred.split("_")[1]
]

[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', 'c2c64acb-2391-4620-8e98-6b895f1318f8_00021136010374_front'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_left', '007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_right', '007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front')]
2565
Accuracy for face='front': 87.21%
[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', 'c2c64acb-2391-4620-8e98-6b895f1318f8_00021136010374_left'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_left', '007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_left'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_right', 'c2c64acb-2391-4620-8e98-6b895f1318f8_00021136010374_left')]
2565
Accuracy for face='left': 76.65%
[('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_front', 'c2c64acb-2391-4620-8e98-6b895f1318f8_00021136010374_right'), ('007a6be0-4ed0-4950-88e7-dd0547f50127_00021136180596_lef

In [188]:
# Scratch check: joining an empty segment leaves the path unchanged.
Path("Users/iman").joinpath("")

PosixPath('Users/iman')

In [189]:
# Scratch check: an empty string is falsy, so the branch below is not taken.
path = ""
if bool(path):
    print("yes")