In [None]:
import os

import numpy as np
import torch
# from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm.autonotebook import tqdm

from datasets.utils import PreprocessingDataset
from models.utils import get_model_by_name
from utils.environment import modified_environ


# Parameters
# Directory the images are read from (absolute path, built component by component).
IMAGES_DIR = os.path.join(
    "/", "mnt", "workspace", "Ugallery", "images",
)
# Destination .npy file for the computed embedding (relative path).
OUTPUT_EMBEDDING_PATH = os.path.join(
    "data", "UGallery", "ugallery_embedding.npy",
)
# Model names; each one is handed to get_model_by_name further down.
MODELS = [
    "resnet50",
    "resnext101_32x8d",
]
# Request the GPU; it is only used when CUDA is actually available.
USE_GPU = True


In [None]:
%%time
# Pick the torch device: the first CUDA GPU only when CUDA is available *and*
# USE_GPU requests it; every other combination falls back to the CPU.
print("\nCreating device...")
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_available and USE_GPU else "cpu")
if cuda_available != USE_GPU:
    # Availability and the request disagree, so the device chosen above is the
    # CPU; tell the user why.
    print(
        f"\nNotice: Not using GPU - "
        f"Cuda available ({cuda_available}) "
        f"does not match USE_GPU ({USE_GPU})"
    )

# Downloading models for feature extraction
print("\nDownloading models...")
# TORCH_HOME is overridden only for the duration of this block.
# NOTE(review): presumably this makes the pretrained weights get cached under
# the current working directory - confirm against get_model_by_name.
with modified_environ(TORCH_HOME="."):
    PRETRAINED_MODELS = dict()
    for model_name in MODELS:
        print(f"Model: {model_name}")
        network = get_model_by_name(model_name).to(device)
        # The networks are used purely as feature extractors, so put them in
        # evaluation mode: layers such as BatchNorm/Dropout then behave
        # deterministically and do not update their running statistics while
        # images are pushed through one at a time. Calling eval() is a no-op
        # if get_model_by_name already returned the model in eval mode.
        network.eval()
        PRETRAINED_MODELS[model_name] = network

# Build the preprocessing pipeline and the dataset it is applied to
print("\nSetting up transforms and dataset...")
# Per-channel mean / std handed to Normalize (three values each).
# NOTE(review): these match the ImageNet statistics that torchvision documents
# for its pretrained models - confirm they suit the models listed in MODELS.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
]
images_transforms = transforms.Compose(preprocessing_steps)
image_dataset = PreprocessingDataset(IMAGES_DIR, transform=images_transforms)
print(f">> Images dataset: {len(image_dataset)}")

# Feature extraction phase
print("\nFeature extraction...")
# One row per dataset sample: column 0 holds the sample id, column 1 holds the
# concatenated feature vector. dtype=object lets each cell store an arbitrary
# Python object (here: an id and a whole ndarray).
embedding = np.empty((len(image_dataset), 2), dtype=object)
# Inference only: disabling autograd avoids building (and keeping alive) a
# computation graph for every forward pass.
with torch.no_grad():
    for i, sample in enumerate(tqdm(image_dataset, desc="Feature extraction")):
        item_id = sample["id"]
        # Add a leading batch dimension of size 1 and move to the model device.
        item = sample["image"].unsqueeze(0).to(device)
        # Run the same image through every model; each output is squeezed and
        # brought back to host memory as a NumPy array.
        features = [
            network(item).squeeze().cpu().numpy()
            for network in PRETRAINED_MODELS.values()
        ]
        # axis=None flattens each output and joins them into one 1-D vector.
        features = np.concatenate(features, axis=None)
        # Fill the two object cells individually. Building the row with
        # np.asarray([item_id, features]) creates a ragged array, which raises
        # ValueError on NumPy >= 1.24 unless dtype=object is given.
        embedding[i, 0] = item_id
        embedding[i, 1] = features
print(f">> Embedding shape: {embedding.shape}")

# Save embedding to file
print(f"\nSaving embedding to file... ({OUTPUT_EMBEDDING_PATH})")
# np.save does not create missing parent directories; make sure the target
# folder exists so the extraction work above is not lost to a
# FileNotFoundError at the very last step.
output_dir = os.path.dirname(OUTPUT_EMBEDDING_PATH)
if output_dir:  # empty when the path is a bare file name
    os.makedirs(output_dir, exist_ok=True)
# The array has dtype=object, so it is stored via pickle; reading it back
# requires np.load(..., allow_pickle=True).
np.save(OUTPUT_EMBEDDING_PATH, embedding, allow_pickle=True)

# Release GPU memory: move every model back to the CPU, then let torch release
# the memory held by its CUDA caching allocator.
# Note: this is gated on the USE_GPU flag, not on the device actually selected.
if USE_GPU:
    print("\nCleaning GPU cache...")
    cpu_device = torch.device("cpu")
    # Iterate over a snapshot of the keys while the values are reassigned.
    for name in list(PRETRAINED_MODELS):
        PRETRAINED_MODELS[name] = PRETRAINED_MODELS[name].to(cpu_device)
    torch.cuda.empty_cache()

# Finished
print("\nDone")
