In [None]:
#|default_exp embeddings

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#| export

from clip_plot.utils import timestamp, clean_filename

from pathlib import Path

import torch
# Report whether torch can see a CUDA device.
# NOTE(review): this cell is marked `#| export`, so the print presumably also runs
# whenever the exported module is imported — confirm that side effect is intended.
print(f"CUDA is available? {torch.cuda.is_available()}")


CUDA is available? True


In [None]:
#| export

import timm
from accelerate import Accelerator
# Mixed-precision mode handed to Accelerator: "fp16" when CUDA is available, "bf16" otherwise.
MIXED_PRECISION = "fp16" if torch.cuda.is_available() else "bf16"
# torch dtype that corresponds to MIXED_PRECISION; get_timm_embeds passes it on as the
# dtype that input image tensors are cast to.
TORCH_DTYPE = torch.float16 if MIXED_PRECISION == "fp16" else torch.bfloat16

# Single module-level Accelerator, created when this cell runs; get_timm_embeds uses it
# for device selection, model preparation and the autocast context.
accelerator = Accelerator(mixed_precision=MIXED_PRECISION)
from tqdm.auto import tqdm
import numpy as np

Setting ds_accelerator to cuda (auto detect)


2023-06-23 19:31:21.128525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Create and/or load embeddings

In [None]:
#| export

def timm_embed_model(model_name: str):
    '''
    Build a pre-trained timm feature extractor together with its matching image transform
    Reference: https://huggingface.co/docs/timm/main/en/feature_extraction#pooled

    input:          model name as found in timm documentation
    return tuple:   pre-trained embedding model (classifier removed, in eval mode),
                    transform function to prep images for inference
    '''
    # num_classes=0 drops the classification head so the model returns features;
    # .eval() returns the module itself, so the call can be chained
    embed_model = timm.create_model(model_name, pretrained=True, num_classes=0).eval()

    # derive the preprocessing settings from the model's own pretrained config
    data_config = timm.data.resolve_data_config(embed_model.pretrained_cfg)
    image_transform = timm.data.create_transform(**data_config)

    return embed_model, image_transform

In [None]:
#| export

def timm_transform_embed(img, model, transform, device, dtype) -> np.ndarray:
    '''
    apply transform to image and run inference on it to generate an embedding

    input:      img: Pillow image or similar
                model: Torch model
                transform: Torch image transformation pipeline to match how model was trained
                device: torch device the transformed image tensor is moved to
                dtype: torch dtype the transformed image tensor is cast to
    returns: embedding as a numpy array with all size-1 dimensions squeezed out
             (1D when the model returns a (1, D) tensor). bfloat16 model outputs
             are returned as float32 because NumPy has no bfloat16 dtype.
    '''
    # add a leading batch dimension of 1 so a single image can go through the model
    batch = transform(img).to(device, dtype).unsqueeze(0)

    # inference only: no autograd graph is needed for the forward pass
    with torch.no_grad():
        emb = model(batch)

    emb = emb.detach().cpu()
    # Tensor.numpy() cannot convert bfloat16, so upcast that case first;
    # other dtypes (e.g. float16, float32) are passed through unchanged
    if emb.dtype == torch.bfloat16:
        emb = emb.float()
    return emb.numpy().squeeze()


In [None]:
#| export

def get_timm_embeds(imageEngine, model_name: str, **kwargs):
    '''
    Create embedding vectors for input images using a pre-trained model from timm

    input:      imageEngine: iterable of image objects; each item is read through its
                    `.path` and `.original` attributes, and `imageEngine.count`
                    is used as the progress-bar total
                model_name: model name as found in timm documentation
                kwargs:
                    "out_dir":   root directory; vectors are stored under
                                 <out_dir>/image-vectors/inception
                    "seed":      value passed to torch.manual_seed
                    "use_cache": looked up when a vector file already exists for an
                                 image; if truthy the file is loaded instead of
                                 recomputing the embedding
    returns: numpy array built from the per-image embeddings, in iteration order
    '''
    # for now, the output directory is still called "inception" though it is generic
    vector_dir = Path(kwargs["out_dir"]) / "image-vectors" / "inception"
    vector_dir.mkdir(exist_ok=True, parents=True)

    torch.manual_seed(kwargs["seed"])

    print(timestamp(), f"Creating embeddings using {model_name}")
    embeds = []

    model, transform = timm_embed_model(model_name)

    # hand the model to accelerate and use its device for the input tensors
    device = accelerator.device
    model = accelerator.prepare(model)

    # inference only: disable autograd for the whole loop so no graph is
    # recorded per image, and run the forward passes under mixed precision
    with torch.no_grad(), accelerator.autocast():
        for img in tqdm(imageEngine, total=imageEngine.count):
            embed_path = vector_dir / (clean_filename(img.path) + ".npy")
            if embed_path.exists() and kwargs["use_cache"]:
                emb = np.load(embed_path)
            else:
                # create embedding for one image and store it for later runs
                emb = timm_transform_embed(img.original, model, transform, device, TORCH_DTYPE)
                np.save(embed_path, emb)
            embeds.append(emb)
    return np.array(embeds)

In [None]:
?? accelerator.autocast

[0;31mSignature:[0m  [0maccelerator[0m[0;34m.[0m[0mautocast[0m[0;34m([0m[0mcache_enabled[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
    [0;34m@[0m[0mcontextmanager[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mautocast[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mcache_enabled[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m"""[0m
[0;34m        Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing[0m
[0;34m        different will happen otherwise.[0m
[0;34m[0m
[0;34m        Example:[0m
[0;34m[0m
[0;34m        ```python[0m
[0;34m        >>> from accelerate import Accelerator[0m
[0;34m[0m
[0;34m        >>> accelerator = Accelerator(mixed_precision="fp16")[0m
[0;34m        >>> with accelerator.autocast():[0m
[0;34m        ...     train()[0m
[0;34

In [None]:
#|hide
import nbdev; nbdev.nbdev_export()