### Install LAVIS, then restart the notebook kernel

In [1]:
!pip install -e LAVIS --quiet

### Extract image features with BLIP2

See: https://github.com/salesforce/LAVIS/blob/main/examples/blip2_feature_extraction.ipynb

In [1]:
import os
import time
import re 
import json
import requests

import numpy as np
import cv2
from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm
import torch
import h5py

from lavis.models import load_model_and_preprocess

In [2]:
def sorted_alphanumeric(l):
    """Sort strings in natural ("human") order, so that 'img2' precedes 'img10'.

    Args:
        l: Iterable of strings to sort.

    Returns:
        A new list with the items ordered by their alternating text / integer chunks.
    """
    def alphanum_key(key):
        # re.split with a capture group always yields text, digits, text, ...,
        # so the odd positions are exactly the ASCII digit runs. Converting by
        # position (rather than str.isdigit) keeps non-ASCII "digits" as text:
        # int() either rejects them or would put an int where other keys hold
        # a str, which makes the keys incomparable.
        return [int(chunk) if pos % 2 else chunk
                for pos, chunk in enumerate(re.split('([0-9]+)', key))]

    return sorted(l, key=alphanum_key)

In [3]:
# Pick the compute device: the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = "cpu"
print('Running on device:', device)

Running on device: cuda


In [4]:
# Load the BLIP-2 feature extractor in eval mode on the selected device,
# together with its image and text preprocessors.
model, vis_processors, txt_processors = load_model_and_preprocess(
    name="blip2_feature_extractor",
    model_type="pretrain",
    is_eval=True,
    device=device,
)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

  0%|          | 0.00/1.89G [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

  0%|          | 0.00/712M [00:00<?, ?B/s]

### Batch images, compute features, and save them to an HDF5 file

https://www.h5py.org/

In [42]:
# Extract BLIP-2 image features for every .jpg in `img_dir` and store them,
# together with the matching filenames, in two resizable HDF5 datasets
# ('filenames' and 'features'). Rows are buffered in memory and flushed to disk
# in chunks. Re-running the cell resumes after the rows that are already stored.
data_dir = 'dataset/cogent'
img_dir = os.path.join(data_dir, 'images_resized')

batch_size = 8
# Flush the in-memory buffer to disk once at least this many images are pending.
save_interval = 512

all_fnames = sorted_alphanumeric([f for f in os.listdir(img_dir) if f.endswith('.jpg')])

h5_path = os.path.join(data_dir, 'blip2_image_features.h5')


def _append_rows(h5, name, rows):
    """Append `rows` along axis 0 of dataset `name`, creating the dataset on first use."""
    if name in h5:
        dset = h5[name]
        n_old = dset.shape[0]
        dset.resize(n_old + rows.shape[0], axis=0)
        dset[n_old:] = rows
    else:
        # Only the first axis is growable; the remaining dims are fixed by the data.
        h5.create_dataset(name, data=rows, compression="gzip", chunks=True,
                          maxshape=(None,) + rows.shape[1:])


def _flush(h5, fnames, feats):
    """Write the buffered filenames and feature batches to the HDF5 file."""
    _append_rows(h5, 'filenames', np.array(fnames, dtype=h5py.special_dtype(vlen=str)))
    _append_rows(h5, 'features', np.concatenate(feats, axis=0))


# Buffers are plain lists (joined once per flush) instead of arrays grown with
# np.append, which would re-copy the whole buffer on every batch.
fnames_to_save = []
features_to_save = []
times = []
t_start = time.time()
with h5py.File(h5_path, 'a') as h5:
    # Work out how many images are already stored so that re-running the cell
    # resumes instead of failing on an already-existing dataset name.
    present = [name for name in ('filenames', 'features') if name in h5]
    n_done = 0
    if len(present) == 2:
        # An interrupted write can leave the two datasets with different
        # lengths; keep only the rows that exist in both.
        n_done = min(h5['filenames'].shape[0], h5['features'].shape[0])
        for name in present:
            if h5[name].shape[0] != n_done:
                h5[name].resize(n_done, axis=0)
        stored = [f.decode() if isinstance(f, bytes) else f for f in h5['filenames'][:]]
        if stored != all_fnames[:n_done]:
            raise RuntimeError(
                f'{h5_path} already holds {n_done} rows that do not match the '
                f'images in {img_dir}; delete the file or use a different h5_path.'
            )
        if n_done:
            print(f'Resuming: {n_done} of {len(all_fnames)} images already stored')
    else:
        # A lone dataset is an unusable leftover; drop it and start from scratch.
        for name in present:
            del h5[name]

    for i in tqdm(range(n_done, len(all_fnames), batch_size)):
        t1 = time.time()
        fnames_batch = all_fnames[i:i + batch_size]

        # Preprocess on the CPU, then move the whole batch to the device at once.
        images = []
        for fname in fnames_batch:
            # The context manager closes the underlying file handle promptly.
            with Image.open(os.path.join(img_dir, fname)) as img:
                images.append(vis_processors["eval"](img.convert("RGB")))
        batch_torch = torch.stack(images).to(device)

        # Inference only, so skip autograd bookkeeping.
        with torch.no_grad():
            features = model.extract_features({'image': batch_torch}, mode="image")
        proj = features.image_embeds_proj.cpu().numpy().astype(np.float32)

        fnames_to_save.extend(fnames_batch)
        features_to_save.append(proj)
        times.append(time.time() - t1)

        # Flush on the number of buffered rows, so the schedule does not depend
        # on batch_size evenly dividing save_interval.
        if len(fnames_to_save) >= save_interval:
            # hdf5 storage see: https://stackoverflow.com/a/67334192/5582470
            _flush(h5, fnames_to_save, features_to_save)
            fnames_to_save = []
            features_to_save = []

    # save final remainder of images (skipped when the buffer is empty)
    if fnames_to_save:
        print(f'Saving features, avg time/batch:{np.mean(times):.1f} s')
        t1 = time.time()
        _flush(h5, fnames_to_save, features_to_save)
        fnames_to_save = []
        features_to_save = []
        print(f'Data saved in {time.time()-t1:.3f} s')

print(f'Finished in {time.time()-t_start:.1f}s')
if times:
    # No batches are processed when everything was already stored.
    print(f'Avg time/batch: {np.mean(times):.1f} s')

100%|██████████| 4887/4887 [2:53:06<00:00,  2.13s/it]  


Saving features, avg time/batch:2.1 s
Data saved in 0.613 s
Finished in 10387.0s
Avg time/batch: 2.1 s


#### Check file contents

In [3]:
# Re-open the saved file read-only and report the dataset sizes plus a few sample names.
with h5py.File(h5_path, 'r') as h5:
    features, filenames = h5['features'], h5['filenames']

    for label, dset in (('Features:', features), ('Filenames:', filenames)):
        print(label, dset.shape)
    print(filenames[:3])

Features: (39091, 32, 256)
Filenames: (39091,)
[b'0a0d80b53f4a0c4d9fc8818b7f6e64a4-transcode-D05526.jpg'
 b'0a1a121f35f43d900e62f072b4511e02-transcode-14537_133.jpg'
 b'0a1a13839b97ea692fff6b354dc8010c-transcode-MA_SCMS_FO_00168.jpg']
