In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision.transforms as T

import feature_extraction

In [2]:
asset_path = Path('data/amazon/Clothing_Shoes_and_Jewelry_product_images/')
asset_path

PosixPath('data/amazon/Clothing_Shoes_and_Jewelry_product_images')

In [3]:
feature_extraction.extract_alexnet_features(asset_path=asset_path)

Listing images...
All images have extracted features :)


In [3]:
feature_extraction.extract_vit_features(
    asset_path=asset_path, batch_size=16)

Listing images...


Extracting features:   3%|▎         | 6.45k/224k [23:28<13:10:41, 4.58image/s]


KeyboardInterrupt: 

Check some feature

In [47]:
from random import choice

some_feature = choice(list(asset_path.glob('*.vit')))
print(some_feature)
some_image = asset_path / f'{some_feature.stem}.jpg'
print(some_image)
assert some_image.exists()

data/amazon/Clothing_Shoes_and_Jewelry_product_images/51vyZh2LUPL.vit
data/amazon/Clothing_Shoes_and_Jewelry_product_images/51vyZh2LUPL.jpg


In [48]:
from importlib import reload
reload(feature_extraction)

features = feature_extraction.extract_features_one_image(
    image_path=some_image,
    feature_extractor=feature_extraction.ViTFeatureExtractor(),
    device='cpu'
)
print(features.size())
loaded_feature = feature_extraction.load_features_from_file(some_feature)
print(loaded_feature.shape)

torch.Size([1, 1024])
(1024,)


In [49]:
print(features.norm())
diff = torch.from_numpy(loaded_feature) - features.to('cpu')
print(diff.norm())
idxmax = diff.argmax()
print(diff[0][idxmax])
print(loaded_feature[idxmax])
print(features[0][idxmax])

tensor(36.8510)
tensor(0.0002)
tensor(1.8835e-05)
-2.0080442
tensor(-2.0081)


In [5]:
from importlib import reload
import feature_extraction
reload(feature_extraction)

transforms = feature_extraction.SquaredCentered(size=(400,), fill=255)


device = 'cuda'
transformed_images = [
    (image_path, transforms(image.to(device=device)))
    for (image_path, image) in some_images
]

batch = torch.stack(
    [image for (image_path, image) in transformed_images]
).to(device)

show(transformed_images)

In [6]:
import json

with open(Path('assets') / 'imagenet_class_index.json') as labels_file:
    labels = json.load(labels_file)

choices(list(labels.values()), k=10)

[['n03775071', 'mitten'],
 ['n04336792', 'stretcher'],
 ['n02129604', 'tiger'],
 ['n02268853', 'damselfly'],
 ['n01749939', 'green_mamba'],
 ['n03873416', 'paddle'],
 ['n03459775', 'grille'],
 ['n11939491', 'daisy'],
 ['n02948072', 'candle'],
 ['n03450230', 'gown']]

In [7]:
alexnet_feature_extractor = feature_extraction.AlexNetFeatureExtractor().to(device)
vit_feature_extractor = feature_extraction.ViTFeatureExtractor().to(device)

In [8]:
alexnet_features = alexnet_feature_extractor(batch)
print(alexnet_features['output'].argmax(dim=1))
print(alexnet_features['features'].shape)

tensor([836, 608, 445, 414, 770, 111, 885, 638, 650, 679, 672, 841, 459, 679,
        770, 797, 600, 859, 578, 774, 887, 464, 906, 774, 485, 775, 770, 618,
        499, 636, 770, 885, 841, 485, 841, 615, 667, 774, 578, 748, 512, 770,
        775, 461, 502, 488, 488, 228, 885, 918, 578, 636, 885, 678, 623, 515,
        610, 414, 414, 464, 614, 885, 906, 610, 610, 770, 522, 806, 608, 477,
        842, 770, 608, 838, 402, 916, 916, 514, 514, 399, 608, 416, 893, 770,
        679, 842, 655, 502, 770, 842, 416, 777, 608, 887, 697, 709, 836, 775,
        601, 638, 459, 770, 893, 559, 813, 700, 592, 841, 549, 514, 601, 584,
        643, 770, 885, 601, 610, 514, 399, 721, 869, 836, 398, 608, 761, 700,
        770, 770, 697, 893,  21, 869, 691, 655, 752, 691, 414, 770, 502, 869,
        911, 638, 721, 837, 749, 416, 841, 770, 639, 754, 748, 859, 793, 584,
        514, 487, 842, 630, 770, 793, 597, 601, 883, 416, 770, 414, 861, 893,
        643, 502, 545, 869, 615, 464, 459, 400, 608, 543, 689, 4

In [58]:
for i, pred in enumerate(alexnet_features['output'].argmax(dim=1), start=1):
    print(f"Prediction for Image {i}: {labels[str(pred.item())]}")

Prediction for Image 1: ['n04120489', 'running_shoe']
Prediction for Image 2: ['n03594734', 'jean']


In [9]:
vit_features = vit_feature_extractor(batch)
print(vit_features['output'].argmax(dim=1))
print(vit_features['features'].shape)

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.93 GiB (GPU 0; 5.79 GiB total capacity; 4.09 GiB already allocated; 752.44 MiB free; 4.88 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [60]:
for i, pred in enumerate(vit_features['output'].argmax(dim=1), start=1):
    print(f"Prediction for Image {i}: {labels[str(pred.item())]}")

Prediction for Image 1: ['n04120489', 'running_shoe']
Prediction for Image 2: ['n03594734', 'jean']
