In [None]:
import pandas as pd
import numpy as np
import torch
import os
from PIL import Image
from torchvision import models, transforms
from src import helpers

### In this notebook we will interactively create vector representations of the images in our dog breed dataset

First, let's get a pretrained model

In [None]:
# Build ResNet-50 initialised with torchvision's default pretrained weights.
pretrained_weights = models.ResNet50_Weights.DEFAULT
model = models.resnet50(weights=pretrained_weights)
# Put the network in evaluation mode; as the cell's last expression the
# returned module is also displayed.
model.eval()

Now we need to iterate through the dataset and create a vector representation of every image.

In [None]:
data_dir = 'data/dogs_dataset'

# Collect one {"img_path", "vector"} record for every file found under data_dir.
vectors = []
for subdir, dirs, files in os.walk(data_dir):
    # Sorting `dirs` in place fixes the order in which os.walk descends, and
    # sorting `files` fixes the order within a directory, so reruns produce
    # the records in the same sequence.
    dirs.sort()
    for file in sorted(files):
        img_path = os.path.join(subdir, file)
        try:
            # The conversion is the step that can raise (presumably on files
            # that are not readable images - TODO confirm against helpers),
            # so it has to be inside the try for the skip logic to work.
            vector = helpers.image_to_vector(img_path, model=model)
        except Exception as e:
            # Best effort: report the offending file and carry on with the rest.
            print(f"Skipping {img_path}, {e}")
            continue
        vectors.append({"img_path": img_path, "vector": vector})

We stored the vectors together with their corresponding image paths in a list - let's look at its shape and structure:

In [None]:
# Show only the size and the first few records: the full list holds one
# embedding per image and would flood the notebook output.
print(f"{len(vectors)} records")
vectors[:3]

As the last step, we want to save the list into a file (e.g. parquet)

In [None]:
# Convert the list of {"img_path", "vector"} records into a table and
# persist it as a parquet file.
output_path = "./data/vectors.parquet"
df = pd.DataFrame.from_records(data=vectors)
df.to_parquet(path=output_path)

Check that `vectors.parquet` now exists in the `data/` folder.