In [None]:
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
import os
import cv2
import json
import glob
from tqdm.notebook import tqdm
from dinov2.models.vision_transformer import vit_large 
from sklearn import svm



Load the DINOv2 ViT-L/14 model from PyTorch Hub:

In [None]:
# Fetch the pretrained DINOv2 ViT-L/14 backbone through torch.hub.
dinov2_vitl14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vitl14")

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
dinov2_vitl14.to(device)
# The model is used purely as a frozen feature extractor, so switch it to
# inference mode; nn.Module instances start in training mode unless the
# loader changes that.
dinov2_vitl14.eval()

# Preprocessing applied to every image before it reaches the model:
# tensor conversion -> resize shorter side to 244 -> 224x224 centre crop -> normalise.
# NOTE(review): 244 may be a typo for 224 or an intentional margin before the crop — confirm.
# NOTE(review): DINOv2's reference preprocessing appears to use ImageNet mean/std rather
# than 0.5/0.5 — confirm before changing, since existing embeddings depend on these values.
transform_image = T.Compose([T.ToTensor(), T.Resize(244), T.CenterCrop(224), T.Normalize([0.5], [0.5])])
# Base directory under which the "Train" folder and the test image are looked up.
root_path = os.path.expanduser('~')


Functions to load an image and compute embeddings for each image in a list of images:

In [None]:

def loadImage(img: str) -> torch.Tensor:
    """Load an image file and return it as a preprocessed, batched tensor.

    Args:
        img: Path of the image file to open.

    Returns:
        The output of ``transform_image`` for the image, with a leading batch
        dimension of size 1 added via ``unsqueeze(0)``.
    """
    # Convert to RGB up front so grayscale, palette, RGBA and CMYK files all
    # yield exactly three colour channels. The context manager closes the
    # underlying file handle once the pixel data has been copied by convert().
    with Image.open(img) as pil_img:
        rgb_img = pil_img.convert("RGB")
    return transform_image(rgb_img).unsqueeze(0)


def imageEmbeddings(files: list) -> dict:
    """Compute a DINOv2 embedding for every image path in ``files``.

    Each path is loaded with ``loadImage``, moved to ``device`` and passed
    through ``dinov2_vitl14`` with gradient tracking disabled. The complete
    mapping is also written to ``all_embeddings.json`` in the current
    working directory.

    Args:
        files: Image paths to embed.

    Returns:
        A dict mapping each path to its embedding, stored as a nested list
        with a single row (the result of ``reshape(1, -1).tolist()``).
    """
    embeddings_by_file = {}
    with torch.no_grad():  # inference only, no autograd bookkeeping needed
        for path in tqdm(files):
            batch = loadImage(path).to(device)
            features = dinov2_vitl14(batch)
            # First (and only) item of the batch, flattened to one row.
            row = features[0].cpu().numpy().reshape(1, -1)
            embeddings_by_file[path] = row.tolist()
    with open("all_embeddings.json", "w") as out_file:
        serialized = json.dumps(embeddings_by_file)
        out_file.write(serialized)
    return embeddings_by_file


Create a dictionary that maps each file name to the name of the folder it is in, so that we know the label for each image. We can do so using the following code:

In [None]:
# Maps each training image's full path to its label (the name of its parent folder).
labels = {}
# Full paths of all training images, in the order they were discovered.
files: list = []


def loadLabels():
    """Populate the module-level ``labels`` and ``files`` from ``<root_path>/Train``.

    Every sub-directory of the Train folder is treated as one class; each
    JPEG file directly inside it is recorded under that class name.
    Entries of the Train folder that are not directories (for example
    stray files such as ``.DS_Store``) are skipped instead of crashing.

    Side effects:
        Clears and refills ``labels`` and ``files`` in place, so calling the
        function again does not create duplicate entries. Prints the scanned
        directory and a short summary.
    """
    ROOT_DIR = os.path.join(root_path, "Train")
    print(ROOT_DIR)
    # Start from a clean slate so the function is idempotent.
    labels.clear()
    files.clear()
    # Sorted traversal makes the file order reproducible across machines.
    for folder in sorted(os.listdir(ROOT_DIR)):
        folder_path = os.path.join(ROOT_DIR, folder)
        if not os.path.isdir(folder_path):
            continue
        for file in sorted(os.listdir(folder_path)):
            # Case-insensitive so ".JPG" / ".jpeg" files are not silently dropped.
            if file.lower().endswith((".jpg", ".jpeg")):
                full_name = os.path.join(folder_path, file)
                labels[full_name] = folder
                files.append(full_name)
    # Print a summary rather than the full dict/list, which floods the output
    # for any realistically sized dataset.
    print(f"Found {len(files)} images in {len(set(labels.values()))} classes")


loadLabels()


Start computing embeddings for the images in our training dataset. To do so, we pass the list of files we built earlier in the tutorial to the `imageEmbeddings()` function.

This code may take a few minutes/hours to run depending on the size of your dataset.

In [None]:
# Embed every training image listed in `files`; imageEmbeddings also saves the
# resulting path -> embedding mapping to all_embeddings.json.
embeddings = imageEmbeddings(files)

We are ready to start fitting our classification model using our embeddings and labels:

In [None]:


# Support-vector classifier trained on the image embeddings.
clf = svm.SVC(gamma='scale')

# Derive both the labels and the feature rows from the keys of `embeddings`,
# so row i of X is guaranteed to belong to y[i].
embedded_files = list(embeddings)
y = [labels[file] for file in embedded_files]

print(len(embedded_files))

embedding_list = [embeddings[file] for file in embedded_files]
if not embedding_list:
    raise ValueError("No embeddings available to train on; check the training directory.")

# One row per image. The feature width is inferred from the data instead of
# being hard-coded to 384, which only matches the small DINOv2 backbone and
# yields a wrong row count (or a reshape error) for the ViT-L/14 model
# loaded in this notebook.
X = np.array(embedding_list).reshape(len(embedding_list), -1)

clf.fit(X, y)



Let's classify this image:

In [None]:
# Image to classify, looked up relative to root_path.
img_path = os.path.join(root_path, "image_cab_0223.jpg")
print(img_path)
# Use the same loader and the same model as for the training embeddings
# (loadImage / dinov2_vitl14); the classifier is only valid for features
# produced by that exact pipeline.
new_image = loadImage(img_path)
with torch.no_grad():
    embedding = dinov2_vitl14(new_image.to(device))

# Flatten the single embedding to one row, matching the training matrix layout.
prediction = clf.predict(embedding[0].cpu().numpy().reshape(1, -1))

print("Predicted class: " + prediction[0])

This code returns the following output:

```
Predicted class: Cabbage
```