<h1>VIT Testing - tamncheese Jason Kahei Tam</h1>


Import

In [1]:
import torch
import torch.nn as nn
import os
import io
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForImageClassification, AutoImageProcessor
from PIL import Image
import cv2
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Root directory that holds the image files; later cells build image paths
# from this value.
path_to_images = "/storage/scratch1/5/jtam30/fungi2024"
# Reuse the variable instead of repeating the literal so the two cannot drift.
files = os.listdir(path_to_images)
# Peek at the first two entries only, as a sanity check on the listing.
print(files[:2])

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

CPU or CUDA


In [3]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

Prepare Dataset

In [5]:
# Names of every file found anywhere under ./testing. A comprehension keeps
# the walk variables local, so the module-level `files` listing is not
# overwritten as a side effect.
file_list = [
    name for _root, _dirs, names in os.walk("testing") for name in names
]

# Training metadata; the number of distinct species is the number of classes.
train_df = pd.read_csv(
    "/storage/scratch1/5/jtam30/FungiCLEF2023_train_metadata_PRODUCTION.csv"
)
n_labels = train_df["species"].nunique()
print(f"Training Dataset has number of labels: {n_labels}")

# Validation metadata; report its own species count (not the training one).
val_df = pd.read_csv(
    "/storage/scratch1/5/jtam30/FungiCLEF2023_val_metadata_PRODUCTION.csv"
)
n_labels_val = val_df["species"].nunique()
print(f"Validation Dataset has number of labels: {n_labels_val}")

Training Dataset has number of labels: 1577
Validation Dataset has number of labels: 1577


In [4]:
class FungiDataset(Dataset):
    """Map-style dataset yielding ``(pixel_values, label)`` pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table. Must contain a ``species`` column. An ``image_path``
        column is read when ``local_filepath`` is set; otherwise a ``data``
        column holding encoded image bytes is read.
    extractor : callable
        Called as ``extractor(images=image, return_tensors="pt")``; the
        ``"pixel_values"`` entry of the result is returned with axis 0 squeezed.
    transform : callable, optional
        Called as ``transform(image=image)``; the ``"image"`` entry of the
        result replaces the image before it is passed to ``extractor``.
    local_filepath : str, optional
        Directory joined with ``image_path`` to locate files on disk.
    label2id : dict, optional
        Mapping from species name to integer class id. Pass the training
        dataset's mapping when building a validation/test dataset so the ids
        agree across splits. When omitted, the mapping is built from the
        sorted unique species of ``df``.
    """

    def __init__(
        self, df, extractor, transform=None, local_filepath=None, label2id=None
    ):
        self.df = df
        self.transform = transform
        self.extractor = extractor
        self.local_filepath = local_filepath
        if label2id is None:
            # Sorted so the ids are reproducible for a given set of species.
            label2id = {
                label: idx
                for idx, label in enumerate(sorted(self.df["species"].unique()))
            }
        # Copy so later edits to the caller's dict do not change this dataset.
        self.label2id = dict(label2id)

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(pixel_values, label)`` for the row at position ``idx``.

        Raises ``KeyError`` if the row's species is not in ``label2id``.
        """
        # Resolve the label before branching so both image sources return one.
        species_name = self.df.iloc[idx]["species"]
        label = self.label2id[species_name]

        if self.local_filepath:
            img_path = os.path.join(
                self.local_filepath,
                self.df["image_path"].values[idx].replace("JPG", "jpg"),
            )
            try:
                # cv2.imread signals an unreadable/missing file by returning
                # None rather than raising, so check for it explicitly.
                image = cv2.imread(img_path)
                if image is None:
                    raise FileNotFoundError("cv2.imread returned None")
                # OpenCV decodes to BGR; convert to RGB channel order.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            except Exception as e:
                # Best effort: keep the batch going with a random placeholder.
                # NOTE(review): the placeholder is float32 in [-1, 1] while
                # decoded images are uint8 - confirm the extractor treats it
                # as intended.
                print(f"Missing image: {img_path}: {e}")
                image = np.random.uniform(-1, 1, size=(299, 299, 3)).astype(np.float32)
        else:
            # No directory given: decode the image from bytes stored in `df`.
            image = Image.open(io.BytesIO(self.df.data.values[idx]))
            image = np.array(image)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented["image"]

        image = self.extractor(images=image, return_tensors="pt")[
            "pixel_values"
        ].squeeze(0)
        return image, torch.tensor(label, dtype=torch.long)

Configure DINOv2


In [9]:
# Both the image processor and the classifier come from the same checkpoint;
# the classification head is sized to the number of training species.
dinov2_checkpoint = "facebook/dinov2-base"
extractor = AutoImageProcessor.from_pretrained(dinov2_checkpoint)
model = AutoModelForImageClassification.from_pretrained(
    dinov2_checkpoint,
    num_labels=n_labels,
)

Some weights of Dinov2ForImageClassification were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device)

Freeze Backbone

In [11]:
# Freeze the pretrained encoder so that only the classification head learns.
# `base_model` is the Transformers accessor for the wrapped encoder; it avoids
# depending on the attribute name, which for this model is `dinov2` (see the
# printed module tree), not `backbone`.
for param in model.base_model.parameters():
    param.requires_grad = False

# List what is still trainable as a check that only the head remains.
print("Trainable Layers")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name} is trainable")

Trainable Layers
classifier.weight is trainable
classifier.bias is trainable


Load Data


In [12]:
img_dir = path_to_images + "/"
print(path_to_images)
print(f"local_filepath is {img_dir}")

# Training split: shuffled each epoch.
train_dataset = FungiDataset(train_df, extractor, local_filepath=img_dir)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

val_dataset = FungiDataset(val_df, extractor, local_filepath=img_dir)
# Each dataset derives its label ids from its own species list, so the same
# species can get different ids in the two splits. Reuse the training mapping
# whenever it covers every validation species, so validation targets line up
# with the classifier's outputs.
unseen_species = set(val_dataset.label2id) - set(train_dataset.label2id)
if unseen_species:
    print(
        f"Warning: {len(unseen_species)} validation species are absent from "
        "the training set; validation label ids are not aligned with training"
    )
else:
    val_dataset.label2id = train_dataset.label2id
# Order does not affect the validation metrics, so keep it deterministic.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

/storage/scratch1/5/jtam30/fungi2024
local_filepath is /storage/scratch1/5/jtam30/fungi2024/


Ranked List Code

In [None]:
# in progress

Train the Model


In [14]:
# Optimise only the parameters that still require gradients.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=0.00001
)
criterion = nn.CrossEntropyLoss()

epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    # ---- Training pass ----
    model.train()
    train_loss = 0  # running sum of per-batch losses for this epoch
    for images, labels in train_loader:
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    # Mean of the per-batch losses (len(train_loader) is the batch count).
    avg_train_loss = train_loss / len(train_loader)
    print(f"Training Loss: {avg_train_loss} for epoch {epoch + 1}/{epochs}")

    # ---- Validation pass ----
    model.eval()
    val_loss = 0  # running sum of per-batch validation losses
    correct = 0  # number of samples whose top-1 prediction matches the label
    total = 0  # number of validation samples seen

    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validating"):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Index of the largest logit along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    # Top-1 accuracy as a percentage.
    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.2f}%\n")

Epoch 1/10
Training Loss: 6.910690825036231 for epoch 1/10
Epoch 2/10
Training Loss: 6.7553349657261625 for epoch 2/10
Epoch 3/10
Training Loss: 6.637313010844778 for epoch 3/10
Epoch 4/10
Training Loss: 6.514225574249917 for epoch 4/10
Epoch 5/10
Training Loss: 6.398634565637467 for epoch 5/10
Epoch 6/10
Training Loss: 6.296313671355552 for epoch 6/10
Epoch 7/10
Training Loss: 6.182277212751672 for epoch 7/10
Epoch 8/10
Training Loss: 6.083072205807301 for epoch 8/10
Epoch 9/10
Training Loss: 5.979990857712766 for epoch 9/10
Epoch 10/10
Training Loss: 5.8737304464299624 for epoch 10/10


Saving the Model

In [15]:
# Write the model's state_dict (its weights, not the whole model object) to
# the trial checkpoint file.
torch.save(
    obj=model.state_dict(),
    f="/storage/home/hcoda1/5/jtam30/clef/fungiclef-2025/user/tamncheese/_dataset_trial/model_trial.pth",
)

Extract Embeddings

In [23]:
# Put the model in evaluation mode before extracting embeddings; as the last
# expression in the cell, the returned module is displayed as the cell output.
model.eval()

Dinov2ForImageClassification(
  (dinov2): Dinov2Model(
    (embeddings): Dinov2Embeddings(
      (patch_embeddings): Dinov2PatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): Dinov2Encoder(
      (layer): ModuleList(
        (0-11): 12 x Dinov2Layer(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (attention): Dinov2SdpaAttention(
            (attention): Dinov2SdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): Dinov2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)