In [1]:
import h5py
import torch
import random
import numpy as np
import pandas as pd
import torchmetrics
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from lora_finetune import LoraConv,LoraLinear
import torch.nn as nn
from copy import deepcopy
from dataset import BaselineDataset, PrecomputedDataset,precompute

In [2]:
# Locations of the HDF5 image files, relative to the notebook's working directory.
TRAIN_IMAGES_PATH = '../train.h5'
VAL_IMAGES_PATH = '../val.h5'

# Seed every random number generator the notebook imports so that a
# "Restart Kernel -> Run All" reproduces the same results. NumPy is imported
# above but was previously left unseeded.
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)
# Kept last on purpose: it returns None, so the cell produces no output.
np.random.seed(SEED)

In [3]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Working on {device}.')

# Fetch the `dinov2_vits14` backbone through torch.hub (downloaded on first
# use, cached afterwards), move it to the chosen device and put it in
# inference mode. The final expression displays the module structure.
feature_extractor = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
feature_extractor = feature_extractor.to(device)
feature_extractor.eval()

  return torch._C._cuda_getDeviceCount() > 0


Working on cpu.


Using cache found in /raid/home/detectionfeuxdeforet/elyaalaoui_adi/.cache/torch/hub/facebookresearch_dinov2_main


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [4]:
# Channel statistics measured on the training set; used to standardise inputs.
mean = [0.7439, 0.5892, 0.7210]
std = [0.1717, 0.2065, 0.1664]

# Preprocessing pipeline: resize to 98x98 (a multiple of the backbone's
# 14-pixel patch size, i.e. 7 patches per side), then normalise with the
# statistics above.
transform = transforms.Compose([
    transforms.Resize(size=(98, 98)),
    transforms.Normalize(mean, std),
])

# Both splits are opened with the same 'train' mode argument.
# NOTE(review): presumably 'train' selects the labelled mode of
# BaselineDataset, which is why it is also used for validation - confirm in
# dataset.py.
train_dataset, val_dataset = (
    BaselineDataset(images_path, transform, 'train')
    for images_path in (TRAIN_IMAGES_PATH, VAL_IMAGES_PATH)
)


In [None]:
BATCH_SIZE = 64

# Loaders over the image datasets; only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Run each loader through `precompute` with the backbone and wrap whatever it
# returns in a PrecomputedDataset (training split first, then validation).
# NOTE(review): this rebinds `train_dataset` / `val_dataset`, so the cell is
# not idempotent - re-running it would feed the precomputed datasets back into
# the loaders. The loaders above keep referring to the original image datasets.
train_dataset, val_dataset = (
    PrecomputedDataset(*precompute(loader, feature_extractor, device))
    for loader in (train_dataloader, val_dataloader)
)

In [8]:
# Work on an independent copy so that `feature_extractor` itself is not
# modified when layers of `lora_model` are replaced below.
lora_model=deepcopy(feature_extractor)

def create_lora_model(model, alpha, r):
    """Replace every Conv2d / Linear layer under ``model`` with a LoRA wrapper, in place.

    The module tree is walked recursively. Each ``nn.Conv2d`` child is replaced
    by ``LoraConv(child, r=r, alpha=alpha)`` and each ``nn.Linear`` child by
    ``LoraLinear(child, r=r, alpha=alpha)``; any other child is descended into.

    Modules that are already ``LoraConv`` / ``LoraLinear`` are skipped, so
    calling the function again on an adapted model (for instance when a
    notebook cell is re-run) does not stack a second adapter around the
    wrapped layers.

    Args:
        model: Root ``nn.Module`` whose sub-modules are rewritten in place.
        alpha: Forwarded unchanged to the wrappers (presumably the LoRA
            scaling factor - confirm in ``lora_finetune``).
        r: Forwarded unchanged to the wrappers (presumably the LoRA rank -
            confirm in ``lora_finetune``).

    Returns:
        None. ``model`` is modified in place.
    """
    # Iterate over a snapshot so that replacing children cannot disturb the walk.
    for name, module in list(model.named_children()):
        if isinstance(module, (LoraConv, LoraLinear)):
            # Already adapted: neither re-wrap it nor descend into it.
            continue
        if isinstance(module, nn.Conv2d):
            setattr(model, name, LoraConv(module, r=r, alpha=alpha))
        elif isinstance(module, nn.Linear):
            setattr(model, name, LoraLinear(module, r=r, alpha=alpha))
        else:
            create_lora_model(module, alpha=alpha, r=r)
    
# Rewrite the Conv2d / Linear layers of the copied backbone with LoRA wrappers.
# NOTE(review): alpha=6 and r=4 are unexplained magic numbers - consider
# naming them as constants in the configuration cell.
create_lora_model(lora_model,alpha=6,r=4)

In [11]:
# Sanity check: push one training image through the LoRA-adapted backbone and
# display the output shape.
#
# The sample is read from a fresh BaselineDataset rather than `train_dataset`,
# because the precompute cell above rebinds `train_dataset` to a
# PrecomputedDataset. After "Restart Kernel -> Run All" that name would
# therefore no longer yield images (presumably it yields backbone features -
# confirm in dataset.py), and this cell only worked through out-of-order
# execution.
img, lab = BaselineDataset(TRAIN_IMAGES_PATH, transform, 'train')[0]

# Add a batch dimension, move the input to the model's device, and skip
# autograd bookkeeping since only the shape is inspected.
with torch.no_grad():
    lora_features = lora_model(img.unsqueeze(0).to(device))
lora_features.shape

torch.Size([1, 384])