In [1]:
from datetime import datetime

import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from torchvision.transforms import Compose, Resize, ToTensor, Normalize, Lambda
from PIL import Image

from era_data import TabletPeriodDataset, get_IDS

# Hyperparameters

In [2]:
# Run configuration: everything that feeds into the run's version name.
BATCH_SIZE = 8
SUFFIX = '-DinoV2'
RUN_NAME_SUFFIX = '-preprocessed'
IMG_DIR = 'output/images_preprocessed'

# Full month name followed by the zero-padded day of the current date.
DATE = f'{datetime.now():%B%d}'

# IDs of all samples available under IMG_DIR (as reported by get_IDS).
IDS = get_IDS(IMG_DIR=IMG_DIR)
print(len(IDS))

# The name encodes batch size, model suffix, sample count, preprocessing tag
# and date; adjacent f-strings are concatenated into a single string.
VERSION_NAME = (
    f'period_clf_bs{BATCH_SIZE}{SUFFIX}'
    f'-{len(IDS)}_samples{RUN_NAME_SUFFIX}'
    f'_{DATE}-90-10_train_test'
)
VERSION_NAME

94936


'period_clf_bs8-DinoV2-94936_samples-preprocessed_April20-90-10_train_test'

# Load data

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [4]:
# Hold out 10% of the IDs for testing; the fixed random_state makes the
# partition repeatable across runs.
train_ids, test_ids = train_test_split(
    IDS,
    test_size=0.1,
    random_state=0,
)
(len(train_ids), len(test_ids))

(85442, 9494)

In [5]:
# Build the train and test datasets (in that order) from the same image
# directory, both with mask=True.
ds_train, ds_test = (
    TabletPeriodDataset(IDS=split_ids, IMG_DIR=IMG_DIR, mask=True)
    for split_ids in (train_ids, test_ids)
)

Filtering 94936 IDS down to provided 85442...
Filtering 94936 IDS down to provided 9494...


In [6]:
# Preprocessing applied to every image before it is fed to the backbone.
transform = Compose([
    Lambda(lambda img: img.convert('RGB')),  # force three colour channels
    Resize((224, 224)),  # Resize image to the input size used for the model
    ToTensor(),  # Convert the PIL image to a float tensor
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet mean and std
])


def collate_fn(batch):
    """Turn a list of dataset samples into a batch of model-ready tensors.

    Each sample is indexed positionally: ``sample[1]`` is the image (a
    ``torch.Tensor`` or an array accepted by ``Image.fromarray``) and
    ``sample[2]`` is the integer class label. ``sample[0]`` is ignored here
    (presumably the tablet ID -- confirm against ``TabletPeriodDataset``).

    Returns:
        ``(images_tensor, labels_tensor)`` where ``images_tensor`` stacks the
        transformed images along a new first dimension and ``labels_tensor``
        is a ``torch.long`` vector of labels.

    The returned tensors are deliberately left on the CPU. Moving them to the
    GPU here would initialise CUDA inside DataLoader worker processes whenever
    ``num_workers > 0``, which PyTorch advises against and which fails with
    fork-started workers. The consumer of the batch performs the device
    transfer instead.
    """
    preprocessed_images = []
    labels = []
    for sample in batch:
        img = sample[1]
        # PIL cannot read a tensor directly; go through NumPy first.
        if isinstance(img, torch.Tensor):
            img = img.numpy()
        preprocessed_images.append(transform(Image.fromarray(img)))
        labels.append(sample[2])

    # Stack images and labels into tensors (CPU side).
    images_tensor = torch.stack(preprocessed_images)
    labels_tensor = torch.tensor(labels, dtype=torch.long)

    return images_tensor, labels_tensor

In [7]:
# These loaders only drive feature extraction (no training happens in this
# notebook), so the train loader is NOT shuffled: the exported feature rows then
# follow the dataset order and are identical from run to run. With shuffle=True
# and no torch seed, the row order of the saved matrix changed on every run.
dl_train = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
dl_test = DataLoader(ds_test, batch_size=BATCH_SIZE, collate_fn=collate_fn, num_workers=4)

In [8]:
# Persist the IDs of each split (no index column, no header row) so the data
# behind this run can be traced later.
train_id_series = pd.Series(train_ids)
test_id_series = pd.Series(test_ids)
train_id_series.to_csv(
    f'output/clf_ids/period-train-{VERSION_NAME}.csv',
    header=None,
    index=False,
)
test_id_series.to_csv(
    f'output/clf_ids/period-test-{VERSION_NAME}.csv',
    header=None,
    index=False,
)

In [9]:
# Number of target classes: one per entry in the dataset's PERIOD_INDICES.
num_classes = len(TabletPeriodDataset.PERIOD_INDICES)
num_classes

22

# Create Model

In [10]:
# Fetch the pretrained 'dinov2_vits14' model through torch.hub, place it on the
# selected device and switch it to evaluation mode. Both .to() and .eval()
# return the module itself, so the calls can be chained.
model_dino = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14', pretrained=True)
model_dino = model_dino.to(device).eval()
model_dino

Using cache found in /home/kapond/.cache/torch/hub/facebookresearch_dinov2_main


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [11]:
# Release cached, currently unused GPU memory held by PyTorch before the
# feature-extraction passes below.
torch.cuda.empty_cache()

# Extract and Save Features Matrix

## Train set

In [12]:
# Run the backbone over the training loader and collect the model output for
# every batch together with the matching labels.
features_list = []
labels_list = []

with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for data, targets in tqdm(dl_train):
        # Use the notebook-wide `device` instead of a hard-coded 'cuda' so the
        # loop also runs on machines where the CPU fallback was selected.
        images = data.to(device)
        features = model_dino(images)
        features_list.append(features.cpu())
        # Labels are never consumed on the accelerator, so they go straight to
        # the CPU-side list without a round-trip through the GPU.
        labels_list.append(targets.cpu())

# Concatenate all features and labels.
features_all = torch.cat(features_list, dim=0)
labels_all = torch.cat(labels_list, dim=0)


  0%|          | 0/10681 [00:00<?, ?it/s]

In [13]:
# Sanity check: the feature matrix and the label vector should have the same
# number of rows.
print(features_all.shape, labels_all.shape)

torch.Size([85442, 384]) torch.Size([85442])


In [14]:
# Convert the collected tensors to NumPy and lay them out as a table: one
# column per feature dimension, plus a trailing 'label' column.
features_np, labels_np = features_all.numpy(), labels_all.numpy()
df_features = pd.DataFrame(features_np).assign(label=labels_np)

In [15]:
# Tag the export with the run's DATE (the same stamp used in VERSION_NAME)
# rather than a hard-coded day, so a re-run on another date neither mislabels
# its output nor overwrites an earlier export.
df_features.to_csv(f'output/dino_v2_train_set_vectors_masked_{DATE}.csv', index=False)

## Test set

In [16]:
# Reload the test IDs from the CSV written earlier in this notebook; the file has
# no header row and dtype=str keeps every ID as text.
# NOTE(review): this rebinds `test_ids` (previously the train_test_split output)
# to a DataFrame whose ID column is labelled 0.
test_ids = pd.read_csv(f'output/clf_ids/period-test-{VERSION_NAME}.csv', header=None, dtype=str)

In [17]:
# Rebuild the test dataset from column 0 of the reloaded ID table.
ds_test = TabletPeriodDataset(IDS=test_ids[0].to_list(), IMG_DIR=IMG_DIR, mask=True)

Filtering 94936 IDS down to provided 9494...


In [18]:
# Fresh test loader over the rebuilt dataset; no shuffle or num_workers
# arguments are passed, so the DataLoader defaults apply.
dl_test = DataLoader(ds_test, batch_size=BATCH_SIZE, collate_fn=collate_fn)

In [19]:
# Run the backbone over the test loader and collect the model output for every
# batch together with the matching labels.
features_list_test = []
labels_list_test = []

with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for data, targets in tqdm(dl_test):
        # Use the notebook-wide `device` instead of a hard-coded 'cuda' so the
        # loop also runs on machines where the CPU fallback was selected.
        images = data.to(device)
        features = model_dino(images)
        features_list_test.append(features.cpu())
        # Labels are never consumed on the accelerator, so they go straight to
        # the CPU-side list without a round-trip through the GPU.
        labels_list_test.append(targets.cpu())

# Concatenate all features and labels.
features_all_test = torch.cat(features_list_test, dim=0)
labels_all_test = torch.cat(labels_list_test, dim=0)

  0%|          | 0/1187 [00:00<?, ?it/s]

In [20]:
# Convert the collected test tensors to NumPy and lay them out as a table: one
# column per feature dimension, plus a trailing 'label' column.
features_np_test, labels_np_test = features_all_test.numpy(), labels_all_test.numpy()
df_features_test = pd.DataFrame(features_np_test).assign(label=labels_np_test)

In [21]:
# Tag the export with the run's DATE (the same stamp used in VERSION_NAME)
# rather than a hard-coded day, so a re-run on another date neither mislabels
# its output nor overwrites an earlier export.
df_features_test.to_csv(f'output/dino_v2_test_set_vectors_masked_{DATE}.csv', index=False)