#### HDF5 Dataset for loading hidden state features


In [1]:
from drecg.data.utils import create_vector_repr_dataloaders

# Directory handed to `create_vector_repr_dataloaders`; the path name suggests
# it holds ViT features stored as HDF5 (machine-specific absolute path).
root_dir = '/home/daniel/data_dogs/vit_features_hdf5'

# The helper returns three loaders, unpacked here as train / validation / test.
(
    train_feat_dataloader,
    validation_dataloader,
    test_feat_dataloader,
) = create_vector_repr_dataloaders(root_dir, batch_size=32)

In [2]:
from drecg.models.feat_extraction import AttentionBasedBiDirectionalDetector
# Build the detector with its default constructor arguments; later cells move
# it to `device` and train it.
model = AttentionBasedBiDirectionalDetector()

In [3]:
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Trailing semicolon suppresses the notebook's display of the returned value.
model.to(device);

In [4]:
def batch_accuracy(outputs, labels):
    """Return the fraction of correct binary predictions in a batch.

    Args:
        outputs: Tensor of raw logits (pre-sigmoid), one per label.
        labels: Tensor of 0/1 targets holding as many elements as ``outputs``.

    Returns:
        A Python float in [0, 1]; 0.0 when the batch is empty.
    """
    total = labels.numel()
    if total == 0:
        # Nothing to score; avoid a ZeroDivisionError on an empty batch.
        return 0.0
    # sigmoid followed by round thresholds the probabilities at 0.5.
    preds = torch.round(torch.sigmoid(outputs))
    # Flatten both sides so that e.g. (N, 1) logits and (N,) labels are compared
    # element-wise instead of being broadcast into an N x N comparison, which
    # would inflate the count of "correct" predictions.
    correct = torch.sum(preds.reshape(-1) == labels.reshape(-1)).item()
    return correct / total

In [5]:
from torch.nn import BCEWithLogitsLoss
from torch.optim import AdamW
from tqdm.auto import tqdm

# One training pass over `train_feat_dataloader` with AdamW and a
# binary-cross-entropy-on-logits objective.
optimizer = AdamW(model.parameters(), lr=1e-4)
criterion = BCEWithLogitsLoss()
num_batches = len(train_feat_dataloader)

# No-op for a freshly built model, but guards against a model left in eval
# mode by an out-of-order cell run.
model.train()

# The context manager closes the progress bar even if the loop is interrupted
# or raises, so no stale bar is left behind.
with tqdm(total=num_batches) as pbar:
    for batch_num, ((feat_a, feat_b), labels) in enumerate(train_feat_dataloader):
        feat_a = feat_a.to(device)
        feat_b = feat_b.to(device)
        labels = labels.to(device).to(torch.float)

        # Reshape to the label shape rather than calling a bare squeeze():
        # squeeze() would also drop the batch dimension for a batch of size 1,
        # and BCEWithLogitsLoss requires input and target of identical shape.
        outputs = model((feat_a, feat_b)).reshape(labels.shape)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        pbar.update(1)
        # Refresh the description every 50 batches and on the final batch.
        if batch_num % 50 == 0 or batch_num == num_batches - 1:
            pbar.set_description(f'Loss: {loss.item():.4f}, Accuracy: {batch_accuracy(outputs, labels):.4f}')


  0%|          | 0/363 [00:00<?, ?it/s]

Worker 0 initialized
Worker 1 initialized
Worker 2 initialized
Worker 3 initialized
Worker 4 initialized
Worker 5 initialized


In [30]:
from tqdm.auto import tqdm


# Walk the whole dataloader without using the batches, to exercise the loading
# pipeline on its own. The `with` block closes the bar even when the cell is
# interrupted, so no separate `pbar.close()` is needed afterwards.
with tqdm(total=len(train_feat_dataloader)) as pbar:
    for (feat_a, feat_b), labels in train_feat_dataloader:
        pbar.update(1)

  0%|          | 0/363 [00:00<?, ?it/s]

Worker 1 initializedWorker 0 initialized

Worker 2 initialized
Worker 3 initialized
Worker 4 initialized
Worker 5 initialized


In [15]:
# (number of batches in the loader, number of samples in its dataset)
len(train_feat_dataloader), len(train_feat_dataloader.dataset)

(363, 11598)

In [8]:
# Index the underlying dataset directly, bypassing the DataLoader.
# NOTE(review): the recorded output is a KeyboardInterrupt, i.e. this call was
# interrupted before it returned — confirm whether the dataset supports direct
# indexing outside a DataLoader worker.
train_feat_dataloader.dataset[0]

KeyboardInterrupt: 

In [42]:
from drecg.data.utils import FeaturesDatasetHDF5

# Build a dataset over a single HDF5 file (the file name suggests augmented
# training features). NOTE(review): machine-specific absolute path.
train_dataset_augmented = FeaturesDatasetHDF5('/home/daniel/data_dogs/train_features_augmented.hdf5')

In [43]:
from drecg.data.utils import worker_init_fn
from torch.utils.data import DataLoader
# Wrap the augmented dataset in a DataLoader. This rebinds
# `train_feat_dataloader`, so cells executed afterwards iterate this loader.
train_feat_dataloader = DataLoader(
    train_dataset_augmented,
    batch_size=32,
    shuffle=False,
    num_workers=6,
    worker_init_fn=worker_init_fn,
    prefetch_factor=4,
    pin_memory=True,
)

In [44]:
# Walk the whole dataloader without using the batches, to exercise the loading
# pipeline on its own. The `with` block closes the bar even when the cell is
# interrupted, so no separate `pbar.close()` is needed afterwards.
with tqdm(total=len(train_feat_dataloader)) as pbar:
    for (feat_a, feat_b), labels in train_feat_dataloader:
        pbar.update(1)

  0%|          | 0/363 [00:00<?, ?it/s]

Worker 0 initialized
Worker 1 initialized
Worker 2 initialized
Worker 3 initialized
Worker 4 initialized
Worker 5 initialized


In [14]:
# Manually close the current progress bar; presumably used to clean up a bar
# left open by an interrupted loop cell.
pbar.close()

In [31]:
# Pull a single batch from the loader for inspection. Creating the iterator
# starts the loader's workers (see the "Worker N initialized" output).
data = next(iter(train_feat_dataloader))

Worker 0 initialized
Worker 1 initializedWorker 2 initialized

Worker 4 initializedWorker 3 initialized

Worker 5 initialized
