In [10]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torchvision import transforms

from CRUMB import CRUMB

In [11]:
batch_size = 80
imsize = 150 # this value is fixed

# ------------------------------------------------------------------------------------------------------
# import dataset:

# data augmentation

crop = transforms.CenterCrop(imsize)
rotate = transforms.RandomRotation([-180, 180])
totensor = transforms.ToTensor()
normalise = transforms.Normalize((0.0029,), (0.0341,)) # CRUMB mean and stdev

transforms = transforms.Compose([
    crop,
    rotate,
    totensor,
    normalise
])

# load training and test set
# this will download CRUMB to a directory called "crumb"
test_data = CRUMB('crumb', download=True, train=False, transform=transforms)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)
train_data = CRUMB('crumb', download=True, train=True, transform=transforms)
train_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [12]:
# to retrieve a filename for image 0:

test_data.filenames[0]

'./CombinedCat/PNG/Scaled_Final/251.618_+038.521.png'

In [13]:
# to retrieve a complete label for image 0
# this particular source is only found in MiraBest

test_data.complete_labels[0]

array([ 3, -1, -1, -1])

In [27]:
# find all the test set images which are in MiraBest
np.where(np.transpose(test_data.complete_labels)[0] != -1)

# you can find sources in FRDEEP using index [1], sources in AT17 using index [2], and sources in MB Hybrid using index [3]

(array([  0,   2,   4,   7,   8,   9,  10,  11,  12,  13,  14,  15,  18,
         21,  22,  24,  25,  28,  30,  31,  32,  35,  36,  38,  39,  41,
         42,  43,  46,  47,  48,  50,  53,  56,  58,  59,  60,  63,  65,
         67,  68,  70,  72,  75,  83,  84,  85,  87,  89,  93,  95,  96,
        102, 104, 107, 110, 111, 112, 115, 116, 117, 118, 121, 123, 124,
        125, 128, 131, 137, 140, 144, 146, 149, 152, 154, 158, 161, 162,
        163, 166, 167, 168, 169, 170, 171, 173, 174, 175, 176, 177, 178,
        179, 181, 182, 183, 184, 185, 186, 187, 188, 191, 193, 194, 195,
        197, 199, 202, 203, 204, 206, 207, 208, 209, 210, 211, 212, 213,
        214, 215, 216, 217, 219, 223, 224, 225, 228, 230, 231, 232, 233,
        234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
        247, 248, 249, 250, 252, 260, 261]),)

In [35]:
# find all the training set sources which are only present in one dataset:

single_set_sources = []

for i in range(len(train_data)):

    if np.size(np.where(train_data.complete_labels[i] != -1)) == 1:
        
        single_set_sources = np.append(single_set_sources, i)
        
single_set_sources

array([   0.,    2.,    5., ..., 1838., 1839., 1840.])