In [1]:
import torch
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as utils

In [2]:
# Root directory of the data (the constant name suggests a host called
# "deepthought" -- TODO confirm). `data_dir` is an alias for the same string.
data_dir = DATA_DIR_DEEPTHOUGHT = "/storage/yw18581/data"

# Sub-folder from which the train/validation/test .npz archive is loaded.
train_test = os.path.join(data_dir, "train_validation_test")

In [3]:
# Read the image array ("x") and the mask array ("y") from the .npz archive.
# The path is built from `train_test` (configuration cell) rather than being
# repeated as an absolute string, and the archive is opened in a `with` block
# so the file handle held by the NpzFile is released after both arrays are read.
with np.load(os.path.join(train_test, "Xy_train+val_clean_300_24_10_25.npz")) as data:
    x = data["x"]
    y = data['y']

# transform to torch tensors; torch.from_numpy shares memory with the arrays
tensor_x = torch.from_numpy(x)
tensor_y = torch.from_numpy(y)

# Indexing the TensorDataset returns the tuple (tensor_x[i], tensor_y[i]).
xy_dataset = utils.TensorDataset(tensor_x, tensor_y)

In [4]:
class UNetDataset(Dataset):
    """Serve (image, mask) pairs as dictionaries.

    Parameters
    ----------
    xy_dataset : indexable
        Object supporting ``len()`` and integer indexing whose items are
        ``(image, mask)`` pairs, e.g. a ``TensorDataset``.
    transform : callable, optional
        Called with the sample dictionary; its return value replaces the
        sample. A falsy value (the default ``None``) disables it.
    """

    def __init__(self, xy_dataset, transform=None):
        self.transform = transform
        self._data = xy_dataset

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'masks': ...}`` for position ``idx``."""
        # Look the pair up once. Indexing the wrapped dataset separately for
        # the image and the mask repeats whatever work it does on access and
        # could pair an image with a mismatched mask if that dataset is not
        # deterministic.
        pair = self._data[idx]
        sample = {'image': pair[0], 'masks': pair[1]}

        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Number of pairs in the wrapped dataset."""
        return len(self._data)


class ToTensor(object):
    """Convert the arrays in a sample to channels-first tensors.

    The sample is a dict with 'image' and 'masks' entries laid out as
    H x W x C. Each entry may be a NumPy array or a torch tensor. The
    returned dict has the same two keys, holding tensors laid out as
    C x H x W.
    """

    def __call__(self, sample):
        image, mask = sample['image'], sample['masks']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        # permute(2, 0, 1) moves the channel axis to the front and keeps H
        # ahead of W. transpose(2, 0) only exchanges the first and last axes,
        # which produces C x W x H (a spatially transposed image); that goes
        # unnoticed when H == W.
        # torch.as_tensor leaves tensors untouched and wraps ndarrays, so the
        # transform also works on NumPy input.
        image = torch.as_tensor(image).permute(2, 0, 1)
        mask = torch.as_tensor(mask).permute(2, 0, 1)
        return {'image': image,
                'masks': mask}


In [5]:
# Wrap the tensor pairs so that every sample is returned as a dict and passed
# through the ToTensor transform.
train_dataset = UNetDataset(
    xy_dataset,
    transform=ToTensor(),
)

In [6]:
# Batches of four samples, reshuffled on every pass, loaded by four worker
# processes.
dataloader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
)

In [7]:
# Number of samples in the dataset (UNetDataset.__len__ forwards to the
# wrapped TensorDataset).
len(train_dataset)

960

In [8]:
# Sanity check: fetch the first five batches and print their shapes.
# The loop variable is called `batch` (as in the later cells) so that it does
# not overwrite `data`, the archive loaded at the top of the notebook.
for i, batch in enumerate(dataloader):
    print(i, batch['image'].shape, batch['masks'].shape)
    if i == 4:
        break

0 torch.Size([4, 1, 1400, 1400]) torch.Size([4, 1, 1400, 1400])
1 torch.Size([4, 1, 1400, 1400]) torch.Size([4, 1, 1400, 1400])
2 torch.Size([4, 1, 1400, 1400]) torch.Size([4, 1, 1400, 1400])
3 torch.Size([4, 1, 1400, 1400]) torch.Size([4, 1, 1400, 1400])
4 torch.Size([4, 1, 1400, 1400]) torch.Size([4, 1, 1400, 1400])


#### New: after moving the class definitions into the script "Transformers.py"

In [9]:
import sys, os

In [10]:
import numpy as np

In [11]:
from torchvision import transforms

In [12]:
from torch.utils.data import DataLoader

In [13]:
# Make the parent directory importable (Transformers.py is presumably located
# there -- TODO confirm). The membership test keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [14]:
from Transformers import UNetDataset, ChannelsFirst, ToTensor, Rescale

In [15]:
# Root directory of the data; `data_dir` is an alias for the same string.
data_dir = DATA_DIR_DEEPTHOUGHT = "/storage/yw18581/data"

# Sub-folder from which the train/validation/test .npz archive is loaded.
train_test = os.path.join(data_dir, "train_validation_test")

In [16]:
# Read the image array ("x") and the mask array ("y") from the .npz archive.
# The path reuses `train_test` instead of repeating the absolute string, and
# the `with` block closes the archive's file handle once the arrays are read.
with np.load(os.path.join(train_test, "Xy_train+val_clean_300_24_10_25.npz")) as data:
    x = data["x"]
    y = data['y']

In [17]:
# Per-sample preprocessing; Compose applies the steps in list order.
composed = transforms.Compose([
    Rescale(0.25),    # recorded shapes go from 1400x1400 to 350x350
    ChannelsFirst(),
    ToTensor(),
])

In [18]:
# The UNetDataset imported from Transformers.py takes the image and mask
# arrays directly, together with the composed preprocessing pipeline.
train_dataset = UNetDataset(
    x,
    y,
    transform=composed,
)

In [19]:
# Batches of four samples, reshuffled on every pass, loaded by four worker
# processes.
data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
)

In [20]:
# Sanity check on the first five batches: image shape and dtype.
# NOTE(review): the recorded output reports torch.float64; PyTorch layers use
# float32 parameters by default, so a cast may be needed before training --
# confirm against the model code.
for batch_idx, batch in enumerate(data_loader):
    images = batch['image']
    print(images.size(), images.dtype)
    if batch_idx == 4:
        break

torch.Size([4, 1, 350, 350]) torch.float64
torch.Size([4, 1, 350, 350]) torch.float64
torch.Size([4, 1, 350, 350]) torch.float64
torch.Size([4, 1, 350, 350]) torch.float64
torch.Size([4, 1, 350, 350]) torch.float64


#### Test the new dataset, including the `dist` labels

In [1]:
import sys, os
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms



In [2]:
# Make the parent directory importable (Transformers.py is presumably located
# there -- TODO confirm). The membership test keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [3]:
# Root directory of the data; `data_dir` is an alias for the same string.
data_dir = DATA_DIR_DEEPTHOUGHT = "/storage/yw18581/data"

# Sub-folder from which the train/validation/test .npz archive is loaded.
train_test = os.path.join(data_dir, "train_validation_test")

In [4]:
# Read the image array ("x"), the mask array ("y") and the per-sample "dist"
# labels from the .npz archive. The path reuses `train_test` instead of
# repeating the absolute string, and the `with` block closes the archive's
# file handle once the arrays are read.
with np.load(os.path.join(train_test, "Xy_train+val_clean_300_24_10_25.npz")) as data:
    x = data["x"]
    y = data['y']
    dist = data['dist']

In [5]:
from Transformers import UNetDataset, ChannelsFirst, ToTensor, Rescale

In [6]:
# Per-sample preprocessing; Compose applies the steps in list order.
composed = transforms.Compose([
    Rescale(0.25),
    ChannelsFirst(),
    ToTensor(),
])

In [7]:
# Same dataset as before, now also given the `dist` labels so that each
# sample carries a 'dist' entry.
train_dataset = UNetDataset(
    x,
    y,
    transform=composed,
    dist=dist,
)

In [8]:
# Batches of sixteen samples, reshuffled on every pass, loaded by four worker
# processes.
data_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)

In [9]:
# Sanity check on the first five batches: shapes of image, mask and dist.
for batch_idx, batch in enumerate(data_loader):
    shapes = (batch['image'].size(), batch['mask'].size(), batch['dist'].size())
    print(*shapes)
    if batch_idx == 4:
        break

torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350]) torch.Size([16])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350]) torch.Size([16])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350]) torch.Size([16])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350]) torch.Size([16])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350]) torch.Size([16])


In [10]:
# `dist` labels of the last batch fetched by the loop above; the recorded
# output shows the raw label values (2, 4, 10, 25), not class indices.
batch['dist']

tensor([10,  4, 25,  4, 25, 10, 10,  4, 10, 10, 10,  4, 10,  2,  4, 25])

#### Convert the `dist` labels to class indices

In [15]:
# Display the raw `dist` label array loaded from the archive.
dist

array([ 4, 25,  4,  2, 25,  2, 10, 10,  2, 25,  2,  4,  2, 25, 25,  2, 25,
        2, 25,  2,  4,  2, 10,  4,  2,  2, 25,  2,  4,  4, 10,  2, 10, 25,
        2,  2, 25, 10, 10,  2,  4,  2,  4,  4,  2,  4, 10,  2,  4, 10, 10,
       25,  2,  4, 25, 25, 25, 25,  4,  4, 25,  4, 10,  4,  4, 10, 25,  4,
        4, 10,  2,  4, 10, 25,  2, 25,  2,  4,  4, 10, 25,  2,  4,  4, 10,
        4, 25, 25,  4,  2, 10, 10, 10, 10,  2, 25,  4, 25,  4, 25, 10, 10,
       10, 10, 10, 25, 25, 25,  4,  4,  2,  2, 10,  2, 25,  2,  2,  2, 10,
        2,  4,  4,  2, 25, 25, 25, 25,  4,  2, 25,  4, 10, 10,  4, 25,  2,
       10,  2, 25,  2,  2, 10, 25,  2,  4, 10,  2, 25,  2, 10, 10,  4, 25,
        4, 10, 25, 25,  2,  2, 10,  2, 25, 25, 10, 10,  2,  2,  4,  4,  2,
       10,  2,  4,  2,  4, 25, 25, 25, 25, 25, 25,  2,  2, 10,  2, 10,  2,
        2, 25,  2, 10,  2,  2, 10,  2,  4, 10, 25,  4, 25,  2, 25, 10, 10,
        2,  4, 25, 10, 25,  2,  2,  4,  2,  4, 10,  4,  4, 10,  4, 25, 10,
       10, 10,  4, 25,  2

In [18]:
# np.unique returns the sorted distinct labels; with return_inverse=True it
# also returns, for every element of `dist`, the position of its label in
# that sorted list. Those positions serve as class indices (in the recorded
# output 2 -> 0, 4 -> 1, 10 -> 2, 25 -> 3).
dist_labels, dist_class_idx = np.unique(dist, return_inverse=True)
torch.from_numpy(dist_class_idx)

tensor([1, 3, 1, 0, 3, 0, 2, 2, 0, 3, 0, 1, 0, 3, 3, 0, 3, 0, 3, 0, 1, 0, 2, 1,
        0, 0, 3, 0, 1, 1, 2, 0, 2, 3, 0, 0, 3, 2, 2, 0, 1, 0, 1, 1, 0, 1, 2, 0,
        1, 2, 2, 3, 0, 1, 3, 3, 3, 3, 1, 1, 3, 1, 2, 1, 1, 2, 3, 1, 1, 2, 0, 1,
        2, 3, 0, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 1, 3, 3, 1, 0, 2, 2, 2, 2, 0, 3,
        1, 3, 1, 3, 2, 2, 2, 2, 2, 3, 3, 3, 1, 1, 0, 0, 2, 0, 3, 0, 0, 0, 2, 0,
        1, 1, 0, 3, 3, 3, 3, 1, 0, 3, 1, 2, 2, 1, 3, 0, 2, 0, 3, 0, 0, 2, 3, 0,
        1, 2, 0, 3, 0, 2, 2, 1, 3, 1, 2, 3, 3, 0, 0, 2, 0, 3, 3, 2, 2, 0, 0, 1,
        1, 0, 2, 0, 1, 0, 1, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 2, 0, 0, 3, 0, 2, 0,
        0, 2, 0, 1, 2, 3, 1, 3, 0, 3, 2, 2, 0, 1, 3, 2, 3, 0, 0, 1, 0, 1, 2, 1,
        1, 2, 1, 3, 2, 2, 2, 1, 3, 0, 2, 0, 2, 1, 0, 2, 0, 1, 3, 2, 2, 3, 0, 3,
        1, 2, 0, 3, 2, 3, 0, 1, 2, 0, 1, 1, 3, 2, 3, 1, 3, 3, 3, 3, 3, 2, 2, 1,
        2, 0, 0, 1, 2, 1, 0, 3, 0, 1, 3, 0, 3, 0, 0, 0, 3, 2, 0, 1, 0, 3, 1, 3,
        1, 0, 2, 2, 1, 1, 3, 3, 1, 0, 1,

In [10]:
# NOTE(review): the recorded output is all zeros with shape (16, 1), unlike
# the class indices 0-3 obtained from the full `dist` array. Presumably the
# np.unique(..., return_inverse=True) encoding is applied to one sample at a
# time inside the dataset (a single value always maps to index 0) -- verify in
# Transformers.py and encode the whole label array once instead.
batch['dist']

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])

In [10]:
# Drop the dataset built with `dist` before rebuilding it without labels.
del train_dataset

In [11]:
# Drop the loader that wrapped the deleted dataset.
del data_loader

In [7]:
# Rebuild the dataset without `dist` to check that the labels stay optional.
train_dataset = UNetDataset(
    x,
    y,
    transform=composed,
)

In [8]:
# Batches of sixteen samples, reshuffled on every pass, loaded by four worker
# processes.
data_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)

In [9]:
# Sanity check on the first five batches: shapes of image and mask only.
for batch_idx, batch in enumerate(data_loader):
    shapes = (batch['image'].size(), batch['mask'].size())
    print(*shapes)
    if batch_idx == 4:
        break

torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350])
torch.Size([16, 1, 350, 350]) torch.Size([16, 1, 350, 350])
